/* $Id: HMVMXR0.cpp 97224 2022-10-18 22:56:19Z vboxsync $ */
/** @file
 * HM VMX (Intel VT-x) - Host Context Ring-0.
 */

/*
 * Copyright (C) 2012-2022 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */


/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
#define LOG_GROUP LOG_GROUP_HM
#define VMCPU_INCL_CPUM_GST_CTX
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "HMInternal.h"
#include
#include
#include "HMVMXR0.h"
#include "VMXInternal.h"
#include "dtrace/VBoxVMM.h"


/*********************************************************************************************************************************
*   Defined Constants And Macros                                                                                                 *
*********************************************************************************************************************************/
#ifdef DEBUG_ramshankar
# define HMVMX_ALWAYS_SAVE_GUEST_RFLAGS
# define HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
# define HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE
# define HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
# define HMVMX_ALWAYS_CLEAN_TRANSIENT
# define HMVMX_ALWAYS_CHECK_GUEST_STATE
# define HMVMX_ALWAYS_TRAP_ALL_XCPTS
# define HMVMX_ALWAYS_TRAP_PF
# define HMVMX_ALWAYS_FLUSH_TLB
# define HMVMX_ALWAYS_SWAP_EFER
#endif


/*********************************************************************************************************************************
*   Structures and Typedefs                                                                                                      *
*********************************************************************************************************************************/
/**
 * VMX page allocation information.
 */
typedef struct
{
    uint32_t    fValid;       /**< Whether to allocate this page (e.g., based on a CPU feature). */
    uint32_t    uPadding0;    /**< Padding to ensure arrays of these structs are aligned to a multiple of 8. */
    PRTHCPHYS   pHCPhys;      /**< Where to store the host-physical address of the allocation. */
    PRTR0PTR    ppVirt;       /**< Where to store the host-virtual address of the allocation. */
} VMXPAGEALLOCINFO;
/** Pointer to VMX page-allocation info. */
typedef VMXPAGEALLOCINFO *PVMXPAGEALLOCINFO;
/** Pointer to a const VMX page-allocation info.
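 *  Arrays of these are handed to hmR0VmxPagesAllocZ(); see hmR0VmxAllocVmcsInfo()
 *  and hmR0VmxStructsAlloc() below for how the fValid flag selects which of the
 *  pages actually get allocated.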
*/ typedef const VMXPAGEALLOCINFO *PCVMXPAGEALLOCINFO; AssertCompileSizeAlignment(VMXPAGEALLOCINFO, 8); /********************************************************************************************************************************* * Internal Functions * *********************************************************************************************************************************/ static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient); static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo); /** * Checks if the given MSR is part of the lastbranch-from-IP MSR stack. * @returns @c true if it's part of LBR stack, @c false otherwise. * * @param pVM The cross context VM structure. * @param idMsr The MSR. * @param pidxMsr Where to store the index of the MSR in the LBR MSR array. * Optional, can be NULL. * * @remarks Must only be called when LBR is enabled. */ DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchFromMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr) { Assert(pVM->hmr0.s.vmx.fLbr); Assert(pVM->hmr0.s.vmx.idLbrFromIpMsrFirst); uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1; uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst; if (idxMsr < cLbrStack) { if (pidxMsr) *pidxMsr = idxMsr; return true; } return false; } /** * Checks if the given MSR is part of the lastbranch-to-IP MSR stack. * @returns @c true if it's part of LBR stack, @c false otherwise. * * @param pVM The cross context VM structure. * @param idMsr The MSR. * @param pidxMsr Where to store the index of the MSR in the LBR MSR array. * Optional, can be NULL. * * @remarks Must only be called when LBR is enabled and when lastbranch-to-IP MSRs * are supported by the CPU (see hmR0VmxSetupLbrMsrRange). */ DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchToMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr) { Assert(pVM->hmr0.s.vmx.fLbr); if (pVM->hmr0.s.vmx.idLbrToIpMsrFirst) { uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrToIpMsrLast - pVM->hmr0.s.vmx.idLbrToIpMsrFirst + 1; uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrToIpMsrFirst; if (idxMsr < cLbrStack) { if (pidxMsr) *pidxMsr = idxMsr; return true; } } return false; } /** * Gets the active (in use) VMCS info. object for the specified VCPU. * * This is either the guest or nested-guest VMCS info. and need not necessarily * pertain to the "current" VMCS (in the VMX definition of the term). For instance, * if the VM-entry failed due to an invalid-guest state, we may have "cleared" the * current VMCS while returning to ring-3. However, the VMCS info. object for that * VMCS would still be active and returned here so that we could dump the VMCS * fields to ring-3 for diagnostics. This function is thus only used to * distinguish between the nested-guest or guest VMCS. * * @returns The active VMCS information. * @param pVCpu The cross context virtual CPU structure. * * @thread EMT. * @remarks This function may be called with preemption or interrupts disabled! */ DECLINLINE(PVMXVMCSINFO) hmGetVmxActiveVmcsInfo(PVMCPUCC pVCpu) { if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs) return &pVCpu->hmr0.s.vmx.VmcsInfo; return &pVCpu->hmr0.s.vmx.VmcsInfoNstGst; } /** * Returns whether the VM-exit MSR-store area differs from the VM-exit MSR-load * area. * * @returns @c true if it's different, @c false otherwise. * @param pVmcsInfo The VMCS info. object. 
*/ DECL_FORCE_INLINE(bool) hmR0VmxIsSeparateExitMsrStoreAreaVmcs(PCVMXVMCSINFO pVmcsInfo) { return RT_BOOL( pVmcsInfo->pvGuestMsrStore != pVmcsInfo->pvGuestMsrLoad && pVmcsInfo->pvGuestMsrStore); } /** * Sets the given Processor-based VM-execution controls. * * @param pVmxTransient The VMX-transient structure. * @param uProcCtls The Processor-based VM-execution controls to set. */ static void hmR0VmxSetProcCtlsVmcs(PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls) { PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo; if ((pVmcsInfo->u32ProcCtls & uProcCtls) != uProcCtls) { pVmcsInfo->u32ProcCtls |= uProcCtls; int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls); AssertRC(rc); } } /** * Removes the given Processor-based VM-execution controls. * * @param pVCpu The cross context virtual CPU structure. * @param pVmxTransient The VMX-transient structure. * @param uProcCtls The Processor-based VM-execution controls to remove. * * @remarks When executing a nested-guest, this will not remove any of the specified * controls if the nested hypervisor has set any one of them. */ static void hmR0VmxRemoveProcCtlsVmcs(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls) { PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo; if (pVmcsInfo->u32ProcCtls & uProcCtls) { #ifdef VBOX_WITH_NESTED_HWVIRT_VMX if ( !pVmxTransient->fIsNestedGuest || !CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, uProcCtls)) #else NOREF(pVCpu); if (!pVmxTransient->fIsNestedGuest) #endif { pVmcsInfo->u32ProcCtls &= ~uProcCtls; int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls); AssertRC(rc); } } } /** * Sets the TSC offset for the current VMCS. * * @param uTscOffset The TSC offset to set. * @param pVmcsInfo The VMCS info. object. */ static void hmR0VmxSetTscOffsetVmcs(PVMXVMCSINFO pVmcsInfo, uint64_t uTscOffset) { if (pVmcsInfo->u64TscOffset != uTscOffset) { int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, uTscOffset); AssertRC(rc); pVmcsInfo->u64TscOffset = uTscOffset; } } /** * Loads the VMCS specified by the VMCS info. object. * * @returns VBox status code. * @param pVmcsInfo The VMCS info. object. * * @remarks Can be called with interrupts disabled. */ static int hmR0VmxLoadVmcs(PVMXVMCSINFO pVmcsInfo) { Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS); Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); int rc = VMXLoadVmcs(pVmcsInfo->HCPhysVmcs); if (RT_SUCCESS(rc)) pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_CURRENT; return rc; } /** * Clears the VMCS specified by the VMCS info. object. * * @returns VBox status code. * @param pVmcsInfo The VMCS info. object. * * @remarks Can be called with interrupts disabled. */ static int hmR0VmxClearVmcs(PVMXVMCSINFO pVmcsInfo) { Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS); Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); int rc = VMXClearVmcs(pVmcsInfo->HCPhysVmcs); if (RT_SUCCESS(rc)) pVmcsInfo->fVmcsState = VMX_V_VMCS_LAUNCH_STATE_CLEAR; return rc; } /** * Checks whether the MSR belongs to the set of guest MSRs that we restore * lazily while leaving VT-x. * * @returns true if it does, false otherwise. * @param pVCpu The cross context virtual CPU structure. * @param idMsr The MSR to check. 
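 *
 * @remarks As the switch in the function body shows, the lazy set consists of the
 *          64-bit syscall MSRs (LSTAR, STAR, SF_MASK) and KERNEL_GS_BASE; these
 *          are swapped directly by hmR0VmxLazyLoadGuestMsrs() and
 *          hmR0VmxLazyRestoreHostMsrs() rather than going through the VMCS
 *          auto-load/store areas.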
*/ static bool hmR0VmxIsLazyGuestMsr(PCVMCPUCC pVCpu, uint32_t idMsr) { if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests) { switch (idMsr) { case MSR_K8_LSTAR: case MSR_K6_STAR: case MSR_K8_SF_MASK: case MSR_K8_KERNEL_GS_BASE: return true; } } return false; } /** * Loads a set of guests MSRs to allow read/passthru to the guest. * * The name of this function is slightly confusing. This function does NOT * postpone loading, but loads the MSR right now. "hmR0VmxLazy" is simply a * common prefix for functions dealing with "lazy restoration" of the shared * MSRs. * * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static void hmR0VmxLazyLoadGuestMsrs(PVMCPUCC pVCpu) { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); Assert(!VMMRZCallRing3IsEnabled(pVCpu)); Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST); if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests) { /* * If the guest MSRs are not loaded -and- if all the guest MSRs are identical * to the MSRs on the CPU (which are the saved host MSRs, see assertion above) then * we can skip a few MSR writes. * * Otherwise, it implies either 1. they're not loaded, or 2. they're loaded but the * guest MSR values in the guest-CPU context might be different to what's currently * loaded in the CPU. In either case, we need to write the new guest MSR values to the * CPU, see @bugref{8728}. */ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx; if ( !(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST) && pCtx->msrKERNELGSBASE == pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase && pCtx->msrLSTAR == pVCpu->hmr0.s.vmx.u64HostMsrLStar && pCtx->msrSTAR == pVCpu->hmr0.s.vmx.u64HostMsrStar && pCtx->msrSFMASK == pVCpu->hmr0.s.vmx.u64HostMsrSfMask) { #ifdef VBOX_STRICT Assert(ASMRdMsr(MSR_K8_KERNEL_GS_BASE) == pCtx->msrKERNELGSBASE); Assert(ASMRdMsr(MSR_K8_LSTAR) == pCtx->msrLSTAR); Assert(ASMRdMsr(MSR_K6_STAR) == pCtx->msrSTAR); Assert(ASMRdMsr(MSR_K8_SF_MASK) == pCtx->msrSFMASK); #endif } else { ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pCtx->msrKERNELGSBASE); ASMWrMsr(MSR_K8_LSTAR, pCtx->msrLSTAR); ASMWrMsr(MSR_K6_STAR, pCtx->msrSTAR); /* The system call flag mask register isn't as benign and accepting of all values as the above, so mask it to avoid #GP'ing on corrupted input. */ Assert(!(pCtx->msrSFMASK & ~(uint64_t)UINT32_MAX)); ASMWrMsr(MSR_K8_SF_MASK, pCtx->msrSFMASK & UINT32_MAX); } } pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_LOADED_GUEST; } /** * Checks if the specified guest MSR is part of the VM-entry MSR-load area. * * @returns @c true if found, @c false otherwise. * @param pVmcsInfo The VMCS info. object. * @param idMsr The MSR to find. */ static bool hmR0VmxIsAutoLoadGuestMsr(PCVMXVMCSINFO pVmcsInfo, uint32_t idMsr) { PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad; uint32_t const cMsrs = pVmcsInfo->cEntryMsrLoad; Assert(pMsrs); Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE); for (uint32_t i = 0; i < cMsrs; i++) { if (pMsrs[i].u32Msr == idMsr) return true; } return false; } /** * Performs lazy restoration of the set of host MSRs if they were previously * loaded with guest MSR values. * * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! * @remarks The guest MSRs should have been saved back into the guest-CPU * context by hmR0VmxImportGuestState()!!! 
*/ static void hmR0VmxLazyRestoreHostMsrs(PVMCPUCC pVCpu) { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); Assert(!VMMRZCallRing3IsEnabled(pVCpu)); if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST) { Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST); if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests) { ASMWrMsr(MSR_K8_LSTAR, pVCpu->hmr0.s.vmx.u64HostMsrLStar); ASMWrMsr(MSR_K6_STAR, pVCpu->hmr0.s.vmx.u64HostMsrStar); ASMWrMsr(MSR_K8_SF_MASK, pVCpu->hmr0.s.vmx.u64HostMsrSfMask); ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase); } } pVCpu->hmr0.s.vmx.fLazyMsrs &= ~(VMX_LAZY_MSRS_LOADED_GUEST | VMX_LAZY_MSRS_SAVED_HOST); } /** * Sets pfnStartVm to the best suited variant. * * This must be called whenever anything changes relative to the hmR0VmXStartVm * variant selection: * - pVCpu->hm.s.fLoadSaveGuestXcr0 * - HM_WSF_IBPB_ENTRY in pVCpu->hmr0.s.fWorldSwitcher * - HM_WSF_IBPB_EXIT in pVCpu->hmr0.s.fWorldSwitcher * - Perhaps: CPUMIsGuestFPUStateActive() (windows only) * - Perhaps: CPUMCTX.fXStateMask (windows only) * * We currently ASSUME that neither HM_WSF_IBPB_ENTRY nor HM_WSF_IBPB_EXIT * cannot be changed at runtime. */ static void hmR0VmxUpdateStartVmFunction(PVMCPUCC pVCpu) { static const struct CLANGWORKAROUND { PFNHMVMXSTARTVM pfn; } s_aHmR0VmxStartVmFunctions[] = { { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit }, { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit }, { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit }, { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit }, { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit }, { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit }, { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit }, { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit }, { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit }, { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit }, { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit }, { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit }, { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit }, { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit }, { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit }, { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit }, { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit }, { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit }, { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit }, { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit }, { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit }, { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit }, { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit }, { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit }, { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit }, { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit }, { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit 
}, { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit }, { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit }, { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit }, { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit }, { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit }, }; uintptr_t const idx = (pVCpu->hmr0.s.fLoadSaveGuestXcr0 ? 1 : 0) | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_ENTRY ? 2 : 0) | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_ENTRY ? 4 : 0) | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_ENTRY ? 8 : 0) | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_EXIT ? 16 : 0); PFNHMVMXSTARTVM const pfnStartVm = s_aHmR0VmxStartVmFunctions[idx].pfn; if (pVCpu->hmr0.s.vmx.pfnStartVm != pfnStartVm) pVCpu->hmr0.s.vmx.pfnStartVm = pfnStartVm; } /** * Pushes a 2-byte value onto the real-mode (in virtual-8086 mode) guest's * stack. * * @returns Strict VBox status code (i.e. informational status codes too). * @retval VINF_EM_RESET if pushing a value to the stack caused a triple-fault. * @param pVCpu The cross context virtual CPU structure. * @param uValue The value to push to the guest stack. */ static VBOXSTRICTRC hmR0VmxRealModeGuestStackPush(PVMCPUCC pVCpu, uint16_t uValue) { /* * The stack limit is 0xffff in real-on-virtual 8086 mode. Real-mode with weird stack limits cannot be run in * virtual 8086 mode in VT-x. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers". * See Intel Instruction reference for PUSH and Intel spec. 22.33.1 "Segment Wraparound". */ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; if (pCtx->sp == 1) return VINF_EM_RESET; pCtx->sp -= sizeof(uint16_t); /* May wrap around which is expected behaviour. */ int rc = PGMPhysSimpleWriteGCPhys(pVCpu->CTX_SUFF(pVM), pCtx->ss.u64Base + pCtx->sp, &uValue, sizeof(uint16_t)); AssertRC(rc); return rc; } /** * Wrapper around VMXWriteVmcs16 taking a pVCpu parameter so VCC doesn't complain about * unreferenced local parameters in the template code... */ DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t u16Val) { RT_NOREF(pVCpu); return VMXWriteVmcs16(uFieldEnc, u16Val); } /** * Wrapper around VMXWriteVmcs32 taking a pVCpu parameter so VCC doesn't complain about * unreferenced local parameters in the template code... */ DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t u32Val) { RT_NOREF(pVCpu); return VMXWriteVmcs32(uFieldEnc, u32Val); } /** * Wrapper around VMXWriteVmcs64 taking a pVCpu parameter so VCC doesn't complain about * unreferenced local parameters in the template code... */ DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t u64Val) { RT_NOREF(pVCpu); return VMXWriteVmcs64(uFieldEnc, u64Val); } /** * Wrapper around VMXReadVmcs16 taking a pVCpu parameter so VCC doesn't complain about * unreferenced local parameters in the template code... */ DECL_FORCE_INLINE(int) hmR0VmxReadVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t *pu16Val) { RT_NOREF(pVCpu); return VMXReadVmcs16(uFieldEnc, pu16Val); } /** * Wrapper around VMXReadVmcs32 taking a pVCpu parameter so VCC doesn't complain about * unreferenced local parameters in the template code... 
*/ DECL_FORCE_INLINE(int) hmR0VmxReadVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t *pu32Val) { RT_NOREF(pVCpu); return VMXReadVmcs32(uFieldEnc, pu32Val); } /** * Wrapper around VMXReadVmcs64 taking a pVCpu parameter so VCC doesn't complain about * unreferenced local parameters in the template code... */ DECL_FORCE_INLINE(int) hmR0VmxReadVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t *pu64Val) { RT_NOREF(pVCpu); return VMXReadVmcs64(uFieldEnc, pu64Val); } /* * Instantiate the code we share with the NEM darwin backend. */ #define VCPU_2_VMXSTATE(a_pVCpu) (a_pVCpu)->hm.s #define VCPU_2_VMXSTATS(a_pVCpu) (a_pVCpu)->hm.s #define VM_IS_VMX_UNRESTRICTED_GUEST(a_pVM) (a_pVM)->hmr0.s.vmx.fUnrestrictedGuest #define VM_IS_VMX_NESTED_PAGING(a_pVM) (a_pVM)->hmr0.s.fNestedPaging #define VM_IS_VMX_PREEMPT_TIMER_USED(a_pVM) (a_pVM)->hmr0.s.vmx.fUsePreemptTimer #define VM_IS_VMX_LBR(a_pVM) (a_pVM)->hmr0.s.vmx.fLbr #define VMX_VMCS_WRITE_16(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs16((a_pVCpu), (a_FieldEnc), (a_Val)) #define VMX_VMCS_WRITE_32(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs32((a_pVCpu), (a_FieldEnc), (a_Val)) #define VMX_VMCS_WRITE_64(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val)) #define VMX_VMCS_WRITE_NW(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val)) #define VMX_VMCS_READ_16(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs16((a_pVCpu), (a_FieldEnc), (a_pVal)) #define VMX_VMCS_READ_32(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs32((a_pVCpu), (a_FieldEnc), (a_pVal)) #define VMX_VMCS_READ_64(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal)) #define VMX_VMCS_READ_NW(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal)) #include "../VMMAll/VMXAllTemplate.cpp.h" #undef VMX_VMCS_WRITE_16 #undef VMX_VMCS_WRITE_32 #undef VMX_VMCS_WRITE_64 #undef VMX_VMCS_WRITE_NW #undef VMX_VMCS_READ_16 #undef VMX_VMCS_READ_32 #undef VMX_VMCS_READ_64 #undef VMX_VMCS_READ_NW #undef VM_IS_VMX_PREEMPT_TIMER_USED #undef VM_IS_VMX_NESTED_PAGING #undef VM_IS_VMX_UNRESTRICTED_GUEST #undef VCPU_2_VMXSTATS #undef VCPU_2_VMXSTATE /** * Updates the VM's last error record. * * If there was a VMX instruction error, reads the error data from the VMCS and * updates VCPU's last error record as well. * * @param pVCpu The cross context virtual CPU structure of the calling EMT. * Can be NULL if @a rc is not VERR_VMX_UNABLE_TO_START_VM or * VERR_VMX_INVALID_VMCS_FIELD. * @param rc The error code. */ static void hmR0VmxUpdateErrorRecord(PVMCPUCC pVCpu, int rc) { if ( rc == VERR_VMX_INVALID_VMCS_FIELD || rc == VERR_VMX_UNABLE_TO_START_VM) { AssertPtrReturnVoid(pVCpu); VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError); } pVCpu->CTX_SUFF(pVM)->hm.s.ForR3.rcInit = rc; } /** * Enters VMX root mode operation on the current CPU. * * @returns VBox status code. * @param pHostCpu The HM physical-CPU structure. * @param pVM The cross context VM structure. Can be * NULL, after a resume. * @param HCPhysCpuPage Physical address of the VMXON region. * @param pvCpuPage Pointer to the VMXON region. */ static int hmR0VmxEnterRootMode(PHMPHYSCPU pHostCpu, PVMCC pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage) { Assert(pHostCpu); Assert(HCPhysCpuPage && HCPhysCpuPage != NIL_RTHCPHYS); Assert(RT_ALIGN_T(HCPhysCpuPage, _4K, RTHCPHYS) == HCPhysCpuPage); Assert(pvCpuPage); Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); if (pVM) { /* Write the VMCS revision identifier to the VMXON region. 
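           The CPU refuses VMXON if this identifier does not match the VMCS
           revision reported by IA32_VMX_BASIC (VMX_BF_BASIC_VMCS_ID), which is
           why it is written before the VMXEnable() call below.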
         */
        *(uint32_t *)pvCpuPage = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
    }

    /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
    RTCCUINTREG const fEFlags = ASMIntDisableFlags();

    /* Enable the VMX bit in CR4 if necessary. */
    RTCCUINTREG const uOldCr4 = SUPR0ChangeCR4(X86_CR4_VMXE, RTCCUINTREG_MAX);

    /* Record whether VMXE was already enabled prior to us enabling it above. */
    pHostCpu->fVmxeAlreadyEnabled = RT_BOOL(uOldCr4 & X86_CR4_VMXE);

    /* Enter VMX root mode. */
    int rc = VMXEnable(HCPhysCpuPage);
    if (RT_FAILURE(rc))
    {
        /* Restore CR4.VMXE if it was not set prior to our attempt to set it above. */
        if (!pHostCpu->fVmxeAlreadyEnabled)
            SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);

        if (pVM)
            pVM->hm.s.ForR3.vmx.HCPhysVmxEnableError = HCPhysCpuPage;
    }

    /* Restore interrupts. */
    ASMSetFlags(fEFlags);
    return rc;
}


/**
 * Exits VMX root mode operation on the current CPU.
 *
 * @returns VBox status code.
 * @param   pHostCpu    The HM physical-CPU structure.
 */
static int hmR0VmxLeaveRootMode(PHMPHYSCPU pHostCpu)
{
    Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));

    /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
    RTCCUINTREG const fEFlags = ASMIntDisableFlags();

    /* If we're for some reason not in VMX root mode, then don't leave it. */
    RTCCUINTREG const uHostCr4 = ASMGetCR4();

    int rc;
    if (uHostCr4 & X86_CR4_VMXE)
    {
        /* Exit VMX root mode and clear the VMX bit in CR4. */
        VMXDisable();

        /* Clear CR4.VMXE only if it was clear prior to us setting it. */
        if (!pHostCpu->fVmxeAlreadyEnabled)
            SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);

        rc = VINF_SUCCESS;
    }
    else
        rc = VERR_VMX_NOT_IN_VMX_ROOT_MODE;

    /* Restore interrupts. */
    ASMSetFlags(fEFlags);
    return rc;
}


/**
 * Allocates pages as specified by an array of VMX page allocation info
 * objects.
 *
 * The pages' contents are zeroed after allocation.
 *
 * @returns VBox status code.
 * @param   phMemObj        Where to return the handle to the allocation.
 * @param   paAllocInfo     The pointer to the first element of the VMX
 *                          page-allocation info object array.
 * @param   cEntries        The number of elements in the @a paAllocInfo array.
 */
static int hmR0VmxPagesAllocZ(PRTR0MEMOBJ phMemObj, PVMXPAGEALLOCINFO paAllocInfo, uint32_t cEntries)
{
    *phMemObj = NIL_RTR0MEMOBJ;

    /* Figure out how many pages to allocate. */
    uint32_t cPages = 0;
    for (uint32_t iPage = 0; iPage < cEntries; iPage++)
        cPages += !!paAllocInfo[iPage].fValid;

    /* Allocate the pages. */
    if (cPages)
    {
        size_t const cbPages = cPages << HOST_PAGE_SHIFT;
        int rc = RTR0MemObjAllocPage(phMemObj, cbPages, false /* fExecutable */);
        if (RT_FAILURE(rc))
            return rc;

        /* Zero the contents and assign each page to the corresponding VMX page-allocation entry. */
        void *pvFirstPage = RTR0MemObjAddress(*phMemObj);
        RT_BZERO(pvFirstPage, cbPages);

        uint32_t iPage = 0;
        for (uint32_t i = 0; i < cEntries; i++)
            if (paAllocInfo[i].fValid)
            {
                RTHCPHYS const HCPhysPage = RTR0MemObjGetPagePhysAddr(*phMemObj, iPage);
                void          *pvPage     = (void *)((uintptr_t)pvFirstPage + (iPage << X86_PAGE_4K_SHIFT));
                Assert(HCPhysPage && HCPhysPage != NIL_RTHCPHYS);
                AssertPtr(pvPage);

                Assert(paAllocInfo[iPage].pHCPhys);
                Assert(paAllocInfo[iPage].ppVirt);
                *paAllocInfo[iPage].pHCPhys = HCPhysPage;
                *paAllocInfo[iPage].ppVirt  = pvPage;

                /* Move to next page. */
                ++iPage;
            }

        /* Make sure all valid (requested) pages have been assigned. */
        Assert(iPage == cPages);
    }
    return VINF_SUCCESS;
}


/**
 * Frees pages allocated using hmR0VmxPagesAllocZ.
* * @param phMemObj Pointer to the memory object handle. Will be set to * NIL. */ DECL_FORCE_INLINE(void) hmR0VmxPagesFree(PRTR0MEMOBJ phMemObj) { /* We can cleanup wholesale since it's all one allocation. */ if (*phMemObj != NIL_RTR0MEMOBJ) { RTR0MemObjFree(*phMemObj, true /* fFreeMappings */); *phMemObj = NIL_RTR0MEMOBJ; } } /** * Initializes a VMCS info. object. * * @param pVmcsInfo The VMCS info. object. * @param pVmcsInfoShared The VMCS info. object shared with ring-3. */ static void hmR0VmxVmcsInfoInit(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared) { RT_ZERO(*pVmcsInfo); RT_ZERO(*pVmcsInfoShared); pVmcsInfo->pShared = pVmcsInfoShared; Assert(pVmcsInfo->hMemObj == NIL_RTR0MEMOBJ); pVmcsInfo->HCPhysVmcs = NIL_RTHCPHYS; pVmcsInfo->HCPhysShadowVmcs = NIL_RTHCPHYS; pVmcsInfo->HCPhysMsrBitmap = NIL_RTHCPHYS; pVmcsInfo->HCPhysGuestMsrLoad = NIL_RTHCPHYS; pVmcsInfo->HCPhysGuestMsrStore = NIL_RTHCPHYS; pVmcsInfo->HCPhysHostMsrLoad = NIL_RTHCPHYS; pVmcsInfo->HCPhysVirtApic = NIL_RTHCPHYS; pVmcsInfo->HCPhysEPTP = NIL_RTHCPHYS; pVmcsInfo->u64VmcsLinkPtr = NIL_RTHCPHYS; pVmcsInfo->idHostCpuState = NIL_RTCPUID; pVmcsInfo->idHostCpuExec = NIL_RTCPUID; } /** * Frees the VT-x structures for a VMCS info. object. * * @param pVmcsInfo The VMCS info. object. * @param pVmcsInfoShared The VMCS info. object shared with ring-3. */ static void hmR0VmxVmcsInfoFree(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared) { hmR0VmxPagesFree(&pVmcsInfo->hMemObj); hmR0VmxVmcsInfoInit(pVmcsInfo, pVmcsInfoShared); } /** * Allocates the VT-x structures for a VMCS info. object. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param pVmcsInfo The VMCS info. object. * @param fIsNstGstVmcs Whether this is a nested-guest VMCS. * * @remarks The caller is expected to take care of any and all allocation failures. * This function will not perform any cleanup for failures half-way * through. */ static int hmR0VmxAllocVmcsInfo(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs) { PVMCC pVM = pVCpu->CTX_SUFF(pVM); bool const fMsrBitmaps = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS); bool const fShadowVmcs = !fIsNstGstVmcs ? pVM->hmr0.s.vmx.fUseVmcsShadowing : pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing; Assert(!pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing); /* VMCS shadowing is not yet exposed to the guest. */ VMXPAGEALLOCINFO aAllocInfo[] = { { true, 0 /* Unused */, &pVmcsInfo->HCPhysVmcs, &pVmcsInfo->pvVmcs }, { true, 0 /* Unused */, &pVmcsInfo->HCPhysGuestMsrLoad, &pVmcsInfo->pvGuestMsrLoad }, { true, 0 /* Unused */, &pVmcsInfo->HCPhysHostMsrLoad, &pVmcsInfo->pvHostMsrLoad }, { fMsrBitmaps, 0 /* Unused */, &pVmcsInfo->HCPhysMsrBitmap, &pVmcsInfo->pvMsrBitmap }, { fShadowVmcs, 0 /* Unused */, &pVmcsInfo->HCPhysShadowVmcs, &pVmcsInfo->pvShadowVmcs }, }; int rc = hmR0VmxPagesAllocZ(&pVmcsInfo->hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo)); if (RT_FAILURE(rc)) return rc; /* * We use the same page for VM-entry MSR-load and VM-exit MSR store areas. * Because they contain a symmetric list of guest MSRs to load on VM-entry and store on VM-exit. */ AssertCompile(RT_ELEMENTS(aAllocInfo) > 0); Assert(pVmcsInfo->HCPhysGuestMsrLoad != NIL_RTHCPHYS); pVmcsInfo->pvGuestMsrStore = pVmcsInfo->pvGuestMsrLoad; pVmcsInfo->HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrLoad; /* * Get the virtual-APIC page rather than allocating them again. 
     */
    if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW)
    {
        if (!fIsNstGstVmcs)
        {
            if (PDMHasApic(pVM))
            {
                rc = APICGetApicPageForCpu(pVCpu, &pVmcsInfo->HCPhysVirtApic, (PRTR0PTR)&pVmcsInfo->pbVirtApic, NULL /*pR3Ptr*/);
                if (RT_FAILURE(rc))
                    return rc;
                Assert(pVmcsInfo->pbVirtApic);
                Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
            }
        }
        else
        {
            /* These are set up later while merging the nested-guest VMCS. */
            Assert(pVmcsInfo->pbVirtApic == NULL);
            Assert(pVmcsInfo->HCPhysVirtApic == NIL_RTHCPHYS);
        }
    }

    return VINF_SUCCESS;
}


/**
 * Frees all VT-x structures for the VM.
 *
 * @returns IPRT status code.
 * @param   pVM     The cross context VM structure.
 */
static void hmR0VmxStructsFree(PVMCC pVM)
{
    hmR0VmxPagesFree(&pVM->hmr0.s.vmx.hMemObj);
#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
    if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
    {
        RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsFields);
        pVM->hmr0.s.vmx.paShadowVmcsFields = NULL;
        RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsRoFields);
        pVM->hmr0.s.vmx.paShadowVmcsRoFields = NULL;
    }
#endif

    for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
    {
        PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
        hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
        if (pVM->cpum.ro.GuestFeatures.fVmx)
            hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
#endif
    }
}


/**
 * Allocates all VT-x structures for the VM.
 *
 * @returns IPRT status code.
 * @param   pVM     The cross context VM structure.
 *
 * @remarks This function will clean up on memory allocation failures.
 */
static int hmR0VmxStructsAlloc(PVMCC pVM)
{
    /*
     * Sanity check the VMCS size reported by the CPU as we assume 4KB allocations.
     * The VMCS size cannot be more than 4096 bytes.
     *
     * See Intel spec. Appendix A.1 "Basic VMX Information".
     */
    uint32_t const cbVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_SIZE);
    if (cbVmcs <= X86_PAGE_4K_SIZE)
    { /* likely */ }
    else
    {
        VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_INVALID_VMCS_SIZE;
        return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
    }

    /*
     * Allocate per-VM VT-x structures.
     */
    bool const fVirtApicAccess   = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
    bool const fUseVmcsShadowing = pVM->hmr0.s.vmx.fUseVmcsShadowing;
    VMXPAGEALLOCINFO aAllocInfo[] =
    {
        { fVirtApicAccess,   0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysApicAccess,    (PRTR0PTR)&pVM->hmr0.s.vmx.pbApicAccess },
        { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmreadBitmap,  &pVM->hmr0.s.vmx.pvVmreadBitmap },
        { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmwriteBitmap, &pVM->hmr0.s.vmx.pvVmwriteBitmap },
#ifdef VBOX_WITH_CRASHDUMP_MAGIC
        { true,              0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysScratch,       (PRTR0PTR)&pVM->hmr0.s.vmx.pbScratch },
#endif
    };

    int rc = hmR0VmxPagesAllocZ(&pVM->hmr0.s.vmx.hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
    if (RT_SUCCESS(rc))
    {
#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
        /* Allocate the shadow VMCS-fields array. */
        if (fUseVmcsShadowing)
        {
            Assert(!pVM->hmr0.s.vmx.cShadowVmcsFields);
            Assert(!pVM->hmr0.s.vmx.cShadowVmcsRoFields);
            pVM->hmr0.s.vmx.paShadowVmcsFields   = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
            pVM->hmr0.s.vmx.paShadowVmcsRoFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
            if (!pVM->hmr0.s.vmx.paShadowVmcsFields || !pVM->hmr0.s.vmx.paShadowVmcsRoFields)
                rc = VERR_NO_MEMORY;
        }
#endif

        /*
         * Allocate per-VCPU VT-x structures.
*/ for (VMCPUID idCpu = 0; idCpu < pVM->cCpus && RT_SUCCESS(rc); idCpu++) { /* Allocate the guest VMCS structures. */ PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu); rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */); #ifdef VBOX_WITH_NESTED_HWVIRT_VMX /* Allocate the nested-guest VMCS structures, when the VMX feature is exposed to the guest. */ if (pVM->cpum.ro.GuestFeatures.fVmx && RT_SUCCESS(rc)) rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */); #endif } if (RT_SUCCESS(rc)) return VINF_SUCCESS; } hmR0VmxStructsFree(pVM); return rc; } /** * Pre-initializes non-zero fields in VMX structures that will be allocated. * * @param pVM The cross context VM structure. */ static void hmR0VmxStructsInit(PVMCC pVM) { /* Paranoia. */ Assert(pVM->hmr0.s.vmx.pbApicAccess == NULL); #ifdef VBOX_WITH_CRASHDUMP_MAGIC Assert(pVM->hmr0.s.vmx.pbScratch == NULL); #endif /* * Initialize members up-front so we can cleanup en masse on allocation failures. */ #ifdef VBOX_WITH_CRASHDUMP_MAGIC pVM->hmr0.s.vmx.HCPhysScratch = NIL_RTHCPHYS; #endif pVM->hmr0.s.vmx.HCPhysApicAccess = NIL_RTHCPHYS; pVM->hmr0.s.vmx.HCPhysVmreadBitmap = NIL_RTHCPHYS; pVM->hmr0.s.vmx.HCPhysVmwriteBitmap = NIL_RTHCPHYS; for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) { PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu); hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo); hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst); } } #ifdef VBOX_WITH_NESTED_HWVIRT_VMX /** * Returns whether an MSR at the given MSR-bitmap offset is intercepted or not. * * @returns @c true if the MSR is intercepted, @c false otherwise. * @param pbMsrBitmap The MSR bitmap. * @param offMsr The MSR byte offset. * @param iBit The bit offset from the byte offset. */ DECLINLINE(bool) hmR0VmxIsMsrBitSet(uint8_t const *pbMsrBitmap, uint16_t offMsr, int32_t iBit) { Assert(offMsr + (iBit >> 3) <= X86_PAGE_4K_SIZE); return ASMBitTest(pbMsrBitmap, (offMsr << 3) + iBit); } #endif /** * Sets the permission bits for the specified MSR in the given MSR bitmap. * * If the passed VMCS is a nested-guest VMCS, this function ensures that the * read/write intercept is cleared from the MSR bitmap used for hardware-assisted * VMX execution of the nested-guest, only if nested-guest is also not intercepting * the read/write access of this MSR. * * @param pVCpu The cross context virtual CPU structure. * @param pVmcsInfo The VMCS info. object. * @param fIsNstGstVmcs Whether this is a nested-guest VMCS. * @param idMsr The MSR value. * @param fMsrpm The MSR permissions (see VMXMSRPM_XXX). This must * include both a read -and- a write permission! * * @sa CPUMGetVmxMsrPermission. * @remarks Can be called with interrupts disabled. */ static void hmR0VmxSetMsrPermission(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs, uint32_t idMsr, uint32_t fMsrpm) { uint8_t *pbMsrBitmap = (uint8_t *)pVmcsInfo->pvMsrBitmap; Assert(pbMsrBitmap); Assert(VMXMSRPM_IS_FLAG_VALID(fMsrpm)); /* * MSR-bitmap Layout: * Byte index MSR range Interpreted as * 0x000 - 0x3ff 0x00000000 - 0x00001fff Low MSR read bits. * 0x400 - 0x7ff 0xc0000000 - 0xc0001fff High MSR read bits. * 0x800 - 0xbff 0x00000000 - 0x00001fff Low MSR write bits. * 0xc00 - 0xfff 0xc0000000 - 0xc0001fff High MSR write bits. * * A bit corresponding to an MSR within the above range causes a VM-exit * if the bit is 1 on executions of RDMSR/WRMSR. If an MSR falls out of * the MSR range, it always cause a VM-exit. 
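     *
     * For example, a write to MSR_K8_LSTAR (0xc0000082) maps to offMsr=0x400 and
     * iBit=0x82 below, i.e. byte 0xc10, bit 2 in the high MSR write range (the
     * matching read-intercept bit lives at byte 0x410, bit 2).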
* * See Intel spec. 24.6.9 "MSR-Bitmap Address". */ uint16_t const offBitmapRead = 0; uint16_t const offBitmapWrite = 0x800; uint16_t offMsr; int32_t iBit; if (idMsr <= UINT32_C(0x00001fff)) { offMsr = 0; iBit = idMsr; } else if (idMsr - UINT32_C(0xc0000000) <= UINT32_C(0x00001fff)) { offMsr = 0x400; iBit = idMsr - UINT32_C(0xc0000000); } else AssertMsgFailedReturnVoid(("Invalid MSR %#RX32\n", idMsr)); /* * Set the MSR read permission. */ uint16_t const offMsrRead = offBitmapRead + offMsr; Assert(offMsrRead + (iBit >> 3) < offBitmapWrite); if (fMsrpm & VMXMSRPM_ALLOW_RD) { #ifdef VBOX_WITH_NESTED_HWVIRT_VMX bool const fClear = !fIsNstGstVmcs ? true : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrRead, iBit); #else RT_NOREF2(pVCpu, fIsNstGstVmcs); bool const fClear = true; #endif if (fClear) ASMBitClear(pbMsrBitmap, (offMsrRead << 3) + iBit); } else ASMBitSet(pbMsrBitmap, (offMsrRead << 3) + iBit); /* * Set the MSR write permission. */ uint16_t const offMsrWrite = offBitmapWrite + offMsr; Assert(offMsrWrite + (iBit >> 3) < X86_PAGE_4K_SIZE); if (fMsrpm & VMXMSRPM_ALLOW_WR) { #ifdef VBOX_WITH_NESTED_HWVIRT_VMX bool const fClear = !fIsNstGstVmcs ? true : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrWrite, iBit); #else RT_NOREF2(pVCpu, fIsNstGstVmcs); bool const fClear = true; #endif if (fClear) ASMBitClear(pbMsrBitmap, (offMsrWrite << 3) + iBit); } else ASMBitSet(pbMsrBitmap, (offMsrWrite << 3) + iBit); } /** * Updates the VMCS with the number of effective MSRs in the auto-load/store MSR * area. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param pVmcsInfo The VMCS info. object. * @param cMsrs The number of MSRs. */ static int hmR0VmxSetAutoLoadStoreMsrCount(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint32_t cMsrs) { /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */ uint32_t const cMaxSupportedMsrs = VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc); if (RT_LIKELY(cMsrs < cMaxSupportedMsrs)) { /* Commit the MSR counts to the VMCS and update the cache. */ if (pVmcsInfo->cEntryMsrLoad != cMsrs) { int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cMsrs); AssertRC(rc); rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cMsrs); AssertRC(rc); rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, cMsrs); AssertRC(rc); pVmcsInfo->cEntryMsrLoad = cMsrs; pVmcsInfo->cExitMsrStore = cMsrs; pVmcsInfo->cExitMsrLoad = cMsrs; } return VINF_SUCCESS; } LogRel(("Auto-load/store MSR count exceeded! cMsrs=%u MaxSupported=%u\n", cMsrs, cMaxSupportedMsrs)); pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_GUEST_MSR_STORAGE; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } /** * Adds a new (or updates the value of an existing) guest/host MSR * pair to be swapped during the world-switch as part of the * auto-load/store MSR area in the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param pVmxTransient The VMX-transient structure. * @param idMsr The MSR. * @param uGuestMsrValue Value of the guest MSR. * @param fSetReadWrite Whether to set the guest read/write access of this * MSR (thus not causing a VM-exit). * @param fUpdateHostMsr Whether to update the value of the host MSR if * necessary. 
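 *
 * @remarks If @a fUpdateHostMsr is false for a newly added MSR, the host value is
 *          expected to be filled in later via hmR0VmxUpdateAutoLoadHostMsrs();
 *          the fUpdatedHostAutoMsrs flag below tracks this.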
*/ static int hmR0VmxAddAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr, uint64_t uGuestMsrValue, bool fSetReadWrite, bool fUpdateHostMsr) { PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo; bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest; PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad; uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad; uint32_t i; /* Paranoia. */ Assert(pGuestMsrLoad); #ifndef DEBUG_bird LogFlowFunc(("pVCpu=%p idMsr=%#RX32 uGuestMsrValue=%#RX64\n", pVCpu, idMsr, uGuestMsrValue)); #endif /* Check if the MSR already exists in the VM-entry MSR-load area. */ for (i = 0; i < cMsrs; i++) { if (pGuestMsrLoad[i].u32Msr == idMsr) break; } bool fAdded = false; if (i == cMsrs) { /* The MSR does not exist, bump the MSR count to make room for the new MSR. */ ++cMsrs; int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs); AssertMsgRCReturn(rc, ("Insufficient space to add MSR to VM-entry MSR-load/store area %u\n", idMsr), rc); /* Set the guest to read/write this MSR without causing VM-exits. */ if ( fSetReadWrite && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)) hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_ALLOW_RD_WR); Log4Func(("Added MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs)); fAdded = true; } /* Update the MSR value for the newly added or already existing MSR. */ pGuestMsrLoad[i].u32Msr = idMsr; pGuestMsrLoad[i].u64Value = uGuestMsrValue; /* Create the corresponding slot in the VM-exit MSR-store area if we use a different page. */ if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo)) { PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore; pGuestMsrStore[i].u32Msr = idMsr; pGuestMsrStore[i].u64Value = uGuestMsrValue; } /* Update the corresponding slot in the host MSR area. */ PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad; Assert(pHostMsr != pVmcsInfo->pvGuestMsrLoad); Assert(pHostMsr != pVmcsInfo->pvGuestMsrStore); pHostMsr[i].u32Msr = idMsr; /* * Only if the caller requests to update the host MSR value AND we've newly added the * MSR to the host MSR area do we actually update the value. Otherwise, it will be * updated by hmR0VmxUpdateAutoLoadHostMsrs(). * * We do this for performance reasons since reading MSRs may be quite expensive. */ if (fAdded) { if (fUpdateHostMsr) { Assert(!VMMRZCallRing3IsEnabled(pVCpu)); Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); pHostMsr[i].u64Value = ASMRdMsr(idMsr); } else { /* Someone else can do the work. */ pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false; } } return VINF_SUCCESS; } /** * Removes a guest/host MSR pair to be swapped during the world-switch from the * auto-load/store MSR area in the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param pVmxTransient The VMX-transient structure. * @param idMsr The MSR. */ static int hmR0VmxRemoveAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr) { PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo; bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest; PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad; uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad; #ifndef DEBUG_bird LogFlowFunc(("pVCpu=%p idMsr=%#RX32\n", pVCpu, idMsr)); #endif for (uint32_t i = 0; i < cMsrs; i++) { /* Find the MSR. */ if (pGuestMsrLoad[i].u32Msr == idMsr) { /* * If it's the last MSR, we only need to reduce the MSR count. * If it's -not- the last MSR, copy the last MSR in place of it and reduce the MSR count. 
*/ if (i < cMsrs - 1) { /* Remove it from the VM-entry MSR-load area. */ pGuestMsrLoad[i].u32Msr = pGuestMsrLoad[cMsrs - 1].u32Msr; pGuestMsrLoad[i].u64Value = pGuestMsrLoad[cMsrs - 1].u64Value; /* Remove it from the VM-exit MSR-store area if it's in a different page. */ if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo)) { PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore; Assert(pGuestMsrStore[i].u32Msr == idMsr); pGuestMsrStore[i].u32Msr = pGuestMsrStore[cMsrs - 1].u32Msr; pGuestMsrStore[i].u64Value = pGuestMsrStore[cMsrs - 1].u64Value; } /* Remove it from the VM-exit MSR-load area. */ PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad; Assert(pHostMsr[i].u32Msr == idMsr); pHostMsr[i].u32Msr = pHostMsr[cMsrs - 1].u32Msr; pHostMsr[i].u64Value = pHostMsr[cMsrs - 1].u64Value; } /* Reduce the count to reflect the removed MSR and bail. */ --cMsrs; break; } } /* Update the VMCS if the count changed (meaning the MSR was found and removed). */ if (cMsrs != pVmcsInfo->cEntryMsrLoad) { int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs); AssertRCReturn(rc, rc); /* We're no longer swapping MSRs during the world-switch, intercept guest read/writes to them. */ if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS) hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_EXIT_RD | VMXMSRPM_EXIT_WR); Log4Func(("Removed MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs)); return VINF_SUCCESS; } return VERR_NOT_FOUND; } /** * Updates the value of all host MSRs in the VM-exit MSR-load area. * * @param pVCpu The cross context virtual CPU structure. * @param pVmcsInfo The VMCS info. object. * * @remarks No-long-jump zone!!! */ static void hmR0VmxUpdateAutoLoadHostMsrs(PCVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo) { RT_NOREF(pVCpu); Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); PVMXAUTOMSR pHostMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad; uint32_t const cMsrs = pVmcsInfo->cExitMsrLoad; Assert(pHostMsrLoad); Assert(sizeof(*pHostMsrLoad) * cMsrs <= X86_PAGE_4K_SIZE); LogFlowFunc(("pVCpu=%p cMsrs=%u\n", pVCpu, cMsrs)); for (uint32_t i = 0; i < cMsrs; i++) { /* * Performance hack for the host EFER MSR. We use the cached value rather than re-read it. * Strict builds will catch mismatches in hmR0VmxCheckAutoLoadStoreMsrs(). See @bugref{7368}. */ if (pHostMsrLoad[i].u32Msr == MSR_K6_EFER) pHostMsrLoad[i].u64Value = g_uHmVmxHostMsrEfer; else pHostMsrLoad[i].u64Value = ASMRdMsr(pHostMsrLoad[i].u32Msr); } } /** * Saves a set of host MSRs to allow read/write passthru access to the guest and * perform lazy restoration of the host MSRs while leaving VT-x. * * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static void hmR0VmxLazySaveHostMsrs(PVMCPUCC pVCpu) { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); /* * Note: If you're adding MSRs here, make sure to update the MSR-bitmap accesses in hmR0VmxSetupVmcsProcCtls(). */ if (!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST)) { Assert(!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)); /* Guest MSRs better not be loaded now. 
*/ if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests) { pVCpu->hmr0.s.vmx.u64HostMsrLStar = ASMRdMsr(MSR_K8_LSTAR); pVCpu->hmr0.s.vmx.u64HostMsrStar = ASMRdMsr(MSR_K6_STAR); pVCpu->hmr0.s.vmx.u64HostMsrSfMask = ASMRdMsr(MSR_K8_SF_MASK); pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase = ASMRdMsr(MSR_K8_KERNEL_GS_BASE); } pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_SAVED_HOST; } } #ifdef VBOX_STRICT /** * Verifies that our cached host EFER MSR value has not changed since we cached it. * * @param pVmcsInfo The VMCS info. object. */ static void hmR0VmxCheckHostEferMsr(PCVMXVMCSINFO pVmcsInfo) { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); if (pVmcsInfo->u32ExitCtls & VMX_EXIT_CTLS_LOAD_EFER_MSR) { uint64_t const uHostEferMsr = ASMRdMsr(MSR_K6_EFER); uint64_t const uHostEferMsrCache = g_uHmVmxHostMsrEfer; uint64_t uVmcsEferMsrVmcs; int rc = VMXReadVmcs64(VMX_VMCS64_HOST_EFER_FULL, &uVmcsEferMsrVmcs); AssertRC(rc); AssertMsgReturnVoid(uHostEferMsr == uVmcsEferMsrVmcs, ("EFER Host/VMCS mismatch! host=%#RX64 vmcs=%#RX64\n", uHostEferMsr, uVmcsEferMsrVmcs)); AssertMsgReturnVoid(uHostEferMsr == uHostEferMsrCache, ("EFER Host/Cache mismatch! host=%#RX64 cache=%#RX64\n", uHostEferMsr, uHostEferMsrCache)); } } /** * Verifies whether the guest/host MSR pairs in the auto-load/store area in the * VMCS are correct. * * @param pVCpu The cross context virtual CPU structure. * @param pVmcsInfo The VMCS info. object. * @param fIsNstGstVmcs Whether this is a nested-guest VMCS. */ static void hmR0VmxCheckAutoLoadStoreMsrs(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs) { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); /* Read the various MSR-area counts from the VMCS. */ uint32_t cEntryLoadMsrs; uint32_t cExitStoreMsrs; uint32_t cExitLoadMsrs; int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &cEntryLoadMsrs); AssertRC(rc); rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &cExitStoreMsrs); AssertRC(rc); rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &cExitLoadMsrs); AssertRC(rc); /* Verify all the MSR counts are the same. */ Assert(cEntryLoadMsrs == cExitStoreMsrs); Assert(cExitStoreMsrs == cExitLoadMsrs); uint32_t const cMsrs = cExitLoadMsrs; /* Verify the MSR counts do not exceed the maximum count supported by the hardware. */ Assert(cMsrs < VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc)); /* Verify the MSR counts are within the allocated page size. */ Assert(sizeof(VMXAUTOMSR) * cMsrs <= X86_PAGE_4K_SIZE); /* Verify the relevant contents of the MSR areas match. */ PCVMXAUTOMSR pGuestMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad; PCVMXAUTOMSR pGuestMsrStore = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore; PCVMXAUTOMSR pHostMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad; bool const fSeparateExitMsrStorePage = hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo); for (uint32_t i = 0; i < cMsrs; i++) { /* Verify that the MSRs are paired properly and that the host MSR has the correct value. 
*/ if (fSeparateExitMsrStorePage) { AssertMsgReturnVoid(pGuestMsrLoad->u32Msr == pGuestMsrStore->u32Msr, ("GuestMsrLoad=%#RX32 GuestMsrStore=%#RX32 cMsrs=%u\n", pGuestMsrLoad->u32Msr, pGuestMsrStore->u32Msr, cMsrs)); } AssertMsgReturnVoid(pHostMsrLoad->u32Msr == pGuestMsrLoad->u32Msr, ("HostMsrLoad=%#RX32 GuestMsrLoad=%#RX32 cMsrs=%u\n", pHostMsrLoad->u32Msr, pGuestMsrLoad->u32Msr, cMsrs)); uint64_t const u64HostMsr = ASMRdMsr(pHostMsrLoad->u32Msr); AssertMsgReturnVoid(pHostMsrLoad->u64Value == u64HostMsr, ("u32Msr=%#RX32 VMCS Value=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n", pHostMsrLoad->u32Msr, pHostMsrLoad->u64Value, u64HostMsr, cMsrs)); /* Verify that cached host EFER MSR matches what's loaded on the CPU. */ bool const fIsEferMsr = RT_BOOL(pHostMsrLoad->u32Msr == MSR_K6_EFER); AssertMsgReturnVoid(!fIsEferMsr || u64HostMsr == g_uHmVmxHostMsrEfer, ("Cached=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n", g_uHmVmxHostMsrEfer, u64HostMsr, cMsrs)); /* Verify that the accesses are as expected in the MSR bitmap for auto-load/store MSRs. */ if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS) { uint32_t const fMsrpm = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, pGuestMsrLoad->u32Msr); if (fIsEferMsr) { AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_RD), ("Passthru read for EFER MSR!?\n")); AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_WR), ("Passthru write for EFER MSR!?\n")); } else { /* Verify LBR MSRs (used only for debugging) are intercepted. We don't passthru these MSRs to the guest yet. */ PCVMCC pVM = pVCpu->CTX_SUFF(pVM); if ( pVM->hmr0.s.vmx.fLbr && ( hmR0VmxIsLbrBranchFromMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */) || hmR0VmxIsLbrBranchToMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */) || pGuestMsrLoad->u32Msr == pVM->hmr0.s.vmx.idLbrTosMsr)) { AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_EXIT_RD_WR, ("u32Msr=%#RX32 cMsrs=%u Passthru read/write for LBR MSRs!\n", pGuestMsrLoad->u32Msr, cMsrs)); } else if (!fIsNstGstVmcs) { AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_ALLOW_RD_WR, ("u32Msr=%#RX32 cMsrs=%u No passthru read/write!\n", pGuestMsrLoad->u32Msr, cMsrs)); } else { /* * A nested-guest VMCS must -also- allow read/write passthrough for the MSR for us to * execute a nested-guest with MSR passthrough. * * Check if the nested-guest MSR bitmap allows passthrough, and if so, assert that we * allow passthrough too. */ void const *pvMsrBitmapNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap; Assert(pvMsrBitmapNstGst); uint32_t const fMsrpmNstGst = CPUMGetVmxMsrPermission(pvMsrBitmapNstGst, pGuestMsrLoad->u32Msr); AssertMsgReturnVoid(fMsrpm == fMsrpmNstGst, ("u32Msr=%#RX32 cMsrs=%u Permission mismatch fMsrpm=%#x fMsrpmNstGst=%#x!\n", pGuestMsrLoad->u32Msr, cMsrs, fMsrpm, fMsrpmNstGst)); } } } /* Move to the next MSR. */ pHostMsrLoad++; pGuestMsrLoad++; pGuestMsrStore++; } } #endif /* VBOX_STRICT */ /** * Flushes the TLB using EPT. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure of the calling * EMT. Can be NULL depending on @a enmTlbFlush. * @param pVmcsInfo The VMCS info. object. Can be NULL depending on @a * enmTlbFlush. * @param enmTlbFlush Type of flush. * * @remarks Caller is responsible for making sure this function is called only * when NestedPaging is supported and providing @a enmTlbFlush that is * supported by the CPU. * @remarks Can be called with interrupts disabled. 
 */
static void hmR0VmxFlushEpt(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, VMXTLBFLUSHEPT enmTlbFlush)
{
    uint64_t au64Descriptor[2];
    if (enmTlbFlush == VMXTLBFLUSHEPT_ALL_CONTEXTS)
        au64Descriptor[0] = 0;
    else
    {
        Assert(pVCpu);
        Assert(pVmcsInfo);
        au64Descriptor[0] = pVmcsInfo->HCPhysEPTP;
    }
    au64Descriptor[1] = 0;                       /* MBZ. Intel spec. 33.3 "VMX Instructions" */

    int rc = VMXR0InvEPT(enmTlbFlush, &au64Descriptor[0]);
    AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %#x %#RHp failed. rc=%Rrc\n", enmTlbFlush, au64Descriptor[0], rc));

    if (   RT_SUCCESS(rc)
        && pVCpu)
        STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
}


/**
 * Flushes the TLB using VPID.
 *
 * @returns VBox status code.
 * @param   pVCpu           The cross context virtual CPU structure of the calling
 *                          EMT. Can be NULL depending on @a enmTlbFlush.
 * @param   enmTlbFlush     Type of flush.
 * @param   GCPtr           Virtual address of the page to flush (can be 0 depending
 *                          on @a enmTlbFlush).
 *
 * @remarks Can be called with interrupts disabled.
 */
static void hmR0VmxFlushVpid(PVMCPUCC pVCpu, VMXTLBFLUSHVPID enmTlbFlush, RTGCPTR GCPtr)
{
    Assert(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid);

    uint64_t au64Descriptor[2];
    if (enmTlbFlush == VMXTLBFLUSHVPID_ALL_CONTEXTS)
    {
        au64Descriptor[0] = 0;
        au64Descriptor[1] = 0;
    }
    else
    {
        AssertPtr(pVCpu);
        AssertMsg(pVCpu->hmr0.s.uCurrentAsid != 0, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
        AssertMsg(pVCpu->hmr0.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
        au64Descriptor[0] = pVCpu->hmr0.s.uCurrentAsid;
        au64Descriptor[1] = GCPtr;
    }

    int rc = VMXR0InvVPID(enmTlbFlush, &au64Descriptor[0]);
    AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvVPID %#x %u %RGv failed with %Rrc\n",
                                   enmTlbFlush, pVCpu ? pVCpu->hmr0.s.uCurrentAsid : 0, GCPtr, rc));

    if (   RT_SUCCESS(rc)
        && pVCpu)
        STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
    NOREF(rc);
}


/**
 * Invalidates a guest page by guest virtual address. Only relevant for EPT/VPID;
 * otherwise there is nothing really to invalidate.
 *
 * @returns VBox status code.
 * @param   pVCpu   The cross context virtual CPU structure.
 * @param   GCVirt  Guest virtual address of the page to invalidate.
 */
VMMR0DECL(int) VMXR0InvalidatePage(PVMCPUCC pVCpu, RTGCPTR GCVirt)
{
    AssertPtr(pVCpu);
    LogFlowFunc(("pVCpu=%p GCVirt=%RGv\n", pVCpu, GCVirt));

    if (!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH))
    {
        /*
         * We must invalidate the guest TLB entry in either case; we cannot ignore it even for
         * the EPT case. See @bugref{6043} and @bugref{6177}.
         *
         * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxFlushTLB*()
         * as this function may be called in a loop with individual addresses.
         */
        PVMCC pVM = pVCpu->CTX_SUFF(pVM);
        if (pVM->hmr0.s.vmx.fVpid)
        {
            if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
            {
                hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_INDIV_ADDR, GCVirt);
                STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt);
            }
            else
                VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
        }
        else if (pVM->hmr0.s.fNestedPaging)
            VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
    }

    return VINF_SUCCESS;
}


/**
 * Dummy placeholder for tagged-TLB flush handling before VM-entry. Used in the
 * case where neither EPT nor VPID is supported by the CPU.
 *
 * @param   pHostCpu    The HM physical-CPU structure.
 * @param   pVCpu       The cross context virtual CPU structure.
 *
 * @remarks Called with interrupts disabled.
*/ static void hmR0VmxFlushTaggedTlbNone(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu) { AssertPtr(pVCpu); AssertPtr(pHostCpu); VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH); Assert(pHostCpu->idCpu != NIL_RTCPUID); pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu; pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes; pVCpu->hmr0.s.fForceTLBFlush = false; return; } /** * Flushes the tagged-TLB entries for EPT+VPID CPUs as necessary. * * @param pHostCpu The HM physical-CPU structure. * @param pVCpu The cross context virtual CPU structure. * @param pVmcsInfo The VMCS info. object. * * @remarks All references to "ASID" in this function pertains to "VPID" in Intel's * nomenclature. The reason is, to avoid confusion in compare statements * since the host-CPU copies are named "ASID". * * @remarks Called with interrupts disabled. */ static void hmR0VmxFlushTaggedTlbBoth(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo) { #ifdef VBOX_WITH_STATISTICS bool fTlbFlushed = false; # define HMVMX_SET_TAGGED_TLB_FLUSHED() do { fTlbFlushed = true; } while (0) # define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { \ if (!fTlbFlushed) \ STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); \ } while (0) #else # define HMVMX_SET_TAGGED_TLB_FLUSHED() do { } while (0) # define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { } while (0) #endif AssertPtr(pVCpu); AssertPtr(pHostCpu); Assert(pHostCpu->idCpu != NIL_RTCPUID); PVMCC pVM = pVCpu->CTX_SUFF(pVM); AssertMsg(pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbBoth cannot be invoked unless NestedPaging & VPID are enabled." "fNestedPaging=%RTbool fVpid=%RTbool", pVM->hmr0.s.fNestedPaging, pVM->hmr0.s.vmx.fVpid)); /* * Force a TLB flush for the first world-switch if the current CPU differs from the one we * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we * cannot reuse the current ASID anymore. */ if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes) { ++pHostCpu->uCurrentAsid; if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid) { pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0. */ pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */ pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */ } pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid; pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu; pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes; /* * Flush by EPT when we get rescheduled to a new host CPU to ensure EPT-only tagged mappings are also * invalidated. We don't need to flush-by-VPID here as flushing by EPT covers it. See @bugref{6568}. */ hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt); STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch); HMVMX_SET_TAGGED_TLB_FLUSHED(); VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH); } else if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) /* Check for explicit TLB flushes. */ { /* * Changes to the EPT paging structure by VMM requires flushing-by-EPT as the CPU * creates guest-physical (ie. only EPT-tagged) mappings while traversing the EPT * tables when EPT is in use. Flushing-by-VPID will only flush linear (only * VPID-tagged) and combined (EPT+VPID tagged) mappings but not guest-physical * mappings, see @bugref{6568}. * * See Intel spec. 28.3.2 "Creating and Using Cached Translation Information". 
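 *
 * In short: INVEPT invalidates guest-physical and combined (EPT+VPID tagged)
 * mappings, whereas INVVPID invalidates only linear and combined mappings and
 * leaves EPT-only guest-physical mappings intact. Hence an EPT flush, not a
 * VPID flush, is issued for explicit flush requests here.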
*/ hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt); STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb); HMVMX_SET_TAGGED_TLB_FLUSHED(); } else if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb) { /* * The nested-guest specifies its own guest-physical address to use as the APIC-access * address which requires flushing the TLB of EPT cached structures. * * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction". */ hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt); pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false; STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst); HMVMX_SET_TAGGED_TLB_FLUSHED(); } pVCpu->hmr0.s.fForceTLBFlush = false; HMVMX_UPDATE_FLUSH_SKIPPED_STAT(); Assert(pVCpu->hmr0.s.idLastCpu == pHostCpu->idCpu); Assert(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes); AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes, ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes)); AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid, ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu, pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes)); AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid, ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid)); /* Update VMCS with the VPID. */ int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid); AssertRC(rc); #undef HMVMX_SET_TAGGED_TLB_FLUSHED } /** * Flushes the tagged-TLB entries for EPT CPUs as necessary. * * @param pHostCpu The HM physical-CPU structure. * @param pVCpu The cross context virtual CPU structure. * @param pVmcsInfo The VMCS info. object. * * @remarks Called with interrupts disabled. */ static void hmR0VmxFlushTaggedTlbEpt(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo) { AssertPtr(pVCpu); AssertPtr(pHostCpu); Assert(pHostCpu->idCpu != NIL_RTCPUID); AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked without NestedPaging.")); AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with VPID.")); /* * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last. * A change in the TLB flush count implies the host CPU is online after a suspend/resume. */ if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes) { pVCpu->hmr0.s.fForceTLBFlush = true; STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch); } /* Check for explicit TLB flushes. */ if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) { pVCpu->hmr0.s.fForceTLBFlush = true; STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb); } /* Check for TLB flushes while switching to/from a nested-guest. */ if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb) { pVCpu->hmr0.s.fForceTLBFlush = true; pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false; STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst); } pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu; pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes; if (pVCpu->hmr0.s.fForceTLBFlush) { hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.enmTlbFlushEpt); pVCpu->hmr0.s.fForceTLBFlush = false; } } /** * Flushes the tagged-TLB entries for VPID CPUs as necessary. * * @param pHostCpu The HM physical-CPU structure. * @param pVCpu The cross context virtual CPU structure. 
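 *
 * @remarks Informational sketch (not part of the build) of the per-host-CPU
 *          VPID/ASID allocation performed below; the fields mirror the real
 *          structures, the helper itself is hypothetical:
 * @code
 *   uint32_t hmNewVpidSketch(PHMPHYSCPU pHostCpu)
 *   {
 *       if (++pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
 *       {
 *           pHostCpu->uCurrentAsid        = 1;    // wrap to 1, the host owns VPID 0
 *           pHostCpu->cTlbFlushes++;              // forces every vCPU to pick up a new VPID
 *           pHostCpu->fFlushAsidBeforeUse = true; // new VPIDs must be flushed before first use
 *       }
 *       return pHostCpu->uCurrentAsid;
 *   }
 * @endcode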
* * @remarks Called with interrupts disabled. */ static void hmR0VmxFlushTaggedTlbVpid(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu) { AssertPtr(pVCpu); AssertPtr(pHostCpu); Assert(pHostCpu->idCpu != NIL_RTCPUID); AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTlbVpid cannot be invoked without VPID.")); AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTlbVpid cannot be invoked with NestedPaging")); /* * Force a TLB flush for the first world switch if the current CPU differs from the one we * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we * cannot reuse the current ASID anymore. */ if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes) { pVCpu->hmr0.s.fForceTLBFlush = true; STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch); } /* Check for explicit TLB flushes. */ if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) { /* * If we ever support VPID flush combinations other than ALL or SINGLE-context (see * hmR0VmxSetupTaggedTlb()) we would need to explicitly flush in this case (add an * fExplicitFlush = true here and change the pHostCpu->fFlushAsidBeforeUse check below to * include fExplicitFlush's too) - an obscure corner case. */ pVCpu->hmr0.s.fForceTLBFlush = true; STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb); } /* Check for TLB flushes while switching to/from a nested-guest. */ if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb) { pVCpu->hmr0.s.fForceTLBFlush = true; pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false; STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst); } PVMCC pVM = pVCpu->CTX_SUFF(pVM); pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu; if (pVCpu->hmr0.s.fForceTLBFlush) { ++pHostCpu->uCurrentAsid; if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid) { pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0 */ pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */ pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */ } pVCpu->hmr0.s.fForceTLBFlush = false; pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes; pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid; if (pHostCpu->fFlushAsidBeforeUse) { if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_SINGLE_CONTEXT) hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_SINGLE_CONTEXT, 0 /* GCPtr */); else if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_ALL_CONTEXTS) { hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_ALL_CONTEXTS, 0 /* GCPtr */); pHostCpu->fFlushAsidBeforeUse = false; } else { /* hmR0VmxSetupTaggedTlb() ensures we never get here. Paranoia. 
*/ AssertMsgFailed(("Unsupported VPID-flush context type.\n")); } } } AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes, ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes)); AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid, ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu, pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes)); AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid, ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid)); int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid); AssertRC(rc); } /** * Flushes the guest TLB entry based on CPU capabilities. * * @param pHostCpu The HM physical-CPU structure. * @param pVCpu The cross context virtual CPU structure. * @param pVmcsInfo The VMCS info. object. * * @remarks Called with interrupts disabled. */ static void hmR0VmxFlushTaggedTlb(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo) { #ifdef HMVMX_ALWAYS_FLUSH_TLB VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH); #endif PVMCC pVM = pVCpu->CTX_SUFF(pVM); switch (pVM->hmr0.s.vmx.enmTlbFlushType) { case VMXTLBFLUSHTYPE_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pHostCpu, pVCpu, pVmcsInfo); break; case VMXTLBFLUSHTYPE_EPT: hmR0VmxFlushTaggedTlbEpt(pHostCpu, pVCpu, pVmcsInfo); break; case VMXTLBFLUSHTYPE_VPID: hmR0VmxFlushTaggedTlbVpid(pHostCpu, pVCpu); break; case VMXTLBFLUSHTYPE_NONE: hmR0VmxFlushTaggedTlbNone(pHostCpu, pVCpu); break; default: AssertMsgFailed(("Invalid flush-tag function identifier\n")); break; } /* Don't assert that VMCPU_FF_TLB_FLUSH should no longer be pending. It can be set by other EMTs. */ } /** * Sets up the appropriate tagged TLB-flush level and handler for flushing guest * TLB entries from the host TLB before VM-entry. * * @returns VBox status code. * @param pVM The cross context VM structure. */ static int hmR0VmxSetupTaggedTlb(PVMCC pVM) { /* * Determine optimal flush type for nested paging. * We cannot ignore EPT if no suitable flush-types is supported by the CPU as we've already setup * unrestricted guest execution (see hmR3InitFinalizeR0()). */ if (pVM->hmr0.s.fNestedPaging) { if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT) { if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT) pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_SINGLE_CONTEXT; else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS) pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_ALL_CONTEXTS; else { /* Shouldn't happen. EPT is supported but no suitable flush-types supported. */ pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED; VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_FLUSH_TYPE_UNSUPPORTED; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } /* Make sure the write-back cacheable memory type for EPT is supported. */ if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_MEMTYPE_WB))) { pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED; VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_MEM_TYPE_NOT_WB; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } /* EPT requires a page-walk length of 4. 
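       That is, the CPU must support 4-level EPT paging structures, the only depth this
       code sets up; the capability bit tested below advertises exactly that support.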
*/ if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_PAGE_WALK_LENGTH_4))) { pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED; VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_PAGE_WALK_LENGTH_UNSUPPORTED; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } } else { /* Shouldn't happen. EPT is supported but INVEPT instruction is not supported. */ pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED; VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_INVEPT_UNAVAILABLE; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } } /* * Determine optimal flush type for VPID. */ if (pVM->hmr0.s.vmx.fVpid) { if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID) { if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT) pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_SINGLE_CONTEXT; else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS) pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_ALL_CONTEXTS; else { /* Neither SINGLE nor ALL-context flush types for VPID are supported by the CPU. Ignore VPID capability. */ if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR) LogRelFunc(("Only INDIV_ADDR supported. Ignoring VPID.\n")); if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS) LogRelFunc(("Only SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n")); pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED; pVM->hmr0.s.vmx.fVpid = false; } } else { /* Shouldn't happen. VPID is supported but INVVPID is not supported by the CPU. Ignore VPID capability. */ Log4Func(("VPID supported without INVVPID support. Ignoring VPID.\n")); pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED; pVM->hmr0.s.vmx.fVpid = false; } } /* * Setup the handler for flushing tagged-TLBs. */ if (pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid) pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT_VPID; else if (pVM->hmr0.s.fNestedPaging) pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT; else if (pVM->hmr0.s.vmx.fVpid) pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_VPID; else pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_NONE; /* * Copy out the result to ring-3. */ pVM->hm.s.ForR3.vmx.fVpid = pVM->hmr0.s.vmx.fVpid; pVM->hm.s.ForR3.vmx.enmTlbFlushType = pVM->hmr0.s.vmx.enmTlbFlushType; pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt; pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid; return VINF_SUCCESS; } /** * Sets up the LBR MSR ranges based on the host CPU. * * @returns VBox status code. * @param pVM The cross context VM structure. * * @sa nemR3DarwinSetupLbrMsrRange */ static int hmR0VmxSetupLbrMsrRange(PVMCC pVM) { Assert(pVM->hmr0.s.vmx.fLbr); uint32_t idLbrFromIpMsrFirst; uint32_t idLbrFromIpMsrLast; uint32_t idLbrToIpMsrFirst; uint32_t idLbrToIpMsrLast; uint32_t idLbrTosMsr; /* * Determine the LBR MSRs supported for this host CPU family and model. * * See Intel spec. 17.4.8 "LBR Stack". * See Intel "Model-Specific Registers" spec.
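 *
 * The switch below keys on the packed value (family << 8) | model; for example,
 * a family 0x06, model 0x8E host is matched as (0x06 << 8) | 0x8E = 0x068E.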
*/ uint32_t const uFamilyModel = (g_CpumHostFeatures.s.uFamily << 8) | g_CpumHostFeatures.s.uModel; switch (uFamilyModel) { case 0x0f01: case 0x0f02: idLbrFromIpMsrFirst = MSR_P4_LASTBRANCH_0; idLbrFromIpMsrLast = MSR_P4_LASTBRANCH_3; idLbrToIpMsrFirst = 0x0; idLbrToIpMsrLast = 0x0; idLbrTosMsr = MSR_P4_LASTBRANCH_TOS; break; case 0x065c: case 0x065f: case 0x064e: case 0x065e: case 0x068e: case 0x069e: case 0x0655: case 0x0666: case 0x067a: case 0x0667: case 0x066a: case 0x066c: case 0x067d: case 0x067e: idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP; idLbrFromIpMsrLast = MSR_LASTBRANCH_31_FROM_IP; idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP; idLbrToIpMsrLast = MSR_LASTBRANCH_31_TO_IP; idLbrTosMsr = MSR_LASTBRANCH_TOS; break; case 0x063d: case 0x0647: case 0x064f: case 0x0656: case 0x063c: case 0x0645: case 0x0646: case 0x063f: case 0x062a: case 0x062d: case 0x063a: case 0x063e: case 0x061a: case 0x061e: case 0x061f: case 0x062e: case 0x0625: case 0x062c: case 0x062f: idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP; idLbrFromIpMsrLast = MSR_LASTBRANCH_15_FROM_IP; idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP; idLbrToIpMsrLast = MSR_LASTBRANCH_15_TO_IP; idLbrTosMsr = MSR_LASTBRANCH_TOS; break; case 0x0617: case 0x061d: case 0x060f: idLbrFromIpMsrFirst = MSR_CORE2_LASTBRANCH_0_FROM_IP; idLbrFromIpMsrLast = MSR_CORE2_LASTBRANCH_3_FROM_IP; idLbrToIpMsrFirst = MSR_CORE2_LASTBRANCH_0_TO_IP; idLbrToIpMsrLast = MSR_CORE2_LASTBRANCH_3_TO_IP; idLbrTosMsr = MSR_CORE2_LASTBRANCH_TOS; break; /* Atom and related microarchitectures we don't care about: case 0x0637: case 0x064a: case 0x064c: case 0x064d: case 0x065a: case 0x065d: case 0x061c: case 0x0626: case 0x0627: case 0x0635: case 0x0636: */ /* All other CPUs: */ default: { LogRelFunc(("Could not determine LBR stack size for the CPU model %#x\n", uFamilyModel)); VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_UNKNOWN; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } } /* * Validate. */ uint32_t const cLbrStack = idLbrFromIpMsrLast - idLbrFromIpMsrFirst + 1; PCVMCPU pVCpu0 = VMCC_GET_CPU_0(pVM); AssertCompile( RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr) == RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrToIpMsr)); if (cLbrStack > RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr)) { LogRelFunc(("LBR stack size of the CPU (%u) exceeds our buffer size\n", cLbrStack)); VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_OVERFLOW; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } NOREF(pVCpu0); /* * Update the LBR info. to the VM struct. for use later. */ pVM->hmr0.s.vmx.idLbrTosMsr = idLbrTosMsr; pVM->hm.s.ForR3.vmx.idLbrFromIpMsrFirst = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst = idLbrFromIpMsrFirst; pVM->hm.s.ForR3.vmx.idLbrFromIpMsrLast = pVM->hmr0.s.vmx.idLbrFromIpMsrLast = idLbrFromIpMsrLast; pVM->hm.s.ForR3.vmx.idLbrToIpMsrFirst = pVM->hmr0.s.vmx.idLbrToIpMsrFirst = idLbrToIpMsrFirst; pVM->hm.s.ForR3.vmx.idLbrToIpMsrLast = pVM->hmr0.s.vmx.idLbrToIpMsrLast = idLbrToIpMsrLast; return VINF_SUCCESS; } #ifdef VBOX_WITH_NESTED_HWVIRT_VMX /** * Sets up the shadow VMCS fields arrays. * * This function builds arrays of VMCS fields to sync the shadow VMCS later while * executing the guest. * * @returns VBox status code. * @param pVM The cross context VM structure. */ static int hmR0VmxSetupShadowVmcsFieldsArrays(PVMCC pVM) { /* * Paranoia. Ensure we haven't exposed the VMWRITE-All VMX feature to the guest * when the host does not support it. 
*/ bool const fGstVmwriteAll = pVM->cpum.ro.GuestFeatures.fVmxVmwriteAll; if ( !fGstVmwriteAll || (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL)) { /* likely. */ } else { LogRelFunc(("VMX VMWRITE-All feature exposed to the guest but host CPU does not support it!\n")); VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_GST_HOST_VMWRITE_ALL; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } uint32_t const cVmcsFields = RT_ELEMENTS(g_aVmcsFields); uint32_t cRwFields = 0; uint32_t cRoFields = 0; for (uint32_t i = 0; i < cVmcsFields; i++) { VMXVMCSFIELD VmcsField; VmcsField.u = g_aVmcsFields[i]; /* * We will be writing "FULL" (64-bit) fields while syncing the shadow VMCS. * Therefore, "HIGH" (32-bit portion of 64-bit) fields must not be included * in the shadow VMCS fields array as they would be redundant. * * If the VMCS field depends on a CPU feature that is not exposed to the guest, * we must not include it in the shadow VMCS fields array. Guests attempting to * VMREAD/VMWRITE such VMCS fields would cause a VM-exit and we shall emulate * the required behavior. */ if ( VmcsField.n.fAccessType == VMX_VMCSFIELD_ACCESS_FULL && CPUMIsGuestVmxVmcsFieldValid(pVM, VmcsField.u)) { /* * Read-only fields are placed in a separate array so that while syncing shadow * VMCS fields later (which is more performance critical) we can avoid branches. * * However, if the guest can write to all fields (including read-only fields), * we treat it a as read/write field. Otherwise, writing to these fields would * cause a VMWRITE instruction error while syncing the shadow VMCS. */ if ( fGstVmwriteAll || !VMXIsVmcsFieldReadOnly(VmcsField.u)) pVM->hmr0.s.vmx.paShadowVmcsFields[cRwFields++] = VmcsField.u; else pVM->hmr0.s.vmx.paShadowVmcsRoFields[cRoFields++] = VmcsField.u; } } /* Update the counts. */ pVM->hmr0.s.vmx.cShadowVmcsFields = cRwFields; pVM->hmr0.s.vmx.cShadowVmcsRoFields = cRoFields; return VINF_SUCCESS; } /** * Sets up the VMREAD and VMWRITE bitmaps. * * @param pVM The cross context VM structure. */ static void hmR0VmxSetupVmreadVmwriteBitmaps(PVMCC pVM) { /* * By default, ensure guest attempts to access any VMCS fields cause VM-exits. */ uint32_t const cbBitmap = X86_PAGE_4K_SIZE; uint8_t *pbVmreadBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmreadBitmap; uint8_t *pbVmwriteBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmwriteBitmap; ASMMemFill32(pbVmreadBitmap, cbBitmap, UINT32_C(0xffffffff)); ASMMemFill32(pbVmwriteBitmap, cbBitmap, UINT32_C(0xffffffff)); /* * Skip intercepting VMREAD/VMWRITE to guest read/write fields in the * VMREAD and VMWRITE bitmaps. */ { uint32_t const *paShadowVmcsFields = pVM->hmr0.s.vmx.paShadowVmcsFields; uint32_t const cShadowVmcsFields = pVM->hmr0.s.vmx.cShadowVmcsFields; for (uint32_t i = 0; i < cShadowVmcsFields; i++) { uint32_t const uVmcsField = paShadowVmcsFields[i]; Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK)); Assert(uVmcsField >> 3 < cbBitmap); ASMBitClear(pbVmreadBitmap, uVmcsField & 0x7fff); ASMBitClear(pbVmwriteBitmap, uVmcsField & 0x7fff); } } /* * Skip intercepting VMREAD for guest read-only fields in the VMREAD bitmap * if the host supports VMWRITE to all supported VMCS fields. 
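 *
 * (Both loops index the 4K, i.e. 32768-bit, bitmaps by the low 15 bits of the
 * VMCS field encoding, hence the "& 0x7fff" masks; e.g. a field with encoding
 * 0x0800 clears bit 0x800 of the respective bitmap.)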
*/ if (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL) { uint32_t const *paShadowVmcsRoFields = pVM->hmr0.s.vmx.paShadowVmcsRoFields; uint32_t const cShadowVmcsRoFields = pVM->hmr0.s.vmx.cShadowVmcsRoFields; for (uint32_t i = 0; i < cShadowVmcsRoFields; i++) { uint32_t const uVmcsField = paShadowVmcsRoFields[i]; Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK)); Assert(uVmcsField >> 3 < cbBitmap); ASMBitClear(pbVmreadBitmap, uVmcsField & 0x7fff); } } } #endif /* VBOX_WITH_NESTED_HWVIRT_VMX */ /** * Sets up the virtual-APIC page address for the VMCS. * * @param pVmcsInfo The VMCS info. object. */ DECLINLINE(void) hmR0VmxSetupVmcsVirtApicAddr(PCVMXVMCSINFO pVmcsInfo) { RTHCPHYS const HCPhysVirtApic = pVmcsInfo->HCPhysVirtApic; Assert(HCPhysVirtApic != NIL_RTHCPHYS); Assert(!(HCPhysVirtApic & 0xfff)); /* Bits 11:0 MBZ. */ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic); AssertRC(rc); } /** * Sets up the MSR-bitmap address for the VMCS. * * @param pVmcsInfo The VMCS info. object. */ DECLINLINE(void) hmR0VmxSetupVmcsMsrBitmapAddr(PCVMXVMCSINFO pVmcsInfo) { RTHCPHYS const HCPhysMsrBitmap = pVmcsInfo->HCPhysMsrBitmap; Assert(HCPhysMsrBitmap != NIL_RTHCPHYS); Assert(!(HCPhysMsrBitmap & 0xfff)); /* Bits 11:0 MBZ. */ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, HCPhysMsrBitmap); AssertRC(rc); } /** * Sets up the APIC-access page address for the VMCS. * * @param pVCpu The cross context virtual CPU structure. */ DECLINLINE(void) hmR0VmxSetupVmcsApicAccessAddr(PVMCPUCC pVCpu) { RTHCPHYS const HCPhysApicAccess = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysApicAccess; Assert(HCPhysApicAccess != NIL_RTHCPHYS); Assert(!(HCPhysApicAccess & 0xfff)); /* Bits 11:0 MBZ. */ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess); AssertRC(rc); } #ifdef VBOX_WITH_NESTED_HWVIRT_VMX /** * Sets up the VMREAD bitmap address for the VMCS. * * @param pVCpu The cross context virtual CPU structure. */ DECLINLINE(void) hmR0VmxSetupVmcsVmreadBitmapAddr(PVMCPUCC pVCpu) { RTHCPHYS const HCPhysVmreadBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmreadBitmap; Assert(HCPhysVmreadBitmap != NIL_RTHCPHYS); Assert(!(HCPhysVmreadBitmap & 0xfff)); /* Bits 11:0 MBZ. */ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMREAD_BITMAP_FULL, HCPhysVmreadBitmap); AssertRC(rc); } /** * Sets up the VMWRITE bitmap address for the VMCS. * * @param pVCpu The cross context virtual CPU structure. */ DECLINLINE(void) hmR0VmxSetupVmcsVmwriteBitmapAddr(PVMCPUCC pVCpu) { RTHCPHYS const HCPhysVmwriteBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmwriteBitmap; Assert(HCPhysVmwriteBitmap != NIL_RTHCPHYS); Assert(!(HCPhysVmwriteBitmap & 0xfff)); /* Bits 11:0 MBZ. */ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMWRITE_BITMAP_FULL, HCPhysVmwriteBitmap); AssertRC(rc); } #endif /** * Sets up the VM-entry MSR load, VM-exit MSR-store and VM-exit MSR-load addresses * in the VMCS. * * @returns VBox status code. * @param pVmcsInfo The VMCS info. object. */ DECLINLINE(int) hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(PVMXVMCSINFO pVmcsInfo) { RTHCPHYS const HCPhysGuestMsrLoad = pVmcsInfo->HCPhysGuestMsrLoad; Assert(HCPhysGuestMsrLoad != NIL_RTHCPHYS); Assert(!(HCPhysGuestMsrLoad & 0xf)); /* Bits 3:0 MBZ. */ RTHCPHYS const HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrStore; Assert(HCPhysGuestMsrStore != NIL_RTHCPHYS); Assert(!(HCPhysGuestMsrStore & 0xf)); /* Bits 3:0 MBZ. 
*/ RTHCPHYS const HCPhysHostMsrLoad = pVmcsInfo->HCPhysHostMsrLoad; Assert(HCPhysHostMsrLoad != NIL_RTHCPHYS); Assert(!(HCPhysHostMsrLoad & 0xf)); /* Bits 3:0 MBZ. */ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, HCPhysGuestMsrLoad); AssertRC(rc); rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, HCPhysGuestMsrStore); AssertRC(rc); rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, HCPhysHostMsrLoad); AssertRC(rc); return VINF_SUCCESS; } /** * Sets up MSR permissions in the MSR bitmap of a VMCS info. object. * * @param pVCpu The cross context virtual CPU structure. * @param pVmcsInfo The VMCS info. object. */ static void hmR0VmxSetupVmcsMsrPermissions(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo) { Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS); /* * By default, ensure guest attempts to access any MSR cause VM-exits. * This shall later be relaxed for specific MSRs as necessary. * * Note: For nested-guests, the entire bitmap will be merged prior to * executing the nested-guest using hardware-assisted VMX and hence there * is no need to perform this operation. See hmR0VmxMergeMsrBitmapNested. */ Assert(pVmcsInfo->pvMsrBitmap); ASMMemFill32(pVmcsInfo->pvMsrBitmap, X86_PAGE_4K_SIZE, UINT32_C(0xffffffff)); /* * The guest can access the following MSRs (read, write) without causing * VM-exits; they are loaded/stored automatically using fields in the VMCS. */ PVMCC pVM = pVCpu->CTX_SUFF(pVM); hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_CS, VMXMSRPM_ALLOW_RD_WR); hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_ESP, VMXMSRPM_ALLOW_RD_WR); hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_EIP, VMXMSRPM_ALLOW_RD_WR); hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_GS_BASE, VMXMSRPM_ALLOW_RD_WR); hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_FS_BASE, VMXMSRPM_ALLOW_RD_WR); /* * The IA32_PRED_CMD and IA32_FLUSH_CMD MSRs are write-only and has no state * associated with then. We never need to intercept access (writes need to be * executed without causing a VM-exit, reads will #GP fault anyway). * * The IA32_SPEC_CTRL MSR is read/write and has state. We allow the guest to * read/write them. We swap the guest/host MSR value using the * auto-load/store MSR area. */ if (pVM->cpum.ro.GuestFeatures.fIbpb) hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_PRED_CMD, VMXMSRPM_ALLOW_RD_WR); if (pVM->cpum.ro.GuestFeatures.fFlushCmd) hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_FLUSH_CMD, VMXMSRPM_ALLOW_RD_WR); if (pVM->cpum.ro.GuestFeatures.fIbrs) hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SPEC_CTRL, VMXMSRPM_ALLOW_RD_WR); /* * Allow full read/write access for the following MSRs (mandatory for VT-x) * required for 64-bit guests. */ if (pVM->hmr0.s.fAllow64BitGuests) { hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_LSTAR, VMXMSRPM_ALLOW_RD_WR); hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K6_STAR, VMXMSRPM_ALLOW_RD_WR); hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_SF_MASK, VMXMSRPM_ALLOW_RD_WR); hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_KERNEL_GS_BASE, VMXMSRPM_ALLOW_RD_WR); } /* * IA32_EFER MSR is always intercepted, see @bugref{9180#c37}. */ #ifdef VBOX_STRICT Assert(pVmcsInfo->pvMsrBitmap); uint32_t const fMsrpmEfer = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, MSR_K6_EFER); Assert(fMsrpmEfer == VMXMSRPM_EXIT_RD_WR); #endif } /** * Sets up pin-based VM-execution controls in the VMCS. * * @returns VBox status code. 
* @param pVCpu The cross context virtual CPU structure. * @param pVmcsInfo The VMCS info. object. */ static int hmR0VmxSetupVmcsPinCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo) { PVMCC pVM = pVCpu->CTX_SUFF(pVM); uint32_t fVal = g_HmMsrs.u.vmx.PinCtls.n.allowed0; /* Bits set here must always be set. */ uint32_t const fZap = g_HmMsrs.u.vmx.PinCtls.n.allowed1; /* Bits cleared here must always be cleared. */ fVal |= VMX_PIN_CTLS_EXT_INT_EXIT /* External interrupts cause a VM-exit. */ | VMX_PIN_CTLS_NMI_EXIT; /* Non-maskable interrupts (NMIs) cause a VM-exit. */ if (g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_VIRT_NMI) fVal |= VMX_PIN_CTLS_VIRT_NMI; /* Use virtual NMIs and virtual-NMI blocking features. */ /* Enable the VMX-preemption timer. */ if (pVM->hmr0.s.vmx.fUsePreemptTimer) { Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_PREEMPT_TIMER); fVal |= VMX_PIN_CTLS_PREEMPT_TIMER; } #if 0 /* Enable posted-interrupt processing. */ if (pVM->hm.s.fPostedIntrs) { Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_POSTED_INT); Assert(g_HmMsrs.u.vmx.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_ACK_EXT_INT); fVal |= VMX_PIN_CTLS_POSTED_INT; } #endif if ((fVal & fZap) != fVal) { LogRelFunc(("Invalid pin-based VM-execution controls combo! Cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n", g_HmMsrs.u.vmx.PinCtls.n.allowed0, fVal, fZap)); pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PIN_EXEC; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } /* Commit it to the VMCS and update our cache. */ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, fVal); AssertRC(rc); pVmcsInfo->u32PinCtls = fVal; return VINF_SUCCESS; } /** * Sets up secondary processor-based VM-execution controls in the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param pVmcsInfo The VMCS info. object. */ static int hmR0VmxSetupVmcsProcCtls2(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo) { PVMCC pVM = pVCpu->CTX_SUFF(pVM); uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls2.n.allowed0; /* Bits set here must be set in the VMCS. */ uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls2.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */ /* WBINVD causes a VM-exit. */ if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_WBINVD_EXIT) fVal |= VMX_PROC_CTLS2_WBINVD_EXIT; /* Enable EPT (aka nested-paging). */ if (pVM->hmr0.s.fNestedPaging) fVal |= VMX_PROC_CTLS2_EPT; /* Enable the INVPCID instruction if we expose it to the guest and is supported by the hardware. Without this, guest executing INVPCID would cause a #UD. */ if ( pVM->cpum.ro.GuestFeatures.fInvpcid && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_INVPCID)) fVal |= VMX_PROC_CTLS2_INVPCID; /* Enable VPID. */ if (pVM->hmr0.s.vmx.fVpid) fVal |= VMX_PROC_CTLS2_VPID; /* Enable unrestricted guest execution. */ if (pVM->hmr0.s.vmx.fUnrestrictedGuest) fVal |= VMX_PROC_CTLS2_UNRESTRICTED_GUEST; #if 0 if (pVM->hm.s.fVirtApicRegs) { /* Enable APIC-register virtualization. */ Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_APIC_REG_VIRT); fVal |= VMX_PROC_CTLS2_APIC_REG_VIRT; /* Enable virtual-interrupt delivery. */ Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_INTR_DELIVERY); fVal |= VMX_PROC_CTLS2_VIRT_INTR_DELIVERY; } #endif /* Virtualize-APIC accesses if supported by the CPU. The virtual-APIC page is where the TPR shadow resides. */ /** @todo VIRT_X2APIC support, it's mutually exclusive with this. So must be * done dynamically. 
*/ if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS) { fVal |= VMX_PROC_CTLS2_VIRT_APIC_ACCESS; hmR0VmxSetupVmcsApicAccessAddr(pVCpu); } /* Enable the RDTSCP instruction if we expose it to the guest and is supported by the hardware. Without this, guest executing RDTSCP would cause a #UD. */ if ( pVM->cpum.ro.GuestFeatures.fRdTscP && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_RDTSCP)) fVal |= VMX_PROC_CTLS2_RDTSCP; /* Enable Pause-Loop exiting. */ if ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT) && pVM->hm.s.vmx.cPleGapTicks && pVM->hm.s.vmx.cPleWindowTicks) { fVal |= VMX_PROC_CTLS2_PAUSE_LOOP_EXIT; int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, pVM->hm.s.vmx.cPleGapTicks); AssertRC(rc); rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, pVM->hm.s.vmx.cPleWindowTicks); AssertRC(rc); } if ((fVal & fZap) != fVal) { LogRelFunc(("Invalid secondary processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n", g_HmMsrs.u.vmx.ProcCtls2.n.allowed0, fVal, fZap)); pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC2; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } /* Commit it to the VMCS and update our cache. */ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, fVal); AssertRC(rc); pVmcsInfo->u32ProcCtls2 = fVal; return VINF_SUCCESS; } /** * Sets up processor-based VM-execution controls in the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param pVmcsInfo The VMCS info. object. */ static int hmR0VmxSetupVmcsProcCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo) { PVMCC pVM = pVCpu->CTX_SUFF(pVM); uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls.n.allowed0; /* Bits set here must be set in the VMCS. */ uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */ fVal |= VMX_PROC_CTLS_HLT_EXIT /* HLT causes a VM-exit. */ | VMX_PROC_CTLS_USE_TSC_OFFSETTING /* Use TSC-offsetting. */ | VMX_PROC_CTLS_MOV_DR_EXIT /* MOV DRx causes a VM-exit. */ | VMX_PROC_CTLS_UNCOND_IO_EXIT /* All IO instructions cause a VM-exit. */ | VMX_PROC_CTLS_RDPMC_EXIT /* RDPMC causes a VM-exit. */ | VMX_PROC_CTLS_MONITOR_EXIT /* MONITOR causes a VM-exit. */ | VMX_PROC_CTLS_MWAIT_EXIT; /* MWAIT causes a VM-exit. */ /* We toggle VMX_PROC_CTLS_MOV_DR_EXIT later, check if it's not -always- needed to be set or clear. */ if ( !(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MOV_DR_EXIT) || (g_HmMsrs.u.vmx.ProcCtls.n.allowed0 & VMX_PROC_CTLS_MOV_DR_EXIT)) { pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_MOV_DRX_EXIT; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } /* Without nested paging, INVLPG (also affects INVPCID) and MOV CR3 instructions should cause VM-exits. */ if (!pVM->hmr0.s.fNestedPaging) { Assert(!pVM->hmr0.s.vmx.fUnrestrictedGuest); fVal |= VMX_PROC_CTLS_INVLPG_EXIT | VMX_PROC_CTLS_CR3_LOAD_EXIT | VMX_PROC_CTLS_CR3_STORE_EXIT; } /* Use TPR shadowing if supported by the CPU. */ if ( PDMHasApic(pVM) && (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW)) { fVal |= VMX_PROC_CTLS_USE_TPR_SHADOW; /* CR8 reads from the Virtual-APIC page. */ /* CR8 writes cause a VM-exit based on TPR threshold. */ Assert(!(fVal & VMX_PROC_CTLS_CR8_STORE_EXIT)); Assert(!(fVal & VMX_PROC_CTLS_CR8_LOAD_EXIT)); hmR0VmxSetupVmcsVirtApicAddr(pVmcsInfo); } else { /* Some 32-bit CPUs do not support CR8 load/store exiting as MOV CR8 is invalid on 32-bit Intel CPUs. Set this control only for 64-bit guests. 
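       (CR8 is the architectural alias of the local APIC TPR and is only accessible
       in 64-bit mode, so these intercepts would be meaningless for 32-bit-only guests.)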
*/ if (pVM->hmr0.s.fAllow64BitGuests) fVal |= VMX_PROC_CTLS_CR8_STORE_EXIT /* CR8 reads cause a VM-exit. */ | VMX_PROC_CTLS_CR8_LOAD_EXIT; /* CR8 writes cause a VM-exit. */ } /* Use MSR-bitmaps if supported by the CPU. */ if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS) { fVal |= VMX_PROC_CTLS_USE_MSR_BITMAPS; hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo); } /* Use the secondary processor-based VM-execution controls if supported by the CPU. */ if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS) fVal |= VMX_PROC_CTLS_USE_SECONDARY_CTLS; if ((fVal & fZap) != fVal) { LogRelFunc(("Invalid processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n", g_HmMsrs.u.vmx.ProcCtls.n.allowed0, fVal, fZap)); pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } /* Commit it to the VMCS and update our cache. */ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, fVal); AssertRC(rc); pVmcsInfo->u32ProcCtls = fVal; /* Set up MSR permissions that don't change through the lifetime of the VM. */ if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS) hmR0VmxSetupVmcsMsrPermissions(pVCpu, pVmcsInfo); /* Set up secondary processor-based VM-execution controls if the CPU supports it. */ if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS) return hmR0VmxSetupVmcsProcCtls2(pVCpu, pVmcsInfo); /* Sanity check, should not really happen. */ if (RT_LIKELY(!pVM->hmr0.s.vmx.fUnrestrictedGuest)) { /* likely */ } else { pVCpu->hm.s.u32HMError = VMX_UFC_INVALID_UX_COMBO; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } /* Old CPUs without secondary processor-based VM-execution controls would end up here. */ return VINF_SUCCESS; } /** * Sets up miscellaneous (everything other than Pin, Processor and secondary * Processor-based VM-execution) control fields in the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param pVmcsInfo The VMCS info. object. */ static int hmR0VmxSetupVmcsMiscCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo) { #ifdef VBOX_WITH_NESTED_HWVIRT_VMX if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing) { hmR0VmxSetupVmcsVmreadBitmapAddr(pVCpu); hmR0VmxSetupVmcsVmwriteBitmapAddr(pVCpu); } #endif Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS); int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS); AssertRC(rc); rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo); if (RT_SUCCESS(rc)) { uint64_t const u64Cr0Mask = vmxHCGetFixedCr0Mask(pVCpu); uint64_t const u64Cr4Mask = vmxHCGetFixedCr4Mask(pVCpu); rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask); AssertRC(rc); rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask); AssertRC(rc); pVmcsInfo->u64Cr0Mask = u64Cr0Mask; pVmcsInfo->u64Cr4Mask = u64Cr4Mask; if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fLbr) { rc = VMXWriteVmcsNw(VMX_VMCS64_GUEST_DEBUGCTL_FULL, MSR_IA32_DEBUGCTL_LBR); AssertRC(rc); } return VINF_SUCCESS; } else LogRelFunc(("Failed to initialize VMCS auto-load/store MSR addresses. rc=%Rrc\n", rc)); return rc; } /** * Sets up the initial exception bitmap in the VMCS based on static conditions. * * We shall setup those exception intercepts that don't change during the * lifetime of the VM here. The rest are done dynamically while loading the * guest state. * * @param pVCpu The cross context virtual CPU structure. * @param pVmcsInfo The VMCS info. object. 
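 *
 * @remarks The bitmap is indexed by exception vector: bit @a n set means vector
 *          @a n unconditionally causes a VM-exit. Informational sketch (not part
 *          of the build) of the static bits chosen below:
 * @code
 *   uint32_t uXcptBitmap = RT_BIT(X86_XCPT_AC)     // always intercept alignment checks
 *                        | RT_BIT(X86_XCPT_DB);    // always intercept debug exceptions
 *   if (!fNestedPaging)
 *       uXcptBitmap |= RT_BIT(X86_XCPT_PF);        // page faults only without EPT
 * @endcode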
*/ static void hmR0VmxSetupVmcsXcptBitmap(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo) { /* * The following exceptions are always intercepted: * * #AC - To prevent the guest from hanging the CPU and for dealing with * split-lock detecting host configs. * #DB - To maintain the DR6 state even when intercepting DRx reads/writes and * recursive #DBs can cause a CPU hang. * #PF - To sync our shadow page tables when nested-paging is not used. */ bool const fNestedPaging = pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging; uint32_t const uXcptBitmap = RT_BIT(X86_XCPT_AC) | RT_BIT(X86_XCPT_DB) | (fNestedPaging ? 0 : RT_BIT(X86_XCPT_PF)); /* Commit it to the VMCS. */ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap); AssertRC(rc); /* Update our cache of the exception bitmap. */ pVmcsInfo->u32XcptBitmap = uXcptBitmap; } #ifdef VBOX_WITH_NESTED_HWVIRT_VMX /** * Sets up the VMCS for executing a nested-guest using hardware-assisted VMX. * * @returns VBox status code. * @param pVmcsInfo The VMCS info. object. */ static int hmR0VmxSetupVmcsCtlsNested(PVMXVMCSINFO pVmcsInfo) { Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS); int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS); AssertRC(rc); rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo); if (RT_SUCCESS(rc)) { if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS) hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo); /* Paranoia - We've not yet initialized these, they shall be done while merging the VMCS. */ Assert(!pVmcsInfo->u64Cr0Mask); Assert(!pVmcsInfo->u64Cr4Mask); return VINF_SUCCESS; } LogRelFunc(("Failed to set up the VMCS link pointer in the nested-guest VMCS. rc=%Rrc\n", rc)); return rc; } #endif /** * Selector FNHMSVMVMRUN implementation. */ static DECLCALLBACK(int) hmR0VmxStartVmSelector(PVMXVMCSINFO pVmcsInfo, PVMCPUCC pVCpu, bool fResume) { hmR0VmxUpdateStartVmFunction(pVCpu); return pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResume); } /** * Sets up the VMCS for executing a guest (or nested-guest) using hardware-assisted * VMX. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param pVmcsInfo The VMCS info. object. * @param fIsNstGstVmcs Whether this is a nested-guest VMCS. */ static int hmR0VmxSetupVmcs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs) { Assert(pVmcsInfo->pvVmcs); Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); /* Set the CPU specified revision identifier at the beginning of the VMCS structure. */ *(uint32_t *)pVmcsInfo->pvVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID); const char * const pszVmcs = fIsNstGstVmcs ? "nested-guest VMCS" : "guest VMCS"; LogFlowFunc(("\n")); /* * Initialize the VMCS using VMCLEAR before loading the VMCS. * See Intel spec. 31.6 "Preparation And Launching A Virtual Machine". */ int rc = hmR0VmxClearVmcs(pVmcsInfo); if (RT_SUCCESS(rc)) { rc = hmR0VmxLoadVmcs(pVmcsInfo); if (RT_SUCCESS(rc)) { /* * Initialize the hardware-assisted VMX execution handler for guest and nested-guest VMCS. * The host is always 64-bit since we no longer support 32-bit hosts. * Currently we have just a single handler for all guest modes as well, see @bugref{6208#c73}. 
*/ if (!fIsNstGstVmcs) { rc = hmR0VmxSetupVmcsPinCtls(pVCpu, pVmcsInfo); if (RT_SUCCESS(rc)) { rc = hmR0VmxSetupVmcsProcCtls(pVCpu, pVmcsInfo); if (RT_SUCCESS(rc)) { rc = hmR0VmxSetupVmcsMiscCtls(pVCpu, pVmcsInfo); if (RT_SUCCESS(rc)) { hmR0VmxSetupVmcsXcptBitmap(pVCpu, pVmcsInfo); #ifdef VBOX_WITH_NESTED_HWVIRT_VMX /* * If a shadow VMCS is allocated for the VMCS info. object, initialize the * VMCS revision ID and shadow VMCS indicator bit. Also, clear the VMCS * making it fit for use when VMCS shadowing is later enabled. */ if (pVmcsInfo->pvShadowVmcs) { VMXVMCSREVID VmcsRevId; VmcsRevId.u = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID); VmcsRevId.n.fIsShadowVmcs = 1; *(uint32_t *)pVmcsInfo->pvShadowVmcs = VmcsRevId.u; rc = vmxHCClearShadowVmcs(pVmcsInfo); if (RT_SUCCESS(rc)) { /* likely */ } else LogRelFunc(("Failed to initialize shadow VMCS. rc=%Rrc\n", rc)); } #endif } else LogRelFunc(("Failed to setup miscellaneous controls. rc=%Rrc\n", rc)); } else LogRelFunc(("Failed to setup processor-based VM-execution controls. rc=%Rrc\n", rc)); } else LogRelFunc(("Failed to setup pin-based controls. rc=%Rrc\n", rc)); } else { #ifdef VBOX_WITH_NESTED_HWVIRT_VMX rc = hmR0VmxSetupVmcsCtlsNested(pVmcsInfo); if (RT_SUCCESS(rc)) { /* likely */ } else LogRelFunc(("Failed to initialize nested-guest VMCS. rc=%Rrc\n", rc)); #else AssertFailed(); #endif } } else LogRelFunc(("Failed to load the %s. rc=%Rrc\n", pszVmcs, rc)); } else LogRelFunc(("Failed to clear the %s. rc=%Rrc\n", pszVmcs, rc)); /* Sync any CPU internal VMCS data back into our VMCS in memory. */ if (RT_SUCCESS(rc)) { rc = hmR0VmxClearVmcs(pVmcsInfo); if (RT_SUCCESS(rc)) { /* likely */ } else LogRelFunc(("Failed to clear the %s post setup. rc=%Rrc\n", pszVmcs, rc)); } /* * Update the last-error record both for failures and success, so we * can propagate the status code back to ring-3 for diagnostics. */ hmR0VmxUpdateErrorRecord(pVCpu, rc); NOREF(pszVmcs); return rc; } /** * Does global VT-x initialization (called during module initialization). * * @returns VBox status code. */ VMMR0DECL(int) VMXR0GlobalInit(void) { #ifdef HMVMX_USE_FUNCTION_TABLE AssertCompile(VMX_EXIT_MAX + 1 == RT_ELEMENTS(g_aVMExitHandlers)); # ifdef VBOX_STRICT for (unsigned i = 0; i < RT_ELEMENTS(g_aVMExitHandlers); i++) Assert(g_aVMExitHandlers[i].pfn); # endif #endif return VINF_SUCCESS; } /** * Does global VT-x termination (called during module termination). */ VMMR0DECL(void) VMXR0GlobalTerm() { /* Nothing to do currently. */ } /** * Sets up and activates VT-x on the current CPU. * * @returns VBox status code. * @param pHostCpu The HM physical-CPU structure. * @param pVM The cross context VM structure. Can be * NULL after a host resume operation. * @param pvCpuPage Pointer to the VMXON region (can be NULL if @a * fEnabledByHost is @c true). * @param HCPhysCpuPage Physical address of the VMXON region (can be 0 if * @a fEnabledByHost is @c true). * @param fEnabledByHost Set if SUPR0EnableVTx() or similar was used to * enable VT-x on the host. * @param pHwvirtMsrs Pointer to the hardware-virtualization MSRs. */ VMMR0DECL(int) VMXR0EnableCpu(PHMPHYSCPU pHostCpu, PVMCC pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost, PCSUPHWVIRTMSRS pHwvirtMsrs) { AssertPtr(pHostCpu); AssertPtr(pHwvirtMsrs); Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); /* Enable VT-x if it's not already enabled by the host.
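       (Informational: hmR0VmxEnterRootMode() is expected to set CR4.VMXE and execute
       VMXON on the given VMXON region, whose first 32 bits must carry the VMCS
       revision identifier from IA32_VMX_BASIC. A rough sketch, not the actual code:

           *(uint32_t *)pvCpuPage = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
           ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
           // ...followed by VMXON on HCPhysCpuPage and error handling.
       )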
*/ if (!fEnabledByHost) { int rc = hmR0VmxEnterRootMode(pHostCpu, pVM, HCPhysCpuPage, pvCpuPage); if (RT_FAILURE(rc)) return rc; } /* * Flush all EPT tagged-TLB entries (in case VirtualBox or any other hypervisor have been * using EPTPs) so we don't retain any stale guest-physical mappings which won't get * invalidated when flushing by VPID. */ if (pHwvirtMsrs->u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS) { hmR0VmxFlushEpt(NULL /* pVCpu */, NULL /* pVmcsInfo */, VMXTLBFLUSHEPT_ALL_CONTEXTS); pHostCpu->fFlushAsidBeforeUse = false; } else pHostCpu->fFlushAsidBeforeUse = true; /* Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. */ ++pHostCpu->cTlbFlushes; return VINF_SUCCESS; } /** * Deactivates VT-x on the current CPU. * * @returns VBox status code. * @param pHostCpu The HM physical-CPU structure. * @param pvCpuPage Pointer to the VMXON region. * @param HCPhysCpuPage Physical address of the VMXON region. * * @remarks This function should never be called when SUPR0EnableVTx() or * similar was used to enable VT-x on the host. */ VMMR0DECL(int) VMXR0DisableCpu(PHMPHYSCPU pHostCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage) { RT_NOREF2(pvCpuPage, HCPhysCpuPage); Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); return hmR0VmxLeaveRootMode(pHostCpu); } /** * Does per-VM VT-x initialization. * * @returns VBox status code. * @param pVM The cross context VM structure. */ VMMR0DECL(int) VMXR0InitVM(PVMCC pVM) { AssertPtr(pVM); LogFlowFunc(("pVM=%p\n", pVM)); hmR0VmxStructsInit(pVM); int rc = hmR0VmxStructsAlloc(pVM); if (RT_FAILURE(rc)) { LogRelFunc(("Failed to allocated VMX structures. rc=%Rrc\n", rc)); return rc; } /* Setup the crash dump page. */ #ifdef VBOX_WITH_CRASHDUMP_MAGIC strcpy((char *)pVM->hmr0.s.vmx.pbScratch, "SCRATCH Magic"); *(uint64_t *)(pVM->hmr0.s.vmx.pbScratch + 16) = UINT64_C(0xdeadbeefdeadbeef); #endif return VINF_SUCCESS; } /** * Does per-VM VT-x termination. * * @returns VBox status code. * @param pVM The cross context VM structure. */ VMMR0DECL(int) VMXR0TermVM(PVMCC pVM) { AssertPtr(pVM); LogFlowFunc(("pVM=%p\n", pVM)); #ifdef VBOX_WITH_CRASHDUMP_MAGIC if (pVM->hmr0.s.vmx.pbScratch) RT_BZERO(pVM->hmr0.s.vmx.pbScratch, X86_PAGE_4K_SIZE); #endif hmR0VmxStructsFree(pVM); return VINF_SUCCESS; } /** * Sets up the VM for execution using hardware-assisted VMX. * This function is only called once per-VM during initialization. * * @returns VBox status code. * @param pVM The cross context VM structure. */ VMMR0DECL(int) VMXR0SetupVM(PVMCC pVM) { AssertPtr(pVM); Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); LogFlowFunc(("pVM=%p\n", pVM)); /* * At least verify if VMX is enabled, since we can't check if we're in VMX root mode or not * without causing a #GP. */ RTCCUINTREG const uHostCr4 = ASMGetCR4(); if (RT_LIKELY(uHostCr4 & X86_CR4_VMXE)) { /* likely */ } else return VERR_VMX_NOT_IN_VMX_ROOT_MODE; /* * Check that nested paging is supported if enabled and copy over the flag to the * ring-0 only structure. */ bool const fNestedPaging = pVM->hm.s.fNestedPagingCfg; AssertReturn( !fNestedPaging || (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_EPT), /** @todo use a ring-0 copy of ProcCtls2.n.allowed1 */ VERR_INCOMPATIBLE_CONFIG); pVM->hmr0.s.fNestedPaging = fNestedPaging; pVM->hmr0.s.fAllow64BitGuests = pVM->hm.s.fAllow64BitGuestsCfg; /* * Without unrestricted guest execution, pRealModeTSS and pNonPagingModeEPTPageTable *must* * always be allocated. 
We no longer support the highly unlikely case of unrestricted guest * without pRealModeTSS, see hmR3InitFinalizeR0Intel(). */ bool const fUnrestrictedGuest = pVM->hm.s.vmx.fUnrestrictedGuestCfg; AssertReturn( !fUnrestrictedGuest || ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_UNRESTRICTED_GUEST) && fNestedPaging), VERR_INCOMPATIBLE_CONFIG); if ( !fUnrestrictedGuest && ( !pVM->hm.s.vmx.pNonPagingModeEPTPageTable || !pVM->hm.s.vmx.pRealModeTSS)) { LogRelFunc(("Invalid real-on-v86 state.\n")); return VERR_INTERNAL_ERROR; } pVM->hmr0.s.vmx.fUnrestrictedGuest = fUnrestrictedGuest; /* Initialize these always, see hmR3InitFinalizeR0().*/ pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NONE; pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NONE; /* Setup the tagged-TLB flush handlers. */ int rc = hmR0VmxSetupTaggedTlb(pVM); if (RT_FAILURE(rc)) { LogRelFunc(("Failed to setup tagged TLB. rc=%Rrc\n", rc)); return rc; } /* Determine LBR capabilities. */ pVM->hmr0.s.vmx.fLbr = pVM->hm.s.vmx.fLbrCfg; if (pVM->hmr0.s.vmx.fLbr) { rc = hmR0VmxSetupLbrMsrRange(pVM); if (RT_FAILURE(rc)) { LogRelFunc(("Failed to setup LBR MSR range. rc=%Rrc\n", rc)); return rc; } } #ifdef VBOX_WITH_NESTED_HWVIRT_VMX /* Setup the shadow VMCS fields array and VMREAD/VMWRITE bitmaps. */ if (pVM->hmr0.s.vmx.fUseVmcsShadowing) { rc = hmR0VmxSetupShadowVmcsFieldsArrays(pVM); if (RT_SUCCESS(rc)) hmR0VmxSetupVmreadVmwriteBitmaps(pVM); else { LogRelFunc(("Failed to setup shadow VMCS fields arrays. rc=%Rrc\n", rc)); return rc; } } #endif for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++) { PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu); Log4Func(("pVCpu=%p idCpu=%RU32\n", pVCpu, pVCpu->idCpu)); pVCpu->hmr0.s.vmx.pfnStartVm = hmR0VmxStartVmSelector; rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */); if (RT_SUCCESS(rc)) { #ifdef VBOX_WITH_NESTED_HWVIRT_VMX if (pVM->cpum.ro.GuestFeatures.fVmx) { rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */); if (RT_SUCCESS(rc)) { /* likely */ } else { LogRelFunc(("Nested-guest VMCS setup failed. rc=%Rrc\n", rc)); return rc; } } #endif } else { LogRelFunc(("VMCS setup failed. rc=%Rrc\n", rc)); return rc; } } return VINF_SUCCESS; } /** * Saves the host control registers (CR0, CR3, CR4) into the host-state area in * the VMCS. * @returns CR4 for passing along to hmR0VmxExportHostSegmentRegs. */ static uint64_t hmR0VmxExportHostControlRegs(void) { int rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR0, ASMGetCR0()); AssertRC(rc); rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR3, ASMGetCR3()); AssertRC(rc); uint64_t uHostCr4 = ASMGetCR4(); rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR4, uHostCr4); AssertRC(rc); return uHostCr4; } /** * Saves the host segment registers and GDTR, IDTR, (TR, GS and FS bases) into * the host-state area in the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param uHostCr4 The host CR4 value. */ static int hmR0VmxExportHostSegmentRegs(PVMCPUCC pVCpu, uint64_t uHostCr4) { /* * If we've executed guest code using hardware-assisted VMX, the host-state bits * will be messed up. We should -not- save the messed up state without restoring * the original host-state, see @bugref{7240}. * * This apparently can happen (most likely the FPU changes), deal with it rather than * asserting. Was observed booting Solaris 10u10 32-bit guest. 
*/ if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED) { Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags, pVCpu->idCpu)); VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost); pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0; } /* * Get all the host info. * ASSUME it is safe to use rdfsbase and friends if the CR4.FSGSBASE bit is set * without also checking the cpuid bit. */ uint32_t fRestoreHostFlags; #if RT_INLINE_ASM_EXTERNAL if (uHostCr4 & X86_CR4_FSGSBASE) { hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, true /*fHaveFsGsBase*/); fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE; } else { hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, false /*fHaveFsGsBase*/); fRestoreHostFlags = 0; } RTSEL uSelES = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES; RTSEL uSelDS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS; RTSEL uSelFS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS; RTSEL uSelGS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS; #else pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR = ASMGetTR(); pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS = ASMGetSS(); pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS = ASMGetCS(); ASMGetGDTR((PRTGDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr); ASMGetIDTR((PRTIDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr); if (uHostCr4 & X86_CR4_FSGSBASE) { pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMGetFSBase(); pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMGetGSBase(); fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE; } else { pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMRdMsr(MSR_K8_FS_BASE); pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMRdMsr(MSR_K8_GS_BASE); fRestoreHostFlags = 0; } RTSEL uSelES, uSelDS, uSelFS, uSelGS; pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS = uSelDS = ASMGetDS(); pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES = uSelES = ASMGetES(); pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS = uSelFS = ASMGetFS(); pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS = uSelGS = ASMGetGS(); #endif /* * Determine if the host segment registers are suitable for VT-x. Otherwise use zero to * gain VM-entry and restore them before we get preempted. * * See Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers". 
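 *
 * For example, a typical user-mode data selector such as 0x2b (GDT index 5, TI=0,
 * RPL=3) trips the "(uSelAll & (X86_SEL_RPL | X86_SEL_LDT))" test below because of
 * its non-zero RPL; it is then loaded as 0 for the VM-entry and the original value
 * is put back via VMXRestoreHostState() before we get preempted.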
*/ RTSEL const uSelAll = uSelFS | uSelGS | uSelES | uSelDS; if (uSelAll & (X86_SEL_RPL | X86_SEL_LDT)) { if (!(uSelAll & X86_SEL_LDT)) { #define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \ do { \ (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \ if ((a_uVmcsVar) & X86_SEL_RPL) \ { \ fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \ (a_uVmcsVar) = 0; \ } \ } while (0) VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS); VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES); VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS); VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS); #undef VMXLOCAL_ADJUST_HOST_SEG } else { #define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \ do { \ (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \ if ((a_uVmcsVar) & (X86_SEL_RPL | X86_SEL_LDT)) \ { \ if (!((a_uVmcsVar) & X86_SEL_LDT)) \ fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \ else \ { \ uint32_t const fAttr = ASMGetSegAttr(a_uVmcsVar); \ if ((fAttr & X86_DESC_P) && fAttr != UINT32_MAX) \ fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \ } \ (a_uVmcsVar) = 0; \ } \ } while (0) VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS); VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES); VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS); VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS); #undef VMXLOCAL_ADJUST_HOST_SEG } } /* Verification based on Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers" */ Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_LDT)); Assert(!(uSelDS & X86_SEL_RPL)); Assert(!(uSelDS & X86_SEL_LDT)); Assert(!(uSelES & X86_SEL_RPL)); Assert(!(uSelES & X86_SEL_LDT)); Assert(!(uSelFS & X86_SEL_RPL)); Assert(!(uSelFS & X86_SEL_LDT)); Assert(!(uSelGS & X86_SEL_RPL)); Assert(!(uSelGS & X86_SEL_LDT)); /* * Determine if we need to manually need to restore the GDTR and IDTR limits as VT-x zaps * them to the maximum limit (0xffff) on every VM-exit. */ if (pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb != 0xffff) fRestoreHostFlags |= VMX_RESTORE_HOST_GDTR; /* * IDT limit is effectively capped at 0xfff. (See Intel spec. 6.14.1 "64-Bit Mode IDT" and * Intel spec. 6.2 "Exception and Interrupt Vectors".) Therefore if the host has the limit * as 0xfff, VT-x bloating the limit to 0xffff shouldn't cause any different CPU behavior. * However, several hosts either insists on 0xfff being the limit (Windows Patch Guard) or * uses the limit for other purposes (darwin puts the CPU ID in there but botches sidt * alignment in at least one consumer). So, we're only allowing the IDTR.LIMIT to be left * at 0xffff on hosts where we are sure it won't cause trouble. */ #if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb < 0x0fff) #else if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb != 0xffff) #endif fRestoreHostFlags |= VMX_RESTORE_HOST_IDTR; /* * Host TR base. Verify that TR selector doesn't point past the GDT. Masking off the TI * and RPL bits is effectively what the CPU does for "scaling by 8". TI is always 0 and * RPL should be too in most cases. 
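 *
 * Worked example: a TR selector of 0x0040 masked with X86_SEL_MASK yields byte
 * offset 0x40 into the GDT, i.e. descriptor index 8; the 64-bit TSS base is then
 * extracted from that (16-byte, in long mode) system descriptor via X86DESC64_BASE.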
*/ RTSEL const uSelTR = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR; AssertMsgReturn((uSelTR | X86_SEL_RPL_LDT) <= pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb, ("TR selector exceeds limit. TR=%RTsel cbGdt=%#x\n", uSelTR, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb), VERR_VMX_INVALID_HOST_STATE); PCX86DESCHC pDesc = (PCX86DESCHC)(pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr + (uSelTR & X86_SEL_MASK)); uintptr_t const uTRBase = X86DESC64_BASE(pDesc); /* * VT-x unconditionally restores the TR limit to 0x67 and type to 11 (32-bit busy TSS) on * all VM-exits. The type is the same for 64-bit busy TSS[1]. The limit needs manual * restoration if the host has something else. Task switching is not supported in 64-bit * mode[2], but the limit still matters as IOPM is supported in 64-bit mode. Restoring the * limit lazily while returning to ring-3 is safe because IOPM is not applicable in ring-0. * * [1] See Intel spec. 3.5 "System Descriptor Types". * [2] See Intel spec. 7.2.3 "TSS Descriptor in 64-bit mode". */ Assert(pDesc->System.u4Type == 11); if ( pDesc->System.u16LimitLow != 0x67 || pDesc->System.u4LimitHigh) { fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_TR; /* If the host has made GDT read-only, we would need to temporarily toggle CR0.WP before writing the GDT. */ if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_READ_ONLY) fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_READ_ONLY; if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_NEED_WRITABLE) { /* The GDT is read-only but the writable GDT is available. */ fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_NEED_WRITABLE; pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.cb = pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb; int rc = SUPR0GetCurrentGdtRw(&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.uAddr); AssertRCReturn(rc, rc); } } pVCpu->hmr0.s.vmx.fRestoreHostFlags = fRestoreHostFlags; /* * Do all the VMCS updates in one block to assist nested virtualization. */ int rc; rc = VMXWriteVmcs16(VMX_VMCS16_HOST_CS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS); AssertRC(rc); rc = VMXWriteVmcs16(VMX_VMCS16_HOST_SS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS); AssertRC(rc); rc = VMXWriteVmcs16(VMX_VMCS16_HOST_DS_SEL, uSelDS); AssertRC(rc); rc = VMXWriteVmcs16(VMX_VMCS16_HOST_ES_SEL, uSelES); AssertRC(rc); rc = VMXWriteVmcs16(VMX_VMCS16_HOST_FS_SEL, uSelFS); AssertRC(rc); rc = VMXWriteVmcs16(VMX_VMCS16_HOST_GS_SEL, uSelGS); AssertRC(rc); rc = VMXWriteVmcs16(VMX_VMCS16_HOST_TR_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR); AssertRC(rc); rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr); AssertRC(rc); rc = VMXWriteVmcsNw(VMX_VMCS_HOST_IDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.uAddr); AssertRC(rc); rc = VMXWriteVmcsNw(VMX_VMCS_HOST_TR_BASE, uTRBase); AssertRC(rc); rc = VMXWriteVmcsNw(VMX_VMCS_HOST_FS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase); AssertRC(rc); rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase); AssertRC(rc); return VINF_SUCCESS; } /** * Exports certain host MSRs in the VM-exit MSR-load area and some in the * host-state area of the VMCS. * * These MSRs will be automatically restored on the host after every successful * VM-exit. * * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static void hmR0VmxExportHostMsrs(PVMCPUCC pVCpu) { AssertPtr(pVCpu); /* * Save MSRs that we restore lazily (due to preemption or transition to ring-3) * rather than swapping them on every VM-entry. */ hmR0VmxLazySaveHostMsrs(pVCpu); /* * Host Sysenter MSRs. 
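     *
     * Note: SYSENTER_CS is a 32-bit VMCS field, so only the low dword of the
     * MSR is needed below; SYSENTER_ESP/EIP are natural-width fields and take
     * the full MSR values.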
*/ int rc = VMXWriteVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)); AssertRC(rc); rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP)); AssertRC(rc); rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP)); AssertRC(rc); /* * Host EFER MSR. * * If the CPU supports the newer VMCS controls for managing EFER, use it. Otherwise it's * done as part of auto-load/store MSR area in the VMCS, see hmR0VmxExportGuestMsrs(). */ if (g_fHmVmxSupportsVmcsEfer) { rc = VMXWriteVmcs64(VMX_VMCS64_HOST_EFER_FULL, g_uHmVmxHostMsrEfer); AssertRC(rc); } /** @todo IA32_PERF_GLOBALCTRL, IA32_PAT also see * hmR0VmxExportGuestEntryExitCtls(). */ } /** * Figures out if we need to swap the EFER MSR which is particularly expensive. * * We check all relevant bits. For now, that's everything besides LMA/LME, as * these two bits are handled by VM-entry, see hmR0VMxExportGuestEntryExitCtls(). * * @returns true if we need to load guest EFER, false otherwise. * @param pVCpu The cross context virtual CPU structure. * @param pVmxTransient The VMX-transient structure. * * @remarks Requires EFER, CR4. * @remarks No-long-jump zone!!! */ static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient) { #ifdef HMVMX_ALWAYS_SWAP_EFER RT_NOREF2(pVCpu, pVmxTransient); return true; #else PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx; uint64_t const u64HostEfer = g_uHmVmxHostMsrEfer; uint64_t const u64GuestEfer = pCtx->msrEFER; # ifdef VBOX_WITH_NESTED_HWVIRT_VMX /* * For nested-guests, we shall honor swapping the EFER MSR when requested by * the nested-guest. */ if ( pVmxTransient->fIsNestedGuest && ( CPUMIsGuestVmxEntryCtlsSet(pCtx, VMX_ENTRY_CTLS_LOAD_EFER_MSR) || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_SAVE_EFER_MSR) || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_LOAD_EFER_MSR))) return true; # else RT_NOREF(pVmxTransient); #endif /* * For 64-bit guests, if EFER.SCE bit differs, we need to swap the EFER MSR * to ensure that the guest's SYSCALL behaviour isn't broken, see @bugref{7386}. */ if ( CPUMIsGuestInLongModeEx(pCtx) && (u64GuestEfer & MSR_K6_EFER_SCE) != (u64HostEfer & MSR_K6_EFER_SCE)) return true; /* * If the guest uses PAE and EFER.NXE bit differs, we need to swap the EFER MSR * as it affects guest paging. 64-bit paging implies CR4.PAE as well. * * See Intel spec. 4.5 "IA-32e Paging". * See Intel spec. 4.1.1 "Three Paging Modes". * * Verify that we always intercept CR4.PAE and CR0.PG bits, so we don't need to * import CR4 and CR0 from the VMCS here as those bits are always up to date. */ Assert(vmxHCGetFixedCr4Mask(pVCpu) & X86_CR4_PAE); Assert(vmxHCGetFixedCr0Mask(pVCpu) & X86_CR0_PG); if ( (pCtx->cr4 & X86_CR4_PAE) && (pCtx->cr0 & X86_CR0_PG)) { /* * If nested paging is not used, verify that the guest paging mode matches the * shadow paging mode which is/will be placed in the VMCS (which is what will * actually be used while executing the guest and not the CR4 shadow value). */ AssertMsg( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE_NX || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64_NX, ("enmShadowMode=%u\n", pVCpu->hm.s.enmShadowMode)); if ((u64GuestEfer & MSR_K6_EFER_NXE) != (u64HostEfer & MSR_K6_EFER_NXE)) { /* Verify that the host is NX capable. 
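           To summarize the decision up to this point: the EFER MSR is swapped
           when the guest and host values differ in SCE while the guest is in
           long mode, or when they differ in NXE while the guest has paging
           enabled with PAE -- the case being checked right here.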
*/ Assert(g_CpumHostFeatures.s.fNoExecute); return true; } } return false; #endif } /** * Exports the guest's RSP into the guest-state area in the VMCS. * * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static void hmR0VmxExportGuestRsp(PVMCPUCC pVCpu) { if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RSP) { HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RSP); int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_RSP, pVCpu->cpum.GstCtx.rsp); AssertRC(rc); ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RSP); Log4Func(("rsp=%#RX64\n", pVCpu->cpum.GstCtx.rsp)); } } /** * Exports the guest hardware-virtualization state. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param pVmxTransient The VMX-transient structure. * * @remarks No-long-jump zone!!! */ static int hmR0VmxExportGuestHwvirtState(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient) { if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_HWVIRT) { #ifdef VBOX_WITH_NESTED_HWVIRT_VMX /* * Check if the VMX feature is exposed to the guest and if the host CPU supports * VMCS shadowing. */ if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing) { /* * If the nested hypervisor has loaded a current VMCS and is in VMX root mode, * copy the nested hypervisor's current VMCS into the shadow VMCS and enable * VMCS shadowing to skip intercepting some or all VMREAD/VMWRITE VM-exits. * * We check for VMX root mode here in case the guest executes VMXOFF without * clearing the current VMCS pointer and our VMXOFF instruction emulation does * not clear the current VMCS pointer. */ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo; if ( CPUMIsGuestInVmxRootMode(&pVCpu->cpum.GstCtx) && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx) && CPUMIsGuestVmxCurrentVmcsValid(&pVCpu->cpum.GstCtx)) { /* Paranoia. */ Assert(!pVmxTransient->fIsNestedGuest); /* * For performance reasons, also check if the nested hypervisor's current VMCS * was newly loaded or modified before copying it to the shadow VMCS. */ if (!pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs) { int rc = vmxHCCopyNstGstToShadowVmcs(pVCpu, pVmcsInfo); AssertRCReturn(rc, rc); pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs = true; } vmxHCEnableVmcsShadowing(pVCpu, pVmcsInfo); } else vmxHCDisableVmcsShadowing(pVCpu, pVmcsInfo); } #else NOREF(pVmxTransient); #endif ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_HWVIRT); } return VINF_SUCCESS; } /** * Exports the guest debug registers into the guest-state area in the VMCS. * The guest debug bits are partially shared with the host (e.g. DR6, DR0-3). * * This also sets up whether \#DB and MOV DRx accesses cause VM-exits. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param pVmxTransient The VMX-transient structure. * * @remarks No-long-jump zone!!! */ static int hmR0VmxExportSharedDebugState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient) { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); /** @todo NSTVMX: Figure out what we want to do with nested-guest instruction * stepping. */ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo; if (pVmxTransient->fIsNestedGuest) { int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, CPUMGetGuestDR7(pVCpu)); AssertRC(rc); /* * We don't want to always intercept MOV DRx for nested-guests as it causes * problems when the nested hypervisor isn't intercepting them, see @bugref{10080}. 
* Instead, they are strictly only requested when the nested hypervisor intercepts * them -- handled while merging VMCS controls. * * If neither the outer nor the nested-hypervisor is intercepting MOV DRx, * then the nested-guest debug state should be actively loaded on the host so that * nested-guest reads its own debug registers without causing VM-exits. */ if ( !(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT) && !CPUMIsGuestDebugStateActive(pVCpu)) CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */); return VINF_SUCCESS; } #ifdef VBOX_STRICT /* Validate. Intel spec. 26.3.1.1 "Checks on Guest Controls Registers, Debug Registers, MSRs" */ if (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG) { /* Validate. Intel spec. 17.2 "Debug Registers", recompiler paranoia checks. */ Assert((pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_MBZ_MASK | X86_DR7_RAZ_MASK)) == 0); Assert((pVCpu->cpum.GstCtx.dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK); } #endif bool fSteppingDB = false; bool fInterceptMovDRx = false; uint32_t uProcCtls = pVmcsInfo->u32ProcCtls; if (pVCpu->hm.s.fSingleInstruction) { /* If the CPU supports the monitor trap flag, use it for single stepping in DBGF and avoid intercepting #DB. */ if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MONITOR_TRAP_FLAG) { uProcCtls |= VMX_PROC_CTLS_MONITOR_TRAP_FLAG; Assert(fSteppingDB == false); } else { pVCpu->cpum.GstCtx.eflags.u |= X86_EFL_TF; pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_RFLAGS; pVCpu->hmr0.s.fClearTrapFlag = true; fSteppingDB = true; } } uint64_t u64GuestDr7; if ( fSteppingDB || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK)) { /* * Use the combined guest and host DRx values found in the hypervisor register set * because the hypervisor debugger has breakpoints active or someone is single stepping * on the host side without a monitor trap flag. * * Note! DBGF expects a clean DR6 state before executing guest code. */ if (!CPUMIsHyperDebugStateActive(pVCpu)) { CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */); Assert(CPUMIsHyperDebugStateActive(pVCpu)); Assert(!CPUMIsGuestDebugStateActive(pVCpu)); } /* Update DR7 with the hypervisor value (other DRx registers are handled by CPUM one way or another). */ u64GuestDr7 = CPUMGetHyperDR7(pVCpu); pVCpu->hmr0.s.fUsingHyperDR7 = true; fInterceptMovDRx = true; } else { /* * If the guest has enabled debug registers, we need to load them prior to * executing guest code so they'll trigger at the right time. */ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_DR7); if (pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD)) { if (!CPUMIsGuestDebugStateActive(pVCpu)) { CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */); Assert(CPUMIsGuestDebugStateActive(pVCpu)); Assert(!CPUMIsHyperDebugStateActive(pVCpu)); STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed); } Assert(!fInterceptMovDRx); } else if (!CPUMIsGuestDebugStateActive(pVCpu)) { /* * If no debugging enabled, we'll lazy load DR0-3. Unlike on AMD-V, we * must intercept #DB in order to maintain a correct DR6 guest value, and * because we need to intercept it to prevent nested #DBs from hanging the * CPU, we end up always having to intercept it. See hmR0VmxSetupVmcsXcptBitmap(). */ fInterceptMovDRx = true; } /* Update DR7 with the actual guest value. 
*/ u64GuestDr7 = pVCpu->cpum.GstCtx.dr[7]; pVCpu->hmr0.s.fUsingHyperDR7 = false; } if (fInterceptMovDRx) uProcCtls |= VMX_PROC_CTLS_MOV_DR_EXIT; else uProcCtls &= ~VMX_PROC_CTLS_MOV_DR_EXIT; /* * Update the processor-based VM-execution controls with the MOV-DRx intercepts and the * monitor-trap flag and update our cache. */ if (uProcCtls != pVmcsInfo->u32ProcCtls) { int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls); AssertRC(rc); pVmcsInfo->u32ProcCtls = uProcCtls; } /* * Update guest DR7. */ int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, u64GuestDr7); AssertRC(rc); /* * If we have forced EFLAGS.TF to be set because we're single-stepping in the hypervisor debugger, * we need to clear interrupt inhibition if any as otherwise it causes a VM-entry failure. * * See Intel spec. 26.3.1.5 "Checks on Guest Non-Register State". */ if (fSteppingDB) { Assert(pVCpu->hm.s.fSingleInstruction); Assert(pVCpu->cpum.GstCtx.eflags.Bits.u1TF); uint32_t fIntrState = 0; rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState); AssertRC(rc); if (fIntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS)) { fIntrState &= ~(VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS); rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState); AssertRC(rc); } } return VINF_SUCCESS; } /** * Exports certain guest MSRs into the VM-entry MSR-load and VM-exit MSR-store * areas. * * These MSRs will automatically be loaded to the host CPU on every successful * VM-entry and stored from the host CPU on every successful VM-exit. * * We creates/updates MSR slots for the host MSRs in the VM-exit MSR-load area. The * actual host MSR values are not- updated here for performance reasons. See * hmR0VmxExportHostMsrs(). * * We also exports the guest sysenter MSRs into the guest-state area in the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param pVmxTransient The VMX-transient structure. * * @remarks No-long-jump zone!!! */ static int hmR0VmxExportGuestMsrs(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient) { AssertPtr(pVCpu); AssertPtr(pVmxTransient); PVMCC pVM = pVCpu->CTX_SUFF(pVM); PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx; /* * MSRs that we use the auto-load/store MSR area in the VMCS. * For 64-bit hosts, we load/restore them lazily, see hmR0VmxLazyLoadGuestMsrs(), * nothing to do here. The host MSR values are updated when it's safe in * hmR0VmxLazySaveHostMsrs(). * * For nested-guests, the guests MSRs from the VM-entry MSR-load area are already * loaded (into the guest-CPU context) by the VMLAUNCH/VMRESUME instruction * emulation. The merged MSR permission bitmap will ensure that we get VM-exits * for any MSR that are not part of the lazy MSRs so we do not need to place * those MSRs into the auto-load/store MSR area. Nothing to do here. */ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_GUEST_AUTO_MSRS) { /* No auto-load/store MSRs currently. */ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_GUEST_AUTO_MSRS); } /* * Guest Sysenter MSRs. 
*/ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_MSR_MASK) { HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SYSENTER_MSRS); if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_CS_MSR) { int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs); AssertRC(rc); ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_CS_MSR); } if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_EIP_MSR) { int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip); AssertRC(rc); ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_EIP_MSR); } if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_ESP_MSR) { int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp); AssertRC(rc); ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_ESP_MSR); } } /* * Guest/host EFER MSR. */ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_EFER_MSR) { /* Whether we are using the VMCS to swap the EFER MSR must have been determined earlier while exporting VM-entry/VM-exit controls. */ Assert(!(ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_ENTRY_EXIT_CTLS)); HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_EFER); if (hmR0VmxShouldSwapEferMsr(pVCpu, pVmxTransient)) { /* * EFER.LME is written by software, while EFER.LMA is set by the CPU to (CR0.PG & EFER.LME). * This means a guest can set EFER.LME=1 while CR0.PG=0 and EFER.LMA can remain 0. * VT-x requires that "IA-32e mode guest" VM-entry control must be identical to EFER.LMA * and to CR0.PG. Without unrestricted execution, CR0.PG (used for VT-x, not the shadow) * must always be 1. This forces us to effectively clear both EFER.LMA and EFER.LME until * the guest has also set CR0.PG=1. Otherwise, we would run into an invalid-guest state * during VM-entry. */ uint64_t uGuestEferMsr = pCtx->msrEFER; if (!pVM->hmr0.s.vmx.fUnrestrictedGuest) { if (!(pCtx->msrEFER & MSR_K6_EFER_LMA)) uGuestEferMsr &= ~MSR_K6_EFER_LME; else Assert((pCtx->msrEFER & (MSR_K6_EFER_LMA | MSR_K6_EFER_LME)) == (MSR_K6_EFER_LMA | MSR_K6_EFER_LME)); } /* * If the CPU supports VMCS controls for swapping EFER, use it. Otherwise, we have no option * but to use the auto-load store MSR area in the VMCS for swapping EFER. See @bugref{7368}. */ if (g_fHmVmxSupportsVmcsEfer) { int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_EFER_FULL, uGuestEferMsr); AssertRC(rc); } else { /* * We shall use the auto-load/store MSR area only for loading the EFER MSR but we must * continue to intercept guest read and write accesses to it, see @bugref{7386#c16}. */ int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER, uGuestEferMsr, false /* fSetReadWrite */, false /* fUpdateHostMsr */); AssertRCReturn(rc, rc); } Log4Func(("efer=%#RX64 shadow=%#RX64\n", uGuestEferMsr, pCtx->msrEFER)); } else if (!g_fHmVmxSupportsVmcsEfer) hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER); ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_EFER_MSR); } /* * Other MSRs. */ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_OTHER_MSRS) { /* Speculation Control (R/W). */ HMVMX_CPUMCTX_ASSERT(pVCpu, HM_CHANGED_GUEST_OTHER_MSRS); if (pVM->cpum.ro.GuestFeatures.fIbrs) { int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_IA32_SPEC_CTRL, CPUMGetGuestSpecCtrl(pVCpu), false /* fSetReadWrite */, false /* fUpdateHostMsr */); AssertRCReturn(rc, rc); } /* Last Branch Record. 
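           The loop below places each LBR from-IP MSR (and, where the CPU
           provides them, the matching to-IP MSRs) plus the top-of-stack MSR
           into the auto-load/store area, so the guest's branch records are
           reloaded on VM-entry and saved back on VM-exit.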
*/ if (pVM->hmr0.s.vmx.fLbr) { PVMXVMCSINFOSHARED const pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared; uint32_t const idFromIpMsrStart = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst; uint32_t const idToIpMsrStart = pVM->hmr0.s.vmx.idLbrToIpMsrFirst; uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1; Assert(cLbrStack <= 32); for (uint32_t i = 0; i < cLbrStack; i++) { int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idFromIpMsrStart + i, pVmcsInfoShared->au64LbrFromIpMsr[i], false /* fSetReadWrite */, false /* fUpdateHostMsr */); AssertRCReturn(rc, rc); /* Some CPUs don't have a Branch-To-IP MSR (P4 and related Xeons). */ if (idToIpMsrStart != 0) { rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idToIpMsrStart + i, pVmcsInfoShared->au64LbrToIpMsr[i], false /* fSetReadWrite */, false /* fUpdateHostMsr */); AssertRCReturn(rc, rc); } } /* Add LBR top-of-stack MSR (which contains the index to the most recent record). */ int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, pVM->hmr0.s.vmx.idLbrTosMsr, pVmcsInfoShared->u64LbrTosMsr, false /* fSetReadWrite */, false /* fUpdateHostMsr */); AssertRCReturn(rc, rc); } ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_OTHER_MSRS); } return VINF_SUCCESS; } /** * Wrapper for running the guest code in VT-x. * * @returns VBox status code, no informational status codes. * @param pVCpu The cross context virtual CPU structure. * @param pVmxTransient The VMX-transient structure. * * @remarks No-long-jump zone!!! */ DECLINLINE(int) hmR0VmxRunGuest(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient) { /* Mark that HM is the keeper of all guest-CPU registers now that we're going to execute guest code. */ pVCpu->cpum.GstCtx.fExtrn |= HMVMX_CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_KEEPER_HM; PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo; bool const fResumeVM = RT_BOOL(pVmcsInfo->fVmcsState & VMX_V_VMCS_LAUNCH_STATE_LAUNCHED); #ifdef VBOX_WITH_STATISTICS if (fResumeVM) STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmResume); else STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmLaunch); #endif int rc = pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResumeVM); AssertMsg(rc <= VINF_SUCCESS, ("%Rrc\n", rc)); return rc; } /** * Reports world-switch error and dumps some useful debug info. * * @param pVCpu The cross context virtual CPU structure. * @param rcVMRun The return code from VMLAUNCH/VMRESUME. * @param pVmxTransient The VMX-transient structure (only * exitReason updated). */ static void hmR0VmxReportWorldSwitchError(PVMCPUCC pVCpu, int rcVMRun, PVMXTRANSIENT pVmxTransient) { Assert(pVCpu); Assert(pVmxTransient); HMVMX_ASSERT_PREEMPT_SAFE(pVCpu); Log4Func(("VM-entry failure: %Rrc\n", rcVMRun)); switch (rcVMRun) { case VERR_VMX_INVALID_VMXON_PTR: AssertFailed(); break; case VINF_SUCCESS: /* VMLAUNCH/VMRESUME succeeded but VM-entry failed... yeah, true story. */ case VERR_VMX_UNABLE_TO_START_VM: /* VMLAUNCH/VMRESUME itself failed. */ { int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &pVCpu->hm.s.vmx.LastError.u32ExitReason); rc |= VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError); AssertRC(rc); vmxHCReadToTransientSlow(pVCpu, pVmxTransient); pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu; /* LastError.idCurrentCpu was already updated in hmR0VmxPreRunGuestCommitted(). Cannot do it here as we may have been long preempted. 
*/ #ifdef VBOX_STRICT PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu); Log4(("uExitReason %#RX32 (VmxTransient %#RX16)\n", pVCpu->hm.s.vmx.LastError.u32ExitReason, pVmxTransient->uExitReason)); Log4(("Exit Qualification %#RX64\n", pVmxTransient->uExitQual)); Log4(("InstrError %#RX32\n", pVCpu->hm.s.vmx.LastError.u32InstrError)); if (pVCpu->hm.s.vmx.LastError.u32InstrError <= HMVMX_INSTR_ERROR_MAX) Log4(("InstrError Desc. \"%s\"\n", g_apszVmxInstrErrors[pVCpu->hm.s.vmx.LastError.u32InstrError])); else Log4(("InstrError Desc. Range exceeded %u\n", HMVMX_INSTR_ERROR_MAX)); Log4(("Entered host CPU %u\n", pVCpu->hm.s.vmx.LastError.idEnteredCpu)); Log4(("Current host CPU %u\n", pVCpu->hm.s.vmx.LastError.idCurrentCpu)); static struct { /** Name of the field to log. */ const char *pszName; /** The VMCS field. */ uint32_t uVmcsField; /** Whether host support of this field needs to be checked. */ bool fCheckSupport; } const s_aVmcsFields[] = { { "VMX_VMCS32_CTRL_PIN_EXEC", VMX_VMCS32_CTRL_PIN_EXEC, false }, { "VMX_VMCS32_CTRL_PROC_EXEC", VMX_VMCS32_CTRL_PROC_EXEC, false }, { "VMX_VMCS32_CTRL_PROC_EXEC2", VMX_VMCS32_CTRL_PROC_EXEC2, true }, { "VMX_VMCS32_CTRL_ENTRY", VMX_VMCS32_CTRL_ENTRY, false }, { "VMX_VMCS32_CTRL_EXIT", VMX_VMCS32_CTRL_EXIT, false }, { "VMX_VMCS32_CTRL_CR3_TARGET_COUNT", VMX_VMCS32_CTRL_CR3_TARGET_COUNT, false }, { "VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO", VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, false }, { "VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE", VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, false }, { "VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH", VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, false }, { "VMX_VMCS32_CTRL_TPR_THRESHOLD", VMX_VMCS32_CTRL_TPR_THRESHOLD, false }, { "VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, false }, { "VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, false }, { "VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, false }, { "VMX_VMCS32_CTRL_EXCEPTION_BITMAP", VMX_VMCS32_CTRL_EXCEPTION_BITMAP, false }, { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, false }, { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, false }, { "VMX_VMCS_CTRL_CR0_MASK", VMX_VMCS_CTRL_CR0_MASK, false }, { "VMX_VMCS_CTRL_CR0_READ_SHADOW", VMX_VMCS_CTRL_CR0_READ_SHADOW, false }, { "VMX_VMCS_CTRL_CR4_MASK", VMX_VMCS_CTRL_CR4_MASK, false }, { "VMX_VMCS_CTRL_CR4_READ_SHADOW", VMX_VMCS_CTRL_CR4_READ_SHADOW, false }, { "VMX_VMCS64_CTRL_EPTP_FULL", VMX_VMCS64_CTRL_EPTP_FULL, true }, { "VMX_VMCS_GUEST_RIP", VMX_VMCS_GUEST_RIP, false }, { "VMX_VMCS_GUEST_RSP", VMX_VMCS_GUEST_RSP, false }, { "VMX_VMCS_GUEST_RFLAGS", VMX_VMCS_GUEST_RFLAGS, false }, { "VMX_VMCS16_VPID", VMX_VMCS16_VPID, true, }, { "VMX_VMCS_HOST_CR0", VMX_VMCS_HOST_CR0, false }, { "VMX_VMCS_HOST_CR3", VMX_VMCS_HOST_CR3, false }, { "VMX_VMCS_HOST_CR4", VMX_VMCS_HOST_CR4, false }, /* The order of selector fields below are fixed! */ { "VMX_VMCS16_HOST_ES_SEL", VMX_VMCS16_HOST_ES_SEL, false }, { "VMX_VMCS16_HOST_CS_SEL", VMX_VMCS16_HOST_CS_SEL, false }, { "VMX_VMCS16_HOST_SS_SEL", VMX_VMCS16_HOST_SS_SEL, false }, { "VMX_VMCS16_HOST_DS_SEL", VMX_VMCS16_HOST_DS_SEL, false }, { "VMX_VMCS16_HOST_FS_SEL", VMX_VMCS16_HOST_FS_SEL, false }, { "VMX_VMCS16_HOST_GS_SEL", VMX_VMCS16_HOST_GS_SEL, false }, { "VMX_VMCS16_HOST_TR_SEL", VMX_VMCS16_HOST_TR_SEL, false }, /* End of ordered selector fields. 
*/ { "VMX_VMCS_HOST_TR_BASE", VMX_VMCS_HOST_TR_BASE, false }, { "VMX_VMCS_HOST_GDTR_BASE", VMX_VMCS_HOST_GDTR_BASE, false }, { "VMX_VMCS_HOST_IDTR_BASE", VMX_VMCS_HOST_IDTR_BASE, false }, { "VMX_VMCS32_HOST_SYSENTER_CS", VMX_VMCS32_HOST_SYSENTER_CS, false }, { "VMX_VMCS_HOST_SYSENTER_EIP", VMX_VMCS_HOST_SYSENTER_EIP, false }, { "VMX_VMCS_HOST_SYSENTER_ESP", VMX_VMCS_HOST_SYSENTER_ESP, false }, { "VMX_VMCS_HOST_RSP", VMX_VMCS_HOST_RSP, false }, { "VMX_VMCS_HOST_RIP", VMX_VMCS_HOST_RIP, false } }; RTGDTR HostGdtr; ASMGetGDTR(&HostGdtr); uint32_t const cVmcsFields = RT_ELEMENTS(s_aVmcsFields); for (uint32_t i = 0; i < cVmcsFields; i++) { uint32_t const uVmcsField = s_aVmcsFields[i].uVmcsField; bool fSupported; if (!s_aVmcsFields[i].fCheckSupport) fSupported = true; else { PVMCC pVM = pVCpu->CTX_SUFF(pVM); switch (uVmcsField) { case VMX_VMCS64_CTRL_EPTP_FULL: fSupported = pVM->hmr0.s.fNestedPaging; break; case VMX_VMCS16_VPID: fSupported = pVM->hmr0.s.vmx.fVpid; break; case VMX_VMCS32_CTRL_PROC_EXEC2: fSupported = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS); break; default: AssertMsgFailedReturnVoid(("Failed to provide VMCS field support for %#RX32\n", uVmcsField)); } } if (fSupported) { uint8_t const uWidth = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_WIDTH); switch (uWidth) { case VMX_VMCSFIELD_WIDTH_16BIT: { uint16_t u16Val; rc = VMXReadVmcs16(uVmcsField, &u16Val); AssertRC(rc); Log4(("%-40s = %#RX16\n", s_aVmcsFields[i].pszName, u16Val)); if ( uVmcsField >= VMX_VMCS16_HOST_ES_SEL && uVmcsField <= VMX_VMCS16_HOST_TR_SEL) { if (u16Val < HostGdtr.cbGdt) { /* Order of selectors in s_apszSel is fixed and matches the order in s_aVmcsFields. */ static const char * const s_apszSel[] = { "Host ES", "Host CS", "Host SS", "Host DS", "Host FS", "Host GS", "Host TR" }; uint8_t const idxSel = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_INDEX); Assert(idxSel < RT_ELEMENTS(s_apszSel)); PCX86DESCHC pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u16Val & X86_SEL_MASK)); hmR0DumpDescriptor(pDesc, u16Val, s_apszSel[idxSel]); } else Log4((" Selector value exceeds GDT limit!\n")); } break; } case VMX_VMCSFIELD_WIDTH_32BIT: { uint32_t u32Val; rc = VMXReadVmcs32(uVmcsField, &u32Val); AssertRC(rc); Log4(("%-40s = %#RX32\n", s_aVmcsFields[i].pszName, u32Val)); break; } case VMX_VMCSFIELD_WIDTH_64BIT: case VMX_VMCSFIELD_WIDTH_NATURAL: { uint64_t u64Val; rc = VMXReadVmcs64(uVmcsField, &u64Val); AssertRC(rc); Log4(("%-40s = %#RX64\n", s_aVmcsFields[i].pszName, u64Val)); break; } } } } Log4(("MSR_K6_EFER = %#RX64\n", ASMRdMsr(MSR_K6_EFER))); Log4(("MSR_K8_CSTAR = %#RX64\n", ASMRdMsr(MSR_K8_CSTAR))); Log4(("MSR_K8_LSTAR = %#RX64\n", ASMRdMsr(MSR_K8_LSTAR))); Log4(("MSR_K6_STAR = %#RX64\n", ASMRdMsr(MSR_K6_STAR))); Log4(("MSR_K8_SF_MASK = %#RX64\n", ASMRdMsr(MSR_K8_SF_MASK))); Log4(("MSR_K8_KERNEL_GS_BASE = %#RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE))); #endif /* VBOX_STRICT */ break; } default: /* Impossible */ AssertMsgFailed(("hmR0VmxReportWorldSwitchError %Rrc (%#x)\n", rcVMRun, rcVMRun)); break; } } /** * Sets up the usage of TSC-offsetting and updates the VMCS. * * If offsetting is not possible, cause VM-exits on RDTSC(P)s. Also sets up the * VMX-preemption timer. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param pVmxTransient The VMX-transient structure. * @param idCurrentCpu The current CPU number. * * @remarks No-long-jump zone!!! 
*/ static void hmR0VmxUpdateTscOffsettingAndPreemptTimer(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, RTCPUID idCurrentCpu) { bool fOffsettedTsc; bool fParavirtTsc; uint64_t uTscOffset; PVMCC pVM = pVCpu->CTX_SUFF(pVM); PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu); if (pVM->hmr0.s.vmx.fUsePreemptTimer) { /* The TMCpuTickGetDeadlineAndTscOffset function is expensive (calling it on every entry slowed down the bs2-test1 CPUID testcase by ~33% (on an 10980xe). */ uint64_t cTicksToDeadline; if ( idCurrentCpu == pVCpu->hmr0.s.idLastCpu && TMVirtualSyncIsCurrentDeadlineVersion(pVM, pVCpu->hmr0.s.vmx.uTscDeadlineVersion)) { STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadline); fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc); cTicksToDeadline = pVCpu->hmr0.s.vmx.uTscDeadline - SUPReadTsc(); if ((int64_t)cTicksToDeadline > 0) { /* hopefully */ } else { STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadlineExpired); cTicksToDeadline = 0; } } else { STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadline); cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVM, pVCpu, &uTscOffset, &fOffsettedTsc, &fParavirtTsc, &pVCpu->hmr0.s.vmx.uTscDeadline, &pVCpu->hmr0.s.vmx.uTscDeadlineVersion); pVCpu->hmr0.s.vmx.uTscDeadline += cTicksToDeadline; if (cTicksToDeadline >= 128) { /* hopefully */ } else STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadlineExpired); } /* Make sure the returned values have sane upper and lower boundaries. */ uint64_t const u64CpuHz = SUPGetCpuHzFromGipBySetIndex(g_pSUPGlobalInfoPage, pVCpu->iHostCpuSet); cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64th of a second, 15.625ms. */ /** @todo r=bird: Once real+virtual timers move to separate thread, we can raise the upper limit (16ms isn't much). ASSUMES working poke cpu function. */ cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 32678); /* 1/32768th of a second, ~30us. */ cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift; /** @todo r=ramshankar: We need to find a way to integrate nested-guest * preemption timers here. We probably need to clamp the preemption timer, * after converting the timer value to the host. */ uint32_t const cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16); int rc = VMXWriteVmcs32(VMX_VMCS32_PREEMPT_TIMER_VALUE, cPreemptionTickCount); AssertRC(rc); } else fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc); if (fParavirtTsc) { /* Currently neither Hyper-V nor KVM need to update their paravirt. TSC information before every VM-entry, hence disable it for performance sake. */ #if 0 int rc = GIMR0UpdateParavirtTsc(pVM, 0 /* u64Offset */); AssertRC(rc); #endif STAM_COUNTER_INC(&pVCpu->hm.s.StatTscParavirt); } if ( fOffsettedTsc && RT_LIKELY(!pVCpu->hmr0.s.fDebugWantRdTscExit)) { if (pVmxTransient->fIsNestedGuest) uTscOffset = CPUMApplyNestedGuestTscOffset(pVCpu, uTscOffset); hmR0VmxSetTscOffsetVmcs(pVmcsInfo, uTscOffset); hmR0VmxRemoveProcCtlsVmcs(pVCpu, pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT); } else { /* We can't use TSC-offsetting (non-fixed TSC, warp drive active etc.), VM-exit on RDTSC(P). */ hmR0VmxSetProcCtlsVmcs(pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT); } } /** * Saves the guest state from the VMCS into the guest-CPU context. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param fWhat What to import, CPUMCTX_EXTRN_XXX. 
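 *
 * Illustrative usage from a caller that needs RIP and RFLAGS up to date in the
 * guest-CPU context (the flag combination here is only an example):
 * @code
 *     int rc = VMXR0ImportStateOnDemand(pVCpu, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS);
 *     AssertRCReturn(rc, rc);
 * @endcode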
*/ VMMR0DECL(int) VMXR0ImportStateOnDemand(PVMCPUCC pVCpu, uint64_t fWhat) { AssertPtr(pVCpu); PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu); return vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, fWhat); } /** * Gets VMX VM-exit auxiliary information. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param pVmxExitAux Where to store the VM-exit auxiliary info. * @param fWhat What to fetch, HMVMX_READ_XXX. */ VMMR0DECL(int) VMXR0GetExitAuxInfo(PVMCPUCC pVCpu, PVMXEXITAUX pVmxExitAux, uint32_t fWhat) { PVMXTRANSIENT pVmxTransient = pVCpu->hmr0.s.vmx.pVmxTransient; if (RT_LIKELY(pVmxTransient)) { AssertCompile(sizeof(fWhat) == sizeof(pVmxTransient->fVmcsFieldsRead)); /* The exit reason is always available. */ pVmxExitAux->uReason = pVmxTransient->uExitReason; if (fWhat & HMVMX_READ_EXIT_QUALIFICATION) { vmxHCReadToTransientSlow(pVCpu, pVmxTransient); pVmxExitAux->u64Qual = pVmxTransient->uExitQual; #ifdef VBOX_STRICT fWhat &= ~HMVMX_READ_EXIT_QUALIFICATION; #endif } if (fWhat & HMVMX_READ_IDT_VECTORING_INFO) { vmxHCReadToTransientSlow(pVCpu, pVmxTransient); pVmxExitAux->uIdtVectoringInfo = pVmxTransient->uIdtVectoringInfo; #ifdef VBOX_STRICT fWhat &= ~HMVMX_READ_IDT_VECTORING_INFO; #endif } if (fWhat & HMVMX_READ_IDT_VECTORING_ERROR_CODE) { vmxHCReadToTransientSlow(pVCpu, pVmxTransient); pVmxExitAux->uIdtVectoringErrCode = pVmxTransient->uIdtVectoringErrorCode; #ifdef VBOX_STRICT fWhat &= ~HMVMX_READ_IDT_VECTORING_ERROR_CODE; #endif } if (fWhat & HMVMX_READ_EXIT_INSTR_LEN) { vmxHCReadToTransientSlow(pVCpu, pVmxTransient); pVmxExitAux->cbInstr = pVmxTransient->cbExitInstr; #ifdef VBOX_STRICT fWhat &= ~HMVMX_READ_EXIT_INSTR_LEN; #endif } if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_INFO) { vmxHCReadToTransientSlow(pVCpu, pVmxTransient); pVmxExitAux->uExitIntInfo = pVmxTransient->uExitIntInfo; #ifdef VBOX_STRICT fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_INFO; #endif } if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE) { vmxHCReadToTransientSlow(pVCpu, pVmxTransient); pVmxExitAux->uExitIntErrCode = pVmxTransient->uExitIntErrorCode; #ifdef VBOX_STRICT fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE; #endif } if (fWhat & HMVMX_READ_EXIT_INSTR_INFO) { vmxHCReadToTransientSlow(pVCpu, pVmxTransient); pVmxExitAux->InstrInfo.u = pVmxTransient->ExitInstrInfo.u; #ifdef VBOX_STRICT fWhat &= ~HMVMX_READ_EXIT_INSTR_INFO; #endif } if (fWhat & HMVMX_READ_GUEST_LINEAR_ADDR) { vmxHCReadToTransientSlow(pVCpu, pVmxTransient); pVmxExitAux->u64GuestLinearAddr = pVmxTransient->uGuestLinearAddr; #ifdef VBOX_STRICT fWhat &= ~HMVMX_READ_GUEST_LINEAR_ADDR; #endif } if (fWhat & HMVMX_READ_GUEST_PHYSICAL_ADDR) { vmxHCReadToTransientSlow(pVCpu, pVmxTransient); pVmxExitAux->u64GuestPhysAddr = pVmxTransient->uGuestPhysicalAddr; #ifdef VBOX_STRICT fWhat &= ~HMVMX_READ_GUEST_PHYSICAL_ADDR; #endif } if (fWhat & HMVMX_READ_GUEST_PENDING_DBG_XCPTS) { #ifdef VBOX_WITH_NESTED_HWVIRT_VMX vmxHCReadToTransientSlow(pVCpu, pVmxTransient); pVmxExitAux->u64GuestPendingDbgXcpts = pVmxTransient->uGuestPendingDbgXcpts; #else pVmxExitAux->u64GuestPendingDbgXcpts = 0; #endif #ifdef VBOX_STRICT fWhat &= ~HMVMX_READ_GUEST_PENDING_DBG_XCPTS; #endif } AssertMsg(!fWhat, ("fWhat=%#RX32 fVmcsFieldsRead=%#RX32\n", fWhat, pVmxTransient->fVmcsFieldsRead)); return VINF_SUCCESS; } return VERR_NOT_AVAILABLE; } /** * Does the necessary state syncing before returning to ring-3 for any reason * (longjmp, preemption, voluntary exits to ring-3) from VT-x. * * @returns VBox status code. 
* @param pVCpu The cross context virtual CPU structure. * @param fImportState Whether to import the guest state from the VMCS back * to the guest-CPU context. * * @remarks No-long-jmp zone!!! */ static int hmR0VmxLeave(PVMCPUCC pVCpu, bool fImportState) { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); Assert(!VMMRZCallRing3IsEnabled(pVCpu)); RTCPUID const idCpu = RTMpCpuId(); Log4Func(("HostCpuId=%u\n", idCpu)); /* * !!! IMPORTANT !!! * If you modify code here, check whether VMXR0CallRing3Callback() needs to be updated too. */ /* Save the guest state if necessary. */ PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu); if (fImportState) { int rc = vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL); AssertRCReturn(rc, rc); } /* Restore host FPU state if necessary. We will resync on next R0 reentry. */ CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu); Assert(!CPUMIsGuestFPUStateActive(pVCpu)); /* Restore host debug registers if necessary. We will resync on next R0 reentry. */ #ifdef VBOX_STRICT if (CPUMIsHyperDebugStateActive(pVCpu)) Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT); #endif CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */); Assert(!CPUMIsGuestDebugStateActive(pVCpu)); Assert(!CPUMIsHyperDebugStateActive(pVCpu)); /* Restore host-state bits that VT-x only restores partially. */ if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED) { Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags, idCpu)); VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost); } pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0; /* Restore the lazy host MSRs as we're leaving VT-x context. */ if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST) { /* We shouldn't restore the host MSRs without saving the guest MSRs first. */ if (!fImportState) { int rc = vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS); AssertRCReturn(rc, rc); } hmR0VmxLazyRestoreHostMsrs(pVCpu); Assert(!pVCpu->hmr0.s.vmx.fLazyMsrs); } else pVCpu->hmr0.s.vmx.fLazyMsrs = 0; /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */ pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false; STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry); STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatImportGuestState); STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExportGuestState); STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatPreExit); STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitHandling); STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitIO); STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitMovCRx); STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitXcptNmi); STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitVmentry); STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3); VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC); /** @todo This partially defeats the purpose of having preemption hooks. * The problem is, deregistering the hooks should be moved to a place that * lasts until the EMT is about to be destroyed not everytime while leaving HM * context. */ int rc = hmR0VmxClearVmcs(pVmcsInfo); AssertRCReturn(rc, rc); #ifdef VBOX_WITH_NESTED_HWVIRT_VMX /* * A valid shadow VMCS is made active as part of VM-entry. It is necessary to * clear a shadow VMCS before allowing that VMCS to become active on another * logical processor. 
We may or may not be importing guest state which clears * it, so cover for it here. * * See Intel spec. 24.11.1 "Software Use of Virtual-Machine Control Structures". */ if ( pVmcsInfo->pvShadowVmcs && pVmcsInfo->fShadowVmcsState != VMX_V_VMCS_LAUNCH_STATE_CLEAR) { rc = vmxHCClearShadowVmcs(pVmcsInfo); AssertRCReturn(rc, rc); } /* * Flag that we need to re-export the host state if we switch to this VMCS before * executing guest or nested-guest code. */ pVmcsInfo->idHostCpuState = NIL_RTCPUID; #endif Log4Func(("Cleared Vmcs. HostCpuId=%u\n", idCpu)); NOREF(idCpu); return VINF_SUCCESS; } /** * Leaves the VT-x session. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jmp zone!!! */ static int hmR0VmxLeaveSession(PVMCPUCC pVCpu) { HM_DISABLE_PREEMPT(pVCpu); HMVMX_ASSERT_CPU_SAFE(pVCpu); Assert(!VMMRZCallRing3IsEnabled(pVCpu)); Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); /* When thread-context hooks are used, we can avoid doing the leave again if we had been preempted before and done this from the VMXR0ThreadCtxCallback(). */ if (!pVCpu->hmr0.s.fLeaveDone) { int rc2 = hmR0VmxLeave(pVCpu, true /* fImportState */); AssertRCReturnStmt(rc2, HM_RESTORE_PREEMPT(), rc2); pVCpu->hmr0.s.fLeaveDone = true; } Assert(!pVCpu->cpum.GstCtx.fExtrn); /* * !!! IMPORTANT !!! * If you modify code here, make sure to check whether VMXR0CallRing3Callback() needs to be updated too. */ /* Deregister hook now that we've left HM context before re-enabling preemption. */ /** @todo Deregistering here means we need to VMCLEAR always * (longjmp/exit-to-r3) in VT-x which is not efficient, eliminate need * for calling VMMR0ThreadCtxHookDisable here! */ VMMR0ThreadCtxHookDisable(pVCpu); /* Leave HM context. This takes care of local init (term) and deregistering the longjmp-to-ring-3 callback. */ int rc = HMR0LeaveCpu(pVCpu); HM_RESTORE_PREEMPT(); return rc; } /** * Take necessary actions before going back to ring-3. * * An action requires us to go back to ring-3. This function does the necessary * steps before we can safely return to ring-3. This is not the same as longjmps * to ring-3, this is voluntary and prepares the guest so it may continue * executing outside HM (recompiler/IEM). * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param rcExit The reason for exiting to ring-3. Can be * VINF_VMM_UNKNOWN_RING3_CALL. */ static int hmR0VmxExitToRing3(PVMCPUCC pVCpu, VBOXSTRICTRC rcExit) { HMVMX_ASSERT_PREEMPT_SAFE(pVCpu); PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu); if (RT_UNLIKELY(rcExit == VERR_VMX_INVALID_VMCS_PTR)) { VMXGetCurrentVmcs(&pVCpu->hm.s.vmx.LastError.HCPhysCurrentVmcs); pVCpu->hm.s.vmx.LastError.u32VmcsRev = *(uint32_t *)pVmcsInfo->pvVmcs; pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu; /* LastError.idCurrentCpu was updated in hmR0VmxPreRunGuestCommitted(). */ } /* Please, no longjumps here (any logging shouldn't flush jump back to ring-3). NO LOGGING BEFORE THIS POINT! */ VMMRZCallRing3Disable(pVCpu); Log4Func(("rcExit=%d\n", VBOXSTRICTRC_VAL(rcExit))); /* * Convert any pending HM events back to TRPM due to premature exits to ring-3. * We need to do this only on returns to ring-3 and not for longjmps to ring3. * * This is because execution may continue from ring-3 and we would need to inject * the event from there (hence place it back in TRPM). 
*/ if (pVCpu->hm.s.Event.fPending) { vmxHCPendingEventToTrpmTrap(pVCpu); Assert(!pVCpu->hm.s.Event.fPending); /* Clear the events from the VMCS. */ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0); AssertRC(rc); rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, 0); AssertRC(rc); } #ifdef VBOX_STRICT /* * We check for rcExit here since for errors like VERR_VMX_UNABLE_TO_START_VM (which are * fatal), we don't care about verifying duplicate injection of events. Errors like * VERR_EM_INTERPRET are converted to their VINF_* counterparts -prior- to calling this * function so those should and will be checked below. */ else if (RT_SUCCESS(rcExit)) { /* * Ensure we don't accidentally clear a pending HM event without clearing the VMCS. * This can be pretty hard to debug otherwise, interrupts might get injected twice * occasionally, see @bugref{9180#c42}. * * However, if the VM-entry failed, any VM entry-interruption info. field would * be left unmodified as the event would not have been injected to the guest. In * such cases, don't assert, we're not going to continue guest execution anyway. */ uint32_t uExitReason; uint32_t uEntryIntInfo; int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason); rc |= VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &uEntryIntInfo); AssertRC(rc); AssertMsg(VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason) || !VMX_ENTRY_INT_INFO_IS_VALID(uEntryIntInfo), ("uExitReason=%#RX32 uEntryIntInfo=%#RX32 rcExit=%d\n", uExitReason, uEntryIntInfo, VBOXSTRICTRC_VAL(rcExit))); } #endif /* * Clear the interrupt-window and NMI-window VMCS controls as we could have got * a VM-exit with higher priority than interrupt-window or NMI-window VM-exits * (e.g. TPR below threshold). */ if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)) { vmxHCClearIntWindowExitVmcs(pVCpu, pVmcsInfo); vmxHCClearNmiWindowExitVmcs(pVCpu, pVmcsInfo); } /* If we're emulating an instruction, we shouldn't have any TRPM traps pending and if we're injecting an event we should have a TRPM trap pending. */ AssertMsg(rcExit != VINF_EM_RAW_INJECT_TRPM_EVENT || TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit))); #ifndef DEBUG_bird /* Triggered after firing an NMI against NT4SP1, possibly a triple fault in progress. */ AssertMsg(rcExit != VINF_EM_RAW_EMULATE_INSTR || !TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit))); #endif /* Save guest state and restore host state bits. */ int rc = hmR0VmxLeaveSession(pVCpu); AssertRCReturn(rc, rc); STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3); /* Thread-context hooks are unregistered at this point!!! */ /* Ring-3 callback notifications are unregistered at this point!!! */ /* Sync recompiler state. */ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3); CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR | CPUM_CHANGED_LDTR | CPUM_CHANGED_GDTR | CPUM_CHANGED_IDTR | CPUM_CHANGED_TR | CPUM_CHANGED_HIDDEN_SEL_REGS); if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging && CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx)) CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH); Assert(!pVCpu->hmr0.s.fClearTrapFlag); /* Update the exit-to-ring 3 reason. */ pVCpu->hm.s.rcLastExitToR3 = VBOXSTRICTRC_VAL(rcExit); /* On our way back from ring-3 reload the guest state if there is a possibility of it being changed. 
*/ if ( rcExit != VINF_EM_RAW_INTERRUPT || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)) { Assert(!(pVCpu->cpum.GstCtx.fExtrn & HMVMX_CPUMCTX_EXTRN_ALL)); ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST); } STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3); VMMRZCallRing3Enable(pVCpu); return rc; } /** * VMMRZCallRing3() callback wrapper which saves the guest state before we * longjump due to a ring-0 assertion. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. */ VMMR0DECL(int) VMXR0AssertionCallback(PVMCPUCC pVCpu) { /* * !!! IMPORTANT !!! * If you modify code here, check whether hmR0VmxLeave() and hmR0VmxLeaveSession() needs to be updated too. * This is a stripped down version which gets out ASAP, trying to not trigger any further assertions. */ VMMR0AssertionRemoveNotification(pVCpu); VMMRZCallRing3Disable(pVCpu); HM_DISABLE_PREEMPT(pVCpu); PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu); vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL); CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu); CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */); /* Restore host-state bits that VT-x only restores partially. */ if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED) VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost); pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0; /* Restore the lazy host MSRs as we're leaving VT-x context. */ if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST) hmR0VmxLazyRestoreHostMsrs(pVCpu); /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */ pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false; VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC); /* Clear the current VMCS data back to memory (shadow VMCS if any would have been cleared as part of importing the guest state above. */ hmR0VmxClearVmcs(pVmcsInfo); /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */ VMMR0ThreadCtxHookDisable(pVCpu); /* Leave HM context. This takes care of local init (term). */ HMR0LeaveCpu(pVCpu); HM_RESTORE_PREEMPT(); return VINF_SUCCESS; } /** * Enters the VT-x session. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. */ VMMR0DECL(int) VMXR0Enter(PVMCPUCC pVCpu) { AssertPtr(pVCpu); Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fSupported); Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); LogFlowFunc(("pVCpu=%p\n", pVCpu)); Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)) == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)); #ifdef VBOX_STRICT /* At least verify VMX is enabled, since we can't check if we're in VMX root mode without #GP'ing. */ RTCCUINTREG uHostCr4 = ASMGetCR4(); if (!(uHostCr4 & X86_CR4_VMXE)) { LogRelFunc(("X86_CR4_VMXE bit in CR4 is not set!\n")); return VERR_VMX_X86_CR4_VMXE_CLEARED; } #endif /* * Do the EMT scheduled L1D and MDS flush here if needed. */ if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED) ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D); else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED) hmR0MdsClear(); /* * Load the appropriate VMCS as the current and active one. 
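     *
     * That is, the nested-guest VMCS when the guest is in VMX non-root mode
     * and the ordinary guest VMCS otherwise; the choice is cached in
     * fSwitchedToNstGstVmcs so later code can tell which one is active.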
*/ PVMXVMCSINFO pVmcsInfo; bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx); if (!fInNestedGuestMode) pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfo; else pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst; int rc = hmR0VmxLoadVmcs(pVmcsInfo); if (RT_SUCCESS(rc)) { pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs = fInNestedGuestMode; pVCpu->hm.s.vmx.fSwitchedToNstGstVmcsCopyForRing3 = fInNestedGuestMode; pVCpu->hmr0.s.fLeaveDone = false; Log4Func(("Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId())); } return rc; } /** * The thread-context callback. * * This is used together with RTThreadCtxHookCreate() on platforms which * supports it, and directly from VMMR0EmtPrepareForBlocking() and * VMMR0EmtResumeAfterBlocking() on platforms which don't. * * @param enmEvent The thread-context event. * @param pVCpu The cross context virtual CPU structure. * @param fGlobalInit Whether global VT-x/AMD-V init. was used. * @thread EMT(pVCpu) */ VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPUCC pVCpu, bool fGlobalInit) { AssertPtr(pVCpu); RT_NOREF1(fGlobalInit); switch (enmEvent) { case RTTHREADCTXEVENT_OUT: { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); VMCPU_ASSERT_EMT(pVCpu); /* No longjmps (logger flushes, locks) in this fragile context. */ VMMRZCallRing3Disable(pVCpu); Log4Func(("Preempting: HostCpuId=%u\n", RTMpCpuId())); /* Restore host-state (FPU, debug etc.) */ if (!pVCpu->hmr0.s.fLeaveDone) { /* * Do -not- import the guest-state here as we might already be in the middle of importing * it, esp. bad if we're holding the PGM lock, see comment in hmR0VmxImportGuestState(). */ hmR0VmxLeave(pVCpu, false /* fImportState */); pVCpu->hmr0.s.fLeaveDone = true; } /* Leave HM context, takes care of local init (term). */ int rc = HMR0LeaveCpu(pVCpu); AssertRC(rc); /* Restore longjmp state. */ VMMRZCallRing3Enable(pVCpu); STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreempt); break; } case RTTHREADCTXEVENT_IN: { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); VMCPU_ASSERT_EMT(pVCpu); /* Do the EMT scheduled L1D and MDS flush here if needed. */ if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED) ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D); else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED) hmR0MdsClear(); /* No longjmps here, as we don't want to trigger preemption (& its hook) while resuming. */ VMMRZCallRing3Disable(pVCpu); Log4Func(("Resumed: HostCpuId=%u\n", RTMpCpuId())); /* Initialize the bare minimum state required for HM. This takes care of initializing VT-x if necessary (onlined CPUs, local init etc.) */ int rc = hmR0EnterCpu(pVCpu); AssertRC(rc); Assert( (pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)) == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)); /* Load the active VMCS as the current one. */ PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu); rc = hmR0VmxLoadVmcs(pVmcsInfo); AssertRC(rc); Log4Func(("Resumed: Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId())); pVCpu->hmr0.s.fLeaveDone = false; /* Restore longjmp state. */ VMMRZCallRing3Enable(pVCpu); break; } default: break; } } /** * Exports the host state into the VMCS host-state area. * Sets up the VM-exit MSR-load area. * * The CPU state will be loaded from these fields on every successful VM-exit. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! 
*/ static int hmR0VmxExportHostState(PVMCPUCC pVCpu) { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); int rc = VINF_SUCCESS; if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT) { uint64_t uHostCr4 = hmR0VmxExportHostControlRegs(); rc = hmR0VmxExportHostSegmentRegs(pVCpu, uHostCr4); AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc); hmR0VmxExportHostMsrs(pVCpu); pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_HOST_CONTEXT; } return rc; } /** * Saves the host state in the VMCS host-state. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ VMMR0DECL(int) VMXR0ExportHostState(PVMCPUCC pVCpu) { AssertPtr(pVCpu); Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); /* * Export the host state here while entering HM context. * When thread-context hooks are used, we might get preempted and have to re-save the host * state but most of the time we won't be, so do it here before we disable interrupts. */ return hmR0VmxExportHostState(pVCpu); } /** * Exports the guest state into the VMCS guest-state area. * * The will typically be done before VM-entry when the guest-CPU state and the * VMCS state may potentially be out of sync. * * Sets up the VM-entry MSR-load and VM-exit MSR-store areas. Sets up the * VM-entry controls. * Sets up the appropriate VMX non-root function to execute guest code based on * the guest CPU mode. * * @returns VBox strict status code. * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code * without unrestricted guest execution and the VMMDev is not presently * mapped (e.g. EFI32). * * @param pVCpu The cross context virtual CPU structure. * @param pVmxTransient The VMX-transient structure. * * @remarks No-long-jump zone!!! */ static VBOXSTRICTRC hmR0VmxExportGuestState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient) { AssertPtr(pVCpu); HMVMX_ASSERT_PREEMPT_SAFE(pVCpu); LogFlowFunc(("pVCpu=%p\n", pVCpu)); STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExportGuestState, x); /* * Determine real-on-v86 mode. * Used when the guest is in real-mode and unrestricted guest execution is not used. */ PVMXVMCSINFOSHARED pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared; if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUnrestrictedGuest || !CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx)) pVmcsInfoShared->RealMode.fRealOnV86Active = false; else { Assert(!pVmxTransient->fIsNestedGuest); pVmcsInfoShared->RealMode.fRealOnV86Active = true; } /* * Any ordering dependency among the sub-functions below must be explicitly stated using comments. * Ideally, assert that the cross-dependent bits are up-to-date at the point of using it. 
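     *
     * One such dependency worth calling out: the VM-entry/VM-exit controls are
     * exported first because the EFER MSR handling in hmR0VmxExportGuestMsrs()
     * asserts that HM_CHANGED_VMX_ENTRY_EXIT_CTLS has already been cleared.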
*/ int rc = vmxHCExportGuestEntryExitCtls(pVCpu, pVmxTransient); AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc); rc = vmxHCExportGuestCR0(pVCpu, pVmxTransient); AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc); VBOXSTRICTRC rcStrict = vmxHCExportGuestCR3AndCR4(pVCpu, pVmxTransient); if (rcStrict == VINF_SUCCESS) { /* likely */ } else { Assert(rcStrict == VINF_EM_RESCHEDULE_REM || RT_FAILURE_NP(rcStrict)); return rcStrict; } rc = vmxHCExportGuestSegRegsXdtr(pVCpu, pVmxTransient); AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc); rc = hmR0VmxExportGuestMsrs(pVCpu, pVmxTransient); AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc); vmxHCExportGuestApicTpr(pVCpu, pVmxTransient); vmxHCExportGuestXcptIntercepts(pVCpu, pVmxTransient); vmxHCExportGuestRip(pVCpu); hmR0VmxExportGuestRsp(pVCpu); vmxHCExportGuestRflags(pVCpu, pVmxTransient); rc = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient); AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc); /* Clear any bits that may be set but exported unconditionally or unused/reserved bits. */ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~( (HM_CHANGED_GUEST_GPRS_MASK & ~HM_CHANGED_GUEST_RSP) | HM_CHANGED_GUEST_CR2 | (HM_CHANGED_GUEST_DR_MASK & ~HM_CHANGED_GUEST_DR7) | HM_CHANGED_GUEST_X87 | HM_CHANGED_GUEST_SSE_AVX | HM_CHANGED_GUEST_OTHER_XSAVE | HM_CHANGED_GUEST_XCRx | HM_CHANGED_GUEST_KERNEL_GS_BASE /* Part of lazy or auto load-store MSRs. */ | HM_CHANGED_GUEST_SYSCALL_MSRS /* Part of lazy or auto load-store MSRs. */ | HM_CHANGED_GUEST_TSC_AUX | HM_CHANGED_GUEST_OTHER_MSRS | (HM_CHANGED_KEEPER_STATE_MASK & ~HM_CHANGED_VMX_MASK))); STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExportGuestState, x); return rc; } /** * Exports the state shared between the host and guest into the VMCS. * * @param pVCpu The cross context virtual CPU structure. * @param pVmxTransient The VMX-transient structure. * * @remarks No-long-jump zone!!! */ static void hmR0VmxExportSharedState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient) { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); Assert(!VMMRZCallRing3IsEnabled(pVCpu)); if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_DR_MASK) { int rc = hmR0VmxExportSharedDebugState(pVCpu, pVmxTransient); AssertRC(rc); pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_DR_MASK; /* Loading shared debug bits might have changed eflags.TF bit for debugging purposes. */ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_RFLAGS) vmxHCExportGuestRflags(pVCpu, pVmxTransient); } if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_GUEST_LAZY_MSRS) { hmR0VmxLazyLoadGuestMsrs(pVCpu); pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_VMX_GUEST_LAZY_MSRS; } AssertMsg(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE), ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged)); } /** * Worker for loading the guest-state bits in the inner VT-x execution loop. * * @returns Strict VBox status code (i.e. informational status codes too). * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code * without unrestricted guest execution and the VMMDev is not presently * mapped (e.g. EFI32). * * @param pVCpu The cross context virtual CPU structure. * @param pVmxTransient The VMX-transient structure. * * @remarks No-long-jump zone!!! 
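 *
 * As an illustrative sketch only (not part of the build), the fast-path decision made
 * by this function reduces to a pure bitmask test over the changed-state word; the
 * helper below is hypothetical and merely restates that predicate using the HM_CHANGED
 * flags already used in this file:
 * @code
 *     // True when only RIP/RSP/RFLAGS/HWVIRT are dirty and nothing else
 *     // (host and host/guest-shared state is handled separately).
 *     static bool isMinimalExportSufficient(uint64_t fCtxChanged)
 *     {
 *         uint64_t const fMinimal = HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP
 *                                 | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT;
 *         uint64_t const fAll     = HM_CHANGED_ALL_GUEST & ~HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE;
 *         return (fCtxChanged & fMinimal) != 0
 *             && (fCtxChanged & (fAll & ~fMinimal)) == 0;
 *     }
 * @endcode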
*/
static VBOXSTRICTRC hmR0VmxExportGuestStateOptimal(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
{
    HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
    Assert(!VMMRZCallRing3IsEnabled(pVCpu));

#ifdef HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
    ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
#endif

    /*
     * For many VM-exits only RIP/RSP/RFLAGS (and HWVIRT state when executing a nested-guest)
     * changes. First try to export only these without going through all other changed-flag checks.
     */
    VBOXSTRICTRC   rcStrict;
    uint64_t const fCtxMask     = HM_CHANGED_ALL_GUEST & ~HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE;
    uint64_t const fMinimalMask = HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT;
    uint64_t const fCtxChanged  = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);

    /* If only RIP/RSP/RFLAGS/HWVIRT changed, export only those (quicker, happens more often). */
    if (    (fCtxChanged & fMinimalMask)
        && !(fCtxChanged & (fCtxMask & ~fMinimalMask)))
    {
        vmxHCExportGuestRip(pVCpu);
        hmR0VmxExportGuestRsp(pVCpu);
        vmxHCExportGuestRflags(pVCpu, pVmxTransient);
        rcStrict = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
        STAM_COUNTER_INC(&pVCpu->hm.s.StatExportMinimal);
    }
    /* If anything else also changed, go through the full export routine and export as required. */
    else if (fCtxChanged & fCtxMask)
    {
        rcStrict = hmR0VmxExportGuestState(pVCpu, pVmxTransient);
        if (RT_LIKELY(rcStrict == VINF_SUCCESS))
        { /* likely */ }
        else
        {
            AssertMsg(rcStrict == VINF_EM_RESCHEDULE_REM, ("Failed to export guest state! rc=%Rrc\n",
                                                           VBOXSTRICTRC_VAL(rcStrict)));
            Assert(!VMMRZCallRing3IsEnabled(pVCpu));
            return rcStrict;
        }
        STAM_COUNTER_INC(&pVCpu->hm.s.StatExportFull);
    }
    /* Nothing changed, nothing to load here. */
    else
        rcStrict = VINF_SUCCESS;

#ifdef VBOX_STRICT
    /* All the guest state bits should be loaded except maybe the host context and/or the shared host/guest bits. */
    uint64_t const fCtxChangedCur = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
    AssertMsg(!(fCtxChangedCur & fCtxMask), ("fCtxChangedCur=%#RX64\n", fCtxChangedCur));
#endif
    return rcStrict;
}


/**
 * Maps the APIC-access page for virtualizing APIC accesses.
 *
 * This can cause longjmps to ring-3 due to the acquisition of the PGM lock. Hence,
 * this is not done as part of exporting guest state, see @bugref{8721}.
 *
 * @returns VBox status code.
 * @param   pVCpu           The cross context virtual CPU structure.
 * @param   GCPhysApicBase  The guest-physical address of the APIC access page.
 */
static int hmR0VmxMapHCApicAccessPage(PVMCPUCC pVCpu, RTGCPHYS GCPhysApicBase)
{
    PVMCC pVM = pVCpu->CTX_SUFF(pVM);
    Assert(GCPhysApicBase);

    LogFunc(("Mapping HC APIC-access page at %#RGp\n", GCPhysApicBase));

    /* Unalias the existing mapping. */
    int rc = PGMHandlerPhysicalReset(pVM, GCPhysApicBase);
    AssertRCReturn(rc, rc);

    /* Map the HC APIC-access page in place of the MMIO page, also updates the shadow page tables if necessary. */
    Assert(pVM->hmr0.s.vmx.HCPhysApicAccess != NIL_RTHCPHYS);
    rc = IOMR0MmioMapMmioHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hmr0.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
    AssertRCReturn(rc, rc);

    return VINF_SUCCESS;
}


/**
 * Worker function passed to RTMpOnSpecific() that is to be called on the target
 * CPU.
 *
 * @param   idCpu       The ID for the CPU the function is called on.
 * @param   pvUser1     Null, not used.
 * @param   pvUser2     Null, not used.
 */
static DECLCALLBACK(void) hmR0DispatchHostNmi(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    RT_NOREF3(idCpu, pvUser1, pvUser2);
    VMXDispatchHostNmi();
}


/**
 * Dispatches an NMI on the host CPU that received it.
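 *
 * Illustrative sketch only, using the same IPRT/VMX helpers as the body below; the
 * dispatch strategy boils down to a cheap same-CPU fast path with an IPI fallback:
 * @code
 *     RTCCUINTREG const fEFlags  = ASMIntDisableFlags();
 *     bool const        fSameCpu = (idCpu == RTMpCpuId());    // still on the CPU that took the NMI?
 *     if (fSameCpu)
 *         VMXDispatchHostNmi();                               // fast path: dispatch directly
 *     ASMSetFlags(fEFlags);
 *     if (!fSameCpu)                                          // slow path: IPI the target CPU
 *         RTMpOnSpecific(idCpu, &hmR0DispatchHostNmi, NULL, NULL);
 * @endcode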
* * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param pVmcsInfo The VMCS info. object corresponding to the VMCS that was * executing when receiving the host NMI in VMX non-root * operation. */ static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo) { RTCPUID const idCpu = pVmcsInfo->idHostCpuExec; Assert(idCpu != NIL_RTCPUID); /* * We don't want to delay dispatching the NMI any more than we have to. However, * we have already chosen -not- to dispatch NMIs when interrupts were still disabled * after executing guest or nested-guest code for the following reasons: * * - We would need to perform VMREADs with interrupts disabled and is orders of * magnitude worse when we run as a nested hypervisor without VMCS shadowing * supported by the host hypervisor. * * - It affects the common VM-exit scenario and keeps interrupts disabled for a * longer period of time just for handling an edge case like host NMIs which do * not occur nearly as frequently as other VM-exits. * * Let's cover the most likely scenario first. Check if we are on the target CPU * and dispatch the NMI right away. This should be much faster than calling into * RTMpOnSpecific() machinery. */ bool fDispatched = false; RTCCUINTREG const fEFlags = ASMIntDisableFlags(); if (idCpu == RTMpCpuId()) { VMXDispatchHostNmi(); fDispatched = true; } ASMSetFlags(fEFlags); if (fDispatched) { STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC); return VINF_SUCCESS; } /* * RTMpOnSpecific() waits until the worker function has run on the target CPU. So * there should be no race or recursion even if we are unlucky enough to be preempted * (to the target CPU) without dispatching the host NMI above. */ STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGCIpi); return RTMpOnSpecific(idCpu, &hmR0DispatchHostNmi, NULL /* pvUser1 */, NULL /* pvUser2 */); } #ifdef VBOX_WITH_NESTED_HWVIRT_VMX /** * Merges the guest with the nested-guest MSR bitmap in preparation of executing the * nested-guest using hardware-assisted VMX. * * @param pVCpu The cross context virtual CPU structure. * @param pVmcsInfoNstGst The nested-guest VMCS info. object. * @param pVmcsInfoGst The guest VMCS info. object. */ static void hmR0VmxMergeMsrBitmapNested(PCVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfoNstGst, PCVMXVMCSINFO pVmcsInfoGst) { uint32_t const cbMsrBitmap = X86_PAGE_4K_SIZE; uint64_t *pu64MsrBitmap = (uint64_t *)pVmcsInfoNstGst->pvMsrBitmap; Assert(pu64MsrBitmap); /* * We merge the guest MSR bitmap with the nested-guest MSR bitmap such that any * MSR that is intercepted by the guest is also intercepted while executing the * nested-guest using hardware-assisted VMX. * * Note! If the nested-guest is not using an MSR bitmap, every MSR must cause a * nested-guest VM-exit even if the outer guest is not intercepting some * MSRs. We cannot assume the caller has initialized the nested-guest * MSR bitmap in this case. * * The nested hypervisor may also switch whether it uses MSR bitmaps for * each of its VM-entry, hence initializing it once per-VM while setting * up the nested-guest VMCS is not sufficient. */ PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs; if (pVmcsNstGst->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS) { uint64_t const *pu64MsrBitmapNstGst = (uint64_t const *)&pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap[0]; uint64_t const *pu64MsrBitmapGst = (uint64_t const *)pVmcsInfoGst->pvMsrBitmap; Assert(pu64MsrBitmapNstGst); Assert(pu64MsrBitmapGst); /** @todo Detect and use EVEX.POR? 
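 *
 * Illustration (separate from the todo above): a set bit in an MSR bitmap means
 * "intercept", so OR-ing the two bitmaps produces a bitmap that intercepts an MSR
 * whenever either the guest or the nested-guest intercepts it. A minimal standalone
 * sketch of the merge performed below (hypothetical helper, not part of the build):
 * @code
 *     static void mergeInterceptBitmaps(uint64_t *pauDst, uint64_t const *pauGst,
 *                                       uint64_t const *pauNstGst, size_t cQwords)
 *     {
 *         for (size_t i = 0; i < cQwords; i++)
 *             pauDst[i] = pauGst[i] | pauNstGst[i];   // intercept if either party intercepts
 *     }
 * @endcode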
*/ uint32_t const cFrags = cbMsrBitmap / sizeof(uint64_t); for (uint32_t i = 0; i < cFrags; i++) pu64MsrBitmap[i] = pu64MsrBitmapNstGst[i] | pu64MsrBitmapGst[i]; } else ASMMemFill32(pu64MsrBitmap, cbMsrBitmap, UINT32_C(0xffffffff)); } /** * Merges the guest VMCS in to the nested-guest VMCS controls in preparation of * hardware-assisted VMX execution of the nested-guest. * * For a guest, we don't modify these controls once we set up the VMCS and hence * this function is never called. * * For nested-guests since the nested hypervisor provides these controls on every * nested-guest VM-entry and could potentially change them everytime we need to * merge them before every nested-guest VM-entry. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. */ static int hmR0VmxMergeVmcsNested(PVMCPUCC pVCpu) { PVMCC const pVM = pVCpu->CTX_SUFF(pVM); PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo; PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs; /* * Merge the controls with the requirements of the guest VMCS. * * We do not need to validate the nested-guest VMX features specified in the nested-guest * VMCS with the features supported by the physical CPU as it's already done by the * VMLAUNCH/VMRESUME instruction emulation. * * This is because the VMX features exposed by CPUM (through CPUID/MSRs) to the guest are * derived from the VMX features supported by the physical CPU. */ /* Pin-based VM-execution controls. */ uint32_t const u32PinCtls = pVmcsNstGst->u32PinCtls | pVmcsInfoGst->u32PinCtls; /* Processor-based VM-execution controls. */ uint32_t u32ProcCtls = (pVmcsNstGst->u32ProcCtls & ~VMX_PROC_CTLS_USE_IO_BITMAPS) | (pVmcsInfoGst->u32ProcCtls & ~( VMX_PROC_CTLS_INT_WINDOW_EXIT | VMX_PROC_CTLS_NMI_WINDOW_EXIT | VMX_PROC_CTLS_MOV_DR_EXIT | VMX_PROC_CTLS_USE_TPR_SHADOW | VMX_PROC_CTLS_MONITOR_TRAP_FLAG)); /* Secondary processor-based VM-execution controls. */ uint32_t const u32ProcCtls2 = (pVmcsNstGst->u32ProcCtls2 & ~VMX_PROC_CTLS2_VPID) | (pVmcsInfoGst->u32ProcCtls2 & ~( VMX_PROC_CTLS2_VIRT_APIC_ACCESS | VMX_PROC_CTLS2_INVPCID | VMX_PROC_CTLS2_VMCS_SHADOWING | VMX_PROC_CTLS2_RDTSCP | VMX_PROC_CTLS2_XSAVES_XRSTORS | VMX_PROC_CTLS2_APIC_REG_VIRT | VMX_PROC_CTLS2_VIRT_INT_DELIVERY | VMX_PROC_CTLS2_VMFUNC)); /* * VM-entry controls: * These controls contains state that depends on the nested-guest state (primarily * EFER MSR) and is thus not constant between VMLAUNCH/VMRESUME and the nested-guest * VM-exit. Although the nested hypervisor cannot change it, we need to in order to * properly continue executing the nested-guest if the EFER MSR changes but does not * cause a nested-guest VM-exits. * * VM-exit controls: * These controls specify the host state on return. We cannot use the controls from * the nested hypervisor state as is as it would contain the guest state rather than * the host state. Since the host state is subject to change (e.g. preemption, trips * to ring-3, longjmp and rescheduling to a different host CPU) they are not constant * through VMLAUNCH/VMRESUME and the nested-guest VM-exit. * * VM-entry MSR-load: * The guest MSRs from the VM-entry MSR-load area are already loaded into the guest-CPU * context by the VMLAUNCH/VMRESUME instruction emulation. * * VM-exit MSR-store: * The VM-exit emulation will take care of populating the MSRs from the guest-CPU context * back into the VM-exit MSR-store area. * * VM-exit MSR-load areas: * This must contain the real host MSRs with hardware-assisted VMX execution. 
Hence, we * can entirely ignore what the nested hypervisor wants to load here. */ /* * Exception bitmap. * * We could remove #UD from the guest bitmap and merge it with the nested-guest bitmap * here (and avoid doing anything while exporting nested-guest state), but to keep the * code more flexible if intercepting exceptions become more dynamic in the future we do * it as part of exporting the nested-guest state. */ uint32_t const u32XcptBitmap = pVmcsNstGst->u32XcptBitmap | pVmcsInfoGst->u32XcptBitmap; /* * CR0/CR4 guest/host mask. * * Modifications by the nested-guest to CR0/CR4 bits owned by the host and the guest must * cause VM-exits, so we need to merge them here. */ uint64_t const u64Cr0Mask = pVmcsNstGst->u64Cr0Mask.u | pVmcsInfoGst->u64Cr0Mask; uint64_t const u64Cr4Mask = pVmcsNstGst->u64Cr4Mask.u | pVmcsInfoGst->u64Cr4Mask; /* * Page-fault error-code mask and match. * * Although we require unrestricted guest execution (and thereby nested-paging) for * hardware-assisted VMX execution of nested-guests and thus the outer guest doesn't * normally intercept #PFs, it might intercept them for debugging purposes. * * If the outer guest is not intercepting #PFs, we can use the nested-guest #PF filters. * If the outer guest is intercepting #PFs, we must intercept all #PFs. */ uint32_t u32XcptPFMask; uint32_t u32XcptPFMatch; if (!(pVmcsInfoGst->u32XcptBitmap & RT_BIT(X86_XCPT_PF))) { u32XcptPFMask = pVmcsNstGst->u32XcptPFMask; u32XcptPFMatch = pVmcsNstGst->u32XcptPFMatch; } else { u32XcptPFMask = 0; u32XcptPFMatch = 0; } /* * Pause-Loop exiting. */ /** @todo r=bird: given that both pVM->hm.s.vmx.cPleGapTicks and * pVM->hm.s.vmx.cPleWindowTicks defaults to zero, I cannot see how * this will work... */ uint32_t const cPleGapTicks = RT_MIN(pVM->hm.s.vmx.cPleGapTicks, pVmcsNstGst->u32PleGap); uint32_t const cPleWindowTicks = RT_MIN(pVM->hm.s.vmx.cPleWindowTicks, pVmcsNstGst->u32PleWindow); /* * Pending debug exceptions. * Currently just copy whatever the nested-guest provides us. */ uint64_t const uPendingDbgXcpts = pVmcsNstGst->u64GuestPendingDbgXcpts.u; /* * I/O Bitmap. * * We do not use the I/O bitmap that may be provided by the nested hypervisor as we always * intercept all I/O port accesses. */ Assert(u32ProcCtls & VMX_PROC_CTLS_UNCOND_IO_EXIT); Assert(!(u32ProcCtls & VMX_PROC_CTLS_USE_IO_BITMAPS)); /* * VMCS shadowing. * * We do not yet expose VMCS shadowing to the guest and thus VMCS shadowing should not be * enabled while executing the nested-guest. */ Assert(!(u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING)); /* * APIC-access page. */ RTHCPHYS HCPhysApicAccess; if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS) { Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS); RTGCPHYS const GCPhysApicAccess = pVmcsNstGst->u64AddrApicAccess.u; void *pvPage; PGMPAGEMAPLOCK PgLockApicAccess; int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysApicAccess, &pvPage, &PgLockApicAccess); if (RT_SUCCESS(rc)) { rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysApicAccess, &HCPhysApicAccess); AssertMsgRCReturn(rc, ("Failed to get host-physical address for APIC-access page at %#RGp\n", GCPhysApicAccess), rc); /** @todo Handle proper releasing of page-mapping lock later. */ PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockApicAccess); } else return rc; } else HCPhysApicAccess = 0; /* * Virtual-APIC page and TPR threshold. 
*/ RTHCPHYS HCPhysVirtApic; uint32_t u32TprThreshold; if (u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW) { Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW); RTGCPHYS const GCPhysVirtApic = pVmcsNstGst->u64AddrVirtApic.u; void *pvPage; PGMPAGEMAPLOCK PgLockVirtApic; int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysVirtApic, &pvPage, &PgLockVirtApic); if (RT_SUCCESS(rc)) { rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysVirtApic, &HCPhysVirtApic); AssertMsgRCReturn(rc, ("Failed to get host-physical address for virtual-APIC page at %#RGp\n", GCPhysVirtApic), rc); /** @todo Handle proper releasing of page-mapping lock later. */ PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockVirtApic); } else return rc; u32TprThreshold = pVmcsNstGst->u32TprThreshold; } else { HCPhysVirtApic = 0; u32TprThreshold = 0; /* * We must make sure CR8 reads/write must cause VM-exits when TPR shadowing is not * used by the nested hypervisor. Preventing MMIO accesses to the physical APIC will * be taken care of by EPT/shadow paging. */ if (pVM->hmr0.s.fAllow64BitGuests) u32ProcCtls |= VMX_PROC_CTLS_CR8_STORE_EXIT | VMX_PROC_CTLS_CR8_LOAD_EXIT; } /* * Validate basic assumptions. */ PVMXVMCSINFO pVmcsInfoNstGst = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst; Assert(pVM->hmr0.s.vmx.fUnrestrictedGuest); Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS); Assert(hmGetVmxActiveVmcsInfo(pVCpu) == pVmcsInfoNstGst); /* * Commit it to the nested-guest VMCS. */ int rc = VINF_SUCCESS; if (pVmcsInfoNstGst->u32PinCtls != u32PinCtls) rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, u32PinCtls); if (pVmcsInfoNstGst->u32ProcCtls != u32ProcCtls) rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, u32ProcCtls); if (pVmcsInfoNstGst->u32ProcCtls2 != u32ProcCtls2) rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, u32ProcCtls2); if (pVmcsInfoNstGst->u32XcptBitmap != u32XcptBitmap) rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32XcptBitmap); if (pVmcsInfoNstGst->u64Cr0Mask != u64Cr0Mask) rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask); if (pVmcsInfoNstGst->u64Cr4Mask != u64Cr4Mask) rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask); if (pVmcsInfoNstGst->u32XcptPFMask != u32XcptPFMask) rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, u32XcptPFMask); if (pVmcsInfoNstGst->u32XcptPFMatch != u32XcptPFMatch) rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, u32XcptPFMatch); if ( !(u32ProcCtls & VMX_PROC_CTLS_PAUSE_EXIT) && (u32ProcCtls2 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT)) { Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT); rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, cPleGapTicks); rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, cPleWindowTicks); } if (pVmcsInfoNstGst->HCPhysVirtApic != HCPhysVirtApic) rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic); rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold); if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS) rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess); rc |= VMXWriteVmcsNw(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, uPendingDbgXcpts); AssertRC(rc); /* * Update the nested-guest VMCS cache. 
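 *
 * Side note (illustrative only): the VMWRITEs above are guarded by comparisons against
 * the cached values so that controls which did not change do not cost a VMCS write on
 * every nested-guest VM-entry; the cache update below is what keeps that scheme honest.
 * A minimal sketch of the pattern, with a hypothetical field and encoding:
 * @code
 *     if (pVmcsInfoNstGst->u32SomeCtl != u32SomeCtl)              // hypothetical cached field
 *         rc |= VMXWriteVmcs32(VMX_VMCS32_SOME_CTL, u32SomeCtl);  // hypothetical VMCS encoding
 *     pVmcsInfoNstGst->u32SomeCtl = u32SomeCtl;                   // keep the cache in sync
 * @endcode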
*/ pVmcsInfoNstGst->u32PinCtls = u32PinCtls; pVmcsInfoNstGst->u32ProcCtls = u32ProcCtls; pVmcsInfoNstGst->u32ProcCtls2 = u32ProcCtls2; pVmcsInfoNstGst->u32XcptBitmap = u32XcptBitmap; pVmcsInfoNstGst->u64Cr0Mask = u64Cr0Mask; pVmcsInfoNstGst->u64Cr4Mask = u64Cr4Mask; pVmcsInfoNstGst->u32XcptPFMask = u32XcptPFMask; pVmcsInfoNstGst->u32XcptPFMatch = u32XcptPFMatch; pVmcsInfoNstGst->HCPhysVirtApic = HCPhysVirtApic; /* * We need to flush the TLB if we are switching the APIC-access page address. * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction". */ if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS) pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = true; /* * MSR bitmap. * * The MSR bitmap address has already been initialized while setting up the nested-guest * VMCS, here we need to merge the MSR bitmaps. */ if (u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS) hmR0VmxMergeMsrBitmapNested(pVCpu, pVmcsInfoNstGst, pVmcsInfoGst); return VINF_SUCCESS; } #endif /* VBOX_WITH_NESTED_HWVIRT_VMX */ /** * Does the preparations before executing guest code in VT-x. * * This may cause longjmps to ring-3 and may even result in rescheduling to the * recompiler/IEM. We must be cautious what we do here regarding committing * guest-state information into the VMCS assuming we assuredly execute the * guest in VT-x mode. * * If we fall back to the recompiler/IEM after updating the VMCS and clearing * the common-state (TRPM/forceflags), we must undo those changes so that the * recompiler/IEM can (and should) use them when it resumes guest execution. * Otherwise such operations must be done when we can no longer exit to ring-3. * * @returns Strict VBox status code (i.e. informational status codes too). * @retval VINF_SUCCESS if we can proceed with running the guest, interrupts * have been disabled. * @retval VINF_VMX_VMEXIT if a nested-guest VM-exit occurs (e.g., while evaluating * pending events). * @retval VINF_EM_RESET if a triple-fault occurs while injecting a * double-fault into the guest. * @retval VINF_EM_DBG_STEPPED if @a fStepping is true and an event was * dispatched directly. * @retval VINF_* scheduling changes, we have to go back to ring-3. * * @param pVCpu The cross context virtual CPU structure. * @param pVmxTransient The VMX-transient structure. * @param fStepping Whether we are single-stepping the guest in the * hypervisor debugger. Makes us ignore some of the reasons * for returning to ring-3, and return VINF_EM_DBG_STEPPED * if event dispatching took place. */ static VBOXSTRICTRC hmR0VmxPreRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, bool fStepping) { Assert(VMMRZCallRing3IsEnabled(pVCpu)); Log4Func(("fIsNested=%RTbool fStepping=%RTbool\n", pVmxTransient->fIsNestedGuest, fStepping)); #ifdef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM if (pVmxTransient->fIsNestedGuest) { RT_NOREF2(pVCpu, fStepping); Log2Func(("Rescheduling to IEM due to nested-hwvirt or forced IEM exec -> VINF_EM_RESCHEDULE_REM\n")); return VINF_EM_RESCHEDULE_REM; } #endif /* * Check and process force flag actions, some of which might require us to go back to ring-3. */ VBOXSTRICTRC rcStrict = vmxHCCheckForceFlags(pVCpu, pVmxTransient->fIsNestedGuest, fStepping); if (rcStrict == VINF_SUCCESS) { /* FFs don't get set all the time. 
*/ #ifdef VBOX_WITH_NESTED_HWVIRT_VMX if ( pVmxTransient->fIsNestedGuest && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)) { STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit); return VINF_VMX_VMEXIT; } #endif } else return rcStrict; /* * Virtualize memory-mapped accesses to the physical APIC (may take locks). */ PVMCC pVM = pVCpu->CTX_SUFF(pVM); if ( !pVCpu->hm.s.vmx.u64GstMsrApicBase && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS) && PDMHasApic(pVM)) { /* Get the APIC base MSR from the virtual APIC device. */ uint64_t const uApicBaseMsr = APICGetBaseMsrNoCheck(pVCpu); /* Map the APIC access page. */ int rc = hmR0VmxMapHCApicAccessPage(pVCpu, uApicBaseMsr & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK); AssertRCReturn(rc, rc); /* Update the per-VCPU cache of the APIC base MSR corresponding to the mapped APIC access page. */ pVCpu->hm.s.vmx.u64GstMsrApicBase = uApicBaseMsr; } #ifdef VBOX_WITH_NESTED_HWVIRT_VMX /* * Merge guest VMCS controls with the nested-guest VMCS controls. * * Even if we have not executed the guest prior to this (e.g. when resuming from a * saved state), we should be okay with merging controls as we initialize the * guest VMCS controls as part of VM setup phase. */ if ( pVmxTransient->fIsNestedGuest && !pVCpu->hm.s.vmx.fMergedNstGstCtls) { int rc = hmR0VmxMergeVmcsNested(pVCpu); AssertRCReturn(rc, rc); pVCpu->hm.s.vmx.fMergedNstGstCtls = true; } #endif /* * Evaluate events to be injected into the guest. * * Events in TRPM can be injected without inspecting the guest state. * If any new events (interrupts/NMI) are pending currently, we try to set up the * guest to cause a VM-exit the next time they are ready to receive the event. */ if (TRPMHasTrap(pVCpu)) vmxHCTrpmTrapToPendingEvent(pVCpu); uint32_t fIntrState; rcStrict = vmxHCEvaluatePendingEvent(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->fIsNestedGuest, &fIntrState); #ifdef VBOX_WITH_NESTED_HWVIRT_VMX /* * While evaluating pending events if something failed (unlikely) or if we were * preparing to run a nested-guest but performed a nested-guest VM-exit, we should bail. */ if (rcStrict != VINF_SUCCESS) return rcStrict; if ( pVmxTransient->fIsNestedGuest && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)) { STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit); return VINF_VMX_VMEXIT; } #else Assert(rcStrict == VINF_SUCCESS); #endif /* * Event injection may take locks (currently the PGM lock for real-on-v86 case) and thus * needs to be done with longjmps or interrupts + preemption enabled. Event injection might * also result in triple-faulting the VM. * * With nested-guests, the above does not apply since unrestricted guest execution is a * requirement. Regardless, we do this here to avoid duplicating code elsewhere. */ rcStrict = vmxHCInjectPendingEvent(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->fIsNestedGuest, fIntrState, fStepping); if (RT_LIKELY(rcStrict == VINF_SUCCESS)) { /* likely */ } else { AssertMsg(rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping), ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); return rcStrict; } /* * A longjump might result in importing CR3 even for VM-exits that don't necessarily * import CR3 themselves. We will need to update them here, as even as late as the above * hmR0VmxInjectPendingEvent() call may lazily import guest-CPU state on demand causing * the below force flags to be set. 
*/ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3)) { Assert(!(ASMAtomicUoReadU64(&pVCpu->cpum.GstCtx.fExtrn) & CPUMCTX_EXTRN_CR3)); int rc2 = PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu)); AssertMsgReturn(rc2 == VINF_SUCCESS || rc2 == VINF_PGM_SYNC_CR3, ("%Rrc\n", rc2), RT_FAILURE_NP(rc2) ? rc2 : VERR_IPE_UNEXPECTED_INFO_STATUS); Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3)); } #ifdef VBOX_WITH_NESTED_HWVIRT_VMX /* Paranoia. */ Assert(!pVmxTransient->fIsNestedGuest || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)); #endif /* * No longjmps to ring-3 from this point on!!! * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic. * This also disables flushing of the R0-logger instance (if any). */ VMMRZCallRing3Disable(pVCpu); /* * Export the guest state bits. * * We cannot perform longjmps while loading the guest state because we do not preserve the * host/guest state (although the VMCS will be preserved) across longjmps which can cause * CPU migration. * * If we are injecting events to a real-on-v86 mode guest, we would have updated RIP and some segment * registers. Hence, exporting of the guest state needs to be done -after- injection of events. */ rcStrict = hmR0VmxExportGuestStateOptimal(pVCpu, pVmxTransient); if (RT_LIKELY(rcStrict == VINF_SUCCESS)) { /* likely */ } else { VMMRZCallRing3Enable(pVCpu); return rcStrict; } /* * We disable interrupts so that we don't miss any interrupts that would flag preemption * (IPI/timers etc.) when thread-context hooks aren't used and we've been running with * preemption disabled for a while. Since this is purely to aid the * RTThreadPreemptIsPending() code, it doesn't matter that it may temporarily reenable and * disable interrupt on NT. * * We need to check for force-flags that could've possible been altered since we last * checked them (e.g. by PDMGetInterrupt() leaving the PDM critical section, * see @bugref{6398}). * * We also check a couple of other force-flags as a last opportunity to get the EMT back * to ring-3 before executing guest code. */ pVmxTransient->fEFlags = ASMIntDisableFlags(); if ( ( !VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC) && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK)) || ( fStepping /* Optimized for the non-stepping case, so a bit of unnecessary work when stepping. */ && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK & ~(VMCPU_FF_TIMER | VMCPU_FF_PDM_CRITSECT))) ) { if (!RTThreadPreemptIsPending(NIL_RTTHREAD)) { #ifdef VBOX_WITH_NESTED_HWVIRT_VMX /* * If we are executing a nested-guest make sure that we should intercept subsequent * events. The one we are injecting might be part of VM-entry. This is mainly to keep * the VM-exit instruction emulation happy. */ if (pVmxTransient->fIsNestedGuest) CPUMSetGuestVmxInterceptEvents(&pVCpu->cpum.GstCtx, true); #endif /* * We've injected any pending events. This is really the point of no return (to ring-3). * * Note! The caller expects to continue with interrupts & longjmps disabled on successful * returns from this function, so do -not- enable them here. */ pVCpu->hm.s.Event.fPending = false; return VINF_SUCCESS; } STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPendingHostIrq); rcStrict = VINF_EM_RAW_INTERRUPT; } else { STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF); rcStrict = VINF_EM_RAW_TO_R3; } ASMSetFlags(pVmxTransient->fEFlags); VMMRZCallRing3Enable(pVCpu); return rcStrict; } /** * Final preparations before executing guest code using hardware-assisted VMX. 
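 *
 * Illustrative caller-side sketch (names as used by the run loops later in this file;
 * a rough outline, not a definitive contract):
 * @code
 *     rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false);  // disables interrupts on VINF_SUCCESS
 *     if (rcStrict != VINF_SUCCESS)
 *         break;                                                   // back to the outer loop / ring-3
 *     hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);           // point of no return
 *     int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
 *     hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);            // re-enables interrupts
 * @endcode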
* * We can no longer get preempted to a different host CPU and there are no returns * to ring-3. We ignore any errors that may happen from this point (e.g. VMWRITE * failures), this function is not intended to fail sans unrecoverable hardware * errors. * * @param pVCpu The cross context virtual CPU structure. * @param pVmxTransient The VMX-transient structure. * * @remarks Called with preemption disabled. * @remarks No-long-jump zone!!! */ static void hmR0VmxPreRunGuestCommitted(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient) { Assert(!VMMRZCallRing3IsEnabled(pVCpu)); Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); Assert(!pVCpu->hm.s.Event.fPending); /* * Indicate start of guest execution and where poking EMT out of guest-context is recognized. */ VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM); VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC); PVMCC pVM = pVCpu->CTX_SUFF(pVM); PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo; PHMPHYSCPU pHostCpu = hmR0GetCurrentCpu(); RTCPUID const idCurrentCpu = pHostCpu->idCpu; if (!CPUMIsGuestFPUStateActive(pVCpu)) { STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestFpuState, x); if (CPUMR0LoadGuestFPU(pVM, pVCpu) == VINF_CPUM_HOST_CR0_MODIFIED) pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT; STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestFpuState, x); STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadGuestFpu); } /* * Re-export the host state bits as we may've been preempted (only happens when * thread-context hooks are used or when the VM start function changes) or if * the host CR0 is modified while loading the guest FPU state above. * * The 64-on-32 switcher saves the (64-bit) host state into the VMCS and if we * changed the switcher back to 32-bit, we *must* save the 32-bit host state here, * see @bugref{8432}. * * This may also happen when switching to/from a nested-guest VMCS without leaving * ring-0. */ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT) { hmR0VmxExportHostState(pVCpu); STAM_COUNTER_INC(&pVCpu->hm.s.StatExportHostState); } Assert(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)); /* * Export the state shared between host and guest (FPU, debug, lazy MSRs). */ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE) hmR0VmxExportSharedState(pVCpu, pVmxTransient); AssertMsg(!pVCpu->hm.s.fCtxChanged, ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged)); /* * Store status of the shared guest/host debug state at the time of VM-entry. */ pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu); pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu); /* * Always cache the TPR-shadow if the virtual-APIC page exists, thereby skipping * more than one conditional check. The post-run side of our code shall determine * if it needs to sync. the virtual APIC TPR with the TPR-shadow. */ if (pVmcsInfo->pbVirtApic) pVmxTransient->u8GuestTpr = pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR]; /* * Update the host MSRs values in the VM-exit MSR-load area. */ if (!pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs) { if (pVmcsInfo->cExitMsrLoad > 0) hmR0VmxUpdateAutoLoadHostMsrs(pVCpu, pVmcsInfo); pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = true; } /* * Evaluate if we need to intercept guest RDTSC/P accesses. Set up the * VMX-preemption timer based on the next virtual sync clock deadline. 
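 *
 * For illustration (simplified; the real decision also considers the VMX-preemption
 * timer deadline): assuming the "use TSC offsetting" control is set and RDTSC exiting
 * is clear, a guest RDTSC completes without a VM-exit and returns
 * @code
 *     uint64_t const uHostTsc  = ASMReadTSC();                         // raw hardware TSC
 *     uint64_t const uGuestTsc = uHostTsc + pVmcsInfo->u64TscOffset;   // value the guest observes
 * @endcode
 * Offsetting is therefore only usable while a single constant offset yields a sane
 * guest TSC; otherwise RDTSC/RDTSCP exits are enabled and the reads are emulated.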
*/ if ( !pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer || idCurrentCpu != pVCpu->hmr0.s.idLastCpu) { hmR0VmxUpdateTscOffsettingAndPreemptTimer(pVCpu, pVmxTransient, idCurrentCpu); pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = true; } /* Record statistics of how often we use TSC offsetting as opposed to intercepting RDTSC/P. */ bool const fIsRdtscIntercepted = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT); if (!fIsRdtscIntercepted) STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset); else STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept); ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB flushing, set this across the world switch. */ hmR0VmxFlushTaggedTlb(pHostCpu, pVCpu, pVmcsInfo); /* Invalidate the appropriate guest entries from the TLB. */ Assert(idCurrentCpu == pVCpu->hmr0.s.idLastCpu); pVCpu->hm.s.vmx.LastError.idCurrentCpu = idCurrentCpu; /* Record the error reporting info. with the current host CPU. */ pVmcsInfo->idHostCpuState = idCurrentCpu; /* Record the CPU for which the host-state has been exported. */ pVmcsInfo->idHostCpuExec = idCurrentCpu; /* Record the CPU on which we shall execute. */ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x); TMNotifyStartOfExecution(pVM, pVCpu); /* Notify TM to resume its clocks when TSC is tied to execution, as we're about to start executing the guest. */ /* * Load the guest TSC_AUX MSR when we are not intercepting RDTSCP. * * This is done this late as updating the TSC offsetting/preemption timer above * figures out if we can skip intercepting RDTSCP by calculating the number of * host CPU ticks till the next virtual sync deadline (for the dynamic case). */ if ( (pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_RDTSCP) && !fIsRdtscIntercepted) { vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_TSC_AUX); /* NB: Because we call hmR0VmxAddAutoLoadStoreMsr with fUpdateHostMsr=true, it's safe even after hmR0VmxUpdateAutoLoadHostMsrs has already been done. */ int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX, CPUMGetGuestTscAux(pVCpu), true /* fSetReadWrite */, true /* fUpdateHostMsr */); AssertRC(rc); Assert(!pVmxTransient->fRemoveTscAuxMsr); pVmxTransient->fRemoveTscAuxMsr = true; } #ifdef VBOX_STRICT Assert(pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs); hmR0VmxCheckAutoLoadStoreMsrs(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest); hmR0VmxCheckHostEferMsr(pVmcsInfo); AssertRC(vmxHCCheckCachedVmcsCtls(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest)); #endif #ifdef HMVMX_ALWAYS_CHECK_GUEST_STATE /** @todo r=ramshankar: We can now probably use iemVmxVmentryCheckGuestState here. * Add a PVMXMSRS parameter to it, so that IEM can look at the host MSRs, * see @bugref{9180#c54}. */ uint32_t const uInvalidReason = hmR0VmxCheckGuestState(pVCpu, pVmcsInfo); if (uInvalidReason != VMX_IGS_REASON_NOT_FOUND) Log4(("hmR0VmxCheckGuestState returned %#x\n", uInvalidReason)); #endif } /** * First C routine invoked after running guest code using hardware-assisted VMX. * * @param pVCpu The cross context virtual CPU structure. * @param pVmxTransient The VMX-transient structure. * @param rcVMRun Return code of VMLAUNCH/VMRESUME. * * @remarks Called with interrupts disabled, and returns with interrupts enabled! * * @remarks No-long-jump zone!!! This function will however re-enable longjmps * unconditionally when it is safe to do so. 
*/ static void hmR0VmxPostRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, int rcVMRun) { ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB flushing. */ ASMAtomicIncU32(&pVCpu->hmr0.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for EMT poking. */ pVCpu->hm.s.fCtxChanged = 0; /* Exits/longjmps to ring-3 requires saving the guest state. */ pVmxTransient->fVmcsFieldsRead = 0; /* Transient fields need to be read from the VMCS. */ pVmxTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */ pVmxTransient->fVectoringDoublePF = false; /* Vectoring double page-fault needs to be determined later. */ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo; if (!(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT)) { uint64_t uGstTsc; if (!pVmxTransient->fIsNestedGuest) uGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset; else { uint64_t const uNstGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset; uGstTsc = CPUMRemoveNestedGuestTscOffset(pVCpu, uNstGstTsc); } TMCpuTickSetLastSeen(pVCpu, uGstTsc); /* Update TM with the guest TSC. */ } STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatPreExit, x); TMNotifyEndOfExecution(pVCpu->CTX_SUFF(pVM), pVCpu, pVCpu->hmr0.s.uTscExit); /* Notify TM that the guest is no longer running. */ VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM); pVCpu->hmr0.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_REQUIRED; /* Some host state messed up by VMX needs restoring. */ pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */ #ifdef VBOX_STRICT hmR0VmxCheckHostEferMsr(pVmcsInfo); /* Verify that the host EFER MSR wasn't modified. */ #endif Assert(!ASMIntAreEnabled()); ASMSetFlags(pVmxTransient->fEFlags); /* Enable interrupts. */ Assert(!VMMRZCallRing3IsEnabled(pVCpu)); #ifdef HMVMX_ALWAYS_CLEAN_TRANSIENT /* * Clean all the VMCS fields in the transient structure before reading * anything from the VMCS. */ pVmxTransient->uExitReason = 0; pVmxTransient->uExitIntErrorCode = 0; pVmxTransient->uExitQual = 0; pVmxTransient->uGuestLinearAddr = 0; pVmxTransient->uExitIntInfo = 0; pVmxTransient->cbExitInstr = 0; pVmxTransient->ExitInstrInfo.u = 0; pVmxTransient->uEntryIntInfo = 0; pVmxTransient->uEntryXcptErrorCode = 0; pVmxTransient->cbEntryInstr = 0; pVmxTransient->uIdtVectoringInfo = 0; pVmxTransient->uIdtVectoringErrorCode = 0; #endif /* * Save the basic VM-exit reason and check if the VM-entry failed. * See Intel spec. 24.9.1 "Basic VM-exit Information". */ uint32_t uExitReason; int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason); AssertRC(rc); pVmxTransient->uExitReason = VMX_EXIT_REASON_BASIC(uExitReason); pVmxTransient->fVMEntryFailed = VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason); /* * Log the VM-exit before logging anything else as otherwise it might be a * tad confusing what happens before and after the world-switch. */ HMVMX_LOG_EXIT(pVCpu, uExitReason); /* * Remove the TSC_AUX MSR from the auto-load/store MSR area and reset any MSR * bitmap permissions, if it was added before VM-entry. */ if (pVmxTransient->fRemoveTscAuxMsr) { hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX); pVmxTransient->fRemoveTscAuxMsr = false; } /* * Check if VMLAUNCH/VMRESUME succeeded. * If this failed, we cause a guru meditation and cease further execution. 
*/ if (RT_LIKELY(rcVMRun == VINF_SUCCESS)) { /* * Update the VM-exit history array here even if the VM-entry failed due to: * - Invalid guest state. * - MSR loading. * - Machine-check event. * * In any of the above cases we will still have a "valid" VM-exit reason * despite @a fVMEntryFailed being false. * * See Intel spec. 26.7 "VM-Entry failures during or after loading guest state". * * Note! We don't have CS or RIP at this point. Will probably address that later * by amending the history entry added here. */ EMHistoryAddExit(pVCpu, EMEXIT_MAKE_FT(EMEXIT_F_KIND_VMX, pVmxTransient->uExitReason & EMEXIT_F_TYPE_MASK), UINT64_MAX, pVCpu->hmr0.s.uTscExit); if (RT_LIKELY(!pVmxTransient->fVMEntryFailed)) { VMMRZCallRing3Enable(pVCpu); Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3)); #ifdef HMVMX_ALWAYS_SAVE_RO_GUEST_STATE vmxHCReadAllRoFieldsVmcs(pVCpu, pVmxTransient); #endif /* * Always import the guest-interruptibility state as we need it while evaluating * injecting events on re-entry. We could in *theory* postpone reading it for * exits that does not involve instruction emulation, but since most exits are * for instruction emulation (exceptions being external interrupts, shadow * paging building page faults and EPT violations, and interrupt window stuff) * this is a reasonable simplification. * * We don't import CR0 (when unrestricted guest execution is unavailable) despite * checking for real-mode while exporting the state because all bits that cause * mode changes wrt CR0 are intercepted. * * Note! This mask _must_ match the default value for the default a_fDonePostExit * value for the vmxHCImportGuestState template! */ /** @todo r=bird: consider dropping the INHIBIT_XXX and fetch the state * explicitly in the exit handlers and injection function. That way we have * fewer clusters of vmread spread around the code, because the EM history * executor won't execute very many non-exiting instructions before stopping. */ rc = vmxHCImportGuestState< CPUMCTX_EXTRN_INHIBIT_INT | CPUMCTX_EXTRN_INHIBIT_NMI #if defined(HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE) || defined(HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE) | HMVMX_CPUMCTX_EXTRN_ALL #elif defined(HMVMX_ALWAYS_SAVE_GUEST_RFLAGS) | CPUMCTX_EXTRN_RFLAGS #endif , 0 /*a_fDoneLocal*/, 0 /*a_fDonePostExit*/>(pVCpu, pVmcsInfo, __FUNCTION__); AssertRC(rc); /* * Sync the TPR shadow with our APIC state. */ if ( !pVmxTransient->fIsNestedGuest && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)) { Assert(pVmcsInfo->pbVirtApic); if (pVmxTransient->u8GuestTpr != pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR]) { rc = APICSetTpr(pVCpu, pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR]); AssertRC(rc); ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR); } } Assert(VMMRZCallRing3IsEnabled(pVCpu)); Assert( pVmxTransient->fWasGuestDebugStateActive == false || pVmxTransient->fWasHyperDebugStateActive == false); return; } } #ifdef VBOX_WITH_NESTED_HWVIRT_VMX else if (pVmxTransient->fIsNestedGuest) AssertMsgFailed(("VMLAUNCH/VMRESUME failed but shouldn't happen when VMLAUNCH/VMRESUME was emulated in IEM!\n")); #endif else Log4Func(("VM-entry failure: rcVMRun=%Rrc fVMEntryFailed=%RTbool\n", rcVMRun, pVmxTransient->fVMEntryFailed)); VMMRZCallRing3Enable(pVCpu); } /** * Runs the guest code using hardware-assisted VMX the normal way. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param pcLoops Pointer to the number of executed loops. 
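 *
 * Note (illustrative): the loop is bounded by cMaxResumeLoops so the EMT periodically
 * returns to ring-3 even when every VM-exit is handled entirely in ring-0; @a pcLoops
 * is shared with the other run loops so the bound spans switches between them. Sketch
 * of the bounding logic at the end of each iteration:
 * @code
 *     if (rcStrict == VINF_SUCCESS)
 *     {
 *         if (++(*pcLoops) <= cMaxResumeLoops)
 *             continue;                                  // keep executing in ring-0
 *         STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
 *         rcStrict = VINF_EM_RAW_INTERRUPT;              // force a trip to ring-3
 *     }
 *     break;
 * @endcode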
*/ static VBOXSTRICTRC hmR0VmxRunGuestCodeNormal(PVMCPUCC pVCpu, uint32_t *pcLoops) { uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops; Assert(pcLoops); Assert(*pcLoops <= cMaxResumeLoops); Assert(!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)); #ifdef VBOX_WITH_NESTED_HWVIRT_VMX /* * Switch to the guest VMCS as we may have transitioned from executing the nested-guest * without leaving ring-0. Otherwise, if we came from ring-3 we would have loaded the * guest VMCS while entering the VMX ring-0 session. */ if (pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs) { int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, false /* fSwitchToNstGstVmcs */); if (RT_SUCCESS(rc)) { /* likely */ } else { LogRelFunc(("Failed to switch to the guest VMCS. rc=%Rrc\n", rc)); return rc; } } #endif VMXTRANSIENT VmxTransient; RT_ZERO(VmxTransient); VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu); /* Paranoia. */ Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfo); VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5; for (;;) { Assert(!HMR0SuspendPending()); HMVMX_ASSERT_CPU_SAFE(pVCpu); STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x); /* * Preparatory work for running nested-guest code, this may force us to * return to ring-3. * * Warning! This bugger disables interrupts on VINF_SUCCESS! */ rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */); if (rcStrict != VINF_SUCCESS) break; /* Interrupts are disabled at this point! */ hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient); int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient); hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun); /* Interrupts are re-enabled at this point! */ /* * Check for errors with running the VM (VMLAUNCH/VMRESUME). */ if (RT_SUCCESS(rcRun)) { /* very likely */ } else { STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x); hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient); return rcRun; } /* * Profile the VM-exit. */ AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason)); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll); STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]); STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x); HMVMX_START_EXIT_DISPATCH_PROF(); VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason); /* * Handle the VM-exit. */ #ifdef HMVMX_USE_FUNCTION_TABLE rcStrict = g_aVMExitHandlers[VmxTransient.uExitReason].pfn(pVCpu, &VmxTransient); #else rcStrict = hmR0VmxHandleExit(pVCpu, &VmxTransient); #endif STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x); if (rcStrict == VINF_SUCCESS) { if (++(*pcLoops) <= cMaxResumeLoops) continue; STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops); rcStrict = VINF_EM_RAW_INTERRUPT; } break; } STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x); return rcStrict; } #ifdef VBOX_WITH_NESTED_HWVIRT_VMX /** * Runs the nested-guest code using hardware-assisted VMX. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param pcLoops Pointer to the number of executed loops. * * @sa hmR0VmxRunGuestCodeNormal. 
*/ static VBOXSTRICTRC hmR0VmxRunGuestCodeNested(PVMCPUCC pVCpu, uint32_t *pcLoops) { uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops; Assert(pcLoops); Assert(*pcLoops <= cMaxResumeLoops); Assert(CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)); /* * Switch to the nested-guest VMCS as we may have transitioned from executing the * guest without leaving ring-0. Otherwise, if we came from ring-3 we would have * loaded the nested-guest VMCS while entering the VMX ring-0 session. */ if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs) { int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, true /* fSwitchToNstGstVmcs */); if (RT_SUCCESS(rc)) { /* likely */ } else { LogRelFunc(("Failed to switch to the nested-guest VMCS. rc=%Rrc\n", rc)); return rc; } } VMXTRANSIENT VmxTransient; RT_ZERO(VmxTransient); VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu); VmxTransient.fIsNestedGuest = true; /* Paranoia. */ Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfoNstGst); /* Setup pointer so PGM/IEM can query VM-exit auxiliary info on demand in ring-0. */ pVCpu->hmr0.s.vmx.pVmxTransient = &VmxTransient; VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5; for (;;) { Assert(!HMR0SuspendPending()); HMVMX_ASSERT_CPU_SAFE(pVCpu); STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x); /* * Preparatory work for running guest code, this may force us to * return to ring-3. * * Warning! This bugger disables interrupts on VINF_SUCCESS! */ rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */); if (rcStrict != VINF_SUCCESS) break; /* Interrupts are disabled at this point! */ hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient); int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient); hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun); /* Interrupts are re-enabled at this point! */ /* * Check for errors with running the VM (VMLAUNCH/VMRESUME). */ if (RT_SUCCESS(rcRun)) { /* very likely */ } else { STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x); hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient); rcStrict = rcRun; break; } /* * Profile the VM-exit. */ AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason)); STAM_COUNTER_INC(&pVCpu->hm.s.StatNestedExitAll); STAM_COUNTER_INC(&pVCpu->hm.s.aStatNestedExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]); STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x); HMVMX_START_EXIT_DISPATCH_PROF(); VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason); /* * Handle the VM-exit. */ rcStrict = vmxHCHandleExitNested(pVCpu, &VmxTransient); STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x); if (rcStrict == VINF_SUCCESS) { if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)) { STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit); rcStrict = VINF_VMX_VMEXIT; } else { if (++(*pcLoops) <= cMaxResumeLoops) continue; STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops); rcStrict = VINF_EM_RAW_INTERRUPT; } } else Assert(rcStrict != VINF_VMX_VMEXIT); break; } /* Ensure VM-exit auxiliary info. is no longer available. */ pVCpu->hmr0.s.vmx.pVmxTransient = NULL; STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x); return rcStrict; } #endif /* VBOX_WITH_NESTED_HWVIRT_VMX */ /** @name Execution loop for single stepping, DBGF events and expensive Dtrace * probes. 
* * The following few functions and associated structure contains the bloat * necessary for providing detailed debug events and dtrace probes as well as * reliable host side single stepping. This works on the principle of * "subclassing" the normal execution loop and workers. We replace the loop * method completely and override selected helpers to add necessary adjustments * to their core operation. * * The goal is to keep the "parent" code lean and mean, so as not to sacrifice * any performance for debug and analysis features. * * @{ */ /** * Single steps guest code using hardware-assisted VMX. * * This is -not- the same as the guest single-stepping itself (say using EFLAGS.TF) * but single-stepping through the hypervisor debugger. * * @returns Strict VBox status code (i.e. informational status codes too). * @param pVCpu The cross context virtual CPU structure. * @param pcLoops Pointer to the number of executed loops. * * @note Mostly the same as hmR0VmxRunGuestCodeNormal(). */ static VBOXSTRICTRC hmR0VmxRunGuestCodeDebug(PVMCPUCC pVCpu, uint32_t *pcLoops) { uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops; Assert(pcLoops); Assert(*pcLoops <= cMaxResumeLoops); VMXTRANSIENT VmxTransient; RT_ZERO(VmxTransient); VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu); /* Set HMCPU indicators. */ bool const fSavedSingleInstruction = pVCpu->hm.s.fSingleInstruction; pVCpu->hm.s.fSingleInstruction = pVCpu->hm.s.fSingleInstruction || DBGFIsStepping(pVCpu); pVCpu->hmr0.s.fDebugWantRdTscExit = false; pVCpu->hmr0.s.fUsingDebugLoop = true; /* State we keep to help modify and later restore the VMCS fields we alter, and for detecting steps. */ VMXRUNDBGSTATE DbgState; vmxHCRunDebugStateInit(pVCpu, &VmxTransient, &DbgState); vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState); /* * The loop. */ VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5; for (;;) { Assert(!HMR0SuspendPending()); HMVMX_ASSERT_CPU_SAFE(pVCpu); STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x); bool fStepping = pVCpu->hm.s.fSingleInstruction; /* Set up VM-execution controls the next two can respond to. */ vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState); /* * Preparatory work for running guest code, this may force us to * return to ring-3. * * Warning! This bugger disables interrupts on VINF_SUCCESS! */ rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, fStepping); if (rcStrict != VINF_SUCCESS) break; /* Interrupts are disabled at this point! */ hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient); /* Override any obnoxious code in the above two calls. */ vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState); /* * Finally execute the guest. */ int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient); hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun); /* Interrupts are re-enabled at this point! */ /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */ if (RT_SUCCESS(rcRun)) { /* very likely */ } else { STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x); hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient); return rcRun; } /* Profile the VM-exit. 
*/ AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason)); STAM_COUNTER_INC(&pVCpu->hm.s.StatDebugExitAll); STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]); STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x); HMVMX_START_EXIT_DISPATCH_PROF(); VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason); /* * Handle the VM-exit - we quit earlier on certain VM-exits, see hmR0VmxHandleExitDebug(). */ rcStrict = vmxHCRunDebugHandleExit(pVCpu, &VmxTransient, &DbgState); STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x); if (rcStrict != VINF_SUCCESS) break; if (++(*pcLoops) > cMaxResumeLoops) { STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops); rcStrict = VINF_EM_RAW_INTERRUPT; break; } /* * Stepping: Did the RIP change, if so, consider it a single step. * Otherwise, make sure one of the TFs gets set. */ if (fStepping) { int rc = vmxHCImportGuestStateEx(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP); AssertRC(rc); if ( pVCpu->cpum.GstCtx.rip != DbgState.uRipStart || pVCpu->cpum.GstCtx.cs.Sel != DbgState.uCsStart) { rcStrict = VINF_EM_DBG_STEPPED; break; } ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR7); } /* * Update when dtrace settings changes (DBGF kicks us, so no need to check). */ if (VBOXVMM_GET_SETTINGS_SEQ_NO() != DbgState.uDtraceSettingsSeqNo) vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState); /* Restore all controls applied by hmR0VmxPreRunGuestDebugStateApply above. */ rcStrict = vmxHCRunDebugStateRevert(pVCpu, &VmxTransient, &DbgState, rcStrict); Assert(rcStrict == VINF_SUCCESS); } /* * Clear the X86_EFL_TF if necessary. */ if (pVCpu->hmr0.s.fClearTrapFlag) { int rc = vmxHCImportGuestStateEx(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_RFLAGS); AssertRC(rc); pVCpu->hmr0.s.fClearTrapFlag = false; pVCpu->cpum.GstCtx.eflags.Bits.u1TF = 0; } /** @todo there seems to be issues with the resume flag when the monitor trap * flag is pending without being used. Seen early in bios init when * accessing APIC page in protected mode. */ /** @todo we need to do hmR0VmxRunDebugStateRevert here too, in case we broke * out of the above loop. */ /* Restore HMCPU indicators. */ pVCpu->hmr0.s.fUsingDebugLoop = false; pVCpu->hmr0.s.fDebugWantRdTscExit = false; pVCpu->hm.s.fSingleInstruction = fSavedSingleInstruction; STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x); return rcStrict; } /** @} */ /** * Checks if any expensive dtrace probes are enabled and we should go to the * debug loop. * * @returns true if we should use debug loop, false if not. */ static bool hmR0VmxAnyExpensiveProbesEnabled(void) { /* It's probably faster to OR the raw 32-bit counter variables together. Since the variables are in an array and the probes are next to one another (more or less), we have good locality. So, better read eight-nine cache lines ever time and only have one conditional, than 128+ conditionals, right? 
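       Illustration of that trade-off with hypothetical raw enable counters (not the
       actual probe names):

           // Branchless accumulation: one test instead of one conditional per probe.
           uint32_t const fAny = fProbeA | fProbeB | fProbeC;
           return fAny != 0;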

/**
 * Checks if any expensive dtrace probes are enabled and we should go to the
 * debug loop.
 *
 * @returns true if we should use debug loop, false if not.
 */
static bool hmR0VmxAnyExpensiveProbesEnabled(void)
{
    /* It's probably faster to OR the raw 32-bit counter variables together.
       Since the variables are in an array and the probes are next to one
       another (more or less), we have good locality.  So, better read
       eight or nine cache lines every time and only have one conditional, than
       128+ conditionals, right? */
    return (  VBOXVMM_R0_HMVMX_VMEXIT_ENABLED_RAW() /* expensive too due to context */
            | VBOXVMM_XCPT_DE_ENABLED_RAW()         | VBOXVMM_XCPT_DB_ENABLED_RAW()
            | VBOXVMM_XCPT_BP_ENABLED_RAW()         | VBOXVMM_XCPT_OF_ENABLED_RAW()
            | VBOXVMM_XCPT_BR_ENABLED_RAW()         | VBOXVMM_XCPT_UD_ENABLED_RAW()
            | VBOXVMM_XCPT_NM_ENABLED_RAW()         | VBOXVMM_XCPT_DF_ENABLED_RAW()
            | VBOXVMM_XCPT_TS_ENABLED_RAW()         | VBOXVMM_XCPT_NP_ENABLED_RAW()
            | VBOXVMM_XCPT_SS_ENABLED_RAW()         | VBOXVMM_XCPT_GP_ENABLED_RAW()
            | VBOXVMM_XCPT_PF_ENABLED_RAW()         | VBOXVMM_XCPT_MF_ENABLED_RAW()
            | VBOXVMM_XCPT_AC_ENABLED_RAW()         | VBOXVMM_XCPT_XF_ENABLED_RAW()
            | VBOXVMM_XCPT_VE_ENABLED_RAW()         | VBOXVMM_XCPT_SX_ENABLED_RAW()
            | VBOXVMM_INT_SOFTWARE_ENABLED_RAW()    | VBOXVMM_INT_HARDWARE_ENABLED_RAW()
           ) != 0
        || (  VBOXVMM_INSTR_HALT_ENABLED_RAW()      | VBOXVMM_INSTR_MWAIT_ENABLED_RAW()
            | VBOXVMM_INSTR_MONITOR_ENABLED_RAW()   | VBOXVMM_INSTR_CPUID_ENABLED_RAW()
            | VBOXVMM_INSTR_INVD_ENABLED_RAW()      | VBOXVMM_INSTR_WBINVD_ENABLED_RAW()
            | VBOXVMM_INSTR_INVLPG_ENABLED_RAW()    | VBOXVMM_INSTR_RDTSC_ENABLED_RAW()
            | VBOXVMM_INSTR_RDTSCP_ENABLED_RAW()    | VBOXVMM_INSTR_RDPMC_ENABLED_RAW()
            | VBOXVMM_INSTR_RDMSR_ENABLED_RAW()     | VBOXVMM_INSTR_WRMSR_ENABLED_RAW()
            | VBOXVMM_INSTR_CRX_READ_ENABLED_RAW()  | VBOXVMM_INSTR_CRX_WRITE_ENABLED_RAW()
            | VBOXVMM_INSTR_DRX_READ_ENABLED_RAW()  | VBOXVMM_INSTR_DRX_WRITE_ENABLED_RAW()
            | VBOXVMM_INSTR_PAUSE_ENABLED_RAW()     | VBOXVMM_INSTR_XSETBV_ENABLED_RAW()
            | VBOXVMM_INSTR_SIDT_ENABLED_RAW()      | VBOXVMM_INSTR_LIDT_ENABLED_RAW()
            | VBOXVMM_INSTR_SGDT_ENABLED_RAW()      | VBOXVMM_INSTR_LGDT_ENABLED_RAW()
            | VBOXVMM_INSTR_SLDT_ENABLED_RAW()      | VBOXVMM_INSTR_LLDT_ENABLED_RAW()
            | VBOXVMM_INSTR_STR_ENABLED_RAW()       | VBOXVMM_INSTR_LTR_ENABLED_RAW()
            | VBOXVMM_INSTR_GETSEC_ENABLED_RAW()    | VBOXVMM_INSTR_RSM_ENABLED_RAW()
            | VBOXVMM_INSTR_RDRAND_ENABLED_RAW()    | VBOXVMM_INSTR_RDSEED_ENABLED_RAW()
            | VBOXVMM_INSTR_XSAVES_ENABLED_RAW()    | VBOXVMM_INSTR_XRSTORS_ENABLED_RAW()
            | VBOXVMM_INSTR_VMM_CALL_ENABLED_RAW()  | VBOXVMM_INSTR_VMX_VMCLEAR_ENABLED_RAW()
            | VBOXVMM_INSTR_VMX_VMLAUNCH_ENABLED_RAW() | VBOXVMM_INSTR_VMX_VMPTRLD_ENABLED_RAW()
            | VBOXVMM_INSTR_VMX_VMPTRST_ENABLED_RAW()  | VBOXVMM_INSTR_VMX_VMREAD_ENABLED_RAW()
            | VBOXVMM_INSTR_VMX_VMRESUME_ENABLED_RAW() | VBOXVMM_INSTR_VMX_VMWRITE_ENABLED_RAW()
            | VBOXVMM_INSTR_VMX_VMXOFF_ENABLED_RAW()   | VBOXVMM_INSTR_VMX_VMXON_ENABLED_RAW()
            | VBOXVMM_INSTR_VMX_VMFUNC_ENABLED_RAW()   | VBOXVMM_INSTR_VMX_INVEPT_ENABLED_RAW()
            | VBOXVMM_INSTR_VMX_INVVPID_ENABLED_RAW()  | VBOXVMM_INSTR_VMX_INVPCID_ENABLED_RAW()
           ) != 0
        || (  VBOXVMM_EXIT_TASK_SWITCH_ENABLED_RAW()
            | VBOXVMM_EXIT_HALT_ENABLED_RAW()       | VBOXVMM_EXIT_MWAIT_ENABLED_RAW()
            | VBOXVMM_EXIT_MONITOR_ENABLED_RAW()    | VBOXVMM_EXIT_CPUID_ENABLED_RAW()
            | VBOXVMM_EXIT_INVD_ENABLED_RAW()       | VBOXVMM_EXIT_WBINVD_ENABLED_RAW()
            | VBOXVMM_EXIT_INVLPG_ENABLED_RAW()     | VBOXVMM_EXIT_RDTSC_ENABLED_RAW()
            | VBOXVMM_EXIT_RDTSCP_ENABLED_RAW()     | VBOXVMM_EXIT_RDPMC_ENABLED_RAW()
            | VBOXVMM_EXIT_RDMSR_ENABLED_RAW()      | VBOXVMM_EXIT_WRMSR_ENABLED_RAW()
            | VBOXVMM_EXIT_CRX_READ_ENABLED_RAW()   | VBOXVMM_EXIT_CRX_WRITE_ENABLED_RAW()
            | VBOXVMM_EXIT_DRX_READ_ENABLED_RAW()   | VBOXVMM_EXIT_DRX_WRITE_ENABLED_RAW()
            | VBOXVMM_EXIT_PAUSE_ENABLED_RAW()      | VBOXVMM_EXIT_XSETBV_ENABLED_RAW()
            | VBOXVMM_EXIT_SIDT_ENABLED_RAW()       | VBOXVMM_EXIT_LIDT_ENABLED_RAW()
            | VBOXVMM_EXIT_SGDT_ENABLED_RAW()       | VBOXVMM_EXIT_LGDT_ENABLED_RAW()
            | VBOXVMM_EXIT_SLDT_ENABLED_RAW()       | VBOXVMM_EXIT_LLDT_ENABLED_RAW()
            | VBOXVMM_EXIT_STR_ENABLED_RAW()        | VBOXVMM_EXIT_LTR_ENABLED_RAW()
            | VBOXVMM_EXIT_GETSEC_ENABLED_RAW()     | VBOXVMM_EXIT_RSM_ENABLED_RAW()
            | VBOXVMM_EXIT_RDRAND_ENABLED_RAW()     | VBOXVMM_EXIT_RDSEED_ENABLED_RAW()
            | VBOXVMM_EXIT_XSAVES_ENABLED_RAW()     | VBOXVMM_EXIT_XRSTORS_ENABLED_RAW()
            | VBOXVMM_EXIT_VMM_CALL_ENABLED_RAW()   | VBOXVMM_EXIT_VMX_VMCLEAR_ENABLED_RAW()
            | VBOXVMM_EXIT_VMX_VMLAUNCH_ENABLED_RAW()  | VBOXVMM_EXIT_VMX_VMPTRLD_ENABLED_RAW()
            | VBOXVMM_EXIT_VMX_VMPTRST_ENABLED_RAW()   | VBOXVMM_EXIT_VMX_VMREAD_ENABLED_RAW()
            | VBOXVMM_EXIT_VMX_VMRESUME_ENABLED_RAW()  | VBOXVMM_EXIT_VMX_VMWRITE_ENABLED_RAW()
            | VBOXVMM_EXIT_VMX_VMXOFF_ENABLED_RAW()    | VBOXVMM_EXIT_VMX_VMXON_ENABLED_RAW()
            | VBOXVMM_EXIT_VMX_VMFUNC_ENABLED_RAW()    | VBOXVMM_EXIT_VMX_INVEPT_ENABLED_RAW()
            | VBOXVMM_EXIT_VMX_INVVPID_ENABLED_RAW()   | VBOXVMM_EXIT_VMX_INVPCID_ENABLED_RAW()
            | VBOXVMM_EXIT_VMX_EPT_VIOLATION_ENABLED_RAW()
            | VBOXVMM_EXIT_VMX_EPT_MISCONFIG_ENABLED_RAW()
            | VBOXVMM_EXIT_VMX_VAPIC_ACCESS_ENABLED_RAW()
            | VBOXVMM_EXIT_VMX_VAPIC_WRITE_ENABLED_RAW()
           ) != 0;
}
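
/**
 * Illustrative sketch of the "OR everything, branch once" pattern used by
 * hmR0VmxAnyExpensiveProbesEnabled() above: rather than testing each probe
 * enable counter with its own conditional, the raw 32-bit counters are OR-ed
 * together and a single comparison decides the outcome; with the counters
 * packed contiguously this stays cache friendly.  The sketch expresses the
 * same principle with a plain array and loop; the names (g_aProbeEnabled,
 * anyProbeEnabled) are hypothetical and not part of the VMM.
 *
 * @code
 *     #include <cstddef>
 *     #include <cstdint>
 *
 *     static uint32_t g_aProbeEnabled[128];   // one enable counter per probe
 *
 *     static bool anyProbeEnabled(void)
 *     {
 *         uint32_t uOr = 0;
 *         for (size_t i = 0; i < sizeof(g_aProbeEnabled) / sizeof(g_aProbeEnabled[0]); i++)
 *             uOr |= g_aProbeEnabled[i];      // no per-probe branch
 *         return uOr != 0;                    // the single conditional
 *     }
 * @endcode
 */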

/**
 * Runs the guest using hardware-assisted VMX.
 *
 * @returns Strict VBox status code (i.e. informational status codes too).
 * @param   pVCpu   The cross context virtual CPU structure.
 */
VMMR0DECL(VBOXSTRICTRC) VMXR0RunGuestCode(PVMCPUCC pVCpu)
{
    AssertPtr(pVCpu);
    PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
    Assert(VMMRZCallRing3IsEnabled(pVCpu));
    Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
    HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);

    VBOXSTRICTRC rcStrict;
    uint32_t     cLoops = 0;
    for (;;)
    {
#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
        bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(pCtx);
#else
        NOREF(pCtx);
        bool const fInNestedGuestMode = false;
#endif
        if (!fInNestedGuestMode)
        {
            if (   !pVCpu->hm.s.fUseDebugLoop
                && (!VBOXVMM_ANY_PROBES_ENABLED() || !hmR0VmxAnyExpensiveProbesEnabled())
                && !DBGFIsStepping(pVCpu)
                && !pVCpu->CTX_SUFF(pVM)->dbgf.ro.cEnabledInt3Breakpoints)
                rcStrict = hmR0VmxRunGuestCodeNormal(pVCpu, &cLoops);
            else
                rcStrict = hmR0VmxRunGuestCodeDebug(pVCpu, &cLoops);
        }
#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
        else
            rcStrict = hmR0VmxRunGuestCodeNested(pVCpu, &cLoops);

        if (rcStrict == VINF_VMX_VMLAUNCH_VMRESUME)
        {
            Assert(CPUMIsGuestInVmxNonRootMode(pCtx));
            continue;
        }
        if (rcStrict == VINF_VMX_VMEXIT)
        {
            Assert(!CPUMIsGuestInVmxNonRootMode(pCtx));
            continue;
        }
#endif
        break;
    }

    int const rcLoop = VBOXSTRICTRC_VAL(rcStrict);
    switch (rcLoop)
    {
        case VERR_EM_INTERPRETER: rcStrict = VINF_EM_RAW_EMULATE_INSTR; break;
        case VINF_EM_RESET:       rcStrict = VINF_EM_TRIPLE_FAULT;      break;
    }

    int rc2 = hmR0VmxExitToRing3(pVCpu, rcStrict);
    if (RT_FAILURE(rc2))
    {
        pVCpu->hm.s.u32HMError = (uint32_t)VBOXSTRICTRC_VAL(rcStrict);
        rcStrict = rc2;
    }
    Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
    Assert(!VMMR0AssertionIsNotificationSet(pVCpu));
    return rcStrict;
}
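
/**
 * Illustrative sketch of the outer dispatch shape in VMXR0RunGuestCode() above:
 * pick the cheap "normal" inner loop unless debugging features force the
 * instrumented one, and reselect whenever the inner loop returns because the
 * guest switched between VMX root and non-root operation.  Everything below
 * (RunStatus, Cpu, runNormal, runDebug, runNested, runGuest) is hypothetical
 * and only mirrors the structure, not the real VMM API.
 *
 * @code
 *     #include <cstdint>
 *
 *     enum RunStatus { RUN_STOP, RUN_SWITCHED_MODE };
 *
 *     struct Cpu
 *     {
 *         bool fNestedGuest;    // guest currently in VMX non-root operation
 *         bool fNeedDebugLoop;  // stepping, int3 breakpoints or expensive probes active
 *     };
 *
 *     // Stand-ins for the inner loops; the real code runs the guest here.
 *     static RunStatus runNormal(Cpu &, uint32_t &cLoops) { ++cLoops; return RUN_STOP; }
 *     static RunStatus runDebug (Cpu &, uint32_t &cLoops) { ++cLoops; return RUN_STOP; }
 *     static RunStatus runNested(Cpu &, uint32_t &cLoops) { ++cLoops; return RUN_STOP; }
 *
 *     static RunStatus runGuest(Cpu &rCpu)
 *     {
 *         uint32_t cLoops = 0;
 *         for (;;)
 *         {
 *             RunStatus rc;
 *             if (rCpu.fNestedGuest)
 *                 rc = runNested(rCpu, cLoops);
 *             else if (!rCpu.fNeedDebugLoop)
 *                 rc = runNormal(rCpu, cLoops);
 *             else
 *                 rc = runDebug(rCpu, cLoops);
 *
 *             // A transition into or out of the nested guest only reselects
 *             // the inner loop; anything else goes back to the caller.
 *             if (rc != RUN_SWITCHED_MODE)
 *                 return rc;
 *         }
 *     }
 * @endcode
 */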