VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HMVMXR0.cpp @ 103289

Last change on this file since 103289 was 103289, checked in by vboxsync, 3 months ago

VMM/HMVMXR0: Nested VMX: bugref:10318 VMCS state 'launched' shouldn't be ORed only in VMX R0 code, each value is a separate state for a while now.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 281.4 KB
1/* $Id: HMVMXR0.cpp 103289 2024-02-09 11:58:56Z vboxsync $ */
2/** @file
3 * HM VMX (Intel VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2012-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_HM
33#define VMCPU_INCL_CPUM_GST_CTX
34#include <iprt/x86.h>
35#include <iprt/asm-amd64-x86.h>
36#include <iprt/thread.h>
37#include <iprt/mem.h>
38#include <iprt/mp.h>
39
40#include <VBox/vmm/pdmapi.h>
41#include <VBox/vmm/dbgf.h>
42#include <VBox/vmm/iem.h>
43#include <VBox/vmm/iom.h>
44#include <VBox/vmm/tm.h>
45#include <VBox/vmm/em.h>
46#include <VBox/vmm/gcm.h>
47#include <VBox/vmm/gim.h>
48#include <VBox/vmm/apic.h>
49#include "HMInternal.h"
50#include <VBox/vmm/vmcc.h>
51#include <VBox/vmm/hmvmxinline.h>
52#include "HMVMXR0.h"
53#include "VMXInternal.h"
54#include "dtrace/VBoxVMM.h"
55
56
57/*********************************************************************************************************************************
58* Defined Constants And Macros *
59*********************************************************************************************************************************/
60#ifdef DEBUG_ramshankar
61# define HMVMX_ALWAYS_SAVE_GUEST_RFLAGS
62# define HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
63# define HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE
64# define HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
65# define HMVMX_ALWAYS_CLEAN_TRANSIENT
66# define HMVMX_ALWAYS_CHECK_GUEST_STATE
67# define HMVMX_ALWAYS_TRAP_ALL_XCPTS
68# define HMVMX_ALWAYS_TRAP_PF
69# define HMVMX_ALWAYS_FLUSH_TLB
70# define HMVMX_ALWAYS_SWAP_EFER
71#endif
72
73/** Enables the fAlwaysInterceptMovDRx related code. */
74#define VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX 1
75
76
77/*********************************************************************************************************************************
78* Structures and Typedefs *
79*********************************************************************************************************************************/
80/**
81 * VMX page allocation information.
82 */
83typedef struct
84{
85 uint32_t fValid; /**< Whether to allocate this page (e.g., based on a CPU feature). */
86 uint32_t uPadding0; /**< Padding to ensure an array of these structs is aligned to a multiple of 8. */
87 PRTHCPHYS pHCPhys; /**< Where to store the host-physical address of the allocation. */
88 PRTR0PTR ppVirt; /**< Where to store the host-virtual address of the allocation. */
89} VMXPAGEALLOCINFO;
90/** Pointer to VMX page-allocation info. */
91typedef VMXPAGEALLOCINFO *PVMXPAGEALLOCINFO;
92/** Pointer to a const VMX page-allocation info. */
93typedef const VMXPAGEALLOCINFO *PCVMXPAGEALLOCINFO;
94AssertCompileSizeAlignment(VMXPAGEALLOCINFO, 8);
95
96
97/*********************************************************************************************************************************
98* Internal Functions *
99*********************************************************************************************************************************/
100static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient);
101static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo);
102
103
104/*********************************************************************************************************************************
105* Global Variables *
106*********************************************************************************************************************************/
107/** The DR6 value after writing zero to the register.
108 * Set by VMXR0GlobalInit(). */
109static uint64_t g_fDr6Zeroed = 0;
110
111
112/**
113 * Checks if the given MSR is part of the last-branch-from-IP MSR stack.
114 * @returns @c true if it's part of LBR stack, @c false otherwise.
115 *
116 * @param pVM The cross context VM structure.
117 * @param idMsr The MSR.
118 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
119 * Optional, can be NULL.
120 *
121 * @remarks Must only be called when LBR is enabled.
122 */
123DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchFromMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
124{
125 Assert(pVM->hmr0.s.vmx.fLbr);
126 Assert(pVM->hmr0.s.vmx.idLbrFromIpMsrFirst);
127 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
128 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
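    /* Note: the subtraction above is unsigned, so an idMsr below idLbrFromIpMsrFirst wraps
       around and yields an idxMsr >= cLbrStack; the single range check below therefore also
       rejects MSRs below the start of the LBR from-IP range. */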
129 if (idxMsr < cLbrStack)
130 {
131 if (pidxMsr)
132 *pidxMsr = idxMsr;
133 return true;
134 }
135 return false;
136}
137
138
139/**
140 * Checks if the given MSR is part of the last-branch-to-IP MSR stack.
141 * @returns @c true if it's part of LBR stack, @c false otherwise.
142 *
143 * @param pVM The cross context VM structure.
144 * @param idMsr The MSR.
145 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
146 * Optional, can be NULL.
147 *
148 * @remarks Must only be called when LBR is enabled and when last-branch-to-IP MSRs
149 * are supported by the CPU (see hmR0VmxSetupLbrMsrRange).
150 */
151DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchToMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
152{
153 Assert(pVM->hmr0.s.vmx.fLbr);
154 if (pVM->hmr0.s.vmx.idLbrToIpMsrFirst)
155 {
156 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrToIpMsrLast - pVM->hmr0.s.vmx.idLbrToIpMsrFirst + 1;
157 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
158 if (idxMsr < cLbrStack)
159 {
160 if (pidxMsr)
161 *pidxMsr = idxMsr;
162 return true;
163 }
164 }
165 return false;
166}
167
168
169/**
170 * Gets the active (in use) VMCS info. object for the specified VCPU.
171 *
172 * This is either the guest or nested-guest VMCS info. and need not
173 * pertain to the "current" VMCS (in the VMX definition of the term). For instance,
174 * if the VM-entry failed due to an invalid guest state, we may have "cleared" the
175 * current VMCS while returning to ring-3. However, the VMCS info. object for that
176 * VMCS would still be active and returned here so that we can dump the VMCS
177 * fields to ring-3 for diagnostics. This function is thus only used to
178 * distinguish between the nested-guest and guest VMCS.
179 *
180 * @returns The active VMCS information.
181 * @param pVCpu The cross context virtual CPU structure.
182 *
183 * @thread EMT.
184 * @remarks This function may be called with preemption or interrupts disabled!
185 */
186DECLINLINE(PVMXVMCSINFO) hmGetVmxActiveVmcsInfo(PVMCPUCC pVCpu)
187{
188 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
189 return &pVCpu->hmr0.s.vmx.VmcsInfo;
190 return &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
191}
192
193
194/**
195 * Returns whether the VM-exit MSR-store area differs from the VM-exit MSR-load
196 * area.
197 *
198 * @returns @c true if it's different, @c false otherwise.
199 * @param pVmcsInfo The VMCS info. object.
200 */
201DECL_FORCE_INLINE(bool) hmR0VmxIsSeparateExitMsrStoreAreaVmcs(PCVMXVMCSINFO pVmcsInfo)
202{
203 return RT_BOOL( pVmcsInfo->pvGuestMsrStore != pVmcsInfo->pvGuestMsrLoad
204 && pVmcsInfo->pvGuestMsrStore);
205}
206
207
208/**
209 * Sets the given Processor-based VM-execution controls.
210 *
211 * @param pVmxTransient The VMX-transient structure.
212 * @param uProcCtls The Processor-based VM-execution controls to set.
213 */
214static void hmR0VmxSetProcCtlsVmcs(PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
215{
216 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
217 if ((pVmcsInfo->u32ProcCtls & uProcCtls) != uProcCtls)
218 {
219 pVmcsInfo->u32ProcCtls |= uProcCtls;
220 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
221 AssertRC(rc);
222 }
223}
224
225
226/**
227 * Removes the given Processor-based VM-execution controls.
228 *
229 * @param pVCpu The cross context virtual CPU structure.
230 * @param pVmxTransient The VMX-transient structure.
231 * @param uProcCtls The Processor-based VM-execution controls to remove.
232 *
233 * @remarks When executing a nested-guest, this will not remove any of the specified
234 * controls if the nested hypervisor has set any one of them.
235 */
236static void hmR0VmxRemoveProcCtlsVmcs(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
237{
238 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
239 if (pVmcsInfo->u32ProcCtls & uProcCtls)
240 {
241#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
242 if ( !pVmxTransient->fIsNestedGuest
243 || !CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, uProcCtls))
244#else
245 NOREF(pVCpu);
246 if (!pVmxTransient->fIsNestedGuest)
247#endif
248 {
249 pVmcsInfo->u32ProcCtls &= ~uProcCtls;
250 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
251 AssertRC(rc);
252 }
253 }
254}
255
256
257/**
258 * Sets the TSC offset for the current VMCS.
259 *
260 * @param uTscOffset The TSC offset to set.
261 * @param pVmcsInfo The VMCS info. object.
262 */
263static void hmR0VmxSetTscOffsetVmcs(PVMXVMCSINFO pVmcsInfo, uint64_t uTscOffset)
264{
265 if (pVmcsInfo->u64TscOffset != uTscOffset)
266 {
267 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, uTscOffset);
268 AssertRC(rc);
269 pVmcsInfo->u64TscOffset = uTscOffset;
270 }
271}
272
273
274/**
275 * Loads the VMCS specified by the VMCS info. object.
276 *
277 * @returns VBox status code.
278 * @param pVmcsInfo The VMCS info. object.
279 *
280 * @remarks Can be called with interrupts disabled.
281 */
282static int hmR0VmxLoadVmcs(PVMXVMCSINFO pVmcsInfo)
283{
284 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
285 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
286
287 return VMXLoadVmcs(pVmcsInfo->HCPhysVmcs);
288}
289
290
291/**
292 * Clears the VMCS specified by the VMCS info. object.
293 *
294 * @returns VBox status code.
295 * @param pVmcsInfo The VMCS info. object.
296 *
297 * @remarks Can be called with interrupts disabled.
298 */
299static int hmR0VmxClearVmcs(PVMXVMCSINFO pVmcsInfo)
300{
301 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
302 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
303
304 int rc = VMXClearVmcs(pVmcsInfo->HCPhysVmcs);
305 if (RT_SUCCESS(rc))
306 pVmcsInfo->fVmcsState = VMX_V_VMCS_LAUNCH_STATE_CLEAR;
307 return rc;
308}
309
310
311/**
312 * Checks whether the MSR belongs to the set of guest MSRs that we restore
313 * lazily while leaving VT-x.
314 *
315 * @returns true if it does, false otherwise.
316 * @param pVCpu The cross context virtual CPU structure.
317 * @param idMsr The MSR to check.
318 */
319static bool hmR0VmxIsLazyGuestMsr(PCVMCPUCC pVCpu, uint32_t idMsr)
320{
321 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
322 {
323 switch (idMsr)
324 {
325 case MSR_K8_LSTAR:
326 case MSR_K6_STAR:
327 case MSR_K8_SF_MASK:
328 case MSR_K8_KERNEL_GS_BASE:
329 return true;
330 }
331 }
332 return false;
333}
334
335
336/**
339 * Loads a set of guest MSRs to allow read/write passthru access to the guest.
338 *
339 * The name of this function is slightly confusing. This function does NOT
340 * postpone loading, but loads the MSR right now. "hmR0VmxLazy" is simply a
341 * common prefix for functions dealing with "lazy restoration" of the shared
342 * MSRs.
343 *
344 * @param pVCpu The cross context virtual CPU structure.
345 *
346 * @remarks No-long-jump zone!!!
347 */
348static void hmR0VmxLazyLoadGuestMsrs(PVMCPUCC pVCpu)
349{
350 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
351 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
352
353 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
354 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
355 {
356 /*
357 * If the guest MSRs are not loaded -and- if all the guest MSRs are identical
358 * to the MSRs on the CPU (which are the saved host MSRs, see assertion above) then
359 * we can skip a few MSR writes.
360 *
361 * Otherwise, it implies either 1. they're not loaded, or 2. they're loaded but the
362 * guest MSR values in the guest-CPU context might be different to what's currently
363 * loaded in the CPU. In either case, we need to write the new guest MSR values to the
364 * CPU, see @bugref{8728}.
365 */
366 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
367 if ( !(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
368 && pCtx->msrKERNELGSBASE == pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase
369 && pCtx->msrLSTAR == pVCpu->hmr0.s.vmx.u64HostMsrLStar
370 && pCtx->msrSTAR == pVCpu->hmr0.s.vmx.u64HostMsrStar
371 && pCtx->msrSFMASK == pVCpu->hmr0.s.vmx.u64HostMsrSfMask)
372 {
373#ifdef VBOX_STRICT
374 Assert(ASMRdMsr(MSR_K8_KERNEL_GS_BASE) == pCtx->msrKERNELGSBASE);
375 Assert(ASMRdMsr(MSR_K8_LSTAR) == pCtx->msrLSTAR);
376 Assert(ASMRdMsr(MSR_K6_STAR) == pCtx->msrSTAR);
377 Assert(ASMRdMsr(MSR_K8_SF_MASK) == pCtx->msrSFMASK);
378#endif
379 }
380 else
381 {
382 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pCtx->msrKERNELGSBASE);
383 ASMWrMsr(MSR_K8_LSTAR, pCtx->msrLSTAR);
384 ASMWrMsr(MSR_K6_STAR, pCtx->msrSTAR);
385 /* The system call flag mask register isn't as benign and accepting of all
386 values as the above, so mask it to avoid #GP'ing on corrupted input. */
387 Assert(!(pCtx->msrSFMASK & ~(uint64_t)UINT32_MAX));
388 ASMWrMsr(MSR_K8_SF_MASK, pCtx->msrSFMASK & UINT32_MAX);
389 }
390 }
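    /* Note: the flag is set even when the MSR writes above were skipped, because in that
       case the values already loaded on the CPU are identical to the guest values. */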
391 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_LOADED_GUEST;
392}
393
394
395/**
396 * Checks if the specified guest MSR is part of the VM-entry MSR-load area.
397 *
398 * @returns @c true if found, @c false otherwise.
399 * @param pVmcsInfo The VMCS info. object.
400 * @param idMsr The MSR to find.
401 */
402static bool hmR0VmxIsAutoLoadGuestMsr(PCVMXVMCSINFO pVmcsInfo, uint32_t idMsr)
403{
404 PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
405 uint32_t const cMsrs = pVmcsInfo->cEntryMsrLoad;
406 Assert(pMsrs);
407 Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE);
408 for (uint32_t i = 0; i < cMsrs; i++)
409 {
410 if (pMsrs[i].u32Msr == idMsr)
411 return true;
412 }
413 return false;
414}
415
416
417/**
418 * Performs lazy restoration of the set of host MSRs if they were previously
419 * loaded with guest MSR values.
420 *
421 * @param pVCpu The cross context virtual CPU structure.
422 *
423 * @remarks No-long-jump zone!!!
424 * @remarks The guest MSRs should have been saved back into the guest-CPU
425 * context by vmxHCImportGuestState()!!!
426 */
427static void hmR0VmxLazyRestoreHostMsrs(PVMCPUCC pVCpu)
428{
429 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
430 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
431
432 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
433 {
434 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
435 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
436 {
437 ASMWrMsr(MSR_K8_LSTAR, pVCpu->hmr0.s.vmx.u64HostMsrLStar);
438 ASMWrMsr(MSR_K6_STAR, pVCpu->hmr0.s.vmx.u64HostMsrStar);
439 ASMWrMsr(MSR_K8_SF_MASK, pVCpu->hmr0.s.vmx.u64HostMsrSfMask);
440 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase);
441 }
442 }
443 pVCpu->hmr0.s.vmx.fLazyMsrs &= ~(VMX_LAZY_MSRS_LOADED_GUEST | VMX_LAZY_MSRS_SAVED_HOST);
444}
445
446
447/**
448 * Sets pfnStartVm to the best suited variant.
449 *
450 * This must be called whenever anything changes relative to the hmR0VmxStartVm
451 * variant selection:
452 * - pVCpu->hm.s.fLoadSaveGuestXcr0
453 * - HM_WSF_IBPB_ENTRY in pVCpu->hmr0.s.fWorldSwitcher
454 * - HM_WSF_IBPB_EXIT in pVCpu->hmr0.s.fWorldSwitcher
455 * - Perhaps: CPUMIsGuestFPUStateActive() (windows only)
456 * - Perhaps: CPUMCTX.fXStateMask (windows only)
457 *
458 * We currently ASSUME that neither HM_WSF_IBPB_ENTRY nor HM_WSF_IBPB_EXIT
459 * can be changed at runtime.
460 */
461static void hmR0VmxUpdateStartVmFunction(PVMCPUCC pVCpu)
462{
463 static const struct CLANGWORKAROUND { PFNHMVMXSTARTVM pfn; } s_aHmR0VmxStartVmFunctions[] =
464 {
465 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
466 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
467 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
468 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
469 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
470 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
471 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
472 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
473 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
474 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
475 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
476 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
477 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
478 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
479 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
480 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
481 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
482 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
483 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
484 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
485 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
486 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
487 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
488 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
489 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
490 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
491 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
492 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
493 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
494 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
495 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
496 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
497 };
498 uintptr_t const idx = (pVCpu->hmr0.s.fLoadSaveGuestXcr0 ? 1 : 0)
499 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_ENTRY ? 2 : 0)
500 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_ENTRY ? 4 : 0)
501 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_ENTRY ? 8 : 0)
502 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_EXIT ? 16 : 0);
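    /* The table above is laid out so that this 5-bit index picks the matching variant:
       bit 0 = swap XCR0, bit 1 = IBPB on entry, bit 2 = L1D flush on entry,
       bit 3 = MDS clear on entry, bit 4 = IBPB on exit. For example, XCR0 swapping
       combined with only IBPB-on-entry gives idx = 1 | 2 = 3, selecting
       hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit. */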
503 PFNHMVMXSTARTVM const pfnStartVm = s_aHmR0VmxStartVmFunctions[idx].pfn;
504 if (pVCpu->hmr0.s.vmx.pfnStartVm != pfnStartVm)
505 pVCpu->hmr0.s.vmx.pfnStartVm = pfnStartVm;
506}
507
508
509/**
510 * Pushes a 2-byte value onto the real-mode (in virtual-8086 mode) guest's
511 * stack.
512 *
513 * @returns Strict VBox status code (i.e. informational status codes too).
514 * @retval VINF_EM_RESET if pushing a value to the stack caused a triple-fault.
515 * @param pVCpu The cross context virtual CPU structure.
516 * @param uValue The value to push to the guest stack.
517 */
518static VBOXSTRICTRC hmR0VmxRealModeGuestStackPush(PVMCPUCC pVCpu, uint16_t uValue)
519{
520 /*
521 * The stack limit is 0xffff in real-on-virtual 8086 mode. Real-mode with weird stack limits cannot be run in
522 * virtual 8086 mode in VT-x. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers".
523 * See Intel Instruction reference for PUSH and Intel spec. 22.33.1 "Segment Wraparound".
524 */
525 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
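    /* A 2-byte push with SP == 1 would wrap the write across the top of the 64KiB stack
       segment (see "Segment Wraparound" cited above); treat it as a triple fault and
       request a guest reset. */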
526 if (pCtx->sp == 1)
527 return VINF_EM_RESET;
528 pCtx->sp -= sizeof(uint16_t); /* May wrap around which is expected behaviour. */
529 int rc = PGMPhysSimpleWriteGCPhys(pVCpu->CTX_SUFF(pVM), pCtx->ss.u64Base + pCtx->sp, &uValue, sizeof(uint16_t));
530 AssertRC(rc);
531 return rc;
532}
533
534
535/**
536 * Wrapper around VMXWriteVmcs16 taking a pVCpu parameter so VCC doesn't complain about
537 * unreferenced local parameters in the template code...
538 */
539DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t u16Val)
540{
541 RT_NOREF(pVCpu);
542 return VMXWriteVmcs16(uFieldEnc, u16Val);
543}
544
545
546/**
547 * Wrapper around VMXWriteVmcs32 taking a pVCpu parameter so VCC doesn't complain about
548 * unreferenced local parameters in the template code...
549 */
550DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t u32Val)
551{
552 RT_NOREF(pVCpu);
553 return VMXWriteVmcs32(uFieldEnc, u32Val);
554}
555
556
557/**
558 * Wrapper around VMXWriteVmcs64 taking a pVCpu parameter so VCC doesn't complain about
559 * unreferenced local parameters in the template code...
560 */
561DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t u64Val)
562{
563 RT_NOREF(pVCpu);
564 return VMXWriteVmcs64(uFieldEnc, u64Val);
565}
566
567
568/**
569 * Wrapper around VMXReadVmcs16 taking a pVCpu parameter so VCC doesn't complain about
570 * unreferenced local parameters in the template code...
571 */
572DECL_FORCE_INLINE(int) hmR0VmxReadVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t *pu16Val)
573{
574 RT_NOREF(pVCpu);
575 return VMXReadVmcs16(uFieldEnc, pu16Val);
576}
577
578
579/**
580 * Wrapper around VMXReadVmcs32 taking a pVCpu parameter so VCC doesn't complain about
581 * unreferenced local parameters in the template code...
582 */
583DECL_FORCE_INLINE(int) hmR0VmxReadVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t *pu32Val)
584{
585 RT_NOREF(pVCpu);
586 return VMXReadVmcs32(uFieldEnc, pu32Val);
587}
588
589
590/**
591 * Wrapper around VMXReadVmcs64 taking a pVCpu parameter so VCC doesn't complain about
592 * unreferenced local parameters in the template code...
593 */
594DECL_FORCE_INLINE(int) hmR0VmxReadVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t *pu64Val)
595{
596 RT_NOREF(pVCpu);
597 return VMXReadVmcs64(uFieldEnc, pu64Val);
598}
599
600
601/*
602 * Instantiate the code we share with the NEM darwin backend.
603 */
604#define VCPU_2_VMXSTATE(a_pVCpu) (a_pVCpu)->hm.s
605#define VCPU_2_VMXSTATS(a_pVCpu) (a_pVCpu)->hm.s
606
607#define VM_IS_VMX_UNRESTRICTED_GUEST(a_pVM) (a_pVM)->hmr0.s.vmx.fUnrestrictedGuest
608#define VM_IS_VMX_NESTED_PAGING(a_pVM) (a_pVM)->hmr0.s.fNestedPaging
609#define VM_IS_VMX_PREEMPT_TIMER_USED(a_pVM) (a_pVM)->hmr0.s.vmx.fUsePreemptTimer
610#define VM_IS_VMX_LBR(a_pVM) (a_pVM)->hmr0.s.vmx.fLbr
611
612#define VMX_VMCS_WRITE_16(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs16((a_pVCpu), (a_FieldEnc), (a_Val))
613#define VMX_VMCS_WRITE_32(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs32((a_pVCpu), (a_FieldEnc), (a_Val))
614#define VMX_VMCS_WRITE_64(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
615#define VMX_VMCS_WRITE_NW(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
616
617#define VMX_VMCS_READ_16(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs16((a_pVCpu), (a_FieldEnc), (a_pVal))
618#define VMX_VMCS_READ_32(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs32((a_pVCpu), (a_FieldEnc), (a_pVal))
619#define VMX_VMCS_READ_64(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
620#define VMX_VMCS_READ_NW(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
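/* On the 64-bit hosts this ring-0 code runs on, natural-width VMCS fields are 64 bits wide,
   so the _NW accessors simply map onto the 64-bit VMCS read/write wrappers. */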
621
622#include "../VMMAll/VMXAllTemplate.cpp.h"
623
624#undef VMX_VMCS_WRITE_16
625#undef VMX_VMCS_WRITE_32
626#undef VMX_VMCS_WRITE_64
627#undef VMX_VMCS_WRITE_NW
628
629#undef VMX_VMCS_READ_16
630#undef VMX_VMCS_READ_32
631#undef VMX_VMCS_READ_64
632#undef VMX_VMCS_READ_NW
633
634#undef VM_IS_VMX_PREEMPT_TIMER_USED
635#undef VM_IS_VMX_NESTED_PAGING
636#undef VM_IS_VMX_UNRESTRICTED_GUEST
637#undef VCPU_2_VMXSTATS
638#undef VCPU_2_VMXSTATE
639
640
641/**
642 * Updates the VM's last error record.
643 *
644 * If there was a VMX instruction error, reads the error data from the VMCS and
645 * updates the VCPU's last error record as well.
646 *
647 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
648 * Can be NULL if @a rc is not VERR_VMX_UNABLE_TO_START_VM or
649 * VERR_VMX_INVALID_VMCS_FIELD.
650 * @param rc The error code.
651 */
652static void hmR0VmxUpdateErrorRecord(PVMCPUCC pVCpu, int rc)
653{
654 if ( rc == VERR_VMX_INVALID_VMCS_FIELD
655 || rc == VERR_VMX_UNABLE_TO_START_VM)
656 {
657 AssertPtrReturnVoid(pVCpu);
658 VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
659 }
660 pVCpu->CTX_SUFF(pVM)->hm.s.ForR3.rcInit = rc;
661}
662
663
664/**
665 * Enters VMX root mode operation on the current CPU.
666 *
667 * @returns VBox status code.
668 * @param pHostCpu The HM physical-CPU structure.
669 * @param pVM The cross context VM structure. Can be
670 * NULL, after a resume.
671 * @param HCPhysCpuPage Physical address of the VMXON region.
672 * @param pvCpuPage Pointer to the VMXON region.
673 */
674static int hmR0VmxEnterRootMode(PHMPHYSCPU pHostCpu, PVMCC pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage)
675{
676 Assert(pHostCpu);
677 Assert(HCPhysCpuPage && HCPhysCpuPage != NIL_RTHCPHYS);
678 Assert(RT_ALIGN_T(HCPhysCpuPage, _4K, RTHCPHYS) == HCPhysCpuPage);
679 Assert(pvCpuPage);
680 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
681
682 if (pVM)
683 {
684 /* Write the VMCS revision identifier to the VMXON region. */
685 *(uint32_t *)pvCpuPage = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
686 }
687
688 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
689 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
690
691 /* Enable the VMX bit in CR4 if necessary. */
692 RTCCUINTREG const uOldCr4 = SUPR0ChangeCR4(X86_CR4_VMXE, RTCCUINTREG_MAX);
693
694 /* Record whether VMXE was already set prior to us enabling it above. */
695 pHostCpu->fVmxeAlreadyEnabled = RT_BOOL(uOldCr4 & X86_CR4_VMXE);
696
697 /* Enter VMX root mode. */
698 int rc = VMXEnable(HCPhysCpuPage);
699 if (RT_FAILURE(rc))
700 {
701 /* Restore CR4.VMXE if it was not set prior to our attempt to set it above. */
702 if (!pHostCpu->fVmxeAlreadyEnabled)
703 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
704
705 if (pVM)
706 pVM->hm.s.ForR3.vmx.HCPhysVmxEnableError = HCPhysCpuPage;
707 }
708
709 /* Restore interrupts. */
710 ASMSetFlags(fEFlags);
711 return rc;
712}
713
714
715/**
716 * Exits VMX root mode operation on the current CPU.
717 *
718 * @returns VBox status code.
719 * @param pHostCpu The HM physical-CPU structure.
720 */
721static int hmR0VmxLeaveRootMode(PHMPHYSCPU pHostCpu)
722{
723 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
724
725 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
726 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
727
728 /* If we're for some reason not in VMX root mode, then don't leave it. */
729 RTCCUINTREG const uHostCr4 = ASMGetCR4();
730
731 int rc;
732 if (uHostCr4 & X86_CR4_VMXE)
733 {
734 /* Exit VMX root mode and clear the VMX bit in CR4. */
735 VMXDisable();
736
737 /* Clear CR4.VMXE only if it was clear prior to us setting it. */
738 if (!pHostCpu->fVmxeAlreadyEnabled)
739 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
740
741 rc = VINF_SUCCESS;
742 }
743 else
744 rc = VERR_VMX_NOT_IN_VMX_ROOT_MODE;
745
746 /* Restore interrupts. */
747 ASMSetFlags(fEFlags);
748 return rc;
749}
750
751
752/**
753 * Allocates pages as specified by an array of VMX page-allocation info
754 * objects.
755 *
756 * The pages' contents are zeroed after allocation.
757 *
758 * @returns VBox status code.
759 * @param phMemObj Where to return the handle to the allocation.
760 * @param paAllocInfo The pointer to the first element of the VMX
761 * page-allocation info object array.
762 * @param cEntries The number of elements in the @a paAllocInfo array.
763 */
764static int hmR0VmxPagesAllocZ(PRTR0MEMOBJ phMemObj, PVMXPAGEALLOCINFO paAllocInfo, uint32_t cEntries)
765{
766 *phMemObj = NIL_RTR0MEMOBJ;
767
768 /* Figure out how many pages to allocate. */
769 uint32_t cPages = 0;
770 for (uint32_t iPage = 0; iPage < cEntries; iPage++)
771 cPages += !!paAllocInfo[iPage].fValid;
772
773 /* Allocate the pages. */
774 if (cPages)
775 {
776 size_t const cbPages = cPages << HOST_PAGE_SHIFT;
777 int rc = RTR0MemObjAllocPage(phMemObj, cbPages, false /* fExecutable */);
778 if (RT_FAILURE(rc))
779 return rc;
780
781 /* Zero the contents and assign each page to the corresponding VMX page-allocation entry. */
782 void *pvFirstPage = RTR0MemObjAddress(*phMemObj);
783 RT_BZERO(pvFirstPage, cbPages);
784
785 uint32_t iPage = 0;
786 for (uint32_t i = 0; i < cEntries; i++)
787 if (paAllocInfo[i].fValid)
788 {
789 RTHCPHYS const HCPhysPage = RTR0MemObjGetPagePhysAddr(*phMemObj, iPage);
790 void *pvPage = (void *)((uintptr_t)pvFirstPage + (iPage << X86_PAGE_4K_SHIFT));
791 Assert(HCPhysPage && HCPhysPage != NIL_RTHCPHYS);
792 AssertPtr(pvPage);
793
794 Assert(paAllocInfo[i].pHCPhys);
795 Assert(paAllocInfo[i].ppVirt);
796 *paAllocInfo[i].pHCPhys = HCPhysPage; /* Index with 'i' (info array), not 'iPage' (allocated-page index); the two diverge once an invalid entry has been skipped. */
797 *paAllocInfo[i].ppVirt = pvPage;
798
799 /* Move to next page. */
800 ++iPage;
801 }
802
803 /* Make sure all valid (requested) pages have been assigned. */
804 Assert(iPage == cPages);
805 }
806 return VINF_SUCCESS;
807}
808
809
810/**
811 * Frees pages allocated using hmR0VmxPagesAllocZ.
812 *
813 * @param phMemObj Pointer to the memory object handle. Will be set to
814 * NIL.
815 */
816DECL_FORCE_INLINE(void) hmR0VmxPagesFree(PRTR0MEMOBJ phMemObj)
817{
818 /* We can clean up wholesale since it's all one allocation. */
819 if (*phMemObj != NIL_RTR0MEMOBJ)
820 {
821 RTR0MemObjFree(*phMemObj, true /* fFreeMappings */);
822 *phMemObj = NIL_RTR0MEMOBJ;
823 }
824}
825
826
827/**
828 * Initializes a VMCS info. object.
829 *
830 * @param pVmcsInfo The VMCS info. object.
831 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
832 */
833static void hmR0VmxVmcsInfoInit(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
834{
835 RT_ZERO(*pVmcsInfo);
836 RT_ZERO(*pVmcsInfoShared);
837
838 pVmcsInfo->pShared = pVmcsInfoShared;
839 Assert(pVmcsInfo->hMemObj == NIL_RTR0MEMOBJ);
840 pVmcsInfo->HCPhysVmcs = NIL_RTHCPHYS;
841 pVmcsInfo->HCPhysShadowVmcs = NIL_RTHCPHYS;
842 pVmcsInfo->HCPhysMsrBitmap = NIL_RTHCPHYS;
843 pVmcsInfo->HCPhysGuestMsrLoad = NIL_RTHCPHYS;
844 pVmcsInfo->HCPhysGuestMsrStore = NIL_RTHCPHYS;
845 pVmcsInfo->HCPhysHostMsrLoad = NIL_RTHCPHYS;
846 pVmcsInfo->HCPhysVirtApic = NIL_RTHCPHYS;
847 pVmcsInfo->HCPhysEPTP = NIL_RTHCPHYS;
848 pVmcsInfo->u64VmcsLinkPtr = NIL_RTHCPHYS;
849 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
850 pVmcsInfo->idHostCpuExec = NIL_RTCPUID;
851}
852
853
854/**
855 * Frees the VT-x structures for a VMCS info. object.
856 *
857 * @param pVmcsInfo The VMCS info. object.
858 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
859 */
860static void hmR0VmxVmcsInfoFree(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
861{
862 hmR0VmxPagesFree(&pVmcsInfo->hMemObj);
863 hmR0VmxVmcsInfoInit(pVmcsInfo, pVmcsInfoShared);
864}
865
866
867/**
868 * Allocates the VT-x structures for a VMCS info. object.
869 *
870 * @returns VBox status code.
871 * @param pVCpu The cross context virtual CPU structure.
872 * @param pVmcsInfo The VMCS info. object.
873 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
874 *
875 * @remarks The caller is expected to take care of any and all allocation failures.
876 * This function will not perform any cleanup for failures half-way
877 * through.
878 */
879static int hmR0VmxAllocVmcsInfo(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
880{
881 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
882
883 bool const fMsrBitmaps = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS);
884 bool const fShadowVmcs = !fIsNstGstVmcs ? pVM->hmr0.s.vmx.fUseVmcsShadowing : pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing;
885 Assert(!pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing); /* VMCS shadowing is not yet exposed to the guest. */
886 VMXPAGEALLOCINFO aAllocInfo[] =
887 {
888 { true, 0 /* Unused */, &pVmcsInfo->HCPhysVmcs, &pVmcsInfo->pvVmcs },
889 { true, 0 /* Unused */, &pVmcsInfo->HCPhysGuestMsrLoad, &pVmcsInfo->pvGuestMsrLoad },
890 { true, 0 /* Unused */, &pVmcsInfo->HCPhysHostMsrLoad, &pVmcsInfo->pvHostMsrLoad },
891 { fMsrBitmaps, 0 /* Unused */, &pVmcsInfo->HCPhysMsrBitmap, &pVmcsInfo->pvMsrBitmap },
892 { fShadowVmcs, 0 /* Unused */, &pVmcsInfo->HCPhysShadowVmcs, &pVmcsInfo->pvShadowVmcs },
893 };
894
895 int rc = hmR0VmxPagesAllocZ(&pVmcsInfo->hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
896 if (RT_FAILURE(rc))
897 return rc;
898
899 /*
900 * We use the same page for the VM-entry MSR-load and VM-exit MSR-store areas,
901 * because they contain a symmetric list of guest MSRs to load on VM-entry and store on VM-exit.
902 */
903 AssertCompile(RT_ELEMENTS(aAllocInfo) > 0);
904 Assert(pVmcsInfo->HCPhysGuestMsrLoad != NIL_RTHCPHYS);
905 pVmcsInfo->pvGuestMsrStore = pVmcsInfo->pvGuestMsrLoad;
906 pVmcsInfo->HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrLoad;
907
908 /*
909 * Get the virtual-APIC page rather than allocating it again.
910 */
911 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW)
912 {
913 if (!fIsNstGstVmcs)
914 {
915 if (PDMHasApic(pVM))
916 {
917 rc = APICGetApicPageForCpu(pVCpu, &pVmcsInfo->HCPhysVirtApic, (PRTR0PTR)&pVmcsInfo->pbVirtApic, NULL /*pR3Ptr*/);
918 if (RT_FAILURE(rc))
919 return rc;
920 Assert(pVmcsInfo->pbVirtApic);
921 Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
922 }
923 }
924 else
925 {
926 /* These are set up later while merging the nested-guest VMCS. */
927 Assert(pVmcsInfo->pbVirtApic == NULL);
928 Assert(pVmcsInfo->HCPhysVirtApic == NIL_RTHCPHYS);
929 }
930 }
931
932 return VINF_SUCCESS;
933}
934
935
936/**
937 * Free all VT-x structures for the VM.
938 *
939 * @param pVM The cross context VM structure.
940 */
941static void hmR0VmxStructsFree(PVMCC pVM)
942{
943 hmR0VmxPagesFree(&pVM->hmr0.s.vmx.hMemObj);
944#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
945 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
946 {
947 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsFields);
948 pVM->hmr0.s.vmx.paShadowVmcsFields = NULL;
949 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsRoFields);
950 pVM->hmr0.s.vmx.paShadowVmcsRoFields = NULL;
951 }
952#endif
953
954 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
955 {
956 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
957 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
958#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
959 if (pVM->cpum.ro.GuestFeatures.fVmx)
960 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
961#endif
962 }
963}
964
965
966/**
967 * Allocate all VT-x structures for the VM.
968 *
969 * @returns IPRT status code.
970 * @param pVM The cross context VM structure.
971 *
972 * @remarks This function will clean up on memory allocation failures.
973 */
974static int hmR0VmxStructsAlloc(PVMCC pVM)
975{
976 /*
977 * Sanity check the VMCS size reported by the CPU as we assume 4KB allocations.
978 * The VMCS size cannot be more than 4096 bytes.
979 *
980 * See Intel spec. Appendix A.1 "Basic VMX Information".
981 */
982 uint32_t const cbVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_SIZE);
983 if (cbVmcs <= X86_PAGE_4K_SIZE)
984 { /* likely */ }
985 else
986 {
987 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_INVALID_VMCS_SIZE;
988 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
989 }
990
991 /*
992 * Allocate per-VM VT-x structures.
993 */
994 bool const fVirtApicAccess = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
995 bool const fUseVmcsShadowing = pVM->hmr0.s.vmx.fUseVmcsShadowing;
996 VMXPAGEALLOCINFO aAllocInfo[] =
997 {
998 { fVirtApicAccess, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysApicAccess, (PRTR0PTR)&pVM->hmr0.s.vmx.pbApicAccess },
999 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmreadBitmap, &pVM->hmr0.s.vmx.pvVmreadBitmap },
1000 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmwriteBitmap, &pVM->hmr0.s.vmx.pvVmwriteBitmap },
1001#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1002 { true, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysScratch, (PRTR0PTR)&pVM->hmr0.s.vmx.pbScratch },
1003#endif
1004 };
1005
1006 int rc = hmR0VmxPagesAllocZ(&pVM->hmr0.s.vmx.hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
1007 if (RT_SUCCESS(rc))
1008 {
1009#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1010 /* Allocate the shadow VMCS-fields array. */
1011 if (fUseVmcsShadowing)
1012 {
1013 Assert(!pVM->hmr0.s.vmx.cShadowVmcsFields);
1014 Assert(!pVM->hmr0.s.vmx.cShadowVmcsRoFields);
1015 pVM->hmr0.s.vmx.paShadowVmcsFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
1016 pVM->hmr0.s.vmx.paShadowVmcsRoFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
1017 if (!pVM->hmr0.s.vmx.paShadowVmcsFields || !pVM->hmr0.s.vmx.paShadowVmcsRoFields)
1018 rc = VERR_NO_MEMORY;
1019 }
1020#endif
1021
1022 /*
1023 * Allocate per-VCPU VT-x structures.
1024 */
1025 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus && RT_SUCCESS(rc); idCpu++)
1026 {
1027 /* Allocate the guest VMCS structures. */
1028 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1029 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
1030
1031#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1032 /* Allocate the nested-guest VMCS structures, when the VMX feature is exposed to the guest. */
1033 if (pVM->cpum.ro.GuestFeatures.fVmx && RT_SUCCESS(rc))
1034 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
1035#endif
1036 }
1037 if (RT_SUCCESS(rc))
1038 return VINF_SUCCESS;
1039 }
1040 hmR0VmxStructsFree(pVM);
1041 return rc;
1042}
1043
1044
1045/**
1046 * Pre-initializes non-zero fields in VMX structures that will be allocated.
1047 *
1048 * @param pVM The cross context VM structure.
1049 */
1050static void hmR0VmxStructsInit(PVMCC pVM)
1051{
1052 /* Paranoia. */
1053 Assert(pVM->hmr0.s.vmx.pbApicAccess == NULL);
1054#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1055 Assert(pVM->hmr0.s.vmx.pbScratch == NULL);
1056#endif
1057
1058 /*
1059 * Initialize members up-front so we can cleanup en masse on allocation failures.
1060 */
1061#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1062 pVM->hmr0.s.vmx.HCPhysScratch = NIL_RTHCPHYS;
1063#endif
1064 pVM->hmr0.s.vmx.HCPhysApicAccess = NIL_RTHCPHYS;
1065 pVM->hmr0.s.vmx.HCPhysVmreadBitmap = NIL_RTHCPHYS;
1066 pVM->hmr0.s.vmx.HCPhysVmwriteBitmap = NIL_RTHCPHYS;
1067 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1068 {
1069 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1070 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
1071 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
1072 }
1073}
1074
1075#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1076/**
1077 * Returns whether an MSR at the given MSR-bitmap offset is intercepted or not.
1078 *
1079 * @returns @c true if the MSR is intercepted, @c false otherwise.
1080 * @param pbMsrBitmap The MSR bitmap.
1081 * @param offMsr The MSR byte offset.
1082 * @param iBit The bit offset from the byte offset.
1083 */
1084DECLINLINE(bool) hmR0VmxIsMsrBitSet(uint8_t const *pbMsrBitmap, uint16_t offMsr, int32_t iBit)
1085{
1086 Assert(offMsr + (iBit >> 3) <= X86_PAGE_4K_SIZE);
1087 return ASMBitTest(pbMsrBitmap, (offMsr << 3) + iBit);
1088}
1089#endif
1090
1091/**
1092 * Sets the permission bits for the specified MSR in the given MSR bitmap.
1093 *
1094 * If the passed VMCS is a nested-guest VMCS, this function ensures that the
1095 * read/write intercept is cleared from the MSR bitmap used for hardware-assisted
1096 * VMX execution of the nested-guest only if the nested-guest itself is not
1097 * intercepting read/write access to this MSR.
1098 *
1099 * @param pVCpu The cross context virtual CPU structure.
1100 * @param pVmcsInfo The VMCS info. object.
1101 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1102 * @param idMsr The MSR value.
1103 * @param fMsrpm The MSR permissions (see VMXMSRPM_XXX). This must
1104 * include both a read -and- a write permission!
1105 *
1106 * @sa CPUMGetVmxMsrPermission.
1107 * @remarks Can be called with interrupts disabled.
1108 */
1109static void hmR0VmxSetMsrPermission(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs, uint32_t idMsr, uint32_t fMsrpm)
1110{
1111 uint8_t *pbMsrBitmap = (uint8_t *)pVmcsInfo->pvMsrBitmap;
1112 Assert(pbMsrBitmap);
1113 Assert(VMXMSRPM_IS_FLAG_VALID(fMsrpm));
1114
1115 /*
1116 * MSR-bitmap Layout:
1117 * Byte index MSR range Interpreted as
1118 * 0x000 - 0x3ff 0x00000000 - 0x00001fff Low MSR read bits.
1119 * 0x400 - 0x7ff 0xc0000000 - 0xc0001fff High MSR read bits.
1120 * 0x800 - 0xbff 0x00000000 - 0x00001fff Low MSR write bits.
1121 * 0xc00 - 0xfff 0xc0000000 - 0xc0001fff High MSR write bits.
1122 *
1123 * A bit corresponding to an MSR within the above ranges causes a VM-exit
1124 * if the bit is 1 on executions of RDMSR/WRMSR. If an MSR falls outside
1125 * these ranges, accessing it always causes a VM-exit.
1126 *
1127 * See Intel spec. 24.6.9 "MSR-Bitmap Address".
1128 */
1129 uint16_t const offBitmapRead = 0;
1130 uint16_t const offBitmapWrite = 0x800;
1131 uint16_t offMsr;
1132 int32_t iBit;
1133 if (idMsr <= UINT32_C(0x00001fff))
1134 {
1135 offMsr = 0;
1136 iBit = idMsr;
1137 }
1138 else if (idMsr - UINT32_C(0xc0000000) <= UINT32_C(0x00001fff))
1139 {
1140 offMsr = 0x400;
1141 iBit = idMsr - UINT32_C(0xc0000000);
1142 }
1143 else
1144 AssertMsgFailedReturnVoid(("Invalid MSR %#RX32\n", idMsr));
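    /* Worked example: for MSR_K8_LSTAR (0xc0000082) this yields offMsr = 0x400 and iBit = 0x82,
       i.e. the read-intercept bit is at byte 0x400 + (0x82 >> 3) = 0x410, bit 2, and the
       write-intercept bit at byte 0xc10, bit 2. The unsigned subtraction above also rejects
       MSRs between 0x2000 and 0xbfffffff, since those wrap to values larger than 0x1fff. */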
1145
1146 /*
1147 * Set the MSR read permission.
1148 */
1149 uint16_t const offMsrRead = offBitmapRead + offMsr;
1150 Assert(offMsrRead + (iBit >> 3) < offBitmapWrite);
1151 if (fMsrpm & VMXMSRPM_ALLOW_RD)
1152 {
1153#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1154 bool const fClear = !fIsNstGstVmcs ? true
1155 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrRead, iBit);
1156#else
1157 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1158 bool const fClear = true;
1159#endif
1160 if (fClear)
1161 ASMBitClear(pbMsrBitmap, (offMsrRead << 3) + iBit);
1162 }
1163 else
1164 ASMBitSet(pbMsrBitmap, (offMsrRead << 3) + iBit);
1165
1166 /*
1167 * Set the MSR write permission.
1168 */
1169 uint16_t const offMsrWrite = offBitmapWrite + offMsr;
1170 Assert(offMsrWrite + (iBit >> 3) < X86_PAGE_4K_SIZE);
1171 if (fMsrpm & VMXMSRPM_ALLOW_WR)
1172 {
1173#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1174 bool const fClear = !fIsNstGstVmcs ? true
1175 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrWrite, iBit);
1176#else
1177 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1178 bool const fClear = true;
1179#endif
1180 if (fClear)
1181 ASMBitClear(pbMsrBitmap, (offMsrWrite << 3) + iBit);
1182 }
1183 else
1184 ASMBitSet(pbMsrBitmap, (offMsrWrite << 3) + iBit);
1185}
1186
1187
1188/**
1189 * Updates the VMCS with the number of effective MSRs in the auto-load/store MSR
1190 * area.
1191 *
1192 * @returns VBox status code.
1193 * @param pVCpu The cross context virtual CPU structure.
1194 * @param pVmcsInfo The VMCS info. object.
1195 * @param cMsrs The number of MSRs.
1196 */
1197static int hmR0VmxSetAutoLoadStoreMsrCount(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint32_t cMsrs)
1198{
1199 /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */
1200 uint32_t const cMaxSupportedMsrs = VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc);
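    /* The limit stems from IA32_VMX_MISC[27:25]: the recommended maximum is 512 * (N + 1)
       MSRs per area, i.e. 512 when that field is zero. See Intel spec. Appendix A.6 "Miscellaneous Data". */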
1201 if (RT_LIKELY(cMsrs < cMaxSupportedMsrs))
1202 {
1203 /* Commit the MSR counts to the VMCS and update the cache. */
1204 if (pVmcsInfo->cEntryMsrLoad != cMsrs)
1205 {
1206 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1207 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cMsrs); AssertRC(rc);
1208 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1209 pVmcsInfo->cEntryMsrLoad = cMsrs;
1210 pVmcsInfo->cExitMsrStore = cMsrs;
1211 pVmcsInfo->cExitMsrLoad = cMsrs;
1212 }
1213 return VINF_SUCCESS;
1214 }
1215
1216 LogRel(("Auto-load/store MSR count exceeded! cMsrs=%u MaxSupported=%u\n", cMsrs, cMaxSupportedMsrs));
1217 pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_GUEST_MSR_STORAGE;
1218 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1219}
1220
1221
1222/**
1223 * Adds a new (or updates the value of an existing) guest/host MSR
1224 * pair to be swapped during the world-switch as part of the
1225 * auto-load/store MSR area in the VMCS.
1226 *
1227 * @returns VBox status code.
1228 * @param pVCpu The cross context virtual CPU structure.
1229 * @param pVmxTransient The VMX-transient structure.
1230 * @param idMsr The MSR.
1231 * @param uGuestMsrValue Value of the guest MSR.
1232 * @param fSetReadWrite Whether to set the guest read/write access of this
1233 * MSR (thus not causing a VM-exit).
1234 * @param fUpdateHostMsr Whether to update the value of the host MSR if
1235 * necessary.
1236 */
1237static int hmR0VmxAddAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr, uint64_t uGuestMsrValue,
1238 bool fSetReadWrite, bool fUpdateHostMsr)
1239{
1240 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1241 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1242 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1243 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1244 uint32_t i;
1245
1246 /* Paranoia. */
1247 Assert(pGuestMsrLoad);
1248
1249#ifndef DEBUG_bird
1250 LogFlowFunc(("pVCpu=%p idMsr=%#RX32 uGuestMsrValue=%#RX64\n", pVCpu, idMsr, uGuestMsrValue));
1251#endif
1252
1253 /* Check if the MSR already exists in the VM-entry MSR-load area. */
1254 for (i = 0; i < cMsrs; i++)
1255 {
1256 if (pGuestMsrLoad[i].u32Msr == idMsr)
1257 break;
1258 }
1259
1260 bool fAdded = false;
1261 if (i == cMsrs)
1262 {
1263 /* The MSR does not exist, bump the MSR count to make room for the new MSR. */
1264 ++cMsrs;
1265 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1266 AssertMsgRCReturn(rc, ("Insufficient space to add MSR to VM-entry MSR-load/store area %u\n", idMsr), rc);
1267
1268 /* Set the guest to read/write this MSR without causing VM-exits. */
1269 if ( fSetReadWrite
1270 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS))
1271 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_ALLOW_RD_WR);
1272
1273 Log4Func(("Added MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1274 fAdded = true;
1275 }
1276
1277 /* Update the MSR value for the newly added or already existing MSR. */
1278 pGuestMsrLoad[i].u32Msr = idMsr;
1279 pGuestMsrLoad[i].u64Value = uGuestMsrValue;
1280
1281 /* Create the corresponding slot in the VM-exit MSR-store area if we use a different page. */
1282 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1283 {
1284 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1285 pGuestMsrStore[i].u32Msr = idMsr;
1286 pGuestMsrStore[i].u64Value = uGuestMsrValue;
1287 }
1288
1289 /* Update the corresponding slot in the host MSR area. */
1290 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1291 Assert(pHostMsr != pVmcsInfo->pvGuestMsrLoad);
1292 Assert(pHostMsr != pVmcsInfo->pvGuestMsrStore);
1293 pHostMsr[i].u32Msr = idMsr;
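    /* The same slot index is used for the guest-load, guest-store and host-load areas; strict
       builds verify this pairing in hmR0VmxCheckAutoLoadStoreMsrs(). */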
1294
1295 /*
1296 * Only if the caller requests to update the host MSR value AND we've newly added the
1297 * MSR to the host MSR area do we actually update the value. Otherwise, it will be
1298 * updated by hmR0VmxUpdateAutoLoadHostMsrs().
1299 *
1300 * We do this for performance reasons since reading MSRs may be quite expensive.
1301 */
1302 if (fAdded)
1303 {
1304 if (fUpdateHostMsr)
1305 {
1306 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
1307 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1308 pHostMsr[i].u64Value = ASMRdMsr(idMsr);
1309 }
1310 else
1311 {
1312 /* Someone else can do the work. */
1313 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
1314 }
1315 }
1316 return VINF_SUCCESS;
1317}
1318
1319
1320/**
1321 * Removes a guest/host MSR pair to be swapped during the world-switch from the
1322 * auto-load/store MSR area in the VMCS.
1323 *
1324 * @returns VBox status code.
1325 * @param pVCpu The cross context virtual CPU structure.
1326 * @param pVmxTransient The VMX-transient structure.
1327 * @param idMsr The MSR.
1328 */
1329static int hmR0VmxRemoveAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr)
1330{
1331 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1332 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1333 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1334 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1335
1336#ifndef DEBUG_bird
1337 LogFlowFunc(("pVCpu=%p idMsr=%#RX32\n", pVCpu, idMsr));
1338#endif
1339
1340 for (uint32_t i = 0; i < cMsrs; i++)
1341 {
1342 /* Find the MSR. */
1343 if (pGuestMsrLoad[i].u32Msr == idMsr)
1344 {
1345 /*
1346 * If it's the last MSR, we only need to reduce the MSR count.
1347 * If it's -not- the last MSR, copy the last MSR in place of it and reduce the MSR count.
1348 */
1349 if (i < cMsrs - 1)
1350 {
1351 /* Remove it from the VM-entry MSR-load area. */
1352 pGuestMsrLoad[i].u32Msr = pGuestMsrLoad[cMsrs - 1].u32Msr;
1353 pGuestMsrLoad[i].u64Value = pGuestMsrLoad[cMsrs - 1].u64Value;
1354
1355 /* Remove it from the VM-exit MSR-store area if it's in a different page. */
1356 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1357 {
1358 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1359 Assert(pGuestMsrStore[i].u32Msr == idMsr);
1360 pGuestMsrStore[i].u32Msr = pGuestMsrStore[cMsrs - 1].u32Msr;
1361 pGuestMsrStore[i].u64Value = pGuestMsrStore[cMsrs - 1].u64Value;
1362 }
1363
1364 /* Remove it from the VM-exit MSR-load area. */
1365 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1366 Assert(pHostMsr[i].u32Msr == idMsr);
1367 pHostMsr[i].u32Msr = pHostMsr[cMsrs - 1].u32Msr;
1368 pHostMsr[i].u64Value = pHostMsr[cMsrs - 1].u64Value;
1369 }
1370
1371 /* Reduce the count to reflect the removed MSR and bail. */
1372 --cMsrs;
1373 break;
1374 }
1375 }
1376
1377 /* Update the VMCS if the count changed (meaning the MSR was found and removed). */
1378 if (cMsrs != pVmcsInfo->cEntryMsrLoad)
1379 {
1380 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1381 AssertRCReturn(rc, rc);
1382
1383 /* We're no longer swapping MSRs during the world-switch, intercept guest read/writes to them. */
1384 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1385 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_EXIT_RD | VMXMSRPM_EXIT_WR);
1386
1387 Log4Func(("Removed MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1388 return VINF_SUCCESS;
1389 }
1390
1391 return VERR_NOT_FOUND;
1392}
1393
1394
1395/**
1396 * Updates the value of all host MSRs in the VM-exit MSR-load area.
1397 *
1398 * @param pVCpu The cross context virtual CPU structure.
1399 * @param pVmcsInfo The VMCS info. object.
1400 *
1401 * @remarks No-long-jump zone!!!
1402 */
1403static void hmR0VmxUpdateAutoLoadHostMsrs(PCVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1404{
1405 RT_NOREF(pVCpu);
1406 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1407
1408 PVMXAUTOMSR pHostMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1409 uint32_t const cMsrs = pVmcsInfo->cExitMsrLoad;
1410 Assert(pHostMsrLoad);
1411 Assert(sizeof(*pHostMsrLoad) * cMsrs <= X86_PAGE_4K_SIZE);
1412 LogFlowFunc(("pVCpu=%p cMsrs=%u\n", pVCpu, cMsrs));
1413 for (uint32_t i = 0; i < cMsrs; i++)
1414 {
1415 /*
1416 * Performance hack for the host EFER MSR. We use the cached value rather than re-read it.
1417 * Strict builds will catch mismatches in hmR0VmxCheckAutoLoadStoreMsrs(). See @bugref{7368}.
1418 */
1419 if (pHostMsrLoad[i].u32Msr == MSR_K6_EFER)
1420 pHostMsrLoad[i].u64Value = g_uHmVmxHostMsrEfer;
1421 else
1422 pHostMsrLoad[i].u64Value = ASMRdMsr(pHostMsrLoad[i].u32Msr);
1423 }
1424}
1425
1426
1427/**
1428 * Saves a set of host MSRs to allow read/write passthru access to the guest and
1429 * perform lazy restoration of the host MSRs while leaving VT-x.
1430 *
1431 * @param pVCpu The cross context virtual CPU structure.
1432 *
1433 * @remarks No-long-jump zone!!!
1434 */
1435static void hmR0VmxLazySaveHostMsrs(PVMCPUCC pVCpu)
1436{
1437 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1438
1439 /*
1440 * Note: If you're adding MSRs here, make sure to update the MSR-bitmap accesses in hmR0VmxSetupVmcsProcCtls().
1441 */
1442 if (!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST))
1443 {
1444 Assert(!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)); /* Guest MSRs better not be loaded now. */
1445 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
1446 {
1447 pVCpu->hmr0.s.vmx.u64HostMsrLStar = ASMRdMsr(MSR_K8_LSTAR);
1448 pVCpu->hmr0.s.vmx.u64HostMsrStar = ASMRdMsr(MSR_K6_STAR);
1449 pVCpu->hmr0.s.vmx.u64HostMsrSfMask = ASMRdMsr(MSR_K8_SF_MASK);
1450 pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
1451 }
1452 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_SAVED_HOST;
1453 }
1454}
1455
1456
1457#ifdef VBOX_STRICT
1458
1459/**
1460 * Verifies that our cached host EFER MSR value has not changed since we cached it.
1461 *
1462 * @param pVmcsInfo The VMCS info. object.
1463 */
1464static void hmR0VmxCheckHostEferMsr(PCVMXVMCSINFO pVmcsInfo)
1465{
1466 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1467
1468 if (pVmcsInfo->u32ExitCtls & VMX_EXIT_CTLS_LOAD_EFER_MSR)
1469 {
1470 uint64_t const uHostEferMsr = ASMRdMsr(MSR_K6_EFER);
1471 uint64_t const uHostEferMsrCache = g_uHmVmxHostMsrEfer;
1472 uint64_t uVmcsEferMsrVmcs;
1473 int rc = VMXReadVmcs64(VMX_VMCS64_HOST_EFER_FULL, &uVmcsEferMsrVmcs);
1474 AssertRC(rc);
1475
1476 AssertMsgReturnVoid(uHostEferMsr == uVmcsEferMsrVmcs,
1477 ("EFER Host/VMCS mismatch! host=%#RX64 vmcs=%#RX64\n", uHostEferMsr, uVmcsEferMsrVmcs));
1478 AssertMsgReturnVoid(uHostEferMsr == uHostEferMsrCache,
1479 ("EFER Host/Cache mismatch! host=%#RX64 cache=%#RX64\n", uHostEferMsr, uHostEferMsrCache));
1480 }
1481}
1482
1483
1484/**
1485 * Verifies whether the guest/host MSR pairs in the auto-load/store area in the
1486 * VMCS are correct.
1487 *
1488 * @param pVCpu The cross context virtual CPU structure.
1489 * @param pVmcsInfo The VMCS info. object.
1490 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1491 */
1492static void hmR0VmxCheckAutoLoadStoreMsrs(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
1493{
1494 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1495
1496 /* Read the various MSR-area counts from the VMCS. */
1497 uint32_t cEntryLoadMsrs;
1498 uint32_t cExitStoreMsrs;
1499 uint32_t cExitLoadMsrs;
1500 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &cEntryLoadMsrs); AssertRC(rc);
1501 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &cExitStoreMsrs); AssertRC(rc);
1502 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &cExitLoadMsrs); AssertRC(rc);
1503
1504 /* Verify all the MSR counts are the same. */
1505 Assert(cEntryLoadMsrs == cExitStoreMsrs);
1506 Assert(cExitStoreMsrs == cExitLoadMsrs);
1507 uint32_t const cMsrs = cExitLoadMsrs;
1508
1509 /* Verify the MSR counts do not exceed the maximum count supported by the hardware. */
1510 Assert(cMsrs < VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc));
1511
1512 /* Verify the MSR counts are within the allocated page size. */
1513 Assert(sizeof(VMXAUTOMSR) * cMsrs <= X86_PAGE_4K_SIZE);
1514
1515 /* Verify the relevant contents of the MSR areas match. */
1516 PCVMXAUTOMSR pGuestMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1517 PCVMXAUTOMSR pGuestMsrStore = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1518 PCVMXAUTOMSR pHostMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1519 bool const fSeparateExitMsrStorePage = hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo);
1520 for (uint32_t i = 0; i < cMsrs; i++)
1521 {
1522 /* Verify that the MSRs are paired properly and that the host MSR has the correct value. */
1523 if (fSeparateExitMsrStorePage)
1524 {
1525 AssertMsgReturnVoid(pGuestMsrLoad->u32Msr == pGuestMsrStore->u32Msr,
1526 ("GuestMsrLoad=%#RX32 GuestMsrStore=%#RX32 cMsrs=%u\n",
1527 pGuestMsrLoad->u32Msr, pGuestMsrStore->u32Msr, cMsrs));
1528 }
1529
1530 AssertMsgReturnVoid(pHostMsrLoad->u32Msr == pGuestMsrLoad->u32Msr,
1531 ("HostMsrLoad=%#RX32 GuestMsrLoad=%#RX32 cMsrs=%u\n",
1532 pHostMsrLoad->u32Msr, pGuestMsrLoad->u32Msr, cMsrs));
1533
1534 uint64_t const u64HostMsr = ASMRdMsr(pHostMsrLoad->u32Msr);
1535 AssertMsgReturnVoid(pHostMsrLoad->u64Value == u64HostMsr,
1536 ("u32Msr=%#RX32 VMCS Value=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n",
1537 pHostMsrLoad->u32Msr, pHostMsrLoad->u64Value, u64HostMsr, cMsrs));
1538
1539 /* Verify that cached host EFER MSR matches what's loaded on the CPU. */
1540 bool const fIsEferMsr = RT_BOOL(pHostMsrLoad->u32Msr == MSR_K6_EFER);
1541 AssertMsgReturnVoid(!fIsEferMsr || u64HostMsr == g_uHmVmxHostMsrEfer,
1542 ("Cached=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n", g_uHmVmxHostMsrEfer, u64HostMsr, cMsrs));
1543
1544 /* Verify that the accesses are as expected in the MSR bitmap for auto-load/store MSRs. */
1545 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1546 {
1547 uint32_t const fMsrpm = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, pGuestMsrLoad->u32Msr);
1548 if (fIsEferMsr)
1549 {
1550 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_RD), ("Passthru read for EFER MSR!?\n"));
1551 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_WR), ("Passthru write for EFER MSR!?\n"));
1552 }
1553 else
1554 {
1555 /* Verify LBR MSRs (used only for debugging) are intercepted. We don't passthru these MSRs to the guest yet. */
1556 PCVMCC pVM = pVCpu->CTX_SUFF(pVM);
1557 if ( pVM->hmr0.s.vmx.fLbr
1558 && ( hmR0VmxIsLbrBranchFromMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1559 || hmR0VmxIsLbrBranchToMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1560 || pGuestMsrLoad->u32Msr == pVM->hmr0.s.vmx.idLbrTosMsr))
1561 {
1562 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_EXIT_RD_WR,
1563 ("u32Msr=%#RX32 cMsrs=%u Passthru read/write for LBR MSRs!\n",
1564 pGuestMsrLoad->u32Msr, cMsrs));
1565 }
1566 else if (!fIsNstGstVmcs)
1567 {
1568 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_ALLOW_RD_WR,
1569 ("u32Msr=%#RX32 cMsrs=%u No passthru read/write!\n", pGuestMsrLoad->u32Msr, cMsrs));
1570 }
1571 else
1572 {
1573 /*
1574 * A nested-guest VMCS must -also- allow read/write passthrough for the MSR for us to
1575 * execute a nested-guest with MSR passthrough.
1576 *
1577 * Check if the nested-guest MSR bitmap allows passthrough, and if so, assert that we
1578 * allow passthrough too.
1579 */
1580 void const *pvMsrBitmapNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap;
1581 Assert(pvMsrBitmapNstGst);
1582 uint32_t const fMsrpmNstGst = CPUMGetVmxMsrPermission(pvMsrBitmapNstGst, pGuestMsrLoad->u32Msr);
1583 AssertMsgReturnVoid(fMsrpm == fMsrpmNstGst,
1584 ("u32Msr=%#RX32 cMsrs=%u Permission mismatch fMsrpm=%#x fMsrpmNstGst=%#x!\n",
1585 pGuestMsrLoad->u32Msr, cMsrs, fMsrpm, fMsrpmNstGst));
1586 }
1587 }
1588 }
1589
1590 /* Move to the next MSR. */
1591 pHostMsrLoad++;
1592 pGuestMsrLoad++;
1593 pGuestMsrStore++;
1594 }
1595}
1596
1597#endif /* VBOX_STRICT */
1598
1599/**
1600 * Flushes the TLB using EPT.
1601 *
1602 * @param pVCpu The cross context virtual CPU structure of the calling
1603 * EMT. Can be NULL depending on @a enmTlbFlush.
1604 * @param pVmcsInfo The VMCS info. object. Can be NULL depending on @a
1605 * enmTlbFlush.
1606 * @param enmTlbFlush Type of flush.
1607 *
1608 * @remarks Caller is responsible for making sure this function is called only
1609 * when NestedPaging is supported and providing @a enmTlbFlush that is
1610 * supported by the CPU.
1611 * @remarks Can be called with interrupts disabled.
1612 */
1613static void hmR0VmxFlushEpt(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, VMXTLBFLUSHEPT enmTlbFlush)
1614{
1615 uint64_t au64Descriptor[2];
1616 if (enmTlbFlush == VMXTLBFLUSHEPT_ALL_CONTEXTS)
1617 au64Descriptor[0] = 0;
1618 else
1619 {
1620 Assert(pVCpu);
1621 Assert(pVmcsInfo);
1622 au64Descriptor[0] = pVmcsInfo->HCPhysEPTP;
1623 }
1624 au64Descriptor[1] = 0; /* MBZ. Intel spec. 33.3 "VMX Instructions" */
1625
1626 int rc = VMXR0InvEPT(enmTlbFlush, &au64Descriptor[0]);
1627 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %#x %#RHp failed. rc=%Rrc\n", enmTlbFlush, au64Descriptor[0], rc));
1628
1629 if ( RT_SUCCESS(rc)
1630 && pVCpu)
1631 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
1632}
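/*
 * Illustrative usage, mirroring the calls later in this file: a single-context flush for the
 * current EPTP is issued as
 *
 *     hmR0VmxFlushEpt(pVCpu, pVmcsInfo, VMXTLBFLUSHEPT_SINGLE_CONTEXT);
 *
 * which places pVmcsInfo->HCPhysEPTP in the first quadword of the INVEPT descriptor and zero
 * in the reserved second quadword, while an all-contexts flush uses an all-zero descriptor
 * and may be invoked with pVCpu and pVmcsInfo set to NULL.
 */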
1633
1634
1635/**
1636 * Flushes the TLB using VPID.
1637 *
1638 * @param pVCpu The cross context virtual CPU structure of the calling
1639 * EMT. Can be NULL depending on @a enmTlbFlush.
1640 * @param enmTlbFlush Type of flush.
1641 * @param GCPtr Virtual address of the page to flush (can be 0 depending
1642 * on @a enmTlbFlush).
1643 *
1644 * @remarks Can be called with interrupts disabled.
1645 */
1646static void hmR0VmxFlushVpid(PVMCPUCC pVCpu, VMXTLBFLUSHVPID enmTlbFlush, RTGCPTR GCPtr)
1647{
1648 Assert(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid);
1649
1650 uint64_t au64Descriptor[2];
1651 if (enmTlbFlush == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1652 {
1653 au64Descriptor[0] = 0;
1654 au64Descriptor[1] = 0;
1655 }
1656 else
1657 {
1658 AssertPtr(pVCpu);
1659 AssertMsg(pVCpu->hmr0.s.uCurrentAsid != 0, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1660 AssertMsg(pVCpu->hmr0.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1661 au64Descriptor[0] = pVCpu->hmr0.s.uCurrentAsid;
1662 au64Descriptor[1] = GCPtr;
1663 }
1664
1665 int rc = VMXR0InvVPID(enmTlbFlush, &au64Descriptor[0]);
1666 AssertMsg(rc == VINF_SUCCESS,
1667 ("VMXR0InvVPID %#x %u %RGv failed with %Rrc\n", enmTlbFlush, pVCpu ? pVCpu->hmr0.s.uCurrentAsid : 0, GCPtr, rc));
1668
1669 if ( RT_SUCCESS(rc)
1670 && pVCpu)
1671 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
1672 NOREF(rc);
1673}
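/*
 * Illustrative usage: flushing a single guest-linear address, as VMXR0InvalidatePage() does
 * below, is
 *
 *     hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_INDIV_ADDR, GCVirt);
 *
 * which builds an INVVPID descriptor with the current VPID (uCurrentAsid) in the low 16 bits
 * of the first quadword and the linear address in the second quadword; the all-contexts
 * variant passes an all-zero descriptor instead.
 */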
1674
1675
1676/**
1677 * Invalidates a guest page by guest virtual address. Only relevant for EPT/VPID,
1678 * otherwise there is nothing really to invalidate.
1679 *
1680 * @returns VBox status code.
1681 * @param pVCpu The cross context virtual CPU structure.
1682 * @param GCVirt Guest virtual address of the page to invalidate.
1683 */
1684VMMR0DECL(int) VMXR0InvalidatePage(PVMCPUCC pVCpu, RTGCPTR GCVirt)
1685{
1686 AssertPtr(pVCpu);
1687 LogFlowFunc(("pVCpu=%p GCVirt=%RGv\n", pVCpu, GCVirt));
1688
1689 if (!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH))
1690 {
1691 /*
1692         * We must invalidate the guest TLB entry in either case; we cannot ignore it even for
1693 * the EPT case. See @bugref{6043} and @bugref{6177}.
1694 *
1695 * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxFlushTLB*()
1696         * as this function may be called in a loop with individual addresses.
1697 */
1698 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1699 if (pVM->hmr0.s.vmx.fVpid)
1700 {
1701 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
1702 {
1703 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_INDIV_ADDR, GCVirt);
1704 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt);
1705 }
1706 else
1707 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1708 }
1709 else if (pVM->hmr0.s.fNestedPaging)
1710 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1711 }
1712
1713 return VINF_SUCCESS;
1714}
1715
1716
1717/**
1718 * Dummy placeholder for tagged-TLB flush handling before VM-entry. Used in the
1719 * case where neither EPT nor VPID is supported by the CPU.
1720 *
1721 * @param pHostCpu The HM physical-CPU structure.
1722 * @param pVCpu The cross context virtual CPU structure.
1723 *
1724 * @remarks Called with interrupts disabled.
1725 */
1726static void hmR0VmxFlushTaggedTlbNone(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1727{
1728 AssertPtr(pVCpu);
1729 AssertPtr(pHostCpu);
1730
1731 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1732
1733 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1734 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1735 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1736 pVCpu->hmr0.s.fForceTLBFlush = false;
1737 return;
1738}
1739
1740
1741/**
1742 * Flushes the tagged-TLB entries for EPT+VPID CPUs as necessary.
1743 *
1744 * @param pHostCpu The HM physical-CPU structure.
1745 * @param pVCpu The cross context virtual CPU structure.
1746 * @param pVmcsInfo The VMCS info. object.
1747 *
1748 * @remarks All references to "ASID" in this function pertain to "VPID" in Intel's
1749 *          nomenclature. We use "ASID" to avoid confusion in comparisons, since the
1750 *          host-CPU copies are named "ASID".
1751 *
1752 * @remarks Called with interrupts disabled.
1753 */
1754static void hmR0VmxFlushTaggedTlbBoth(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1755{
1756#ifdef VBOX_WITH_STATISTICS
1757 bool fTlbFlushed = false;
1758# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { fTlbFlushed = true; } while (0)
1759# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { \
1760 if (!fTlbFlushed) \
1761 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); \
1762 } while (0)
1763#else
1764# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { } while (0)
1765# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { } while (0)
1766#endif
1767
1768 AssertPtr(pVCpu);
1769 AssertPtr(pHostCpu);
1770 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1771
1772 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1773 AssertMsg(pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid,
1774 ("hmR0VmxFlushTaggedTlbBoth cannot be invoked unless NestedPaging & VPID are enabled."
1775 "fNestedPaging=%RTbool fVpid=%RTbool", pVM->hmr0.s.fNestedPaging, pVM->hmr0.s.vmx.fVpid));
1776
1777 /*
1778 * Force a TLB flush for the first world-switch if the current CPU differs from the one we
1779 * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
1780 * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
1781 * cannot reuse the current ASID anymore.
1782 */
1783 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1784 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1785 {
1786 ++pHostCpu->uCurrentAsid;
1787 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1788 {
1789 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0. */
1790 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1791 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1792 }
1793
1794 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1795 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1796 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1797
1798 /*
1799 * Flush by EPT when we get rescheduled to a new host CPU to ensure EPT-only tagged mappings are also
1800 * invalidated. We don't need to flush-by-VPID here as flushing by EPT covers it. See @bugref{6568}.
1801 */
1802 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1803 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1804 HMVMX_SET_TAGGED_TLB_FLUSHED();
1805 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1806 }
1807 else if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) /* Check for explicit TLB flushes. */
1808 {
1809 /*
1810 * Changes to the EPT paging structure by VMM requires flushing-by-EPT as the CPU
1811 * creates guest-physical (ie. only EPT-tagged) mappings while traversing the EPT
1812 * tables when EPT is in use. Flushing-by-VPID will only flush linear (only
1813 * VPID-tagged) and combined (EPT+VPID tagged) mappings but not guest-physical
1814 * mappings, see @bugref{6568}.
1815 *
1816 * See Intel spec. 28.3.2 "Creating and Using Cached Translation Information".
1817 */
1818 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1819 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1820 HMVMX_SET_TAGGED_TLB_FLUSHED();
1821 }
1822 else if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1823 {
1824 /*
1825 * The nested-guest specifies its own guest-physical address to use as the APIC-access
1826 * address which requires flushing the TLB of EPT cached structures.
1827 *
1828 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
1829 */
1830 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1831 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1832 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1833 HMVMX_SET_TAGGED_TLB_FLUSHED();
1834 }
1835
1836
1837 pVCpu->hmr0.s.fForceTLBFlush = false;
1838 HMVMX_UPDATE_FLUSH_SKIPPED_STAT();
1839
1840 Assert(pVCpu->hmr0.s.idLastCpu == pHostCpu->idCpu);
1841 Assert(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes);
1842 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
1843 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
1844 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
1845 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
1846 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
1847 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
1848 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
1849
1850 /* Update VMCS with the VPID. */
1851 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
1852 AssertRC(rc);
1853
1854#undef HMVMX_SET_TAGGED_TLB_FLUSHED
1855}
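/*
 * Worked example of the VPID hand-out above (the limit value is illustrative): if g_uHmMaxAsid
 * were 0x10000 (16-bit VPIDs), a host CPU hands out VPIDs 1..65535 and keeps 0 for itself.
 * On the increment that would reach 0x10000, uCurrentAsid wraps back to 1, cTlbFlushes is
 * bumped and fFlushAsidBeforeUse is set, so every vCPU that subsequently runs on this host CPU
 * sees a flush-count mismatch, takes the reschedule path above and flushes before reusing a
 * recycled VPID.
 */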
1856
1857
1858/**
1859 * Flushes the tagged-TLB entries for EPT CPUs as necessary.
1860 *
1861 * @param pHostCpu The HM physical-CPU structure.
1862 * @param pVCpu The cross context virtual CPU structure.
1863 * @param pVmcsInfo The VMCS info. object.
1864 *
1865 * @remarks Called with interrupts disabled.
1866 */
1867static void hmR0VmxFlushTaggedTlbEpt(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1868{
1869 AssertPtr(pVCpu);
1870 AssertPtr(pHostCpu);
1871 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1872 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked without NestedPaging."));
1873 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with VPID."));
1874
1875 /*
1876 * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last.
1877 * A change in the TLB flush count implies the host CPU is online after a suspend/resume.
1878 */
1879 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1880 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1881 {
1882 pVCpu->hmr0.s.fForceTLBFlush = true;
1883 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1884 }
1885
1886 /* Check for explicit TLB flushes. */
1887 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1888 {
1889 pVCpu->hmr0.s.fForceTLBFlush = true;
1890 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1891 }
1892
1893 /* Check for TLB flushes while switching to/from a nested-guest. */
1894 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1895 {
1896 pVCpu->hmr0.s.fForceTLBFlush = true;
1897 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1898 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1899 }
1900
1901 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1902 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1903
1904 if (pVCpu->hmr0.s.fForceTLBFlush)
1905 {
1906 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.enmTlbFlushEpt);
1907 pVCpu->hmr0.s.fForceTLBFlush = false;
1908 }
1909}
1910
1911
1912/**
1913 * Flushes the tagged-TLB entries for VPID CPUs as necessary.
1914 *
1915 * @param pHostCpu The HM physical-CPU structure.
1916 * @param pVCpu The cross context virtual CPU structure.
1917 *
1918 * @remarks Called with interrupts disabled.
1919 */
1920static void hmR0VmxFlushTaggedTlbVpid(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1921{
1922 AssertPtr(pVCpu);
1923 AssertPtr(pHostCpu);
1924 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1925 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTlbVpid cannot be invoked without VPID."));
1926 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTlbVpid cannot be invoked with NestedPaging"));
1927
1928 /*
1929 * Force a TLB flush for the first world switch if the current CPU differs from the one we
1930 * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
1931 * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
1932 * cannot reuse the current ASID anymore.
1933 */
1934 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1935 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1936 {
1937 pVCpu->hmr0.s.fForceTLBFlush = true;
1938 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1939 }
1940
1941 /* Check for explicit TLB flushes. */
1942 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1943 {
1944 /*
1945 * If we ever support VPID flush combinations other than ALL or SINGLE-context (see
1946 * hmR0VmxSetupTaggedTlb()) we would need to explicitly flush in this case (add an
1947 * fExplicitFlush = true here and change the pHostCpu->fFlushAsidBeforeUse check below to
1948 * include fExplicitFlush's too) - an obscure corner case.
1949 */
1950 pVCpu->hmr0.s.fForceTLBFlush = true;
1951 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1952 }
1953
1954 /* Check for TLB flushes while switching to/from a nested-guest. */
1955 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1956 {
1957 pVCpu->hmr0.s.fForceTLBFlush = true;
1958 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1959 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1960 }
1961
1962 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1963 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1964 if (pVCpu->hmr0.s.fForceTLBFlush)
1965 {
1966 ++pHostCpu->uCurrentAsid;
1967 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1968 {
1969 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0 */
1970 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1971 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1972 }
1973
1974 pVCpu->hmr0.s.fForceTLBFlush = false;
1975 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1976 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1977 if (pHostCpu->fFlushAsidBeforeUse)
1978 {
1979 if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_SINGLE_CONTEXT)
1980 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_SINGLE_CONTEXT, 0 /* GCPtr */);
1981 else if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1982 {
1983 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_ALL_CONTEXTS, 0 /* GCPtr */);
1984 pHostCpu->fFlushAsidBeforeUse = false;
1985 }
1986 else
1987 {
1988 /* hmR0VmxSetupTaggedTlb() ensures we never get here. Paranoia. */
1989 AssertMsgFailed(("Unsupported VPID-flush context type.\n"));
1990 }
1991 }
1992 }
1993
1994 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
1995 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
1996 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
1997 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
1998 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
1999 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
2000 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
2001
2002 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
2003 AssertRC(rc);
2004}
2005
2006
2007/**
2008 * Flushes the guest TLB entry based on CPU capabilities.
2009 *
2010 * @param pHostCpu The HM physical-CPU structure.
2011 * @param pVCpu The cross context virtual CPU structure.
2012 * @param pVmcsInfo The VMCS info. object.
2013 *
2014 * @remarks Called with interrupts disabled.
2015 */
2016static void hmR0VmxFlushTaggedTlb(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2017{
2018#ifdef HMVMX_ALWAYS_FLUSH_TLB
2019 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
2020#endif
2021 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2022 switch (pVM->hmr0.s.vmx.enmTlbFlushType)
2023 {
2024 case VMXTLBFLUSHTYPE_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pHostCpu, pVCpu, pVmcsInfo); break;
2025 case VMXTLBFLUSHTYPE_EPT: hmR0VmxFlushTaggedTlbEpt(pHostCpu, pVCpu, pVmcsInfo); break;
2026 case VMXTLBFLUSHTYPE_VPID: hmR0VmxFlushTaggedTlbVpid(pHostCpu, pVCpu); break;
2027 case VMXTLBFLUSHTYPE_NONE: hmR0VmxFlushTaggedTlbNone(pHostCpu, pVCpu); break;
2028 default:
2029 AssertMsgFailed(("Invalid flush-tag function identifier\n"));
2030 break;
2031 }
2032 /* Don't assert that VMCPU_FF_TLB_FLUSH should no longer be pending. It can be set by other EMTs. */
2033}
2034
2035
2036/**
2037 * Sets up the appropriate tagged TLB-flush level and handler for flushing guest
2038 * TLB entries from the host TLB before VM-entry.
2039 *
2040 * @returns VBox status code.
2041 * @param pVM The cross context VM structure.
2042 */
2043static int hmR0VmxSetupTaggedTlb(PVMCC pVM)
2044{
2045 /*
2046 * Determine optimal flush type for nested paging.
2047     * We cannot ignore EPT if no suitable flush type is supported by the CPU, as we've already set up
2048 * unrestricted guest execution (see hmR3InitFinalizeR0()).
2049 */
2050 if (pVM->hmr0.s.fNestedPaging)
2051 {
2052 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT)
2053 {
2054 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT)
2055 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_SINGLE_CONTEXT;
2056 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
2057 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_ALL_CONTEXTS;
2058 else
2059 {
2060 /* Shouldn't happen. EPT is supported but no suitable flush-types supported. */
2061 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2062 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_FLUSH_TYPE_UNSUPPORTED;
2063 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2064 }
2065
2066 /* Make sure the write-back cacheable memory type for EPT is supported. */
2067 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_MEMTYPE_WB)))
2068 {
2069 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2070 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_MEM_TYPE_NOT_WB;
2071 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2072 }
2073
2074 /* EPT requires a page-walk length of 4. */
2075 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_PAGE_WALK_LENGTH_4)))
2076 {
2077 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2078 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_PAGE_WALK_LENGTH_UNSUPPORTED;
2079 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2080 }
2081 }
2082 else
2083 {
2084 /* Shouldn't happen. EPT is supported but INVEPT instruction is not supported. */
2085 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2086 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_INVEPT_UNAVAILABLE;
2087 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2088 }
2089 }
2090
2091 /*
2092 * Determine optimal flush type for VPID.
2093 */
2094 if (pVM->hmr0.s.vmx.fVpid)
2095 {
2096 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID)
2097 {
2098 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT)
2099 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_SINGLE_CONTEXT;
2100 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS)
2101 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_ALL_CONTEXTS;
2102 else
2103 {
2104                /* Neither SINGLE nor ALL-context flush types for VPID are supported by the CPU. Ignore the VPID capability. */
2105 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
2106 LogRelFunc(("Only INDIV_ADDR supported. Ignoring VPID.\n"));
2107 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS)
2108 LogRelFunc(("Only SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
2109 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2110 pVM->hmr0.s.vmx.fVpid = false;
2111 }
2112 }
2113 else
2114 {
2115 /* Shouldn't happen. VPID is supported but INVVPID is not supported by the CPU. Ignore VPID capability. */
2116            Log4Func(("VPID supported without INVVPID support. Ignoring VPID.\n"));
2117 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2118 pVM->hmr0.s.vmx.fVpid = false;
2119 }
2120 }
2121
2122 /*
2123 * Setup the handler for flushing tagged-TLBs.
2124 */
2125 if (pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid)
2126 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT_VPID;
2127 else if (pVM->hmr0.s.fNestedPaging)
2128 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT;
2129 else if (pVM->hmr0.s.vmx.fVpid)
2130 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_VPID;
2131 else
2132 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_NONE;
2133
2134
2135 /*
2136 * Copy out the result to ring-3.
2137 */
2138 pVM->hm.s.ForR3.vmx.fVpid = pVM->hmr0.s.vmx.fVpid;
2139 pVM->hm.s.ForR3.vmx.enmTlbFlushType = pVM->hmr0.s.vmx.enmTlbFlushType;
2140 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt;
2141 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid;
2142 return VINF_SUCCESS;
2143}
2144
2145
2146/**
2147 * Sets up the LBR MSR ranges based on the host CPU.
2148 *
2149 * @returns VBox status code.
2150 * @param pVM The cross context VM structure.
2151 *
2152 * @sa nemR3DarwinSetupLbrMsrRange
2153 */
2154static int hmR0VmxSetupLbrMsrRange(PVMCC pVM)
2155{
2156 Assert(pVM->hmr0.s.vmx.fLbr);
2157 uint32_t idLbrFromIpMsrFirst;
2158 uint32_t idLbrFromIpMsrLast;
2159 uint32_t idLbrToIpMsrFirst;
2160 uint32_t idLbrToIpMsrLast;
2161 uint32_t idLbrTosMsr;
2162
2163 /*
2164 * Determine the LBR MSRs supported for this host CPU family and model.
2165 *
2166 * See Intel spec. 17.4.8 "LBR Stack".
2167 * See Intel "Model-Specific Registers" spec.
2168 */
2169 uint32_t const uFamilyModel = (g_CpumHostFeatures.s.uFamily << 8)
2170 | g_CpumHostFeatures.s.uModel;
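    /* For example, a family 6, model 0x9E host gives uFamilyModel = (6 << 8) | 0x9e = 0x069e,
       which selects the 32-entry LBR stack case below. */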
2171 switch (uFamilyModel)
2172 {
2173 case 0x0f01: case 0x0f02:
2174 idLbrFromIpMsrFirst = MSR_P4_LASTBRANCH_0;
2175 idLbrFromIpMsrLast = MSR_P4_LASTBRANCH_3;
2176 idLbrToIpMsrFirst = 0x0;
2177 idLbrToIpMsrLast = 0x0;
2178 idLbrTosMsr = MSR_P4_LASTBRANCH_TOS;
2179 break;
2180
2181 case 0x065c: case 0x065f: case 0x064e: case 0x065e: case 0x068e:
2182 case 0x069e: case 0x0655: case 0x0666: case 0x067a: case 0x0667:
2183 case 0x066a: case 0x066c: case 0x067d: case 0x067e:
2184 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2185 idLbrFromIpMsrLast = MSR_LASTBRANCH_31_FROM_IP;
2186 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2187 idLbrToIpMsrLast = MSR_LASTBRANCH_31_TO_IP;
2188 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2189 break;
2190
2191 case 0x063d: case 0x0647: case 0x064f: case 0x0656: case 0x063c:
2192 case 0x0645: case 0x0646: case 0x063f: case 0x062a: case 0x062d:
2193 case 0x063a: case 0x063e: case 0x061a: case 0x061e: case 0x061f:
2194 case 0x062e: case 0x0625: case 0x062c: case 0x062f:
2195 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2196 idLbrFromIpMsrLast = MSR_LASTBRANCH_15_FROM_IP;
2197 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2198 idLbrToIpMsrLast = MSR_LASTBRANCH_15_TO_IP;
2199 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2200 break;
2201
2202 case 0x0617: case 0x061d: case 0x060f:
2203 idLbrFromIpMsrFirst = MSR_CORE2_LASTBRANCH_0_FROM_IP;
2204 idLbrFromIpMsrLast = MSR_CORE2_LASTBRANCH_3_FROM_IP;
2205 idLbrToIpMsrFirst = MSR_CORE2_LASTBRANCH_0_TO_IP;
2206 idLbrToIpMsrLast = MSR_CORE2_LASTBRANCH_3_TO_IP;
2207 idLbrTosMsr = MSR_CORE2_LASTBRANCH_TOS;
2208 break;
2209
2210 /* Atom and related microarchitectures we don't care about:
2211 case 0x0637: case 0x064a: case 0x064c: case 0x064d: case 0x065a:
2212 case 0x065d: case 0x061c: case 0x0626: case 0x0627: case 0x0635:
2213 case 0x0636: */
2214 /* All other CPUs: */
2215 default:
2216 {
2217 LogRelFunc(("Could not determine LBR stack size for the CPU model %#x\n", uFamilyModel));
2218 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_UNKNOWN;
2219 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2220 }
2221 }
2222
2223 /*
2224 * Validate.
2225 */
2226 uint32_t const cLbrStack = idLbrFromIpMsrLast - idLbrFromIpMsrFirst + 1;
2227 PCVMCPU pVCpu0 = VMCC_GET_CPU_0(pVM);
2228 AssertCompile( RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr)
2229 == RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrToIpMsr));
2230 if (cLbrStack > RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr))
2231 {
2232 LogRelFunc(("LBR stack size of the CPU (%u) exceeds our buffer size\n", cLbrStack));
2233 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_OVERFLOW;
2234 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2235 }
2236 NOREF(pVCpu0);
2237
2238 /*
2239 * Update the LBR info. to the VM struct. for use later.
2240 */
2241 pVM->hmr0.s.vmx.idLbrTosMsr = idLbrTosMsr;
2242
2243 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrFirst = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst = idLbrFromIpMsrFirst;
2244 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrLast = pVM->hmr0.s.vmx.idLbrFromIpMsrLast = idLbrFromIpMsrLast;
2245
2246 pVM->hm.s.ForR3.vmx.idLbrToIpMsrFirst = pVM->hmr0.s.vmx.idLbrToIpMsrFirst = idLbrToIpMsrFirst;
2247 pVM->hm.s.ForR3.vmx.idLbrToIpMsrLast = pVM->hmr0.s.vmx.idLbrToIpMsrLast = idLbrToIpMsrLast;
2248 return VINF_SUCCESS;
2249}
2250
2251#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2252
2253/**
2254 * Sets up the shadow VMCS fields arrays.
2255 *
2256 * This function builds arrays of VMCS fields to sync the shadow VMCS later while
2257 * executing the guest.
2258 *
2259 * @returns VBox status code.
2260 * @param pVM The cross context VM structure.
2261 */
2262static int hmR0VmxSetupShadowVmcsFieldsArrays(PVMCC pVM)
2263{
2264 /*
2265 * Paranoia. Ensure we haven't exposed the VMWRITE-All VMX feature to the guest
2266 * when the host does not support it.
2267 */
2268 bool const fGstVmwriteAll = pVM->cpum.ro.GuestFeatures.fVmxVmwriteAll;
2269 if ( !fGstVmwriteAll
2270 || (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL))
2271 { /* likely. */ }
2272 else
2273 {
2274 LogRelFunc(("VMX VMWRITE-All feature exposed to the guest but host CPU does not support it!\n"));
2275 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_GST_HOST_VMWRITE_ALL;
2276 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2277 }
2278
2279 uint32_t const cVmcsFields = RT_ELEMENTS(g_aVmcsFields);
2280 uint32_t cRwFields = 0;
2281 uint32_t cRoFields = 0;
2282 for (uint32_t i = 0; i < cVmcsFields; i++)
2283 {
2284 VMXVMCSFIELD VmcsField;
2285 VmcsField.u = g_aVmcsFields[i];
2286
2287 /*
2288 * We will be writing "FULL" (64-bit) fields while syncing the shadow VMCS.
2289 * Therefore, "HIGH" (32-bit portion of 64-bit) fields must not be included
2290 * in the shadow VMCS fields array as they would be redundant.
2291 *
2292 * If the VMCS field depends on a CPU feature that is not exposed to the guest,
2293 * we must not include it in the shadow VMCS fields array. Guests attempting to
2294 * VMREAD/VMWRITE such VMCS fields would cause a VM-exit and we shall emulate
2295 * the required behavior.
2296 */
2297 if ( VmcsField.n.fAccessType == VMX_VMCSFIELD_ACCESS_FULL
2298 && CPUMIsGuestVmxVmcsFieldValid(pVM, VmcsField.u))
2299 {
2300 /*
2301 * Read-only fields are placed in a separate array so that while syncing shadow
2302 * VMCS fields later (which is more performance critical) we can avoid branches.
2303 *
2304 * However, if the guest can write to all fields (including read-only fields),
2305             * we treat it as a read/write field. Otherwise, writing to these fields would
2306 * cause a VMWRITE instruction error while syncing the shadow VMCS.
2307 */
2308 if ( fGstVmwriteAll
2309 || !VMXIsVmcsFieldReadOnly(VmcsField.u))
2310 pVM->hmr0.s.vmx.paShadowVmcsFields[cRwFields++] = VmcsField.u;
2311 else
2312 pVM->hmr0.s.vmx.paShadowVmcsRoFields[cRoFields++] = VmcsField.u;
2313 }
2314 }
2315
2316 /* Update the counts. */
2317 pVM->hmr0.s.vmx.cShadowVmcsFields = cRwFields;
2318 pVM->hmr0.s.vmx.cShadowVmcsRoFields = cRoFields;
2319 return VINF_SUCCESS;
2320}
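/*
 * Note on the FULL/HIGH filtering above (the encodings are illustrative): 64-bit VMCS fields
 * come in pairs where bit 0 of the encoding selects the access type, so a "FULL" encoding such
 * as 0x2802 has a "HIGH" companion 0x2803 covering only the upper 32 bits.  Only the FULL
 * variants are added to the shadow arrays because the sync code writes whole 64-bit values.
 */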
2321
2322
2323/**
2324 * Sets up the VMREAD and VMWRITE bitmaps.
2325 *
2326 * @param pVM The cross context VM structure.
2327 */
2328static void hmR0VmxSetupVmreadVmwriteBitmaps(PVMCC pVM)
2329{
2330 /*
2331 * By default, ensure guest attempts to access any VMCS fields cause VM-exits.
2332 */
2333 uint32_t const cbBitmap = X86_PAGE_4K_SIZE;
2334 uint8_t *pbVmreadBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmreadBitmap;
2335 uint8_t *pbVmwriteBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmwriteBitmap;
2336 ASMMemFill32(pbVmreadBitmap, cbBitmap, UINT32_C(0xffffffff));
2337 ASMMemFill32(pbVmwriteBitmap, cbBitmap, UINT32_C(0xffffffff));
2338
2339 /*
2340 * Skip intercepting VMREAD/VMWRITE to guest read/write fields in the
2341 * VMREAD and VMWRITE bitmaps.
2342 */
2343 {
2344 uint32_t const *paShadowVmcsFields = pVM->hmr0.s.vmx.paShadowVmcsFields;
2345 uint32_t const cShadowVmcsFields = pVM->hmr0.s.vmx.cShadowVmcsFields;
2346 for (uint32_t i = 0; i < cShadowVmcsFields; i++)
2347 {
2348 uint32_t const uVmcsField = paShadowVmcsFields[i];
2349 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2350 Assert(uVmcsField >> 3 < cbBitmap);
2351 ASMBitClear(pbVmreadBitmap, uVmcsField & 0x7fff);
2352 ASMBitClear(pbVmwriteBitmap, uVmcsField & 0x7fff);
2353 }
2354 }
2355
2356 /*
2357 * Skip intercepting VMREAD for guest read-only fields in the VMREAD bitmap
2358 * if the host supports VMWRITE to all supported VMCS fields.
2359 */
2360 if (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL)
2361 {
2362 uint32_t const *paShadowVmcsRoFields = pVM->hmr0.s.vmx.paShadowVmcsRoFields;
2363 uint32_t const cShadowVmcsRoFields = pVM->hmr0.s.vmx.cShadowVmcsRoFields;
2364 for (uint32_t i = 0; i < cShadowVmcsRoFields; i++)
2365 {
2366 uint32_t const uVmcsField = paShadowVmcsRoFields[i];
2367 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2368 Assert(uVmcsField >> 3 < cbBitmap);
2369 ASMBitClear(pbVmreadBitmap, uVmcsField & 0x7fff);
2370 }
2371 }
2372}
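/*
 * Illustrative mapping for the bit-clearing above: the low 15 bits of a VMCS field encoding
 * index directly into the 4 KB VMREAD/VMWRITE bitmaps.  A field encoded as 0x4002, for
 * instance, corresponds to bit 2 of byte 0x800 in each bitmap; clearing that bit lets the
 * nested guest VMREAD/VMWRITE that field without causing a VM-exit.
 */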
2373
2374#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
2375
2376/**
2377 * Sets up the virtual-APIC page address for the VMCS.
2378 *
2379 * @param pVmcsInfo The VMCS info. object.
2380 */
2381DECLINLINE(void) hmR0VmxSetupVmcsVirtApicAddr(PCVMXVMCSINFO pVmcsInfo)
2382{
2383 RTHCPHYS const HCPhysVirtApic = pVmcsInfo->HCPhysVirtApic;
2384 Assert(HCPhysVirtApic != NIL_RTHCPHYS);
2385 Assert(!(HCPhysVirtApic & 0xfff)); /* Bits 11:0 MBZ. */
2386 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
2387 AssertRC(rc);
2388}
2389
2390
2391/**
2392 * Sets up the MSR-bitmap address for the VMCS.
2393 *
2394 * @param pVmcsInfo The VMCS info. object.
2395 */
2396DECLINLINE(void) hmR0VmxSetupVmcsMsrBitmapAddr(PCVMXVMCSINFO pVmcsInfo)
2397{
2398 RTHCPHYS const HCPhysMsrBitmap = pVmcsInfo->HCPhysMsrBitmap;
2399 Assert(HCPhysMsrBitmap != NIL_RTHCPHYS);
2400 Assert(!(HCPhysMsrBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2401 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, HCPhysMsrBitmap);
2402 AssertRC(rc);
2403}
2404
2405
2406/**
2407 * Sets up the APIC-access page address for the VMCS.
2408 *
2409 * @param pVCpu The cross context virtual CPU structure.
2410 */
2411DECLINLINE(void) hmR0VmxSetupVmcsApicAccessAddr(PVMCPUCC pVCpu)
2412{
2413 RTHCPHYS const HCPhysApicAccess = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysApicAccess;
2414 Assert(HCPhysApicAccess != NIL_RTHCPHYS);
2415 Assert(!(HCPhysApicAccess & 0xfff)); /* Bits 11:0 MBZ. */
2416 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
2417 AssertRC(rc);
2418}
2419
2420#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2421
2422/**
2423 * Sets up the VMREAD bitmap address for the VMCS.
2424 *
2425 * @param pVCpu The cross context virtual CPU structure.
2426 */
2427DECLINLINE(void) hmR0VmxSetupVmcsVmreadBitmapAddr(PVMCPUCC pVCpu)
2428{
2429 RTHCPHYS const HCPhysVmreadBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmreadBitmap;
2430 Assert(HCPhysVmreadBitmap != NIL_RTHCPHYS);
2431 Assert(!(HCPhysVmreadBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2432 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMREAD_BITMAP_FULL, HCPhysVmreadBitmap);
2433 AssertRC(rc);
2434}
2435
2436
2437/**
2438 * Sets up the VMWRITE bitmap address for the VMCS.
2439 *
2440 * @param pVCpu The cross context virtual CPU structure.
2441 */
2442DECLINLINE(void) hmR0VmxSetupVmcsVmwriteBitmapAddr(PVMCPUCC pVCpu)
2443{
2444 RTHCPHYS const HCPhysVmwriteBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmwriteBitmap;
2445 Assert(HCPhysVmwriteBitmap != NIL_RTHCPHYS);
2446 Assert(!(HCPhysVmwriteBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2447 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMWRITE_BITMAP_FULL, HCPhysVmwriteBitmap);
2448 AssertRC(rc);
2449}
2450
2451#endif
2452
2453/**
2454 * Sets up the VM-entry MSR load, VM-exit MSR-store and VM-exit MSR-load addresses
2455 * in the VMCS.
2456 *
2457 * @returns VBox status code.
2458 * @param pVmcsInfo The VMCS info. object.
2459 */
2460DECLINLINE(int) hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(PVMXVMCSINFO pVmcsInfo)
2461{
2462 RTHCPHYS const HCPhysGuestMsrLoad = pVmcsInfo->HCPhysGuestMsrLoad;
2463 Assert(HCPhysGuestMsrLoad != NIL_RTHCPHYS);
2464 Assert(!(HCPhysGuestMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2465
2466 RTHCPHYS const HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrStore;
2467 Assert(HCPhysGuestMsrStore != NIL_RTHCPHYS);
2468 Assert(!(HCPhysGuestMsrStore & 0xf)); /* Bits 3:0 MBZ. */
2469
2470 RTHCPHYS const HCPhysHostMsrLoad = pVmcsInfo->HCPhysHostMsrLoad;
2471 Assert(HCPhysHostMsrLoad != NIL_RTHCPHYS);
2472 Assert(!(HCPhysHostMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2473
2474 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, HCPhysGuestMsrLoad); AssertRC(rc);
2475 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, HCPhysGuestMsrStore); AssertRC(rc);
2476 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, HCPhysHostMsrLoad); AssertRC(rc);
2477 return VINF_SUCCESS;
2478}
2479
2480
2481/**
2482 * Sets up MSR permissions in the MSR bitmap of a VMCS info. object.
2483 *
2484 * @param pVCpu The cross context virtual CPU structure.
2485 * @param pVmcsInfo The VMCS info. object.
2486 */
2487static void hmR0VmxSetupVmcsMsrPermissions(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2488{
2489 Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS);
2490
2491 /*
2492 * By default, ensure guest attempts to access any MSR cause VM-exits.
2493 * This shall later be relaxed for specific MSRs as necessary.
2494 *
2495 * Note: For nested-guests, the entire bitmap will be merged prior to
2496 * executing the nested-guest using hardware-assisted VMX and hence there
2497 * is no need to perform this operation. See hmR0VmxMergeMsrBitmapNested.
2498 */
2499 Assert(pVmcsInfo->pvMsrBitmap);
2500 ASMMemFill32(pVmcsInfo->pvMsrBitmap, X86_PAGE_4K_SIZE, UINT32_C(0xffffffff));
2501
2502 /*
2503 * The guest can access the following MSRs (read, write) without causing
2504 * VM-exits; they are loaded/stored automatically using fields in the VMCS.
2505 */
2506 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2507 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_CS, VMXMSRPM_ALLOW_RD_WR);
2508 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_ESP, VMXMSRPM_ALLOW_RD_WR);
2509 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_EIP, VMXMSRPM_ALLOW_RD_WR);
2510 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2511 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_FS_BASE, VMXMSRPM_ALLOW_RD_WR);
2512
2513 /*
2514     * The IA32_PRED_CMD and IA32_FLUSH_CMD MSRs are write-only and have no state
2515     * associated with them. We never need to intercept access (writes need to be
2516 * executed without causing a VM-exit, reads will #GP fault anyway).
2517 *
2518 * The IA32_SPEC_CTRL MSR is read/write and has state. We allow the guest to
2519 * read/write them. We swap the guest/host MSR value using the
2520 * auto-load/store MSR area.
2521 */
2522 if (pVM->cpum.ro.GuestFeatures.fIbpb)
2523 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_PRED_CMD, VMXMSRPM_ALLOW_RD_WR);
2524 if (pVM->cpum.ro.GuestFeatures.fFlushCmd)
2525 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_FLUSH_CMD, VMXMSRPM_ALLOW_RD_WR);
2526 if (pVM->cpum.ro.GuestFeatures.fIbrs)
2527 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SPEC_CTRL, VMXMSRPM_ALLOW_RD_WR);
2528
2529 /*
2530 * Allow full read/write access for the following MSRs (mandatory for VT-x)
2531 * required for 64-bit guests.
2532 */
2533 if (pVM->hmr0.s.fAllow64BitGuests)
2534 {
2535 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_LSTAR, VMXMSRPM_ALLOW_RD_WR);
2536 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K6_STAR, VMXMSRPM_ALLOW_RD_WR);
2537 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_SF_MASK, VMXMSRPM_ALLOW_RD_WR);
2538 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_KERNEL_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2539 }
2540
2541 /*
2542 * IA32_EFER MSR is always intercepted, see @bugref{9180#c37}.
2543 */
2544#ifdef VBOX_STRICT
2545 Assert(pVmcsInfo->pvMsrBitmap);
2546 uint32_t const fMsrpmEfer = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, MSR_K6_EFER);
2547 Assert(fMsrpmEfer == VMXMSRPM_EXIT_RD_WR);
2548#endif
2549}
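/*
 * Illustrative check, not part of the build: after the setup above, a permission can be read
 * back the same way the strict-build hmR0VmxCheckAutoLoadStoreMsrs() does it, e.g.
 *
 *     uint32_t const fMsrpm = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, MSR_K8_LSTAR);
 *
 * which yields VMXMSRPM_ALLOW_RD_WR for VMs that allow 64-bit guests and VMXMSRPM_EXIT_RD_WR
 * for anything left at the default established by the ASMMemFill32() above.
 */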
2550
2551
2552/**
2553 * Sets up pin-based VM-execution controls in the VMCS.
2554 *
2555 * @returns VBox status code.
2556 * @param pVCpu The cross context virtual CPU structure.
2557 * @param pVmcsInfo The VMCS info. object.
2558 */
2559static int hmR0VmxSetupVmcsPinCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2560{
2561 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2562 uint32_t fVal = g_HmMsrs.u.vmx.PinCtls.n.allowed0; /* Bits set here must always be set. */
2563 uint32_t const fZap = g_HmMsrs.u.vmx.PinCtls.n.allowed1; /* Bits cleared here must always be cleared. */
2564
2565 fVal |= VMX_PIN_CTLS_EXT_INT_EXIT /* External interrupts cause a VM-exit. */
2566 | VMX_PIN_CTLS_NMI_EXIT; /* Non-maskable interrupts (NMIs) cause a VM-exit. */
2567
2568 if (g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_VIRT_NMI)
2569 fVal |= VMX_PIN_CTLS_VIRT_NMI; /* Use virtual NMIs and virtual-NMI blocking features. */
2570
2571 /* Enable the VMX-preemption timer. */
2572 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
2573 {
2574 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_PREEMPT_TIMER);
2575 fVal |= VMX_PIN_CTLS_PREEMPT_TIMER;
2576 }
2577
2578#if 0
2579 /* Enable posted-interrupt processing. */
2580 if (pVM->hm.s.fPostedIntrs)
2581 {
2582 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_POSTED_INT);
2583 Assert(g_HmMsrs.u.vmx.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_ACK_EXT_INT);
2584 fVal |= VMX_PIN_CTLS_POSTED_INT;
2585 }
2586#endif
2587
2588 if ((fVal & fZap) != fVal)
2589 {
2590 LogRelFunc(("Invalid pin-based VM-execution controls combo! Cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2591 g_HmMsrs.u.vmx.PinCtls.n.allowed0, fVal, fZap));
2592 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PIN_EXEC;
2593 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2594 }
2595
2596 /* Commit it to the VMCS and update our cache. */
2597 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, fVal);
2598 AssertRC(rc);
2599 pVmcsInfo->u32PinCtls = fVal;
2600
2601 return VINF_SUCCESS;
2602}
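/*
 * Worked example of the allowed0/allowed1 sanity check used here and in the other control
 * setup routines (the numbers are made up for illustration): if allowed0 were 0x00000016,
 * every one of those bits must be set in fVal; if allowed1 (fZap) were 0x0000d01e, any bit
 * outside that mask must remain clear.  Requesting a feature bit such as 0x00002000 that is
 * zero in allowed1 makes (fVal & fZap) != fVal, and we bail out with
 * VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO instead of attempting the VMWRITE.
 */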
2603
2604
2605/**
2606 * Sets up secondary processor-based VM-execution controls in the VMCS.
2607 *
2608 * @returns VBox status code.
2609 * @param pVCpu The cross context virtual CPU structure.
2610 * @param pVmcsInfo The VMCS info. object.
2611 */
2612static int hmR0VmxSetupVmcsProcCtls2(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2613{
2614 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2615 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls2.n.allowed0; /* Bits set here must be set in the VMCS. */
2616 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls2.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2617
2618 /* WBINVD causes a VM-exit. */
2619 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_WBINVD_EXIT)
2620 fVal |= VMX_PROC_CTLS2_WBINVD_EXIT;
2621
2622 /* Enable EPT (aka nested-paging). */
2623 if (pVM->hmr0.s.fNestedPaging)
2624 fVal |= VMX_PROC_CTLS2_EPT;
2625
2626     /* Enable the INVPCID instruction if we expose it to the guest and it is supported
2627        by the hardware. Without this, a guest executing INVPCID would cause a #UD. */
2628 if ( pVM->cpum.ro.GuestFeatures.fInvpcid
2629 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_INVPCID))
2630 fVal |= VMX_PROC_CTLS2_INVPCID;
2631
2632 /* Enable VPID. */
2633 if (pVM->hmr0.s.vmx.fVpid)
2634 fVal |= VMX_PROC_CTLS2_VPID;
2635
2636 /* Enable unrestricted guest execution. */
2637 if (pVM->hmr0.s.vmx.fUnrestrictedGuest)
2638 fVal |= VMX_PROC_CTLS2_UNRESTRICTED_GUEST;
2639
2640#if 0
2641 if (pVM->hm.s.fVirtApicRegs)
2642 {
2643 /* Enable APIC-register virtualization. */
2644 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_APIC_REG_VIRT);
2645 fVal |= VMX_PROC_CTLS2_APIC_REG_VIRT;
2646
2647 /* Enable virtual-interrupt delivery. */
2648 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_INTR_DELIVERY);
2649 fVal |= VMX_PROC_CTLS2_VIRT_INTR_DELIVERY;
2650 }
2651#endif
2652
2653 /* Virtualize-APIC accesses if supported by the CPU. The virtual-APIC page is
2654 where the TPR shadow resides. */
2655 /** @todo VIRT_X2APIC support, it's mutually exclusive with this. So must be
2656 * done dynamically. */
2657 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
2658 {
2659 fVal |= VMX_PROC_CTLS2_VIRT_APIC_ACCESS;
2660 hmR0VmxSetupVmcsApicAccessAddr(pVCpu);
2661 }
2662
2663     /* Enable the RDTSCP instruction if we expose it to the guest and it is supported
2664        by the hardware. Without this, a guest executing RDTSCP would cause a #UD. */
2665 if ( pVM->cpum.ro.GuestFeatures.fRdTscP
2666 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_RDTSCP))
2667 fVal |= VMX_PROC_CTLS2_RDTSCP;
2668
2669 /* Enable Pause-Loop exiting. */
2670 if ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT)
2671 && pVM->hm.s.vmx.cPleGapTicks
2672 && pVM->hm.s.vmx.cPleWindowTicks)
2673 {
2674 fVal |= VMX_PROC_CTLS2_PAUSE_LOOP_EXIT;
2675
2676 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, pVM->hm.s.vmx.cPleGapTicks); AssertRC(rc);
2677 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, pVM->hm.s.vmx.cPleWindowTicks); AssertRC(rc);
2678 }
2679
2680 if ((fVal & fZap) != fVal)
2681 {
2682 LogRelFunc(("Invalid secondary processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2683 g_HmMsrs.u.vmx.ProcCtls2.n.allowed0, fVal, fZap));
2684 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC2;
2685 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2686 }
2687
2688 /* Commit it to the VMCS and update our cache. */
2689 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, fVal);
2690 AssertRC(rc);
2691 pVmcsInfo->u32ProcCtls2 = fVal;
2692
2693 return VINF_SUCCESS;
2694}
2695
2696
2697/**
2698 * Sets up processor-based VM-execution controls in the VMCS.
2699 *
2700 * @returns VBox status code.
2701 * @param pVCpu The cross context virtual CPU structure.
2702 * @param pVmcsInfo The VMCS info. object.
2703 */
2704static int hmR0VmxSetupVmcsProcCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2705{
2706 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2707 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls.n.allowed0; /* Bits set here must be set in the VMCS. */
2708 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2709
2710 fVal |= VMX_PROC_CTLS_HLT_EXIT /* HLT causes a VM-exit. */
2711 | VMX_PROC_CTLS_USE_TSC_OFFSETTING /* Use TSC-offsetting. */
2712 | VMX_PROC_CTLS_MOV_DR_EXIT /* MOV DRx causes a VM-exit. */
2713 | VMX_PROC_CTLS_UNCOND_IO_EXIT /* All IO instructions cause a VM-exit. */
2714 | VMX_PROC_CTLS_RDPMC_EXIT /* RDPMC causes a VM-exit. */
2715 | VMX_PROC_CTLS_MONITOR_EXIT /* MONITOR causes a VM-exit. */
2716 | VMX_PROC_CTLS_MWAIT_EXIT; /* MWAIT causes a VM-exit. */
2717
2718     /* We toggle VMX_PROC_CTLS_MOV_DR_EXIT later; check that the CPU allows it to be both set and cleared (i.e. it is not fixed either way). */
2719 if ( !(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MOV_DR_EXIT)
2720 || (g_HmMsrs.u.vmx.ProcCtls.n.allowed0 & VMX_PROC_CTLS_MOV_DR_EXIT))
2721 {
2722 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_MOV_DRX_EXIT;
2723 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2724 }
2725
2726 /* Without nested paging, INVLPG (also affects INVPCID) and MOV CR3 instructions should cause VM-exits. */
2727 if (!pVM->hmr0.s.fNestedPaging)
2728 {
2729 Assert(!pVM->hmr0.s.vmx.fUnrestrictedGuest);
2730 fVal |= VMX_PROC_CTLS_INVLPG_EXIT
2731 | VMX_PROC_CTLS_CR3_LOAD_EXIT
2732 | VMX_PROC_CTLS_CR3_STORE_EXIT;
2733 }
2734
2735 /* Use TPR shadowing if supported by the CPU. */
2736 if ( PDMHasApic(pVM)
2737 && (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW))
2738 {
2739 fVal |= VMX_PROC_CTLS_USE_TPR_SHADOW; /* CR8 reads from the Virtual-APIC page. */
2740 /* CR8 writes cause a VM-exit based on TPR threshold. */
2741 Assert(!(fVal & VMX_PROC_CTLS_CR8_STORE_EXIT));
2742 Assert(!(fVal & VMX_PROC_CTLS_CR8_LOAD_EXIT));
2743 hmR0VmxSetupVmcsVirtApicAddr(pVmcsInfo);
2744 }
2745 else
2746 {
2747 /* Some 32-bit CPUs do not support CR8 load/store exiting as MOV CR8 is
2748 invalid on 32-bit Intel CPUs. Set this control only for 64-bit guests. */
2749 if (pVM->hmr0.s.fAllow64BitGuests)
2750 fVal |= VMX_PROC_CTLS_CR8_STORE_EXIT /* CR8 reads cause a VM-exit. */
2751 | VMX_PROC_CTLS_CR8_LOAD_EXIT; /* CR8 writes cause a VM-exit. */
2752 }
2753
2754 /* Use MSR-bitmaps if supported by the CPU. */
2755 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2756 {
2757 fVal |= VMX_PROC_CTLS_USE_MSR_BITMAPS;
2758 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
2759 }
2760
2761 /* Use the secondary processor-based VM-execution controls if supported by the CPU. */
2762 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2763 fVal |= VMX_PROC_CTLS_USE_SECONDARY_CTLS;
2764
2765 if ((fVal & fZap) != fVal)
2766 {
2767 LogRelFunc(("Invalid processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2768 g_HmMsrs.u.vmx.ProcCtls.n.allowed0, fVal, fZap));
2769 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC;
2770 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2771 }
2772
2773 /* Commit it to the VMCS and update our cache. */
2774 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, fVal);
2775 AssertRC(rc);
2776 pVmcsInfo->u32ProcCtls = fVal;
2777
2778 /* Set up MSR permissions that don't change through the lifetime of the VM. */
2779 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2780 hmR0VmxSetupVmcsMsrPermissions(pVCpu, pVmcsInfo);
2781
2782 /* Set up secondary processor-based VM-execution controls if the CPU supports it. */
2783 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2784 return hmR0VmxSetupVmcsProcCtls2(pVCpu, pVmcsInfo);
2785
2786 /* Sanity check, should not really happen. */
2787 if (RT_LIKELY(!pVM->hmr0.s.vmx.fUnrestrictedGuest))
2788 { /* likely */ }
2789 else
2790 {
2791 pVCpu->hm.s.u32HMError = VMX_UFC_INVALID_UX_COMBO;
2792 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2793 }
2794
2795 /* Old CPUs without secondary processor-based VM-execution controls would end up here. */
2796 return VINF_SUCCESS;
2797}
2798
2799
2800/**
2801 * Sets up miscellaneous (everything other than Pin, Processor and secondary
2802 * Processor-based VM-execution) control fields in the VMCS.
2803 *
2804 * @returns VBox status code.
2805 * @param pVCpu The cross context virtual CPU structure.
2806 * @param pVmcsInfo The VMCS info. object.
2807 */
2808static int hmR0VmxSetupVmcsMiscCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2809{
2810#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2811 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
2812 {
2813 hmR0VmxSetupVmcsVmreadBitmapAddr(pVCpu);
2814 hmR0VmxSetupVmcsVmwriteBitmapAddr(pVCpu);
2815 }
2816#endif
2817
2818 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
2819 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
2820 AssertRC(rc);
2821
2822 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
2823 if (RT_SUCCESS(rc))
2824 {
2825 uint64_t const u64Cr0Mask = vmxHCGetFixedCr0Mask(pVCpu);
2826 uint64_t const u64Cr4Mask = vmxHCGetFixedCr4Mask(pVCpu);
2827
2828 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask); AssertRC(rc);
2829 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask); AssertRC(rc);
2830
2831 pVmcsInfo->u64Cr0Mask = u64Cr0Mask;
2832 pVmcsInfo->u64Cr4Mask = u64Cr4Mask;
2833
2834 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fLbr)
2835 {
2836 rc = VMXWriteVmcsNw(VMX_VMCS64_GUEST_DEBUGCTL_FULL, MSR_IA32_DEBUGCTL_LBR);
2837 AssertRC(rc);
2838 }
2839 return VINF_SUCCESS;
2840 }
2841 else
2842 LogRelFunc(("Failed to initialize VMCS auto-load/store MSR addresses. rc=%Rrc\n", rc));
2843 return rc;
2844}
2845
2846
2847/**
2848 * Sets up the initial exception bitmap in the VMCS based on static conditions.
2849 *
2850 * We shall set up those exception intercepts that don't change during the
2851 * lifetime of the VM here. The rest are done dynamically while loading the
2852 * guest state.
2853 *
2854 * @param pVCpu The cross context virtual CPU structure.
2855 * @param pVmcsInfo The VMCS info. object.
2856 */
2857static void hmR0VmxSetupVmcsXcptBitmap(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2858{
2859 /*
2860 * The following exceptions are always intercepted:
2861 *
2862 * #AC - To prevent the guest from hanging the CPU and for dealing with
2863 * split-lock detecting host configs.
2864 * #DB - To maintain the DR6 state even when intercepting DRx reads/writes and
2865 * recursive #DBs can cause a CPU hang.
2866 * #PF - To sync our shadow page tables when nested-paging is not used.
2867 */
2868 bool const fNestedPaging = pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging;
2869 uint32_t const uXcptBitmap = RT_BIT(X86_XCPT_AC)
2870 | RT_BIT(X86_XCPT_DB)
2871 | (fNestedPaging ? 0 : RT_BIT(X86_XCPT_PF));
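    /* Worked value (using the usual vector numbers #DB=1, #PF=14, #AC=17): with nested paging
       this evaluates to RT_BIT(1) | RT_BIT(17) = 0x00020002; without it the #PF bit is added,
       giving 0x00024002. */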
2872
2873 /* Commit it to the VMCS. */
2874 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap);
2875 AssertRC(rc);
2876
2877 /* Update our cache of the exception bitmap. */
2878 pVmcsInfo->u32XcptBitmap = uXcptBitmap;
2879}
2880
2881
2882#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2883/**
2884 * Sets up the VMCS for executing a nested-guest using hardware-assisted VMX.
2885 *
2886 * @returns VBox status code.
2887 * @param pVmcsInfo The VMCS info. object.
2888 */
2889static int hmR0VmxSetupVmcsCtlsNested(PVMXVMCSINFO pVmcsInfo)
2890{
2891 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
2892 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
2893 AssertRC(rc);
2894
2895 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
2896 if (RT_SUCCESS(rc))
2897 {
2898 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2899 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
2900
2901 /* Paranoia - We've not yet initialized these, they shall be done while merging the VMCS. */
2902 Assert(!pVmcsInfo->u64Cr0Mask);
2903 Assert(!pVmcsInfo->u64Cr4Mask);
2904 return VINF_SUCCESS;
2905 }
2906 LogRelFunc(("Failed to set up the VMCS link pointer in the nested-guest VMCS. rc=%Rrc\n", rc));
2907 return rc;
2908}
2909#endif
2910
2911
2912/**
2913 * Selector FNHMSVMVMRUN implementation.
2914 */
2915static DECLCALLBACK(int) hmR0VmxStartVmSelector(PVMXVMCSINFO pVmcsInfo, PVMCPUCC pVCpu, bool fResume)
2916{
2917 hmR0VmxUpdateStartVmFunction(pVCpu);
2918 return pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResume);
2919}
2920
2921
2922/**
2923 * Sets up the VMCS for executing a guest (or nested-guest) using hardware-assisted
2924 * VMX.
2925 *
2926 * @returns VBox status code.
2927 * @param pVCpu The cross context virtual CPU structure.
2928 * @param pVmcsInfo The VMCS info. object.
2929 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
2930 */
2931static int hmR0VmxSetupVmcs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
2932{
2933 Assert(pVmcsInfo->pvVmcs);
2934 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2935
2936 /* Set the CPU specified revision identifier at the beginning of the VMCS structure. */
2937 *(uint32_t *)pVmcsInfo->pvVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
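 /* The first dword of a VMCS region holds the VMCS revision identifier reported by
 IA32_VMX_BASIC[30:0]; bit 31 of that dword is the shadow-VMCS indicator and must be
 zero for an ordinary VMCS (it is set only for shadow VMCS regions, see the shadow
 VMCS initialization further below). */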
2938 const char * const pszVmcs = fIsNstGstVmcs ? "nested-guest VMCS" : "guest VMCS";
2939
2940 LogFlowFunc(("\n"));
2941
2942 /*
2943 * Initialize the VMCS using VMCLEAR before loading the VMCS.
2944 * See Intel spec. 31.6 "Preparation And Launching A Virtual Machine".
2945 */
2946 int rc = hmR0VmxClearVmcs(pVmcsInfo);
2947 if (RT_SUCCESS(rc))
2948 {
2949 rc = hmR0VmxLoadVmcs(pVmcsInfo);
2950 if (RT_SUCCESS(rc))
2951 {
2952 /*
2953 * Initialize the hardware-assisted VMX execution handler for guest and nested-guest VMCS.
2954 * The host is always 64-bit since we no longer support 32-bit hosts.
2955 * Currently we have just a single handler for all guest modes as well, see @bugref{6208#c73}.
2956 */
2957 if (!fIsNstGstVmcs)
2958 {
2959 rc = hmR0VmxSetupVmcsPinCtls(pVCpu, pVmcsInfo);
2960 if (RT_SUCCESS(rc))
2961 {
2962 rc = hmR0VmxSetupVmcsProcCtls(pVCpu, pVmcsInfo);
2963 if (RT_SUCCESS(rc))
2964 {
2965 rc = hmR0VmxSetupVmcsMiscCtls(pVCpu, pVmcsInfo);
2966 if (RT_SUCCESS(rc))
2967 {
2968 hmR0VmxSetupVmcsXcptBitmap(pVCpu, pVmcsInfo);
2969#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2970 /*
2971 * If a shadow VMCS is allocated for the VMCS info. object, initialize the
2972 * VMCS revision ID and shadow VMCS indicator bit. Also, clear the VMCS
2973 * making it fit for use when VMCS shadowing is later enabled.
2974 */
2975 if (pVmcsInfo->pvShadowVmcs)
2976 {
2977 VMXVMCSREVID VmcsRevId;
2978 VmcsRevId.u = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
2979 VmcsRevId.n.fIsShadowVmcs = 1;
2980 *(uint32_t *)pVmcsInfo->pvShadowVmcs = VmcsRevId.u;
2981 rc = vmxHCClearShadowVmcs(pVmcsInfo);
2982 if (RT_SUCCESS(rc))
2983 { /* likely */ }
2984 else
2985 LogRelFunc(("Failed to initialize shadow VMCS. rc=%Rrc\n", rc));
2986 }
2987#endif
2988 }
2989 else
2990 LogRelFunc(("Failed to setup miscellaneous controls. rc=%Rrc\n", rc));
2991 }
2992 else
2993 LogRelFunc(("Failed to setup processor-based VM-execution controls. rc=%Rrc\n", rc));
2994 }
2995 else
2996 LogRelFunc(("Failed to setup pin-based controls. rc=%Rrc\n", rc));
2997 }
2998 else
2999 {
3000#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3001 rc = hmR0VmxSetupVmcsCtlsNested(pVmcsInfo);
3002 if (RT_SUCCESS(rc))
3003 { /* likely */ }
3004 else
3005 LogRelFunc(("Failed to initialize nested-guest VMCS. rc=%Rrc\n", rc));
3006#else
3007 AssertFailed();
3008#endif
3009 }
3010 }
3011 else
3012 LogRelFunc(("Failed to load the %s. rc=%Rrc\n", pszVmcs, rc));
3013 }
3014 else
3015 LogRelFunc(("Failed to clear the %s. rc=%Rrc\n", pszVmcs, rc));
3016
3017 /* Sync any CPU internal VMCS data back into our VMCS in memory. */
3018 if (RT_SUCCESS(rc))
3019 {
3020 rc = hmR0VmxClearVmcs(pVmcsInfo);
3021 if (RT_SUCCESS(rc))
3022 { /* likely */ }
3023 else
3024 LogRelFunc(("Failed to clear the %s post setup. rc=%Rrc\n", pszVmcs, rc));
3025 }
3026
3027 /*
3028 * Update the last-error record both for failures and success, so we
3029 * can propagate the status code back to ring-3 for diagnostics.
3030 */
3031 hmR0VmxUpdateErrorRecord(pVCpu, rc);
3032 NOREF(pszVmcs);
3033 return rc;
3034}
3035
3036
3037/**
3038 * Does global VT-x initialization (called during module initialization).
3039 *
3040 * @returns VBox status code.
3041 */
3042VMMR0DECL(int) VMXR0GlobalInit(void)
3043{
3044#ifdef HMVMX_USE_FUNCTION_TABLE
3045 AssertCompile(VMX_EXIT_MAX + 1 == RT_ELEMENTS(g_aVMExitHandlers));
3046# ifdef VBOX_STRICT
3047 for (unsigned i = 0; i < RT_ELEMENTS(g_aVMExitHandlers); i++)
3048 Assert(g_aVMExitHandlers[i].pfn);
3049# endif
3050#endif
3051
3052 /*
3053 * For detecting whether DR6.RTM is writable or not (done in VMXR0InitVM).
3054 */
3055 RTTHREADPREEMPTSTATE Preempt = RTTHREADPREEMPTSTATE_INITIALIZER;
3056 RTThreadPreemptDisable(&Preempt);
3057 RTCCUINTXREG const fSavedDr6 = ASMGetDR6();
3058 ASMSetDR6(0);
3059 RTCCUINTXREG const fZeroDr6 = ASMGetDR6();
3060 ASMSetDR6(fSavedDr6);
3061 RTThreadPreemptRestore(&Preempt);
3062
3063 g_fDr6Zeroed = fZeroDr6;
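 /* Interpretation of the probe above: DR6 bits that still read back as 1 after
 writing 0 are fixed to one. If the probed value differs from X86_DR6_RA1_MASK,
 DR6.RTM (bit 16) is writable on this host; VMXR0InitVM() then defaults to always
 intercepting MOV DRx so the guest cannot clear a bit we do not emulate. */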
3064
3065 return VINF_SUCCESS;
3066}
3067
3068
3069/**
3070 * Does global VT-x termination (called during module termination).
3071 */
3072VMMR0DECL(void) VMXR0GlobalTerm()
3073{
3074 /* Nothing to do currently. */
3075}
3076
3077
3078/**
3079 * Sets up and activates VT-x on the current CPU.
3080 *
3081 * @returns VBox status code.
3082 * @param pHostCpu The HM physical-CPU structure.
3083 * @param pVM The cross context VM structure. Can be
3084 * NULL after a host resume operation.
3085 * @param pvCpuPage Pointer to the VMXON region (can be NULL if @a
3086 * fEnabledByHost is @c true).
3087 * @param HCPhysCpuPage Physical address of the VMXON region (can be 0 if
3088 * @a fEnabledByHost is @c true).
3089 * @param fEnabledByHost Set if SUPR0EnableVTx() or similar was used to
3090 * enable VT-x on the host.
3091 * @param pHwvirtMsrs Pointer to the hardware-virtualization MSRs.
3092 */
3093VMMR0DECL(int) VMXR0EnableCpu(PHMPHYSCPU pHostCpu, PVMCC pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost,
3094 PCSUPHWVIRTMSRS pHwvirtMsrs)
3095{
3096 AssertPtr(pHostCpu);
3097 AssertPtr(pHwvirtMsrs);
3098 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3099
3100 /* Enable VT-x if it's not already enabled by the host. */
3101 if (!fEnabledByHost)
3102 {
3103 int rc = hmR0VmxEnterRootMode(pHostCpu, pVM, HCPhysCpuPage, pvCpuPage);
3104 if (RT_FAILURE(rc))
3105 return rc;
3106 }
3107
3108 /*
3109 * Flush all EPT tagged-TLB entries (in case VirtualBox or any other hypervisor has been
3110 * using EPTPs) so we don't retain any stale guest-physical mappings which won't get
3111 * invalidated when flushing by VPID.
3112 */
3113 if (pHwvirtMsrs->u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
3114 {
3115 hmR0VmxFlushEpt(NULL /* pVCpu */, NULL /* pVmcsInfo */, VMXTLBFLUSHEPT_ALL_CONTEXTS);
3116 pHostCpu->fFlushAsidBeforeUse = false;
3117 }
3118 else
3119 pHostCpu->fFlushAsidBeforeUse = true;
3120
3121 /* Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. */
3122 ++pHostCpu->cTlbFlushes;
3123
3124 return VINF_SUCCESS;
3125}
3126
3127
3128/**
3129 * Deactivates VT-x on the current CPU.
3130 *
3131 * @returns VBox status code.
3132 * @param pHostCpu The HM physical-CPU structure.
3133 * @param pvCpuPage Pointer to the VMXON region.
3134 * @param HCPhysCpuPage Physical address of the VMXON region.
3135 *
3136 * @remarks This function should never be called when SUPR0EnableVTx() or
3137 * similar was used to enable VT-x on the host.
3138 */
3139VMMR0DECL(int) VMXR0DisableCpu(PHMPHYSCPU pHostCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
3140{
3141 RT_NOREF2(pvCpuPage, HCPhysCpuPage);
3142
3143 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3144 return hmR0VmxLeaveRootMode(pHostCpu);
3145}
3146
3147
3148/**
3149 * Does per-VM VT-x initialization.
3150 *
3151 * @returns VBox status code.
3152 * @param pVM The cross context VM structure.
3153 */
3154VMMR0DECL(int) VMXR0InitVM(PVMCC pVM)
3155{
3156 AssertPtr(pVM);
3157 LogFlowFunc(("pVM=%p\n", pVM));
3158
3159 hmR0VmxStructsInit(pVM);
3160 int rc = hmR0VmxStructsAlloc(pVM);
3161 if (RT_FAILURE(rc))
3162 {
3163 LogRelFunc(("Failed to allocate VMX structures. rc=%Rrc\n", rc));
3164 return rc;
3165 }
3166
3167 /* Setup the crash dump page. */
3168#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3169 strcpy((char *)pVM->hmr0.s.vmx.pbScratch, "SCRATCH Magic");
3170 *(uint64_t *)(pVM->hmr0.s.vmx.pbScratch + 16) = UINT64_C(0xdeadbeefdeadbeef);
3171#endif
3172
3173 /*
3174 * Copy out stuff that's for ring-3 and determine the default configuration.
3175 */
3176 pVM->hm.s.ForR3.vmx.u64HostDr6Zeroed = g_fDr6Zeroed;
3177
3178 /* Since we do not emulate RTM, make sure DR6.RTM cannot be cleared by the
3179 guest and cause confusion there. It appears that the DR6.RTM bit can be
3180 cleared even if TSX-NI is disabled (microcode update / system / whatever). */
3181#ifdef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
3182 if (pVM->hm.s.vmx.fAlwaysInterceptMovDRxCfg == 0)
3183 pVM->hmr0.s.vmx.fAlwaysInterceptMovDRx = g_fDr6Zeroed != X86_DR6_RA1_MASK;
3184 else
3185#endif
3186 pVM->hmr0.s.vmx.fAlwaysInterceptMovDRx = pVM->hm.s.vmx.fAlwaysInterceptMovDRxCfg > 0;
3187 pVM->hm.s.ForR3.vmx.fAlwaysInterceptMovDRx = pVM->hmr0.s.vmx.fAlwaysInterceptMovDRx;
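 /* In short: fAlwaysInterceptMovDRxCfg == 0 means auto-detect from the DR6 probe done
 in VMXR0GlobalInit(), a positive value forces the MOV DRx intercepts on, and any
 other value leaves them off. */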
3188
3189 return VINF_SUCCESS;
3190}
3191
3192
3193/**
3194 * Does per-VM VT-x termination.
3195 *
3196 * @returns VBox status code.
3197 * @param pVM The cross context VM structure.
3198 */
3199VMMR0DECL(int) VMXR0TermVM(PVMCC pVM)
3200{
3201 AssertPtr(pVM);
3202 LogFlowFunc(("pVM=%p\n", pVM));
3203
3204#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3205 if (pVM->hmr0.s.vmx.pbScratch)
3206 RT_BZERO(pVM->hmr0.s.vmx.pbScratch, X86_PAGE_4K_SIZE);
3207#endif
3208 hmR0VmxStructsFree(pVM);
3209 return VINF_SUCCESS;
3210}
3211
3212
3213/**
3214 * Sets up the VM for execution using hardware-assisted VMX.
3215 * This function is only called once per-VM during initialization.
3216 *
3217 * @returns VBox status code.
3218 * @param pVM The cross context VM structure.
3219 */
3220VMMR0DECL(int) VMXR0SetupVM(PVMCC pVM)
3221{
3222 AssertPtr(pVM);
3223 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3224
3225 LogFlowFunc(("pVM=%p\n", pVM));
3226
3227 /*
3228 * At least verify if VMX is enabled, since we can't check if we're in VMX root mode or not
3229 * without causing a #GP.
3230 */
3231 RTCCUINTREG const uHostCr4 = ASMGetCR4();
3232 if (RT_LIKELY(uHostCr4 & X86_CR4_VMXE))
3233 { /* likely */ }
3234 else
3235 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
3236
3237 /*
3238 * Check that nested paging is supported if enabled and copy over the flag to the
3239 * ring-0 only structure.
3240 */
3241 bool const fNestedPaging = pVM->hm.s.fNestedPagingCfg;
3242 AssertReturn( !fNestedPaging
3243 || (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_EPT), /** @todo use a ring-0 copy of ProcCtls2.n.allowed1 */
3244 VERR_INCOMPATIBLE_CONFIG);
3245 pVM->hmr0.s.fNestedPaging = fNestedPaging;
3246 pVM->hmr0.s.fAllow64BitGuests = pVM->hm.s.fAllow64BitGuestsCfg;
3247
3248 /*
3249 * Without unrestricted guest execution, pRealModeTSS and pNonPagingModeEPTPageTable *must*
3250 * always be allocated. We no longer support the highly unlikely case of unrestricted guest
3251 * without pRealModeTSS, see hmR3InitFinalizeR0Intel().
3252 */
3253 bool const fUnrestrictedGuest = pVM->hm.s.vmx.fUnrestrictedGuestCfg;
3254 AssertReturn( !fUnrestrictedGuest
3255 || ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_UNRESTRICTED_GUEST)
3256 && fNestedPaging),
3257 VERR_INCOMPATIBLE_CONFIG);
3258 if ( !fUnrestrictedGuest
3259 && ( !pVM->hm.s.vmx.pNonPagingModeEPTPageTable
3260 || !pVM->hm.s.vmx.pRealModeTSS))
3261 {
3262 LogRelFunc(("Invalid real-on-v86 state.\n"));
3263 return VERR_INTERNAL_ERROR;
3264 }
3265 pVM->hmr0.s.vmx.fUnrestrictedGuest = fUnrestrictedGuest;
3266
3267 /* Initialize these always, see hmR3InitFinalizeR0().*/
3268 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NONE;
3269 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NONE;
3270
3271 /* Setup the tagged-TLB flush handlers. */
3272 int rc = hmR0VmxSetupTaggedTlb(pVM);
3273 if (RT_FAILURE(rc))
3274 {
3275 LogRelFunc(("Failed to setup tagged TLB. rc=%Rrc\n", rc));
3276 return rc;
3277 }
3278
3279 /* Determine LBR capabilities. */
3280 pVM->hmr0.s.vmx.fLbr = pVM->hm.s.vmx.fLbrCfg;
3281 if (pVM->hmr0.s.vmx.fLbr)
3282 {
3283 rc = hmR0VmxSetupLbrMsrRange(pVM);
3284 if (RT_FAILURE(rc))
3285 {
3286 LogRelFunc(("Failed to setup LBR MSR range. rc=%Rrc\n", rc));
3287 return rc;
3288 }
3289 }
3290
3291#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3292 /* Setup the shadow VMCS fields array and VMREAD/VMWRITE bitmaps. */
3293 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
3294 {
3295 rc = hmR0VmxSetupShadowVmcsFieldsArrays(pVM);
3296 if (RT_SUCCESS(rc))
3297 hmR0VmxSetupVmreadVmwriteBitmaps(pVM);
3298 else
3299 {
3300 LogRelFunc(("Failed to setup shadow VMCS fields arrays. rc=%Rrc\n", rc));
3301 return rc;
3302 }
3303 }
3304#endif
3305
3306 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
3307 {
3308 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
3309 Log4Func(("pVCpu=%p idCpu=%RU32\n", pVCpu, pVCpu->idCpu));
3310
3311 pVCpu->hmr0.s.vmx.pfnStartVm = hmR0VmxStartVmSelector;
3312
3313 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
3314 if (RT_SUCCESS(rc))
3315 {
3316#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3317 if (pVM->cpum.ro.GuestFeatures.fVmx)
3318 {
3319 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
3320 if (RT_SUCCESS(rc))
3321 { /* likely */ }
3322 else
3323 {
3324 LogRelFunc(("Nested-guest VMCS setup failed. rc=%Rrc\n", rc));
3325 return rc;
3326 }
3327 }
3328#endif
3329 }
3330 else
3331 {
3332 LogRelFunc(("VMCS setup failed. rc=%Rrc\n", rc));
3333 return rc;
3334 }
3335 }
3336
3337 return VINF_SUCCESS;
3338}
3339
3340
3341/**
3342 * Saves the host control registers (CR0, CR3, CR4) into the host-state area in
3343 * the VMCS.
3344 * @returns CR4 for passing along to hmR0VmxExportHostSegmentRegs.
3345 */
3346static uint64_t hmR0VmxExportHostControlRegs(void)
3347{
3348 int rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR0, ASMGetCR0()); AssertRC(rc);
3349 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR3, ASMGetCR3()); AssertRC(rc);
3350 uint64_t uHostCr4 = ASMGetCR4();
3351 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR4, uHostCr4); AssertRC(rc);
3352 return uHostCr4;
3353}
3354
3355
3356/**
3357 * Saves the host segment registers and GDTR, IDTR, (TR, GS and FS bases) into
3358 * the host-state area in the VMCS.
3359 *
3360 * @returns VBox status code.
3361 * @param pVCpu The cross context virtual CPU structure.
3362 * @param uHostCr4 The host CR4 value.
3363 */
3364static int hmR0VmxExportHostSegmentRegs(PVMCPUCC pVCpu, uint64_t uHostCr4)
3365{
3366 /*
3367 * If we've executed guest code using hardware-assisted VMX, the host-state bits
3368 * will be messed up. We should -not- save the messed up state without restoring
3369 * the original host-state, see @bugref{7240}.
3370 *
3371 * This apparently can happen (most likely the FPU changes), deal with it rather than
3372 * asserting. Was observed booting Solaris 10u10 32-bit guest.
3373 */
3374 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
3375 {
3376 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags,
3377 pVCpu->idCpu));
3378 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
3379 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
3380 }
3381
3382 /*
3383 * Get all the host info.
3384 * ASSUME it is safe to use rdfsbase and friends if the CR4.FSGSBASE bit is set
3385 * without also checking the cpuid bit.
3386 */
3387 uint32_t fRestoreHostFlags;
3388#if RT_INLINE_ASM_EXTERNAL
3389 if (uHostCr4 & X86_CR4_FSGSBASE)
3390 {
3391 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, true /*fHaveFsGsBase*/);
3392 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3393 }
3394 else
3395 {
3396 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, false /*fHaveFsGsBase*/);
3397 fRestoreHostFlags = 0;
3398 }
3399 RTSEL uSelES = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES;
3400 RTSEL uSelDS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS;
3401 RTSEL uSelFS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS;
3402 RTSEL uSelGS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS;
3403#else
3404 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR = ASMGetTR();
3405 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS = ASMGetSS();
3406 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS = ASMGetCS();
3407 ASMGetGDTR((PRTGDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr);
3408 ASMGetIDTR((PRTIDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr);
3409 if (uHostCr4 & X86_CR4_FSGSBASE)
3410 {
3411 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMGetFSBase();
3412 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMGetGSBase();
3413 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3414 }
3415 else
3416 {
3417 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMRdMsr(MSR_K8_FS_BASE);
3418 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMRdMsr(MSR_K8_GS_BASE);
3419 fRestoreHostFlags = 0;
3420 }
3421 RTSEL uSelES, uSelDS, uSelFS, uSelGS;
3422 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS = uSelDS = ASMGetDS();
3423 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES = uSelES = ASMGetES();
3424 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS = uSelFS = ASMGetFS();
3425 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS = uSelGS = ASMGetGS();
3426#endif
3427
3428 /*
3429 * Determine if the host segment registers are suitable for VT-x. Otherwise use zero to
3430 * gain VM-entry and restore them before we get preempted.
3431 *
3432 * See Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers".
3433 */
3434 RTSEL const uSelAll = uSelFS | uSelGS | uSelES | uSelDS;
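 /* ORing the four selectors together lets a single test below detect whether any of
 them has a non-zero RPL or references the LDT. */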
3435 if (uSelAll & (X86_SEL_RPL | X86_SEL_LDT))
3436 {
3437 if (!(uSelAll & X86_SEL_LDT))
3438 {
3439#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3440 do { \
3441 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3442 if ((a_uVmcsVar) & X86_SEL_RPL) \
3443 { \
3444 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3445 (a_uVmcsVar) = 0; \
3446 } \
3447 } while (0)
3448 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3449 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3450 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3451 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3452#undef VMXLOCAL_ADJUST_HOST_SEG
3453 }
3454 else
3455 {
3456#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3457 do { \
3458 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3459 if ((a_uVmcsVar) & (X86_SEL_RPL | X86_SEL_LDT)) \
3460 { \
3461 if (!((a_uVmcsVar) & X86_SEL_LDT)) \
3462 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3463 else \
3464 { \
3465 uint32_t const fAttr = ASMGetSegAttr(a_uVmcsVar); \
3466 if ((fAttr & X86_DESC_P) && fAttr != UINT32_MAX) \
3467 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3468 } \
3469 (a_uVmcsVar) = 0; \
3470 } \
3471 } while (0)
3472 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3473 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3474 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3475 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3476#undef VMXLOCAL_ADJUST_HOST_SEG
3477 }
3478 }
3479
3480 /* Verification based on Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers" */
3481 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR);
3482 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS);
3483 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_LDT));
3484 Assert(!(uSelDS & X86_SEL_RPL)); Assert(!(uSelDS & X86_SEL_LDT));
3485 Assert(!(uSelES & X86_SEL_RPL)); Assert(!(uSelES & X86_SEL_LDT));
3486 Assert(!(uSelFS & X86_SEL_RPL)); Assert(!(uSelFS & X86_SEL_LDT));
3487 Assert(!(uSelGS & X86_SEL_RPL)); Assert(!(uSelGS & X86_SEL_LDT));
3488
3489 /*
3490 * Determine if we need to manually restore the GDTR and IDTR limits as VT-x zaps
3491 * them to the maximum limit (0xffff) on every VM-exit.
3492 */
3493 if (pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb != 0xffff)
3494 fRestoreHostFlags |= VMX_RESTORE_HOST_GDTR;
3495
3496 /*
3497 * IDT limit is effectively capped at 0xfff. (See Intel spec. 6.14.1 "64-Bit Mode IDT" and
3498 * Intel spec. 6.2 "Exception and Interrupt Vectors".) Therefore if the host has the limit
3499 * as 0xfff, VT-x bloating the limit to 0xffff shouldn't cause any different CPU behavior.
3500 * However, several hosts either insist on 0xfff being the limit (Windows Patch Guard) or
3501 * use the limit for other purposes (darwin puts the CPU ID in there but botches sidt
3502 * alignment in at least one consumer). So, we're only allowing the IDTR.LIMIT to be left
3503 * at 0xffff on hosts where we are sure it won't cause trouble.
3504 */
3505#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS)
3506 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb < 0x0fff)
3507#else
3508 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb != 0xffff)
3509#endif
3510 fRestoreHostFlags |= VMX_RESTORE_HOST_IDTR;
3511
3512 /*
3513 * Host TR base. Verify that TR selector doesn't point past the GDT. Masking off the TI
3514 * and RPL bits is effectively what the CPU does for "scaling by 8". TI is always 0 and
3515 * RPL should be too in most cases.
3516 */
3517 RTSEL const uSelTR = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR;
3518 AssertMsgReturn((uSelTR | X86_SEL_RPL_LDT) <= pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb,
3519 ("TR selector exceeds limit. TR=%RTsel cbGdt=%#x\n", uSelTR, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb),
3520 VERR_VMX_INVALID_HOST_STATE);
3521
3522 PCX86DESCHC pDesc = (PCX86DESCHC)(pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr + (uSelTR & X86_SEL_MASK));
3523 uintptr_t const uTRBase = X86DESC64_BASE(pDesc);
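 /* Worked example (hypothetical values): with TR = 0x0040 the descriptor lives at
 HostGdtr.uAddr + 0x40. Since the host runs in 64-bit mode, the TSS descriptor is a
 16-byte system descriptor and X86DESC64_BASE() assembles the 64-bit base address
 from its split base fields. */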
3524
3525 /*
3526 * VT-x unconditionally restores the TR limit to 0x67 and type to 11 (32-bit busy TSS) on
3527 * all VM-exits. The type is the same for 64-bit busy TSS[1]. The limit needs manual
3528 * restoration if the host has something else. Task switching is not supported in 64-bit
3529 * mode[2], but the limit still matters as IOPM is supported in 64-bit mode. Restoring the
3530 * limit lazily while returning to ring-3 is safe because IOPM is not applicable in ring-0.
3531 *
3532 * [1] See Intel spec. 3.5 "System Descriptor Types".
3533 * [2] See Intel spec. 7.2.3 "TSS Descriptor in 64-bit mode".
3534 */
3535 Assert(pDesc->System.u4Type == 11);
3536 if ( pDesc->System.u16LimitLow != 0x67
3537 || pDesc->System.u4LimitHigh)
3538 {
3539 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_TR;
3540
3541 /* If the host has made GDT read-only, we would need to temporarily toggle CR0.WP before writing the GDT. */
3542 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_READ_ONLY)
3543 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_READ_ONLY;
3544 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_NEED_WRITABLE)
3545 {
3546 /* The GDT is read-only but the writable GDT is available. */
3547 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_NEED_WRITABLE;
3548 pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.cb = pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb;
3549 int rc = SUPR0GetCurrentGdtRw(&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.uAddr);
3550 AssertRCReturn(rc, rc);
3551 }
3552 }
3553
3554 pVCpu->hmr0.s.vmx.fRestoreHostFlags = fRestoreHostFlags;
3555
3556 /*
3557 * Do all the VMCS updates in one block to assist nested virtualization.
3558 */
3559 int rc;
3560 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_CS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS); AssertRC(rc);
3561 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_SS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS); AssertRC(rc);
3562 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_DS_SEL, uSelDS); AssertRC(rc);
3563 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_ES_SEL, uSelES); AssertRC(rc);
3564 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_FS_SEL, uSelFS); AssertRC(rc);
3565 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_GS_SEL, uSelGS); AssertRC(rc);
3566 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_TR_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR); AssertRC(rc);
3567 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr); AssertRC(rc);
3568 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_IDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.uAddr); AssertRC(rc);
3569 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_TR_BASE, uTRBase); AssertRC(rc);
3570 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_FS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase); AssertRC(rc);
3571 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase); AssertRC(rc);
3572
3573 return VINF_SUCCESS;
3574}
3575
3576
3577/**
3578 * Exports certain host MSRs in the VM-exit MSR-load area and some in the
3579 * host-state area of the VMCS.
3580 *
3581 * These MSRs will be automatically restored on the host after every successful
3582 * VM-exit.
3583 *
3584 * @param pVCpu The cross context virtual CPU structure.
3585 *
3586 * @remarks No-long-jump zone!!!
3587 */
3588static void hmR0VmxExportHostMsrs(PVMCPUCC pVCpu)
3589{
3590 AssertPtr(pVCpu);
3591
3592 /*
3593 * Save MSRs that we restore lazily (due to preemption or transition to ring-3)
3594 * rather than swapping them on every VM-entry.
3595 */
3596 hmR0VmxLazySaveHostMsrs(pVCpu);
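 /* (The lazily switched set covers the syscall/swapgs related MSRs, such as LSTAR,
 STAR, SF_MASK and KERNEL_GS_BASE; see hmR0VmxLazySaveHostMsrs() and
 hmR0VmxLazyLoadGuestMsrs().) */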
3597
3598 /*
3599 * Host Sysenter MSRs.
3600 */
3601 int rc = VMXWriteVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)); AssertRC(rc);
3602 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP)); AssertRC(rc);
3603 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP)); AssertRC(rc);
3604
3605 /*
3606 * Host EFER MSR.
3607 *
3608 * If the CPU supports the newer VMCS controls for managing EFER, use it. Otherwise it's
3609 * done as part of auto-load/store MSR area in the VMCS, see hmR0VmxExportGuestMsrs().
3610 */
3611 if (g_fHmVmxSupportsVmcsEfer)
3612 {
3613 rc = VMXWriteVmcs64(VMX_VMCS64_HOST_EFER_FULL, g_uHmVmxHostMsrEfer);
3614 AssertRC(rc);
3615 }
3616
3617 /** @todo IA32_PERF_GLOBALCTRL, IA32_PAT also see
3618 * vmxHCExportGuestEntryExitCtls(). */
3619}
3620
3621
3622/**
3623 * Figures out if we need to swap the EFER MSR which is particularly expensive.
3624 *
3625 * We check all relevant bits. For now, that's everything besides LMA/LME, as
3626 * these two bits are handled by VM-entry, see vmxHCExportGuestEntryExitCtls().
3627 *
3628 * @returns true if we need to load guest EFER, false otherwise.
3629 * @param pVCpu The cross context virtual CPU structure.
3630 * @param pVmxTransient The VMX-transient structure.
3631 *
3632 * @remarks Requires EFER, CR4.
3633 * @remarks No-long-jump zone!!!
3634 */
3635static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3636{
3637#ifdef HMVMX_ALWAYS_SWAP_EFER
3638 RT_NOREF2(pVCpu, pVmxTransient);
3639 return true;
3640#else
3641 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
3642 uint64_t const u64HostEfer = g_uHmVmxHostMsrEfer;
3643 uint64_t const u64GuestEfer = pCtx->msrEFER;
3644
3645# ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3646 /*
3647 * For nested-guests, we shall honor swapping the EFER MSR when requested by
3648 * the nested-guest.
3649 */
3650 if ( pVmxTransient->fIsNestedGuest
3651 && ( CPUMIsGuestVmxEntryCtlsSet(pCtx, VMX_ENTRY_CTLS_LOAD_EFER_MSR)
3652 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_SAVE_EFER_MSR)
3653 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_LOAD_EFER_MSR)))
3654 return true;
3655# else
3656 RT_NOREF(pVmxTransient);
3657#endif
3658
3659 /*
3660 * For 64-bit guests, if EFER.SCE bit differs, we need to swap the EFER MSR
3661 * to ensure that the guest's SYSCALL behaviour isn't broken, see @bugref{7386}.
3662 */
3663 if ( CPUMIsGuestInLongModeEx(pCtx)
3664 && (u64GuestEfer & MSR_K6_EFER_SCE) != (u64HostEfer & MSR_K6_EFER_SCE))
3665 return true;
3666
3667 /*
3668 * If the guest uses PAE and EFER.NXE bit differs, we need to swap the EFER MSR
3669 * as it affects guest paging. 64-bit paging implies CR4.PAE as well.
3670 *
3671 * See Intel spec. 4.5 "IA-32e Paging".
3672 * See Intel spec. 4.1.1 "Three Paging Modes".
3673 *
3674 * Verify that we always intercept CR4.PAE and CR0.PG bits, so we don't need to
3675 * import CR4 and CR0 from the VMCS here as those bits are always up to date.
3676 */
3677 Assert(vmxHCGetFixedCr4Mask(pVCpu) & X86_CR4_PAE);
3678 Assert(vmxHCGetFixedCr0Mask(pVCpu) & X86_CR0_PG);
3679 if ( (pCtx->cr4 & X86_CR4_PAE)
3680 && (pCtx->cr0 & X86_CR0_PG))
3681 {
3682 /*
3683 * If nested paging is not used, verify that the guest paging mode matches the
3684 * shadow paging mode which is/will be placed in the VMCS (which is what will
3685 * actually be used while executing the guest and not the CR4 shadow value).
3686 */
3687 AssertMsg( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
3688 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE
3689 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE_NX
3690 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64
3691 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64_NX,
3692 ("enmShadowMode=%u\n", pVCpu->hm.s.enmShadowMode));
3693 if ((u64GuestEfer & MSR_K6_EFER_NXE) != (u64HostEfer & MSR_K6_EFER_NXE))
3694 {
3695 /* Verify that the host is NX capable. */
3696 Assert(g_CpumHostFeatures.s.fNoExecute);
3697 return true;
3698 }
3699 }
3700
3701 return false;
3702#endif
3703}
3704
3705
3706/**
3707 * Exports the guest's RSP into the guest-state area in the VMCS.
3708 *
3709 * @param pVCpu The cross context virtual CPU structure.
3710 *
3711 * @remarks No-long-jump zone!!!
3712 */
3713static void hmR0VmxExportGuestRsp(PVMCPUCC pVCpu)
3714{
3715 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RSP)
3716 {
3717 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RSP);
3718
3719 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_RSP, pVCpu->cpum.GstCtx.rsp);
3720 AssertRC(rc);
3721
3722 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RSP);
3723 Log4Func(("rsp=%#RX64\n", pVCpu->cpum.GstCtx.rsp));
3724 }
3725}
3726
3727
3728/**
3729 * Exports the guest hardware-virtualization state.
3730 *
3731 * @returns VBox status code.
3732 * @param pVCpu The cross context virtual CPU structure.
3733 * @param pVmxTransient The VMX-transient structure.
3734 *
3735 * @remarks No-long-jump zone!!!
3736 */
3737static int hmR0VmxExportGuestHwvirtState(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3738{
3739 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_HWVIRT)
3740 {
3741#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3742 /*
3743 * Check if the VMX feature is exposed to the guest and if the host CPU supports
3744 * VMCS shadowing.
3745 */
3746 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
3747 {
3748 /*
3749 * If the nested hypervisor has loaded a current VMCS and is in VMX root mode,
3750 * copy the nested hypervisor's current VMCS into the shadow VMCS and enable
3751 * VMCS shadowing to skip intercepting some or all VMREAD/VMWRITE VM-exits.
3752 *
3753 * We check for VMX root mode here in case the guest executes VMXOFF without
3754 * clearing the current VMCS pointer and our VMXOFF instruction emulation does
3755 * not clear the current VMCS pointer.
3756 */
3757 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3758 if ( CPUMIsGuestInVmxRootMode(&pVCpu->cpum.GstCtx)
3759 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)
3760 && CPUMIsGuestVmxCurrentVmcsValid(&pVCpu->cpum.GstCtx))
3761 {
3762 /* Paranoia. */
3763 Assert(!pVmxTransient->fIsNestedGuest);
3764
3765 /*
3766 * For performance reasons, also check if the nested hypervisor's current VMCS
3767 * was newly loaded or modified before copying it to the shadow VMCS.
3768 */
3769 if (!pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs)
3770 {
3771 int rc = vmxHCCopyNstGstToShadowVmcs(pVCpu, pVmcsInfo);
3772 AssertRCReturn(rc, rc);
3773 pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs = true;
3774 }
3775 vmxHCEnableVmcsShadowing(pVCpu, pVmcsInfo);
3776 }
3777 else
3778 vmxHCDisableVmcsShadowing(pVCpu, pVmcsInfo);
3779 }
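 /* Reminder: with the VMCS-shadowing secondary control enabled, guest VMREAD and
 VMWRITE instructions whose fields are permitted by the VMREAD/VMWRITE bitmaps are
 satisfied from the shadow VMCS referenced by the VMCS link pointer instead of
 causing VM-exits, which is what makes copying the nested hypervisor's current
 VMCS into the shadow VMCS above worthwhile. */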
3780#else
3781 NOREF(pVmxTransient);
3782#endif
3783 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_HWVIRT);
3784 }
3785 return VINF_SUCCESS;
3786}
3787
3788
3789/**
3790 * Exports the guest debug registers into the guest-state area in the VMCS.
3791 * The guest debug bits are partially shared with the host (e.g. DR6, DR0-3).
3792 *
3793 * This also sets up whether \#DB and MOV DRx accesses cause VM-exits.
3794 *
3795 * @returns VBox status code.
3796 * @param pVCpu The cross context virtual CPU structure.
3797 * @param pVmxTransient The VMX-transient structure.
3798 *
3799 * @remarks No-long-jump zone!!!
3800 */
3801static int hmR0VmxExportSharedDebugState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
3802{
3803 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3804
3805 /** @todo NSTVMX: Figure out what we want to do with nested-guest instruction
3806 * stepping. */
3807 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3808 if (pVmxTransient->fIsNestedGuest)
3809 {
3810 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, CPUMGetGuestDR7(pVCpu));
3811 AssertRC(rc);
3812
3813 /*
3814 * We don't want to always intercept MOV DRx for nested-guests as it causes
3815 * problems when the nested hypervisor isn't intercepting them, see @bugref{10080}.
3816 * Instead, they are strictly only requested when the nested hypervisor intercepts
3817 * them -- handled while merging VMCS controls.
3818 *
3819 * If neither the outer nor the nested-hypervisor is intercepting MOV DRx,
3820 * then the nested-guest debug state should be actively loaded on the host so that
3821 * nested-guest reads its own debug registers without causing VM-exits.
3822 */
3823 if ( !(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
3824 && !CPUMIsGuestDebugStateActive(pVCpu))
3825 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3826 return VINF_SUCCESS;
3827 }
3828
3829#ifdef VBOX_STRICT
3830 /* Validate. Intel spec. 26.3.1.1 "Checks on Guest Controls Registers, Debug Registers, MSRs" */
3831 if (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG)
3832 {
3833 /* Validate. Intel spec. 17.2 "Debug Registers", recompiler paranoia checks. */
3834 Assert((pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_MBZ_MASK | X86_DR7_RAZ_MASK)) == 0);
3835 Assert((pVCpu->cpum.GstCtx.dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK);
3836 }
3837#endif
3838
3839 bool fSteppingDB = false;
3840 uint32_t uProcCtls = pVmcsInfo->u32ProcCtls;
3841 if (pVCpu->hm.s.fSingleInstruction)
3842 {
3843 /* If the CPU supports the monitor trap flag, use it for single stepping in DBGF and avoid intercepting #DB. */
3844 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MONITOR_TRAP_FLAG)
3845 {
3846 uProcCtls |= VMX_PROC_CTLS_MONITOR_TRAP_FLAG;
3847 Assert(fSteppingDB == false);
3848 }
3849 else
3850 {
3851 pVCpu->cpum.GstCtx.eflags.u |= X86_EFL_TF;
3852 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_RFLAGS;
3853 pVCpu->hmr0.s.fClearTrapFlag = true;
3854 fSteppingDB = true;
3855 }
3856 }
3857
3858#ifdef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
3859 bool fInterceptMovDRx = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fAlwaysInterceptMovDRx;
3860#else
3861 bool fInterceptMovDRx = false;
3862#endif
3863 uint64_t u64GuestDr7;
3864 if ( fSteppingDB
3865 || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK))
3866 {
3867 /*
3868 * Use the combined guest and host DRx values found in the hypervisor register set
3869 * because the hypervisor debugger has breakpoints active or someone is single stepping
3870 * on the host side without a monitor trap flag.
3871 *
3872 * Note! DBGF expects a clean DR6 state before executing guest code.
3873 */
3874 if (!CPUMIsHyperDebugStateActive(pVCpu))
3875 {
3876 CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */);
3877 Assert(CPUMIsHyperDebugStateActive(pVCpu));
3878 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
3879 }
3880
3881 /* Update DR7 with the hypervisor value (other DRx registers are handled by CPUM one way or another). */
3882 u64GuestDr7 = CPUMGetHyperDR7(pVCpu);
3883 pVCpu->hmr0.s.fUsingHyperDR7 = true;
3884 fInterceptMovDRx = true;
3885 }
3886 else
3887 {
3888 /*
3889 * If the guest has enabled debug registers, we need to load them prior to
3890 * executing guest code so they'll trigger at the right time.
3891 */
3892 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_DR7);
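 /* DR7[7:0] are the L0..L3/G0..G3 breakpoint enable bits and X86_DR7_GD (bit 13) is
 the general-detect enable; any of them being set means the guest is actively
 using hardware breakpoints. */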
3893 if (pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD))
3894 {
3895 if (!CPUMIsGuestDebugStateActive(pVCpu))
3896 {
3897 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3898 Assert(CPUMIsGuestDebugStateActive(pVCpu));
3899 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
3900 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
3901 }
3902#ifndef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
3903 Assert(!fInterceptMovDRx);
3904#endif
3905 }
3906 else if (!CPUMIsGuestDebugStateActive(pVCpu))
3907 {
3908 /*
3909 * If no debugging is enabled, we'll lazily load DR0-3. Unlike on AMD-V, we
3910 * must intercept #DB in order to maintain a correct DR6 guest value, and
3911 * because we need to intercept it to prevent nested #DBs from hanging the
3912 * CPU, we end up always having to intercept it. See hmR0VmxSetupVmcsXcptBitmap().
3913 */
3914 fInterceptMovDRx = true;
3915 }
3916
3917 /* Update DR7 with the actual guest value. */
3918 u64GuestDr7 = pVCpu->cpum.GstCtx.dr[7];
3919 pVCpu->hmr0.s.fUsingHyperDR7 = false;
3920 }
3921
3922 if (fInterceptMovDRx)
3923 uProcCtls |= VMX_PROC_CTLS_MOV_DR_EXIT;
3924 else
3925 uProcCtls &= ~VMX_PROC_CTLS_MOV_DR_EXIT;
3926
3927 /*
3928 * Update the processor-based VM-execution controls with the MOV-DRx intercepts and the
3929 * monitor-trap flag and update our cache.
3930 */
3931 if (uProcCtls != pVmcsInfo->u32ProcCtls)
3932 {
3933 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls);
3934 AssertRC(rc);
3935 pVmcsInfo->u32ProcCtls = uProcCtls;
3936 }
3937
3938 /*
3939 * Update guest DR7.
3940 */
3941 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, u64GuestDr7);
3942 AssertRC(rc);
3943
3944 /*
3945 * If we have forced EFLAGS.TF to be set because we're single-stepping in the hypervisor debugger,
3946 * we need to clear interrupt inhibition if any as otherwise it causes a VM-entry failure.
3947 *
3948 * See Intel spec. 26.3.1.5 "Checks on Guest Non-Register State".
3949 */
3950 if (fSteppingDB)
3951 {
3952 Assert(pVCpu->hm.s.fSingleInstruction);
3953 Assert(pVCpu->cpum.GstCtx.eflags.Bits.u1TF);
3954
3955 uint32_t fIntrState = 0;
3956 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState);
3957 AssertRC(rc);
3958
3959 if (fIntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS))
3960 {
3961 fIntrState &= ~(VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS);
3962 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState);
3963 AssertRC(rc);
3964 }
3965 }
3966
3967 return VINF_SUCCESS;
3968}
3969
3970
3971/**
3972 * Exports certain guest MSRs into the VM-entry MSR-load and VM-exit MSR-store
3973 * areas.
3974 *
3975 * These MSRs will automatically be loaded to the host CPU on every successful
3976 * VM-entry and stored from the host CPU on every successful VM-exit.
3977 *
3978 * We create/update MSR slots for the host MSRs in the VM-exit MSR-load area. The
3979 * actual host MSR values are not updated here for performance reasons. See
3980 * hmR0VmxExportHostMsrs().
3981 *
3982 * We also export the guest sysenter MSRs into the guest-state area in the VMCS.
3983 *
3984 * @returns VBox status code.
3985 * @param pVCpu The cross context virtual CPU structure.
3986 * @param pVmxTransient The VMX-transient structure.
3987 *
3988 * @remarks No-long-jump zone!!!
3989 */
3990static int hmR0VmxExportGuestMsrs(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3991{
3992 AssertPtr(pVCpu);
3993 AssertPtr(pVmxTransient);
3994
3995 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3996 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
3997
3998 /*
3999 * MSRs for which we use the auto-load/store MSR area in the VMCS.
4000 * For 64-bit hosts, we load/restore them lazily, see hmR0VmxLazyLoadGuestMsrs(),
4001 * nothing to do here. The host MSR values are updated when it's safe in
4002 * hmR0VmxLazySaveHostMsrs().
4003 *
4004 * For nested-guests, the guest's MSRs from the VM-entry MSR-load area are already
4005 * loaded (into the guest-CPU context) by the VMLAUNCH/VMRESUME instruction
4006 * emulation. The merged MSR permission bitmap will ensure that we get VM-exits
4007 * for any MSRs that are not part of the lazy MSRs, so we do not need to place
4008 * those MSRs into the auto-load/store MSR area. Nothing to do here.
4009 */
4010 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_GUEST_AUTO_MSRS)
4011 {
4012 /* No auto-load/store MSRs currently. */
4013 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_GUEST_AUTO_MSRS);
4014 }
4015
4016 /*
4017 * Guest Sysenter MSRs.
4018 */
4019 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_MSR_MASK)
4020 {
4021 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SYSENTER_MSRS);
4022
4023 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_CS_MSR)
4024 {
4025 int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
4026 AssertRC(rc);
4027 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_CS_MSR);
4028 }
4029
4030 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_EIP_MSR)
4031 {
4032 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
4033 AssertRC(rc);
4034 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_EIP_MSR);
4035 }
4036
4037 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_ESP_MSR)
4038 {
4039 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
4040 AssertRC(rc);
4041 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_ESP_MSR);
4042 }
4043 }
4044
4045 /*
4046 * Guest/host EFER MSR.
4047 */
4048 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_EFER_MSR)
4049 {
4050 /* Whether we are using the VMCS to swap the EFER MSR must have been
4051 determined earlier while exporting VM-entry/VM-exit controls. */
4052 Assert(!(ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_ENTRY_EXIT_CTLS));
4053 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_EFER);
4054
4055 if (hmR0VmxShouldSwapEferMsr(pVCpu, pVmxTransient))
4056 {
4057 /*
4058 * EFER.LME is written by software, while EFER.LMA is set by the CPU to (CR0.PG & EFER.LME).
4059 * This means a guest can set EFER.LME=1 while CR0.PG=0 and EFER.LMA can remain 0.
4060 * VT-x requires that "IA-32e mode guest" VM-entry control must be identical to EFER.LMA
4061 * and to CR0.PG. Without unrestricted execution, CR0.PG (used for VT-x, not the shadow)
4062 * must always be 1. This forces us to effectively clear both EFER.LMA and EFER.LME until
4063 * the guest has also set CR0.PG=1. Otherwise, we would run into an invalid-guest state
4064 * during VM-entry.
4065 */
4066 uint64_t uGuestEferMsr = pCtx->msrEFER;
4067 if (!pVM->hmr0.s.vmx.fUnrestrictedGuest)
4068 {
4069 if (!(pCtx->msrEFER & MSR_K6_EFER_LMA))
4070 uGuestEferMsr &= ~MSR_K6_EFER_LME;
4071 else
4072 Assert((pCtx->msrEFER & (MSR_K6_EFER_LMA | MSR_K6_EFER_LME)) == (MSR_K6_EFER_LMA | MSR_K6_EFER_LME));
4073 }
4074
4075 /*
4076 * If the CPU supports VMCS controls for swapping EFER, use it. Otherwise, we have no option
4077 * but to use the auto-load store MSR area in the VMCS for swapping EFER. See @bugref{7368}.
4078 */
4079 if (g_fHmVmxSupportsVmcsEfer)
4080 {
4081 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_EFER_FULL, uGuestEferMsr);
4082 AssertRC(rc);
4083 }
4084 else
4085 {
4086 /*
4087 * We shall use the auto-load/store MSR area only for loading the EFER MSR but we must
4088 * continue to intercept guest read and write accesses to it, see @bugref{7386#c16}.
4089 */
4090 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER, uGuestEferMsr,
4091 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4092 AssertRCReturn(rc, rc);
4093 }
4094
4095 Log4Func(("efer=%#RX64 shadow=%#RX64\n", uGuestEferMsr, pCtx->msrEFER));
4096 }
4097 else if (!g_fHmVmxSupportsVmcsEfer)
4098 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER);
4099
4100 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_EFER_MSR);
4101 }
4102
4103 /*
4104 * Other MSRs.
4105 */
4106 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_OTHER_MSRS)
4107 {
4108 /* Speculation Control (R/W). */
4109 HMVMX_CPUMCTX_ASSERT(pVCpu, HM_CHANGED_GUEST_OTHER_MSRS);
4110 if (pVM->cpum.ro.GuestFeatures.fIbrs)
4111 {
4112 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_IA32_SPEC_CTRL, CPUMGetGuestSpecCtrl(pVCpu),
4113 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4114 AssertRCReturn(rc, rc);
4115 }
4116
4117 /* Last Branch Record. */
4118 if (pVM->hmr0.s.vmx.fLbr)
4119 {
4120 PVMXVMCSINFOSHARED const pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
4121 uint32_t const idFromIpMsrStart = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
4122 uint32_t const idToIpMsrStart = pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
4123 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
4124 Assert(cLbrStack <= 32);
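 /* Note: The LBR MSR ids are model specific and determined during setup (see
 hmR0VmxSetupLbrMsrRange()); on many Intel CPUs, for instance, the from/to stacks
 are MSR_LASTBRANCH_n_FROM_IP (0x680 + n) and MSR_LASTBRANCH_n_TO_IP (0x6c0 + n)
 with the top-of-stack MSR at 0x1c9, while older P4-era CPUs lack the to-IP MSRs
 entirely (hence the idToIpMsrStart != 0 check below). */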
4125 for (uint32_t i = 0; i < cLbrStack; i++)
4126 {
4127 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idFromIpMsrStart + i,
4128 pVmcsInfoShared->au64LbrFromIpMsr[i],
4129 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4130 AssertRCReturn(rc, rc);
4131
4132 /* Some CPUs don't have a Branch-To-IP MSR (P4 and related Xeons). */
4133 if (idToIpMsrStart != 0)
4134 {
4135 rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idToIpMsrStart + i,
4136 pVmcsInfoShared->au64LbrToIpMsr[i],
4137 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4138 AssertRCReturn(rc, rc);
4139 }
4140 }
4141
4142 /* Add LBR top-of-stack MSR (which contains the index to the most recent record). */
4143 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, pVM->hmr0.s.vmx.idLbrTosMsr,
4144 pVmcsInfoShared->u64LbrTosMsr, false /* fSetReadWrite */,
4145 false /* fUpdateHostMsr */);
4146 AssertRCReturn(rc, rc);
4147 }
4148
4149 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_OTHER_MSRS);
4150 }
4151
4152 return VINF_SUCCESS;
4153}
4154
4155
4156/**
4157 * Wrapper for running the guest code in VT-x.
4158 *
4159 * @returns VBox status code, no informational status codes.
4160 * @param pVCpu The cross context virtual CPU structure.
4161 * @param pVmxTransient The VMX-transient structure.
4162 *
4163 * @remarks No-long-jump zone!!!
4164 */
4165DECLINLINE(int) hmR0VmxRunGuest(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
4166{
4167 /* Mark that HM is the keeper of all guest-CPU registers now that we're going to execute guest code. */
4168 pVCpu->cpum.GstCtx.fExtrn |= HMVMX_CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_KEEPER_HM;
4169
4170 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
4171 bool const fResumeVM = RT_BOOL(pVmcsInfo->fVmcsState == VMX_V_VMCS_LAUNCH_STATE_LAUNCHED);
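 /* A VMCS whose launch state is still "clear" must be entered with VMLAUNCH; once it
 has been launched, subsequent entries on the same CPU must use VMRESUME. The
 fResume flag selects between the two in the start-VM helper invoked below. */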
4172#ifdef VBOX_WITH_STATISTICS
4173 if (fResumeVM)
4174 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmResume);
4175 else
4176 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmLaunch);
4177#endif
4178 int rc = pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResumeVM);
4179 AssertMsg(rc <= VINF_SUCCESS, ("%Rrc\n", rc));
4180 return rc;
4181}
4182
4183
4184/**
4185 * Reports world-switch error and dumps some useful debug info.
4186 *
4187 * @param pVCpu The cross context virtual CPU structure.
4188 * @param rcVMRun The return code from VMLAUNCH/VMRESUME.
4189 * @param pVmxTransient The VMX-transient structure (only
4190 * exitReason updated).
4191 */
4192static void hmR0VmxReportWorldSwitchError(PVMCPUCC pVCpu, int rcVMRun, PVMXTRANSIENT pVmxTransient)
4193{
4194 Assert(pVCpu);
4195 Assert(pVmxTransient);
4196 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
4197
4198 Log4Func(("VM-entry failure: %Rrc\n", rcVMRun));
4199 switch (rcVMRun)
4200 {
4201 case VERR_VMX_INVALID_VMXON_PTR:
4202 AssertFailed();
4203 break;
4204 case VINF_SUCCESS: /* VMLAUNCH/VMRESUME succeeded but VM-entry failed... yeah, true story. */
4205 case VERR_VMX_UNABLE_TO_START_VM: /* VMLAUNCH/VMRESUME itself failed. */
4206 {
4207 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &pVCpu->hm.s.vmx.LastError.u32ExitReason);
4208 rc |= VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
4209 AssertRC(rc);
4210 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_QUALIFICATION>(pVCpu, pVmxTransient);
4211
4212 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
4213 /* LastError.idCurrentCpu was already updated in hmR0VmxPreRunGuestCommitted().
4214 Cannot do it here as we may have been long preempted. */
4215
4216#ifdef VBOX_STRICT
4217 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4218 Log4(("uExitReason %#RX32 (VmxTransient %#RX16)\n", pVCpu->hm.s.vmx.LastError.u32ExitReason,
4219 pVmxTransient->uExitReason));
4220 Log4(("Exit Qualification %#RX64\n", pVmxTransient->uExitQual));
4221 Log4(("InstrError %#RX32\n", pVCpu->hm.s.vmx.LastError.u32InstrError));
4222 if (pVCpu->hm.s.vmx.LastError.u32InstrError <= HMVMX_INSTR_ERROR_MAX)
4223 Log4(("InstrError Desc. \"%s\"\n", g_apszVmxInstrErrors[pVCpu->hm.s.vmx.LastError.u32InstrError]));
4224 else
4225 Log4(("InstrError Desc. Range exceeded %u\n", HMVMX_INSTR_ERROR_MAX));
4226 Log4(("Entered host CPU %u\n", pVCpu->hm.s.vmx.LastError.idEnteredCpu));
4227 Log4(("Current host CPU %u\n", pVCpu->hm.s.vmx.LastError.idCurrentCpu));
4228
4229 static struct
4230 {
4231 /** Name of the field to log. */
4232 const char *pszName;
4233 /** The VMCS field. */
4234 uint32_t uVmcsField;
4235 /** Whether host support of this field needs to be checked. */
4236 bool fCheckSupport;
4237 } const s_aVmcsFields[] =
4238 {
4239 { "VMX_VMCS32_CTRL_PIN_EXEC", VMX_VMCS32_CTRL_PIN_EXEC, false },
4240 { "VMX_VMCS32_CTRL_PROC_EXEC", VMX_VMCS32_CTRL_PROC_EXEC, false },
4241 { "VMX_VMCS32_CTRL_PROC_EXEC2", VMX_VMCS32_CTRL_PROC_EXEC2, true },
4242 { "VMX_VMCS32_CTRL_ENTRY", VMX_VMCS32_CTRL_ENTRY, false },
4243 { "VMX_VMCS32_CTRL_EXIT", VMX_VMCS32_CTRL_EXIT, false },
4244 { "VMX_VMCS32_CTRL_CR3_TARGET_COUNT", VMX_VMCS32_CTRL_CR3_TARGET_COUNT, false },
4245 { "VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO", VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, false },
4246 { "VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE", VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, false },
4247 { "VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH", VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, false },
4248 { "VMX_VMCS32_CTRL_TPR_THRESHOLD", VMX_VMCS32_CTRL_TPR_THRESHOLD, false },
4249 { "VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, false },
4250 { "VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, false },
4251 { "VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, false },
4252 { "VMX_VMCS32_CTRL_EXCEPTION_BITMAP", VMX_VMCS32_CTRL_EXCEPTION_BITMAP, false },
4253 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, false },
4254 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, false },
4255 { "VMX_VMCS_CTRL_CR0_MASK", VMX_VMCS_CTRL_CR0_MASK, false },
4256 { "VMX_VMCS_CTRL_CR0_READ_SHADOW", VMX_VMCS_CTRL_CR0_READ_SHADOW, false },
4257 { "VMX_VMCS_CTRL_CR4_MASK", VMX_VMCS_CTRL_CR4_MASK, false },
4258 { "VMX_VMCS_CTRL_CR4_READ_SHADOW", VMX_VMCS_CTRL_CR4_READ_SHADOW, false },
4259 { "VMX_VMCS64_CTRL_EPTP_FULL", VMX_VMCS64_CTRL_EPTP_FULL, true },
4260 { "VMX_VMCS_GUEST_RIP", VMX_VMCS_GUEST_RIP, false },
4261 { "VMX_VMCS_GUEST_RSP", VMX_VMCS_GUEST_RSP, false },
4262 { "VMX_VMCS_GUEST_RFLAGS", VMX_VMCS_GUEST_RFLAGS, false },
4263 { "VMX_VMCS16_VPID", VMX_VMCS16_VPID, true, },
4264 { "VMX_VMCS_HOST_CR0", VMX_VMCS_HOST_CR0, false },
4265 { "VMX_VMCS_HOST_CR3", VMX_VMCS_HOST_CR3, false },
4266 { "VMX_VMCS_HOST_CR4", VMX_VMCS_HOST_CR4, false },
4267 /* The order of selector fields below are fixed! */
4268 { "VMX_VMCS16_HOST_ES_SEL", VMX_VMCS16_HOST_ES_SEL, false },
4269 { "VMX_VMCS16_HOST_CS_SEL", VMX_VMCS16_HOST_CS_SEL, false },
4270 { "VMX_VMCS16_HOST_SS_SEL", VMX_VMCS16_HOST_SS_SEL, false },
4271 { "VMX_VMCS16_HOST_DS_SEL", VMX_VMCS16_HOST_DS_SEL, false },
4272 { "VMX_VMCS16_HOST_FS_SEL", VMX_VMCS16_HOST_FS_SEL, false },
4273 { "VMX_VMCS16_HOST_GS_SEL", VMX_VMCS16_HOST_GS_SEL, false },
4274 { "VMX_VMCS16_HOST_TR_SEL", VMX_VMCS16_HOST_TR_SEL, false },
4275 /* End of ordered selector fields. */
4276 { "VMX_VMCS_HOST_TR_BASE", VMX_VMCS_HOST_TR_BASE, false },
4277 { "VMX_VMCS_HOST_GDTR_BASE", VMX_VMCS_HOST_GDTR_BASE, false },
4278 { "VMX_VMCS_HOST_IDTR_BASE", VMX_VMCS_HOST_IDTR_BASE, false },
4279 { "VMX_VMCS32_HOST_SYSENTER_CS", VMX_VMCS32_HOST_SYSENTER_CS, false },
4280 { "VMX_VMCS_HOST_SYSENTER_EIP", VMX_VMCS_HOST_SYSENTER_EIP, false },
4281 { "VMX_VMCS_HOST_SYSENTER_ESP", VMX_VMCS_HOST_SYSENTER_ESP, false },
4282 { "VMX_VMCS_HOST_RSP", VMX_VMCS_HOST_RSP, false },
4283 { "VMX_VMCS_HOST_RIP", VMX_VMCS_HOST_RIP, false }
4284 };
4285
4286 RTGDTR HostGdtr;
4287 ASMGetGDTR(&HostGdtr);
4288
4289 uint32_t const cVmcsFields = RT_ELEMENTS(s_aVmcsFields);
4290 for (uint32_t i = 0; i < cVmcsFields; i++)
4291 {
4292 uint32_t const uVmcsField = s_aVmcsFields[i].uVmcsField;
4293
4294 bool fSupported;
4295 if (!s_aVmcsFields[i].fCheckSupport)
4296 fSupported = true;
4297 else
4298 {
4299 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4300 switch (uVmcsField)
4301 {
4302 case VMX_VMCS64_CTRL_EPTP_FULL: fSupported = pVM->hmr0.s.fNestedPaging; break;
4303 case VMX_VMCS16_VPID: fSupported = pVM->hmr0.s.vmx.fVpid; break;
4304 case VMX_VMCS32_CTRL_PROC_EXEC2:
4305 fSupported = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
4306 break;
4307 default:
4308 AssertMsgFailedReturnVoid(("Failed to provide VMCS field support for %#RX32\n", uVmcsField));
4309 }
4310 }
4311
4312 if (fSupported)
4313 {
4314 uint8_t const uWidth = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_WIDTH);
4315 switch (uWidth)
4316 {
4317 case VMX_VMCSFIELD_WIDTH_16BIT:
4318 {
4319 uint16_t u16Val;
4320 rc = VMXReadVmcs16(uVmcsField, &u16Val);
4321 AssertRC(rc);
4322 Log4(("%-40s = %#RX16\n", s_aVmcsFields[i].pszName, u16Val));
4323
4324 if ( uVmcsField >= VMX_VMCS16_HOST_ES_SEL
4325 && uVmcsField <= VMX_VMCS16_HOST_TR_SEL)
4326 {
4327 if (u16Val < HostGdtr.cbGdt)
4328 {
4329 /* Order of selectors in s_apszSel is fixed and matches the order in s_aVmcsFields. */
4330 static const char * const s_apszSel[] = { "Host ES", "Host CS", "Host SS", "Host DS",
4331 "Host FS", "Host GS", "Host TR" };
4332 uint8_t const idxSel = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_INDEX);
4333 Assert(idxSel < RT_ELEMENTS(s_apszSel));
4334 PCX86DESCHC pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u16Val & X86_SEL_MASK));
4335 hmR0DumpDescriptor(pDesc, u16Val, s_apszSel[idxSel]);
4336 }
4337 else
4338 Log4((" Selector value exceeds GDT limit!\n"));
4339 }
4340 break;
4341 }
4342
4343 case VMX_VMCSFIELD_WIDTH_32BIT:
4344 {
4345 uint32_t u32Val;
4346 rc = VMXReadVmcs32(uVmcsField, &u32Val);
4347 AssertRC(rc);
4348 Log4(("%-40s = %#RX32\n", s_aVmcsFields[i].pszName, u32Val));
4349 break;
4350 }
4351
4352 case VMX_VMCSFIELD_WIDTH_64BIT:
4353 case VMX_VMCSFIELD_WIDTH_NATURAL:
4354 {
4355 uint64_t u64Val;
4356 rc = VMXReadVmcs64(uVmcsField, &u64Val);
4357 AssertRC(rc);
4358 Log4(("%-40s = %#RX64\n", s_aVmcsFields[i].pszName, u64Val));
4359 break;
4360 }
4361 }
4362 }
4363 }
4364
4365 Log4(("MSR_K6_EFER = %#RX64\n", ASMRdMsr(MSR_K6_EFER)));
4366 Log4(("MSR_K8_CSTAR = %#RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
4367 Log4(("MSR_K8_LSTAR = %#RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
4368 Log4(("MSR_K6_STAR = %#RX64\n", ASMRdMsr(MSR_K6_STAR)));
4369 Log4(("MSR_K8_SF_MASK = %#RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
4370 Log4(("MSR_K8_KERNEL_GS_BASE = %#RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
4371#endif /* VBOX_STRICT */
4372 break;
4373 }
4374
4375 default:
4376 /* Impossible */
4377 AssertMsgFailed(("hmR0VmxReportWorldSwitchError %Rrc (%#x)\n", rcVMRun, rcVMRun));
4378 break;
4379 }
4380}
4381
4382
4383/**
4384 * Sets up the usage of TSC-offsetting and updates the VMCS.
4385 *
4386 * If offsetting is not possible, cause VM-exits on RDTSC(P)s. Also sets up the
4387 * VMX-preemption timer.
4388 *
4389 * @param pVCpu The cross context virtual CPU structure.
4390 * @param pVmxTransient The VMX-transient structure.
4391 * @param idCurrentCpu The current CPU number.
4392 *
4393 * @remarks No-long-jump zone!!!
4394 */
4395static void hmR0VmxUpdateTscOffsettingAndPreemptTimer(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, RTCPUID idCurrentCpu)
4396{
4397 bool fOffsettedTsc;
4398 bool fParavirtTsc;
4399 uint64_t uTscOffset;
4400 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4401
4402 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
4403 {
4404 /* The TMCpuTickGetDeadlineAndTscOffset function is expensive (calling it on
4405 every entry slowed down the bs2-test1 CPUID testcase by ~33% (on a 10980xe)). */
4406 uint64_t cTicksToDeadline;
4407 if ( idCurrentCpu == pVCpu->hmr0.s.idLastCpu
4408 && TMVirtualSyncIsCurrentDeadlineVersion(pVM, pVCpu->hmr0.s.vmx.uTscDeadlineVersion))
4409 {
4410 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadline);
4411 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4412 cTicksToDeadline = pVCpu->hmr0.s.vmx.uTscDeadline - SUPReadTsc();
4413 if ((int64_t)cTicksToDeadline > 0)
4414 { /* hopefully */ }
4415 else
4416 {
4417 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadlineExpired);
4418 cTicksToDeadline = 0;
4419 }
4420 }
4421 else
4422 {
4423 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadline);
4424 cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVM, pVCpu, &uTscOffset, &fOffsettedTsc, &fParavirtTsc,
4425 &pVCpu->hmr0.s.vmx.uTscDeadline,
4426 &pVCpu->hmr0.s.vmx.uTscDeadlineVersion);
4427 pVCpu->hmr0.s.vmx.uTscDeadline += cTicksToDeadline;
4428 if (cTicksToDeadline >= 128)
4429 { /* hopefully */ }
4430 else
4431 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadlineExpired);
4432 }
4433
4434 /* Make sure the returned values have sane upper and lower boundaries. */
4435 uint64_t const u64CpuHz = SUPGetCpuHzFromGipBySetIndex(g_pSUPGlobalInfoPage, pVCpu->iHostCpuSet);
4436 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64th of a second, 15.625ms. */ /** @todo r=bird: Once real+virtual timers move to separate thread, we can raise the upper limit (16ms isn't much). ASSUMES working poke cpu function. */
4437 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 32768); /* 1/32768th of a second, ~30us. */
4438 cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
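        /* Worked example (illustrative, assuming a 3 GHz host TSC): the upper clamp (1/64th of a
           second) then comes to ~46.9M ticks and the lower clamp (1/32768th of a second) to
           ~91.5k ticks; the result is scaled down by cPreemptTimerShift before being programmed
           as the 32-bit VMX-preemption timer value below. */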
4439
4440 /** @todo r=ramshankar: We need to find a way to integrate nested-guest
4441 * preemption timers here. We probably need to clamp the preemption timer,
4442 * after converting the timer value to the host. */
4443 uint32_t const cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
4444 int rc = VMXWriteVmcs32(VMX_VMCS32_PREEMPT_TIMER_VALUE, cPreemptionTickCount);
4445 AssertRC(rc);
4446 }
4447 else
4448 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4449
4450 if (fParavirtTsc)
4451 {
4452 /* Currently neither Hyper-V nor KVM needs to update its paravirt. TSC
4453 information before every VM-entry, hence we disable it for performance's sake. */
4454#if 0
4455 int rc = GIMR0UpdateParavirtTsc(pVM, 0 /* u64Offset */);
4456 AssertRC(rc);
4457#endif
4458 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscParavirt);
4459 }
4460
4461 if ( fOffsettedTsc
4462 && RT_LIKELY(!pVCpu->hmr0.s.fDebugWantRdTscExit))
4463 {
4464 if (pVmxTransient->fIsNestedGuest)
4465 uTscOffset = CPUMApplyNestedGuestTscOffset(pVCpu, uTscOffset);
4466 hmR0VmxSetTscOffsetVmcs(pVmxTransient->pVmcsInfo, uTscOffset);
4467 hmR0VmxRemoveProcCtlsVmcs(pVCpu, pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4468 }
4469 else
4470 {
4471 /* We can't use TSC-offsetting (non-fixed TSC, warp drive active etc.), so cause VM-exits on RDTSC(P). */
4472 hmR0VmxSetProcCtlsVmcs(pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4473 }
4474}
4475
4476
4477/**
4478 * Saves the guest state from the VMCS into the guest-CPU context.
4479 *
4480 * @returns VBox status code.
4481 * @param pVCpu The cross context virtual CPU structure.
4482 * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
4483 */
4484VMMR0DECL(int) VMXR0ImportStateOnDemand(PVMCPUCC pVCpu, uint64_t fWhat)
4485{
4486 AssertPtr(pVCpu);
4487 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4488 return vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, fWhat);
4489}
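
/* Usage sketch (illustrative only): a ring-0 caller that only needs RIP and RFLAGS up to date in
   the guest-CPU context could request just those bits, e.g.:

       int rc = VMXR0ImportStateOnDemand(pVCpu, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS);
       AssertRCReturn(rc, rc);

   CPUMCTX_EXTRN_RIP and CPUMCTX_EXTRN_RFLAGS are assumed here to be among the CPUMCTX_EXTRN_XXX
   flags referred to by the doxygen comment above. */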
4490
4491
4492/**
4493 * Gets VMX VM-exit auxiliary information.
4494 *
4495 * @returns VBox status code.
4496 * @param pVCpu The cross context virtual CPU structure.
4497 * @param pVmxExitAux Where to store the VM-exit auxiliary info.
4498 * @param fWhat What to fetch, HMVMX_READ_XXX.
4499 */
4500VMMR0DECL(int) VMXR0GetExitAuxInfo(PVMCPUCC pVCpu, PVMXEXITAUX pVmxExitAux, uint32_t fWhat)
4501{
4502 PVMXTRANSIENT pVmxTransient = pVCpu->hmr0.s.vmx.pVmxTransient;
4503 if (RT_LIKELY(pVmxTransient))
4504 {
4505 AssertCompile(sizeof(fWhat) == sizeof(pVmxTransient->fVmcsFieldsRead));
4506
4507 /* The exit reason is always available. */
4508 pVmxExitAux->uReason = pVmxTransient->uExitReason;
4509
4510
4511 if (fWhat & HMVMX_READ_EXIT_QUALIFICATION)
4512 {
4513 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_QUALIFICATION>(pVCpu, pVmxTransient);
4514 pVmxExitAux->u64Qual = pVmxTransient->uExitQual;
4515#ifdef VBOX_STRICT
4516 fWhat &= ~HMVMX_READ_EXIT_QUALIFICATION;
4517#endif
4518 }
4519
4520 if (fWhat & HMVMX_READ_IDT_VECTORING_INFO)
4521 {
4522 vmxHCReadToTransientSlow<HMVMX_READ_IDT_VECTORING_INFO>(pVCpu, pVmxTransient);
4523 pVmxExitAux->uIdtVectoringInfo = pVmxTransient->uIdtVectoringInfo;
4524#ifdef VBOX_STRICT
4525 fWhat &= ~HMVMX_READ_IDT_VECTORING_INFO;
4526#endif
4527 }
4528
4529 if (fWhat & HMVMX_READ_IDT_VECTORING_ERROR_CODE)
4530 {
4531 vmxHCReadToTransientSlow<HMVMX_READ_IDT_VECTORING_ERROR_CODE>(pVCpu, pVmxTransient);
4532 pVmxExitAux->uIdtVectoringErrCode = pVmxTransient->uIdtVectoringErrorCode;
4533#ifdef VBOX_STRICT
4534 fWhat &= ~HMVMX_READ_IDT_VECTORING_ERROR_CODE;
4535#endif
4536 }
4537
4538 if (fWhat & HMVMX_READ_EXIT_INSTR_LEN)
4539 {
4540 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INSTR_LEN>(pVCpu, pVmxTransient);
4541 pVmxExitAux->cbInstr = pVmxTransient->cbExitInstr;
4542#ifdef VBOX_STRICT
4543 fWhat &= ~HMVMX_READ_EXIT_INSTR_LEN;
4544#endif
4545 }
4546
4547 if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_INFO)
4548 {
4549 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INTERRUPTION_INFO>(pVCpu, pVmxTransient);
4550 pVmxExitAux->uExitIntInfo = pVmxTransient->uExitIntInfo;
4551#ifdef VBOX_STRICT
4552 fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_INFO;
4553#endif
4554 }
4555
4556 if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE)
4557 {
4558 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE>(pVCpu, pVmxTransient);
4559 pVmxExitAux->uExitIntErrCode = pVmxTransient->uExitIntErrorCode;
4560#ifdef VBOX_STRICT
4561 fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE;
4562#endif
4563 }
4564
4565 if (fWhat & HMVMX_READ_EXIT_INSTR_INFO)
4566 {
4567 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INSTR_INFO>(pVCpu, pVmxTransient);
4568 pVmxExitAux->InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
4569#ifdef VBOX_STRICT
4570 fWhat &= ~HMVMX_READ_EXIT_INSTR_INFO;
4571#endif
4572 }
4573
4574 if (fWhat & HMVMX_READ_GUEST_LINEAR_ADDR)
4575 {
4576 vmxHCReadToTransientSlow<HMVMX_READ_GUEST_LINEAR_ADDR>(pVCpu, pVmxTransient);
4577 pVmxExitAux->u64GuestLinearAddr = pVmxTransient->uGuestLinearAddr;
4578#ifdef VBOX_STRICT
4579 fWhat &= ~HMVMX_READ_GUEST_LINEAR_ADDR;
4580#endif
4581 }
4582
4583 if (fWhat & HMVMX_READ_GUEST_PHYSICAL_ADDR)
4584 {
4585 vmxHCReadToTransientSlow<HMVMX_READ_GUEST_PHYSICAL_ADDR>(pVCpu, pVmxTransient);
4586 pVmxExitAux->u64GuestPhysAddr = pVmxTransient->uGuestPhysicalAddr;
4587#ifdef VBOX_STRICT
4588 fWhat &= ~HMVMX_READ_GUEST_PHYSICAL_ADDR;
4589#endif
4590 }
4591
4592 if (fWhat & HMVMX_READ_GUEST_PENDING_DBG_XCPTS)
4593 {
4594#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4595 vmxHCReadToTransientSlow<HMVMX_READ_GUEST_PENDING_DBG_XCPTS>(pVCpu, pVmxTransient);
4596 pVmxExitAux->u64GuestPendingDbgXcpts = pVmxTransient->uGuestPendingDbgXcpts;
4597#else
4598 pVmxExitAux->u64GuestPendingDbgXcpts = 0;
4599#endif
4600#ifdef VBOX_STRICT
4601 fWhat &= ~HMVMX_READ_GUEST_PENDING_DBG_XCPTS;
4602#endif
4603 }
4604
4605 AssertMsg(!fWhat, ("fWhat=%#RX32 fVmcsFieldsRead=%#RX32\n", fWhat, pVmxTransient->fVmcsFieldsRead));
4606 return VINF_SUCCESS;
4607 }
4608 return VERR_NOT_AVAILABLE;
4609}
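
/* Usage sketch (illustrative only; the caller and log format are made up for the example): fetch
   the exit qualification and instruction length recorded for the last VM-exit, e.g.:

       VMXEXITAUX ExitAux;
       int rc = VMXR0GetExitAuxInfo(pVCpu, &ExitAux,
                                    HMVMX_READ_EXIT_QUALIFICATION | HMVMX_READ_EXIT_INSTR_LEN);
       if (RT_SUCCESS(rc))
           Log4(("Exit %#x: qual=%#RX64 cbInstr=%u\n", ExitAux.uReason, ExitAux.u64Qual, ExitAux.cbInstr));

   The field names (uReason, u64Qual, cbInstr) follow the assignments made in the function above. */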
4610
4611
4612/**
4613 * Does the necessary state syncing before returning to ring-3 for any reason
4614 * (longjmp, preemption, voluntary exits to ring-3) from VT-x.
4615 *
4616 * @returns VBox status code.
4617 * @param pVCpu The cross context virtual CPU structure.
4618 * @param fImportState Whether to import the guest state from the VMCS back
4619 * to the guest-CPU context.
4620 *
4621 * @remarks No-long-jmp zone!!!
4622 */
4623static int hmR0VmxLeave(PVMCPUCC pVCpu, bool fImportState)
4624{
4625 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4626 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
4627
4628 RTCPUID const idCpu = RTMpCpuId();
4629 Log4Func(("HostCpuId=%u\n", idCpu));
4630
4631 /*
4632 * !!! IMPORTANT !!!
4633 * If you modify code here, check whether VMXR0CallRing3Callback() needs to be updated too.
4634 */
4635
4636 /* Save the guest state if necessary. */
4637 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4638 if (fImportState)
4639 {
4640 int rc = vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
4641 AssertRCReturn(rc, rc);
4642 }
4643
4644 /* Restore host FPU state if necessary. We will resync on next R0 reentry. */
4645 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
4646 Assert(!CPUMIsGuestFPUStateActive(pVCpu));
4647
4648 /* Restore host debug registers if necessary. We will resync on next R0 reentry. */
4649#ifdef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
4650 Assert( (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
4651 || pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs
4652 || (!CPUMIsHyperDebugStateActive(pVCpu) && !pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fAlwaysInterceptMovDRx));
4653#else
4654 Assert( (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
4655 || pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs
4656 || !CPUMIsHyperDebugStateActive(pVCpu));
4657#endif
4658 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
4659 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
4660 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
4661
4662 /* Restore host-state bits that VT-x only restores partially. */
4663 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
4664 {
4665 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags, idCpu));
4666 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
4667 }
4668 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
4669
4670 /* Restore the lazy host MSRs as we're leaving VT-x context. */
4671 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
4672 {
4673 /* We shouldn't restore the host MSRs without saving the guest MSRs first. */
4674 if (!fImportState)
4675 {
4676 int rc = vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS);
4677 AssertRCReturn(rc, rc);
4678 }
4679 hmR0VmxLazyRestoreHostMsrs(pVCpu);
4680 Assert(!pVCpu->hmr0.s.vmx.fLazyMsrs);
4681 }
4682 else
4683 pVCpu->hmr0.s.vmx.fLazyMsrs = 0;
4684
4685 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
4686 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
4687
4688 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry);
4689 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatImportGuestState);
4690 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExportGuestState);
4691 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatPreExit);
4692 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitHandling);
4693 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitIO);
4694 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitMovCRx);
4695 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitXcptNmi);
4696 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitVmentry);
4697 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
4698
4699 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
4700
4701 /** @todo This partially defeats the purpose of having preemption hooks.
4702 * The problem is that deregistering the hooks should be moved to a place that
4703 * lasts until the EMT is about to be destroyed, not done every time we leave HM
4704 * context.
4705 */
4706 int rc = hmR0VmxClearVmcs(pVmcsInfo);
4707 AssertRCReturn(rc, rc);
4708
4709#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4710 /*
4711 * A valid shadow VMCS is made active as part of VM-entry. It is necessary to
4712 * clear a shadow VMCS before allowing that VMCS to become active on another
4713 * logical processor. We may or may not be importing guest state which clears
4714 * it, so cover for it here.
4715 *
4716 * See Intel spec. 24.11.1 "Software Use of Virtual-Machine Control Structures".
4717 */
4718 if ( pVmcsInfo->pvShadowVmcs
4719 && pVmcsInfo->fShadowVmcsState != VMX_V_VMCS_LAUNCH_STATE_CLEAR)
4720 {
4721 rc = vmxHCClearShadowVmcs(pVmcsInfo);
4722 AssertRCReturn(rc, rc);
4723 }
4724
4725 /*
4726 * Flag that we need to re-export the host state if we switch to this VMCS before
4727 * executing guest or nested-guest code.
4728 */
4729 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
4730#endif
4731
4732 Log4Func(("Cleared Vmcs. HostCpuId=%u\n", idCpu));
4733 NOREF(idCpu);
4734 return VINF_SUCCESS;
4735}
4736
4737
4738/**
4739 * Leaves the VT-x session.
4740 *
4741 * @returns VBox status code.
4742 * @param pVCpu The cross context virtual CPU structure.
4743 *
4744 * @remarks No-long-jmp zone!!!
4745 */
4746static int hmR0VmxLeaveSession(PVMCPUCC pVCpu)
4747{
4748 HM_DISABLE_PREEMPT(pVCpu);
4749 HMVMX_ASSERT_CPU_SAFE(pVCpu);
4750 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
4751 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4752
4753 /* When thread-context hooks are used, we can avoid doing the leave again if we had been preempted before
4754 and done this from the VMXR0ThreadCtxCallback(). */
4755 if (!pVCpu->hmr0.s.fLeaveDone)
4756 {
4757 int rc2 = hmR0VmxLeave(pVCpu, true /* fImportState */);
4758 AssertRCReturnStmt(rc2, HM_RESTORE_PREEMPT(), rc2);
4759 pVCpu->hmr0.s.fLeaveDone = true;
4760 }
4761 Assert(!pVCpu->cpum.GstCtx.fExtrn);
4762
4763 /*
4764 * !!! IMPORTANT !!!
4765 * If you modify code here, make sure to check whether VMXR0CallRing3Callback() needs to be updated too.
4766 */
4767
4768 /* Deregister hook now that we've left HM context before re-enabling preemption. */
4769 /** @todo Deregistering here means we need to VMCLEAR always
4770 * (longjmp/exit-to-r3) in VT-x which is not efficient, eliminate need
4771 * for calling VMMR0ThreadCtxHookDisable here! */
4772 VMMR0ThreadCtxHookDisable(pVCpu);
4773
4774 /* Leave HM context. This takes care of local init (term) and deregistering the longjmp-to-ring-3 callback. */
4775 int rc = HMR0LeaveCpu(pVCpu);
4776 HM_RESTORE_PREEMPT();
4777 return rc;
4778}
4779
4780
4781/**
4782 * Takes necessary actions before going back to ring-3.
4783 *
4784 * An action requires us to go back to ring-3. This function does the necessary
4785 * steps before we can safely return to ring-3. This is not the same as longjmps
4786 * to ring-3; this is voluntary and prepares the guest so it may continue
4787 * executing outside HM (recompiler/IEM).
4788 *
4789 * @returns VBox status code.
4790 * @param pVCpu The cross context virtual CPU structure.
4791 * @param rcExit The reason for exiting to ring-3. Can be
4792 * VINF_VMM_UNKNOWN_RING3_CALL.
4793 */
4794static int hmR0VmxExitToRing3(PVMCPUCC pVCpu, VBOXSTRICTRC rcExit)
4795{
4796 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
4797
4798 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4799 if (RT_UNLIKELY(rcExit == VERR_VMX_INVALID_VMCS_PTR))
4800 {
4801 VMXGetCurrentVmcs(&pVCpu->hm.s.vmx.LastError.HCPhysCurrentVmcs);
4802 pVCpu->hm.s.vmx.LastError.u32VmcsRev = *(uint32_t *)pVmcsInfo->pvVmcs;
4803 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
4804 /* LastError.idCurrentCpu was updated in hmR0VmxPreRunGuestCommitted(). */
4805 }
4806
4807 /* Please, no longjumps here (any logging shouldn't flush jump back to ring-3). NO LOGGING BEFORE THIS POINT! */
4808 VMMRZCallRing3Disable(pVCpu);
4809 Log4Func(("rcExit=%d\n", VBOXSTRICTRC_VAL(rcExit)));
4810
4811 /*
4812 * Convert any pending HM events back to TRPM due to premature exits to ring-3.
4813 * We need to do this only on returns to ring-3 and not for longjmps to ring3.
4814 * We need to do this only on returns to ring-3 and not for longjmps to ring-3.
4815 * This is because execution may continue from ring-3 and we would need to inject
4816 * the event from there (hence place it back in TRPM).
4817 */
4818 if (pVCpu->hm.s.Event.fPending)
4819 {
4820 vmxHCPendingEventToTrpmTrap(pVCpu);
4821 Assert(!pVCpu->hm.s.Event.fPending);
4822
4823 /* Clear the events from the VMCS. */
4824 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0); AssertRC(rc);
4825 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, 0); AssertRC(rc);
4826 }
4827#ifdef VBOX_STRICT
4828 /*
4829 * We check for rcExit here since for errors like VERR_VMX_UNABLE_TO_START_VM (which are
4830 * fatal), we don't care about verifying duplicate injection of events. Errors like
4831 * VERR_EM_INTERPRET are converted to their VINF_* counterparts -prior- to calling this
4832 * function so those should and will be checked below.
4833 */
4834 else if (RT_SUCCESS(rcExit))
4835 {
4836 /*
4837 * Ensure we don't accidentally clear a pending HM event without clearing the VMCS.
4838 * This can be pretty hard to debug otherwise, interrupts might get injected twice
4839 * occasionally, see @bugref{9180#c42}.
4840 *
4841 * However, if the VM-entry failed, any VM entry-interruption info. field would
4842 * be left unmodified as the event would not have been injected to the guest. In
4843 * such cases, don't assert, we're not going to continue guest execution anyway.
4844 */
4845 uint32_t uExitReason;
4846 uint32_t uEntryIntInfo;
4847 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
4848 rc |= VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &uEntryIntInfo);
4849 AssertRC(rc);
4850 AssertMsg(VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason) || !VMX_ENTRY_INT_INFO_IS_VALID(uEntryIntInfo),
4851 ("uExitReason=%#RX32 uEntryIntInfo=%#RX32 rcExit=%d\n", uExitReason, uEntryIntInfo, VBOXSTRICTRC_VAL(rcExit)));
4852 }
4853#endif
4854
4855 /*
4856 * Clear the interrupt-window and NMI-window VMCS controls as we could have got
4857 * a VM-exit with higher priority than interrupt-window or NMI-window VM-exits
4858 * (e.g. TPR below threshold).
4859 */
4860 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
4861 {
4862 Assert(!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs);
4863 vmxHCClearIntWindowExitVmcs(pVCpu, pVmcsInfo);
4864 vmxHCClearNmiWindowExitVmcs(pVCpu, pVmcsInfo);
4865 }
4866
4867 /* If we're emulating an instruction, we shouldn't have any TRPM traps pending
4868 and if we're injecting an event we should have a TRPM trap pending. */
4869 AssertMsg(rcExit != VINF_EM_RAW_INJECT_TRPM_EVENT || TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
4870#ifndef DEBUG_bird /* Triggered after firing an NMI against NT4SP1, possibly a triple fault in progress. */
4871 AssertMsg(rcExit != VINF_EM_RAW_EMULATE_INSTR || !TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
4872#endif
4873
4874 /* Save guest state and restore host state bits. */
4875 int rc = hmR0VmxLeaveSession(pVCpu);
4876 AssertRCReturn(rc, rc);
4877 STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
4878
4879 /* Thread-context hooks are unregistered at this point!!! */
4880 /* Ring-3 callback notifications are unregistered at this point!!! */
4881
4882 /* Sync recompiler state. */
4883 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
4884 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR
4885 | CPUM_CHANGED_LDTR
4886 | CPUM_CHANGED_GDTR
4887 | CPUM_CHANGED_IDTR
4888 | CPUM_CHANGED_TR
4889 | CPUM_CHANGED_HIDDEN_SEL_REGS);
4890 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
4891 && CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx))
4892 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
4893
4894 Assert(!pVCpu->hmr0.s.fClearTrapFlag);
4895
4896 /* Update the exit-to-ring 3 reason. */
4897 pVCpu->hm.s.rcLastExitToR3 = VBOXSTRICTRC_VAL(rcExit);
4898
4899 /* On our way back from ring-3 reload the guest state if there is a possibility of it being changed. */
4900 if ( rcExit != VINF_EM_RAW_INTERRUPT
4901 || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
4902 {
4903 Assert(!(pVCpu->cpum.GstCtx.fExtrn & HMVMX_CPUMCTX_EXTRN_ALL));
4904 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
4905 }
4906
4907 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3);
4908 VMMRZCallRing3Enable(pVCpu);
4909 return rc;
4910}
4911
4912
4913/**
4914 * VMMRZCallRing3() callback wrapper which saves the guest state before we
4915 * longjump due to a ring-0 assertion.
4916 *
4917 * @returns VBox status code.
4918 * @param pVCpu The cross context virtual CPU structure.
4919 */
4920VMMR0DECL(int) VMXR0AssertionCallback(PVMCPUCC pVCpu)
4921{
4922 /*
4923 * !!! IMPORTANT !!!
4924 * If you modify code here, check whether hmR0VmxLeave() and hmR0VmxLeaveSession() needs to be updated too.
4925 * This is a stripped down version which gets out ASAP, trying to not trigger any further assertions.
4926 */
4927 VMMR0AssertionRemoveNotification(pVCpu);
4928 VMMRZCallRing3Disable(pVCpu);
4929 HM_DISABLE_PREEMPT(pVCpu);
4930
4931 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4932 vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
4933 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
4934 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
4935
4936 /* Restore host-state bits that VT-x only restores partially. */
4937 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
4938 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
4939 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
4940
4941 /* Restore the lazy host MSRs as we're leaving VT-x context. */
4942 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
4943 hmR0VmxLazyRestoreHostMsrs(pVCpu);
4944
4945 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
4946 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
4947 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
4948
4949 /* Clear the current VMCS data back to memory (the shadow VMCS, if any, would have been
4950 cleared as part of importing the guest state above). */
4951 hmR0VmxClearVmcs(pVmcsInfo);
4952
4953 /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */
4954 VMMR0ThreadCtxHookDisable(pVCpu);
4955
4956 /* Leave HM context. This takes care of local init (term). */
4957 HMR0LeaveCpu(pVCpu);
4958 HM_RESTORE_PREEMPT();
4959 return VINF_SUCCESS;
4960}
4961
4962
4963/**
4964 * Enters the VT-x session.
4965 *
4966 * @returns VBox status code.
4967 * @param pVCpu The cross context virtual CPU structure.
4968 */
4969VMMR0DECL(int) VMXR0Enter(PVMCPUCC pVCpu)
4970{
4971 AssertPtr(pVCpu);
4972 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fSupported);
4973 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4974
4975 LogFlowFunc(("pVCpu=%p\n", pVCpu));
4976 Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
4977 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
4978
4979#ifdef VBOX_STRICT
4980 /* At least verify VMX is enabled, since we can't check if we're in VMX root mode without #GP'ing. */
4981 RTCCUINTREG uHostCr4 = ASMGetCR4();
4982 if (!(uHostCr4 & X86_CR4_VMXE))
4983 {
4984 LogRelFunc(("X86_CR4_VMXE bit in CR4 is not set!\n"));
4985 return VERR_VMX_X86_CR4_VMXE_CLEARED;
4986 }
4987#endif
4988
4989 /*
4990 * Do the EMT scheduled L1D and MDS flush here if needed.
4991 */
4992 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
4993 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
4994 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
4995 hmR0MdsClear();
4996
4997 /*
4998 * Load the appropriate VMCS as the current and active one.
4999 */
5000 PVMXVMCSINFO pVmcsInfo;
5001 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx);
5002 if (!fInNestedGuestMode)
5003 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfo;
5004 else
5005 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
5006 int rc = hmR0VmxLoadVmcs(pVmcsInfo);
5007 if (RT_SUCCESS(rc))
5008 {
5009 pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs = fInNestedGuestMode;
5010 pVCpu->hm.s.vmx.fSwitchedToNstGstVmcsCopyForRing3 = fInNestedGuestMode;
5011 pVCpu->hmr0.s.fLeaveDone = false;
5012 Log4Func(("Loaded %s Vmcs. HostCpuId=%u\n", fInNestedGuestMode ? "nested-guest" : "guest", RTMpCpuId()));
5013 }
5014 return rc;
5015}
5016
5017
5018/**
5019 * The thread-context callback.
5020 *
5021 * This is used together with RTThreadCtxHookCreate() on platforms which
5022 * support it, and directly from VMMR0EmtPrepareForBlocking() and
5023 * VMMR0EmtResumeAfterBlocking() on platforms which don't.
5024 *
5025 * @param enmEvent The thread-context event.
5026 * @param pVCpu The cross context virtual CPU structure.
5027 * @param fGlobalInit Whether global VT-x/AMD-V init. was used.
5028 * @thread EMT(pVCpu)
5029 */
5030VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPUCC pVCpu, bool fGlobalInit)
5031{
5032 AssertPtr(pVCpu);
5033 RT_NOREF1(fGlobalInit);
5034
5035 switch (enmEvent)
5036 {
5037 case RTTHREADCTXEVENT_OUT:
5038 {
5039 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5040 VMCPU_ASSERT_EMT(pVCpu);
5041
5042 /* No longjmps (logger flushes, locks) in this fragile context. */
5043 VMMRZCallRing3Disable(pVCpu);
5044 Log4Func(("Preempting: HostCpuId=%u\n", RTMpCpuId()));
5045
5046 /* Restore host-state (FPU, debug etc.) */
5047 if (!pVCpu->hmr0.s.fLeaveDone)
5048 {
5049 /*
5050 * Do -not- import the guest-state here as we might already be in the middle of importing
5051 * it, esp. bad if we're holding the PGM lock, see comment at the end of vmxHCImportGuestStateEx().
5052 */
5053 hmR0VmxLeave(pVCpu, false /* fImportState */);
5054 pVCpu->hmr0.s.fLeaveDone = true;
5055 }
5056
5057 /* Leave HM context, takes care of local init (term). */
5058 int rc = HMR0LeaveCpu(pVCpu);
5059 AssertRC(rc);
5060
5061 /* Restore longjmp state. */
5062 VMMRZCallRing3Enable(pVCpu);
5063 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreempt);
5064 break;
5065 }
5066
5067 case RTTHREADCTXEVENT_IN:
5068 {
5069 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5070 VMCPU_ASSERT_EMT(pVCpu);
5071
5072 /* Do the EMT scheduled L1D and MDS flush here if needed. */
5073 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
5074 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
5075 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
5076 hmR0MdsClear();
5077
5078 /* No longjmps here, as we don't want to trigger preemption (& its hook) while resuming. */
5079 VMMRZCallRing3Disable(pVCpu);
5080 Log4Func(("Resumed: HostCpuId=%u\n", RTMpCpuId()));
5081
5082 /* Initialize the bare minimum state required for HM. This takes care of
5083 initializing VT-x if necessary (onlined CPUs, local init etc.) */
5084 int rc = hmR0EnterCpu(pVCpu);
5085 AssertRC(rc);
5086 Assert( (pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
5087 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
5088
5089 /* Load the active VMCS as the current one. */
5090 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5091 rc = hmR0VmxLoadVmcs(pVmcsInfo);
5092 AssertRC(rc);
5093 Log4Func(("Resumed: Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
5094 pVCpu->hmr0.s.fLeaveDone = false;
5095
5096 /* Restore longjmp state. */
5097 VMMRZCallRing3Enable(pVCpu);
5098 break;
5099 }
5100
5101 default:
5102 break;
5103 }
5104}
5105
5106
5107/**
5108 * Exports the host state into the VMCS host-state area.
5109 * Sets up the VM-exit MSR-load area.
5110 *
5111 * The CPU state will be loaded from these fields on every successful VM-exit.
5112 *
5113 * @returns VBox status code.
5114 * @param pVCpu The cross context virtual CPU structure.
5115 *
5116 * @remarks No-long-jump zone!!!
5117 */
5118static int hmR0VmxExportHostState(PVMCPUCC pVCpu)
5119{
5120 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5121
5122 int rc = VINF_SUCCESS;
5123 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
5124 {
5125 uint64_t uHostCr4 = hmR0VmxExportHostControlRegs();
5126
5127 rc = hmR0VmxExportHostSegmentRegs(pVCpu, uHostCr4);
5128 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5129
5130 hmR0VmxExportHostMsrs(pVCpu);
5131
5132 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_HOST_CONTEXT;
5133 }
5134 return rc;
5135}
5136
5137
5138/**
5139 * Saves the host state in the VMCS host-state.
5140 *
5141 * @returns VBox status code.
5142 * @param pVCpu The cross context virtual CPU structure.
5143 *
5144 * @remarks No-long-jump zone!!!
5145 */
5146VMMR0DECL(int) VMXR0ExportHostState(PVMCPUCC pVCpu)
5147{
5148 AssertPtr(pVCpu);
5149 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5150
5151 /*
5152 * Export the host state here while entering HM context.
5153 * When thread-context hooks are used, we might get preempted and have to re-save the host
5154 * state but most of the time we won't be, so do it here before we disable interrupts.
5155 */
5156 return hmR0VmxExportHostState(pVCpu);
5157}
5158
5159
5160/**
5161 * Exports the guest state into the VMCS guest-state area.
5162 *
5163 * This will typically be done before VM-entry when the guest-CPU state and the
5164 * VMCS state may potentially be out of sync.
5165 *
5166 * Sets up the VM-entry MSR-load and VM-exit MSR-store areas. Sets up the
5167 * VM-entry controls.
5168 * Sets up the appropriate VMX non-root function to execute guest code based on
5169 * the guest CPU mode.
5170 *
5171 * @returns VBox strict status code.
5172 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5173 * without unrestricted guest execution and the VMMDev is not presently
5174 * mapped (e.g. EFI32).
5175 *
5176 * @param pVCpu The cross context virtual CPU structure.
5177 * @param pVmxTransient The VMX-transient structure.
5178 *
5179 * @remarks No-long-jump zone!!!
5180 */
5181static VBOXSTRICTRC hmR0VmxExportGuestState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5182{
5183 AssertPtr(pVCpu);
5184 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5185 LogFlowFunc(("pVCpu=%p\n", pVCpu));
5186
5187 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExportGuestState, x);
5188
5189 /*
5190 * Determine real-on-v86 mode.
5191 * Used when the guest is in real-mode and unrestricted guest execution is not used.
5192 */
5193 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
5194 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUnrestrictedGuest
5195 || !CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx))
5196 pVmcsInfoShared->RealMode.fRealOnV86Active = false;
5197 else
5198 {
5199 Assert(!pVmxTransient->fIsNestedGuest);
5200 pVmcsInfoShared->RealMode.fRealOnV86Active = true;
5201 }
5202
5203 /*
5204 * Any ordering dependency among the sub-functions below must be explicitly stated using comments.
5205 * Ideally, assert that the cross-dependent bits are up-to-date at the point of using it.
5206 */
5207 int rc = vmxHCExportGuestEntryExitCtls(pVCpu, pVmxTransient);
5208 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5209
5210 rc = vmxHCExportGuestCR0(pVCpu, pVmxTransient);
5211 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5212
5213 VBOXSTRICTRC rcStrict = vmxHCExportGuestCR3AndCR4(pVCpu, pVmxTransient);
5214 if (rcStrict == VINF_SUCCESS)
5215 { /* likely */ }
5216 else
5217 {
5218 Assert(rcStrict == VINF_EM_RESCHEDULE_REM || RT_FAILURE_NP(rcStrict));
5219 return rcStrict;
5220 }
5221
5222 rc = vmxHCExportGuestSegRegsXdtr(pVCpu, pVmxTransient);
5223 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5224
5225 rc = hmR0VmxExportGuestMsrs(pVCpu, pVmxTransient);
5226 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5227
5228 vmxHCExportGuestApicTpr(pVCpu, pVmxTransient);
5229 vmxHCExportGuestXcptIntercepts(pVCpu, pVmxTransient);
5230 vmxHCExportGuestRip(pVCpu);
5231 hmR0VmxExportGuestRsp(pVCpu);
5232 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5233
5234 rc = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
5235 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5236
5237 /* Clear any bits that may be set but exported unconditionally or unused/reserved bits. */
5238 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~( (HM_CHANGED_GUEST_GPRS_MASK & ~HM_CHANGED_GUEST_RSP)
5239 | HM_CHANGED_GUEST_CR2
5240 | (HM_CHANGED_GUEST_DR_MASK & ~HM_CHANGED_GUEST_DR7)
5241 | HM_CHANGED_GUEST_X87
5242 | HM_CHANGED_GUEST_SSE_AVX
5243 | HM_CHANGED_GUEST_OTHER_XSAVE
5244 | HM_CHANGED_GUEST_XCRx
5245 | HM_CHANGED_GUEST_KERNEL_GS_BASE /* Part of lazy or auto load-store MSRs. */
5246 | HM_CHANGED_GUEST_SYSCALL_MSRS /* Part of lazy or auto load-store MSRs. */
5247 | HM_CHANGED_GUEST_TSC_AUX
5248 | HM_CHANGED_GUEST_OTHER_MSRS
5249 | (HM_CHANGED_KEEPER_STATE_MASK & ~HM_CHANGED_VMX_MASK)));
5250
5251 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExportGuestState, x);
5252 return rc;
5253}
5254
5255
5256/**
5257 * Exports the state shared between the host and guest into the VMCS.
5258 *
5259 * @param pVCpu The cross context virtual CPU structure.
5260 * @param pVmxTransient The VMX-transient structure.
5261 *
5262 * @remarks No-long-jump zone!!!
5263 */
5264static void hmR0VmxExportSharedState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5265{
5266 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5267 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5268
5269 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_DR_MASK)
5270 {
5271 int rc = hmR0VmxExportSharedDebugState(pVCpu, pVmxTransient);
5272 AssertRC(rc);
5273 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_DR_MASK;
5274
5275 /* Loading shared debug bits might have changed eflags.TF bit for debugging purposes. */
5276 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_RFLAGS)
5277 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5278 }
5279
5280 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_GUEST_LAZY_MSRS)
5281 {
5282 hmR0VmxLazyLoadGuestMsrs(pVCpu);
5283 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_VMX_GUEST_LAZY_MSRS;
5284 }
5285
5286 AssertMsg(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE),
5287 ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
5288}
5289
5290
5291/**
5292 * Worker for loading the guest-state bits in the inner VT-x execution loop.
5293 *
5294 * @returns Strict VBox status code (i.e. informational status codes too).
5295 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5296 * without unrestricted guest execution and the VMMDev is not presently
5297 * mapped (e.g. EFI32).
5298 *
5299 * @param pVCpu The cross context virtual CPU structure.
5300 * @param pVmxTransient The VMX-transient structure.
5301 *
5302 * @remarks No-long-jump zone!!!
5303 */
5304static VBOXSTRICTRC hmR0VmxExportGuestStateOptimal(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5305{
5306 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5307 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5308
5309#ifdef HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
5310 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
5311#endif
5312
5313 /*
5314 * For many VM-exits only RIP/RSP/RFLAGS (and HWVIRT state when executing a nested-guest)
5315 * changes. First try to export only these without going through all other changed-flag checks.
5316 */
5317 VBOXSTRICTRC rcStrict;
5318 uint64_t const fCtxMask = HM_CHANGED_ALL_GUEST & ~HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE;
5319 uint64_t const fMinimalMask = HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT;
5320 uint64_t const fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
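    /* Illustrative: if fCtxChanged holds only HM_CHANGED_GUEST_RIP (common after simple instruction
       emulation), the quick path below is taken; if it additionally holds, say, HM_CHANGED_GUEST_CR0,
       we fall through to the full hmR0VmxExportGuestState() path instead. */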
5321
5322 /* If only RIP/RSP/RFLAGS/HWVIRT changed, export only those (quicker, happens more often).*/
5323 if ( (fCtxChanged & fMinimalMask)
5324 && !(fCtxChanged & (fCtxMask & ~fMinimalMask)))
5325 {
5326 vmxHCExportGuestRip(pVCpu);
5327 hmR0VmxExportGuestRsp(pVCpu);
5328 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5329 rcStrict = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
5330 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportMinimal);
5331 }
5332 /* If anything else also changed, go through the full export routine and export as required. */
5333 else if (fCtxChanged & fCtxMask)
5334 {
5335 rcStrict = hmR0VmxExportGuestState(pVCpu, pVmxTransient);
5336 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
5337 { /* likely */}
5338 else
5339 {
5340 AssertMsg(rcStrict == VINF_EM_RESCHEDULE_REM, ("Failed to export guest state! rc=%Rrc\n",
5341 VBOXSTRICTRC_VAL(rcStrict)));
5342 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5343 return rcStrict;
5344 }
5345 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportFull);
5346 }
5347 /* Nothing changed, nothing to load here. */
5348 else
5349 rcStrict = VINF_SUCCESS;
5350
5351#ifdef VBOX_STRICT
5352 /* All the guest state bits should be loaded except maybe the host context and/or the shared host/guest bits. */
5353 uint64_t const fCtxChangedCur = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
5354 AssertMsg(!(fCtxChangedCur & fCtxMask), ("fCtxChangedCur=%#RX64\n", fCtxChangedCur));
5355#endif
5356 return rcStrict;
5357}
5358
5359
5360/**
5361 * Map the APIC-access page for virtualizing APIC accesses.
5362 *
5363 * This can cause longjumps to R3 due to the acquisition of the PGM lock. Hence,
5364 * this is not done as part of exporting guest state; see @bugref{8721}.
5365 *
5366 * @returns VBox status code.
5367 * @param pVCpu The cross context virtual CPU structure.
5368 * @param GCPhysApicBase The guest-physical address of the APIC access page.
5369 */
5370static int hmR0VmxMapHCApicAccessPage(PVMCPUCC pVCpu, RTGCPHYS GCPhysApicBase)
5371{
5372 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
5373 Assert(GCPhysApicBase);
5374
5375 LogFunc(("Mapping HC APIC-access page at %#RGp\n", GCPhysApicBase));
5376
5377 /* Unalias the existing mapping. */
5378 int rc = PGMHandlerPhysicalReset(pVM, GCPhysApicBase);
5379 AssertRCReturn(rc, rc);
5380
5381 /* Map the HC APIC-access page in place of the MMIO page, also updates the shadow page tables if necessary. */
5382 Assert(pVM->hmr0.s.vmx.HCPhysApicAccess != NIL_RTHCPHYS);
5383 rc = IOMR0MmioMapMmioHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hmr0.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
5384 AssertRCReturn(rc, rc);
5385
5386 return VINF_SUCCESS;
5387}
5388
5389
5390/**
5391 * Worker function passed to RTMpOnSpecific() that is to be called on the target
5392 * CPU.
5393 *
5394 * @param idCpu The ID for the CPU the function is called on.
5395 * @param pvUser1 Null, not used.
5396 * @param pvUser2 Null, not used.
5397 */
5398static DECLCALLBACK(void) hmR0DispatchHostNmi(RTCPUID idCpu, void *pvUser1, void *pvUser2)
5399{
5400 RT_NOREF3(idCpu, pvUser1, pvUser2);
5401 VMXDispatchHostNmi();
5402}
5403
5404
5405/**
5406 * Dispatches an NMI on the host CPU that received it.
5407 *
5408 * @returns VBox status code.
5409 * @param pVCpu The cross context virtual CPU structure.
5410 * @param pVmcsInfo The VMCS info. object corresponding to the VMCS that was
5411 * executing when receiving the host NMI in VMX non-root
5412 * operation.
5413 */
5414static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
5415{
5416 RTCPUID const idCpu = pVmcsInfo->idHostCpuExec;
5417 Assert(idCpu != NIL_RTCPUID);
5418
5419 /*
5420 * We don't want to delay dispatching the NMI any more than we have to. However,
5421 * we have already chosen -not- to dispatch NMIs when interrupts were still disabled
5422 * after executing guest or nested-guest code for the following reasons:
5423 *
5424 * - We would need to perform VMREADs with interrupts disabled, which is orders of
5425 * magnitude worse when we run as a nested hypervisor without VMCS shadowing
5426 * supported by the host hypervisor.
5427 *
5428 * - It affects the common VM-exit scenario and keeps interrupts disabled for a
5429 * longer period of time just for handling an edge case like host NMIs which do
5430 * not occur nearly as frequently as other VM-exits.
5431 *
5432 * Let's cover the most likely scenario first. Check if we are on the target CPU
5433 * and dispatch the NMI right away. This should be much faster than calling into
5434 * RTMpOnSpecific() machinery.
5435 */
5436 bool fDispatched = false;
5437 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
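    /* Note: interrupts are presumably kept disabled across the CPU-id check and the dispatch so
       that we cannot be rescheduled to a different host CPU between RTMpCpuId() and
       VMXDispatchHostNmi(). */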
5438 if (idCpu == RTMpCpuId())
5439 {
5440 VMXDispatchHostNmi();
5441 fDispatched = true;
5442 }
5443 ASMSetFlags(fEFlags);
5444 if (fDispatched)
5445 {
5446 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC);
5447 return VINF_SUCCESS;
5448 }
5449
5450 /*
5451 * RTMpOnSpecific() waits until the worker function has run on the target CPU. So
5452 * there should be no race or recursion even if we are unlucky enough to be preempted
5453 * (to the target CPU) without dispatching the host NMI above.
5454 */
5455 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGCIpi);
5456 return RTMpOnSpecific(idCpu, &hmR0DispatchHostNmi, NULL /* pvUser1 */, NULL /* pvUser2 */);
5457}
5458
5459
5460#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5461/**
5462 * Merges the guest with the nested-guest MSR bitmap in preparation for executing the
5463 * nested-guest using hardware-assisted VMX.
5464 *
5465 * @param pVCpu The cross context virtual CPU structure.
5466 * @param pVmcsInfoNstGst The nested-guest VMCS info. object.
5467 * @param pVmcsInfoGst The guest VMCS info. object.
5468 */
5469static void hmR0VmxMergeMsrBitmapNested(PCVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfoNstGst, PCVMXVMCSINFO pVmcsInfoGst)
5470{
5471 uint32_t const cbMsrBitmap = X86_PAGE_4K_SIZE;
5472 uint64_t *pu64MsrBitmap = (uint64_t *)pVmcsInfoNstGst->pvMsrBitmap;
5473 Assert(pu64MsrBitmap);
5474
5475 /*
5476 * We merge the guest MSR bitmap with the nested-guest MSR bitmap such that any
5477 * MSR that is intercepted by the guest is also intercepted while executing the
5478 * nested-guest using hardware-assisted VMX.
5479 *
5480 * Note! If the nested-guest is not using an MSR bitmap, every MSR must cause a
5481 * nested-guest VM-exit even if the outer guest is not intercepting some
5482 * MSRs. We cannot assume the caller has initialized the nested-guest
5483 * MSR bitmap in this case.
5484 *
5485 * The nested hypervisor may also switch whether it uses MSR bitmaps for
5486 * each of its VM-entries, hence initializing it once per-VM while setting
5487 * up the nested-guest VMCS is not sufficient.
5488 */
5489 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5490 if (pVmcsNstGst->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
5491 {
5492 uint64_t const *pu64MsrBitmapNstGst = (uint64_t const *)&pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap[0];
5493 uint64_t const *pu64MsrBitmapGst = (uint64_t const *)pVmcsInfoGst->pvMsrBitmap;
5494 Assert(pu64MsrBitmapNstGst);
5495 Assert(pu64MsrBitmapGst);
5496
5497 /** @todo Detect and use EVEX.POR? */
5498 uint32_t const cFrags = cbMsrBitmap / sizeof(uint64_t);
5499 for (uint32_t i = 0; i < cFrags; i++)
5500 pu64MsrBitmap[i] = pu64MsrBitmapNstGst[i] | pu64MsrBitmapGst[i];
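            /* Note: a set bit in a VMX MSR bitmap means "VM-exit on this MSR access", so the OR above
               yields the union of intercepts: an access exits if either the outer guest or the
               nested-guest wants it intercepted. */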
5501 }
5502 else
5503 ASMMemFill32(pu64MsrBitmap, cbMsrBitmap, UINT32_C(0xffffffff));
5504}
5505
5506
5507/**
5508 * Merges the guest VMCS into the nested-guest VMCS controls in preparation for
5509 * hardware-assisted VMX execution of the nested-guest.
5510 *
5511 * For a guest, we don't modify these controls once we set up the VMCS and hence
5512 * this function is never called.
5513 *
5514 * For nested-guests, since the nested hypervisor provides these controls on every
5515 * nested-guest VM-entry and could potentially change them each time, we need to
5516 * merge them before every nested-guest VM-entry.
5517 *
5518 * @returns VBox status code.
5519 * @param pVCpu The cross context virtual CPU structure.
5520 */
5521static int hmR0VmxMergeVmcsNested(PVMCPUCC pVCpu)
5522{
5523 PVMCC const pVM = pVCpu->CTX_SUFF(pVM);
5524 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
5525 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5526
5527 /*
5528 * Merge the controls with the requirements of the guest VMCS.
5529 *
5530 * We do not need to validate the nested-guest VMX features specified in the nested-guest
5531 * VMCS with the features supported by the physical CPU as it's already done by the
5532 * VMLAUNCH/VMRESUME instruction emulation.
5533 *
5534 * This is because the VMX features exposed by CPUM (through CPUID/MSRs) to the guest are
5535 * derived from the VMX features supported by the physical CPU.
5536 */
5537
5538 /* Pin-based VM-execution controls. */
5539 uint32_t const u32PinCtls = pVmcsNstGst->u32PinCtls | pVmcsInfoGst->u32PinCtls;
5540
5541 /* Processor-based VM-execution controls. */
5542 uint32_t u32ProcCtls = (pVmcsNstGst->u32ProcCtls & ~VMX_PROC_CTLS_USE_IO_BITMAPS)
5543 | (pVmcsInfoGst->u32ProcCtls & ~( VMX_PROC_CTLS_INT_WINDOW_EXIT
5544 | VMX_PROC_CTLS_NMI_WINDOW_EXIT
5545 | VMX_PROC_CTLS_MOV_DR_EXIT /* hmR0VmxExportSharedDebugState makes
5546 sure guest DRx regs are loaded. */
5547 | VMX_PROC_CTLS_USE_TPR_SHADOW
5548 | VMX_PROC_CTLS_MONITOR_TRAP_FLAG));
5549
5550 /* Secondary processor-based VM-execution controls. */
5551 uint32_t const u32ProcCtls2 = (pVmcsNstGst->u32ProcCtls2 & ~VMX_PROC_CTLS2_VPID)
5552 | (pVmcsInfoGst->u32ProcCtls2 & ~( VMX_PROC_CTLS2_VIRT_APIC_ACCESS
5553 | VMX_PROC_CTLS2_INVPCID
5554 | VMX_PROC_CTLS2_VMCS_SHADOWING
5555 | VMX_PROC_CTLS2_RDTSCP
5556 | VMX_PROC_CTLS2_XSAVES_XRSTORS
5557 | VMX_PROC_CTLS2_APIC_REG_VIRT
5558 | VMX_PROC_CTLS2_VIRT_INT_DELIVERY
5559 | VMX_PROC_CTLS2_VMFUNC));
5560
5561 /*
5562 * VM-entry controls:
5563 * These controls contain state that depends on the nested-guest state (primarily
5564 * EFER MSR) and is thus not constant between VMLAUNCH/VMRESUME and the nested-guest
5565 * VM-exit. Although the nested hypervisor cannot change it, we need to in order to
5566 * properly continue executing the nested-guest if the EFER MSR changes but does not
5567 * cause a nested-guest VM-exit.
5568 *
5569 * VM-exit controls:
5570 * These controls specify the host state on return. We cannot use the controls from
5571 * the nested hypervisor state as-is, as they would contain the guest state rather than
5572 * the host state. Since the host state is subject to change (e.g. preemption, trips
5573 * to ring-3, longjmp and rescheduling to a different host CPU) they are not constant
5574 * through VMLAUNCH/VMRESUME and the nested-guest VM-exit.
5575 *
5576 * VM-entry MSR-load:
5577 * The guest MSRs from the VM-entry MSR-load area are already loaded into the guest-CPU
5578 * context by the VMLAUNCH/VMRESUME instruction emulation.
5579 *
5580 * VM-exit MSR-store:
5581 * The VM-exit emulation will take care of populating the MSRs from the guest-CPU context
5582 * back into the VM-exit MSR-store area.
5583 *
5584 * VM-exit MSR-load areas:
5585 * This must contain the real host MSRs with hardware-assisted VMX execution. Hence, we
5586 * can entirely ignore what the nested hypervisor wants to load here.
5587 */
5588
5589 /*
5590 * Exception bitmap.
5591 *
5592 * We could remove #UD from the guest bitmap and merge it with the nested-guest bitmap
5593 * here (and avoid doing anything while exporting nested-guest state), but to keep the
5594 * code more flexible if intercepting exceptions become more dynamic in the future we do
5595 * code more flexible if intercepting exceptions becomes more dynamic in the future, we do
5596 */
5597 uint32_t const u32XcptBitmap = pVmcsNstGst->u32XcptBitmap | pVmcsInfoGst->u32XcptBitmap;
5598
5599 /*
5600 * CR0/CR4 guest/host mask.
5601 *
5602 * Modifications by the nested-guest to CR0/CR4 bits owned by the host and the guest must
5603 * cause VM-exits, so we need to merge them here.
5604 */
5605 uint64_t const u64Cr0Mask = pVmcsNstGst->u64Cr0Mask.u | pVmcsInfoGst->u64Cr0Mask;
5606 uint64_t const u64Cr4Mask = pVmcsNstGst->u64Cr4Mask.u | pVmcsInfoGst->u64Cr4Mask;
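    /* A set bit in these guest/host masks marks the corresponding CR0/CR4 bit as host-owned (guest
       writes to it are intercepted), so the OR keeps every bit intercepted that either level owns. */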
5607
5608 /*
5609 * Page-fault error-code mask and match.
5610 *
5611 * Although we require unrestricted guest execution (and thereby nested-paging) for
5612 * hardware-assisted VMX execution of nested-guests and thus the outer guest doesn't
5613 * normally intercept #PFs, it might intercept them for debugging purposes.
5614 *
5615 * If the outer guest is not intercepting #PFs, we can use the nested-guest #PF filters.
5616 * If the outer guest is intercepting #PFs, we must intercept all #PFs.
5617 */
5618 uint32_t u32XcptPFMask;
5619 uint32_t u32XcptPFMatch;
5620 if (!(pVmcsInfoGst->u32XcptBitmap & RT_BIT(X86_XCPT_PF)))
5621 {
5622 u32XcptPFMask = pVmcsNstGst->u32XcptPFMask;
5623 u32XcptPFMatch = pVmcsNstGst->u32XcptPFMatch;
5624 }
5625 else
5626 {
5627 u32XcptPFMask = 0;
5628 u32XcptPFMatch = 0;
5629 }
5630
5631 /*
5632 * Pause-Loop exiting.
5633 */
5634 /** @todo r=bird: given that both pVM->hm.s.vmx.cPleGapTicks and
5635 * pVM->hm.s.vmx.cPleWindowTicks default to zero, I cannot see how
5636 * this will work... */
5637 uint32_t const cPleGapTicks = RT_MIN(pVM->hm.s.vmx.cPleGapTicks, pVmcsNstGst->u32PleGap);
5638 uint32_t const cPleWindowTicks = RT_MIN(pVM->hm.s.vmx.cPleWindowTicks, pVmcsNstGst->u32PleWindow);
5639
5640 /*
5641 * Pending debug exceptions.
5642 * Currently just copy whatever the nested-guest provides us.
5643 */
5644 uint64_t const uPendingDbgXcpts = pVmcsNstGst->u64GuestPendingDbgXcpts.u;
5645
5646 /*
5647 * I/O Bitmap.
5648 *
5649 * We do not use the I/O bitmap that may be provided by the nested hypervisor as we always
5650 * intercept all I/O port accesses.
5651 */
5652 Assert(u32ProcCtls & VMX_PROC_CTLS_UNCOND_IO_EXIT);
5653 Assert(!(u32ProcCtls & VMX_PROC_CTLS_USE_IO_BITMAPS));
5654
5655 /*
5656 * VMCS shadowing.
5657 *
5658 * We do not yet expose VMCS shadowing to the guest and thus VMCS shadowing should not be
5659 * enabled while executing the nested-guest.
5660 */
5661 Assert(!(u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING));
5662
5663 /*
5664 * APIC-access page.
5665 */
5666 RTHCPHYS HCPhysApicAccess;
5667 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5668 {
5669 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
5670 RTGCPHYS const GCPhysApicAccess = pVmcsNstGst->u64AddrApicAccess.u;
5671
5672 void *pvPage;
5673 PGMPAGEMAPLOCK PgLockApicAccess;
5674 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysApicAccess, &pvPage, &PgLockApicAccess);
5675 if (RT_SUCCESS(rc))
5676 {
5677 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysApicAccess, &HCPhysApicAccess);
5678 AssertMsgRCReturn(rc, ("Failed to get host-physical address for APIC-access page at %#RGp\n", GCPhysApicAccess), rc);
5679
5680 /** @todo Handle proper releasing of page-mapping lock later. */
5681 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockApicAccess);
5682 }
5683 else
5684 return rc;
5685 }
5686 else
5687 HCPhysApicAccess = 0;
5688
5689 /*
5690 * Virtual-APIC page and TPR threshold.
5691 */
5692 RTHCPHYS HCPhysVirtApic;
5693 uint32_t u32TprThreshold;
5694 if (u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
5695 {
5696 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW);
5697 RTGCPHYS const GCPhysVirtApic = pVmcsNstGst->u64AddrVirtApic.u;
5698
5699 void *pvPage;
5700 PGMPAGEMAPLOCK PgLockVirtApic;
5701 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysVirtApic, &pvPage, &PgLockVirtApic);
5702 if (RT_SUCCESS(rc))
5703 {
5704 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysVirtApic, &HCPhysVirtApic);
5705 AssertMsgRCReturn(rc, ("Failed to get host-physical address for virtual-APIC page at %#RGp\n", GCPhysVirtApic), rc);
5706
5707 /** @todo Handle proper releasing of page-mapping lock later. */
5708 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockVirtApic);
5709 }
5710 else
5711 return rc;
5712
5713 u32TprThreshold = pVmcsNstGst->u32TprThreshold;
5714 }
5715 else
5716 {
5717 HCPhysVirtApic = 0;
5718 u32TprThreshold = 0;
5719
5720 /*
5721 * We must make sure CR8 reads/writes cause VM-exits when TPR shadowing is not
5722 * used by the nested hypervisor. Preventing MMIO accesses to the physical APIC will
5723 * be taken care of by EPT/shadow paging.
5724 */
5725 if (pVM->hmr0.s.fAllow64BitGuests)
5726 u32ProcCtls |= VMX_PROC_CTLS_CR8_STORE_EXIT
5727 | VMX_PROC_CTLS_CR8_LOAD_EXIT;
5728 }
5729
5730 /*
5731 * Validate basic assumptions.
5732 */
5733 PVMXVMCSINFO pVmcsInfoNstGst = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
5734 Assert(pVM->hmr0.s.vmx.fUnrestrictedGuest);
5735 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
5736 Assert(hmGetVmxActiveVmcsInfo(pVCpu) == pVmcsInfoNstGst);
5737
5738 /*
5739 * Commit it to the nested-guest VMCS.
5740 */
5741 int rc = VINF_SUCCESS;
5742 if (pVmcsInfoNstGst->u32PinCtls != u32PinCtls)
5743 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, u32PinCtls);
5744 if (pVmcsInfoNstGst->u32ProcCtls != u32ProcCtls)
5745 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, u32ProcCtls);
5746 if (pVmcsInfoNstGst->u32ProcCtls2 != u32ProcCtls2)
5747 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, u32ProcCtls2);
5748 if (pVmcsInfoNstGst->u32XcptBitmap != u32XcptBitmap)
5749 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32XcptBitmap);
5750 if (pVmcsInfoNstGst->u64Cr0Mask != u64Cr0Mask)
5751 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask);
5752 if (pVmcsInfoNstGst->u64Cr4Mask != u64Cr4Mask)
5753 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask);
5754 if (pVmcsInfoNstGst->u32XcptPFMask != u32XcptPFMask)
5755 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, u32XcptPFMask);
5756 if (pVmcsInfoNstGst->u32XcptPFMatch != u32XcptPFMatch)
5757 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, u32XcptPFMatch);
5758 if ( !(u32ProcCtls & VMX_PROC_CTLS_PAUSE_EXIT)
5759 && (u32ProcCtls2 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT))
5760 {
5761 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT);
5762 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, cPleGapTicks);
5763 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, cPleWindowTicks);
5764 }
5765 if (pVmcsInfoNstGst->HCPhysVirtApic != HCPhysVirtApic)
5766 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
5767 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold);
5768 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5769 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
5770 rc |= VMXWriteVmcsNw(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, uPendingDbgXcpts);
5771 AssertRC(rc);
5772
5773 /*
5774 * Update the nested-guest VMCS cache.
5775 */
5776 pVmcsInfoNstGst->u32PinCtls = u32PinCtls;
5777 pVmcsInfoNstGst->u32ProcCtls = u32ProcCtls;
5778 pVmcsInfoNstGst->u32ProcCtls2 = u32ProcCtls2;
5779 pVmcsInfoNstGst->u32XcptBitmap = u32XcptBitmap;
5780 pVmcsInfoNstGst->u64Cr0Mask = u64Cr0Mask;
5781 pVmcsInfoNstGst->u64Cr4Mask = u64Cr4Mask;
5782 pVmcsInfoNstGst->u32XcptPFMask = u32XcptPFMask;
5783 pVmcsInfoNstGst->u32XcptPFMatch = u32XcptPFMatch;
5784 pVmcsInfoNstGst->HCPhysVirtApic = HCPhysVirtApic;
5785
5786 /*
5787 * We need to flush the TLB if we are switching the APIC-access page address.
5788 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
5789 */
5790 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5791 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = true;
5792
5793 /*
5794 * MSR bitmap.
5795 *
5796 * The MSR bitmap address has already been initialized while setting up the nested-guest
5797 * VMCS, here we need to merge the MSR bitmaps.
5798 */
5799 if (u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
5800 hmR0VmxMergeMsrBitmapNested(pVCpu, pVmcsInfoNstGst, pVmcsInfoGst);
5801
5802 return VINF_SUCCESS;
5803}
5804#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
5805
5806
5807/**
5808 * Does the preparations before executing guest code in VT-x.
5809 *
5810 * This may cause longjmps to ring-3 and may even result in rescheduling to the
5811 * recompiler/IEM. We must be cautious about committing guest-state
5812 * information into the VMCS here on the assumption that we will assuredly execute the
5813 * guest in VT-x mode.
5814 *
5815 * If we fall back to the recompiler/IEM after updating the VMCS and clearing
5816 * the common-state (TRPM/forceflags), we must undo those changes so that the
5817 * recompiler/IEM can (and should) use them when it resumes guest execution.
5818 * Otherwise such operations must be done when we can no longer exit to ring-3.
5819 *
5820 * @returns Strict VBox status code (i.e. informational status codes too).
5821 * @retval VINF_SUCCESS if we can proceed with running the guest, interrupts
5822 * have been disabled.
5823 * @retval VINF_VMX_VMEXIT if a nested-guest VM-exit occurs (e.g., while evaluating
5824 * pending events).
5825 * @retval VINF_EM_RESET if a triple-fault occurs while injecting a
5826 * double-fault into the guest.
5827 * @retval VINF_EM_DBG_STEPPED if @a fStepping is true and an event was
5828 * dispatched directly.
5829 * @retval VINF_* scheduling changes, we have to go back to ring-3.
5830 *
5831 * @param pVCpu The cross context virtual CPU structure.
5832 * @param pVmxTransient The VMX-transient structure.
5833 * @param fStepping Whether we are single-stepping the guest in the
5834 * hypervisor debugger. Makes us ignore some of the reasons
5835 * for returning to ring-3, and return VINF_EM_DBG_STEPPED
5836 * if event dispatching took place.
5837 */
5838static VBOXSTRICTRC hmR0VmxPreRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, bool fStepping)
5839{
5840 Assert(VMMRZCallRing3IsEnabled(pVCpu));
5841
5842 Log4Func(("fIsNested=%RTbool fStepping=%RTbool\n", pVmxTransient->fIsNestedGuest, fStepping));
5843
5844#ifdef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
5845 if (pVmxTransient->fIsNestedGuest)
5846 {
5847 RT_NOREF2(pVCpu, fStepping);
5848 Log2Func(("Rescheduling to IEM due to nested-hwvirt or forced IEM exec -> VINF_EM_RESCHEDULE_REM\n"));
5849 return VINF_EM_RESCHEDULE_REM;
5850 }
5851#endif
5852
5853 /*
5854 * Check and process force flag actions, some of which might require us to go back to ring-3.
5855 */
5856 VBOXSTRICTRC rcStrict = vmxHCCheckForceFlags(pVCpu, pVmxTransient->fIsNestedGuest, fStepping);
5857 if (rcStrict == VINF_SUCCESS)
5858 {
5859 /* FFs don't get set all the time. */
5860#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5861 if ( pVmxTransient->fIsNestedGuest
5862 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
5863 {
5864 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
5865 return VINF_VMX_VMEXIT;
5866 }
5867#endif
5868 }
5869 else
5870 return rcStrict;
5871
5872 /*
5873 * Virtualize memory-mapped accesses to the physical APIC (may take locks).
5874 */
5875 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
5876 if ( !pVCpu->hm.s.vmx.u64GstMsrApicBase
5877 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5878 && PDMHasApic(pVM))
5879 {
5880 /* Get the APIC base MSR from the virtual APIC device. */
5881 uint64_t const uApicBaseMsr = APICGetBaseMsrNoCheck(pVCpu);
5882
5883 /* Map the APIC access page. */
5884 int rc = hmR0VmxMapHCApicAccessPage(pVCpu, uApicBaseMsr & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
5885 AssertRCReturn(rc, rc);
5886
5887 /* Update the per-VCPU cache of the APIC base MSR corresponding to the mapped APIC access page. */
5888 pVCpu->hm.s.vmx.u64GstMsrApicBase = uApicBaseMsr;
5889 }
5890
5891#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5892 /*
5893 * Merge guest VMCS controls with the nested-guest VMCS controls.
5894 *
5895 * Even if we have not executed the guest prior to this (e.g. when resuming from a
5896 * saved state), we should be okay with merging controls as we initialize the
5897 * guest VMCS controls as part of the VM setup phase.
5898 */
5899 if ( pVmxTransient->fIsNestedGuest
5900 && !pVCpu->hm.s.vmx.fMergedNstGstCtls)
5901 {
5902 int rc = hmR0VmxMergeVmcsNested(pVCpu);
5903 AssertRCReturn(rc, rc);
5904 pVCpu->hm.s.vmx.fMergedNstGstCtls = true;
5905 }
5906#endif
5907
5908 /*
5909 * Evaluate events to be injected into the guest.
5910 *
5911 * Events in TRPM can be injected without inspecting the guest state.
5912 * If any new events (interrupts/NMI) are pending currently, we try to set up the
5913 * guest to cause a VM-exit the next time it is ready to receive the event.
5914 */
5915 if (TRPMHasTrap(pVCpu))
5916 vmxHCTrpmTrapToPendingEvent(pVCpu);
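    /* Note: the TRPM trap is converted into an HM pending event here and is injected further
       below by vmxHCInjectPendingEvent() using the VM-entry interruption-information field. */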
5917
5918 uint32_t fIntrState;
5919#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5920 if (!pVmxTransient->fIsNestedGuest)
5921 rcStrict = vmxHCEvaluatePendingEvent(pVCpu, pVmxTransient->pVmcsInfo, &fIntrState);
5922 else
5923 rcStrict = vmxHCEvaluatePendingEventNested(pVCpu, pVmxTransient->pVmcsInfo, &fIntrState);
5924
5925 /*
5926     * If something failed while evaluating pending events (unlikely), or if we were
5927     * preparing to run a nested-guest but performed a nested-guest VM-exit instead, we should bail.
5928 */
5929 if (rcStrict != VINF_SUCCESS)
5930 return rcStrict;
5931 if ( pVmxTransient->fIsNestedGuest
5932 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
5933 {
5934 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
5935 return VINF_VMX_VMEXIT;
5936 }
5937#else
5938 rcStrict = vmxHCEvaluatePendingEvent(pVCpu, pVmxTransient->pVmcsInfo, &fIntrState);
5939 Assert(rcStrict == VINF_SUCCESS);
5940#endif
5941
5942 /*
5943     * Event injection may take locks (currently the PGM lock for the real-on-v86 case) and thus
5944 * needs to be done with longjmps or interrupts + preemption enabled. Event injection might
5945 * also result in triple-faulting the VM.
5946 *
5947 * With nested-guests, the above does not apply since unrestricted guest execution is a
5948 * requirement. Regardless, we do this here to avoid duplicating code elsewhere.
5949 */
5950 rcStrict = vmxHCInjectPendingEvent(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->fIsNestedGuest, fIntrState, fStepping);
5951 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
5952 { /* likely */ }
5953 else
5954 {
5955 AssertMsg(rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping),
5956 ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
5957 return rcStrict;
5958 }
5959
5960 /*
5961     * A longjmp might result in importing CR3 even for VM-exits that don't necessarily
5962     * import CR3 themselves. We need to handle that here, since even the
5963     * vmxHCInjectPendingEvent() call above may lazily import guest-CPU state on demand,
5964     * causing the force flag below to be set.
5965 */
5966 if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
5967 {
5968 Assert(!(ASMAtomicUoReadU64(&pVCpu->cpum.GstCtx.fExtrn) & CPUMCTX_EXTRN_CR3));
5969 int rc2 = PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
5970 AssertMsgReturn(rc2 == VINF_SUCCESS || rc2 == VINF_PGM_SYNC_CR3,
5971 ("%Rrc\n", rc2), RT_FAILURE_NP(rc2) ? rc2 : VERR_IPE_UNEXPECTED_INFO_STATUS);
5972 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
5973 }
5974
5975#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5976 /* Paranoia. */
5977 Assert(!pVmxTransient->fIsNestedGuest || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
5978#endif
5979
5980 /*
5981 * No longjmps to ring-3 from this point on!!!
5982 * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic.
5983 * This also disables flushing of the R0-logger instance (if any).
5984 */
5985 VMMRZCallRing3Disable(pVCpu);
5986
5987 /*
5988 * Export the guest state bits.
5989 *
5990 * We cannot perform longjmps while loading the guest state because we do not preserve the
5991 * host/guest state (although the VMCS will be preserved) across longjmps which can cause
5992 * CPU migration.
5993 *
5994 * If we are injecting events to a real-on-v86 mode guest, we would have updated RIP and some segment
5995 * registers. Hence, exporting of the guest state needs to be done -after- injection of events.
5996 */
5997 rcStrict = hmR0VmxExportGuestStateOptimal(pVCpu, pVmxTransient);
5998 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
5999 { /* likely */ }
6000 else
6001 {
6002 VMMRZCallRing3Enable(pVCpu);
6003 return rcStrict;
6004 }
6005
6006 /*
6007 * We disable interrupts so that we don't miss any interrupts that would flag preemption
6008 * (IPI/timers etc.) when thread-context hooks aren't used and we've been running with
6009 * preemption disabled for a while. Since this is purely to aid the
6010 * RTThreadPreemptIsPending() code, it doesn't matter that it may temporarily reenable and
6011     * disable interrupts on NT.
6012 *
6013     * We need to check for force-flags that could've possibly been altered since we last
6014 * checked them (e.g. by PDMGetInterrupt() leaving the PDM critical section,
6015 * see @bugref{6398}).
6016 *
6017 * We also check a couple of other force-flags as a last opportunity to get the EMT back
6018 * to ring-3 before executing guest code.
6019 */
6020 pVmxTransient->fEFlags = ASMIntDisableFlags();
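    /* Note: fEFlags caches the host RFLAGS from before interrupts were disabled; it is restored
       via ASMSetFlags() below on the failure paths and in hmR0VmxPostRunGuest() after the VM-exit. */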
6021
6022 if ( ( !VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
6023 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
6024 || ( fStepping /* Optimized for the non-stepping case, so a bit of unnecessary work when stepping. */
6025 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK & ~(VMCPU_FF_TIMER | VMCPU_FF_PDM_CRITSECT))) )
6026 {
6027 if (!RTThreadPreemptIsPending(NIL_RTTHREAD))
6028 {
6029#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6030 /*
6031             * If we are executing a nested-guest, make sure that we intercept subsequent
6032 * events. The one we are injecting might be part of VM-entry. This is mainly to keep
6033 * the VM-exit instruction emulation happy.
6034 */
6035 if (pVmxTransient->fIsNestedGuest)
6036 CPUMSetGuestVmxInterceptEvents(&pVCpu->cpum.GstCtx, true);
6037#endif
6038
6039 /*
6040 * We've injected any pending events. This is really the point of no return (to ring-3).
6041 *
6042 * Note! The caller expects to continue with interrupts & longjmps disabled on successful
6043 * returns from this function, so do -not- enable them here.
6044 */
6045 pVCpu->hm.s.Event.fPending = false;
6046 return VINF_SUCCESS;
6047 }
6048
6049 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPendingHostIrq);
6050 rcStrict = VINF_EM_RAW_INTERRUPT;
6051 }
6052 else
6053 {
6054 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
6055 rcStrict = VINF_EM_RAW_TO_R3;
6056 }
6057
6058 ASMSetFlags(pVmxTransient->fEFlags);
6059 VMMRZCallRing3Enable(pVCpu);
6060
6061 return rcStrict;
6062}
6063
6064
6065/**
6066 * Final preparations before executing guest code using hardware-assisted VMX.
6067 *
6068 * We can no longer get preempted to a different host CPU and there are no returns
6069 * to ring-3. We ignore any errors that may happen from this point on (e.g. VMWRITE
6070 * failures); this function is not intended to fail barring unrecoverable hardware
6071 * errors.
6072 *
6073 * @param pVCpu The cross context virtual CPU structure.
6074 * @param pVmxTransient The VMX-transient structure.
6075 *
6076 * @remarks Called with preemption disabled.
6077 * @remarks No-long-jump zone!!!
6078 */
6079static void hmR0VmxPreRunGuestCommitted(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
6080{
6081 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6082 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6083 Assert(!pVCpu->hm.s.Event.fPending);
6084
6085 /*
6086 * Indicate start of guest execution and where poking EMT out of guest-context is recognized.
6087 */
6088 VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
6089 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
6090
6091 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
6092 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
6093 PHMPHYSCPU pHostCpu = hmR0GetCurrentCpu();
6094 RTCPUID const idCurrentCpu = pHostCpu->idCpu;
6095
6096 if (!CPUMIsGuestFPUStateActive(pVCpu))
6097 {
6098 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestFpuState, x);
6099 if (CPUMR0LoadGuestFPU(pVM, pVCpu) == VINF_CPUM_HOST_CR0_MODIFIED)
6100 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT;
6101 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestFpuState, x);
6102 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadGuestFpu);
6103 }
6104
6105 /*
6106 * Re-export the host state bits as we may've been preempted (only happens when
6107 * thread-context hooks are used or when the VM start function changes) or if
6108 * the host CR0 is modified while loading the guest FPU state above.
6109 *
6110 * The 64-on-32 switcher saves the (64-bit) host state into the VMCS and if we
6111 * changed the switcher back to 32-bit, we *must* save the 32-bit host state here,
6112 * see @bugref{8432}.
6113 *
6114 * This may also happen when switching to/from a nested-guest VMCS without leaving
6115 * ring-0.
6116 */
6117 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
6118 {
6119 hmR0VmxExportHostState(pVCpu);
6120 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportHostState);
6121 }
6122 Assert(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT));
6123
6124 /*
6125 * Export the state shared between host and guest (FPU, debug, lazy MSRs).
6126 */
6127 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)
6128 hmR0VmxExportSharedState(pVCpu, pVmxTransient);
6129 AssertMsg(!pVCpu->hm.s.fCtxChanged, ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
6130
6131 /*
6132 * Store status of the shared guest/host debug state at the time of VM-entry.
6133 */
6134 pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu);
6135 pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu);
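    /* These snapshots are consulted after the VM-exit; see the assertion in hmR0VmxPostRunGuest()
       that at most one of the two debug states was active across the run. */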
6136
6137 /*
6138 * Always cache the TPR-shadow if the virtual-APIC page exists, thereby skipping
6139 * more than one conditional check. The post-run side of our code shall determine
6140 * if it needs to sync. the virtual APIC TPR with the TPR-shadow.
6141 */
6142 if (pVmcsInfo->pbVirtApic)
6143 pVmxTransient->u8GuestTpr = pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR];
6144
6145 /*
6146 * Update the host MSRs values in the VM-exit MSR-load area.
6147 */
6148 if (!pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs)
6149 {
6150 if (pVmcsInfo->cExitMsrLoad > 0)
6151 hmR0VmxUpdateAutoLoadHostMsrs(pVCpu, pVmcsInfo);
6152 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = true;
6153 }
6154
6155 /*
6156 * Evaluate if we need to intercept guest RDTSC/P accesses. Set up the
6157 * VMX-preemption timer based on the next virtual sync clock deadline.
6158 */
6159 if ( !pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer
6160 || idCurrentCpu != pVCpu->hmr0.s.idLastCpu)
6161 {
6162 hmR0VmxUpdateTscOffsettingAndPreemptTimer(pVCpu, pVmxTransient, idCurrentCpu);
6163 pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = true;
6164 }
6165
6166 /* Record statistics of how often we use TSC offsetting as opposed to intercepting RDTSC/P. */
6167 bool const fIsRdtscIntercepted = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT);
6168 if (!fIsRdtscIntercepted)
6169 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
6170 else
6171 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
6172
6173 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB flushing, set this across the world switch. */
6174 hmR0VmxFlushTaggedTlb(pHostCpu, pVCpu, pVmcsInfo); /* Invalidate the appropriate guest entries from the TLB. */
6175 Assert(idCurrentCpu == pVCpu->hmr0.s.idLastCpu);
6176 pVCpu->hm.s.vmx.LastError.idCurrentCpu = idCurrentCpu; /* Record the error reporting info. with the current host CPU. */
6177 pVmcsInfo->idHostCpuState = idCurrentCpu; /* Record the CPU for which the host-state has been exported. */
6178 pVmcsInfo->idHostCpuExec = idCurrentCpu; /* Record the CPU on which we shall execute. */
6179
6180 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
6181
6182 TMNotifyStartOfExecution(pVM, pVCpu); /* Notify TM to resume its clocks when TSC is tied to execution,
6183 as we're about to start executing the guest. */
6184
6185 /*
6186 * Load the guest TSC_AUX MSR when we are not intercepting RDTSCP.
6187 *
6188 * This is done this late as updating the TSC offsetting/preemption timer above
6189 * figures out if we can skip intercepting RDTSCP by calculating the number of
6190 * host CPU ticks till the next virtual sync deadline (for the dynamic case).
6191 */
6192 if ( (pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_RDTSCP)
6193 && !fIsRdtscIntercepted)
6194 {
6195 vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_TSC_AUX);
6196
6197 /* NB: Because we call hmR0VmxAddAutoLoadStoreMsr with fUpdateHostMsr=true,
6198 it's safe even after hmR0VmxUpdateAutoLoadHostMsrs has already been done. */
6199 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX, CPUMGetGuestTscAux(pVCpu),
6200 true /* fSetReadWrite */, true /* fUpdateHostMsr */);
6201 AssertRC(rc);
6202 Assert(!pVmxTransient->fRemoveTscAuxMsr);
6203 pVmxTransient->fRemoveTscAuxMsr = true;
6204 }
6205
6206#ifdef VBOX_STRICT
6207 Assert(pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs);
6208 hmR0VmxCheckAutoLoadStoreMsrs(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest);
6209 hmR0VmxCheckHostEferMsr(pVmcsInfo);
6210 AssertRC(vmxHCCheckCachedVmcsCtls(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest));
6211#endif
6212
6213#ifdef HMVMX_ALWAYS_CHECK_GUEST_STATE
6214 /** @todo r=ramshankar: We can now probably use iemVmxVmentryCheckGuestState here.
6215 * Add a PVMXMSRS parameter to it, so that IEM can look at the host MSRs,
6216 * see @bugref{9180#c54}. */
6217 uint32_t const uInvalidReason = hmR0VmxCheckGuestState(pVCpu, pVmcsInfo);
6218 if (uInvalidReason != VMX_IGS_REASON_NOT_FOUND)
6219 Log4(("hmR0VmxCheckGuestState returned %#x\n", uInvalidReason));
6220#endif
6221}
6222
6223
6224/**
6225 * First C routine invoked after running guest code using hardware-assisted VMX.
6226 *
6227 * @param pVCpu The cross context virtual CPU structure.
6228 * @param pVmxTransient The VMX-transient structure.
6229 * @param rcVMRun Return code of VMLAUNCH/VMRESUME.
6230 *
6231 * @remarks Called with interrupts disabled, and returns with interrupts enabled!
6232 *
6233 * @remarks No-long-jump zone!!! This function will however re-enable longjmps
6234 * unconditionally when it is safe to do so.
6235 */
6236static void hmR0VmxPostRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, int rcVMRun)
6237{
6238 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB flushing. */
6239 ASMAtomicIncU32(&pVCpu->hmr0.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for EMT poking. */
6240 pVCpu->hm.s.fCtxChanged = 0; /* Exits/longjmps to ring-3 requires saving the guest state. */
6241 pVmxTransient->fVmcsFieldsRead = 0; /* Transient fields need to be read from the VMCS. */
6242 pVmxTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */
6243 pVmxTransient->fVectoringDoublePF = false; /* Vectoring double page-fault needs to be determined later. */
6244
6245 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
6246 if (!(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT))
6247 {
6248 uint64_t uGstTsc;
6249 if (!pVmxTransient->fIsNestedGuest)
6250 uGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
6251 else
6252 {
6253 uint64_t const uNstGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
6254 uGstTsc = CPUMRemoveNestedGuestTscOffset(pVCpu, uNstGstTsc);
6255 }
6256 TMCpuTickSetLastSeen(pVCpu, uGstTsc); /* Update TM with the guest TSC. */
6257 }
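    /* Note: the guest TSC fed to TM above is reconstructed from the host TSC sampled at VM-exit plus
       the TSC offset the guest ran with; for a nested-guest the nested offset is removed again so TM
       sees the outer guest's view of the TSC. */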
6258
6259 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatPreExit, x);
6260 TMNotifyEndOfExecution(pVCpu->CTX_SUFF(pVM), pVCpu, pVCpu->hmr0.s.uTscExit); /* Notify TM that the guest is no longer running. */
6261 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
6262
6263 pVCpu->hmr0.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_REQUIRED; /* Some host state messed up by VMX needs restoring. */
6264 pVmcsInfo->fVmcsState = VMX_V_VMCS_LAUNCH_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */
6265#ifdef VBOX_STRICT
6266 hmR0VmxCheckHostEferMsr(pVmcsInfo); /* Verify that the host EFER MSR wasn't modified. */
6267#endif
6268 Assert(!ASMIntAreEnabled());
6269 ASMSetFlags(pVmxTransient->fEFlags); /* Enable interrupts. */
6270 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6271
6272#ifdef HMVMX_ALWAYS_CLEAN_TRANSIENT
6273 /*
6274 * Clean all the VMCS fields in the transient structure before reading
6275 * anything from the VMCS.
6276 */
6277 pVmxTransient->uExitReason = 0;
6278 pVmxTransient->uExitIntErrorCode = 0;
6279 pVmxTransient->uExitQual = 0;
6280 pVmxTransient->uGuestLinearAddr = 0;
6281 pVmxTransient->uExitIntInfo = 0;
6282 pVmxTransient->cbExitInstr = 0;
6283 pVmxTransient->ExitInstrInfo.u = 0;
6284 pVmxTransient->uEntryIntInfo = 0;
6285 pVmxTransient->uEntryXcptErrorCode = 0;
6286 pVmxTransient->cbEntryInstr = 0;
6287 pVmxTransient->uIdtVectoringInfo = 0;
6288 pVmxTransient->uIdtVectoringErrorCode = 0;
6289#endif
6290
6291 /*
6292 * Save the basic VM-exit reason and check if the VM-entry failed.
6293 * See Intel spec. 24.9.1 "Basic VM-exit Information".
6294 */
6295 uint32_t uExitReason;
6296 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
6297 AssertRC(rc);
6298 pVmxTransient->uExitReason = VMX_EXIT_REASON_BASIC(uExitReason);
6299 pVmxTransient->fVMEntryFailed = VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason);
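    /* Both values come from the single exit-reason read above: the low bits hold the basic exit
       reason while bit 31 of the field is the VM-entry failure indicator (see the Intel spec on
       basic VM-exit information). */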
6300
6301 /*
6302 * Log the VM-exit before logging anything else as otherwise it might be a
6303 * tad confusing what happens before and after the world-switch.
6304 */
6305 HMVMX_LOG_EXIT(pVCpu, uExitReason);
6306
6307 /*
6308 * Remove the TSC_AUX MSR from the auto-load/store MSR area and reset any MSR
6309 * bitmap permissions, if it was added before VM-entry.
6310 */
6311 if (pVmxTransient->fRemoveTscAuxMsr)
6312 {
6313 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX);
6314 pVmxTransient->fRemoveTscAuxMsr = false;
6315 }
6316
6317 /*
6318 * Check if VMLAUNCH/VMRESUME succeeded.
6319 * If this failed, we cause a guru meditation and cease further execution.
6320 */
6321 if (RT_LIKELY(rcVMRun == VINF_SUCCESS))
6322 {
6323 /*
6324 * Update the VM-exit history array here even if the VM-entry failed due to:
6325 * - Invalid guest state.
6326 * - MSR loading.
6327 * - Machine-check event.
6328 *
6329 * In any of the above cases we will still have a "valid" VM-exit reason
6330          * despite @a fVMEntryFailed being true.
6331 *
6332 * See Intel spec. 26.7 "VM-Entry failures during or after loading guest state".
6333 *
6334 * Note! We don't have CS or RIP at this point. Will probably address that later
6335 * by amending the history entry added here.
6336 */
6337 EMHistoryAddExit(pVCpu, EMEXIT_MAKE_FT(EMEXIT_F_KIND_VMX, pVmxTransient->uExitReason & EMEXIT_F_TYPE_MASK),
6338 UINT64_MAX, pVCpu->hmr0.s.uTscExit);
6339
6340 if (RT_LIKELY(!pVmxTransient->fVMEntryFailed))
6341 {
6342 VMMRZCallRing3Enable(pVCpu);
6343 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
6344
6345#ifdef HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
6346 vmxHCReadAllRoFieldsVmcs(pVCpu, pVmxTransient);
6347#endif
6348
6349          * Always import the guest-interruptibility state as we need it while evaluating
6350          * and injecting events on re-entry. We could in *theory* postpone reading it for
6351          * exits that do not involve instruction emulation, but since most exits are
6352          * for instruction emulation (the exceptions being external interrupts, shadow-paging
6353          * page faults, EPT violations and interrupt-window stuff)
6354          * this is a reasonable simplification.
6355 * this is a reasonable simplification.
6356 *
6357 * We don't import CR0 (when unrestricted guest execution is unavailable) despite
6358 * checking for real-mode while exporting the state because all bits that cause
6359 * mode changes wrt CR0 are intercepted.
6360 *
6361          * Note! This mask _must_ match the default a_fDonePostExit value for the
6362          * vmxHCImportGuestState template!
6363 */
6364 /** @todo r=bird: consider dropping the INHIBIT_XXX and fetch the state
6365 * explicitly in the exit handlers and injection function. That way we have
6366 * fewer clusters of vmread spread around the code, because the EM history
6367 * executor won't execute very many non-exiting instructions before stopping. */
6368 rc = vmxHCImportGuestState< CPUMCTX_EXTRN_INHIBIT_INT
6369 | CPUMCTX_EXTRN_INHIBIT_NMI
6370#if defined(HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE) || defined(HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE)
6371 | HMVMX_CPUMCTX_EXTRN_ALL
6372#elif defined(HMVMX_ALWAYS_SAVE_GUEST_RFLAGS)
6373 | CPUMCTX_EXTRN_RFLAGS
6374#endif
6375 , 0 /*a_fDoneLocal*/, 0 /*a_fDonePostExit*/>(pVCpu, pVmcsInfo, __FUNCTION__);
6376 AssertRC(rc);
6377
6378 /*
6379 * Sync the TPR shadow with our APIC state.
6380 */
6381 if ( !pVmxTransient->fIsNestedGuest
6382 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW))
6383 {
6384 Assert(pVmcsInfo->pbVirtApic);
6385 if (pVmxTransient->u8GuestTpr != pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR])
6386 {
6387 rc = APICSetTpr(pVCpu, pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR]);
6388 AssertRC(rc);
6389 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
6390 }
6391 }
6392
6393 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6394 Assert( pVmxTransient->fWasGuestDebugStateActive == false
6395 || pVmxTransient->fWasHyperDebugStateActive == false);
6396 return;
6397 }
6398 }
6399#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6400 else if (pVmxTransient->fIsNestedGuest)
6401 AssertMsgFailed(("VMLAUNCH/VMRESUME failed but shouldn't happen when VMLAUNCH/VMRESUME was emulated in IEM!\n"));
6402#endif
6403 else
6404 Log4Func(("VM-entry failure: rcVMRun=%Rrc fVMEntryFailed=%RTbool\n", rcVMRun, pVmxTransient->fVMEntryFailed));
6405
6406 VMMRZCallRing3Enable(pVCpu);
6407}
6408
6409
6410/**
6411 * Runs the guest code using hardware-assisted VMX the normal way.
6412 *
6413 * @returns VBox status code.
6414 * @param pVCpu The cross context virtual CPU structure.
6415 * @param pcLoops Pointer to the number of executed loops.
6416 */
6417static VBOXSTRICTRC hmR0VmxRunGuestCodeNormal(PVMCPUCC pVCpu, uint32_t *pcLoops)
6418{
6419 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6420 Assert(pcLoops);
6421 Assert(*pcLoops <= cMaxResumeLoops);
6422 Assert(!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6423
6424#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6425 /*
6426 * Switch to the guest VMCS as we may have transitioned from executing the nested-guest
6427 * without leaving ring-0. Otherwise, if we came from ring-3 we would have loaded the
6428 * guest VMCS while entering the VMX ring-0 session.
6429 */
6430 if (pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
6431 {
6432 int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, false /* fSwitchToNstGstVmcs */);
6433 if (RT_SUCCESS(rc))
6434 { /* likely */ }
6435 else
6436 {
6437 LogRelFunc(("Failed to switch to the guest VMCS. rc=%Rrc\n", rc));
6438 return rc;
6439 }
6440 }
6441#endif
6442
6443 VMXTRANSIENT VmxTransient;
6444 RT_ZERO(VmxTransient);
6445 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6446 Assert(!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs);
6447
6448 /* Paranoia. */
6449 Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfo);
6450
6451 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6452 for (;;)
6453 {
6454 Assert(!HMR0SuspendPending());
6455 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6456 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6457
6458 /*
6459         * Preparatory work for running guest code; this may force us to
6460 * return to ring-3.
6461 *
6462 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6463 */
6464 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
6465 if (rcStrict != VINF_SUCCESS)
6466 break;
6467
6468 /* Interrupts are disabled at this point! */
6469 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6470 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6471 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6472 /* Interrupts are re-enabled at this point! */
6473
6474 /*
6475 * Check for errors with running the VM (VMLAUNCH/VMRESUME).
6476 */
6477 if (RT_SUCCESS(rcRun))
6478 { /* very likely */ }
6479 else
6480 {
6481 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6482 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6483 return rcRun;
6484 }
6485
6486 /*
6487 * Profile the VM-exit.
6488 */
6489 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6490 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
6491 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6492 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6493 HMVMX_START_EXIT_DISPATCH_PROF();
6494
6495 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6496
6497 /*
6498 * Handle the VM-exit.
6499 */
6500#ifdef HMVMX_USE_FUNCTION_TABLE
6501 rcStrict = g_aVMExitHandlers[VmxTransient.uExitReason].pfn(pVCpu, &VmxTransient);
6502#else
6503 rcStrict = hmR0VmxHandleExit(pVCpu, &VmxTransient);
6504#endif
6505 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6506 if (rcStrict == VINF_SUCCESS)
6507 {
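            /* Bound the number of VMLAUNCH/VMRESUME iterations; once the limit is exceeded we return
               VINF_EM_RAW_INTERRUPT so execution unwinds to ring-3 before the guest is resumed. */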
6508 if (++(*pcLoops) <= cMaxResumeLoops)
6509 continue;
6510 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6511 rcStrict = VINF_EM_RAW_INTERRUPT;
6512 }
6513 break;
6514 }
6515
6516 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6517 return rcStrict;
6518}
6519
6520
6521#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6522/**
6523 * Runs the nested-guest code using hardware-assisted VMX.
6524 *
6525 * @returns VBox status code.
6526 * @param pVCpu The cross context virtual CPU structure.
6527 * @param pcLoops Pointer to the number of executed loops.
6528 *
6529 * @sa hmR0VmxRunGuestCodeNormal.
6530 */
6531static VBOXSTRICTRC hmR0VmxRunGuestCodeNested(PVMCPUCC pVCpu, uint32_t *pcLoops)
6532{
6533 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6534 Assert(pcLoops);
6535 Assert(*pcLoops <= cMaxResumeLoops);
6536 Assert(CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6537
6538 /*
6539 * Switch to the nested-guest VMCS as we may have transitioned from executing the
6540 * guest without leaving ring-0. Otherwise, if we came from ring-3 we would have
6541 * loaded the nested-guest VMCS while entering the VMX ring-0 session.
6542 */
6543 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
6544 {
6545 int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, true /* fSwitchToNstGstVmcs */);
6546 if (RT_SUCCESS(rc))
6547 { /* likely */ }
6548 else
6549 {
6550 LogRelFunc(("Failed to switch to the nested-guest VMCS. rc=%Rrc\n", rc));
6551 return rc;
6552 }
6553 }
6554
6555 VMXTRANSIENT VmxTransient;
6556 RT_ZERO(VmxTransient);
6557 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6558 VmxTransient.fIsNestedGuest = true;
6559 Assert(pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs);
6560
6561 /* Paranoia. */
6562 Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfoNstGst);
6563
6564 /* Setup pointer so PGM/IEM can query VM-exit auxiliary info on demand in ring-0. */
6565 pVCpu->hmr0.s.vmx.pVmxTransient = &VmxTransient;
6566
6567 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6568 for (;;)
6569 {
6570 Assert(!HMR0SuspendPending());
6571 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6572 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6573
6574 /*
6575         * Preparatory work for running nested-guest code; this may force us to
6576 * return to ring-3.
6577 *
6578 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6579 */
6580 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
6581 if (rcStrict != VINF_SUCCESS)
6582 break;
6583
6584 /* Interrupts are disabled at this point! */
6585 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6586 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6587 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6588 /* Interrupts are re-enabled at this point! */
6589
6590 /*
6591 * Check for errors with running the VM (VMLAUNCH/VMRESUME).
6592 */
6593 if (RT_SUCCESS(rcRun))
6594 { /* very likely */ }
6595 else
6596 {
6597 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6598 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6599 rcStrict = rcRun;
6600 break;
6601 }
6602
6603 /*
6604 * Profile the VM-exit.
6605 */
6606 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6607 STAM_COUNTER_INC(&pVCpu->hm.s.StatNestedExitAll);
6608 STAM_COUNTER_INC(&pVCpu->hm.s.aStatNestedExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6609 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6610 HMVMX_START_EXIT_DISPATCH_PROF();
6611
6612 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6613
6614 /*
6615 * Handle the VM-exit.
6616 */
6617 rcStrict = vmxHCHandleExitNested(pVCpu, &VmxTransient);
6618 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6619 if (rcStrict == VINF_SUCCESS)
6620 {
6621 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
6622 {
6623 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
6624 rcStrict = VINF_VMX_VMEXIT;
6625 }
6626 else
6627 {
6628 if (++(*pcLoops) <= cMaxResumeLoops)
6629 continue;
6630 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6631 rcStrict = VINF_EM_RAW_INTERRUPT;
6632 }
6633 }
6634 else
6635 Assert(rcStrict != VINF_VMX_VMEXIT);
6636 break;
6637 }
6638
6639 /* Ensure VM-exit auxiliary info. is no longer available. */
6640 pVCpu->hmr0.s.vmx.pVmxTransient = NULL;
6641
6642 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6643 return rcStrict;
6644}
6645#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
6646
6647
6648/** @name Execution loop for single stepping, DBGF events and expensive Dtrace
6649 * probes.
6650 *
6651 * The following few functions and associated structure contain the bloat
6652 * necessary for providing detailed debug events and dtrace probes as well as
6653 * reliable host side single stepping. This works on the principle of
6654 * "subclassing" the normal execution loop and workers. We replace the loop
6655 * method completely and override selected helpers to add necessary adjustments
6656 * to their core operation.
6657 *
6658 * The goal is to keep the "parent" code lean and mean, so as not to sacrifice
6659 * any performance for debug and analysis features.
6660 *
6661 * @{
6662 */
6663
6664/**
6665 * Single steps guest code using hardware-assisted VMX.
6666 *
6667 * This is -not- the same as the guest single-stepping itself (say, using EFLAGS.TF),
6668 * but rather single-stepping through the hypervisor debugger.
6669 *
6670 * @returns Strict VBox status code (i.e. informational status codes too).
6671 * @param pVCpu The cross context virtual CPU structure.
6672 * @param pcLoops Pointer to the number of executed loops.
6673 *
6674 * @note Mostly the same as hmR0VmxRunGuestCodeNormal().
6675 */
6676static VBOXSTRICTRC hmR0VmxRunGuestCodeDebug(PVMCPUCC pVCpu, uint32_t *pcLoops)
6677{
6678 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6679 Assert(pcLoops);
6680 Assert(*pcLoops <= cMaxResumeLoops);
6681
6682 VMXTRANSIENT VmxTransient;
6683 RT_ZERO(VmxTransient);
6684 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6685
6686 /* Set HMCPU indicators. */
6687 bool const fSavedSingleInstruction = pVCpu->hm.s.fSingleInstruction;
6688 pVCpu->hm.s.fSingleInstruction = pVCpu->hm.s.fSingleInstruction || DBGFIsStepping(pVCpu);
6689 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
6690 pVCpu->hmr0.s.fUsingDebugLoop = true;
6691
6692 /* State we keep to help modify and later restore the VMCS fields we alter, and for detecting steps. */
6693 VMXRUNDBGSTATE DbgState;
6694 vmxHCRunDebugStateInit(pVCpu, &VmxTransient, &DbgState);
6695 vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
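    /* DbgState tracks the VMCS controls we override for stepping/DBGF/dtrace so that
       vmxHCRunDebugStateRevert() in the loop below can restore them afterwards. */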
6696
6697 /*
6698 * The loop.
6699 */
6700 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6701 for (;;)
6702 {
6703 Assert(!HMR0SuspendPending());
6704 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6705 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6706 bool fStepping = pVCpu->hm.s.fSingleInstruction;
6707
6708 /* Set up VM-execution controls the next two can respond to. */
6709 vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
6710
6711 /*
6712         * Preparatory work for running guest code; this may force us to
6713 * return to ring-3.
6714 *
6715 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6716 */
6717 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, fStepping);
6718 if (rcStrict != VINF_SUCCESS)
6719 break;
6720
6721 /* Interrupts are disabled at this point! */
6722 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6723
6724 /* Override any obnoxious code in the above two calls. */
6725 vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
6726
6727 /*
6728 * Finally execute the guest.
6729 */
6730 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6731
6732 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6733 /* Interrupts are re-enabled at this point! */
6734
6735 /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */
6736 if (RT_SUCCESS(rcRun))
6737 { /* very likely */ }
6738 else
6739 {
6740 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6741 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6742 return rcRun;
6743 }
6744
6745 /* Profile the VM-exit. */
6746 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6747 STAM_COUNTER_INC(&pVCpu->hm.s.StatDebugExitAll);
6748 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6749 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6750 HMVMX_START_EXIT_DISPATCH_PROF();
6751
6752 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6753
6754 /*
6755         * Handle the VM-exit - we quit earlier on certain VM-exits, see vmxHCRunDebugHandleExit().
6756 */
6757 rcStrict = vmxHCRunDebugHandleExit(pVCpu, &VmxTransient, &DbgState);
6758 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6759 if (rcStrict != VINF_SUCCESS)
6760 break;
6761 if (++(*pcLoops) > cMaxResumeLoops)
6762 {
6763 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6764 rcStrict = VINF_EM_RAW_INTERRUPT;
6765 break;
6766 }
6767
6768 /*
6769         * Stepping: Did the RIP change? If so, consider it a single step.
6770 * Otherwise, make sure one of the TFs gets set.
6771 */
6772 if (fStepping)
6773 {
6774 int rc = vmxHCImportGuestStateEx(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
6775 AssertRC(rc);
6776 if ( pVCpu->cpum.GstCtx.rip != DbgState.uRipStart
6777 || pVCpu->cpum.GstCtx.cs.Sel != DbgState.uCsStart)
6778 {
6779 rcStrict = VINF_EM_DBG_STEPPED;
6780 break;
6781 }
6782 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR7);
6783 }
6784
6785 /*
6786 * Update when dtrace settings changes (DBGF kicks us, so no need to check).
6787 */
6788 if (VBOXVMM_GET_SETTINGS_SEQ_NO() != DbgState.uDtraceSettingsSeqNo)
6789 vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
6790
6791 /* Restore all controls applied by hmR0VmxPreRunGuestDebugStateApply above. */
6792 rcStrict = vmxHCRunDebugStateRevert(pVCpu, &VmxTransient, &DbgState, rcStrict);
6793 Assert(rcStrict == VINF_SUCCESS);
6794 }
6795
6796 /*
6797 * Clear the X86_EFL_TF if necessary.
6798 */
6799 if (pVCpu->hmr0.s.fClearTrapFlag)
6800 {
6801 int rc = vmxHCImportGuestStateEx(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_RFLAGS);
6802 AssertRC(rc);
6803 pVCpu->hmr0.s.fClearTrapFlag = false;
6804 pVCpu->cpum.GstCtx.eflags.Bits.u1TF = 0;
6805 }
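    /* fClearTrapFlag is only set when the stepping code itself forced X86_EFL_TF into the guest
       EFLAGS, so clearing it here should not clobber a TF the guest set on its own. */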
6806     /** @todo there seem to be issues with the resume flag when the monitor trap
6807      * flag is pending without being used. Seen early in BIOS init when
6808      * accessing the APIC page in protected mode. */
6809
6810 /** @todo we need to do vmxHCRunDebugStateRevert here too, in case we broke
6811 * out of the above loop. */
6812
6813 /* Restore HMCPU indicators. */
6814 pVCpu->hmr0.s.fUsingDebugLoop = false;
6815 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
6816 pVCpu->hm.s.fSingleInstruction = fSavedSingleInstruction;
6817
6818 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6819 return rcStrict;
6820}
6821
6822/** @} */
6823
6824
6825/**
6826 * Checks if any expensive dtrace probes are enabled and we should go to the
6827 * debug loop.
6828 *
6829 * @returns true if we should use debug loop, false if not.
6830 */
6831static bool hmR0VmxAnyExpensiveProbesEnabled(void)
6832{
6833 /* It's probably faster to OR the raw 32-bit counter variables together.
6834 Since the variables are in an array and the probes are next to one
6835        another (more or less), we have good locality. So, better to read
6836        eight or nine cache lines every time and have only one conditional than
6837 128+ conditionals, right? */
6838 return ( VBOXVMM_R0_HMVMX_VMEXIT_ENABLED_RAW() /* expensive too due to context */
6839 | VBOXVMM_XCPT_DE_ENABLED_RAW()
6840 | VBOXVMM_XCPT_DB_ENABLED_RAW()
6841 | VBOXVMM_XCPT_BP_ENABLED_RAW()
6842 | VBOXVMM_XCPT_OF_ENABLED_RAW()
6843 | VBOXVMM_XCPT_BR_ENABLED_RAW()
6844 | VBOXVMM_XCPT_UD_ENABLED_RAW()
6845 | VBOXVMM_XCPT_NM_ENABLED_RAW()
6846 | VBOXVMM_XCPT_DF_ENABLED_RAW()
6847 | VBOXVMM_XCPT_TS_ENABLED_RAW()
6848 | VBOXVMM_XCPT_NP_ENABLED_RAW()
6849 | VBOXVMM_XCPT_SS_ENABLED_RAW()
6850 | VBOXVMM_XCPT_GP_ENABLED_RAW()
6851 | VBOXVMM_XCPT_PF_ENABLED_RAW()
6852 | VBOXVMM_XCPT_MF_ENABLED_RAW()
6853 | VBOXVMM_XCPT_AC_ENABLED_RAW()
6854 | VBOXVMM_XCPT_XF_ENABLED_RAW()
6855 | VBOXVMM_XCPT_VE_ENABLED_RAW()
6856 | VBOXVMM_XCPT_SX_ENABLED_RAW()
6857 | VBOXVMM_INT_SOFTWARE_ENABLED_RAW()
6858 | VBOXVMM_INT_HARDWARE_ENABLED_RAW()
6859 ) != 0
6860 || ( VBOXVMM_INSTR_HALT_ENABLED_RAW()
6861 | VBOXVMM_INSTR_MWAIT_ENABLED_RAW()
6862 | VBOXVMM_INSTR_MONITOR_ENABLED_RAW()
6863 | VBOXVMM_INSTR_CPUID_ENABLED_RAW()
6864 | VBOXVMM_INSTR_INVD_ENABLED_RAW()
6865 | VBOXVMM_INSTR_WBINVD_ENABLED_RAW()
6866 | VBOXVMM_INSTR_INVLPG_ENABLED_RAW()
6867 | VBOXVMM_INSTR_RDTSC_ENABLED_RAW()
6868 | VBOXVMM_INSTR_RDTSCP_ENABLED_RAW()
6869 | VBOXVMM_INSTR_RDPMC_ENABLED_RAW()
6870 | VBOXVMM_INSTR_RDMSR_ENABLED_RAW()
6871 | VBOXVMM_INSTR_WRMSR_ENABLED_RAW()
6872 | VBOXVMM_INSTR_CRX_READ_ENABLED_RAW()
6873 | VBOXVMM_INSTR_CRX_WRITE_ENABLED_RAW()
6874 | VBOXVMM_INSTR_DRX_READ_ENABLED_RAW()
6875 | VBOXVMM_INSTR_DRX_WRITE_ENABLED_RAW()
6876 | VBOXVMM_INSTR_PAUSE_ENABLED_RAW()
6877 | VBOXVMM_INSTR_XSETBV_ENABLED_RAW()
6878 | VBOXVMM_INSTR_SIDT_ENABLED_RAW()
6879 | VBOXVMM_INSTR_LIDT_ENABLED_RAW()
6880 | VBOXVMM_INSTR_SGDT_ENABLED_RAW()
6881 | VBOXVMM_INSTR_LGDT_ENABLED_RAW()
6882 | VBOXVMM_INSTR_SLDT_ENABLED_RAW()
6883 | VBOXVMM_INSTR_LLDT_ENABLED_RAW()
6884 | VBOXVMM_INSTR_STR_ENABLED_RAW()
6885 | VBOXVMM_INSTR_LTR_ENABLED_RAW()
6886 | VBOXVMM_INSTR_GETSEC_ENABLED_RAW()
6887 | VBOXVMM_INSTR_RSM_ENABLED_RAW()
6888 | VBOXVMM_INSTR_RDRAND_ENABLED_RAW()
6889 | VBOXVMM_INSTR_RDSEED_ENABLED_RAW()
6890 | VBOXVMM_INSTR_XSAVES_ENABLED_RAW()
6891 | VBOXVMM_INSTR_XRSTORS_ENABLED_RAW()
6892 | VBOXVMM_INSTR_VMM_CALL_ENABLED_RAW()
6893 | VBOXVMM_INSTR_VMX_VMCLEAR_ENABLED_RAW()
6894 | VBOXVMM_INSTR_VMX_VMLAUNCH_ENABLED_RAW()
6895 | VBOXVMM_INSTR_VMX_VMPTRLD_ENABLED_RAW()
6896 | VBOXVMM_INSTR_VMX_VMPTRST_ENABLED_RAW()
6897 | VBOXVMM_INSTR_VMX_VMREAD_ENABLED_RAW()
6898 | VBOXVMM_INSTR_VMX_VMRESUME_ENABLED_RAW()
6899 | VBOXVMM_INSTR_VMX_VMWRITE_ENABLED_RAW()
6900 | VBOXVMM_INSTR_VMX_VMXOFF_ENABLED_RAW()
6901 | VBOXVMM_INSTR_VMX_VMXON_ENABLED_RAW()
6902 | VBOXVMM_INSTR_VMX_VMFUNC_ENABLED_RAW()
6903 | VBOXVMM_INSTR_VMX_INVEPT_ENABLED_RAW()
6904 | VBOXVMM_INSTR_VMX_INVVPID_ENABLED_RAW()
6905 | VBOXVMM_INSTR_VMX_INVPCID_ENABLED_RAW()
6906 ) != 0
6907 || ( VBOXVMM_EXIT_TASK_SWITCH_ENABLED_RAW()
6908 | VBOXVMM_EXIT_HALT_ENABLED_RAW()
6909 | VBOXVMM_EXIT_MWAIT_ENABLED_RAW()
6910 | VBOXVMM_EXIT_MONITOR_ENABLED_RAW()
6911 | VBOXVMM_EXIT_CPUID_ENABLED_RAW()
6912 | VBOXVMM_EXIT_INVD_ENABLED_RAW()
6913 | VBOXVMM_EXIT_WBINVD_ENABLED_RAW()
6914 | VBOXVMM_EXIT_INVLPG_ENABLED_RAW()
6915 | VBOXVMM_EXIT_RDTSC_ENABLED_RAW()
6916 | VBOXVMM_EXIT_RDTSCP_ENABLED_RAW()
6917 | VBOXVMM_EXIT_RDPMC_ENABLED_RAW()
6918 | VBOXVMM_EXIT_RDMSR_ENABLED_RAW()
6919 | VBOXVMM_EXIT_WRMSR_ENABLED_RAW()
6920 | VBOXVMM_EXIT_CRX_READ_ENABLED_RAW()
6921 | VBOXVMM_EXIT_CRX_WRITE_ENABLED_RAW()
6922 | VBOXVMM_EXIT_DRX_READ_ENABLED_RAW()
6923 | VBOXVMM_EXIT_DRX_WRITE_ENABLED_RAW()
6924 | VBOXVMM_EXIT_PAUSE_ENABLED_RAW()
6925 | VBOXVMM_EXIT_XSETBV_ENABLED_RAW()
6926 | VBOXVMM_EXIT_SIDT_ENABLED_RAW()
6927 | VBOXVMM_EXIT_LIDT_ENABLED_RAW()
6928 | VBOXVMM_EXIT_SGDT_ENABLED_RAW()
6929 | VBOXVMM_EXIT_LGDT_ENABLED_RAW()
6930 | VBOXVMM_EXIT_SLDT_ENABLED_RAW()
6931 | VBOXVMM_EXIT_LLDT_ENABLED_RAW()
6932 | VBOXVMM_EXIT_STR_ENABLED_RAW()
6933 | VBOXVMM_EXIT_LTR_ENABLED_RAW()
6934 | VBOXVMM_EXIT_GETSEC_ENABLED_RAW()
6935 | VBOXVMM_EXIT_RSM_ENABLED_RAW()
6936 | VBOXVMM_EXIT_RDRAND_ENABLED_RAW()
6937 | VBOXVMM_EXIT_RDSEED_ENABLED_RAW()
6938 | VBOXVMM_EXIT_XSAVES_ENABLED_RAW()
6939 | VBOXVMM_EXIT_XRSTORS_ENABLED_RAW()
6940 | VBOXVMM_EXIT_VMM_CALL_ENABLED_RAW()
6941 | VBOXVMM_EXIT_VMX_VMCLEAR_ENABLED_RAW()
6942 | VBOXVMM_EXIT_VMX_VMLAUNCH_ENABLED_RAW()
6943 | VBOXVMM_EXIT_VMX_VMPTRLD_ENABLED_RAW()
6944 | VBOXVMM_EXIT_VMX_VMPTRST_ENABLED_RAW()
6945 | VBOXVMM_EXIT_VMX_VMREAD_ENABLED_RAW()
6946 | VBOXVMM_EXIT_VMX_VMRESUME_ENABLED_RAW()
6947 | VBOXVMM_EXIT_VMX_VMWRITE_ENABLED_RAW()
6948 | VBOXVMM_EXIT_VMX_VMXOFF_ENABLED_RAW()
6949 | VBOXVMM_EXIT_VMX_VMXON_ENABLED_RAW()
6950 | VBOXVMM_EXIT_VMX_VMFUNC_ENABLED_RAW()
6951 | VBOXVMM_EXIT_VMX_INVEPT_ENABLED_RAW()
6952 | VBOXVMM_EXIT_VMX_INVVPID_ENABLED_RAW()
6953 | VBOXVMM_EXIT_VMX_INVPCID_ENABLED_RAW()
6954 | VBOXVMM_EXIT_VMX_EPT_VIOLATION_ENABLED_RAW()
6955 | VBOXVMM_EXIT_VMX_EPT_MISCONFIG_ENABLED_RAW()
6956 | VBOXVMM_EXIT_VMX_VAPIC_ACCESS_ENABLED_RAW()
6957 | VBOXVMM_EXIT_VMX_VAPIC_WRITE_ENABLED_RAW()
6958 ) != 0;
6959}
6960
6961
6962/**
6963 * Runs the guest using hardware-assisted VMX.
6964 *
6965 * @returns Strict VBox status code (i.e. informational status codes too).
6966 * @param pVCpu The cross context virtual CPU structure.
6967 */
6968VMMR0DECL(VBOXSTRICTRC) VMXR0RunGuestCode(PVMCPUCC pVCpu)
6969{
6970 AssertPtr(pVCpu);
6971 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
6972 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6973 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
6974 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
6975
6976 VBOXSTRICTRC rcStrict;
6977 uint32_t cLoops = 0;
6978 for (;;)
6979 {
6980#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6981 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(pCtx);
6982#else
6983 NOREF(pCtx);
6984 bool const fInNestedGuestMode = false;
6985#endif
6986 if (!fInNestedGuestMode)
6987 {
6988 if ( !pVCpu->hm.s.fUseDebugLoop
6989 && (!VBOXVMM_ANY_PROBES_ENABLED() || !hmR0VmxAnyExpensiveProbesEnabled())
6990 && !DBGFIsStepping(pVCpu)
6991 && !pVCpu->CTX_SUFF(pVM)->dbgf.ro.cEnabledInt3Breakpoints)
6992 rcStrict = hmR0VmxRunGuestCodeNormal(pVCpu, &cLoops);
6993 else
6994 rcStrict = hmR0VmxRunGuestCodeDebug(pVCpu, &cLoops);
6995 }
6996#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6997 else
6998 rcStrict = hmR0VmxRunGuestCodeNested(pVCpu, &cLoops);
6999
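        /* Two informational statuses restart the outer loop: VINF_VMX_VMLAUNCH_VMRESUME means the
           guest entered VMX non-root mode and we must now run the nested-guest, while VINF_VMX_VMEXIT
           means a nested-guest VM-exit occurred and we go back to running the outer guest. */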
7000 if (rcStrict == VINF_VMX_VMLAUNCH_VMRESUME)
7001 {
7002 Assert(CPUMIsGuestInVmxNonRootMode(pCtx));
7003 continue;
7004 }
7005 if (rcStrict == VINF_VMX_VMEXIT)
7006 {
7007 Assert(!CPUMIsGuestInVmxNonRootMode(pCtx));
7008 continue;
7009 }
7010#endif
7011 break;
7012 }
7013
7014 int const rcLoop = VBOXSTRICTRC_VAL(rcStrict);
7015 switch (rcLoop)
7016 {
7017 case VERR_EM_INTERPRETER: rcStrict = VINF_EM_RAW_EMULATE_INSTR; break;
7018 case VINF_EM_RESET: rcStrict = VINF_EM_TRIPLE_FAULT; break;
7019 }
7020
7021 int rc2 = hmR0VmxExitToRing3(pVCpu, rcStrict);
7022 if (RT_FAILURE(rc2))
7023 {
7024 pVCpu->hm.s.u32HMError = (uint32_t)VBOXSTRICTRC_VAL(rcStrict);
7025 rcStrict = rc2;
7026 }
7027 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
7028 Assert(!VMMR0AssertionIsNotificationSet(pVCpu));
7029 return rcStrict;
7030}
7031