VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HMVMXR0.cpp

Last change on this file was 104219, checked in by vboxsync, 4 weeks ago

VMM: bugref:10610 Fixed MSR loading.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 281.8 KB
1/* $Id: HMVMXR0.cpp 104219 2024-04-08 06:01:43Z vboxsync $ */
2/** @file
3 * HM VMX (Intel VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2012-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_HM
33#define VMCPU_INCL_CPUM_GST_CTX
34#include <iprt/x86.h>
35#include <iprt/asm-amd64-x86.h>
36#include <iprt/thread.h>
37#include <iprt/mem.h>
38#include <iprt/mp.h>
39
40#include <VBox/vmm/pdmapi.h>
41#include <VBox/vmm/dbgf.h>
42#include <VBox/vmm/iem.h>
43#include <VBox/vmm/iom.h>
44#include <VBox/vmm/tm.h>
45#include <VBox/vmm/em.h>
46#include <VBox/vmm/gcm.h>
47#include <VBox/vmm/gim.h>
48#include <VBox/vmm/apic.h>
49#include "HMInternal.h"
50#include <VBox/vmm/vmcc.h>
51#include <VBox/vmm/hmvmxinline.h>
52#include "HMVMXR0.h"
53#include "VMXInternal.h"
54#include "dtrace/VBoxVMM.h"
55
56
57/*********************************************************************************************************************************
58* Defined Constants And Macros *
59*********************************************************************************************************************************/
60#ifdef DEBUG_ramshankar
61# define HMVMX_ALWAYS_SAVE_GUEST_RFLAGS
62# define HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
63# define HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE
64# define HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
65# define HMVMX_ALWAYS_CLEAN_TRANSIENT
66# define HMVMX_ALWAYS_CHECK_GUEST_STATE
67# define HMVMX_ALWAYS_TRAP_ALL_XCPTS
68# define HMVMX_ALWAYS_TRAP_PF
69# define HMVMX_ALWAYS_FLUSH_TLB
70# define HMVMX_ALWAYS_SWAP_EFER
71#endif
72
73/** Enables the fAlwaysInterceptMovDRx related code. */
74#define VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX 1
75
76
77/*********************************************************************************************************************************
78* Structures and Typedefs *
79*********************************************************************************************************************************/
80/**
81 * VMX page allocation information.
82 */
83typedef struct
84{
85 uint32_t fValid; /**< Whether to allocate this page (e.g., based on a CPU feature). */
86 uint32_t uPadding0; /**< Padding to ensure the array of these structs is aligned to a multiple of 8. */
87 PRTHCPHYS pHCPhys; /**< Where to store the host-physical address of the allocation. */
88 PRTR0PTR ppVirt; /**< Where to store the host-virtual address of the allocation. */
89} VMXPAGEALLOCINFO;
90/** Pointer to VMX page-allocation info. */
91typedef VMXPAGEALLOCINFO *PVMXPAGEALLOCINFO;
92/** Pointer to a const VMX page-allocation info. */
93typedef const VMXPAGEALLOCINFO *PCVMXPAGEALLOCINFO;
94AssertCompileSizeAlignment(VMXPAGEALLOCINFO, 8);
95
96
97/*********************************************************************************************************************************
98* Internal Functions *
99*********************************************************************************************************************************/
100static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient);
101static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo);
102
103
104/*********************************************************************************************************************************
105* Global Variables *
106*********************************************************************************************************************************/
107/** The DR6 value after writing zero to the register.
108 * Set by VMXR0GlobalInit(). */
109static uint64_t g_fDr6Zeroed = 0;
110
111
112/**
113 * Checks if the given MSR is part of the lastbranch-from-IP MSR stack.
114 * @returns @c true if it's part of LBR stack, @c false otherwise.
115 *
116 * @param pVM The cross context VM structure.
117 * @param idMsr The MSR.
118 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
119 * Optional, can be NULL.
120 *
121 * @remarks Must only be called when LBR is enabled.
122 */
123DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchFromMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
124{
125 Assert(pVM->hmr0.s.vmx.fLbr);
126 Assert(pVM->hmr0.s.vmx.idLbrFromIpMsrFirst);
127 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
128 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
129 if (idxMsr < cLbrStack)
130 {
131 if (pidxMsr)
132 *pidxMsr = idxMsr;
133 return true;
134 }
135 return false;
136}
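/*
 * Illustrative example (hypothetical values, not taken from this file): on a CPU whose
 * LBR from-IP stack spans MSRs 0x680..0x69f, idLbrFromIpMsrFirst = 0x680 and
 * idLbrFromIpMsrLast = 0x69f, so cLbrStack = 32. An access to MSR 0x683 yields
 * idxMsr = 3 and the function returns true, while MSR 0x6a0 yields idxMsr = 32 >= cLbrStack
 * and the function returns false. The single unsigned subtraction also rejects MSRs below
 * the first LBR MSR, since the difference wraps to a huge value.
 */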
137
138
139/**
140 * Checks if the given MSR is part of the lastbranch-to-IP MSR stack.
141 * @returns @c true if it's part of LBR stack, @c false otherwise.
142 *
143 * @param pVM The cross context VM structure.
144 * @param idMsr The MSR.
145 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
146 * Optional, can be NULL.
147 *
148 * @remarks Must only be called when LBR is enabled and when lastbranch-to-IP MSRs
149 * are supported by the CPU (see hmR0VmxSetupLbrMsrRange).
150 */
151DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchToMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
152{
153 Assert(pVM->hmr0.s.vmx.fLbr);
154 if (pVM->hmr0.s.vmx.idLbrToIpMsrFirst)
155 {
156 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrToIpMsrLast - pVM->hmr0.s.vmx.idLbrToIpMsrFirst + 1;
157 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
158 if (idxMsr < cLbrStack)
159 {
160 if (pidxMsr)
161 *pidxMsr = idxMsr;
162 return true;
163 }
164 }
165 return false;
166}
167
168
169/**
170 * Gets the active (in use) VMCS info. object for the specified VCPU.
171 *
172 * This is either the guest or nested-guest VMCS info. and need not necessarily
173 * pertain to the "current" VMCS (in the VMX definition of the term). For instance,
174 * if the VM-entry failed due to an invalid-guest state, we may have "cleared" the
175 * current VMCS while returning to ring-3. However, the VMCS info. object for that
176 * VMCS would still be active and returned here so that we could dump the VMCS
177 * fields to ring-3 for diagnostics. This function is thus only used to
178 * distinguish between the nested-guest or guest VMCS.
179 *
180 * @returns The active VMCS information.
181 * @param pVCpu The cross context virtual CPU structure.
182 *
183 * @thread EMT.
184 * @remarks This function may be called with preemption or interrupts disabled!
185 */
186DECLINLINE(PVMXVMCSINFO) hmGetVmxActiveVmcsInfo(PVMCPUCC pVCpu)
187{
188 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
189 return &pVCpu->hmr0.s.vmx.VmcsInfo;
190 return &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
191}
192
193
194/**
195 * Returns whether the VM-exit MSR-store area differs from the VM-exit MSR-load
196 * area.
197 *
198 * @returns @c true if it's different, @c false otherwise.
199 * @param pVmcsInfo The VMCS info. object.
200 */
201DECL_FORCE_INLINE(bool) hmR0VmxIsSeparateExitMsrStoreAreaVmcs(PCVMXVMCSINFO pVmcsInfo)
202{
203 return RT_BOOL( pVmcsInfo->pvGuestMsrStore != pVmcsInfo->pvGuestMsrLoad
204 && pVmcsInfo->pvGuestMsrStore);
205}
206
207
208/**
209 * Sets the given Processor-based VM-execution controls.
210 *
211 * @param pVmxTransient The VMX-transient structure.
212 * @param uProcCtls The Processor-based VM-execution controls to set.
213 */
214static void hmR0VmxSetProcCtlsVmcs(PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
215{
216 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
217 if ((pVmcsInfo->u32ProcCtls & uProcCtls) != uProcCtls)
218 {
219 pVmcsInfo->u32ProcCtls |= uProcCtls;
220 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
221 AssertRC(rc);
222 }
223}
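/*
 * Minimal usage sketch (assumption for illustration; the control bit and call site are
 * hypothetical, not lifted from this file): enabling an additional intercept on the
 * current VMCS via the helper above would look like:
 *
 *     hmR0VmxSetProcCtlsVmcs(pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
 *
 * The helper only writes the VMCS when the requested bits are not already set, so it is
 * cheap to call on hot paths.
 */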
224
225
226/**
227 * Removes the given Processor-based VM-execution controls.
228 *
229 * @param pVCpu The cross context virtual CPU structure.
230 * @param pVmxTransient The VMX-transient structure.
231 * @param uProcCtls The Processor-based VM-execution controls to remove.
232 *
233 * @remarks When executing a nested-guest, this will not remove any of the specified
234 * controls if the nested hypervisor has set any one of them.
235 */
236static void hmR0VmxRemoveProcCtlsVmcs(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
237{
238 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
239 if (pVmcsInfo->u32ProcCtls & uProcCtls)
240 {
241#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
242 if ( !pVmxTransient->fIsNestedGuest
243 || !CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, uProcCtls))
244#else
245 NOREF(pVCpu);
246 if (!pVmxTransient->fIsNestedGuest)
247#endif
248 {
249 pVmcsInfo->u32ProcCtls &= ~uProcCtls;
250 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
251 AssertRC(rc);
252 }
253 }
254}
255
256
257/**
258 * Sets the TSC offset for the current VMCS.
259 *
260 * @param uTscOffset The TSC offset to set.
261 * @param pVmcsInfo The VMCS info. object.
262 */
263static void hmR0VmxSetTscOffsetVmcs(PVMXVMCSINFO pVmcsInfo, uint64_t uTscOffset)
264{
265 if (pVmcsInfo->u64TscOffset != uTscOffset)
266 {
267 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, uTscOffset);
268 AssertRC(rc);
269 pVmcsInfo->u64TscOffset = uTscOffset;
270 }
271}
272
273
274/**
275 * Loads the VMCS specified by the VMCS info. object.
276 *
277 * @returns VBox status code.
278 * @param pVmcsInfo The VMCS info. object.
279 *
280 * @remarks Can be called with interrupts disabled.
281 */
282static int hmR0VmxLoadVmcs(PVMXVMCSINFO pVmcsInfo)
283{
284 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
285 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
286
287 return VMXLoadVmcs(pVmcsInfo->HCPhysVmcs);
288}
289
290
291/**
292 * Clears the VMCS specified by the VMCS info. object.
293 *
294 * @returns VBox status code.
295 * @param pVmcsInfo The VMCS info. object.
296 *
297 * @remarks Can be called with interrupts disabled.
298 */
299static int hmR0VmxClearVmcs(PVMXVMCSINFO pVmcsInfo)
300{
301 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
302 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
303
304 int rc = VMXClearVmcs(pVmcsInfo->HCPhysVmcs);
305 if (RT_SUCCESS(rc))
306 pVmcsInfo->fVmcsState = VMX_V_VMCS_LAUNCH_STATE_CLEAR;
307 return rc;
308}
309
310
311/**
312 * Checks whether the MSR belongs to the set of guest MSRs that we restore
313 * lazily while leaving VT-x.
314 *
315 * @returns true if it does, false otherwise.
316 * @param pVCpu The cross context virtual CPU structure.
317 * @param idMsr The MSR to check.
318 */
319static bool hmR0VmxIsLazyGuestMsr(PCVMCPUCC pVCpu, uint32_t idMsr)
320{
321 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
322 {
323 switch (idMsr)
324 {
325 case MSR_K8_LSTAR:
326 case MSR_K6_STAR:
327 case MSR_K8_SF_MASK:
328 case MSR_K8_KERNEL_GS_BASE:
329 return true;
330 }
331 }
332 return false;
333}
334
335
336/**
337 * Loads a set of guest MSRs to allow read/write passthru to the guest.
338 *
339 * The name of this function is slightly confusing. This function does NOT
340 * postpone loading, but loads the MSR right now. "hmR0VmxLazy" is simply a
341 * common prefix for functions dealing with "lazy restoration" of the shared
342 * MSRs.
343 *
344 * @param pVCpu The cross context virtual CPU structure.
345 *
346 * @remarks No-long-jump zone!!!
347 */
348static void hmR0VmxLazyLoadGuestMsrs(PVMCPUCC pVCpu)
349{
350 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
351 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
352
353 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
354 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
355 {
356 /*
357 * If the guest MSRs are not loaded -and- if all the guest MSRs are identical
358 * to the MSRs on the CPU (which are the saved host MSRs, see assertion above) then
359 * we can skip a few MSR writes.
360 *
361 * Otherwise, it implies either 1. they're not loaded, or 2. they're loaded but the
362 * guest MSR values in the guest-CPU context might be different to what's currently
363 * loaded in the CPU. In either case, we need to write the new guest MSR values to the
364 * CPU, see @bugref{8728}.
365 */
366 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
367 if ( !(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
368 && pCtx->msrKERNELGSBASE == pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase
369 && pCtx->msrLSTAR == pVCpu->hmr0.s.vmx.u64HostMsrLStar
370 && pCtx->msrSTAR == pVCpu->hmr0.s.vmx.u64HostMsrStar
371 && pCtx->msrSFMASK == pVCpu->hmr0.s.vmx.u64HostMsrSfMask)
372 {
373#ifdef VBOX_STRICT
374 Assert(ASMRdMsr(MSR_K8_KERNEL_GS_BASE) == pCtx->msrKERNELGSBASE);
375 Assert(ASMRdMsr(MSR_K8_LSTAR) == pCtx->msrLSTAR);
376 Assert(ASMRdMsr(MSR_K6_STAR) == pCtx->msrSTAR);
377 Assert(ASMRdMsr(MSR_K8_SF_MASK) == pCtx->msrSFMASK);
378#endif
379 }
380 else
381 {
382 /* Avoid raising #GP caused by writing illegal values to these MSRs. */
383 if ( X86_IS_CANONICAL(pCtx->msrKERNELGSBASE)
384 && X86_IS_CANONICAL(pCtx->msrLSTAR))
385 {
386 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pCtx->msrKERNELGSBASE);
387 ASMWrMsr(MSR_K8_LSTAR, pCtx->msrLSTAR);
388 ASMWrMsr(MSR_K6_STAR, pCtx->msrSTAR);
389 /* The system call flag mask register isn't as benign and accepting of all
390 values as the above, so mask it to avoid #GP'ing on corrupted input. */
391 Assert(!(pCtx->msrSFMASK & ~(uint64_t)UINT32_MAX));
392 ASMWrMsr(MSR_K8_SF_MASK, pCtx->msrSFMASK & UINT32_MAX);
393 }
394 else
395 AssertMsgFailed(("Incompatible lazily-loaded guest MSR values\n"));
396 }
397 }
398 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_LOADED_GUEST;
399}
400
401
402/**
403 * Checks if the specified guest MSR is part of the VM-entry MSR-load area.
404 *
405 * @returns @c true if found, @c false otherwise.
406 * @param pVmcsInfo The VMCS info. object.
407 * @param idMsr The MSR to find.
408 */
409static bool hmR0VmxIsAutoLoadGuestMsr(PCVMXVMCSINFO pVmcsInfo, uint32_t idMsr)
410{
411 PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
412 uint32_t const cMsrs = pVmcsInfo->cEntryMsrLoad;
413 Assert(pMsrs);
414 Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE);
415 for (uint32_t i = 0; i < cMsrs; i++)
416 {
417 if (pMsrs[i].u32Msr == idMsr)
418 return true;
419 }
420 return false;
421}
422
423
424/**
425 * Performs lazy restoration of the set of host MSRs if they were previously
426 * loaded with guest MSR values.
427 *
428 * @param pVCpu The cross context virtual CPU structure.
429 *
430 * @remarks No-long-jump zone!!!
431 * @remarks The guest MSRs should have been saved back into the guest-CPU
432 * context by vmxHCImportGuestState()!!!
433 */
434static void hmR0VmxLazyRestoreHostMsrs(PVMCPUCC pVCpu)
435{
436 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
437 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
438
439 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
440 {
441 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
442 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
443 {
444 ASMWrMsr(MSR_K8_LSTAR, pVCpu->hmr0.s.vmx.u64HostMsrLStar);
445 ASMWrMsr(MSR_K6_STAR, pVCpu->hmr0.s.vmx.u64HostMsrStar);
446 ASMWrMsr(MSR_K8_SF_MASK, pVCpu->hmr0.s.vmx.u64HostMsrSfMask);
447 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase);
448 }
449 }
450 pVCpu->hmr0.s.vmx.fLazyMsrs &= ~(VMX_LAZY_MSRS_LOADED_GUEST | VMX_LAZY_MSRS_SAVED_HOST);
451}
452
453
454/**
455 * Sets pfnStartVm to the best suited variant.
456 *
457 * This must be called whenever anything changes relative to the hmR0VmxStartVm
458 * variant selection:
459 * - pVCpu->hm.s.fLoadSaveGuestXcr0
460 * - HM_WSF_IBPB_ENTRY in pVCpu->hmr0.s.fWorldSwitcher
461 * - HM_WSF_IBPB_EXIT in pVCpu->hmr0.s.fWorldSwitcher
462 * - Perhaps: CPUMIsGuestFPUStateActive() (windows only)
463 * - Perhaps: CPUMCTX.fXStateMask (windows only)
464 *
465 * We currently ASSUME that neither HM_WSF_IBPB_ENTRY nor HM_WSF_IBPB_EXIT
466 * can be changed at runtime.
467 */
468static void hmR0VmxUpdateStartVmFunction(PVMCPUCC pVCpu)
469{
470 static const struct CLANGWORKAROUND { PFNHMVMXSTARTVM pfn; } s_aHmR0VmxStartVmFunctions[] =
471 {
472 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
473 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
474 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
475 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
476 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
477 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
478 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
479 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
480 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
481 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
482 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
483 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
484 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
485 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
486 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
487 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
488 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
489 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
490 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
491 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
492 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
493 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
494 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
495 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
496 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
497 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
498 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
499 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
500 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
501 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
502 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
503 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
504 };
505 uintptr_t const idx = (pVCpu->hmr0.s.fLoadSaveGuestXcr0 ? 1 : 0)
506 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_ENTRY ? 2 : 0)
507 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_ENTRY ? 4 : 0)
508 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_ENTRY ? 8 : 0)
509 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_EXIT ? 16 : 0);
510 PFNHMVMXSTARTVM const pfnStartVm = s_aHmR0VmxStartVmFunctions[idx].pfn;
511 if (pVCpu->hmr0.s.vmx.pfnStartVm != pfnStartVm)
512 pVCpu->hmr0.s.vmx.pfnStartVm = pfnStartVm;
513}
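/*
 * Worked example (illustrative only): a VCPU that needs XCR0 switching and an IBPB on
 * VM-entry, but no L1D/MDS flushing on entry and no IBPB on VM-exit, computes
 * idx = 1 | 2 = 3 and therefore selects
 * hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit.
 */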
514
515
516/**
517 * Pushes a 2-byte value onto the real-mode (in virtual-8086 mode) guest's
518 * stack.
519 *
520 * @returns Strict VBox status code (i.e. informational status codes too).
521 * @retval VINF_EM_RESET if pushing a value to the stack caused a triple-fault.
522 * @param pVCpu The cross context virtual CPU structure.
523 * @param uValue The value to push to the guest stack.
524 */
525static VBOXSTRICTRC hmR0VmxRealModeGuestStackPush(PVMCPUCC pVCpu, uint16_t uValue)
526{
527 /*
528 * The stack limit is 0xffff in real-on-virtual 8086 mode. Real-mode with weird stack limits cannot be run in
529 * virtual 8086 mode in VT-x. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers".
530 * See Intel Instruction reference for PUSH and Intel spec. 22.33.1 "Segment Wraparound".
531 */
532 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
533 if (pCtx->sp == 1)
534 return VINF_EM_RESET;
535 pCtx->sp -= sizeof(uint16_t); /* May wrap around which is expected behaviour. */
536 int rc = PGMPhysSimpleWriteGCPhys(pVCpu->CTX_SUFF(pVM), pCtx->ss.u64Base + pCtx->sp, &uValue, sizeof(uint16_t));
537 AssertRC(rc);
538 return rc;
539}
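/*
 * Behavioural note (editorial illustration): with sp == 0 the push wraps and the word is
 * written at SS:0xfffe, matching segment-wraparound behaviour on real hardware, whereas
 * sp == 1 would split the write across the wrap and is instead treated as a triple-fault
 * (VINF_EM_RESET), per the Intel notes referenced above.
 */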
540
541
542/**
543 * Wrapper around VMXWriteVmcs16 taking a pVCpu parameter so VCC doesn't complain about
544 * unreferenced local parameters in the template code...
545 */
546DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t u16Val)
547{
548 RT_NOREF(pVCpu);
549 return VMXWriteVmcs16(uFieldEnc, u16Val);
550}
551
552
553/**
554 * Wrapper around VMXWriteVmcs32 taking a pVCpu parameter so VCC doesn't complain about
555 * unreferenced local parameters in the template code...
556 */
557DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t u32Val)
558{
559 RT_NOREF(pVCpu);
560 return VMXWriteVmcs32(uFieldEnc, u32Val);
561}
562
563
564/**
565 * Wrapper around VMXWriteVmcs64 taking a pVCpu parameter so VCC doesn't complain about
566 * unreferenced local parameters in the template code...
567 */
568DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t u64Val)
569{
570 RT_NOREF(pVCpu);
571 return VMXWriteVmcs64(uFieldEnc, u64Val);
572}
573
574
575/**
576 * Wrapper around VMXReadVmcs16 taking a pVCpu parameter so VCC doesn't complain about
577 * unreferenced local parameters in the template code...
578 */
579DECL_FORCE_INLINE(int) hmR0VmxReadVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t *pu16Val)
580{
581 RT_NOREF(pVCpu);
582 return VMXReadVmcs16(uFieldEnc, pu16Val);
583}
584
585
586/**
587 * Wrapper around VMXReadVmcs32 taking a pVCpu parameter so VCC doesn't complain about
588 * unreferenced local parameters in the template code...
589 */
590DECL_FORCE_INLINE(int) hmR0VmxReadVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t *pu32Val)
591{
592 RT_NOREF(pVCpu);
593 return VMXReadVmcs32(uFieldEnc, pu32Val);
594}
595
596
597/**
598 * Wrapper around VMXReadVmcs64 taking a pVCpu parameter so VCC doesn't complain about
599 * unreferenced local parameters in the template code...
600 */
601DECL_FORCE_INLINE(int) hmR0VmxReadVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t *pu64Val)
602{
603 RT_NOREF(pVCpu);
604 return VMXReadVmcs64(uFieldEnc, pu64Val);
605}
606
607
608/*
609 * Instantiate the code we share with the NEM darwin backend.
610 */
611#define VCPU_2_VMXSTATE(a_pVCpu) (a_pVCpu)->hm.s
612#define VCPU_2_VMXSTATS(a_pVCpu) (a_pVCpu)->hm.s
613
614#define VM_IS_VMX_UNRESTRICTED_GUEST(a_pVM) (a_pVM)->hmr0.s.vmx.fUnrestrictedGuest
615#define VM_IS_VMX_NESTED_PAGING(a_pVM) (a_pVM)->hmr0.s.fNestedPaging
616#define VM_IS_VMX_PREEMPT_TIMER_USED(a_pVM) (a_pVM)->hmr0.s.vmx.fUsePreemptTimer
617#define VM_IS_VMX_LBR(a_pVM) (a_pVM)->hmr0.s.vmx.fLbr
618
619#define VMX_VMCS_WRITE_16(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs16((a_pVCpu), (a_FieldEnc), (a_Val))
620#define VMX_VMCS_WRITE_32(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs32((a_pVCpu), (a_FieldEnc), (a_Val))
621#define VMX_VMCS_WRITE_64(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
622#define VMX_VMCS_WRITE_NW(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
623
624#define VMX_VMCS_READ_16(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs16((a_pVCpu), (a_FieldEnc), (a_pVal))
625#define VMX_VMCS_READ_32(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs32((a_pVCpu), (a_FieldEnc), (a_pVal))
626#define VMX_VMCS_READ_64(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
627#define VMX_VMCS_READ_NW(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
628
629#include "../VMMAll/VMXAllTemplate.cpp.h"
630
631#undef VMX_VMCS_WRITE_16
632#undef VMX_VMCS_WRITE_32
633#undef VMX_VMCS_WRITE_64
634#undef VMX_VMCS_WRITE_NW
635
636#undef VMX_VMCS_READ_16
637#undef VMX_VMCS_READ_32
638#undef VMX_VMCS_READ_64
639#undef VMX_VMCS_READ_NW
640
641#undef VM_IS_VMX_PREEMPT_TIMER_USED
642#undef VM_IS_VMX_NESTED_PAGING
643#undef VM_IS_VMX_UNRESTRICTED_GUEST
644#undef VCPU_2_VMXSTATS
645#undef VCPU_2_VMXSTATE
646
647
648/**
649 * Updates the VM's last error record.
650 *
651 * If there was a VMX instruction error, reads the error data from the VMCS and
652 * updates VCPU's last error record as well.
653 *
654 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
655 * Can be NULL if @a rc is not VERR_VMX_UNABLE_TO_START_VM or
656 * VERR_VMX_INVALID_VMCS_FIELD.
657 * @param rc The error code.
658 */
659static void hmR0VmxUpdateErrorRecord(PVMCPUCC pVCpu, int rc)
660{
661 if ( rc == VERR_VMX_INVALID_VMCS_FIELD
662 || rc == VERR_VMX_UNABLE_TO_START_VM)
663 {
664 AssertPtrReturnVoid(pVCpu);
665 VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
666 }
667 pVCpu->CTX_SUFF(pVM)->hm.s.ForR3.rcInit = rc;
668}
669
670
671/**
672 * Enters VMX root mode operation on the current CPU.
673 *
674 * @returns VBox status code.
675 * @param pHostCpu The HM physical-CPU structure.
676 * @param pVM The cross context VM structure. Can be
677 * NULL, after a resume.
678 * @param HCPhysCpuPage Physical address of the VMXON region.
679 * @param pvCpuPage Pointer to the VMXON region.
680 */
681static int hmR0VmxEnterRootMode(PHMPHYSCPU pHostCpu, PVMCC pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage)
682{
683 Assert(pHostCpu);
684 Assert(HCPhysCpuPage && HCPhysCpuPage != NIL_RTHCPHYS);
685 Assert(RT_ALIGN_T(HCPhysCpuPage, _4K, RTHCPHYS) == HCPhysCpuPage);
686 Assert(pvCpuPage);
687 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
688
689 if (pVM)
690 {
691 /* Write the VMCS revision identifier to the VMXON region. */
692 *(uint32_t *)pvCpuPage = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
693 }
694
695 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
696 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
697
698 /* Enable the VMX bit in CR4 if necessary. */
699 RTCCUINTREG const uOldCr4 = SUPR0ChangeCR4(X86_CR4_VMXE, RTCCUINTREG_MAX);
700
701 /* Record whether VMXE was already enabled prior to us enabling it above. */
702 pHostCpu->fVmxeAlreadyEnabled = RT_BOOL(uOldCr4 & X86_CR4_VMXE);
703
704 /* Enter VMX root mode. */
705 int rc = VMXEnable(HCPhysCpuPage);
706 if (RT_FAILURE(rc))
707 {
708 /* Restore CR4.VMXE if it was not set prior to our attempt to set it above. */
709 if (!pHostCpu->fVmxeAlreadyEnabled)
710 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
711
712 if (pVM)
713 pVM->hm.s.ForR3.vmx.HCPhysVmxEnableError = HCPhysCpuPage;
714 }
715
716 /* Restore interrupts. */
717 ASMSetFlags(fEFlags);
718 return rc;
719}
720
721
722/**
723 * Exits VMX root mode operation on the current CPU.
724 *
725 * @returns VBox status code.
726 * @param pHostCpu The HM physical-CPU structure.
727 */
728static int hmR0VmxLeaveRootMode(PHMPHYSCPU pHostCpu)
729{
730 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
731
732 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
733 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
734
735 /* If we're for some reason not in VMX root mode, then don't leave it. */
736 RTCCUINTREG const uHostCr4 = ASMGetCR4();
737
738 int rc;
739 if (uHostCr4 & X86_CR4_VMXE)
740 {
741 /* Exit VMX root mode and clear the VMX bit in CR4. */
742 VMXDisable();
743
744 /* Clear CR4.VMXE only if it was clear prior to us setting it. */
745 if (!pHostCpu->fVmxeAlreadyEnabled)
746 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
747
748 rc = VINF_SUCCESS;
749 }
750 else
751 rc = VERR_VMX_NOT_IN_VMX_ROOT_MODE;
752
753 /* Restore interrupts. */
754 ASMSetFlags(fEFlags);
755 return rc;
756}
757
758
759/**
760 * Allocates pages as specified by an array of VMX page allocation info
761 * objects.
762 *
763 * The pages contents are zero'd after allocation.
764 *
765 * @returns VBox status code.
766 * @param phMemObj Where to return the handle to the allocation.
767 * @param paAllocInfo The pointer to the first element of the VMX
768 * page-allocation info object array.
769 * @param cEntries The number of elements in the @a paAllocInfo array.
770 */
771static int hmR0VmxPagesAllocZ(PRTR0MEMOBJ phMemObj, PVMXPAGEALLOCINFO paAllocInfo, uint32_t cEntries)
772{
773 *phMemObj = NIL_RTR0MEMOBJ;
774
775 /* Figure out how many pages to allocate. */
776 uint32_t cPages = 0;
777 for (uint32_t iPage = 0; iPage < cEntries; iPage++)
778 cPages += !!paAllocInfo[iPage].fValid;
779
780 /* Allocate the pages. */
781 if (cPages)
782 {
783 size_t const cbPages = cPages << HOST_PAGE_SHIFT;
784 int rc = RTR0MemObjAllocPage(phMemObj, cbPages, false /* fExecutable */);
785 if (RT_FAILURE(rc))
786 return rc;
787
788 /* Zero the contents and assign each page to the corresponding VMX page-allocation entry. */
789 void *pvFirstPage = RTR0MemObjAddress(*phMemObj);
790 RT_BZERO(pvFirstPage, cbPages);
791
792 uint32_t iPage = 0;
793 for (uint32_t i = 0; i < cEntries; i++)
794 if (paAllocInfo[i].fValid)
795 {
796 RTHCPHYS const HCPhysPage = RTR0MemObjGetPagePhysAddr(*phMemObj, iPage);
797 void *pvPage = (void *)((uintptr_t)pvFirstPage + (iPage << X86_PAGE_4K_SHIFT));
798 Assert(HCPhysPage && HCPhysPage != NIL_RTHCPHYS);
799 AssertPtr(pvPage);
800
801 Assert(paAllocInfo[i].pHCPhys);
802 Assert(paAllocInfo[i].ppVirt);
803 *paAllocInfo[i].pHCPhys = HCPhysPage;
804 *paAllocInfo[i].ppVirt = pvPage;
805
806 /* Move to next page. */
807 ++iPage;
808 }
809
810 /* Make sure all valid (requested) pages have been assigned. */
811 Assert(iPage == cPages);
812 }
813 return VINF_SUCCESS;
814}
815
816
817/**
818 * Frees pages allocated using hmR0VmxPagesAllocZ.
819 *
820 * @param phMemObj Pointer to the memory object handle. Will be set to
821 * NIL.
822 */
823DECL_FORCE_INLINE(void) hmR0VmxPagesFree(PRTR0MEMOBJ phMemObj)
824{
825 /* We can cleanup wholesale since it's all one allocation. */
826 if (*phMemObj != NIL_RTR0MEMOBJ)
827 {
828 RTR0MemObjFree(*phMemObj, true /* fFreeMappings */);
829 *phMemObj = NIL_RTR0MEMOBJ;
830 }
831}
832
833
834/**
835 * Initializes a VMCS info. object.
836 *
837 * @param pVmcsInfo The VMCS info. object.
838 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
839 */
840static void hmR0VmxVmcsInfoInit(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
841{
842 RT_ZERO(*pVmcsInfo);
843 RT_ZERO(*pVmcsInfoShared);
844
845 pVmcsInfo->pShared = pVmcsInfoShared;
846 Assert(pVmcsInfo->hMemObj == NIL_RTR0MEMOBJ);
847 pVmcsInfo->HCPhysVmcs = NIL_RTHCPHYS;
848 pVmcsInfo->HCPhysShadowVmcs = NIL_RTHCPHYS;
849 pVmcsInfo->HCPhysMsrBitmap = NIL_RTHCPHYS;
850 pVmcsInfo->HCPhysGuestMsrLoad = NIL_RTHCPHYS;
851 pVmcsInfo->HCPhysGuestMsrStore = NIL_RTHCPHYS;
852 pVmcsInfo->HCPhysHostMsrLoad = NIL_RTHCPHYS;
853 pVmcsInfo->HCPhysVirtApic = NIL_RTHCPHYS;
854 pVmcsInfo->HCPhysEPTP = NIL_RTHCPHYS;
855 pVmcsInfo->u64VmcsLinkPtr = NIL_RTHCPHYS;
856 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
857 pVmcsInfo->idHostCpuExec = NIL_RTCPUID;
858}
859
860
861/**
862 * Frees the VT-x structures for a VMCS info. object.
863 *
864 * @param pVmcsInfo The VMCS info. object.
865 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
866 */
867static void hmR0VmxVmcsInfoFree(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
868{
869 hmR0VmxPagesFree(&pVmcsInfo->hMemObj);
870 hmR0VmxVmcsInfoInit(pVmcsInfo, pVmcsInfoShared);
871}
872
873
874/**
875 * Allocates the VT-x structures for a VMCS info. object.
876 *
877 * @returns VBox status code.
878 * @param pVCpu The cross context virtual CPU structure.
879 * @param pVmcsInfo The VMCS info. object.
880 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
881 *
882 * @remarks The caller is expected to take care of any and all allocation failures.
883 * This function will not perform any cleanup for failures half-way
884 * through.
885 */
886static int hmR0VmxAllocVmcsInfo(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
887{
888 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
889
890 bool const fMsrBitmaps = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS);
891 bool const fShadowVmcs = !fIsNstGstVmcs ? pVM->hmr0.s.vmx.fUseVmcsShadowing : pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing;
892 Assert(!pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing); /* VMCS shadowing is not yet exposed to the guest. */
893 VMXPAGEALLOCINFO aAllocInfo[] =
894 {
895 { true, 0 /* Unused */, &pVmcsInfo->HCPhysVmcs, &pVmcsInfo->pvVmcs },
896 { true, 0 /* Unused */, &pVmcsInfo->HCPhysGuestMsrLoad, &pVmcsInfo->pvGuestMsrLoad },
897 { true, 0 /* Unused */, &pVmcsInfo->HCPhysHostMsrLoad, &pVmcsInfo->pvHostMsrLoad },
898 { fMsrBitmaps, 0 /* Unused */, &pVmcsInfo->HCPhysMsrBitmap, &pVmcsInfo->pvMsrBitmap },
899 { fShadowVmcs, 0 /* Unused */, &pVmcsInfo->HCPhysShadowVmcs, &pVmcsInfo->pvShadowVmcs },
900 };
901
902 int rc = hmR0VmxPagesAllocZ(&pVmcsInfo->hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
903 if (RT_FAILURE(rc))
904 return rc;
905
906 /*
907 * We use the same page for the VM-entry MSR-load and VM-exit MSR-store areas
908 * because they contain a symmetric list of guest MSRs to load on VM-entry and store on VM-exit.
909 */
910 AssertCompile(RT_ELEMENTS(aAllocInfo) > 0);
911 Assert(pVmcsInfo->HCPhysGuestMsrLoad != NIL_RTHCPHYS);
912 pVmcsInfo->pvGuestMsrStore = pVmcsInfo->pvGuestMsrLoad;
913 pVmcsInfo->HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrLoad;
914
915 /*
916 * Get the virtual-APIC page rather than allocating it again.
917 */
918 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW)
919 {
920 if (!fIsNstGstVmcs)
921 {
922 if (PDMHasApic(pVM))
923 {
924 rc = APICGetApicPageForCpu(pVCpu, &pVmcsInfo->HCPhysVirtApic, (PRTR0PTR)&pVmcsInfo->pbVirtApic, NULL /*pR3Ptr*/);
925 if (RT_FAILURE(rc))
926 return rc;
927 Assert(pVmcsInfo->pbVirtApic);
928 Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
929 }
930 }
931 else
932 {
933 /* These are set up later while merging the nested-guest VMCS. */
934 Assert(pVmcsInfo->pbVirtApic == NULL);
935 Assert(pVmcsInfo->HCPhysVirtApic == NIL_RTHCPHYS);
936 }
937 }
938
939 return VINF_SUCCESS;
940}
941
942
943/**
944 * Free all VT-x structures for the VM.
945 *
946 * @param pVM The cross context VM structure.
947 */
948static void hmR0VmxStructsFree(PVMCC pVM)
949{
950 hmR0VmxPagesFree(&pVM->hmr0.s.vmx.hMemObj);
951#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
952 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
953 {
954 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsFields);
955 pVM->hmr0.s.vmx.paShadowVmcsFields = NULL;
956 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsRoFields);
957 pVM->hmr0.s.vmx.paShadowVmcsRoFields = NULL;
958 }
959#endif
960
961 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
962 {
963 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
964 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
965#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
966 if (pVM->cpum.ro.GuestFeatures.fVmx)
967 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
968#endif
969 }
970}
971
972
973/**
974 * Allocate all VT-x structures for the VM.
975 *
976 * @returns IPRT status code.
977 * @param pVM The cross context VM structure.
978 *
979 * @remarks This function will clean up on memory allocation failures.
980 */
981static int hmR0VmxStructsAlloc(PVMCC pVM)
982{
983 /*
984 * Sanity check the VMCS size reported by the CPU as we assume 4KB allocations.
985 * The VMCS size cannot be more than 4096 bytes.
986 *
987 * See Intel spec. Appendix A.1 "Basic VMX Information".
988 */
989 uint32_t const cbVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_SIZE);
990 if (cbVmcs <= X86_PAGE_4K_SIZE)
991 { /* likely */ }
992 else
993 {
994 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_INVALID_VMCS_SIZE;
995 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
996 }
997
998 /*
999 * Allocate per-VM VT-x structures.
1000 */
1001 bool const fVirtApicAccess = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
1002 bool const fUseVmcsShadowing = pVM->hmr0.s.vmx.fUseVmcsShadowing;
1003 VMXPAGEALLOCINFO aAllocInfo[] =
1004 {
1005 { fVirtApicAccess, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysApicAccess, (PRTR0PTR)&pVM->hmr0.s.vmx.pbApicAccess },
1006 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmreadBitmap, &pVM->hmr0.s.vmx.pvVmreadBitmap },
1007 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmwriteBitmap, &pVM->hmr0.s.vmx.pvVmwriteBitmap },
1008#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1009 { true, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysScratch, (PRTR0PTR)&pVM->hmr0.s.vmx.pbScratch },
1010#endif
1011 };
1012
1013 int rc = hmR0VmxPagesAllocZ(&pVM->hmr0.s.vmx.hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
1014 if (RT_SUCCESS(rc))
1015 {
1016#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1017 /* Allocate the shadow VMCS-fields array. */
1018 if (fUseVmcsShadowing)
1019 {
1020 Assert(!pVM->hmr0.s.vmx.cShadowVmcsFields);
1021 Assert(!pVM->hmr0.s.vmx.cShadowVmcsRoFields);
1022 pVM->hmr0.s.vmx.paShadowVmcsFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
1023 pVM->hmr0.s.vmx.paShadowVmcsRoFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
1024 if (!pVM->hmr0.s.vmx.paShadowVmcsFields || !pVM->hmr0.s.vmx.paShadowVmcsRoFields)
1025 rc = VERR_NO_MEMORY;
1026 }
1027#endif
1028
1029 /*
1030 * Allocate per-VCPU VT-x structures.
1031 */
1032 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus && RT_SUCCESS(rc); idCpu++)
1033 {
1034 /* Allocate the guest VMCS structures. */
1035 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1036 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
1037
1038#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1039 /* Allocate the nested-guest VMCS structures, when the VMX feature is exposed to the guest. */
1040 if (pVM->cpum.ro.GuestFeatures.fVmx && RT_SUCCESS(rc))
1041 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
1042#endif
1043 }
1044 if (RT_SUCCESS(rc))
1045 return VINF_SUCCESS;
1046 }
1047 hmR0VmxStructsFree(pVM);
1048 return rc;
1049}
1050
1051
1052/**
1053 * Pre-initializes non-zero fields in VMX structures that will be allocated.
1054 *
1055 * @param pVM The cross context VM structure.
1056 */
1057static void hmR0VmxStructsInit(PVMCC pVM)
1058{
1059 /* Paranoia. */
1060 Assert(pVM->hmr0.s.vmx.pbApicAccess == NULL);
1061#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1062 Assert(pVM->hmr0.s.vmx.pbScratch == NULL);
1063#endif
1064
1065 /*
1066 * Initialize members up-front so we can cleanup en masse on allocation failures.
1067 */
1068#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1069 pVM->hmr0.s.vmx.HCPhysScratch = NIL_RTHCPHYS;
1070#endif
1071 pVM->hmr0.s.vmx.HCPhysApicAccess = NIL_RTHCPHYS;
1072 pVM->hmr0.s.vmx.HCPhysVmreadBitmap = NIL_RTHCPHYS;
1073 pVM->hmr0.s.vmx.HCPhysVmwriteBitmap = NIL_RTHCPHYS;
1074 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1075 {
1076 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1077 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
1078 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
1079 }
1080}
1081
1082#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1083/**
1084 * Returns whether an MSR at the given MSR-bitmap offset is intercepted or not.
1085 *
1086 * @returns @c true if the MSR is intercepted, @c false otherwise.
1087 * @param pbMsrBitmap The MSR bitmap.
1088 * @param offMsr The MSR byte offset.
1089 * @param iBit The bit offset from the byte offset.
1090 */
1091DECLINLINE(bool) hmR0VmxIsMsrBitSet(uint8_t const *pbMsrBitmap, uint16_t offMsr, int32_t iBit)
1092{
1093 Assert(offMsr + (iBit >> 3) <= X86_PAGE_4K_SIZE);
1094 return ASMBitTest(pbMsrBitmap, (offMsr << 3) + iBit);
1095}
1096#endif
1097
1098/**
1099 * Sets the permission bits for the specified MSR in the given MSR bitmap.
1100 *
1101 * If the passed VMCS is a nested-guest VMCS, this function ensures that the
1102 * read/write intercept is cleared from the MSR bitmap used for hardware-assisted
1103 * VMX execution of the nested-guest, but only if the nested-guest is also not intercepting
1104 * the read/write access of this MSR.
1105 *
1106 * @param pVCpu The cross context virtual CPU structure.
1107 * @param pVmcsInfo The VMCS info. object.
1108 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1109 * @param idMsr The MSR value.
1110 * @param fMsrpm The MSR permissions (see VMXMSRPM_XXX). This must
1111 * include both a read -and- a write permission!
1112 *
1113 * @sa CPUMGetVmxMsrPermission.
1114 * @remarks Can be called with interrupts disabled.
1115 */
1116static void hmR0VmxSetMsrPermission(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs, uint32_t idMsr, uint32_t fMsrpm)
1117{
1118 uint8_t *pbMsrBitmap = (uint8_t *)pVmcsInfo->pvMsrBitmap;
1119 Assert(pbMsrBitmap);
1120 Assert(VMXMSRPM_IS_FLAG_VALID(fMsrpm));
1121
1122 /*
1123 * MSR-bitmap Layout:
1124 * Byte index MSR range Interpreted as
1125 * 0x000 - 0x3ff 0x00000000 - 0x00001fff Low MSR read bits.
1126 * 0x400 - 0x7ff 0xc0000000 - 0xc0001fff High MSR read bits.
1127 * 0x800 - 0xbff 0x00000000 - 0x00001fff Low MSR write bits.
1128 * 0xc00 - 0xfff 0xc0000000 - 0xc0001fff High MSR write bits.
1129 *
1130 * A bit corresponding to an MSR within the above range causes a VM-exit
1131 * if the bit is 1 on executions of RDMSR/WRMSR. If an MSR falls outside
1132 * the above ranges, it always causes a VM-exit.
1133 *
1134 * See Intel spec. 24.6.9 "MSR-Bitmap Address".
1135 */
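/*
 * Worked example (illustrative): MSR_K8_LSTAR (0xc0000082) falls in the high range, so
 * offMsr = 0x400 and iBit = 0x82. Its read-intercept bit is therefore bit 2 of byte
 * 0x400 + (0x82 >> 3) = 0x410, and the corresponding write-intercept bit sits 0x800
 * bytes higher in the bitmap.
 */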
1136 uint16_t const offBitmapRead = 0;
1137 uint16_t const offBitmapWrite = 0x800;
1138 uint16_t offMsr;
1139 int32_t iBit;
1140 if (idMsr <= UINT32_C(0x00001fff))
1141 {
1142 offMsr = 0;
1143 iBit = idMsr;
1144 }
1145 else if (idMsr - UINT32_C(0xc0000000) <= UINT32_C(0x00001fff))
1146 {
1147 offMsr = 0x400;
1148 iBit = idMsr - UINT32_C(0xc0000000);
1149 }
1150 else
1151 AssertMsgFailedReturnVoid(("Invalid MSR %#RX32\n", idMsr));
1152
1153 /*
1154 * Set the MSR read permission.
1155 */
1156 uint16_t const offMsrRead = offBitmapRead + offMsr;
1157 Assert(offMsrRead + (iBit >> 3) < offBitmapWrite);
1158 if (fMsrpm & VMXMSRPM_ALLOW_RD)
1159 {
1160#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1161 bool const fClear = !fIsNstGstVmcs ? true
1162 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrRead, iBit);
1163#else
1164 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1165 bool const fClear = true;
1166#endif
1167 if (fClear)
1168 ASMBitClear(pbMsrBitmap, (offMsrRead << 3) + iBit);
1169 }
1170 else
1171 ASMBitSet(pbMsrBitmap, (offMsrRead << 3) + iBit);
1172
1173 /*
1174 * Set the MSR write permission.
1175 */
1176 uint16_t const offMsrWrite = offBitmapWrite + offMsr;
1177 Assert(offMsrWrite + (iBit >> 3) < X86_PAGE_4K_SIZE);
1178 if (fMsrpm & VMXMSRPM_ALLOW_WR)
1179 {
1180#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1181 bool const fClear = !fIsNstGstVmcs ? true
1182 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrWrite, iBit);
1183#else
1184 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1185 bool const fClear = true;
1186#endif
1187 if (fClear)
1188 ASMBitClear(pbMsrBitmap, (offMsrWrite << 3) + iBit);
1189 }
1190 else
1191 ASMBitSet(pbMsrBitmap, (offMsrWrite << 3) + iBit);
1192}
1193
1194
1195/**
1196 * Updates the VMCS with the number of effective MSRs in the auto-load/store MSR
1197 * area.
1198 *
1199 * @returns VBox status code.
1200 * @param pVCpu The cross context virtual CPU structure.
1201 * @param pVmcsInfo The VMCS info. object.
1202 * @param cMsrs The number of MSRs.
1203 */
1204static int hmR0VmxSetAutoLoadStoreMsrCount(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint32_t cMsrs)
1205{
1206 /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */
1207 uint32_t const cMaxSupportedMsrs = VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc);
1208 if (RT_LIKELY(cMsrs < cMaxSupportedMsrs))
1209 {
1210 /* Commit the MSR counts to the VMCS and update the cache. */
1211 if (pVmcsInfo->cEntryMsrLoad != cMsrs)
1212 {
1213 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1214 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cMsrs); AssertRC(rc);
1215 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1216 pVmcsInfo->cEntryMsrLoad = cMsrs;
1217 pVmcsInfo->cExitMsrStore = cMsrs;
1218 pVmcsInfo->cExitMsrLoad = cMsrs;
1219 }
1220 return VINF_SUCCESS;
1221 }
1222
1223 LogRel(("Auto-load/store MSR count exceeded! cMsrs=%u MaxSupported=%u\n", cMsrs, cMaxSupportedMsrs));
1224 pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_GUEST_MSR_STORAGE;
1225 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1226}
1227
1228
1229/**
1230 * Adds a new (or updates the value of an existing) guest/host MSR
1231 * pair to be swapped during the world-switch as part of the
1232 * auto-load/store MSR area in the VMCS.
1233 *
1234 * @returns VBox status code.
1235 * @param pVCpu The cross context virtual CPU structure.
1236 * @param pVmxTransient The VMX-transient structure.
1237 * @param idMsr The MSR.
1238 * @param uGuestMsrValue Value of the guest MSR.
1239 * @param fSetReadWrite Whether to set the guest read/write access of this
1240 * MSR (thus not causing a VM-exit).
1241 * @param fUpdateHostMsr Whether to update the value of the host MSR if
1242 * necessary.
1243 */
1244static int hmR0VmxAddAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr, uint64_t uGuestMsrValue,
1245 bool fSetReadWrite, bool fUpdateHostMsr)
1246{
1247 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1248 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1249 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1250 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1251 uint32_t i;
1252
1253 /* Paranoia. */
1254 Assert(pGuestMsrLoad);
1255
1256#ifndef DEBUG_bird
1257 LogFlowFunc(("pVCpu=%p idMsr=%#RX32 uGuestMsrValue=%#RX64\n", pVCpu, idMsr, uGuestMsrValue));
1258#endif
1259
1260 /* Check if the MSR already exists in the VM-entry MSR-load area. */
1261 for (i = 0; i < cMsrs; i++)
1262 {
1263 if (pGuestMsrLoad[i].u32Msr == idMsr)
1264 break;
1265 }
1266
1267 bool fAdded = false;
1268 if (i == cMsrs)
1269 {
1270 /* The MSR does not exist, bump the MSR count to make room for the new MSR. */
1271 ++cMsrs;
1272 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1273 AssertMsgRCReturn(rc, ("Insufficient space to add MSR to VM-entry MSR-load/store area %u\n", idMsr), rc);
1274
1275 /* Set the guest to read/write this MSR without causing VM-exits. */
1276 if ( fSetReadWrite
1277 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS))
1278 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_ALLOW_RD_WR);
1279
1280 Log4Func(("Added MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1281 fAdded = true;
1282 }
1283
1284 /* Update the MSR value for the newly added or already existing MSR. */
1285 pGuestMsrLoad[i].u32Msr = idMsr;
1286 pGuestMsrLoad[i].u64Value = uGuestMsrValue;
1287
1288 /* Create the corresponding slot in the VM-exit MSR-store area if we use a different page. */
1289 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1290 {
1291 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1292 pGuestMsrStore[i].u32Msr = idMsr;
1293 pGuestMsrStore[i].u64Value = uGuestMsrValue;
1294 }
1295
1296 /* Update the corresponding slot in the host MSR area. */
1297 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1298 Assert(pHostMsr != pVmcsInfo->pvGuestMsrLoad);
1299 Assert(pHostMsr != pVmcsInfo->pvGuestMsrStore);
1300 pHostMsr[i].u32Msr = idMsr;
1301
1302 /*
1303 * Only if the caller requests to update the host MSR value AND we've newly added the
1304 * MSR to the host MSR area do we actually update the value. Otherwise, it will be
1305 * updated by hmR0VmxUpdateAutoLoadHostMsrs().
1306 *
1307 * We do this for performance reasons since reading MSRs may be quite expensive.
1308 */
1309 if (fAdded)
1310 {
1311 if (fUpdateHostMsr)
1312 {
1313 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
1314 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1315 pHostMsr[i].u64Value = ASMRdMsr(idMsr);
1316 }
1317 else
1318 {
1319 /* Someone else can do the work. */
1320 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
1321 }
1322 }
1323 return VINF_SUCCESS;
1324}
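/*
 * Usage sketch (hypothetical call, for illustration only; CPUMGetGuestTscAux and the
 * chosen MSR are assumptions about the wider VBox API, not taken from this file):
 * swapping a guest MSR such as MSR_K8_TSC_AUX via the auto-load/store area could look like:
 *
 *     int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX,
 *                                         CPUMGetGuestTscAux(pVCpu),
 *                                         true,   // fSetReadWrite
 *                                         true);  // fUpdateHostMsr
 *
 * The matching hmR0VmxRemoveAutoLoadStoreMsr call would undo this on the teardown path.
 */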
1325
1326
1327/**
1328 * Removes a guest/host MSR pair to be swapped during the world-switch from the
1329 * auto-load/store MSR area in the VMCS.
1330 *
1331 * @returns VBox status code.
1332 * @param pVCpu The cross context virtual CPU structure.
1333 * @param pVmxTransient The VMX-transient structure.
1334 * @param idMsr The MSR.
1335 */
1336static int hmR0VmxRemoveAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr)
1337{
1338 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1339 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1340 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1341 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1342
1343#ifndef DEBUG_bird
1344 LogFlowFunc(("pVCpu=%p idMsr=%#RX32\n", pVCpu, idMsr));
1345#endif
1346
1347 for (uint32_t i = 0; i < cMsrs; i++)
1348 {
1349 /* Find the MSR. */
1350 if (pGuestMsrLoad[i].u32Msr == idMsr)
1351 {
1352 /*
1353 * If it's the last MSR, we only need to reduce the MSR count.
1354 * If it's -not- the last MSR, copy the last MSR in place of it and reduce the MSR count.
1355 */
1356 if (i < cMsrs - 1)
1357 {
1358 /* Remove it from the VM-entry MSR-load area. */
1359 pGuestMsrLoad[i].u32Msr = pGuestMsrLoad[cMsrs - 1].u32Msr;
1360 pGuestMsrLoad[i].u64Value = pGuestMsrLoad[cMsrs - 1].u64Value;
1361
1362 /* Remove it from the VM-exit MSR-store area if it's in a different page. */
1363 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1364 {
1365 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1366 Assert(pGuestMsrStore[i].u32Msr == idMsr);
1367 pGuestMsrStore[i].u32Msr = pGuestMsrStore[cMsrs - 1].u32Msr;
1368 pGuestMsrStore[i].u64Value = pGuestMsrStore[cMsrs - 1].u64Value;
1369 }
1370
1371 /* Remove it from the VM-exit MSR-load area. */
1372 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1373 Assert(pHostMsr[i].u32Msr == idMsr);
1374 pHostMsr[i].u32Msr = pHostMsr[cMsrs - 1].u32Msr;
1375 pHostMsr[i].u64Value = pHostMsr[cMsrs - 1].u64Value;
1376 }
1377
1378 /* Reduce the count to reflect the removed MSR and bail. */
1379 --cMsrs;
1380 break;
1381 }
1382 }
1383
1384 /* Update the VMCS if the count changed (meaning the MSR was found and removed). */
1385 if (cMsrs != pVmcsInfo->cEntryMsrLoad)
1386 {
1387 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1388 AssertRCReturn(rc, rc);
1389
1390 /* We're no longer swapping MSRs during the world-switch, intercept guest read/writes to them. */
1391 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1392 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_EXIT_RD | VMXMSRPM_EXIT_WR);
1393
1394 Log4Func(("Removed MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1395 return VINF_SUCCESS;
1396 }
1397
1398 return VERR_NOT_FOUND;
1399}
1400
1401
1402/**
1403 * Updates the value of all host MSRs in the VM-exit MSR-load area.
1404 *
1405 * @param pVCpu The cross context virtual CPU structure.
1406 * @param pVmcsInfo The VMCS info. object.
1407 *
1408 * @remarks No-long-jump zone!!!
1409 */
1410static void hmR0VmxUpdateAutoLoadHostMsrs(PCVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1411{
1412 RT_NOREF(pVCpu);
1413 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1414
1415 PVMXAUTOMSR pHostMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1416 uint32_t const cMsrs = pVmcsInfo->cExitMsrLoad;
1417 Assert(pHostMsrLoad);
1418 Assert(sizeof(*pHostMsrLoad) * cMsrs <= X86_PAGE_4K_SIZE);
1419 LogFlowFunc(("pVCpu=%p cMsrs=%u\n", pVCpu, cMsrs));
1420 for (uint32_t i = 0; i < cMsrs; i++)
1421 {
1422 /*
1423 * Performance hack for the host EFER MSR. We use the cached value rather than re-read it.
1424 * Strict builds will catch mismatches in hmR0VmxCheckAutoLoadStoreMsrs(). See @bugref{7368}.
1425 */
1426 if (pHostMsrLoad[i].u32Msr == MSR_K6_EFER)
1427 pHostMsrLoad[i].u64Value = g_uHmVmxHostMsrEfer;
1428 else
1429 pHostMsrLoad[i].u64Value = ASMRdMsr(pHostMsrLoad[i].u32Msr);
1430 }
1431}
1432
1433
1434/**
1435 * Saves a set of host MSRs to allow read/write passthru access to the guest and
1436 * perform lazy restoration of the host MSRs while leaving VT-x.
1437 *
1438 * @param pVCpu The cross context virtual CPU structure.
1439 *
1440 * @remarks No-long-jump zone!!!
1441 */
1442static void hmR0VmxLazySaveHostMsrs(PVMCPUCC pVCpu)
1443{
1444 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1445
1446 /*
1447 * Note: If you're adding MSRs here, make sure to update the MSR-bitmap accesses in hmR0VmxSetupVmcsProcCtls().
1448 */
1449 if (!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST))
1450 {
1451 Assert(!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)); /* Guest MSRs better not be loaded now. */
1452 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
1453 {
1454 pVCpu->hmr0.s.vmx.u64HostMsrLStar = ASMRdMsr(MSR_K8_LSTAR);
1455 pVCpu->hmr0.s.vmx.u64HostMsrStar = ASMRdMsr(MSR_K6_STAR);
1456 pVCpu->hmr0.s.vmx.u64HostMsrSfMask = ASMRdMsr(MSR_K8_SF_MASK);
1457 pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
1458 }
1459 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_SAVED_HOST;
1460 }
1461}
1462
1463
1464#ifdef VBOX_STRICT
1465
1466/**
1467 * Verifies that our cached host EFER MSR value has not changed since we cached it.
1468 *
1469 * @param pVmcsInfo The VMCS info. object.
1470 */
1471static void hmR0VmxCheckHostEferMsr(PCVMXVMCSINFO pVmcsInfo)
1472{
1473 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1474
1475 if (pVmcsInfo->u32ExitCtls & VMX_EXIT_CTLS_LOAD_EFER_MSR)
1476 {
1477 uint64_t const uHostEferMsr = ASMRdMsr(MSR_K6_EFER);
1478 uint64_t const uHostEferMsrCache = g_uHmVmxHostMsrEfer;
1479 uint64_t uVmcsEferMsrVmcs;
1480 int rc = VMXReadVmcs64(VMX_VMCS64_HOST_EFER_FULL, &uVmcsEferMsrVmcs);
1481 AssertRC(rc);
1482
1483 AssertMsgReturnVoid(uHostEferMsr == uVmcsEferMsrVmcs,
1484 ("EFER Host/VMCS mismatch! host=%#RX64 vmcs=%#RX64\n", uHostEferMsr, uVmcsEferMsrVmcs));
1485 AssertMsgReturnVoid(uHostEferMsr == uHostEferMsrCache,
1486 ("EFER Host/Cache mismatch! host=%#RX64 cache=%#RX64\n", uHostEferMsr, uHostEferMsrCache));
1487 }
1488}
1489
1490
1491/**
1492 * Verifies whether the guest/host MSR pairs in the auto-load/store area in the
1493 * VMCS are correct.
1494 *
1495 * @param pVCpu The cross context virtual CPU structure.
1496 * @param pVmcsInfo The VMCS info. object.
1497 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1498 */
1499static void hmR0VmxCheckAutoLoadStoreMsrs(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
1500{
1501 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1502
1503 /* Read the various MSR-area counts from the VMCS. */
1504 uint32_t cEntryLoadMsrs;
1505 uint32_t cExitStoreMsrs;
1506 uint32_t cExitLoadMsrs;
1507 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &cEntryLoadMsrs); AssertRC(rc);
1508 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &cExitStoreMsrs); AssertRC(rc);
1509 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &cExitLoadMsrs); AssertRC(rc);
1510
1511 /* Verify all the MSR counts are the same. */
1512 Assert(cEntryLoadMsrs == cExitStoreMsrs);
1513 Assert(cExitStoreMsrs == cExitLoadMsrs);
1514 uint32_t const cMsrs = cExitLoadMsrs;
1515
1516 /* Verify the MSR counts do not exceed the maximum count supported by the hardware. */
1517 Assert(cMsrs < VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc));
1518
1519 /* Verify the MSR counts are within the allocated page size. */
1520 Assert(sizeof(VMXAUTOMSR) * cMsrs <= X86_PAGE_4K_SIZE);
1521
1522 /* Verify the relevant contents of the MSR areas match. */
1523 PCVMXAUTOMSR pGuestMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1524 PCVMXAUTOMSR pGuestMsrStore = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1525 PCVMXAUTOMSR pHostMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1526 bool const fSeparateExitMsrStorePage = hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo);
1527 for (uint32_t i = 0; i < cMsrs; i++)
1528 {
1529 /* Verify that the MSRs are paired properly and that the host MSR has the correct value. */
1530 if (fSeparateExitMsrStorePage)
1531 {
1532 AssertMsgReturnVoid(pGuestMsrLoad->u32Msr == pGuestMsrStore->u32Msr,
1533 ("GuestMsrLoad=%#RX32 GuestMsrStore=%#RX32 cMsrs=%u\n",
1534 pGuestMsrLoad->u32Msr, pGuestMsrStore->u32Msr, cMsrs));
1535 }
1536
1537 AssertMsgReturnVoid(pHostMsrLoad->u32Msr == pGuestMsrLoad->u32Msr,
1538 ("HostMsrLoad=%#RX32 GuestMsrLoad=%#RX32 cMsrs=%u\n",
1539 pHostMsrLoad->u32Msr, pGuestMsrLoad->u32Msr, cMsrs));
1540
1541 uint64_t const u64HostMsr = ASMRdMsr(pHostMsrLoad->u32Msr);
1542 AssertMsgReturnVoid(pHostMsrLoad->u64Value == u64HostMsr,
1543 ("u32Msr=%#RX32 VMCS Value=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n",
1544 pHostMsrLoad->u32Msr, pHostMsrLoad->u64Value, u64HostMsr, cMsrs));
1545
1546 /* Verify that cached host EFER MSR matches what's loaded on the CPU. */
1547 bool const fIsEferMsr = RT_BOOL(pHostMsrLoad->u32Msr == MSR_K6_EFER);
1548 AssertMsgReturnVoid(!fIsEferMsr || u64HostMsr == g_uHmVmxHostMsrEfer,
1549 ("Cached=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n", g_uHmVmxHostMsrEfer, u64HostMsr, cMsrs));
1550
1551 /* Verify that the accesses are as expected in the MSR bitmap for auto-load/store MSRs. */
1552 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1553 {
1554 uint32_t const fMsrpm = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, pGuestMsrLoad->u32Msr);
1555 if (fIsEferMsr)
1556 {
1557 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_RD), ("Passthru read for EFER MSR!?\n"));
1558 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_WR), ("Passthru write for EFER MSR!?\n"));
1559 }
1560 else
1561 {
1562 /* Verify LBR MSRs (used only for debugging) are intercepted. We don't passthru these MSRs to the guest yet. */
1563 PCVMCC pVM = pVCpu->CTX_SUFF(pVM);
1564 if ( pVM->hmr0.s.vmx.fLbr
1565 && ( hmR0VmxIsLbrBranchFromMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1566 || hmR0VmxIsLbrBranchToMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1567 || pGuestMsrLoad->u32Msr == pVM->hmr0.s.vmx.idLbrTosMsr))
1568 {
1569 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_EXIT_RD_WR,
1570 ("u32Msr=%#RX32 cMsrs=%u Passthru read/write for LBR MSRs!\n",
1571 pGuestMsrLoad->u32Msr, cMsrs));
1572 }
1573 else if (!fIsNstGstVmcs)
1574 {
1575 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_ALLOW_RD_WR,
1576 ("u32Msr=%#RX32 cMsrs=%u No passthru read/write!\n", pGuestMsrLoad->u32Msr, cMsrs));
1577 }
1578 else
1579 {
1580 /*
1581 * A nested-guest VMCS must -also- allow read/write passthrough for the MSR for us to
1582 * execute a nested-guest with MSR passthrough.
1583 *
1584 * Check if the nested-guest MSR bitmap allows passthrough, and if so, assert that we
1585 * allow passthrough too.
1586 */
1587 void const *pvMsrBitmapNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap;
1588 Assert(pvMsrBitmapNstGst);
1589 uint32_t const fMsrpmNstGst = CPUMGetVmxMsrPermission(pvMsrBitmapNstGst, pGuestMsrLoad->u32Msr);
1590 AssertMsgReturnVoid(fMsrpm == fMsrpmNstGst,
1591 ("u32Msr=%#RX32 cMsrs=%u Permission mismatch fMsrpm=%#x fMsrpmNstGst=%#x!\n",
1592 pGuestMsrLoad->u32Msr, cMsrs, fMsrpm, fMsrpmNstGst));
1593 }
1594 }
1595 }
1596
1597 /* Move to the next MSR. */
1598 pHostMsrLoad++;
1599 pGuestMsrLoad++;
1600 pGuestMsrStore++;
1601 }
1602}
1603
1604#endif /* VBOX_STRICT */
1605
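/*
 * For illustration only (not compiled): a minimal sketch of how a guest/host MSR
 * pair could be appended to the auto-load/store areas that are verified by
 * hmR0VmxCheckAutoLoadStoreMsrs() above. The function name is hypothetical and
 * the real helper additionally deals with a separate VM-exit MSR-store page and
 * the MSR bitmap; the sketch only shows the basic layout: the same index in each
 * area refers to the same MSR, and all three VMCS counts stay identical.
 */
#if 0
static int hmR0VmxAddAutoLoadStoreMsrSketch(PVMXVMCSINFO pVmcsInfo, uint32_t idMsr, uint64_t uGuestValue)
{
    PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
    PVMXAUTOMSR pHostMsrLoad  = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;

    /* Read the current count and make sure another entry still fits into the 4K page. */
    uint32_t cMsrs;
    int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &cMsrs);
    AssertRCReturn(rc, rc);
    AssertReturn(sizeof(VMXAUTOMSR) * (cMsrs + 1) <= X86_PAGE_4K_SIZE, VERR_BUFFER_OVERFLOW);

    /* The guest value loaded on VM-entry (and stored back at the same index on VM-exit). */
    pGuestMsrLoad[cMsrs].u32Msr   = idMsr;
    pGuestMsrLoad[cMsrs].u64Value = uGuestValue;

    /* The host value re-loaded on VM-exit. */
    pHostMsrLoad[cMsrs].u32Msr   = idMsr;
    pHostMsrLoad[cMsrs].u64Value = ASMRdMsr(idMsr);

    /* Keep all three counts in sync, as asserted by hmR0VmxCheckAutoLoadStoreMsrs(). */
    cMsrs++;
    rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cMsrs);   AssertRCReturn(rc, rc);
    rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cMsrs);   AssertRCReturn(rc, rc);
    rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT,  cMsrs);   AssertRCReturn(rc, rc);
    return VINF_SUCCESS;
}
#endif
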
1606/**
1607 * Flushes the TLB using EPT.
1608 *
1609 * @param pVCpu The cross context virtual CPU structure of the calling
1610 * EMT. Can be NULL depending on @a enmTlbFlush.
1611 * @param pVmcsInfo The VMCS info. object. Can be NULL depending on @a
1612 * enmTlbFlush.
1613 * @param enmTlbFlush Type of flush.
1614 *
1615 * @remarks Caller is responsible for making sure this function is called only
1616 * when NestedPaging is supported and providing @a enmTlbFlush that is
1617 * supported by the CPU.
1618 * @remarks Can be called with interrupts disabled.
1619 */
1620static void hmR0VmxFlushEpt(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, VMXTLBFLUSHEPT enmTlbFlush)
1621{
1622 uint64_t au64Descriptor[2];
1623 if (enmTlbFlush == VMXTLBFLUSHEPT_ALL_CONTEXTS)
1624 au64Descriptor[0] = 0;
1625 else
1626 {
1627 Assert(pVCpu);
1628 Assert(pVmcsInfo);
1629 au64Descriptor[0] = pVmcsInfo->HCPhysEPTP;
1630 }
1631 au64Descriptor[1] = 0; /* MBZ. Intel spec. 33.3 "VMX Instructions" */
1632
1633 int rc = VMXR0InvEPT(enmTlbFlush, &au64Descriptor[0]);
1634 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %#x %#RHp failed. rc=%Rrc\n", enmTlbFlush, au64Descriptor[0], rc));
1635
1636 if ( RT_SUCCESS(rc)
1637 && pVCpu)
1638 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
1639}
1640
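/*
 * Usage note (illustrative only, not compiled): as documented above, an
 * all-contexts EPT flush needs neither a vCPU nor a VMCS info. object since the
 * INVEPT descriptor is all zeroes in that case:
 */
#if 0
    hmR0VmxFlushEpt(NULL /* pVCpu */, NULL /* pVmcsInfo */, VMXTLBFLUSHEPT_ALL_CONTEXTS);
#endif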
1641
1642/**
1643 * Flushes the TLB using VPID.
1644 *
1645 * @param pVCpu The cross context virtual CPU structure of the calling
1646 * EMT. Can be NULL depending on @a enmTlbFlush.
1647 * @param enmTlbFlush Type of flush.
1648 * @param GCPtr Virtual address of the page to flush (can be 0 depending
1649 * on @a enmTlbFlush).
1650 *
1651 * @remarks Can be called with interrupts disabled.
1652 */
1653static void hmR0VmxFlushVpid(PVMCPUCC pVCpu, VMXTLBFLUSHVPID enmTlbFlush, RTGCPTR GCPtr)
1654{
1655 Assert(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid);
1656
1657 uint64_t au64Descriptor[2];
1658 if (enmTlbFlush == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1659 {
1660 au64Descriptor[0] = 0;
1661 au64Descriptor[1] = 0;
1662 }
1663 else
1664 {
1665 AssertPtr(pVCpu);
1666 AssertMsg(pVCpu->hmr0.s.uCurrentAsid != 0, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1667 AssertMsg(pVCpu->hmr0.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1668 au64Descriptor[0] = pVCpu->hmr0.s.uCurrentAsid;
1669 au64Descriptor[1] = GCPtr;
1670 }
1671
1672 int rc = VMXR0InvVPID(enmTlbFlush, &au64Descriptor[0]);
1673 AssertMsg(rc == VINF_SUCCESS,
1674 ("VMXR0InvVPID %#x %u %RGv failed with %Rrc\n", enmTlbFlush, pVCpu ? pVCpu->hmr0.s.uCurrentAsid : 0, GCPtr, rc));
1675
1676 if ( RT_SUCCESS(rc)
1677 && pVCpu)
1678 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
1679 NOREF(rc);
1680}
1681
1682
1683/**
1684 * Invalidates a guest page by guest virtual address. Only relevant for EPT/VPID,
1685 * otherwise there is nothing really to invalidate.
1686 *
1687 * @returns VBox status code.
1688 * @param pVCpu The cross context virtual CPU structure.
1689 * @param GCVirt Guest virtual address of the page to invalidate.
1690 */
1691VMMR0DECL(int) VMXR0InvalidatePage(PVMCPUCC pVCpu, RTGCPTR GCVirt)
1692{
1693 AssertPtr(pVCpu);
1694 LogFlowFunc(("pVCpu=%p GCVirt=%RGv\n", pVCpu, GCVirt));
1695
1696 if (!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH))
1697 {
1698 /*
1699 * We must invalidate the guest TLB entry in either case, we cannot ignore it even for
1700 * the EPT case. See @bugref{6043} and @bugref{6177}.
1701 *
1702 * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxFlushTLB*()
1703 * as this function may be called in a loop with individual addresses.
1704 */
1705 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1706 if (pVM->hmr0.s.vmx.fVpid)
1707 {
1708 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
1709 {
1710 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_INDIV_ADDR, GCVirt);
1711 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt);
1712 }
1713 else
1714 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1715 }
1716 else if (pVM->hmr0.s.fNestedPaging)
1717 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1718 }
1719
1720 return VINF_SUCCESS;
1721}
1722
1723
1724/**
1725 * Dummy placeholder for tagged-TLB flush handling before VM-entry. Used in the
1726 * case where neither EPT nor VPID is supported by the CPU.
1727 *
1728 * @param pHostCpu The HM physical-CPU structure.
1729 * @param pVCpu The cross context virtual CPU structure.
1730 *
1731 * @remarks Called with interrupts disabled.
1732 */
1733static void hmR0VmxFlushTaggedTlbNone(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1734{
1735 AssertPtr(pVCpu);
1736 AssertPtr(pHostCpu);
1737
1738 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1739
1740 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1741 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1742 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1743 pVCpu->hmr0.s.fForceTLBFlush = false;
1744 return;
1745}
1746
1747
1748/**
1749 * Flushes the tagged-TLB entries for EPT+VPID CPUs as necessary.
1750 *
1751 * @param pHostCpu The HM physical-CPU structure.
1752 * @param pVCpu The cross context virtual CPU structure.
1753 * @param pVmcsInfo The VMCS info. object.
1754 *
1755 * @remarks All references to "ASID" in this function pertain to "VPID" in Intel's
1756 * nomenclature. This is to avoid confusion in comparison statements,
1757 * since the host-CPU copies are named "ASID".
1758 *
1759 * @remarks Called with interrupts disabled.
1760 */
1761static void hmR0VmxFlushTaggedTlbBoth(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1762{
1763#ifdef VBOX_WITH_STATISTICS
1764 bool fTlbFlushed = false;
1765# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { fTlbFlushed = true; } while (0)
1766# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { \
1767 if (!fTlbFlushed) \
1768 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); \
1769 } while (0)
1770#else
1771# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { } while (0)
1772# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { } while (0)
1773#endif
1774
1775 AssertPtr(pVCpu);
1776 AssertPtr(pHostCpu);
1777 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1778
1779 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1780 AssertMsg(pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid,
1781 ("hmR0VmxFlushTaggedTlbBoth cannot be invoked unless NestedPaging & VPID are enabled."
1782 "fNestedPaging=%RTbool fVpid=%RTbool", pVM->hmr0.s.fNestedPaging, pVM->hmr0.s.vmx.fVpid));
1783
1784 /*
1785 * Force a TLB flush for the first world-switch if the current CPU differs from the one we
1786 * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
1787 * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
1788 * cannot reuse the current ASID anymore.
1789 */
1790 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1791 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1792 {
1793 ++pHostCpu->uCurrentAsid;
1794 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1795 {
1796 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0. */
1797 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1798 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1799 }
1800
1801 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1802 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1803 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1804
1805 /*
1806 * Flush by EPT when we get rescheduled to a new host CPU to ensure EPT-only tagged mappings are also
1807 * invalidated. We don't need to flush-by-VPID here as flushing by EPT covers it. See @bugref{6568}.
1808 */
1809 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1810 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1811 HMVMX_SET_TAGGED_TLB_FLUSHED();
1812 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1813 }
1814 else if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) /* Check for explicit TLB flushes. */
1815 {
1816 /*
1817 * Changes to the EPT paging structures by the VMM require flushing-by-EPT as the CPU
1818 * creates guest-physical (i.e. only EPT-tagged) mappings while traversing the EPT
1819 * tables when EPT is in use. Flushing-by-VPID will only flush linear (only
1820 * VPID-tagged) and combined (EPT+VPID tagged) mappings but not guest-physical
1821 * mappings, see @bugref{6568}.
1822 *
1823 * See Intel spec. 28.3.2 "Creating and Using Cached Translation Information".
1824 */
1825 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1826 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1827 HMVMX_SET_TAGGED_TLB_FLUSHED();
1828 }
1829 else if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1830 {
1831 /*
1832 * The nested-guest specifies its own guest-physical address to use as the APIC-access
1833 * address which requires flushing the TLB of EPT cached structures.
1834 *
1835 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
1836 */
1837 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1838 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1839 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1840 HMVMX_SET_TAGGED_TLB_FLUSHED();
1841 }
1842
1843
1844 pVCpu->hmr0.s.fForceTLBFlush = false;
1845 HMVMX_UPDATE_FLUSH_SKIPPED_STAT();
1846
1847 Assert(pVCpu->hmr0.s.idLastCpu == pHostCpu->idCpu);
1848 Assert(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes);
1849 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
1850 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
1851 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
1852 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
1853 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
1854 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
1855 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
1856
1857 /* Update VMCS with the VPID. */
1858 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
1859 AssertRC(rc);
1860
1861#undef HMVMX_SET_TAGGED_TLB_FLUSHED
1862}
1863
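/*
 * A concrete walk-through of the ASID/VPID bookkeeping above, assuming (purely
 * for illustration) that g_uHmMaxAsid is 64: vCPUs migrating onto this host CPU
 * are handed ASIDs 1..63; when uCurrentAsid reaches 64 it wraps back to 1,
 * cTlbFlushes is bumped and fFlushAsidBeforeUse is set, so every vCPU that
 * subsequently runs here sees a flush-count mismatch, picks a fresh ASID and is
 * forced to flush before reusing any tagged mappings. ASID 0 is never handed out
 * as it is reserved for the host.
 */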
1864
1865/**
1866 * Flushes the tagged-TLB entries for EPT CPUs as necessary.
1867 *
1868 * @param pHostCpu The HM physical-CPU structure.
1869 * @param pVCpu The cross context virtual CPU structure.
1870 * @param pVmcsInfo The VMCS info. object.
1871 *
1872 * @remarks Called with interrupts disabled.
1873 */
1874static void hmR0VmxFlushTaggedTlbEpt(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1875{
1876 AssertPtr(pVCpu);
1877 AssertPtr(pHostCpu);
1878 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1879 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked without NestedPaging."));
1880 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with VPID."));
1881
1882 /*
1883 * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last.
1884 * A change in the TLB flush count implies the host CPU is online after a suspend/resume.
1885 */
1886 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1887 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1888 {
1889 pVCpu->hmr0.s.fForceTLBFlush = true;
1890 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1891 }
1892
1893 /* Check for explicit TLB flushes. */
1894 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1895 {
1896 pVCpu->hmr0.s.fForceTLBFlush = true;
1897 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1898 }
1899
1900 /* Check for TLB flushes while switching to/from a nested-guest. */
1901 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1902 {
1903 pVCpu->hmr0.s.fForceTLBFlush = true;
1904 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1905 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1906 }
1907
1908 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1909 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1910
1911 if (pVCpu->hmr0.s.fForceTLBFlush)
1912 {
1913 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.enmTlbFlushEpt);
1914 pVCpu->hmr0.s.fForceTLBFlush = false;
1915 }
1916}
1917
1918
1919/**
1920 * Flushes the tagged-TLB entries for VPID CPUs as necessary.
1921 *
1922 * @param pHostCpu The HM physical-CPU structure.
1923 * @param pVCpu The cross context virtual CPU structure.
1924 *
1925 * @remarks Called with interrupts disabled.
1926 */
1927static void hmR0VmxFlushTaggedTlbVpid(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1928{
1929 AssertPtr(pVCpu);
1930 AssertPtr(pHostCpu);
1931 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1932 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTlbVpid cannot be invoked without VPID."));
1933 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTlbVpid cannot be invoked with NestedPaging"));
1934
1935 /*
1936 * Force a TLB flush for the first world switch if the current CPU differs from the one we
1937 * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
1938 * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
1939 * cannot reuse the current ASID anymore.
1940 */
1941 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1942 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1943 {
1944 pVCpu->hmr0.s.fForceTLBFlush = true;
1945 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1946 }
1947
1948 /* Check for explicit TLB flushes. */
1949 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1950 {
1951 /*
1952 * If we ever support VPID flush combinations other than ALL or SINGLE-context (see
1953 * hmR0VmxSetupTaggedTlb()) we would need to explicitly flush in this case (add an
1954 * fExplicitFlush = true here and change the pHostCpu->fFlushAsidBeforeUse check below to
1955 * include fExplicitFlush too) - an obscure corner case.
1956 */
1957 pVCpu->hmr0.s.fForceTLBFlush = true;
1958 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1959 }
1960
1961 /* Check for TLB flushes while switching to/from a nested-guest. */
1962 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1963 {
1964 pVCpu->hmr0.s.fForceTLBFlush = true;
1965 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1966 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1967 }
1968
1969 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1970 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1971 if (pVCpu->hmr0.s.fForceTLBFlush)
1972 {
1973 ++pHostCpu->uCurrentAsid;
1974 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1975 {
1976 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0 */
1977 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1978 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1979 }
1980
1981 pVCpu->hmr0.s.fForceTLBFlush = false;
1982 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1983 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1984 if (pHostCpu->fFlushAsidBeforeUse)
1985 {
1986 if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_SINGLE_CONTEXT)
1987 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_SINGLE_CONTEXT, 0 /* GCPtr */);
1988 else if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1989 {
1990 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_ALL_CONTEXTS, 0 /* GCPtr */);
1991 pHostCpu->fFlushAsidBeforeUse = false;
1992 }
1993 else
1994 {
1995 /* hmR0VmxSetupTaggedTlb() ensures we never get here. Paranoia. */
1996 AssertMsgFailed(("Unsupported VPID-flush context type.\n"));
1997 }
1998 }
1999 }
2000
2001 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
2002 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
2003 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
2004 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
2005 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
2006 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
2007 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
2008
2009 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
2010 AssertRC(rc);
2011}
2012
2013
2014/**
2015 * Flushes the guest TLB entry based on CPU capabilities.
2016 *
2017 * @param pHostCpu The HM physical-CPU structure.
2018 * @param pVCpu The cross context virtual CPU structure.
2019 * @param pVmcsInfo The VMCS info. object.
2020 *
2021 * @remarks Called with interrupts disabled.
2022 */
2023static void hmR0VmxFlushTaggedTlb(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2024{
2025#ifdef HMVMX_ALWAYS_FLUSH_TLB
2026 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
2027#endif
2028 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2029 switch (pVM->hmr0.s.vmx.enmTlbFlushType)
2030 {
2031 case VMXTLBFLUSHTYPE_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pHostCpu, pVCpu, pVmcsInfo); break;
2032 case VMXTLBFLUSHTYPE_EPT: hmR0VmxFlushTaggedTlbEpt(pHostCpu, pVCpu, pVmcsInfo); break;
2033 case VMXTLBFLUSHTYPE_VPID: hmR0VmxFlushTaggedTlbVpid(pHostCpu, pVCpu); break;
2034 case VMXTLBFLUSHTYPE_NONE: hmR0VmxFlushTaggedTlbNone(pHostCpu, pVCpu); break;
2035 default:
2036 AssertMsgFailed(("Invalid flush-tag function identifier\n"));
2037 break;
2038 }
2039 /* Don't assert that VMCPU_FF_TLB_FLUSH should no longer be pending. It can be set by other EMTs. */
2040}
2041
2042
2043/**
2044 * Sets up the appropriate tagged TLB-flush level and handler for flushing guest
2045 * TLB entries from the host TLB before VM-entry.
2046 *
2047 * @returns VBox status code.
2048 * @param pVM The cross context VM structure.
2049 */
2050static int hmR0VmxSetupTaggedTlb(PVMCC pVM)
2051{
2052 /*
2053 * Determine optimal flush type for nested paging.
2054 * We cannot ignore EPT if no suitable flush type is supported by the CPU, as we've already set up
2055 * unrestricted guest execution (see hmR3InitFinalizeR0()).
2056 */
2057 if (pVM->hmr0.s.fNestedPaging)
2058 {
2059 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT)
2060 {
2061 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT)
2062 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_SINGLE_CONTEXT;
2063 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
2064 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_ALL_CONTEXTS;
2065 else
2066 {
2067 /* Shouldn't happen. EPT is supported but no suitable flush-types supported. */
2068 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2069 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_FLUSH_TYPE_UNSUPPORTED;
2070 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2071 }
2072
2073 /* Make sure the write-back cacheable memory type for EPT is supported. */
2074 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_MEMTYPE_WB)))
2075 {
2076 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2077 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_MEM_TYPE_NOT_WB;
2078 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2079 }
2080
2081 /* EPT requires a page-walk length of 4. */
2082 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_PAGE_WALK_LENGTH_4)))
2083 {
2084 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2085 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_PAGE_WALK_LENGTH_UNSUPPORTED;
2086 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2087 }
2088 }
2089 else
2090 {
2091 /* Shouldn't happen. EPT is supported but INVEPT instruction is not supported. */
2092 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2093 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_INVEPT_UNAVAILABLE;
2094 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2095 }
2096 }
2097
2098 /*
2099 * Determine optimal flush type for VPID.
2100 */
2101 if (pVM->hmr0.s.vmx.fVpid)
2102 {
2103 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID)
2104 {
2105 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT)
2106 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_SINGLE_CONTEXT;
2107 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS)
2108 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_ALL_CONTEXTS;
2109 else
2110 {
2111 /* Neither SINGLE nor ALL-context flush types for VPID is supported by the CPU. Ignore VPID capability. */
2112 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
2113 LogRelFunc(("Only INDIV_ADDR supported. Ignoring VPID.\n"));
2114 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS)
2115 LogRelFunc(("Only SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
2116 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2117 pVM->hmr0.s.vmx.fVpid = false;
2118 }
2119 }
2120 else
2121 {
2122 /* Shouldn't happen. VPID is supported but INVVPID is not supported by the CPU. Ignore VPID capability. */
2123             Log4Func(("VPID supported without INVVPID support. Ignoring VPID.\n"));
2124 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2125 pVM->hmr0.s.vmx.fVpid = false;
2126 }
2127 }
2128
2129 /*
2130 * Set up the handler for flushing tagged-TLBs.
2131 */
2132 if (pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid)
2133 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT_VPID;
2134 else if (pVM->hmr0.s.fNestedPaging)
2135 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT;
2136 else if (pVM->hmr0.s.vmx.fVpid)
2137 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_VPID;
2138 else
2139 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_NONE;
2140
2141
2142 /*
2143 * Copy out the result to ring-3.
2144 */
2145 pVM->hm.s.ForR3.vmx.fVpid = pVM->hmr0.s.vmx.fVpid;
2146 pVM->hm.s.ForR3.vmx.enmTlbFlushType = pVM->hmr0.s.vmx.enmTlbFlushType;
2147 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt;
2148 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid;
2149 return VINF_SUCCESS;
2150}
2151
2152
2153/**
2154 * Sets up the LBR MSR ranges based on the host CPU.
2155 *
2156 * @returns VBox status code.
2157 * @param pVM The cross context VM structure.
2158 *
2159 * @sa nemR3DarwinSetupLbrMsrRange
2160 */
2161static int hmR0VmxSetupLbrMsrRange(PVMCC pVM)
2162{
2163 Assert(pVM->hmr0.s.vmx.fLbr);
2164 uint32_t idLbrFromIpMsrFirst;
2165 uint32_t idLbrFromIpMsrLast;
2166 uint32_t idLbrToIpMsrFirst;
2167 uint32_t idLbrToIpMsrLast;
2168 uint32_t idLbrTosMsr;
2169
2170 /*
2171 * Determine the LBR MSRs supported for this host CPU family and model.
2172 *
2173 * See Intel spec. 17.4.8 "LBR Stack".
2174 * See Intel "Model-Specific Registers" spec.
2175 */
2176 uint32_t const uFamilyModel = (g_CpumHostFeatures.s.uFamily << 8)
2177 | g_CpumHostFeatures.s.uModel;
2178 switch (uFamilyModel)
2179 {
2180 case 0x0f01: case 0x0f02:
2181 idLbrFromIpMsrFirst = MSR_P4_LASTBRANCH_0;
2182 idLbrFromIpMsrLast = MSR_P4_LASTBRANCH_3;
2183 idLbrToIpMsrFirst = 0x0;
2184 idLbrToIpMsrLast = 0x0;
2185 idLbrTosMsr = MSR_P4_LASTBRANCH_TOS;
2186 break;
2187
2188 case 0x065c: case 0x065f: case 0x064e: case 0x065e: case 0x068e:
2189 case 0x069e: case 0x0655: case 0x0666: case 0x067a: case 0x0667:
2190 case 0x066a: case 0x066c: case 0x067d: case 0x067e:
2191 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2192 idLbrFromIpMsrLast = MSR_LASTBRANCH_31_FROM_IP;
2193 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2194 idLbrToIpMsrLast = MSR_LASTBRANCH_31_TO_IP;
2195 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2196 break;
2197
2198 case 0x063d: case 0x0647: case 0x064f: case 0x0656: case 0x063c:
2199 case 0x0645: case 0x0646: case 0x063f: case 0x062a: case 0x062d:
2200 case 0x063a: case 0x063e: case 0x061a: case 0x061e: case 0x061f:
2201 case 0x062e: case 0x0625: case 0x062c: case 0x062f:
2202 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2203 idLbrFromIpMsrLast = MSR_LASTBRANCH_15_FROM_IP;
2204 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2205 idLbrToIpMsrLast = MSR_LASTBRANCH_15_TO_IP;
2206 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2207 break;
2208
2209 case 0x0617: case 0x061d: case 0x060f:
2210 idLbrFromIpMsrFirst = MSR_CORE2_LASTBRANCH_0_FROM_IP;
2211 idLbrFromIpMsrLast = MSR_CORE2_LASTBRANCH_3_FROM_IP;
2212 idLbrToIpMsrFirst = MSR_CORE2_LASTBRANCH_0_TO_IP;
2213 idLbrToIpMsrLast = MSR_CORE2_LASTBRANCH_3_TO_IP;
2214 idLbrTosMsr = MSR_CORE2_LASTBRANCH_TOS;
2215 break;
2216
2217 /* Atom and related microarchitectures we don't care about:
2218 case 0x0637: case 0x064a: case 0x064c: case 0x064d: case 0x065a:
2219 case 0x065d: case 0x061c: case 0x0626: case 0x0627: case 0x0635:
2220 case 0x0636: */
2221 /* All other CPUs: */
2222 default:
2223 {
2224 LogRelFunc(("Could not determine LBR stack size for the CPU model %#x\n", uFamilyModel));
2225 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_UNKNOWN;
2226 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2227 }
2228 }
2229
2230 /*
2231 * Validate.
2232 */
2233 uint32_t const cLbrStack = idLbrFromIpMsrLast - idLbrFromIpMsrFirst + 1;
2234 PCVMCPU pVCpu0 = VMCC_GET_CPU_0(pVM);
2235 AssertCompile( RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr)
2236 == RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrToIpMsr));
2237 if (cLbrStack > RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr))
2238 {
2239 LogRelFunc(("LBR stack size of the CPU (%u) exceeds our buffer size\n", cLbrStack));
2240 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_OVERFLOW;
2241 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2242 }
2243 NOREF(pVCpu0);
2244
2245 /*
2246 * Update the LBR info. in the VM structure for use later.
2247 */
2248 pVM->hmr0.s.vmx.idLbrTosMsr = idLbrTosMsr;
2249
2250 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrFirst = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst = idLbrFromIpMsrFirst;
2251 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrLast = pVM->hmr0.s.vmx.idLbrFromIpMsrLast = idLbrFromIpMsrLast;
2252
2253 pVM->hm.s.ForR3.vmx.idLbrToIpMsrFirst = pVM->hmr0.s.vmx.idLbrToIpMsrFirst = idLbrToIpMsrFirst;
2254 pVM->hm.s.ForR3.vmx.idLbrToIpMsrLast = pVM->hmr0.s.vmx.idLbrToIpMsrLast = idLbrToIpMsrLast;
2255 return VINF_SUCCESS;
2256}
2257
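/*
 * Worked example for the family/model key used above: a family 6, model 0x9E
 * host yields uFamilyModel = (6 << 8) | 0x9E = 0x069e, which matches the second
 * case and thus selects a 32-entry LBR stack (MSR_LASTBRANCH_0_FROM_IP through
 * MSR_LASTBRANCH_31_FROM_IP and the corresponding TO_IP MSRs) with
 * MSR_LASTBRANCH_TOS as the top-of-stack MSR.
 */
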
2258#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2259
2260/**
2261 * Sets up the shadow VMCS fields arrays.
2262 *
2263 * This function builds arrays of VMCS fields to sync the shadow VMCS later while
2264 * executing the guest.
2265 *
2266 * @returns VBox status code.
2267 * @param pVM The cross context VM structure.
2268 */
2269static int hmR0VmxSetupShadowVmcsFieldsArrays(PVMCC pVM)
2270{
2271 /*
2272 * Paranoia. Ensure we haven't exposed the VMWRITE-All VMX feature to the guest
2273 * when the host does not support it.
2274 */
2275 bool const fGstVmwriteAll = pVM->cpum.ro.GuestFeatures.fVmxVmwriteAll;
2276 if ( !fGstVmwriteAll
2277 || (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL))
2278 { /* likely. */ }
2279 else
2280 {
2281 LogRelFunc(("VMX VMWRITE-All feature exposed to the guest but host CPU does not support it!\n"));
2282 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_GST_HOST_VMWRITE_ALL;
2283 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2284 }
2285
2286 uint32_t const cVmcsFields = RT_ELEMENTS(g_aVmcsFields);
2287 uint32_t cRwFields = 0;
2288 uint32_t cRoFields = 0;
2289 for (uint32_t i = 0; i < cVmcsFields; i++)
2290 {
2291 VMXVMCSFIELD VmcsField;
2292 VmcsField.u = g_aVmcsFields[i];
2293
2294 /*
2295 * We will be writing "FULL" (64-bit) fields while syncing the shadow VMCS.
2296 * Therefore, "HIGH" (32-bit portion of 64-bit) fields must not be included
2297 * in the shadow VMCS fields array as they would be redundant.
2298 *
2299 * If the VMCS field depends on a CPU feature that is not exposed to the guest,
2300 * we must not include it in the shadow VMCS fields array. Guests attempting to
2301 * VMREAD/VMWRITE such VMCS fields would cause a VM-exit and we shall emulate
2302 * the required behavior.
2303 */
2304 if ( VmcsField.n.fAccessType == VMX_VMCSFIELD_ACCESS_FULL
2305 && CPUMIsGuestVmxVmcsFieldValid(pVM, VmcsField.u))
2306 {
2307 /*
2308 * Read-only fields are placed in a separate array so that while syncing shadow
2309 * VMCS fields later (which is more performance critical) we can avoid branches.
2310 *
2311 * However, if the guest can write to all fields (including read-only fields),
2312 * we treat it as a read/write field. Otherwise, writing to these fields would
2313 * cause a VMWRITE instruction error while syncing the shadow VMCS.
2314 */
2315 if ( fGstVmwriteAll
2316 || !VMXIsVmcsFieldReadOnly(VmcsField.u))
2317 pVM->hmr0.s.vmx.paShadowVmcsFields[cRwFields++] = VmcsField.u;
2318 else
2319 pVM->hmr0.s.vmx.paShadowVmcsRoFields[cRoFields++] = VmcsField.u;
2320 }
2321 }
2322
2323 /* Update the counts. */
2324 pVM->hmr0.s.vmx.cShadowVmcsFields = cRwFields;
2325 pVM->hmr0.s.vmx.cShadowVmcsRoFields = cRoFields;
2326 return VINF_SUCCESS;
2327}
2328
2329
2330/**
2331 * Sets up the VMREAD and VMWRITE bitmaps.
2332 *
2333 * @param pVM The cross context VM structure.
2334 */
2335static void hmR0VmxSetupVmreadVmwriteBitmaps(PVMCC pVM)
2336{
2337 /*
2338 * By default, ensure guest attempts to access any VMCS fields cause VM-exits.
2339 */
2340 uint32_t const cbBitmap = X86_PAGE_4K_SIZE;
2341 uint8_t *pbVmreadBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmreadBitmap;
2342 uint8_t *pbVmwriteBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmwriteBitmap;
2343 ASMMemFill32(pbVmreadBitmap, cbBitmap, UINT32_C(0xffffffff));
2344 ASMMemFill32(pbVmwriteBitmap, cbBitmap, UINT32_C(0xffffffff));
2345
2346 /*
2347 * Skip intercepting VMREAD/VMWRITE to guest read/write fields in the
2348 * VMREAD and VMWRITE bitmaps.
2349 */
2350 {
2351 uint32_t const *paShadowVmcsFields = pVM->hmr0.s.vmx.paShadowVmcsFields;
2352 uint32_t const cShadowVmcsFields = pVM->hmr0.s.vmx.cShadowVmcsFields;
2353 for (uint32_t i = 0; i < cShadowVmcsFields; i++)
2354 {
2355 uint32_t const uVmcsField = paShadowVmcsFields[i];
2356 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2357 Assert(uVmcsField >> 3 < cbBitmap);
2358 ASMBitClear(pbVmreadBitmap, uVmcsField & 0x7fff);
2359 ASMBitClear(pbVmwriteBitmap, uVmcsField & 0x7fff);
2360 }
2361 }
2362
2363 /*
2364 * Skip intercepting VMREAD for guest read-only fields in the VMREAD bitmap
2365 * if the host supports VMWRITE to all supported VMCS fields.
2366 */
2367 if (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL)
2368 {
2369 uint32_t const *paShadowVmcsRoFields = pVM->hmr0.s.vmx.paShadowVmcsRoFields;
2370 uint32_t const cShadowVmcsRoFields = pVM->hmr0.s.vmx.cShadowVmcsRoFields;
2371 for (uint32_t i = 0; i < cShadowVmcsRoFields; i++)
2372 {
2373 uint32_t const uVmcsField = paShadowVmcsRoFields[i];
2374 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2375 Assert(uVmcsField >> 3 < cbBitmap);
2376 ASMBitClear(pbVmreadBitmap, uVmcsField & 0x7fff);
2377 }
2378 }
2379}
2380
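/*
 * For illustration only (not compiled): given the bit layout used above, whether
 * a guest VMREAD of a particular field would still cause a VM-exit can be checked
 * by testing the bit selected by the low 15 bits of the field encoding in the
 * VMREAD bitmap. The helper name is hypothetical.
 */
#if 0
DECLINLINE(bool) hmR0VmxIsVmreadInterceptedSketch(PVMCC pVM, uint32_t uVmcsField)
{
    uint8_t const *pbVmreadBitmap = (uint8_t const *)pVM->hmr0.s.vmx.pvVmreadBitmap;
    return ASMBitTest(pbVmreadBitmap, uVmcsField & 0x7fff);
}
#endif
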
2381#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
2382
2383/**
2384 * Sets up the virtual-APIC page address for the VMCS.
2385 *
2386 * @param pVmcsInfo The VMCS info. object.
2387 */
2388DECLINLINE(void) hmR0VmxSetupVmcsVirtApicAddr(PCVMXVMCSINFO pVmcsInfo)
2389{
2390 RTHCPHYS const HCPhysVirtApic = pVmcsInfo->HCPhysVirtApic;
2391 Assert(HCPhysVirtApic != NIL_RTHCPHYS);
2392 Assert(!(HCPhysVirtApic & 0xfff)); /* Bits 11:0 MBZ. */
2393 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
2394 AssertRC(rc);
2395}
2396
2397
2398/**
2399 * Sets up the MSR-bitmap address for the VMCS.
2400 *
2401 * @param pVmcsInfo The VMCS info. object.
2402 */
2403DECLINLINE(void) hmR0VmxSetupVmcsMsrBitmapAddr(PCVMXVMCSINFO pVmcsInfo)
2404{
2405 RTHCPHYS const HCPhysMsrBitmap = pVmcsInfo->HCPhysMsrBitmap;
2406 Assert(HCPhysMsrBitmap != NIL_RTHCPHYS);
2407 Assert(!(HCPhysMsrBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2408 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, HCPhysMsrBitmap);
2409 AssertRC(rc);
2410}
2411
2412
2413/**
2414 * Sets up the APIC-access page address for the VMCS.
2415 *
2416 * @param pVCpu The cross context virtual CPU structure.
2417 */
2418DECLINLINE(void) hmR0VmxSetupVmcsApicAccessAddr(PVMCPUCC pVCpu)
2419{
2420 RTHCPHYS const HCPhysApicAccess = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysApicAccess;
2421 Assert(HCPhysApicAccess != NIL_RTHCPHYS);
2422 Assert(!(HCPhysApicAccess & 0xfff)); /* Bits 11:0 MBZ. */
2423 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
2424 AssertRC(rc);
2425}
2426
2427#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2428
2429/**
2430 * Sets up the VMREAD bitmap address for the VMCS.
2431 *
2432 * @param pVCpu The cross context virtual CPU structure.
2433 */
2434DECLINLINE(void) hmR0VmxSetupVmcsVmreadBitmapAddr(PVMCPUCC pVCpu)
2435{
2436 RTHCPHYS const HCPhysVmreadBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmreadBitmap;
2437 Assert(HCPhysVmreadBitmap != NIL_RTHCPHYS);
2438 Assert(!(HCPhysVmreadBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2439 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMREAD_BITMAP_FULL, HCPhysVmreadBitmap);
2440 AssertRC(rc);
2441}
2442
2443
2444/**
2445 * Sets up the VMWRITE bitmap address for the VMCS.
2446 *
2447 * @param pVCpu The cross context virtual CPU structure.
2448 */
2449DECLINLINE(void) hmR0VmxSetupVmcsVmwriteBitmapAddr(PVMCPUCC pVCpu)
2450{
2451 RTHCPHYS const HCPhysVmwriteBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmwriteBitmap;
2452 Assert(HCPhysVmwriteBitmap != NIL_RTHCPHYS);
2453 Assert(!(HCPhysVmwriteBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2454 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMWRITE_BITMAP_FULL, HCPhysVmwriteBitmap);
2455 AssertRC(rc);
2456}
2457
2458#endif
2459
2460/**
2461 * Sets up the VM-entry MSR load, VM-exit MSR-store and VM-exit MSR-load addresses
2462 * in the VMCS.
2463 *
2464 * @returns VBox status code.
2465 * @param pVmcsInfo The VMCS info. object.
2466 */
2467DECLINLINE(int) hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(PVMXVMCSINFO pVmcsInfo)
2468{
2469 RTHCPHYS const HCPhysGuestMsrLoad = pVmcsInfo->HCPhysGuestMsrLoad;
2470 Assert(HCPhysGuestMsrLoad != NIL_RTHCPHYS);
2471 Assert(!(HCPhysGuestMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2472
2473 RTHCPHYS const HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrStore;
2474 Assert(HCPhysGuestMsrStore != NIL_RTHCPHYS);
2475 Assert(!(HCPhysGuestMsrStore & 0xf)); /* Bits 3:0 MBZ. */
2476
2477 RTHCPHYS const HCPhysHostMsrLoad = pVmcsInfo->HCPhysHostMsrLoad;
2478 Assert(HCPhysHostMsrLoad != NIL_RTHCPHYS);
2479 Assert(!(HCPhysHostMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2480
2481 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, HCPhysGuestMsrLoad); AssertRC(rc);
2482 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, HCPhysGuestMsrStore); AssertRC(rc);
2483 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, HCPhysHostMsrLoad); AssertRC(rc);
2484 return VINF_SUCCESS;
2485}
2486
2487
2488/**
2489 * Sets up MSR permissions in the MSR bitmap of a VMCS info. object.
2490 *
2491 * @param pVCpu The cross context virtual CPU structure.
2492 * @param pVmcsInfo The VMCS info. object.
2493 */
2494static void hmR0VmxSetupVmcsMsrPermissions(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2495{
2496 Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS);
2497
2498 /*
2499 * By default, ensure guest attempts to access any MSR cause VM-exits.
2500 * This shall later be relaxed for specific MSRs as necessary.
2501 *
2502 * Note: For nested-guests, the entire bitmap will be merged prior to
2503 * executing the nested-guest using hardware-assisted VMX and hence there
2504 * is no need to perform this operation. See hmR0VmxMergeMsrBitmapNested.
2505 */
2506 Assert(pVmcsInfo->pvMsrBitmap);
2507 ASMMemFill32(pVmcsInfo->pvMsrBitmap, X86_PAGE_4K_SIZE, UINT32_C(0xffffffff));
2508
2509 /*
2510 * The guest can access the following MSRs (read, write) without causing
2511 * VM-exits; they are loaded/stored automatically using fields in the VMCS.
2512 */
2513 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2514 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_CS, VMXMSRPM_ALLOW_RD_WR);
2515 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_ESP, VMXMSRPM_ALLOW_RD_WR);
2516 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_EIP, VMXMSRPM_ALLOW_RD_WR);
2517 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2518 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_FS_BASE, VMXMSRPM_ALLOW_RD_WR);
2519
2520 /*
2521 * The IA32_PRED_CMD and IA32_FLUSH_CMD MSRs are write-only and have no state
2522 * associated with them. We never need to intercept access (writes need to be
2523 * executed without causing a VM-exit, reads will #GP fault anyway).
2524 *
2525 * The IA32_SPEC_CTRL MSR is read/write and has state. We allow the guest to
2526 * read/write them. We swap the guest/host MSR value using the
2527 * auto-load/store MSR area.
2528 */
2529 if (pVM->cpum.ro.GuestFeatures.fIbpb)
2530 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_PRED_CMD, VMXMSRPM_ALLOW_RD_WR);
2531 if (pVM->cpum.ro.GuestFeatures.fFlushCmd)
2532 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_FLUSH_CMD, VMXMSRPM_ALLOW_RD_WR);
2533 if (pVM->cpum.ro.GuestFeatures.fIbrs)
2534 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SPEC_CTRL, VMXMSRPM_ALLOW_RD_WR);
2535
2536 /*
2537 * Allow full read/write access for the following MSRs (mandatory for VT-x)
2538 * required for 64-bit guests.
2539 */
2540 if (pVM->hmr0.s.fAllow64BitGuests)
2541 {
2542 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_LSTAR, VMXMSRPM_ALLOW_RD_WR);
2543 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K6_STAR, VMXMSRPM_ALLOW_RD_WR);
2544 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_SF_MASK, VMXMSRPM_ALLOW_RD_WR);
2545 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_KERNEL_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2546 }
2547
2548 /*
2549 * IA32_EFER MSR is always intercepted, see @bugref{9180#c37}.
2550 */
2551#ifdef VBOX_STRICT
2552 Assert(pVmcsInfo->pvMsrBitmap);
2553 uint32_t const fMsrpmEfer = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, MSR_K6_EFER);
2554 Assert(fMsrpmEfer == VMXMSRPM_EXIT_RD_WR);
2555#endif
2556}
2557
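/*
 * For illustration only (not compiled): if another MSR ever needed to be handed
 * straight through to the guest, the same helper used above would be called with
 * the desired permission from within this function. MSR_K8_TSC_AUX is used here
 * purely as an example and is *not* actually passed through by this code.
 */
#if 0
    hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_TSC_AUX, VMXMSRPM_ALLOW_RD_WR);
#endif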
2558
2559/**
2560 * Sets up pin-based VM-execution controls in the VMCS.
2561 *
2562 * @returns VBox status code.
2563 * @param pVCpu The cross context virtual CPU structure.
2564 * @param pVmcsInfo The VMCS info. object.
2565 */
2566static int hmR0VmxSetupVmcsPinCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2567{
2568 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2569 uint32_t fVal = g_HmMsrs.u.vmx.PinCtls.n.allowed0; /* Bits set here must always be set. */
2570 uint32_t const fZap = g_HmMsrs.u.vmx.PinCtls.n.allowed1; /* Bits cleared here must always be cleared. */
2571
2572 fVal |= VMX_PIN_CTLS_EXT_INT_EXIT /* External interrupts cause a VM-exit. */
2573 | VMX_PIN_CTLS_NMI_EXIT; /* Non-maskable interrupts (NMIs) cause a VM-exit. */
2574
2575 if (g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_VIRT_NMI)
2576 fVal |= VMX_PIN_CTLS_VIRT_NMI; /* Use virtual NMIs and virtual-NMI blocking features. */
2577
2578 /* Enable the VMX-preemption timer. */
2579 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
2580 {
2581 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_PREEMPT_TIMER);
2582 fVal |= VMX_PIN_CTLS_PREEMPT_TIMER;
2583 }
2584
2585#if 0
2586 /* Enable posted-interrupt processing. */
2587 if (pVM->hm.s.fPostedIntrs)
2588 {
2589 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_POSTED_INT);
2590 Assert(g_HmMsrs.u.vmx.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_ACK_EXT_INT);
2591 fVal |= VMX_PIN_CTLS_POSTED_INT;
2592 }
2593#endif
2594
2595 if ((fVal & fZap) != fVal)
2596 {
2597 LogRelFunc(("Invalid pin-based VM-execution controls combo! Cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2598 g_HmMsrs.u.vmx.PinCtls.n.allowed0, fVal, fZap));
2599 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PIN_EXEC;
2600 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2601 }
2602
2603 /* Commit it to the VMCS and update our cache. */
2604 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, fVal);
2605 AssertRC(rc);
2606 pVmcsInfo->u32PinCtls = fVal;
2607
2608 return VINF_SUCCESS;
2609}
2610
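/*
 * Worked example of the fVal/fZap check used above (the numbers are illustrative
 * only): with allowed0 = 0x00000016 (bits that must be 1) and allowed1 = fZap =
 * 0x000000ff (bits that may be 1), requesting fVal = 0x00000116 fails the
 * "(fVal & fZap) != fVal" test because bit 8 is not allowed to be set, whereas
 * fVal = 0x00000056 passes since every requested bit lies within allowed1 while
 * still including all mandatory allowed0 bits.
 */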
2611
2612/**
2613 * Sets up secondary processor-based VM-execution controls in the VMCS.
2614 *
2615 * @returns VBox status code.
2616 * @param pVCpu The cross context virtual CPU structure.
2617 * @param pVmcsInfo The VMCS info. object.
2618 */
2619static int hmR0VmxSetupVmcsProcCtls2(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2620{
2621 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2622 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls2.n.allowed0; /* Bits set here must be set in the VMCS. */
2623 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls2.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2624
2625 /* WBINVD causes a VM-exit. */
2626 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_WBINVD_EXIT)
2627 fVal |= VMX_PROC_CTLS2_WBINVD_EXIT;
2628
2629 /* Enable EPT (aka nested-paging). */
2630 if (pVM->hmr0.s.fNestedPaging)
2631 fVal |= VMX_PROC_CTLS2_EPT;
2632
2633     /* Enable the INVPCID instruction if we expose it to the guest and it is supported
2634        by the hardware. Without this, a guest executing INVPCID would cause a #UD. */
2635 if ( pVM->cpum.ro.GuestFeatures.fInvpcid
2636 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_INVPCID))
2637 fVal |= VMX_PROC_CTLS2_INVPCID;
2638
2639 /* Enable VPID. */
2640 if (pVM->hmr0.s.vmx.fVpid)
2641 fVal |= VMX_PROC_CTLS2_VPID;
2642
2643 /* Enable unrestricted guest execution. */
2644 if (pVM->hmr0.s.vmx.fUnrestrictedGuest)
2645 fVal |= VMX_PROC_CTLS2_UNRESTRICTED_GUEST;
2646
2647#if 0
2648 if (pVM->hm.s.fVirtApicRegs)
2649 {
2650 /* Enable APIC-register virtualization. */
2651 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_APIC_REG_VIRT);
2652 fVal |= VMX_PROC_CTLS2_APIC_REG_VIRT;
2653
2654 /* Enable virtual-interrupt delivery. */
2655 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_INTR_DELIVERY);
2656 fVal |= VMX_PROC_CTLS2_VIRT_INTR_DELIVERY;
2657 }
2658#endif
2659
2660     /* Virtualize APIC accesses if supported by the CPU. The virtual-APIC page is
2661 where the TPR shadow resides. */
2662 /** @todo VIRT_X2APIC support, it's mutually exclusive with this. So must be
2663 * done dynamically. */
2664 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
2665 {
2666 fVal |= VMX_PROC_CTLS2_VIRT_APIC_ACCESS;
2667 hmR0VmxSetupVmcsApicAccessAddr(pVCpu);
2668 }
2669
2670     /* Enable the RDTSCP instruction if we expose it to the guest and it is supported
2671        by the hardware. Without this, a guest executing RDTSCP would cause a #UD. */
2672 if ( pVM->cpum.ro.GuestFeatures.fRdTscP
2673 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_RDTSCP))
2674 fVal |= VMX_PROC_CTLS2_RDTSCP;
2675
2676 /* Enable Pause-Loop exiting. */
2677 if ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT)
2678 && pVM->hm.s.vmx.cPleGapTicks
2679 && pVM->hm.s.vmx.cPleWindowTicks)
2680 {
2681 fVal |= VMX_PROC_CTLS2_PAUSE_LOOP_EXIT;
2682
2683 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, pVM->hm.s.vmx.cPleGapTicks); AssertRC(rc);
2684 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, pVM->hm.s.vmx.cPleWindowTicks); AssertRC(rc);
2685 }
2686
2687 if ((fVal & fZap) != fVal)
2688 {
2689 LogRelFunc(("Invalid secondary processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2690 g_HmMsrs.u.vmx.ProcCtls2.n.allowed0, fVal, fZap));
2691 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC2;
2692 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2693 }
2694
2695 /* Commit it to the VMCS and update our cache. */
2696 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, fVal);
2697 AssertRC(rc);
2698 pVmcsInfo->u32ProcCtls2 = fVal;
2699
2700 return VINF_SUCCESS;
2701}
2702
2703
2704/**
2705 * Sets up processor-based VM-execution controls in the VMCS.
2706 *
2707 * @returns VBox status code.
2708 * @param pVCpu The cross context virtual CPU structure.
2709 * @param pVmcsInfo The VMCS info. object.
2710 */
2711static int hmR0VmxSetupVmcsProcCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2712{
2713 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2714 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls.n.allowed0; /* Bits set here must be set in the VMCS. */
2715 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2716
2717 fVal |= VMX_PROC_CTLS_HLT_EXIT /* HLT causes a VM-exit. */
2718 | VMX_PROC_CTLS_USE_TSC_OFFSETTING /* Use TSC-offsetting. */
2719 | VMX_PROC_CTLS_MOV_DR_EXIT /* MOV DRx causes a VM-exit. */
2720 | VMX_PROC_CTLS_UNCOND_IO_EXIT /* All IO instructions cause a VM-exit. */
2721 | VMX_PROC_CTLS_RDPMC_EXIT /* RDPMC causes a VM-exit. */
2722 | VMX_PROC_CTLS_MONITOR_EXIT /* MONITOR causes a VM-exit. */
2723 | VMX_PROC_CTLS_MWAIT_EXIT; /* MWAIT causes a VM-exit. */
2724
2725     /* We toggle VMX_PROC_CTLS_MOV_DR_EXIT later; check that it is not -always- required to be set or cleared. */
2726 if ( !(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MOV_DR_EXIT)
2727 || (g_HmMsrs.u.vmx.ProcCtls.n.allowed0 & VMX_PROC_CTLS_MOV_DR_EXIT))
2728 {
2729 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_MOV_DRX_EXIT;
2730 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2731 }
2732
2733 /* Without nested paging, INVLPG (also affects INVPCID) and MOV CR3 instructions should cause VM-exits. */
2734 if (!pVM->hmr0.s.fNestedPaging)
2735 {
2736 Assert(!pVM->hmr0.s.vmx.fUnrestrictedGuest);
2737 fVal |= VMX_PROC_CTLS_INVLPG_EXIT
2738 | VMX_PROC_CTLS_CR3_LOAD_EXIT
2739 | VMX_PROC_CTLS_CR3_STORE_EXIT;
2740 }
2741
2742 /* Use TPR shadowing if supported by the CPU. */
2743 if ( PDMHasApic(pVM)
2744 && (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW))
2745 {
2746 fVal |= VMX_PROC_CTLS_USE_TPR_SHADOW; /* CR8 reads from the Virtual-APIC page. */
2747 /* CR8 writes cause a VM-exit based on TPR threshold. */
2748 Assert(!(fVal & VMX_PROC_CTLS_CR8_STORE_EXIT));
2749 Assert(!(fVal & VMX_PROC_CTLS_CR8_LOAD_EXIT));
2750 hmR0VmxSetupVmcsVirtApicAddr(pVmcsInfo);
2751 }
2752 else
2753 {
2754 /* Some 32-bit CPUs do not support CR8 load/store exiting as MOV CR8 is
2755 invalid on 32-bit Intel CPUs. Set this control only for 64-bit guests. */
2756 if (pVM->hmr0.s.fAllow64BitGuests)
2757 fVal |= VMX_PROC_CTLS_CR8_STORE_EXIT /* CR8 reads cause a VM-exit. */
2758 | VMX_PROC_CTLS_CR8_LOAD_EXIT; /* CR8 writes cause a VM-exit. */
2759 }
2760
2761 /* Use MSR-bitmaps if supported by the CPU. */
2762 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2763 {
2764 fVal |= VMX_PROC_CTLS_USE_MSR_BITMAPS;
2765 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
2766 }
2767
2768 /* Use the secondary processor-based VM-execution controls if supported by the CPU. */
2769 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2770 fVal |= VMX_PROC_CTLS_USE_SECONDARY_CTLS;
2771
2772 if ((fVal & fZap) != fVal)
2773 {
2774 LogRelFunc(("Invalid processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2775 g_HmMsrs.u.vmx.ProcCtls.n.allowed0, fVal, fZap));
2776 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC;
2777 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2778 }
2779
2780 /* Commit it to the VMCS and update our cache. */
2781 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, fVal);
2782 AssertRC(rc);
2783 pVmcsInfo->u32ProcCtls = fVal;
2784
2785 /* Set up MSR permissions that don't change through the lifetime of the VM. */
2786 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2787 hmR0VmxSetupVmcsMsrPermissions(pVCpu, pVmcsInfo);
2788
2789 /* Set up secondary processor-based VM-execution controls if the CPU supports it. */
2790 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2791 return hmR0VmxSetupVmcsProcCtls2(pVCpu, pVmcsInfo);
2792
2793 /* Sanity check, should not really happen. */
2794 if (RT_LIKELY(!pVM->hmr0.s.vmx.fUnrestrictedGuest))
2795 { /* likely */ }
2796 else
2797 {
2798 pVCpu->hm.s.u32HMError = VMX_UFC_INVALID_UX_COMBO;
2799 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2800 }
2801
2802 /* Old CPUs without secondary processor-based VM-execution controls would end up here. */
2803 return VINF_SUCCESS;
2804}
2805
2806
2807/**
2808 * Sets up miscellaneous (everything other than Pin, Processor and secondary
2809 * Processor-based VM-execution) control fields in the VMCS.
2810 *
2811 * @returns VBox status code.
2812 * @param pVCpu The cross context virtual CPU structure.
2813 * @param pVmcsInfo The VMCS info. object.
2814 */
2815static int hmR0VmxSetupVmcsMiscCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2816{
2817#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2818 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
2819 {
2820 hmR0VmxSetupVmcsVmreadBitmapAddr(pVCpu);
2821 hmR0VmxSetupVmcsVmwriteBitmapAddr(pVCpu);
2822 }
2823#endif
2824
2825 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
2826 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
2827 AssertRC(rc);
2828
2829 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
2830 if (RT_SUCCESS(rc))
2831 {
2832 uint64_t const u64Cr0Mask = vmxHCGetFixedCr0Mask(pVCpu);
2833 uint64_t const u64Cr4Mask = vmxHCGetFixedCr4Mask(pVCpu);
2834
2835 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask); AssertRC(rc);
2836 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask); AssertRC(rc);
2837
2838 pVmcsInfo->u64Cr0Mask = u64Cr0Mask;
2839 pVmcsInfo->u64Cr4Mask = u64Cr4Mask;
2840
2841 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fLbr)
2842 {
2843 rc = VMXWriteVmcsNw(VMX_VMCS64_GUEST_DEBUGCTL_FULL, MSR_IA32_DEBUGCTL_LBR);
2844 AssertRC(rc);
2845 }
2846 return VINF_SUCCESS;
2847 }
2848 else
2849 LogRelFunc(("Failed to initialize VMCS auto-load/store MSR addresses. rc=%Rrc\n", rc));
2850 return rc;
2851}
2852
2853
2854/**
2855 * Sets up the initial exception bitmap in the VMCS based on static conditions.
2856 *
2857 * We shall set up those exception intercepts that don't change during the
2858 * lifetime of the VM here. The rest are done dynamically while loading the
2859 * guest state.
2860 *
2861 * @param pVCpu The cross context virtual CPU structure.
2862 * @param pVmcsInfo The VMCS info. object.
2863 */
2864static void hmR0VmxSetupVmcsXcptBitmap(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2865{
2866 /*
2867 * The following exceptions are always intercepted:
2868 *
2869 * #AC - To prevent the guest from hanging the CPU and for dealing with
2870 * split-lock detecting host configs.
2871 * #DB - To maintain the DR6 state even when intercepting DRx reads/writes and
2872 * recursive #DBs can cause a CPU hang.
2873 * #PF - To sync our shadow page tables when nested-paging is not used.
2874 */
2875 bool const fNestedPaging = pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging;
2876 uint32_t const uXcptBitmap = RT_BIT(X86_XCPT_AC)
2877 | RT_BIT(X86_XCPT_DB)
2878 | (fNestedPaging ? 0 : RT_BIT(X86_XCPT_PF));
2879
2880 /* Commit it to the VMCS. */
2881 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap);
2882 AssertRC(rc);
2883
2884 /* Update our cache of the exception bitmap. */
2885 pVmcsInfo->u32XcptBitmap = uXcptBitmap;
2886}
2887
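/*
 * For illustration only (not compiled): exception intercepts added later at
 * runtime follow the same pattern as above, e.g. additionally trapping #GP would
 * set its bit, rewrite the VMCS field and refresh the cached bitmap where
 * pVmcsInfo is in scope (typically done through dedicated helpers rather than
 * open-coded like this).
 */
#if 0
    uint32_t const uXcptBitmapNew = pVmcsInfo->u32XcptBitmap | RT_BIT(X86_XCPT_GP);
    int const rc2 = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmapNew);
    AssertRC(rc2);
    pVmcsInfo->u32XcptBitmap = uXcptBitmapNew;
#endif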
2888
2889#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2890/**
2891 * Sets up the VMCS for executing a nested-guest using hardware-assisted VMX.
2892 *
2893 * @returns VBox status code.
2894 * @param pVmcsInfo The VMCS info. object.
2895 */
2896static int hmR0VmxSetupVmcsCtlsNested(PVMXVMCSINFO pVmcsInfo)
2897{
2898 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
2899 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
2900 AssertRC(rc);
2901
2902 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
2903 if (RT_SUCCESS(rc))
2904 {
2905 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2906 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
2907
2908 /* Paranoia - We've not yet initialized these, they shall be done while merging the VMCS. */
2909 Assert(!pVmcsInfo->u64Cr0Mask);
2910 Assert(!pVmcsInfo->u64Cr4Mask);
2911 return VINF_SUCCESS;
2912 }
2913 LogRelFunc(("Failed to set up the VMCS link pointer in the nested-guest VMCS. rc=%Rrc\n", rc));
2914 return rc;
2915}
2916#endif
2917
2918
2919/**
2920 * Selector FNHMSVMVMRUN implementation.
2921 */
2922static DECLCALLBACK(int) hmR0VmxStartVmSelector(PVMXVMCSINFO pVmcsInfo, PVMCPUCC pVCpu, bool fResume)
2923{
2924 hmR0VmxUpdateStartVmFunction(pVCpu);
2925 return pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResume);
2926}
2927
2928
2929/**
2930 * Sets up the VMCS for executing a guest (or nested-guest) using hardware-assisted
2931 * VMX.
2932 *
2933 * @returns VBox status code.
2934 * @param pVCpu The cross context virtual CPU structure.
2935 * @param pVmcsInfo The VMCS info. object.
2936 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
2937 */
2938static int hmR0VmxSetupVmcs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
2939{
2940 Assert(pVmcsInfo->pvVmcs);
2941 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2942
2943 /* Set the CPU specified revision identifier at the beginning of the VMCS structure. */
2944 *(uint32_t *)pVmcsInfo->pvVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
2945 const char * const pszVmcs = fIsNstGstVmcs ? "nested-guest VMCS" : "guest VMCS";
2946
2947 LogFlowFunc(("\n"));
2948
2949 /*
2950 * Initialize the VMCS using VMCLEAR before loading the VMCS.
2951 * See Intel spec. 31.6 "Preparation And Launching A Virtual Machine".
2952 */
2953 int rc = hmR0VmxClearVmcs(pVmcsInfo);
2954 if (RT_SUCCESS(rc))
2955 {
2956 rc = hmR0VmxLoadVmcs(pVmcsInfo);
2957 if (RT_SUCCESS(rc))
2958 {
2959 /*
2960 * Initialize the hardware-assisted VMX execution handler for guest and nested-guest VMCS.
2961 * The host is always 64-bit since we no longer support 32-bit hosts.
2962 * Currently we have just a single handler for all guest modes as well, see @bugref{6208#c73}.
2963 */
2964 if (!fIsNstGstVmcs)
2965 {
2966 rc = hmR0VmxSetupVmcsPinCtls(pVCpu, pVmcsInfo);
2967 if (RT_SUCCESS(rc))
2968 {
2969 rc = hmR0VmxSetupVmcsProcCtls(pVCpu, pVmcsInfo);
2970 if (RT_SUCCESS(rc))
2971 {
2972 rc = hmR0VmxSetupVmcsMiscCtls(pVCpu, pVmcsInfo);
2973 if (RT_SUCCESS(rc))
2974 {
2975 hmR0VmxSetupVmcsXcptBitmap(pVCpu, pVmcsInfo);
2976#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2977 /*
2978 * If a shadow VMCS is allocated for the VMCS info. object, initialize the
2979 * VMCS revision ID and shadow VMCS indicator bit. Also, clear the VMCS
2980 * making it fit for use when VMCS shadowing is later enabled.
2981 */
2982 if (pVmcsInfo->pvShadowVmcs)
2983 {
2984 VMXVMCSREVID VmcsRevId;
2985 VmcsRevId.u = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
2986 VmcsRevId.n.fIsShadowVmcs = 1;
2987 *(uint32_t *)pVmcsInfo->pvShadowVmcs = VmcsRevId.u;
2988 rc = vmxHCClearShadowVmcs(pVmcsInfo);
2989 if (RT_SUCCESS(rc))
2990 { /* likely */ }
2991 else
2992 LogRelFunc(("Failed to initialize shadow VMCS. rc=%Rrc\n", rc));
2993 }
2994#endif
2995 }
2996 else
2997 LogRelFunc(("Failed to setup miscellaneous controls. rc=%Rrc\n", rc));
2998 }
2999 else
3000 LogRelFunc(("Failed to setup processor-based VM-execution controls. rc=%Rrc\n", rc));
3001 }
3002 else
3003 LogRelFunc(("Failed to setup pin-based controls. rc=%Rrc\n", rc));
3004 }
3005 else
3006 {
3007#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3008 rc = hmR0VmxSetupVmcsCtlsNested(pVmcsInfo);
3009 if (RT_SUCCESS(rc))
3010 { /* likely */ }
3011 else
3012 LogRelFunc(("Failed to initialize nested-guest VMCS. rc=%Rrc\n", rc));
3013#else
3014 AssertFailed();
3015#endif
3016 }
3017 }
3018 else
3019 LogRelFunc(("Failed to load the %s. rc=%Rrc\n", pszVmcs, rc));
3020 }
3021 else
3022         LogRelFunc(("Failed to clear the %s. rc=%Rrc\n", pszVmcs, rc));
3023
3024 /* Sync any CPU internal VMCS data back into our VMCS in memory. */
3025 if (RT_SUCCESS(rc))
3026 {
3027 rc = hmR0VmxClearVmcs(pVmcsInfo);
3028 if (RT_SUCCESS(rc))
3029 { /* likely */ }
3030 else
3031             LogRelFunc(("Failed to clear the %s post setup. rc=%Rrc\n", pszVmcs, rc));
3032 }
3033
3034 /*
3035 * Update the last-error record both for failures and success, so we
3036 * can propagate the status code back to ring-3 for diagnostics.
3037 */
3038 hmR0VmxUpdateErrorRecord(pVCpu, rc);
3039 NOREF(pszVmcs);
3040 return rc;
3041}
3042
3043
3044/**
3045 * Does global VT-x initialization (called during module initialization).
3046 *
3047 * @returns VBox status code.
3048 */
3049VMMR0DECL(int) VMXR0GlobalInit(void)
3050{
3051#ifdef HMVMX_USE_FUNCTION_TABLE
3052 AssertCompile(VMX_EXIT_MAX + 1 == RT_ELEMENTS(g_aVMExitHandlers));
3053# ifdef VBOX_STRICT
3054 for (unsigned i = 0; i < RT_ELEMENTS(g_aVMExitHandlers); i++)
3055 Assert(g_aVMExitHandlers[i].pfn);
3056# endif
3057#endif
3058
3059 /*
3060 * For detecting whether DR6.RTM is writable or not (done in VMXR0InitVM).
3061 */
3062 RTTHREADPREEMPTSTATE Preempt = RTTHREADPREEMPTSTATE_INITIALIZER;
3063 RTThreadPreemptDisable(&Preempt);
3064 RTCCUINTXREG const fSavedDr6 = ASMGetDR6();
3065 ASMSetDR6(0);
3066 RTCCUINTXREG const fZeroDr6 = ASMGetDR6();
3067 ASMSetDR6(fSavedDr6);
3068 RTThreadPreemptRestore(&Preempt);
3069
3070 g_fDr6Zeroed = fZeroDr6;
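    /* Note: fZeroDr6 records which DR6 bits still read as 1 after writing zero. VMXR0InitVM
       compares it against the architectural read-as-1 mask to detect hosts where DR6.RTM
       (bit 16) is writable and, if so, forces MOV DRx interception. */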
3071
3072 return VINF_SUCCESS;
3073}
3074
3075
3076/**
3077 * Does global VT-x termination (called during module termination).
3078 */
3079VMMR0DECL(void) VMXR0GlobalTerm()
3080{
3081 /* Nothing to do currently. */
3082}
3083
3084
3085/**
3086 * Sets up and activates VT-x on the current CPU.
3087 *
3088 * @returns VBox status code.
3089 * @param pHostCpu The HM physical-CPU structure.
3090 * @param pVM The cross context VM structure. Can be
3091 * NULL after a host resume operation.
3092 * @param pvCpuPage Pointer to the VMXON region (can be NULL if @a
3093 * fEnabledByHost is @c true).
3094 * @param HCPhysCpuPage Physical address of the VMXON region (can be 0 if
3095 * @a fEnabledByHost is @c true).
3096 * @param fEnabledByHost Set if SUPR0EnableVTx() or similar was used to
3097 * enable VT-x on the host.
3098 * @param pHwvirtMsrs Pointer to the hardware-virtualization MSRs.
3099 */
3100VMMR0DECL(int) VMXR0EnableCpu(PHMPHYSCPU pHostCpu, PVMCC pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost,
3101 PCSUPHWVIRTMSRS pHwvirtMsrs)
3102{
3103 AssertPtr(pHostCpu);
3104 AssertPtr(pHwvirtMsrs);
3105 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3106
3107 /* Enable VT-x if it's not already enabled by the host. */
3108 if (!fEnabledByHost)
3109 {
3110 int rc = hmR0VmxEnterRootMode(pHostCpu, pVM, HCPhysCpuPage, pvCpuPage);
3111 if (RT_FAILURE(rc))
3112 return rc;
3113 }
3114
3115 /*
3116     * Flush all EPT tagged-TLB entries (in case VirtualBox or any other hypervisor has been
3117 * using EPTPs) so we don't retain any stale guest-physical mappings which won't get
3118 * invalidated when flushing by VPID.
3119 */
3120 if (pHwvirtMsrs->u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
3121 {
3122 hmR0VmxFlushEpt(NULL /* pVCpu */, NULL /* pVmcsInfo */, VMXTLBFLUSHEPT_ALL_CONTEXTS);
3123 pHostCpu->fFlushAsidBeforeUse = false;
3124 }
3125 else
3126 pHostCpu->fFlushAsidBeforeUse = true;
3127
3128 /* Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. */
3129 ++pHostCpu->cTlbFlushes;
3130
3131 return VINF_SUCCESS;
3132}
3133
3134
3135/**
3136 * Deactivates VT-x on the current CPU.
3137 *
3138 * @returns VBox status code.
3139 * @param pHostCpu The HM physical-CPU structure.
3140 * @param pvCpuPage Pointer to the VMXON region.
3141 * @param HCPhysCpuPage Physical address of the VMXON region.
3142 *
3143 * @remarks This function should never be called when SUPR0EnableVTx() or
3144 * similar was used to enable VT-x on the host.
3145 */
3146VMMR0DECL(int) VMXR0DisableCpu(PHMPHYSCPU pHostCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
3147{
3148 RT_NOREF2(pvCpuPage, HCPhysCpuPage);
3149
3150 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3151 return hmR0VmxLeaveRootMode(pHostCpu);
3152}
3153
3154
3155/**
3156 * Does per-VM VT-x initialization.
3157 *
3158 * @returns VBox status code.
3159 * @param pVM The cross context VM structure.
3160 */
3161VMMR0DECL(int) VMXR0InitVM(PVMCC pVM)
3162{
3163 AssertPtr(pVM);
3164 LogFlowFunc(("pVM=%p\n", pVM));
3165
3166 hmR0VmxStructsInit(pVM);
3167 int rc = hmR0VmxStructsAlloc(pVM);
3168 if (RT_FAILURE(rc))
3169 {
3170         LogRelFunc(("Failed to allocate VMX structures. rc=%Rrc\n", rc));
3171 return rc;
3172 }
3173
3174 /* Setup the crash dump page. */
3175#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3176 strcpy((char *)pVM->hmr0.s.vmx.pbScratch, "SCRATCH Magic");
3177 *(uint64_t *)(pVM->hmr0.s.vmx.pbScratch + 16) = UINT64_C(0xdeadbeefdeadbeef);
3178#endif
3179
3180 /*
3181     * Copy out stuff that's for ring-3 and determine the default configuration.
3182 */
3183 pVM->hm.s.ForR3.vmx.u64HostDr6Zeroed = g_fDr6Zeroed;
3184
3185 /* Since we do not emulate RTM, make sure DR6.RTM cannot be cleared by the
3186 guest and cause confusion there. It appears that the DR6.RTM bit can be
3187 cleared even if TSX-NI is disabled (microcode update / system / whatever). */
3188#ifdef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
3189 if (pVM->hm.s.vmx.fAlwaysInterceptMovDRxCfg == 0)
3190 pVM->hmr0.s.vmx.fAlwaysInterceptMovDRx = g_fDr6Zeroed != X86_DR6_RA1_MASK;
3191 else
3192#endif
3193 pVM->hmr0.s.vmx.fAlwaysInterceptMovDRx = pVM->hm.s.vmx.fAlwaysInterceptMovDRxCfg > 0;
3194 pVM->hm.s.ForR3.vmx.fAlwaysInterceptMovDRx = pVM->hmr0.s.vmx.fAlwaysInterceptMovDRx;
3195
3196 return VINF_SUCCESS;
3197}
3198
3199
3200/**
3201 * Does per-VM VT-x termination.
3202 *
3203 * @returns VBox status code.
3204 * @param pVM The cross context VM structure.
3205 */
3206VMMR0DECL(int) VMXR0TermVM(PVMCC pVM)
3207{
3208 AssertPtr(pVM);
3209 LogFlowFunc(("pVM=%p\n", pVM));
3210
3211#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3212 if (pVM->hmr0.s.vmx.pbScratch)
3213 RT_BZERO(pVM->hmr0.s.vmx.pbScratch, X86_PAGE_4K_SIZE);
3214#endif
3215 hmR0VmxStructsFree(pVM);
3216 return VINF_SUCCESS;
3217}
3218
3219
3220/**
3221 * Sets up the VM for execution using hardware-assisted VMX.
3222 * This function is only called once per-VM during initialization.
3223 *
3224 * @returns VBox status code.
3225 * @param pVM The cross context VM structure.
3226 */
3227VMMR0DECL(int) VMXR0SetupVM(PVMCC pVM)
3228{
3229 AssertPtr(pVM);
3230 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3231
3232 LogFlowFunc(("pVM=%p\n", pVM));
3233
3234 /*
3235 * At least verify if VMX is enabled, since we can't check if we're in VMX root mode or not
3236 * without causing a #GP.
3237 */
3238 RTCCUINTREG const uHostCr4 = ASMGetCR4();
3239 if (RT_LIKELY(uHostCr4 & X86_CR4_VMXE))
3240 { /* likely */ }
3241 else
3242 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
3243
3244 /*
3245 * Check that nested paging is supported if enabled and copy over the flag to the
3246 * ring-0 only structure.
3247 */
3248 bool const fNestedPaging = pVM->hm.s.fNestedPagingCfg;
3249 AssertReturn( !fNestedPaging
3250 || (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_EPT), /** @todo use a ring-0 copy of ProcCtls2.n.allowed1 */
3251 VERR_INCOMPATIBLE_CONFIG);
3252 pVM->hmr0.s.fNestedPaging = fNestedPaging;
3253 pVM->hmr0.s.fAllow64BitGuests = pVM->hm.s.fAllow64BitGuestsCfg;
3254
3255 /*
3256 * Without unrestricted guest execution, pRealModeTSS and pNonPagingModeEPTPageTable *must*
3257 * always be allocated. We no longer support the highly unlikely case of unrestricted guest
3258 * without pRealModeTSS, see hmR3InitFinalizeR0Intel().
3259 */
3260 bool const fUnrestrictedGuest = pVM->hm.s.vmx.fUnrestrictedGuestCfg;
3261 AssertReturn( !fUnrestrictedGuest
3262 || ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_UNRESTRICTED_GUEST)
3263 && fNestedPaging),
3264 VERR_INCOMPATIBLE_CONFIG);
3265 if ( !fUnrestrictedGuest
3266 && ( !pVM->hm.s.vmx.pNonPagingModeEPTPageTable
3267 || !pVM->hm.s.vmx.pRealModeTSS))
3268 {
3269 LogRelFunc(("Invalid real-on-v86 state.\n"));
3270 return VERR_INTERNAL_ERROR;
3271 }
3272 pVM->hmr0.s.vmx.fUnrestrictedGuest = fUnrestrictedGuest;
3273
3274 /* Initialize these always, see hmR3InitFinalizeR0().*/
3275 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NONE;
3276 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NONE;
3277
3278 /* Setup the tagged-TLB flush handlers. */
3279 int rc = hmR0VmxSetupTaggedTlb(pVM);
3280 if (RT_FAILURE(rc))
3281 {
3282 LogRelFunc(("Failed to setup tagged TLB. rc=%Rrc\n", rc));
3283 return rc;
3284 }
3285
3286 /* Determine LBR capabilities. */
3287 pVM->hmr0.s.vmx.fLbr = pVM->hm.s.vmx.fLbrCfg;
3288 if (pVM->hmr0.s.vmx.fLbr)
3289 {
3290 rc = hmR0VmxSetupLbrMsrRange(pVM);
3291 if (RT_FAILURE(rc))
3292 {
3293 LogRelFunc(("Failed to setup LBR MSR range. rc=%Rrc\n", rc));
3294 return rc;
3295 }
3296 }
3297
3298#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3299 /* Setup the shadow VMCS fields array and VMREAD/VMWRITE bitmaps. */
3300 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
3301 {
3302 rc = hmR0VmxSetupShadowVmcsFieldsArrays(pVM);
3303 if (RT_SUCCESS(rc))
3304 hmR0VmxSetupVmreadVmwriteBitmaps(pVM);
3305 else
3306 {
3307 LogRelFunc(("Failed to setup shadow VMCS fields arrays. rc=%Rrc\n", rc));
3308 return rc;
3309 }
3310 }
3311#endif
3312
3313 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
3314 {
3315 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
3316 Log4Func(("pVCpu=%p idCpu=%RU32\n", pVCpu, pVCpu->idCpu));
3317
3318 pVCpu->hmr0.s.vmx.pfnStartVm = hmR0VmxStartVmSelector;
3319
3320 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
3321 if (RT_SUCCESS(rc))
3322 {
3323#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3324 if (pVM->cpum.ro.GuestFeatures.fVmx)
3325 {
3326 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
3327 if (RT_SUCCESS(rc))
3328 { /* likely */ }
3329 else
3330 {
3331 LogRelFunc(("Nested-guest VMCS setup failed. rc=%Rrc\n", rc));
3332 return rc;
3333 }
3334 }
3335#endif
3336 }
3337 else
3338 {
3339 LogRelFunc(("VMCS setup failed. rc=%Rrc\n", rc));
3340 return rc;
3341 }
3342 }
3343
3344 return VINF_SUCCESS;
3345}
3346
3347
3348/**
3349 * Saves the host control registers (CR0, CR3, CR4) into the host-state area in
3350 * the VMCS.
3351 * @returns CR4 for passing along to hmR0VmxExportHostSegmentRegs.
3352 */
3353static uint64_t hmR0VmxExportHostControlRegs(void)
3354{
3355 int rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR0, ASMGetCR0()); AssertRC(rc);
3356 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR3, ASMGetCR3()); AssertRC(rc);
3357 uint64_t uHostCr4 = ASMGetCR4();
3358 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR4, uHostCr4); AssertRC(rc);
3359 return uHostCr4;
3360}
3361
3362
3363/**
3364 * Saves the host segment registers and GDTR, IDTR, (TR, GS and FS bases) into
3365 * the host-state area in the VMCS.
3366 *
3367 * @returns VBox status code.
3368 * @param pVCpu The cross context virtual CPU structure.
3369 * @param uHostCr4 The host CR4 value.
3370 */
3371static int hmR0VmxExportHostSegmentRegs(PVMCPUCC pVCpu, uint64_t uHostCr4)
3372{
3373 /*
3374 * If we've executed guest code using hardware-assisted VMX, the host-state bits
3375 * will be messed up. We should -not- save the messed up state without restoring
3376 * the original host-state, see @bugref{7240}.
3377 *
3378 * This apparently can happen (most likely the FPU changes), deal with it rather than
3379 * asserting. Was observed booting Solaris 10u10 32-bit guest.
3380 */
3381 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
3382 {
3383 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags,
3384 pVCpu->idCpu));
3385 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
3386 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
3387 }
3388
3389 /*
3390 * Get all the host info.
3391 * ASSUME it is safe to use rdfsbase and friends if the CR4.FSGSBASE bit is set
3392 * without also checking the cpuid bit.
3393 */
3394 uint32_t fRestoreHostFlags;
3395#if RT_INLINE_ASM_EXTERNAL
3396 if (uHostCr4 & X86_CR4_FSGSBASE)
3397 {
3398 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, true /*fHaveFsGsBase*/);
3399 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3400 }
3401 else
3402 {
3403 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, false /*fHaveFsGsBase*/);
3404 fRestoreHostFlags = 0;
3405 }
3406 RTSEL uSelES = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES;
3407 RTSEL uSelDS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS;
3408 RTSEL uSelFS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS;
3409 RTSEL uSelGS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS;
3410#else
3411 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR = ASMGetTR();
3412 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS = ASMGetSS();
3413 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS = ASMGetCS();
3414 ASMGetGDTR((PRTGDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr);
3415 ASMGetIDTR((PRTIDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr);
3416 if (uHostCr4 & X86_CR4_FSGSBASE)
3417 {
3418 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMGetFSBase();
3419 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMGetGSBase();
3420 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3421 }
3422 else
3423 {
3424 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMRdMsr(MSR_K8_FS_BASE);
3425 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMRdMsr(MSR_K8_GS_BASE);
3426 fRestoreHostFlags = 0;
3427 }
3428 RTSEL uSelES, uSelDS, uSelFS, uSelGS;
3429 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS = uSelDS = ASMGetDS();
3430 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES = uSelES = ASMGetES();
3431 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS = uSelFS = ASMGetFS();
3432 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS = uSelGS = ASMGetGS();
3433#endif
3434
3435 /*
3436     * Determine if the host segment registers are suitable for VT-x. Otherwise load null
3437     * selectors (zero) so that VM-entry succeeds, and restore the real values before we get preempted.
3438 *
3439 * See Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers".
3440 */
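    /* Reminder: bits 1:0 of a selector are its RPL and bit 2 is the table indicator (LDT), so
       ORing the data selectors together lets us test all of them for either condition at once. */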
3441 RTSEL const uSelAll = uSelFS | uSelGS | uSelES | uSelDS;
3442 if (uSelAll & (X86_SEL_RPL | X86_SEL_LDT))
3443 {
3444 if (!(uSelAll & X86_SEL_LDT))
3445 {
3446#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3447 do { \
3448 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3449 if ((a_uVmcsVar) & X86_SEL_RPL) \
3450 { \
3451 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3452 (a_uVmcsVar) = 0; \
3453 } \
3454 } while (0)
3455 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3456 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3457 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3458 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3459#undef VMXLOCAL_ADJUST_HOST_SEG
3460 }
3461 else
3462 {
3463#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3464 do { \
3465 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3466 if ((a_uVmcsVar) & (X86_SEL_RPL | X86_SEL_LDT)) \
3467 { \
3468 if (!((a_uVmcsVar) & X86_SEL_LDT)) \
3469 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3470 else \
3471 { \
3472 uint32_t const fAttr = ASMGetSegAttr(a_uVmcsVar); \
3473 if ((fAttr & X86_DESC_P) && fAttr != UINT32_MAX) \
3474 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3475 } \
3476 (a_uVmcsVar) = 0; \
3477 } \
3478 } while (0)
3479 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3480 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3481 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3482 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3483#undef VMXLOCAL_ADJUST_HOST_SEG
3484 }
3485 }
3486
3487 /* Verification based on Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers" */
3488 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR);
3489 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS);
3490 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_LDT));
3491 Assert(!(uSelDS & X86_SEL_RPL)); Assert(!(uSelDS & X86_SEL_LDT));
3492 Assert(!(uSelES & X86_SEL_RPL)); Assert(!(uSelES & X86_SEL_LDT));
3493 Assert(!(uSelFS & X86_SEL_RPL)); Assert(!(uSelFS & X86_SEL_LDT));
3494 Assert(!(uSelGS & X86_SEL_RPL)); Assert(!(uSelGS & X86_SEL_LDT));
3495
3496 /*
3497     * Determine if we need to manually restore the GDTR and IDTR limits as VT-x zaps
3498 * them to the maximum limit (0xffff) on every VM-exit.
3499 */
3500 if (pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb != 0xffff)
3501 fRestoreHostFlags |= VMX_RESTORE_HOST_GDTR;
3502
3503 /*
3504 * IDT limit is effectively capped at 0xfff. (See Intel spec. 6.14.1 "64-Bit Mode IDT" and
3505 * Intel spec. 6.2 "Exception and Interrupt Vectors".) Therefore if the host has the limit
3506 * as 0xfff, VT-x bloating the limit to 0xffff shouldn't cause any different CPU behavior.
3507     * However, several hosts either insist on 0xfff being the limit (Windows Patch Guard) or
3508     * use the limit for other purposes (darwin puts the CPU ID in there but botches sidt
3509 * alignment in at least one consumer). So, we're only allowing the IDTR.LIMIT to be left
3510 * at 0xffff on hosts where we are sure it won't cause trouble.
3511 */
3512#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS)
3513 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb < 0x0fff)
3514#else
3515 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb != 0xffff)
3516#endif
3517 fRestoreHostFlags |= VMX_RESTORE_HOST_IDTR;
3518
3519 /*
3520 * Host TR base. Verify that TR selector doesn't point past the GDT. Masking off the TI
3521 * and RPL bits is effectively what the CPU does for "scaling by 8". TI is always 0 and
3522 * RPL should be too in most cases.
3523 */
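    /* E.g. a TR selector of 0x0040 refers to GDT descriptor index 8, which begins at byte offset 0x40. */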
3524 RTSEL const uSelTR = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR;
3525 AssertMsgReturn((uSelTR | X86_SEL_RPL_LDT) <= pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb,
3526 ("TR selector exceeds limit. TR=%RTsel cbGdt=%#x\n", uSelTR, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb),
3527 VERR_VMX_INVALID_HOST_STATE);
3528
3529 PCX86DESCHC pDesc = (PCX86DESCHC)(pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr + (uSelTR & X86_SEL_MASK));
3530 uintptr_t const uTRBase = X86DESC64_BASE(pDesc);
3531
3532 /*
3533 * VT-x unconditionally restores the TR limit to 0x67 and type to 11 (32-bit busy TSS) on
3534 * all VM-exits. The type is the same for 64-bit busy TSS[1]. The limit needs manual
3535 * restoration if the host has something else. Task switching is not supported in 64-bit
3536 * mode[2], but the limit still matters as IOPM is supported in 64-bit mode. Restoring the
3537 * limit lazily while returning to ring-3 is safe because IOPM is not applicable in ring-0.
3538 *
3539 * [1] See Intel spec. 3.5 "System Descriptor Types".
3540 * [2] See Intel spec. 7.2.3 "TSS Descriptor in 64-bit mode".
3541 */
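    /* Note: 0x67 is the 104-byte TSS size minus one; hosts that append an I/O permission
       bitmap to the TSS will have a larger limit and take the restore path below. */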
3542 Assert(pDesc->System.u4Type == 11);
3543 if ( pDesc->System.u16LimitLow != 0x67
3544 || pDesc->System.u4LimitHigh)
3545 {
3546 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_TR;
3547
3548 /* If the host has made GDT read-only, we would need to temporarily toggle CR0.WP before writing the GDT. */
3549 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_READ_ONLY)
3550 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_READ_ONLY;
3551 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_NEED_WRITABLE)
3552 {
3553 /* The GDT is read-only but the writable GDT is available. */
3554 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_NEED_WRITABLE;
3555 pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.cb = pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb;
3556 int rc = SUPR0GetCurrentGdtRw(&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.uAddr);
3557 AssertRCReturn(rc, rc);
3558 }
3559 }
3560
3561 pVCpu->hmr0.s.vmx.fRestoreHostFlags = fRestoreHostFlags;
3562
3563 /*
3564 * Do all the VMCS updates in one block to assist nested virtualization.
3565 */
3566 int rc;
3567 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_CS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS); AssertRC(rc);
3568 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_SS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS); AssertRC(rc);
3569 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_DS_SEL, uSelDS); AssertRC(rc);
3570 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_ES_SEL, uSelES); AssertRC(rc);
3571 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_FS_SEL, uSelFS); AssertRC(rc);
3572 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_GS_SEL, uSelGS); AssertRC(rc);
3573 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_TR_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR); AssertRC(rc);
3574 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr); AssertRC(rc);
3575 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_IDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.uAddr); AssertRC(rc);
3576 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_TR_BASE, uTRBase); AssertRC(rc);
3577 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_FS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase); AssertRC(rc);
3578 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase); AssertRC(rc);
3579
3580 return VINF_SUCCESS;
3581}
3582
3583
3584/**
3585 * Exports certain host MSRs in the VM-exit MSR-load area and some in the
3586 * host-state area of the VMCS.
3587 *
3588 * These MSRs will be automatically restored on the host after every successful
3589 * VM-exit.
3590 *
3591 * @param pVCpu The cross context virtual CPU structure.
3592 *
3593 * @remarks No-long-jump zone!!!
3594 */
3595static void hmR0VmxExportHostMsrs(PVMCPUCC pVCpu)
3596{
3597 AssertPtr(pVCpu);
3598
3599 /*
3600 * Save MSRs that we restore lazily (due to preemption or transition to ring-3)
3601 * rather than swapping them on every VM-entry.
3602 */
3603 hmR0VmxLazySaveHostMsrs(pVCpu);
3604
3605 /*
3606 * Host Sysenter MSRs.
3607 */
3608 int rc = VMXWriteVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)); AssertRC(rc);
3609 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP)); AssertRC(rc);
3610 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP)); AssertRC(rc);
3611
3612 /*
3613 * Host EFER MSR.
3614 *
3615 * If the CPU supports the newer VMCS controls for managing EFER, use it. Otherwise it's
3616 * done as part of auto-load/store MSR area in the VMCS, see hmR0VmxExportGuestMsrs().
3617 */
3618 if (g_fHmVmxSupportsVmcsEfer)
3619 {
3620 rc = VMXWriteVmcs64(VMX_VMCS64_HOST_EFER_FULL, g_uHmVmxHostMsrEfer);
3621 AssertRC(rc);
3622 }
3623
3624 /** @todo IA32_PERF_GLOBALCTRL, IA32_PAT also see
3625 * vmxHCExportGuestEntryExitCtls(). */
3626}
3627
3628
3629/**
3630 * Figures out if we need to swap the EFER MSR which is particularly expensive.
3631 *
3632 * We check all relevant bits. For now, that's everything besides LMA/LME, as
3633 * these two bits are handled by VM-entry, see vmxHCExportGuestEntryExitCtls().
3634 *
3635 * @returns true if we need to load guest EFER, false otherwise.
3636 * @param pVCpu The cross context virtual CPU structure.
3637 * @param pVmxTransient The VMX-transient structure.
3638 *
3639 * @remarks Requires EFER, CR4.
3640 * @remarks No-long-jump zone!!!
3641 */
3642static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3643{
3644#ifdef HMVMX_ALWAYS_SWAP_EFER
3645 RT_NOREF2(pVCpu, pVmxTransient);
3646 return true;
3647#else
3648 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
3649 uint64_t const u64HostEfer = g_uHmVmxHostMsrEfer;
3650 uint64_t const u64GuestEfer = pCtx->msrEFER;
3651
3652# ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3653 /*
3654 * For nested-guests, we shall honor swapping the EFER MSR when requested by
3655 * the nested-guest.
3656 */
3657 if ( pVmxTransient->fIsNestedGuest
3658 && ( CPUMIsGuestVmxEntryCtlsSet(pCtx, VMX_ENTRY_CTLS_LOAD_EFER_MSR)
3659 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_SAVE_EFER_MSR)
3660 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_LOAD_EFER_MSR)))
3661 return true;
3662# else
3663 RT_NOREF(pVmxTransient);
3664 # endif
3665
3666 /*
3667 * For 64-bit guests, if EFER.SCE bit differs, we need to swap the EFER MSR
3668 * to ensure that the guest's SYSCALL behaviour isn't broken, see @bugref{7386}.
3669 */
3670 if ( CPUMIsGuestInLongModeEx(pCtx)
3671 && (u64GuestEfer & MSR_K6_EFER_SCE) != (u64HostEfer & MSR_K6_EFER_SCE))
3672 return true;
3673
3674 /*
3675 * If the guest uses PAE and EFER.NXE bit differs, we need to swap the EFER MSR
3676 * as it affects guest paging. 64-bit paging implies CR4.PAE as well.
3677 *
3678 * See Intel spec. 4.5 "IA-32e Paging".
3679 * See Intel spec. 4.1.1 "Three Paging Modes".
3680 *
3681 * Verify that we always intercept CR4.PAE and CR0.PG bits, so we don't need to
3682 * import CR4 and CR0 from the VMCS here as those bits are always up to date.
3683 */
3684 Assert(vmxHCGetFixedCr4Mask(pVCpu) & X86_CR4_PAE);
3685 Assert(vmxHCGetFixedCr0Mask(pVCpu) & X86_CR0_PG);
3686 if ( (pCtx->cr4 & X86_CR4_PAE)
3687 && (pCtx->cr0 & X86_CR0_PG))
3688 {
3689 /*
3690 * If nested paging is not used, verify that the guest paging mode matches the
3691 * shadow paging mode which is/will be placed in the VMCS (which is what will
3692 * actually be used while executing the guest and not the CR4 shadow value).
3693 */
3694 AssertMsg( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
3695 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE
3696 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE_NX
3697 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64
3698 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64_NX,
3699 ("enmShadowMode=%u\n", pVCpu->hm.s.enmShadowMode));
3700 if ((u64GuestEfer & MSR_K6_EFER_NXE) != (u64HostEfer & MSR_K6_EFER_NXE))
3701 {
3702 /* Verify that the host is NX capable. */
3703 Assert(g_CpumHostFeatures.s.fNoExecute);
3704 return true;
3705 }
3706 }
3707
3708 return false;
3709#endif
3710}
3711
3712
3713/**
3714 * Exports the guest's RSP into the guest-state area in the VMCS.
3715 *
3716 * @param pVCpu The cross context virtual CPU structure.
3717 *
3718 * @remarks No-long-jump zone!!!
3719 */
3720static void hmR0VmxExportGuestRsp(PVMCPUCC pVCpu)
3721{
3722 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RSP)
3723 {
3724 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RSP);
3725
3726 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_RSP, pVCpu->cpum.GstCtx.rsp);
3727 AssertRC(rc);
3728
3729 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RSP);
3730 Log4Func(("rsp=%#RX64\n", pVCpu->cpum.GstCtx.rsp));
3731 }
3732}
3733
3734
3735/**
3736 * Exports the guest hardware-virtualization state.
3737 *
3738 * @returns VBox status code.
3739 * @param pVCpu The cross context virtual CPU structure.
3740 * @param pVmxTransient The VMX-transient structure.
3741 *
3742 * @remarks No-long-jump zone!!!
3743 */
3744static int hmR0VmxExportGuestHwvirtState(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3745{
3746 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_HWVIRT)
3747 {
3748#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3749 /*
3750 * Check if the VMX feature is exposed to the guest and if the host CPU supports
3751 * VMCS shadowing.
3752 */
3753 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
3754 {
3755 /*
3756 * If the nested hypervisor has loaded a current VMCS and is in VMX root mode,
3757 * copy the nested hypervisor's current VMCS into the shadow VMCS and enable
3758 * VMCS shadowing to skip intercepting some or all VMREAD/VMWRITE VM-exits.
3759 *
3760 * We check for VMX root mode here in case the guest executes VMXOFF without
3761 * clearing the current VMCS pointer and our VMXOFF instruction emulation does
3762 * not clear the current VMCS pointer.
3763 */
3764 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3765 if ( CPUMIsGuestInVmxRootMode(&pVCpu->cpum.GstCtx)
3766 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)
3767 && CPUMIsGuestVmxCurrentVmcsValid(&pVCpu->cpum.GstCtx))
3768 {
3769 /* Paranoia. */
3770 Assert(!pVmxTransient->fIsNestedGuest);
3771
3772 /*
3773 * For performance reasons, also check if the nested hypervisor's current VMCS
3774 * was newly loaded or modified before copying it to the shadow VMCS.
3775 */
3776 if (!pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs)
3777 {
3778 int rc = vmxHCCopyNstGstToShadowVmcs(pVCpu, pVmcsInfo);
3779 AssertRCReturn(rc, rc);
3780 pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs = true;
3781 }
3782 vmxHCEnableVmcsShadowing(pVCpu, pVmcsInfo);
3783 }
3784 else
3785 vmxHCDisableVmcsShadowing(pVCpu, pVmcsInfo);
3786 }
3787#else
3788 NOREF(pVmxTransient);
3789#endif
3790 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_HWVIRT);
3791 }
3792 return VINF_SUCCESS;
3793}
3794
3795
3796/**
3797 * Exports the guest debug registers into the guest-state area in the VMCS.
3798 * The guest debug bits are partially shared with the host (e.g. DR6, DR0-3).
3799 *
3800 * This also sets up whether \#DB and MOV DRx accesses cause VM-exits.
3801 *
3802 * @returns VBox status code.
3803 * @param pVCpu The cross context virtual CPU structure.
3804 * @param pVmxTransient The VMX-transient structure.
3805 *
3806 * @remarks No-long-jump zone!!!
3807 */
3808static int hmR0VmxExportSharedDebugState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
3809{
3810 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3811
3812 /** @todo NSTVMX: Figure out what we want to do with nested-guest instruction
3813 * stepping. */
3814 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3815 if (pVmxTransient->fIsNestedGuest)
3816 {
3817 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, CPUMGetGuestDR7(pVCpu));
3818 AssertRC(rc);
3819
3820 /*
3821 * We don't want to always intercept MOV DRx for nested-guests as it causes
3822 * problems when the nested hypervisor isn't intercepting them, see @bugref{10080}.
3823 * Instead, they are strictly only requested when the nested hypervisor intercepts
3824 * them -- handled while merging VMCS controls.
3825 *
3826 * If neither the outer nor the nested-hypervisor is intercepting MOV DRx,
3827 * then the nested-guest debug state should be actively loaded on the host so that
3828 * nested-guest reads its own debug registers without causing VM-exits.
3829         * the nested-guest can read its own debug registers without causing VM-exits.
3830 if ( !(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
3831 && !CPUMIsGuestDebugStateActive(pVCpu))
3832 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3833 return VINF_SUCCESS;
3834 }
3835
3836#ifdef VBOX_STRICT
3837 /* Validate. Intel spec. 26.3.1.1 "Checks on Guest Controls Registers, Debug Registers, MSRs" */
3838 if (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG)
3839 {
3840 /* Validate. Intel spec. 17.2 "Debug Registers", recompiler paranoia checks. */
3841 Assert((pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_MBZ_MASK | X86_DR7_RAZ_MASK)) == 0);
3842 Assert((pVCpu->cpum.GstCtx.dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK);
3843 }
3844#endif
3845
3846 bool fSteppingDB = false;
3847 uint32_t uProcCtls = pVmcsInfo->u32ProcCtls;
3848 if (pVCpu->hm.s.fSingleInstruction)
3849 {
3850 /* If the CPU supports the monitor trap flag, use it for single stepping in DBGF and avoid intercepting #DB. */
3851 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MONITOR_TRAP_FLAG)
3852 {
3853 uProcCtls |= VMX_PROC_CTLS_MONITOR_TRAP_FLAG;
3854 Assert(fSteppingDB == false);
3855 }
3856 else
3857 {
3858 pVCpu->cpum.GstCtx.eflags.u |= X86_EFL_TF;
3859 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_RFLAGS;
3860 pVCpu->hmr0.s.fClearTrapFlag = true;
3861 fSteppingDB = true;
3862 }
3863 }
3864
3865#ifdef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
3866 bool fInterceptMovDRx = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fAlwaysInterceptMovDRx;
3867#else
3868 bool fInterceptMovDRx = false;
3869#endif
3870 uint64_t u64GuestDr7;
3871 if ( fSteppingDB
3872 || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK))
3873 {
3874 /*
3875 * Use the combined guest and host DRx values found in the hypervisor register set
3876 * because the hypervisor debugger has breakpoints active or someone is single stepping
3877 * on the host side without a monitor trap flag.
3878 *
3879 * Note! DBGF expects a clean DR6 state before executing guest code.
3880 */
3881 if (!CPUMIsHyperDebugStateActive(pVCpu))
3882 {
3883 CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */);
3884 Assert(CPUMIsHyperDebugStateActive(pVCpu));
3885 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
3886 }
3887
3888 /* Update DR7 with the hypervisor value (other DRx registers are handled by CPUM one way or another). */
3889 u64GuestDr7 = CPUMGetHyperDR7(pVCpu);
3890 pVCpu->hmr0.s.fUsingHyperDR7 = true;
3891 fInterceptMovDRx = true;
3892 }
3893 else
3894 {
3895 /*
3896 * If the guest has enabled debug registers, we need to load them prior to
3897 * executing guest code so they'll trigger at the right time.
3898 */
3899 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_DR7);
3900 if (pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD))
3901 {
3902 if (!CPUMIsGuestDebugStateActive(pVCpu))
3903 {
3904 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3905 Assert(CPUMIsGuestDebugStateActive(pVCpu));
3906 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
3907 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
3908 }
3909#ifndef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
3910 Assert(!fInterceptMovDRx);
3911#endif
3912 }
3913 else if (!CPUMIsGuestDebugStateActive(pVCpu))
3914 {
3915 /*
3916             * If no debugging is enabled, we'll lazily load DR0-3. Unlike on AMD-V, we
3917 * must intercept #DB in order to maintain a correct DR6 guest value, and
3918 * because we need to intercept it to prevent nested #DBs from hanging the
3919 * CPU, we end up always having to intercept it. See hmR0VmxSetupVmcsXcptBitmap().
3920 */
3921 fInterceptMovDRx = true;
3922 }
3923
3924 /* Update DR7 with the actual guest value. */
3925 u64GuestDr7 = pVCpu->cpum.GstCtx.dr[7];
3926 pVCpu->hmr0.s.fUsingHyperDR7 = false;
3927 }
3928
3929 if (fInterceptMovDRx)
3930 uProcCtls |= VMX_PROC_CTLS_MOV_DR_EXIT;
3931 else
3932 uProcCtls &= ~VMX_PROC_CTLS_MOV_DR_EXIT;
3933
3934 /*
3935 * Update the processor-based VM-execution controls with the MOV-DRx intercepts and the
3936 * monitor-trap flag and update our cache.
3937 */
3938 if (uProcCtls != pVmcsInfo->u32ProcCtls)
3939 {
3940 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls);
3941 AssertRC(rc);
3942 pVmcsInfo->u32ProcCtls = uProcCtls;
3943 }
3944
3945 /*
3946 * Update guest DR7.
3947 */
3948 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, u64GuestDr7);
3949 AssertRC(rc);
3950
3951 /*
3952 * If we have forced EFLAGS.TF to be set because we're single-stepping in the hypervisor debugger,
3953     * we need to clear any interrupt inhibition, as otherwise it causes a VM-entry failure.
3954 *
3955 * See Intel spec. 26.3.1.5 "Checks on Guest Non-Register State".
3956 */
3957 if (fSteppingDB)
3958 {
3959 Assert(pVCpu->hm.s.fSingleInstruction);
3960 Assert(pVCpu->cpum.GstCtx.eflags.Bits.u1TF);
3961
3962 uint32_t fIntrState = 0;
3963 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState);
3964 AssertRC(rc);
3965
3966 if (fIntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS))
3967 {
3968 fIntrState &= ~(VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS);
3969 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState);
3970 AssertRC(rc);
3971 }
3972 }
3973
3974 return VINF_SUCCESS;
3975}
3976
3977
3978/**
3979 * Exports certain guest MSRs into the VM-entry MSR-load and VM-exit MSR-store
3980 * areas.
3981 *
3982 * These MSRs will automatically be loaded to the host CPU on every successful
3983 * VM-entry and stored from the host CPU on every successful VM-exit.
3984 *
3985 * We create/update MSR slots for the host MSRs in the VM-exit MSR-load area. The
3986 * actual host MSR values are not updated here for performance reasons. See
3987 * hmR0VmxExportHostMsrs().
3988 *
3989 * We also export the guest sysenter MSRs into the guest-state area in the VMCS.
3990 *
3991 * @returns VBox status code.
3992 * @param pVCpu The cross context virtual CPU structure.
3993 * @param pVmxTransient The VMX-transient structure.
3994 *
3995 * @remarks No-long-jump zone!!!
3996 */
3997static int hmR0VmxExportGuestMsrs(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3998{
3999 AssertPtr(pVCpu);
4000 AssertPtr(pVmxTransient);
4001
4002 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4003 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
4004
4005 /*
4006 * MSRs that we use the auto-load/store MSR area in the VMCS.
4007 * For 64-bit hosts, we load/restore them lazily, see hmR0VmxLazyLoadGuestMsrs(),
4008 * nothing to do here. The host MSR values are updated when it's safe in
4009 * hmR0VmxLazySaveHostMsrs().
4010 *
4011     * For nested-guests, the guest MSRs from the VM-entry MSR-load area are already
4012 * loaded (into the guest-CPU context) by the VMLAUNCH/VMRESUME instruction
4013 * emulation. The merged MSR permission bitmap will ensure that we get VM-exits
4014     * for any MSRs that are not part of the lazy MSRs, so we do not need to place
4015 * those MSRs into the auto-load/store MSR area. Nothing to do here.
4016 */
4017 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_GUEST_AUTO_MSRS)
4018 {
4019 /* No auto-load/store MSRs currently. */
4020 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_GUEST_AUTO_MSRS);
4021 }
4022
4023 /*
4024 * Guest Sysenter MSRs.
4025 */
4026 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_MSR_MASK)
4027 {
4028 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SYSENTER_MSRS);
4029
4030 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_CS_MSR)
4031 {
4032 int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
4033 AssertRC(rc);
4034 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_CS_MSR);
4035 }
4036
4037 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_EIP_MSR)
4038 {
4039 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
4040 AssertRC(rc);
4041 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_EIP_MSR);
4042 }
4043
4044 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_ESP_MSR)
4045 {
4046 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
4047 AssertRC(rc);
4048 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_ESP_MSR);
4049 }
4050 }
4051
4052 /*
4053 * Guest/host EFER MSR.
4054 */
4055 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_EFER_MSR)
4056 {
4057 /* Whether we are using the VMCS to swap the EFER MSR must have been
4058 determined earlier while exporting VM-entry/VM-exit controls. */
4059 Assert(!(ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_ENTRY_EXIT_CTLS));
4060 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_EFER);
4061
4062 if (hmR0VmxShouldSwapEferMsr(pVCpu, pVmxTransient))
4063 {
4064 /*
4065 * EFER.LME is written by software, while EFER.LMA is set by the CPU to (CR0.PG & EFER.LME).
4066 * This means a guest can set EFER.LME=1 while CR0.PG=0 and EFER.LMA can remain 0.
4067 * VT-x requires that "IA-32e mode guest" VM-entry control must be identical to EFER.LMA
4068 * and to CR0.PG. Without unrestricted execution, CR0.PG (used for VT-x, not the shadow)
4069 * must always be 1. This forces us to effectively clear both EFER.LMA and EFER.LME until
4070 * the guest has also set CR0.PG=1. Otherwise, we would run into an invalid-guest state
4071 * during VM-entry.
4072 */
4073 uint64_t uGuestEferMsr = pCtx->msrEFER;
4074 if (!pVM->hmr0.s.vmx.fUnrestrictedGuest)
4075 {
4076 if (!(pCtx->msrEFER & MSR_K6_EFER_LMA))
4077 uGuestEferMsr &= ~MSR_K6_EFER_LME;
4078 else
4079 Assert((pCtx->msrEFER & (MSR_K6_EFER_LMA | MSR_K6_EFER_LME)) == (MSR_K6_EFER_LMA | MSR_K6_EFER_LME));
4080 }
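            /* E.g. a guest that has set EFER.LME but has not yet enabled CR0.PG is still in
               legacy mode, so it is entered with LME (and LMA) clear until paging is enabled. */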
4081
4082 /*
4083 * If the CPU supports VMCS controls for swapping EFER, use it. Otherwise, we have no option
4084 * but to use the auto-load store MSR area in the VMCS for swapping EFER. See @bugref{7368}.
4085 */
4086 if (g_fHmVmxSupportsVmcsEfer)
4087 {
4088 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_EFER_FULL, uGuestEferMsr);
4089 AssertRC(rc);
4090 }
4091 else
4092 {
4093 /*
4094 * We shall use the auto-load/store MSR area only for loading the EFER MSR but we must
4095 * continue to intercept guest read and write accesses to it, see @bugref{7386#c16}.
4096 */
4097 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER, uGuestEferMsr,
4098 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4099 AssertRCReturn(rc, rc);
4100 }
4101
4102 Log4Func(("efer=%#RX64 shadow=%#RX64\n", uGuestEferMsr, pCtx->msrEFER));
4103 }
4104 else if (!g_fHmVmxSupportsVmcsEfer)
4105 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER);
4106
4107 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_EFER_MSR);
4108 }
4109
4110 /*
4111 * Other MSRs.
4112 */
4113 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_OTHER_MSRS)
4114 {
4115 /* Speculation Control (R/W). */
4116 HMVMX_CPUMCTX_ASSERT(pVCpu, HM_CHANGED_GUEST_OTHER_MSRS);
4117 if (pVM->cpum.ro.GuestFeatures.fIbrs)
4118 {
4119 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_IA32_SPEC_CTRL, CPUMGetGuestSpecCtrl(pVCpu),
4120 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4121 AssertRCReturn(rc, rc);
4122 }
4123
4124 /* Last Branch Record. */
4125 if (pVM->hmr0.s.vmx.fLbr)
4126 {
4127 PVMXVMCSINFOSHARED const pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
4128 uint32_t const idFromIpMsrStart = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
4129 uint32_t const idToIpMsrStart = pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
4130 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
4131 Assert(cLbrStack <= 32);
4132 for (uint32_t i = 0; i < cLbrStack; i++)
4133 {
4134 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idFromIpMsrStart + i,
4135 pVmcsInfoShared->au64LbrFromIpMsr[i],
4136 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4137 AssertRCReturn(rc, rc);
4138
4139 /* Some CPUs don't have a Branch-To-IP MSR (P4 and related Xeons). */
4140 if (idToIpMsrStart != 0)
4141 {
4142 rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idToIpMsrStart + i,
4143 pVmcsInfoShared->au64LbrToIpMsr[i],
4144 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4145 AssertRCReturn(rc, rc);
4146 }
4147 }
4148
4149 /* Add LBR top-of-stack MSR (which contains the index to the most recent record). */
4150 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, pVM->hmr0.s.vmx.idLbrTosMsr,
4151 pVmcsInfoShared->u64LbrTosMsr, false /* fSetReadWrite */,
4152 false /* fUpdateHostMsr */);
4153 AssertRCReturn(rc, rc);
4154 }
4155
4156 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_OTHER_MSRS);
4157 }
4158
4159 return VINF_SUCCESS;
4160}
4161
4162
4163/**
4164 * Wrapper for running the guest code in VT-x.
4165 *
4166 * @returns VBox status code, no informational status codes.
4167 * @param pVCpu The cross context virtual CPU structure.
4168 * @param pVmxTransient The VMX-transient structure.
4169 *
4170 * @remarks No-long-jump zone!!!
4171 */
4172DECLINLINE(int) hmR0VmxRunGuest(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
4173{
4174 /* Mark that HM is the keeper of all guest-CPU registers now that we're going to execute guest code. */
4175 pVCpu->cpum.GstCtx.fExtrn |= HMVMX_CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_KEEPER_HM;
4176
4177 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
4178 bool const fResumeVM = RT_BOOL(pVmcsInfo->fVmcsState == VMX_V_VMCS_LAUNCH_STATE_LAUNCHED);
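    /* The very first entry after VMCLEAR must use VMLAUNCH; once the VMCS launch state is
       "launched", subsequent entries must use VMRESUME instead, hence tracking fVmcsState. */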
4179#ifdef VBOX_WITH_STATISTICS
4180 if (fResumeVM)
4181 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmResume);
4182 else
4183 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmLaunch);
4184#endif
4185 int rc = pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResumeVM);
4186 AssertMsg(rc <= VINF_SUCCESS, ("%Rrc\n", rc));
4187 return rc;
4188}
4189
4190
4191/**
4192 * Reports world-switch error and dumps some useful debug info.
4193 *
4194 * @param pVCpu The cross context virtual CPU structure.
4195 * @param rcVMRun The return code from VMLAUNCH/VMRESUME.
4196 * @param pVmxTransient The VMX-transient structure (only
4197 * exitReason updated).
4198 */
4199static void hmR0VmxReportWorldSwitchError(PVMCPUCC pVCpu, int rcVMRun, PVMXTRANSIENT pVmxTransient)
4200{
4201 Assert(pVCpu);
4202 Assert(pVmxTransient);
4203 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
4204
4205 Log4Func(("VM-entry failure: %Rrc\n", rcVMRun));
4206 switch (rcVMRun)
4207 {
4208 case VERR_VMX_INVALID_VMXON_PTR:
4209 AssertFailed();
4210 break;
4211 case VINF_SUCCESS: /* VMLAUNCH/VMRESUME succeeded but VM-entry failed... yeah, true story. */
4212 case VERR_VMX_UNABLE_TO_START_VM: /* VMLAUNCH/VMRESUME itself failed. */
4213 {
4214 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &pVCpu->hm.s.vmx.LastError.u32ExitReason);
4215 rc |= VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
4216 AssertRC(rc);
4217 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_QUALIFICATION>(pVCpu, pVmxTransient);
4218
4219 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
4220 /* LastError.idCurrentCpu was already updated in hmR0VmxPreRunGuestCommitted().
4221 Cannot do it here as we may have been long preempted. */
4222
4223#ifdef VBOX_STRICT
4224 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4225 Log4(("uExitReason %#RX32 (VmxTransient %#RX16)\n", pVCpu->hm.s.vmx.LastError.u32ExitReason,
4226 pVmxTransient->uExitReason));
4227 Log4(("Exit Qualification %#RX64\n", pVmxTransient->uExitQual));
4228 Log4(("InstrError %#RX32\n", pVCpu->hm.s.vmx.LastError.u32InstrError));
4229 if (pVCpu->hm.s.vmx.LastError.u32InstrError <= HMVMX_INSTR_ERROR_MAX)
4230 Log4(("InstrError Desc. \"%s\"\n", g_apszVmxInstrErrors[pVCpu->hm.s.vmx.LastError.u32InstrError]));
4231 else
4232 Log4(("InstrError Desc. Range exceeded %u\n", HMVMX_INSTR_ERROR_MAX));
4233 Log4(("Entered host CPU %u\n", pVCpu->hm.s.vmx.LastError.idEnteredCpu));
4234 Log4(("Current host CPU %u\n", pVCpu->hm.s.vmx.LastError.idCurrentCpu));
4235
4236 static struct
4237 {
4238 /** Name of the field to log. */
4239 const char *pszName;
4240 /** The VMCS field. */
4241 uint32_t uVmcsField;
4242 /** Whether host support of this field needs to be checked. */
4243 bool fCheckSupport;
4244 } const s_aVmcsFields[] =
4245 {
4246 { "VMX_VMCS32_CTRL_PIN_EXEC", VMX_VMCS32_CTRL_PIN_EXEC, false },
4247 { "VMX_VMCS32_CTRL_PROC_EXEC", VMX_VMCS32_CTRL_PROC_EXEC, false },
4248 { "VMX_VMCS32_CTRL_PROC_EXEC2", VMX_VMCS32_CTRL_PROC_EXEC2, true },
4249 { "VMX_VMCS32_CTRL_ENTRY", VMX_VMCS32_CTRL_ENTRY, false },
4250 { "VMX_VMCS32_CTRL_EXIT", VMX_VMCS32_CTRL_EXIT, false },
4251 { "VMX_VMCS32_CTRL_CR3_TARGET_COUNT", VMX_VMCS32_CTRL_CR3_TARGET_COUNT, false },
4252 { "VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO", VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, false },
4253 { "VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE", VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, false },
4254 { "VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH", VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, false },
4255 { "VMX_VMCS32_CTRL_TPR_THRESHOLD", VMX_VMCS32_CTRL_TPR_THRESHOLD, false },
4256 { "VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, false },
4257 { "VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, false },
4258 { "VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, false },
4259 { "VMX_VMCS32_CTRL_EXCEPTION_BITMAP", VMX_VMCS32_CTRL_EXCEPTION_BITMAP, false },
4260 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, false },
4261 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, false },
4262 { "VMX_VMCS_CTRL_CR0_MASK", VMX_VMCS_CTRL_CR0_MASK, false },
4263 { "VMX_VMCS_CTRL_CR0_READ_SHADOW", VMX_VMCS_CTRL_CR0_READ_SHADOW, false },
4264 { "VMX_VMCS_CTRL_CR4_MASK", VMX_VMCS_CTRL_CR4_MASK, false },
4265 { "VMX_VMCS_CTRL_CR4_READ_SHADOW", VMX_VMCS_CTRL_CR4_READ_SHADOW, false },
4266 { "VMX_VMCS64_CTRL_EPTP_FULL", VMX_VMCS64_CTRL_EPTP_FULL, true },
4267 { "VMX_VMCS_GUEST_RIP", VMX_VMCS_GUEST_RIP, false },
4268 { "VMX_VMCS_GUEST_RSP", VMX_VMCS_GUEST_RSP, false },
4269 { "VMX_VMCS_GUEST_RFLAGS", VMX_VMCS_GUEST_RFLAGS, false },
4270 { "VMX_VMCS16_VPID", VMX_VMCS16_VPID, true, },
4271 { "VMX_VMCS_HOST_CR0", VMX_VMCS_HOST_CR0, false },
4272 { "VMX_VMCS_HOST_CR3", VMX_VMCS_HOST_CR3, false },
4273 { "VMX_VMCS_HOST_CR4", VMX_VMCS_HOST_CR4, false },
4274 /* The order of selector fields below are fixed! */
4275 { "VMX_VMCS16_HOST_ES_SEL", VMX_VMCS16_HOST_ES_SEL, false },
4276 { "VMX_VMCS16_HOST_CS_SEL", VMX_VMCS16_HOST_CS_SEL, false },
4277 { "VMX_VMCS16_HOST_SS_SEL", VMX_VMCS16_HOST_SS_SEL, false },
4278 { "VMX_VMCS16_HOST_DS_SEL", VMX_VMCS16_HOST_DS_SEL, false },
4279 { "VMX_VMCS16_HOST_FS_SEL", VMX_VMCS16_HOST_FS_SEL, false },
4280 { "VMX_VMCS16_HOST_GS_SEL", VMX_VMCS16_HOST_GS_SEL, false },
4281 { "VMX_VMCS16_HOST_TR_SEL", VMX_VMCS16_HOST_TR_SEL, false },
4282 /* End of ordered selector fields. */
4283 { "VMX_VMCS_HOST_TR_BASE", VMX_VMCS_HOST_TR_BASE, false },
4284 { "VMX_VMCS_HOST_GDTR_BASE", VMX_VMCS_HOST_GDTR_BASE, false },
4285 { "VMX_VMCS_HOST_IDTR_BASE", VMX_VMCS_HOST_IDTR_BASE, false },
4286 { "VMX_VMCS32_HOST_SYSENTER_CS", VMX_VMCS32_HOST_SYSENTER_CS, false },
4287 { "VMX_VMCS_HOST_SYSENTER_EIP", VMX_VMCS_HOST_SYSENTER_EIP, false },
4288 { "VMX_VMCS_HOST_SYSENTER_ESP", VMX_VMCS_HOST_SYSENTER_ESP, false },
4289 { "VMX_VMCS_HOST_RSP", VMX_VMCS_HOST_RSP, false },
4290 { "VMX_VMCS_HOST_RIP", VMX_VMCS_HOST_RIP, false }
4291 };
4292
4293 RTGDTR HostGdtr;
4294 ASMGetGDTR(&HostGdtr);
4295
4296 uint32_t const cVmcsFields = RT_ELEMENTS(s_aVmcsFields);
4297 for (uint32_t i = 0; i < cVmcsFields; i++)
4298 {
4299 uint32_t const uVmcsField = s_aVmcsFields[i].uVmcsField;
4300
4301 bool fSupported;
4302 if (!s_aVmcsFields[i].fCheckSupport)
4303 fSupported = true;
4304 else
4305 {
4306 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4307 switch (uVmcsField)
4308 {
4309 case VMX_VMCS64_CTRL_EPTP_FULL: fSupported = pVM->hmr0.s.fNestedPaging; break;
4310 case VMX_VMCS16_VPID: fSupported = pVM->hmr0.s.vmx.fVpid; break;
4311 case VMX_VMCS32_CTRL_PROC_EXEC2:
4312 fSupported = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
4313 break;
4314 default:
4315 AssertMsgFailedReturnVoid(("Failed to provide VMCS field support for %#RX32\n", uVmcsField));
4316 }
4317 }
4318
4319 if (fSupported)
4320 {
4321 uint8_t const uWidth = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_WIDTH);
4322 switch (uWidth)
4323 {
4324 case VMX_VMCSFIELD_WIDTH_16BIT:
4325 {
4326 uint16_t u16Val;
4327 rc = VMXReadVmcs16(uVmcsField, &u16Val);
4328 AssertRC(rc);
4329 Log4(("%-40s = %#RX16\n", s_aVmcsFields[i].pszName, u16Val));
4330
4331 if ( uVmcsField >= VMX_VMCS16_HOST_ES_SEL
4332 && uVmcsField <= VMX_VMCS16_HOST_TR_SEL)
4333 {
4334 if (u16Val < HostGdtr.cbGdt)
4335 {
4336 /* Order of selectors in s_apszSel is fixed and matches the order in s_aVmcsFields. */
4337 static const char * const s_apszSel[] = { "Host ES", "Host CS", "Host SS", "Host DS",
4338 "Host FS", "Host GS", "Host TR" };
4339 uint8_t const idxSel = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_INDEX);
4340 Assert(idxSel < RT_ELEMENTS(s_apszSel));
4341 PCX86DESCHC pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u16Val & X86_SEL_MASK));
4342 hmR0DumpDescriptor(pDesc, u16Val, s_apszSel[idxSel]);
4343 }
4344 else
4345 Log4((" Selector value exceeds GDT limit!\n"));
4346 }
4347 break;
4348 }
4349
4350 case VMX_VMCSFIELD_WIDTH_32BIT:
4351 {
4352 uint32_t u32Val;
4353 rc = VMXReadVmcs32(uVmcsField, &u32Val);
4354 AssertRC(rc);
4355 Log4(("%-40s = %#RX32\n", s_aVmcsFields[i].pszName, u32Val));
4356 break;
4357 }
4358
4359 case VMX_VMCSFIELD_WIDTH_64BIT:
4360 case VMX_VMCSFIELD_WIDTH_NATURAL:
4361 {
4362 uint64_t u64Val;
4363 rc = VMXReadVmcs64(uVmcsField, &u64Val);
4364 AssertRC(rc);
4365 Log4(("%-40s = %#RX64\n", s_aVmcsFields[i].pszName, u64Val));
4366 break;
4367 }
4368 }
4369 }
4370 }
4371
4372 Log4(("MSR_K6_EFER = %#RX64\n", ASMRdMsr(MSR_K6_EFER)));
4373 Log4(("MSR_K8_CSTAR = %#RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
4374 Log4(("MSR_K8_LSTAR = %#RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
4375 Log4(("MSR_K6_STAR = %#RX64\n", ASMRdMsr(MSR_K6_STAR)));
4376 Log4(("MSR_K8_SF_MASK = %#RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
4377 Log4(("MSR_K8_KERNEL_GS_BASE = %#RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
4378#endif /* VBOX_STRICT */
4379 break;
4380 }
4381
4382 default:
4383 /* Impossible */
4384 AssertMsgFailed(("hmR0VmxReportWorldSwitchError %Rrc (%#x)\n", rcVMRun, rcVMRun));
4385 break;
4386 }
4387}
4388
4389
4390/**
4391 * Sets up the usage of TSC-offsetting and updates the VMCS.
4392 *
4393 * If offsetting is not possible, cause VM-exits on RDTSC(P)s. Also sets up the
4394 * VMX-preemption timer.
4395 *
4396 * @param pVCpu The cross context virtual CPU structure.
4397 * @param pVmxTransient The VMX-transient structure.
4398 * @param idCurrentCpu The current CPU number.
4399 *
4400 * @remarks No-long-jump zone!!!
4401 */
4402static void hmR0VmxUpdateTscOffsettingAndPreemptTimer(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, RTCPUID idCurrentCpu)
4403{
4404 bool fOffsettedTsc;
4405 bool fParavirtTsc;
4406 uint64_t uTscOffset;
4407 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4408
4409 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
4410 {
4411 /* The TMCpuTickGetDeadlineAndTscOffset function is expensive (calling it on
4412            every entry slowed down the bs2-test1 CPUID testcase by ~33% on a 10980xe. */
4413 uint64_t cTicksToDeadline;
4414 if ( idCurrentCpu == pVCpu->hmr0.s.idLastCpu
4415 && TMVirtualSyncIsCurrentDeadlineVersion(pVM, pVCpu->hmr0.s.vmx.uTscDeadlineVersion))
4416 {
4417 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadline);
4418 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4419 cTicksToDeadline = pVCpu->hmr0.s.vmx.uTscDeadline - SUPReadTsc();
4420 if ((int64_t)cTicksToDeadline > 0)
4421 { /* hopefully */ }
4422 else
4423 {
4424 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadlineExpired);
4425 cTicksToDeadline = 0;
4426 }
4427 }
4428 else
4429 {
4430 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadline);
4431 cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVM, pVCpu, &uTscOffset, &fOffsettedTsc, &fParavirtTsc,
4432 &pVCpu->hmr0.s.vmx.uTscDeadline,
4433 &pVCpu->hmr0.s.vmx.uTscDeadlineVersion);
4434 pVCpu->hmr0.s.vmx.uTscDeadline += cTicksToDeadline;
4435 if (cTicksToDeadline >= 128)
4436 { /* hopefully */ }
4437 else
4438 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadlineExpired);
4439 }
4440
4441 /* Make sure the returned values have sane upper and lower boundaries. */
4442 uint64_t const u64CpuHz = SUPGetCpuHzFromGipBySetIndex(g_pSUPGlobalInfoPage, pVCpu->iHostCpuSet);
4443 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64th of a second, 15.625ms. */ /** @todo r=bird: Once real+virtual timers move to separate thread, we can raise the upper limit (16ms isn't much). ASSUMES working poke cpu function. */
4444 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 32768); /* 1/32768th of a second, ~30us. */
4445 cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
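         /* Worked example (illustrative only, not from the original source): on a hypothetical
            3.2 GHz host the clamp above keeps cTicksToDeadline between roughly 100k TSC ticks
            (3.2e9 / 32768, ~30us) and 50M TSC ticks (3.2e9 / 64, ~15.6ms); the right-shift by
            cPreemptTimerShift then converts TSC ticks into VMX-preemption-timer units before the
            value is written to VMX_VMCS32_PREEMPT_TIMER_VALUE below. */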
4446
4447 /** @todo r=ramshankar: We need to find a way to integrate nested-guest
4448 * preemption timers here. We probably need to clamp the preemption timer,
4449 * after converting the timer value to the host. */
4450 uint32_t const cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
4451 int rc = VMXWriteVmcs32(VMX_VMCS32_PREEMPT_TIMER_VALUE, cPreemptionTickCount);
4452 AssertRC(rc);
4453 }
4454 else
4455 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4456
4457 if (fParavirtTsc)
4458 {
4459 /* Currently neither Hyper-V nor KVM needs to update its paravirt. TSC
4460 information before every VM-entry, hence this is disabled for performance's sake. */
4461#if 0
4462 int rc = GIMR0UpdateParavirtTsc(pVM, 0 /* u64Offset */);
4463 AssertRC(rc);
4464#endif
4465 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscParavirt);
4466 }
4467
4468 if ( fOffsettedTsc
4469 && RT_LIKELY(!pVCpu->hmr0.s.fDebugWantRdTscExit))
4470 {
4471 if (pVmxTransient->fIsNestedGuest)
4472 uTscOffset = CPUMApplyNestedGuestTscOffset(pVCpu, uTscOffset);
4473 hmR0VmxSetTscOffsetVmcs(pVmxTransient->pVmcsInfo, uTscOffset);
4474 hmR0VmxRemoveProcCtlsVmcs(pVCpu, pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4475 }
4476 else
4477 {
4478 /* We can't use TSC-offsetting (non-fixed TSC, warp drive active etc.), VM-exit on RDTSC(P). */
4479 hmR0VmxSetProcCtlsVmcs(pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4480 }
4481}
4482
4483
4484/**
4485 * Saves the guest state from the VMCS into the guest-CPU context.
4486 *
4487 * @returns VBox status code.
4488 * @param pVCpu The cross context virtual CPU structure.
4489 * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
4490 */
4491VMMR0DECL(int) VMXR0ImportStateOnDemand(PVMCPUCC pVCpu, uint64_t fWhat)
4492{
4493 AssertPtr(pVCpu);
4494 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4495 return vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, fWhat);
4496}
4497
4498
4499/**
4500 * Gets VMX VM-exit auxiliary information.
4501 *
4502 * @returns VBox status code.
4503 * @param pVCpu The cross context virtual CPU structure.
4504 * @param pVmxExitAux Where to store the VM-exit auxiliary info.
4505 * @param fWhat What to fetch, HMVMX_READ_XXX.
4506 */
4507VMMR0DECL(int) VMXR0GetExitAuxInfo(PVMCPUCC pVCpu, PVMXEXITAUX pVmxExitAux, uint32_t fWhat)
4508{
4509 PVMXTRANSIENT pVmxTransient = pVCpu->hmr0.s.vmx.pVmxTransient;
4510 if (RT_LIKELY(pVmxTransient))
4511 {
4512 AssertCompile(sizeof(fWhat) == sizeof(pVmxTransient->fVmcsFieldsRead));
4513
4514 /* The exit reason is always available. */
4515 pVmxExitAux->uReason = pVmxTransient->uExitReason;
4516
4517
4518 if (fWhat & HMVMX_READ_EXIT_QUALIFICATION)
4519 {
4520 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_QUALIFICATION>(pVCpu, pVmxTransient);
4521 pVmxExitAux->u64Qual = pVmxTransient->uExitQual;
4522#ifdef VBOX_STRICT
4523 fWhat &= ~HMVMX_READ_EXIT_QUALIFICATION;
4524#endif
4525 }
4526
4527 if (fWhat & HMVMX_READ_IDT_VECTORING_INFO)
4528 {
4529 vmxHCReadToTransientSlow<HMVMX_READ_IDT_VECTORING_INFO>(pVCpu, pVmxTransient);
4530 pVmxExitAux->uIdtVectoringInfo = pVmxTransient->uIdtVectoringInfo;
4531#ifdef VBOX_STRICT
4532 fWhat &= ~HMVMX_READ_IDT_VECTORING_INFO;
4533#endif
4534 }
4535
4536 if (fWhat & HMVMX_READ_IDT_VECTORING_ERROR_CODE)
4537 {
4538 vmxHCReadToTransientSlow<HMVMX_READ_IDT_VECTORING_ERROR_CODE>(pVCpu, pVmxTransient);
4539 pVmxExitAux->uIdtVectoringErrCode = pVmxTransient->uIdtVectoringErrorCode;
4540#ifdef VBOX_STRICT
4541 fWhat &= ~HMVMX_READ_IDT_VECTORING_ERROR_CODE;
4542#endif
4543 }
4544
4545 if (fWhat & HMVMX_READ_EXIT_INSTR_LEN)
4546 {
4547 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INSTR_LEN>(pVCpu, pVmxTransient);
4548 pVmxExitAux->cbInstr = pVmxTransient->cbExitInstr;
4549#ifdef VBOX_STRICT
4550 fWhat &= ~HMVMX_READ_EXIT_INSTR_LEN;
4551#endif
4552 }
4553
4554 if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_INFO)
4555 {
4556 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INTERRUPTION_INFO>(pVCpu, pVmxTransient);
4557 pVmxExitAux->uExitIntInfo = pVmxTransient->uExitIntInfo;
4558#ifdef VBOX_STRICT
4559 fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_INFO;
4560#endif
4561 }
4562
4563 if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE)
4564 {
4565 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE>(pVCpu, pVmxTransient);
4566 pVmxExitAux->uExitIntErrCode = pVmxTransient->uExitIntErrorCode;
4567#ifdef VBOX_STRICT
4568 fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE;
4569#endif
4570 }
4571
4572 if (fWhat & HMVMX_READ_EXIT_INSTR_INFO)
4573 {
4574 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INSTR_INFO>(pVCpu, pVmxTransient);
4575 pVmxExitAux->InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
4576#ifdef VBOX_STRICT
4577 fWhat &= ~HMVMX_READ_EXIT_INSTR_INFO;
4578#endif
4579 }
4580
4581 if (fWhat & HMVMX_READ_GUEST_LINEAR_ADDR)
4582 {
4583 vmxHCReadToTransientSlow<HMVMX_READ_GUEST_LINEAR_ADDR>(pVCpu, pVmxTransient);
4584 pVmxExitAux->u64GuestLinearAddr = pVmxTransient->uGuestLinearAddr;
4585#ifdef VBOX_STRICT
4586 fWhat &= ~HMVMX_READ_GUEST_LINEAR_ADDR;
4587#endif
4588 }
4589
4590 if (fWhat & HMVMX_READ_GUEST_PHYSICAL_ADDR)
4591 {
4592 vmxHCReadToTransientSlow<HMVMX_READ_GUEST_PHYSICAL_ADDR>(pVCpu, pVmxTransient);
4593 pVmxExitAux->u64GuestPhysAddr = pVmxTransient->uGuestPhysicalAddr;
4594#ifdef VBOX_STRICT
4595 fWhat &= ~HMVMX_READ_GUEST_PHYSICAL_ADDR;
4596#endif
4597 }
4598
4599 if (fWhat & HMVMX_READ_GUEST_PENDING_DBG_XCPTS)
4600 {
4601#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4602 vmxHCReadToTransientSlow<HMVMX_READ_GUEST_PENDING_DBG_XCPTS>(pVCpu, pVmxTransient);
4603 pVmxExitAux->u64GuestPendingDbgXcpts = pVmxTransient->uGuestPendingDbgXcpts;
4604#else
4605 pVmxExitAux->u64GuestPendingDbgXcpts = 0;
4606#endif
4607#ifdef VBOX_STRICT
4608 fWhat &= ~HMVMX_READ_GUEST_PENDING_DBG_XCPTS;
4609#endif
4610 }
4611
4612 AssertMsg(!fWhat, ("fWhat=%#RX32 fVmcsFieldsRead=%#RX32\n", fWhat, pVmxTransient->fVmcsFieldsRead));
4613 return VINF_SUCCESS;
4614 }
4615 return VERR_NOT_AVAILABLE;
4616}
4617
4618
4619/**
4620 * Does the necessary state syncing before returning to ring-3 for any reason
4621 * (longjmp, preemption, voluntary exits to ring-3) from VT-x.
4622 *
4623 * @returns VBox status code.
4624 * @param pVCpu The cross context virtual CPU structure.
4625 * @param fImportState Whether to import the guest state from the VMCS back
4626 * to the guest-CPU context.
4627 *
4628 * @remarks No-long-jmp zone!!!
4629 */
4630static int hmR0VmxLeave(PVMCPUCC pVCpu, bool fImportState)
4631{
4632 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4633 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
4634
4635 RTCPUID const idCpu = RTMpCpuId();
4636 Log4Func(("HostCpuId=%u\n", idCpu));
4637
4638 /*
4639 * !!! IMPORTANT !!!
4640 * If you modify code here, check whether VMXR0CallRing3Callback() needs to be updated too.
4641 */
4642
4643 /* Save the guest state if necessary. */
4644 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4645 if (fImportState)
4646 {
4647 int rc = vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
4648 AssertRCReturn(rc, rc);
4649 }
4650
4651 /* Restore host FPU state if necessary. We will resync on next R0 reentry. */
4652 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
4653 Assert(!CPUMIsGuestFPUStateActive(pVCpu));
4654
4655 /* Restore host debug registers if necessary. We will resync on next R0 reentry. */
4656#ifdef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
4657 Assert( (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
4658 || pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs
4659 || (!CPUMIsHyperDebugStateActive(pVCpu) && !pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fAlwaysInterceptMovDRx));
4660#else
4661 Assert( (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
4662 || pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs
4663 || !CPUMIsHyperDebugStateActive(pVCpu));
4664#endif
4665 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
4666 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
4667 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
4668
4669 /* Restore host-state bits that VT-x only restores partially. */
4670 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
4671 {
4672 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags, idCpu));
4673 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
4674 }
4675 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
4676
4677 /* Restore the lazy host MSRs as we're leaving VT-x context. */
4678 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
4679 {
4680 /* We shouldn't restore the host MSRs without saving the guest MSRs first. */
4681 if (!fImportState)
4682 {
4683 int rc = vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS);
4684 AssertRCReturn(rc, rc);
4685 }
4686 hmR0VmxLazyRestoreHostMsrs(pVCpu);
4687 Assert(!pVCpu->hmr0.s.vmx.fLazyMsrs);
4688 }
4689 else
4690 pVCpu->hmr0.s.vmx.fLazyMsrs = 0;
4691
4692 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
4693 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
4694
4695 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry);
4696 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatImportGuestState);
4697 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExportGuestState);
4698 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatPreExit);
4699 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitHandling);
4700 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitIO);
4701 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitMovCRx);
4702 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitXcptNmi);
4703 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitVmentry);
4704 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
4705
4706 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
4707
4708 /** @todo This partially defeats the purpose of having preemption hooks.
4709 * The problem is that deregistering the hooks should be moved to a place that
4710 * lasts until the EMT is about to be destroyed, not done every time we leave HM
4711 * context.
4712 */
4713 int rc = hmR0VmxClearVmcs(pVmcsInfo);
4714 AssertRCReturn(rc, rc);
4715
4716#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4717 /*
4718 * A valid shadow VMCS is made active as part of VM-entry. It is necessary to
4719 * clear a shadow VMCS before allowing that VMCS to become active on another
4720 * logical processor. We may or may not be importing guest state which clears
4721 * it, so cover for it here.
4722 *
4723 * See Intel spec. 24.11.1 "Software Use of Virtual-Machine Control Structures".
4724 */
4725 if ( pVmcsInfo->pvShadowVmcs
4726 && pVmcsInfo->fShadowVmcsState != VMX_V_VMCS_LAUNCH_STATE_CLEAR)
4727 {
4728 rc = vmxHCClearShadowVmcs(pVmcsInfo);
4729 AssertRCReturn(rc, rc);
4730 }
4731
4732 /*
4733 * Flag that we need to re-export the host state if we switch to this VMCS before
4734 * executing guest or nested-guest code.
4735 */
4736 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
4737#endif
4738
4739 Log4Func(("Cleared Vmcs. HostCpuId=%u\n", idCpu));
4740 NOREF(idCpu);
4741 return VINF_SUCCESS;
4742}
4743
4744
4745/**
4746 * Leaves the VT-x session.
4747 *
4748 * @returns VBox status code.
4749 * @param pVCpu The cross context virtual CPU structure.
4750 *
4751 * @remarks No-long-jmp zone!!!
4752 */
4753static int hmR0VmxLeaveSession(PVMCPUCC pVCpu)
4754{
4755 HM_DISABLE_PREEMPT(pVCpu);
4756 HMVMX_ASSERT_CPU_SAFE(pVCpu);
4757 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
4758 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4759
4760 /* When thread-context hooks are used, we can avoid doing the leave again if we had been preempted before
4761 and done this from the VMXR0ThreadCtxCallback(). */
4762 if (!pVCpu->hmr0.s.fLeaveDone)
4763 {
4764 int rc2 = hmR0VmxLeave(pVCpu, true /* fImportState */);
4765 AssertRCReturnStmt(rc2, HM_RESTORE_PREEMPT(), rc2);
4766 pVCpu->hmr0.s.fLeaveDone = true;
4767 }
4768 Assert(!pVCpu->cpum.GstCtx.fExtrn);
4769
4770 /*
4771 * !!! IMPORTANT !!!
4772 * If you modify code here, make sure to check whether VMXR0CallRing3Callback() needs to be updated too.
4773 */
4774
4775 /* Deregister hook now that we've left HM context before re-enabling preemption. */
4776 /** @todo Deregistering here means we need to VMCLEAR always
4777 * (longjmp/exit-to-r3) in VT-x, which is not efficient; eliminate the need
4778 * for calling VMMR0ThreadCtxHookDisable here! */
4779 VMMR0ThreadCtxHookDisable(pVCpu);
4780
4781 /* Leave HM context. This takes care of local init (term) and deregistering the longjmp-to-ring-3 callback. */
4782 int rc = HMR0LeaveCpu(pVCpu);
4783 HM_RESTORE_PREEMPT();
4784 return rc;
4785}
4786
4787
4788/**
4789 * Takes the necessary actions before going back to ring-3.
4790 *
4791 * An action requires us to go back to ring-3. This function does the necessary
4792 * steps before we can safely return to ring-3. This is not the same as longjmps
4793 * to ring-3; this is voluntary and prepares the guest so it may continue
4794 * executing outside HM (recompiler/IEM).
4795 *
4796 * @returns VBox status code.
4797 * @param pVCpu The cross context virtual CPU structure.
4798 * @param rcExit The reason for exiting to ring-3. Can be
4799 * VINF_VMM_UNKNOWN_RING3_CALL.
4800 */
4801static int hmR0VmxExitToRing3(PVMCPUCC pVCpu, VBOXSTRICTRC rcExit)
4802{
4803 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
4804
4805 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4806 if (RT_UNLIKELY(rcExit == VERR_VMX_INVALID_VMCS_PTR))
4807 {
4808 VMXGetCurrentVmcs(&pVCpu->hm.s.vmx.LastError.HCPhysCurrentVmcs);
4809 pVCpu->hm.s.vmx.LastError.u32VmcsRev = *(uint32_t *)pVmcsInfo->pvVmcs;
4810 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
4811 /* LastError.idCurrentCpu was updated in hmR0VmxPreRunGuestCommitted(). */
4812 }
4813
4814 /* Please, no longjumps here (a logging flush must not jump back to ring-3). NO LOGGING BEFORE THIS POINT! */
4815 VMMRZCallRing3Disable(pVCpu);
4816 Log4Func(("rcExit=%d\n", VBOXSTRICTRC_VAL(rcExit)));
4817
4818 /*
4819 * Convert any pending HM events back to TRPM due to premature exits to ring-3.
4820 * We need to do this only on returns to ring-3 and not for longjmps to ring-3.
4821 *
4822 * This is because execution may continue from ring-3 and we would need to inject
4823 * the event from there (hence place it back in TRPM).
4824 */
4825 if (pVCpu->hm.s.Event.fPending)
4826 {
4827 vmxHCPendingEventToTrpmTrap(pVCpu);
4828 Assert(!pVCpu->hm.s.Event.fPending);
4829
4830 /* Clear the events from the VMCS. */
4831 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0); AssertRC(rc);
4832 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, 0); AssertRC(rc);
4833 }
4834#ifdef VBOX_STRICT
4835 /*
4836 * We check for rcExit here since for errors like VERR_VMX_UNABLE_TO_START_VM (which are
4837 * fatal), we don't care about verifying duplicate injection of events. Errors like
4838 * VERR_EM_INTERPRET are converted to their VINF_* counterparts -prior- to calling this
4839 * function so those should and will be checked below.
4840 */
4841 else if (RT_SUCCESS(rcExit))
4842 {
4843 /*
4844 * Ensure we don't accidentally clear a pending HM event without clearing the VMCS.
4845 * This can be pretty hard to debug otherwise; interrupts might occasionally get
4846 * injected twice, see @bugref{9180#c42}.
4847 *
4848 * However, if the VM-entry failed, any VM entry-interruption info. field would
4849 * be left unmodified as the event would not have been injected to the guest. In
4850 * such cases, don't assert, we're not going to continue guest execution anyway.
4851 */
4852 uint32_t uExitReason;
4853 uint32_t uEntryIntInfo;
4854 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
4855 rc |= VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &uEntryIntInfo);
4856 AssertRC(rc);
4857 AssertMsg(VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason) || !VMX_ENTRY_INT_INFO_IS_VALID(uEntryIntInfo),
4858 ("uExitReason=%#RX32 uEntryIntInfo=%#RX32 rcExit=%d\n", uExitReason, uEntryIntInfo, VBOXSTRICTRC_VAL(rcExit)));
4859 }
4860#endif
4861
4862 /*
4863 * Clear the interrupt-window and NMI-window VMCS controls as we could have got
4864 * a VM-exit with higher priority than interrupt-window or NMI-window VM-exits
4865 * (e.g. TPR below threshold).
4866 */
4867 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
4868 {
4869 Assert(!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs);
4870 vmxHCClearIntWindowExitVmcs(pVCpu, pVmcsInfo);
4871 vmxHCClearNmiWindowExitVmcs(pVCpu, pVmcsInfo);
4872 }
4873
4874 /* If we're emulating an instruction, we shouldn't have any TRPM traps pending
4875 and if we're injecting an event we should have a TRPM trap pending. */
4876 AssertMsg(rcExit != VINF_EM_RAW_INJECT_TRPM_EVENT || TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
4877#ifndef DEBUG_bird /* Triggered after firing an NMI against NT4SP1, possibly a triple fault in progress. */
4878 AssertMsg(rcExit != VINF_EM_RAW_EMULATE_INSTR || !TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
4879#endif
4880
4881 /* Save guest state and restore host state bits. */
4882 int rc = hmR0VmxLeaveSession(pVCpu);
4883 AssertRCReturn(rc, rc);
4884 STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
4885
4886 /* Thread-context hooks are unregistered at this point!!! */
4887 /* Ring-3 callback notifications are unregistered at this point!!! */
4888
4889 /* Sync recompiler state. */
4890 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
4891 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR
4892 | CPUM_CHANGED_LDTR
4893 | CPUM_CHANGED_GDTR
4894 | CPUM_CHANGED_IDTR
4895 | CPUM_CHANGED_TR
4896 | CPUM_CHANGED_HIDDEN_SEL_REGS);
4897 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
4898 && CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx))
4899 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
4900
4901 Assert(!pVCpu->hmr0.s.fClearTrapFlag);
4902
4903 /* Update the exit-to-ring 3 reason. */
4904 pVCpu->hm.s.rcLastExitToR3 = VBOXSTRICTRC_VAL(rcExit);
4905
4906 /* On our way back from ring-3 reload the guest state if there is a possibility of it being changed. */
4907 if ( rcExit != VINF_EM_RAW_INTERRUPT
4908 || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
4909 {
4910 Assert(!(pVCpu->cpum.GstCtx.fExtrn & HMVMX_CPUMCTX_EXTRN_ALL));
4911 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
4912 }
4913
4914 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3);
4915 VMMRZCallRing3Enable(pVCpu);
4916 return rc;
4917}
4918
4919
4920/**
4921 * VMMRZCallRing3() callback wrapper which saves the guest state before we
4922 * longjump due to a ring-0 assertion.
4923 *
4924 * @returns VBox status code.
4925 * @param pVCpu The cross context virtual CPU structure.
4926 */
4927VMMR0DECL(int) VMXR0AssertionCallback(PVMCPUCC pVCpu)
4928{
4929 /*
4930 * !!! IMPORTANT !!!
4931 * If you modify code here, check whether hmR0VmxLeave() and hmR0VmxLeaveSession() need to be updated too.
4932 * This is a stripped down version which gets out ASAP, trying to not trigger any further assertions.
4933 */
4934 VMMR0AssertionRemoveNotification(pVCpu);
4935 VMMRZCallRing3Disable(pVCpu);
4936 HM_DISABLE_PREEMPT(pVCpu);
4937
4938 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4939 vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
4940 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
4941 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
4942
4943 /* Restore host-state bits that VT-x only restores partially. */
4944 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
4945 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
4946 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
4947
4948 /* Restore the lazy host MSRs as we're leaving VT-x context. */
4949 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
4950 hmR0VmxLazyRestoreHostMsrs(pVCpu);
4951
4952 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
4953 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
4954 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
4955
4956 /* Clear the current VMCS data back to memory (the shadow VMCS, if any, would have been
4957 cleared as part of importing the guest state above). */
4958 hmR0VmxClearVmcs(pVmcsInfo);
4959
4960 /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */
4961 VMMR0ThreadCtxHookDisable(pVCpu);
4962
4963 /* Leave HM context. This takes care of local init (term). */
4964 HMR0LeaveCpu(pVCpu);
4965 HM_RESTORE_PREEMPT();
4966 return VINF_SUCCESS;
4967}
4968
4969
4970/**
4971 * Enters the VT-x session.
4972 *
4973 * @returns VBox status code.
4974 * @param pVCpu The cross context virtual CPU structure.
4975 */
4976VMMR0DECL(int) VMXR0Enter(PVMCPUCC pVCpu)
4977{
4978 AssertPtr(pVCpu);
4979 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fSupported);
4980 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4981
4982 LogFlowFunc(("pVCpu=%p\n", pVCpu));
4983 Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
4984 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
4985
4986#ifdef VBOX_STRICT
4987 /* At least verify VMX is enabled, since we can't check if we're in VMX root mode without #GP'ing. */
4988 RTCCUINTREG uHostCr4 = ASMGetCR4();
4989 if (!(uHostCr4 & X86_CR4_VMXE))
4990 {
4991 LogRelFunc(("X86_CR4_VMXE bit in CR4 is not set!\n"));
4992 return VERR_VMX_X86_CR4_VMXE_CLEARED;
4993 }
4994#endif
4995
4996 /*
4997 * Do the EMT scheduled L1D and MDS flush here if needed.
4998 */
4999 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
5000 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
5001 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
5002 hmR0MdsClear();
5003
5004 /*
5005 * Load the appropriate VMCS as the current and active one.
5006 */
5007 PVMXVMCSINFO pVmcsInfo;
5008 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx);
5009 if (!fInNestedGuestMode)
5010 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfo;
5011 else
5012 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
5013 int rc = hmR0VmxLoadVmcs(pVmcsInfo);
5014 if (RT_SUCCESS(rc))
5015 {
5016 pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs = fInNestedGuestMode;
5017 pVCpu->hm.s.vmx.fSwitchedToNstGstVmcsCopyForRing3 = fInNestedGuestMode;
5018 pVCpu->hmr0.s.fLeaveDone = false;
5019 Log4Func(("Loaded %s Vmcs. HostCpuId=%u\n", fInNestedGuestMode ? "nested-guest" : "guest", RTMpCpuId()));
5020 }
5021 return rc;
5022}
5023
5024
5025/**
5026 * The thread-context callback.
5027 *
5028 * This is used together with RTThreadCtxHookCreate() on platforms which
5029 * support it, and directly from VMMR0EmtPrepareForBlocking() and
5030 * VMMR0EmtResumeAfterBlocking() on platforms which don't.
5031 *
5032 * @param enmEvent The thread-context event.
5033 * @param pVCpu The cross context virtual CPU structure.
5034 * @param fGlobalInit Whether global VT-x/AMD-V init. was used.
5035 * @thread EMT(pVCpu)
5036 */
5037VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPUCC pVCpu, bool fGlobalInit)
5038{
5039 AssertPtr(pVCpu);
5040 RT_NOREF1(fGlobalInit);
5041
5042 switch (enmEvent)
5043 {
5044 case RTTHREADCTXEVENT_OUT:
5045 {
5046 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5047 VMCPU_ASSERT_EMT(pVCpu);
5048
5049 /* No longjmps (logger flushes, locks) in this fragile context. */
5050 VMMRZCallRing3Disable(pVCpu);
5051 Log4Func(("Preempting: HostCpuId=%u\n", RTMpCpuId()));
5052
5053 /* Restore host-state (FPU, debug etc.) */
5054 if (!pVCpu->hmr0.s.fLeaveDone)
5055 {
5056 /*
5057 * Do -not- import the guest-state here as we might already be in the middle of importing
5058 * it, which is especially bad if we're holding the PGM lock; see the comment at the end of vmxHCImportGuestStateEx().
5059 */
5060 hmR0VmxLeave(pVCpu, false /* fImportState */);
5061 pVCpu->hmr0.s.fLeaveDone = true;
5062 }
5063
5064 /* Leave HM context, takes care of local init (term). */
5065 int rc = HMR0LeaveCpu(pVCpu);
5066 AssertRC(rc);
5067
5068 /* Restore longjmp state. */
5069 VMMRZCallRing3Enable(pVCpu);
5070 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreempt);
5071 break;
5072 }
5073
5074 case RTTHREADCTXEVENT_IN:
5075 {
5076 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5077 VMCPU_ASSERT_EMT(pVCpu);
5078
5079 /* Do the EMT scheduled L1D and MDS flush here if needed. */
5080 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
5081 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
5082 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
5083 hmR0MdsClear();
5084
5085 /* No longjmps here, as we don't want to trigger preemption (& its hook) while resuming. */
5086 VMMRZCallRing3Disable(pVCpu);
5087 Log4Func(("Resumed: HostCpuId=%u\n", RTMpCpuId()));
5088
5089 /* Initialize the bare minimum state required for HM. This takes care of
5090 initializing VT-x if necessary (onlined CPUs, local init etc.) */
5091 int rc = hmR0EnterCpu(pVCpu);
5092 AssertRC(rc);
5093 Assert( (pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
5094 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
5095
5096 /* Load the active VMCS as the current one. */
5097 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5098 rc = hmR0VmxLoadVmcs(pVmcsInfo);
5099 AssertRC(rc);
5100 Log4Func(("Resumed: Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
5101 pVCpu->hmr0.s.fLeaveDone = false;
5102
5103 /* Restore longjmp state. */
5104 VMMRZCallRing3Enable(pVCpu);
5105 break;
5106 }
5107
5108 default:
5109 break;
5110 }
5111}
5112
5113
5114/**
5115 * Exports the host state into the VMCS host-state area.
5116 * Sets up the VM-exit MSR-load area.
5117 *
5118 * The CPU state will be loaded from these fields on every successful VM-exit.
5119 *
5120 * @returns VBox status code.
5121 * @param pVCpu The cross context virtual CPU structure.
5122 *
5123 * @remarks No-long-jump zone!!!
5124 */
5125static int hmR0VmxExportHostState(PVMCPUCC pVCpu)
5126{
5127 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5128
5129 int rc = VINF_SUCCESS;
5130 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
5131 {
5132 uint64_t uHostCr4 = hmR0VmxExportHostControlRegs();
5133
5134 rc = hmR0VmxExportHostSegmentRegs(pVCpu, uHostCr4);
5135 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5136
5137 hmR0VmxExportHostMsrs(pVCpu);
5138
5139 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_HOST_CONTEXT;
5140 }
5141 return rc;
5142}
5143
5144
5145/**
5146 * Saves the host state in the VMCS host-state.
5147 *
5148 * @returns VBox status code.
5149 * @param pVCpu The cross context virtual CPU structure.
5150 *
5151 * @remarks No-long-jump zone!!!
5152 */
5153VMMR0DECL(int) VMXR0ExportHostState(PVMCPUCC pVCpu)
5154{
5155 AssertPtr(pVCpu);
5156 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5157
5158 /*
5159 * Export the host state here while entering HM context.
5160 * When thread-context hooks are used, we might get preempted and have to re-save the host
5161 * state but most of the time we won't be, so do it here before we disable interrupts.
5162 */
5163 return hmR0VmxExportHostState(pVCpu);
5164}
5165
5166
5167/**
5168 * Exports the guest state into the VMCS guest-state area.
5169 *
5170 * This will typically be done before VM-entry when the guest-CPU state and the
5171 * VMCS state may potentially be out of sync.
5172 *
5173 * Sets up the VM-entry MSR-load and VM-exit MSR-store areas. Sets up the
5174 * VM-entry controls.
5175 * Sets up the appropriate VMX non-root function to execute guest code based on
5176 * the guest CPU mode.
5177 *
5178 * @returns VBox strict status code.
5179 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5180 * without unrestricted guest execution and the VMMDev is not presently
5181 * mapped (e.g. EFI32).
5182 *
5183 * @param pVCpu The cross context virtual CPU structure.
5184 * @param pVmxTransient The VMX-transient structure.
5185 *
5186 * @remarks No-long-jump zone!!!
5187 */
5188static VBOXSTRICTRC hmR0VmxExportGuestState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5189{
5190 AssertPtr(pVCpu);
5191 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5192 LogFlowFunc(("pVCpu=%p\n", pVCpu));
5193
5194 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExportGuestState, x);
5195
5196 /*
5197 * Determine real-on-v86 mode.
5198 * Used when the guest is in real-mode and unrestricted guest execution is not used.
5199 */
5200 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
5201 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUnrestrictedGuest
5202 || !CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx))
5203 pVmcsInfoShared->RealMode.fRealOnV86Active = false;
5204 else
5205 {
5206 Assert(!pVmxTransient->fIsNestedGuest);
5207 pVmcsInfoShared->RealMode.fRealOnV86Active = true;
5208 }
5209
5210 /*
5211 * Any ordering dependency among the sub-functions below must be explicitly stated using comments.
5212 * Ideally, assert that the cross-dependent bits are up-to-date at the point of using it.
5213 */
5214 int rc = vmxHCExportGuestEntryExitCtls(pVCpu, pVmxTransient);
5215 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5216
5217 rc = vmxHCExportGuestCR0(pVCpu, pVmxTransient);
5218 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5219
5220 VBOXSTRICTRC rcStrict = vmxHCExportGuestCR3AndCR4(pVCpu, pVmxTransient);
5221 if (rcStrict == VINF_SUCCESS)
5222 { /* likely */ }
5223 else
5224 {
5225 Assert(rcStrict == VINF_EM_RESCHEDULE_REM || RT_FAILURE_NP(rcStrict));
5226 return rcStrict;
5227 }
5228
5229 rc = vmxHCExportGuestSegRegsXdtr(pVCpu, pVmxTransient);
5230 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5231
5232 rc = hmR0VmxExportGuestMsrs(pVCpu, pVmxTransient);
5233 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5234
5235 vmxHCExportGuestApicTpr(pVCpu, pVmxTransient);
5236 vmxHCExportGuestXcptIntercepts(pVCpu, pVmxTransient);
5237 vmxHCExportGuestRip(pVCpu);
5238 hmR0VmxExportGuestRsp(pVCpu);
5239 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5240
5241 rc = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
5242 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5243
5244 /* Clear any bits that may be set but are exported unconditionally, as well as unused/reserved bits. */
5245 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~( (HM_CHANGED_GUEST_GPRS_MASK & ~HM_CHANGED_GUEST_RSP)
5246 | HM_CHANGED_GUEST_CR2
5247 | (HM_CHANGED_GUEST_DR_MASK & ~HM_CHANGED_GUEST_DR7)
5248 | HM_CHANGED_GUEST_X87
5249 | HM_CHANGED_GUEST_SSE_AVX
5250 | HM_CHANGED_GUEST_OTHER_XSAVE
5251 | HM_CHANGED_GUEST_XCRx
5252 | HM_CHANGED_GUEST_KERNEL_GS_BASE /* Part of lazy or auto load-store MSRs. */
5253 | HM_CHANGED_GUEST_SYSCALL_MSRS /* Part of lazy or auto load-store MSRs. */
5254 | HM_CHANGED_GUEST_TSC_AUX
5255 | HM_CHANGED_GUEST_OTHER_MSRS
5256 | (HM_CHANGED_KEEPER_STATE_MASK & ~HM_CHANGED_VMX_MASK)));
5257
5258 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExportGuestState, x);
5259 return rc;
5260}
5261
5262
5263/**
5264 * Exports the state shared between the host and guest into the VMCS.
5265 *
5266 * @param pVCpu The cross context virtual CPU structure.
5267 * @param pVmxTransient The VMX-transient structure.
5268 *
5269 * @remarks No-long-jump zone!!!
5270 */
5271static void hmR0VmxExportSharedState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5272{
5273 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5274 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5275
5276 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_DR_MASK)
5277 {
5278 int rc = hmR0VmxExportSharedDebugState(pVCpu, pVmxTransient);
5279 AssertRC(rc);
5280 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_DR_MASK;
5281
5282 /* Loading shared debug bits might have changed eflags.TF bit for debugging purposes. */
5283 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_RFLAGS)
5284 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5285 }
5286
5287 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_GUEST_LAZY_MSRS)
5288 {
5289 hmR0VmxLazyLoadGuestMsrs(pVCpu);
5290 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_VMX_GUEST_LAZY_MSRS;
5291 }
5292
5293 AssertMsg(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE),
5294 ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
5295}
5296
5297
5298/**
5299 * Worker for loading the guest-state bits in the inner VT-x execution loop.
5300 *
5301 * @returns Strict VBox status code (i.e. informational status codes too).
5302 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5303 * without unrestricted guest execution and the VMMDev is not presently
5304 * mapped (e.g. EFI32).
5305 *
5306 * @param pVCpu The cross context virtual CPU structure.
5307 * @param pVmxTransient The VMX-transient structure.
5308 *
5309 * @remarks No-long-jump zone!!!
5310 */
5311static VBOXSTRICTRC hmR0VmxExportGuestStateOptimal(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5312{
5313 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5314 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5315
5316#ifdef HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
5317 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
5318#endif
5319
5320 /*
5321 * For many VM-exits only RIP/RSP/RFLAGS (and HWVIRT state when executing a nested-guest)
5322 * changes. First try to export only these without going through all other changed-flag checks.
5323 */
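    /* Example (illustrative, not from the original source): after a typical I/O-port or CPUID
       VM-exit only HM_CHANGED_GUEST_RIP (and perhaps RFLAGS) is dirty, so the fMinimalMask path
       below gets away with a handful of VMWRITEs instead of walking the full
       hmR0VmxExportGuestState() routine. */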
5324 VBOXSTRICTRC rcStrict;
5325 uint64_t const fCtxMask = HM_CHANGED_ALL_GUEST & ~HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE;
5326 uint64_t const fMinimalMask = HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT;
5327 uint64_t const fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
5328
5329 /* If only RIP/RSP/RFLAGS/HWVIRT changed, export only those (quicker, happens more often). */
5330 if ( (fCtxChanged & fMinimalMask)
5331 && !(fCtxChanged & (fCtxMask & ~fMinimalMask)))
5332 {
5333 vmxHCExportGuestRip(pVCpu);
5334 hmR0VmxExportGuestRsp(pVCpu);
5335 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5336 rcStrict = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
5337 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportMinimal);
5338 }
5339 /* If anything else also changed, go through the full export routine and export as required. */
5340 else if (fCtxChanged & fCtxMask)
5341 {
5342 rcStrict = hmR0VmxExportGuestState(pVCpu, pVmxTransient);
5343 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
5344 { /* likely */}
5345 else
5346 {
5347 AssertMsg(rcStrict == VINF_EM_RESCHEDULE_REM, ("Failed to export guest state! rc=%Rrc\n",
5348 VBOXSTRICTRC_VAL(rcStrict)));
5349 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5350 return rcStrict;
5351 }
5352 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportFull);
5353 }
5354 /* Nothing changed, nothing to load here. */
5355 else
5356 rcStrict = VINF_SUCCESS;
5357
5358#ifdef VBOX_STRICT
5359 /* All the guest state bits should be loaded except maybe the host context and/or the shared host/guest bits. */
5360 uint64_t const fCtxChangedCur = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
5361 AssertMsg(!(fCtxChangedCur & fCtxMask), ("fCtxChangedCur=%#RX64\n", fCtxChangedCur));
5362#endif
5363 return rcStrict;
5364}
5365
5366
5367/**
5368 * Maps the APIC-access page for virtualizing APIC accesses.
5369 *
5370 * This can cause longjumps to ring-3 due to the acquisition of the PGM lock. Hence,
5371 * this is not done as part of exporting guest state; see @bugref{8721}.
5372 *
5373 * @returns VBox status code.
5374 * @param pVCpu The cross context virtual CPU structure.
5375 * @param GCPhysApicBase The guest-physical address of the APIC access page.
5376 */
5377static int hmR0VmxMapHCApicAccessPage(PVMCPUCC pVCpu, RTGCPHYS GCPhysApicBase)
5378{
5379 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
5380 Assert(GCPhysApicBase);
5381
5382 LogFunc(("Mapping HC APIC-access page at %#RGp\n", GCPhysApicBase));
5383
5384 /* Unalias the existing mapping. */
5385 int rc = PGMHandlerPhysicalReset(pVM, GCPhysApicBase);
5386 AssertRCReturn(rc, rc);
5387
5388 /* Map the HC APIC-access page in place of the MMIO page, also updates the shadow page tables if necessary. */
5389 Assert(pVM->hmr0.s.vmx.HCPhysApicAccess != NIL_RTHCPHYS);
5390 rc = IOMR0MmioMapMmioHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hmr0.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
5391 AssertRCReturn(rc, rc);
5392
5393 return VINF_SUCCESS;
5394}
5395
5396
5397/**
5398 * Worker function passed to RTMpOnSpecific() that is to be called on the target
5399 * CPU.
5400 *
5401 * @param idCpu The ID for the CPU the function is called on.
5402 * @param pvUser1 Null, not used.
5403 * @param pvUser2 Null, not used.
5404 */
5405static DECLCALLBACK(void) hmR0DispatchHostNmi(RTCPUID idCpu, void *pvUser1, void *pvUser2)
5406{
5407 RT_NOREF3(idCpu, pvUser1, pvUser2);
5408 VMXDispatchHostNmi();
5409}
5410
5411
5412/**
5413 * Dispatches an NMI on the host CPU that received it.
5414 *
5415 * @returns VBox status code.
5416 * @param pVCpu The cross context virtual CPU structure.
5417 * @param pVmcsInfo The VMCS info. object corresponding to the VMCS that was
5418 * executing when receiving the host NMI in VMX non-root
5419 * operation.
5420 */
5421static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
5422{
5423 RTCPUID const idCpu = pVmcsInfo->idHostCpuExec;
5424 Assert(idCpu != NIL_RTCPUID);
5425
5426 /*
5427 * We don't want to delay dispatching the NMI any more than we have to. However,
5428 * we have already chosen -not- to dispatch NMIs when interrupts were still disabled
5429 * after executing guest or nested-guest code for the following reasons:
5430 *
5431 * - We would need to perform VMREADs with interrupts disabled, which is orders of
5432 * magnitude worse when we run as a nested hypervisor without VMCS shadowing
5433 * supported by the host hypervisor.
5434 *
5435 * - It affects the common VM-exit scenario and keeps interrupts disabled for a
5436 * longer period of time just for handling an edge case like host NMIs which do
5437 * not occur nearly as frequently as other VM-exits.
5438 *
5439 * Let's cover the most likely scenario first. Check if we are on the target CPU
5440 * and dispatch the NMI right away. This should be much faster than calling into
5441 * RTMpOnSpecific() machinery.
5442 */
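    /* Editorial note (assumption, not from the original source): the interrupt-disable bracket
       below presumably ensures we cannot be migrated to another host CPU between reading
       RTMpCpuId() and calling VMXDispatchHostNmi(); otherwise the NMI could end up being raised
       on the wrong CPU. */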
5443 bool fDispatched = false;
5444 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
5445 if (idCpu == RTMpCpuId())
5446 {
5447 VMXDispatchHostNmi();
5448 fDispatched = true;
5449 }
5450 ASMSetFlags(fEFlags);
5451 if (fDispatched)
5452 {
5453 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC);
5454 return VINF_SUCCESS;
5455 }
5456
5457 /*
5458 * RTMpOnSpecific() waits until the worker function has run on the target CPU. So
5459 * there should be no race or recursion even if we are unlucky enough to be preempted
5460 * (to the target CPU) without dispatching the host NMI above.
5461 */
5462 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGCIpi);
5463 return RTMpOnSpecific(idCpu, &hmR0DispatchHostNmi, NULL /* pvUser1 */, NULL /* pvUser2 */);
5464}
5465
5466
5467#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5468/**
5469 * Merges the guest with the nested-guest MSR bitmap in preparation of executing the
5470 * nested-guest using hardware-assisted VMX.
5471 *
5472 * @param pVCpu The cross context virtual CPU structure.
5473 * @param pVmcsInfoNstGst The nested-guest VMCS info. object.
5474 * @param pVmcsInfoGst The guest VMCS info. object.
5475 */
5476static void hmR0VmxMergeMsrBitmapNested(PCVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfoNstGst, PCVMXVMCSINFO pVmcsInfoGst)
5477{
5478 uint32_t const cbMsrBitmap = X86_PAGE_4K_SIZE;
5479 uint64_t *pu64MsrBitmap = (uint64_t *)pVmcsInfoNstGst->pvMsrBitmap;
5480 Assert(pu64MsrBitmap);
5481
5482 /*
5483 * We merge the guest MSR bitmap with the nested-guest MSR bitmap such that any
5484 * MSR that is intercepted by the guest is also intercepted while executing the
5485 * nested-guest using hardware-assisted VMX.
5486 *
5487 * Note! If the nested-guest is not using an MSR bitmap, every MSR must cause a
5488 * nested-guest VM-exit even if the outer guest is not intercepting some
5489 * MSRs. We cannot assume the caller has initialized the nested-guest
5490 * MSR bitmap in this case.
5491 *
5492 * The nested hypervisor may also switch whether it uses MSR bitmaps on
5493 * each of its VM-entries, hence initializing it once per VM while setting
5494 * up the nested-guest VMCS is not sufficient.
5495 */
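    /* Worked example (illustrative only): if a given MSR's read-intercept bit is 1 in the guest
       bitmap and 0 in the nested-guest bitmap, the OR-merge below leaves it 1, so reading that
       MSR still causes a VM-exit while the nested-guest runs; a bit that is 0 in both bitmaps
       stays 0 and the access is not intercepted. */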
5496 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5497 if (pVmcsNstGst->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
5498 {
5499 uint64_t const *pu64MsrBitmapNstGst = (uint64_t const *)&pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap[0];
5500 uint64_t const *pu64MsrBitmapGst = (uint64_t const *)pVmcsInfoGst->pvMsrBitmap;
5501 Assert(pu64MsrBitmapNstGst);
5502 Assert(pu64MsrBitmapGst);
5503
5504 /** @todo Detect and use EVEX.POR? */
5505 uint32_t const cFrags = cbMsrBitmap / sizeof(uint64_t);
5506 for (uint32_t i = 0; i < cFrags; i++)
5507 pu64MsrBitmap[i] = pu64MsrBitmapNstGst[i] | pu64MsrBitmapGst[i];
5508 }
5509 else
5510 ASMMemFill32(pu64MsrBitmap, cbMsrBitmap, UINT32_C(0xffffffff));
5511}
5512
5513
5514/**
5515 * Merges the guest VMCS into the nested-guest VMCS controls in preparation of
5516 * hardware-assisted VMX execution of the nested-guest.
5517 *
5518 * For a guest, we don't modify these controls once we set up the VMCS and hence
5519 * this function is never called.
5520 *
5521 * For nested-guests, the nested hypervisor provides these controls on every
5522 * nested-guest VM-entry and could potentially change them every time, so we need to
5523 * merge them before every nested-guest VM-entry.
5524 *
5525 * @returns VBox status code.
5526 * @param pVCpu The cross context virtual CPU structure.
5527 */
5528static int hmR0VmxMergeVmcsNested(PVMCPUCC pVCpu)
5529{
5530 PVMCC const pVM = pVCpu->CTX_SUFF(pVM);
5531 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
5532 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5533
5534 /*
5535 * Merge the controls with the requirements of the guest VMCS.
5536 *
5537 * We do not need to validate the nested-guest VMX features specified in the nested-guest
5538 * VMCS with the features supported by the physical CPU as it's already done by the
5539 * VMLAUNCH/VMRESUME instruction emulation.
5540 *
5541 * This is because the VMX features exposed by CPUM (through CPUID/MSRs) to the guest are
5542 * derived from the VMX features supported by the physical CPU.
5543 */
5544
5545 /* Pin-based VM-execution controls. */
5546 uint32_t const u32PinCtls = pVmcsNstGst->u32PinCtls | pVmcsInfoGst->u32PinCtls;
5547
5548 /* Processor-based VM-execution controls. */
5549 uint32_t u32ProcCtls = (pVmcsNstGst->u32ProcCtls & ~VMX_PROC_CTLS_USE_IO_BITMAPS)
5550 | (pVmcsInfoGst->u32ProcCtls & ~( VMX_PROC_CTLS_INT_WINDOW_EXIT
5551 | VMX_PROC_CTLS_NMI_WINDOW_EXIT
5552 | VMX_PROC_CTLS_MOV_DR_EXIT /* hmR0VmxExportSharedDebugState makes
5553 sure guest DRx regs are loaded. */
5554 | VMX_PROC_CTLS_USE_TPR_SHADOW
5555 | VMX_PROC_CTLS_MONITOR_TRAP_FLAG));
5556
5557 /* Secondary processor-based VM-execution controls. */
5558 uint32_t const u32ProcCtls2 = (pVmcsNstGst->u32ProcCtls2 & ~VMX_PROC_CTLS2_VPID)
5559 | (pVmcsInfoGst->u32ProcCtls2 & ~( VMX_PROC_CTLS2_VIRT_APIC_ACCESS
5560 | VMX_PROC_CTLS2_INVPCID
5561 | VMX_PROC_CTLS2_VMCS_SHADOWING
5562 | VMX_PROC_CTLS2_RDTSCP
5563 | VMX_PROC_CTLS2_XSAVES_XRSTORS
5564 | VMX_PROC_CTLS2_APIC_REG_VIRT
5565 | VMX_PROC_CTLS2_VIRT_INT_DELIVERY
5566 | VMX_PROC_CTLS2_VMFUNC));
5567
5568 /*
5569 * VM-entry controls:
5570 * These controls contain state that depends on the nested-guest state (primarily
5571 * the EFER MSR) and is thus not constant between VMLAUNCH/VMRESUME and the nested-guest
5572 * VM-exit. Although the nested hypervisor cannot change it, we need to in order to
5573 * properly continue executing the nested-guest if the EFER MSR changes but does not
5574 * cause a nested-guest VM-exit.
5575 *
5576 * VM-exit controls:
5577 * These controls specify the host state on return. We cannot use the controls from
5578 * the nested hypervisor state as-is, as they would contain the guest state rather than
5579 * the host state. Since the host state is subject to change (e.g. preemption, trips
5580 * to ring-3, longjmp and rescheduling to a different host CPU), they are not constant
5581 * through VMLAUNCH/VMRESUME and the nested-guest VM-exit.
5582 *
5583 * VM-entry MSR-load:
5584 * The guest MSRs from the VM-entry MSR-load area are already loaded into the guest-CPU
5585 * context by the VMLAUNCH/VMRESUME instruction emulation.
5586 *
5587 * VM-exit MSR-store:
5588 * The VM-exit emulation will take care of populating the MSRs from the guest-CPU context
5589 * back into the VM-exit MSR-store area.
5590 *
5591 * VM-exit MSR-load areas:
5592 * This must contain the real host MSRs with hardware-assisted VMX execution. Hence, we
5593 * can entirely ignore what the nested hypervisor wants to load here.
5594 */
5595
5596 /*
5597 * Exception bitmap.
5598 *
5599 * We could remove #UD from the guest bitmap and merge it with the nested-guest bitmap
5600 * here (and avoid doing anything while exporting nested-guest state), but to keep the
5601 * code more flexible, should intercepting exceptions become more dynamic in the future,
5602 * we do it as part of exporting the nested-guest state.
5603 */
5604 uint32_t const u32XcptBitmap = pVmcsNstGst->u32XcptBitmap | pVmcsInfoGst->u32XcptBitmap;
5605
5606 /*
5607 * CR0/CR4 guest/host mask.
5608 *
5609 * Modifications by the nested-guest to CR0/CR4 bits owned by the host and the guest must
5610 * cause VM-exits, so we need to merge them here.
5611 */
5612 uint64_t const u64Cr0Mask = pVmcsNstGst->u64Cr0Mask.u | pVmcsInfoGst->u64Cr0Mask;
5613 uint64_t const u64Cr4Mask = pVmcsNstGst->u64Cr4Mask.u | pVmcsInfoGst->u64Cr4Mask;
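    /* Illustration (hypothetical bit choices, not from the original source): if the outer guest
       owns, say, CR0.NE in its mask and the nested hypervisor additionally owns CR0.PE, the merged
       mask above has both bits set, so a nested-guest write to either bit is intercepted and can
       be handled by the appropriate party. */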
5614
5615 /*
5616 * Page-fault error-code mask and match.
5617 *
5618 * Although we require unrestricted guest execution (and thereby nested-paging) for
5619 * hardware-assisted VMX execution of nested-guests and thus the outer guest doesn't
5620 * normally intercept #PFs, it might intercept them for debugging purposes.
5621 *
5622 * If the outer guest is not intercepting #PFs, we can use the nested-guest #PF filters.
5623 * If the outer guest is intercepting #PFs, we must intercept all #PFs.
5624 */
5625 uint32_t u32XcptPFMask;
5626 uint32_t u32XcptPFMatch;
5627 if (!(pVmcsInfoGst->u32XcptBitmap & RT_BIT(X86_XCPT_PF)))
5628 {
5629 u32XcptPFMask = pVmcsNstGst->u32XcptPFMask;
5630 u32XcptPFMatch = pVmcsNstGst->u32XcptPFMatch;
5631 }
5632 else
5633 {
5634 u32XcptPFMask = 0;
5635 u32XcptPFMatch = 0;
5636 }
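    /* Illustration (not from the original source): with both mask and match left at zero, every
       #PF error code trivially "matches", so whether #PF triggers a VM-exit is governed solely by
       the RT_BIT(X86_XCPT_PF) bit in the merged exception bitmap; non-zero values let the
       intercepting party filter by error-code bits (see the Intel SDM on the page-fault
       error-code mask/match). */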
5637
5638 /*
5639 * Pause-Loop exiting.
5640 */
5641 /** @todo r=bird: given that both pVM->hm.s.vmx.cPleGapTicks and
5642 * pVM->hm.s.vmx.cPleWindowTicks defaults to zero, I cannot see how
5643 * this will work... */
5644 uint32_t const cPleGapTicks = RT_MIN(pVM->hm.s.vmx.cPleGapTicks, pVmcsNstGst->u32PleGap);
5645 uint32_t const cPleWindowTicks = RT_MIN(pVM->hm.s.vmx.cPleWindowTicks, pVmcsNstGst->u32PleWindow);
5646
5647 /*
5648 * Pending debug exceptions.
5649 * Currently just copy whatever the nested-guest provides us.
5650 */
5651 uint64_t const uPendingDbgXcpts = pVmcsNstGst->u64GuestPendingDbgXcpts.u;
5652
5653 /*
5654 * I/O Bitmap.
5655 *
5656 * We do not use the I/O bitmap that may be provided by the nested hypervisor as we always
5657 * intercept all I/O port accesses.
5658 */
5659 Assert(u32ProcCtls & VMX_PROC_CTLS_UNCOND_IO_EXIT);
5660 Assert(!(u32ProcCtls & VMX_PROC_CTLS_USE_IO_BITMAPS));
5661
5662 /*
5663 * VMCS shadowing.
5664 *
5665 * We do not yet expose VMCS shadowing to the guest and thus VMCS shadowing should not be
5666 * enabled while executing the nested-guest.
5667 */
5668 Assert(!(u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING));
5669
5670 /*
5671 * APIC-access page.
5672 */
5673 RTHCPHYS HCPhysApicAccess;
5674 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5675 {
5676 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
5677 RTGCPHYS const GCPhysApicAccess = pVmcsNstGst->u64AddrApicAccess.u;
5678
5679 void *pvPage;
5680 PGMPAGEMAPLOCK PgLockApicAccess;
5681 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysApicAccess, &pvPage, &PgLockApicAccess);
5682 if (RT_SUCCESS(rc))
5683 {
5684 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysApicAccess, &HCPhysApicAccess);
5685 AssertMsgRCReturn(rc, ("Failed to get host-physical address for APIC-access page at %#RGp\n", GCPhysApicAccess), rc);
5686
5687 /** @todo Handle proper releasing of page-mapping lock later. */
5688 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockApicAccess);
5689 }
5690 else
5691 return rc;
5692 }
5693 else
5694 HCPhysApicAccess = 0;
5695
5696 /*
5697 * Virtual-APIC page and TPR threshold.
5698 */
5699 RTHCPHYS HCPhysVirtApic;
5700 uint32_t u32TprThreshold;
5701 if (u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
5702 {
5703 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW);
5704 RTGCPHYS const GCPhysVirtApic = pVmcsNstGst->u64AddrVirtApic.u;
5705
5706 void *pvPage;
5707 PGMPAGEMAPLOCK PgLockVirtApic;
5708 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysVirtApic, &pvPage, &PgLockVirtApic);
5709 if (RT_SUCCESS(rc))
5710 {
5711 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysVirtApic, &HCPhysVirtApic);
5712 AssertMsgRCReturn(rc, ("Failed to get host-physical address for virtual-APIC page at %#RGp\n", GCPhysVirtApic), rc);
5713
5714 /** @todo Handle proper releasing of page-mapping lock later. */
5715 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockVirtApic);
5716 }
5717 else
5718 return rc;
5719
5720 u32TprThreshold = pVmcsNstGst->u32TprThreshold;
5721 }
5722 else
5723 {
5724 HCPhysVirtApic = 0;
5725 u32TprThreshold = 0;
5726
5727 /*
5728 * We must make sure CR8 reads/writes cause VM-exits when TPR shadowing is not
5729 * used by the nested hypervisor. Preventing MMIO accesses to the physical APIC will
5730 * be taken care of by EPT/shadow paging.
5731 */
5732 if (pVM->hmr0.s.fAllow64BitGuests)
5733 u32ProcCtls |= VMX_PROC_CTLS_CR8_STORE_EXIT
5734 | VMX_PROC_CTLS_CR8_LOAD_EXIT;
5735 }
5736
5737 /*
5738 * Validate basic assumptions.
5739 */
5740 PVMXVMCSINFO pVmcsInfoNstGst = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
5741 Assert(pVM->hmr0.s.vmx.fUnrestrictedGuest);
5742 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
5743 Assert(hmGetVmxActiveVmcsInfo(pVCpu) == pVmcsInfoNstGst);
5744
5745 /*
5746 * Commit it to the nested-guest VMCS.
5747 */
5748 int rc = VINF_SUCCESS;
5749 if (pVmcsInfoNstGst->u32PinCtls != u32PinCtls)
5750 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, u32PinCtls);
5751 if (pVmcsInfoNstGst->u32ProcCtls != u32ProcCtls)
5752 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, u32ProcCtls);
5753 if (pVmcsInfoNstGst->u32ProcCtls2 != u32ProcCtls2)
5754 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, u32ProcCtls2);
5755 if (pVmcsInfoNstGst->u32XcptBitmap != u32XcptBitmap)
5756 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32XcptBitmap);
5757 if (pVmcsInfoNstGst->u64Cr0Mask != u64Cr0Mask)
5758 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask);
5759 if (pVmcsInfoNstGst->u64Cr4Mask != u64Cr4Mask)
5760 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask);
5761 if (pVmcsInfoNstGst->u32XcptPFMask != u32XcptPFMask)
5762 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, u32XcptPFMask);
5763 if (pVmcsInfoNstGst->u32XcptPFMatch != u32XcptPFMatch)
5764 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, u32XcptPFMatch);
5765 if ( !(u32ProcCtls & VMX_PROC_CTLS_PAUSE_EXIT)
5766 && (u32ProcCtls2 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT))
5767 {
5768 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT);
5769 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, cPleGapTicks);
5770 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, cPleWindowTicks);
5771 }
5772 if (pVmcsInfoNstGst->HCPhysVirtApic != HCPhysVirtApic)
5773 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
5774 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold);
5775 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5776 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
5777 rc |= VMXWriteVmcsNw(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, uPendingDbgXcpts);
5778 AssertRC(rc);
5779
5780 /*
5781 * Update the nested-guest VMCS cache.
5782 */
5783 pVmcsInfoNstGst->u32PinCtls = u32PinCtls;
5784 pVmcsInfoNstGst->u32ProcCtls = u32ProcCtls;
5785 pVmcsInfoNstGst->u32ProcCtls2 = u32ProcCtls2;
5786 pVmcsInfoNstGst->u32XcptBitmap = u32XcptBitmap;
5787 pVmcsInfoNstGst->u64Cr0Mask = u64Cr0Mask;
5788 pVmcsInfoNstGst->u64Cr4Mask = u64Cr4Mask;
5789 pVmcsInfoNstGst->u32XcptPFMask = u32XcptPFMask;
5790 pVmcsInfoNstGst->u32XcptPFMatch = u32XcptPFMatch;
5791 pVmcsInfoNstGst->HCPhysVirtApic = HCPhysVirtApic;
5792
5793 /*
5794 * We need to flush the TLB if we are switching the APIC-access page address.
5795 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
5796 */
5797 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5798 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = true;
5799
5800 /*
5801 * MSR bitmap.
5802 *
5803 * The MSR bitmap address has already been initialized while setting up the nested-guest
5804 * VMCS; here we need to merge the MSR bitmaps.
5805 */
5806 if (u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
5807 hmR0VmxMergeMsrBitmapNested(pVCpu, pVmcsInfoNstGst, pVmcsInfoGst);
5808
5809 return VINF_SUCCESS;
5810}
5811#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
5812
5813
5814/**
5815 * Does the preparations before executing guest code in VT-x.
5816 *
5817 * This may cause longjmps to ring-3 and may even result in rescheduling to the
5818 * recompiler/IEM. We must be cautious about what we do here when committing
5819 * guest-state information into the VMCS, on the assumption that we will definitely
5820 * execute the guest in VT-x mode.
5821 *
5822 * If we fall back to the recompiler/IEM after updating the VMCS and clearing
5823 * the common-state (TRPM/forceflags), we must undo those changes so that the
5824 * recompiler/IEM can (and should) use them when it resumes guest execution.
5825 * Otherwise such operations must be done when we can no longer exit to ring-3.
5826 *
5827 * @returns Strict VBox status code (i.e. informational status codes too).
5828 * @retval VINF_SUCCESS if we can proceed with running the guest, interrupts
5829 * have been disabled.
5830 * @retval VINF_VMX_VMEXIT if a nested-guest VM-exit occurs (e.g., while evaluating
5831 * pending events).
5832 * @retval VINF_EM_RESET if a triple-fault occurs while injecting a
5833 * double-fault into the guest.
5834 * @retval VINF_EM_DBG_STEPPED if @a fStepping is true and an event was
5835 * dispatched directly.
5836 * @retval VINF_* scheduling changes, we have to go back to ring-3.
5837 *
5838 * @param pVCpu The cross context virtual CPU structure.
5839 * @param pVmxTransient The VMX-transient structure.
5840 * @param fStepping Whether we are single-stepping the guest in the
5841 * hypervisor debugger. Makes us ignore some of the reasons
5842 * for returning to ring-3, and return VINF_EM_DBG_STEPPED
5843 * if event dispatching took place.
5844 */
5845static VBOXSTRICTRC hmR0VmxPreRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, bool fStepping)
5846{
5847 Assert(VMMRZCallRing3IsEnabled(pVCpu));
5848
5849 Log4Func(("fIsNested=%RTbool fStepping=%RTbool\n", pVmxTransient->fIsNestedGuest, fStepping));
5850
5851#ifdef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
5852 if (pVmxTransient->fIsNestedGuest)
5853 {
5854 RT_NOREF2(pVCpu, fStepping);
5855 Log2Func(("Rescheduling to IEM due to nested-hwvirt or forced IEM exec -> VINF_EM_RESCHEDULE_REM\n"));
5856 return VINF_EM_RESCHEDULE_REM;
5857 }
5858#endif
5859
5860 /*
5861 * Check and process force flag actions, some of which might require us to go back to ring-3.
5862 */
5863 VBOXSTRICTRC rcStrict = vmxHCCheckForceFlags(pVCpu, pVmxTransient->fIsNestedGuest, fStepping);
5864 if (rcStrict == VINF_SUCCESS)
5865 {
5866 /* FFs don't get set all the time. */
5867#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5868 if ( pVmxTransient->fIsNestedGuest
5869 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
5870 {
5871 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
5872 return VINF_VMX_VMEXIT;
5873 }
5874#endif
5875 }
5876 else
5877 return rcStrict;
5878
5879 /*
5880 * Virtualize memory-mapped accesses to the physical APIC (may take locks).
5881 */
5882 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
5883 if ( !pVCpu->hm.s.vmx.u64GstMsrApicBase
5884 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5885 && PDMHasApic(pVM))
5886 {
5887 /* Get the APIC base MSR from the virtual APIC device. */
5888 uint64_t const uApicBaseMsr = APICGetBaseMsrNoCheck(pVCpu);
5889
5890 /* Map the APIC access page. */
5891 int rc = hmR0VmxMapHCApicAccessPage(pVCpu, uApicBaseMsr & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
5892 AssertRCReturn(rc, rc);
5893
5894 /* Update the per-VCPU cache of the APIC base MSR corresponding to the mapped APIC access page. */
5895 pVCpu->hm.s.vmx.u64GstMsrApicBase = uApicBaseMsr;
5896 }
5897
5898#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5899 /*
5900 * Merge guest VMCS controls with the nested-guest VMCS controls.
5901 *
5902 * Even if we have not executed the guest prior to this (e.g. when resuming from a
5903 * saved state), we should be okay with merging controls as we initialize the
5904 * guest VMCS controls as part of VM setup phase.
5905 */
5906 if ( pVmxTransient->fIsNestedGuest
5907 && !pVCpu->hm.s.vmx.fMergedNstGstCtls)
5908 {
5909 int rc = hmR0VmxMergeVmcsNested(pVCpu);
5910 AssertRCReturn(rc, rc);
5911 pVCpu->hm.s.vmx.fMergedNstGstCtls = true;
5912 }
5913#endif
5914
5915 /*
5916 * Evaluate events to be injected into the guest.
5917 *
5918 * Events in TRPM can be injected without inspecting the guest state.
5919 * If any new events (interrupts/NMI) are pending currently, we try to set up the
5920     * guest to cause a VM-exit the next time it is ready to receive the event.
5921 */
5922 if (TRPMHasTrap(pVCpu))
5923 vmxHCTrpmTrapToPendingEvent(pVCpu);
5924
5925 uint32_t fIntrState;
5926#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5927 if (!pVmxTransient->fIsNestedGuest)
5928 rcStrict = vmxHCEvaluatePendingEvent(pVCpu, pVmxTransient->pVmcsInfo, &fIntrState);
5929 else
5930 rcStrict = vmxHCEvaluatePendingEventNested(pVCpu, pVmxTransient->pVmcsInfo, &fIntrState);
5931
5932 /*
5933     * If something failed while evaluating pending events (unlikely), or if we were
5934     * preparing to run a nested-guest but performed a nested-guest VM-exit, we should bail.
5935 */
5936 if (rcStrict != VINF_SUCCESS)
5937 return rcStrict;
5938 if ( pVmxTransient->fIsNestedGuest
5939 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
5940 {
5941 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
5942 return VINF_VMX_VMEXIT;
5943 }
5944#else
5945 rcStrict = vmxHCEvaluatePendingEvent(pVCpu, pVmxTransient->pVmcsInfo, &fIntrState);
5946 Assert(rcStrict == VINF_SUCCESS);
5947#endif
5948
5949 /*
5950 * Event injection may take locks (currently the PGM lock for real-on-v86 case) and thus
5951 * needs to be done with longjmps or interrupts + preemption enabled. Event injection might
5952 * also result in triple-faulting the VM.
5953 *
5954 * With nested-guests, the above does not apply since unrestricted guest execution is a
5955 * requirement. Regardless, we do this here to avoid duplicating code elsewhere.
5956 */
5957 rcStrict = vmxHCInjectPendingEvent(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->fIsNestedGuest, fIntrState, fStepping);
5958 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
5959 { /* likely */ }
5960 else
5961 {
5962 AssertMsg(rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping),
5963 ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
5964 return rcStrict;
5965 }
5966
5967 /*
5968 * A longjump might result in importing CR3 even for VM-exits that don't necessarily
5969 * import CR3 themselves. We will need to update them here, as even as late as the above
5970     * vmxHCInjectPendingEvent() call may lazily import guest-CPU state on demand causing
5971 * the below force flags to be set.
5972 */
5973 if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
5974 {
5975 Assert(!(ASMAtomicUoReadU64(&pVCpu->cpum.GstCtx.fExtrn) & CPUMCTX_EXTRN_CR3));
5976 int rc2 = PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
5977 AssertMsgReturn(rc2 == VINF_SUCCESS || rc2 == VINF_PGM_SYNC_CR3,
5978 ("%Rrc\n", rc2), RT_FAILURE_NP(rc2) ? rc2 : VERR_IPE_UNEXPECTED_INFO_STATUS);
5979 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
5980 }
5981
5982#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5983 /* Paranoia. */
5984 Assert(!pVmxTransient->fIsNestedGuest || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
5985#endif
5986
5987 /*
5988 * No longjmps to ring-3 from this point on!!!
5989 * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic.
5990 * This also disables flushing of the R0-logger instance (if any).
5991 */
5992 VMMRZCallRing3Disable(pVCpu);
5993
5994 /*
5995 * Export the guest state bits.
5996 *
5997 * We cannot perform longjmps while loading the guest state because we do not preserve the
5998 * host/guest state (although the VMCS will be preserved) across longjmps which can cause
5999 * CPU migration.
6000 *
6001 * If we are injecting events to a real-on-v86 mode guest, we would have updated RIP and some segment
6002 * registers. Hence, exporting of the guest state needs to be done -after- injection of events.
6003 */
6004 rcStrict = hmR0VmxExportGuestStateOptimal(pVCpu, pVmxTransient);
6005 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
6006 { /* likely */ }
6007 else
6008 {
6009 VMMRZCallRing3Enable(pVCpu);
6010 return rcStrict;
6011 }
6012
6013 /*
6014 * We disable interrupts so that we don't miss any interrupts that would flag preemption
6015 * (IPI/timers etc.) when thread-context hooks aren't used and we've been running with
6016 * preemption disabled for a while. Since this is purely to aid the
6017 * RTThreadPreemptIsPending() code, it doesn't matter that it may temporarily reenable and
6018     * disable interrupts on NT.
6019 *
6020     * We need to check for force-flags that could've possibly been altered since we last
6021 * checked them (e.g. by PDMGetInterrupt() leaving the PDM critical section,
6022 * see @bugref{6398}).
6023 *
6024 * We also check a couple of other force-flags as a last opportunity to get the EMT back
6025 * to ring-3 before executing guest code.
6026 */
6027 pVmxTransient->fEFlags = ASMIntDisableFlags();
6028
6029 if ( ( !VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
6030 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
6031 || ( fStepping /* Optimized for the non-stepping case, so a bit of unnecessary work when stepping. */
6032 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK & ~(VMCPU_FF_TIMER | VMCPU_FF_PDM_CRITSECT))) )
6033 {
6034 if (!RTThreadPreemptIsPending(NIL_RTTHREAD))
6035 {
6036#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6037 /*
6038 * If we are executing a nested-guest make sure that we should intercept subsequent
6039 * events. The one we are injecting might be part of VM-entry. This is mainly to keep
6040 * the VM-exit instruction emulation happy.
6041 */
6042 if (pVmxTransient->fIsNestedGuest)
6043 CPUMSetGuestVmxInterceptEvents(&pVCpu->cpum.GstCtx, true);
6044#endif
6045
6046 /*
6047 * We've injected any pending events. This is really the point of no return (to ring-3).
6048 *
6049 * Note! The caller expects to continue with interrupts & longjmps disabled on successful
6050 * returns from this function, so do -not- enable them here.
6051 */
6052 pVCpu->hm.s.Event.fPending = false;
6053 return VINF_SUCCESS;
6054 }
6055
6056 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPendingHostIrq);
6057 rcStrict = VINF_EM_RAW_INTERRUPT;
6058 }
6059 else
6060 {
6061 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
6062 rcStrict = VINF_EM_RAW_TO_R3;
6063 }
6064
6065 ASMSetFlags(pVmxTransient->fEFlags);
6066 VMMRZCallRing3Enable(pVCpu);
6067
6068 return rcStrict;
6069}
6070
6071
6072/**
6073 * Final preparations before executing guest code using hardware-assisted VMX.
6074 *
6075 * We can no longer get preempted to a different host CPU and there are no returns
6076 * to ring-3. We ignore any errors that may happen from this point (e.g. VMWRITE
6077 * failures); this function is not intended to fail barring unrecoverable hardware
6078 * errors.
6079 *
6080 * @param pVCpu The cross context virtual CPU structure.
6081 * @param pVmxTransient The VMX-transient structure.
6082 *
6083 * @remarks Called with preemption disabled.
6084 * @remarks No-long-jump zone!!!
6085 */
6086static void hmR0VmxPreRunGuestCommitted(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
6087{
6088 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6089 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6090 Assert(!pVCpu->hm.s.Event.fPending);
6091
6092 /*
6093 * Indicate start of guest execution and where poking EMT out of guest-context is recognized.
6094 */
6095 VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
6096 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
6097
6098 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
6099 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
6100 PHMPHYSCPU pHostCpu = hmR0GetCurrentCpu();
6101 RTCPUID const idCurrentCpu = pHostCpu->idCpu;
6102
6103 if (!CPUMIsGuestFPUStateActive(pVCpu))
6104 {
6105 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestFpuState, x);
6106 if (CPUMR0LoadGuestFPU(pVM, pVCpu) == VINF_CPUM_HOST_CR0_MODIFIED)
6107 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT;
6108 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestFpuState, x);
6109 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadGuestFpu);
6110 }
6111
6112 /*
6113 * Re-export the host state bits as we may've been preempted (only happens when
6114 * thread-context hooks are used or when the VM start function changes) or if
6115 * the host CR0 is modified while loading the guest FPU state above.
6116 *
6117 * The 64-on-32 switcher saves the (64-bit) host state into the VMCS and if we
6118 * changed the switcher back to 32-bit, we *must* save the 32-bit host state here,
6119 * see @bugref{8432}.
6120 *
6121 * This may also happen when switching to/from a nested-guest VMCS without leaving
6122 * ring-0.
6123 */
6124 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
6125 {
6126 hmR0VmxExportHostState(pVCpu);
6127 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportHostState);
6128 }
6129 Assert(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT));
6130
6131 /*
6132 * Export the state shared between host and guest (FPU, debug, lazy MSRs).
6133 */
6134 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)
6135 hmR0VmxExportSharedState(pVCpu, pVmxTransient);
6136 AssertMsg(!pVCpu->hm.s.fCtxChanged, ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
6137
6138 /*
6139 * Store status of the shared guest/host debug state at the time of VM-entry.
6140 */
6141 pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu);
6142 pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu);
6143
6144 /*
6145 * Always cache the TPR-shadow if the virtual-APIC page exists, thereby skipping
6146 * more than one conditional check. The post-run side of our code shall determine
6147 * if it needs to sync. the virtual APIC TPR with the TPR-shadow.
6148 */
6149 if (pVmcsInfo->pbVirtApic)
6150 pVmxTransient->u8GuestTpr = pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR];
6151
6152 /*
6153 * Update the host MSRs values in the VM-exit MSR-load area.
6154 */
6155 if (!pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs)
6156 {
6157 if (pVmcsInfo->cExitMsrLoad > 0)
6158 hmR0VmxUpdateAutoLoadHostMsrs(pVCpu, pVmcsInfo);
6159 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = true;
6160 }
6161
6162 /*
6163 * Evaluate if we need to intercept guest RDTSC/P accesses. Set up the
6164 * VMX-preemption timer based on the next virtual sync clock deadline.
6165 */
6166 if ( !pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer
6167 || idCurrentCpu != pVCpu->hmr0.s.idLastCpu)
6168 {
6169 hmR0VmxUpdateTscOffsettingAndPreemptTimer(pVCpu, pVmxTransient, idCurrentCpu);
6170 pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = true;
6171 }
6172
6173 /* Record statistics of how often we use TSC offsetting as opposed to intercepting RDTSC/P. */
6174 bool const fIsRdtscIntercepted = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT);
6175 if (!fIsRdtscIntercepted)
6176 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
6177 else
6178 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
6179
6180 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB flushing, set this across the world switch. */
6181 hmR0VmxFlushTaggedTlb(pHostCpu, pVCpu, pVmcsInfo); /* Invalidate the appropriate guest entries from the TLB. */
6182 Assert(idCurrentCpu == pVCpu->hmr0.s.idLastCpu);
6183 pVCpu->hm.s.vmx.LastError.idCurrentCpu = idCurrentCpu; /* Record the error reporting info. with the current host CPU. */
6184 pVmcsInfo->idHostCpuState = idCurrentCpu; /* Record the CPU for which the host-state has been exported. */
6185 pVmcsInfo->idHostCpuExec = idCurrentCpu; /* Record the CPU on which we shall execute. */
6186
6187 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
6188
6189 TMNotifyStartOfExecution(pVM, pVCpu); /* Notify TM to resume its clocks when TSC is tied to execution,
6190 as we're about to start executing the guest. */
6191
6192 /*
6193 * Load the guest TSC_AUX MSR when we are not intercepting RDTSCP.
6194 *
6195 * This is done this late as updating the TSC offsetting/preemption timer above
6196 * figures out if we can skip intercepting RDTSCP by calculating the number of
6197 * host CPU ticks till the next virtual sync deadline (for the dynamic case).
6198 */
6199 if ( (pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_RDTSCP)
6200 && !fIsRdtscIntercepted)
6201 {
6202 vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_TSC_AUX);
6203
6204 /* NB: Because we call hmR0VmxAddAutoLoadStoreMsr with fUpdateHostMsr=true,
6205 it's safe even after hmR0VmxUpdateAutoLoadHostMsrs has already been done. */
6206 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX, CPUMGetGuestTscAux(pVCpu),
6207 true /* fSetReadWrite */, true /* fUpdateHostMsr */);
6208 AssertRC(rc);
6209 Assert(!pVmxTransient->fRemoveTscAuxMsr);
6210 pVmxTransient->fRemoveTscAuxMsr = true;
6211 }
6212
6213#ifdef VBOX_STRICT
6214 Assert(pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs);
6215 hmR0VmxCheckAutoLoadStoreMsrs(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest);
6216 hmR0VmxCheckHostEferMsr(pVmcsInfo);
6217 AssertRC(vmxHCCheckCachedVmcsCtls(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest));
6218#endif
6219
6220#ifdef HMVMX_ALWAYS_CHECK_GUEST_STATE
6221 /** @todo r=ramshankar: We can now probably use iemVmxVmentryCheckGuestState here.
6222 * Add a PVMXMSRS parameter to it, so that IEM can look at the host MSRs,
6223 * see @bugref{9180#c54}. */
6224 uint32_t const uInvalidReason = hmR0VmxCheckGuestState(pVCpu, pVmcsInfo);
6225 if (uInvalidReason != VMX_IGS_REASON_NOT_FOUND)
6226 Log4(("hmR0VmxCheckGuestState returned %#x\n", uInvalidReason));
6227#endif
6228}
6229
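/*
 * A minimal illustrative sketch of the entry layout used by the VM-entry/VM-exit
 * MSR auto-load/store areas that hmR0VmxUpdateAutoLoadHostMsrs and
 * hmR0VmxAddAutoLoadStoreMsr manipulate above. Per the Intel SDM each entry is
 * 16 bytes: the MSR index, a reserved dword that must be zero, and the 64-bit
 * MSR value. The struct and helper here use hypothetical "Sketch" names, not
 * VirtualBox types.
 */
#if 0 /* illustrative sketch, not built */
typedef struct VMXSKETCHAUTOMSR
{
    uint32_t u32Msr;        /* MSR index, e.g. MSR_K8_TSC_AUX. */
    uint32_t u32Reserved;   /* Must be zero. */
    uint64_t u64Value;      /* Value to load (load areas) or slot for the stored value (store area). */
} VMXSKETCHAUTOMSR;

/* Append an MSR to an area; the caller must also bump the corresponding VMCS count field. */
static void vmxSketchAppendAutoMsr(VMXSKETCHAUTOMSR *paMsrs, uint32_t *pcMsrs, uint32_t idMsr, uint64_t uValue)
{
    uint32_t const iMsr      = (*pcMsrs)++;
    paMsrs[iMsr].u32Msr      = idMsr;
    paMsrs[iMsr].u32Reserved = 0;
    paMsrs[iMsr].u64Value    = uValue;
}
#endif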
6230
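/*
 * A minimal illustrative sketch of the TPR-shadow bookkeeping done around
 * VM-entry/VM-exit above: the TPR is cached from the virtual-APIC page before
 * the run, and a differing value afterwards means the guest wrote its TPR
 * through the TPR shadow and the change must be pushed back with APICSetTpr.
 * Per the Intel SDM, a TPR-below-threshold VM-exit fires when bits 7:4 of the
 * virtual TPR drop below bits 3:0 of the TPR-threshold control. The "Sketch"
 * names and the hard-coded 0x80 (the xAPIC TPR offset) are for illustration only.
 */
#if 0 /* illustrative sketch, not built */
static bool vmxSketchIsTprBelowThreshold(uint8_t bVTpr, uint32_t uTprThreshold)
{
    return (uint32_t)(bVTpr >> 4) < (uTprThreshold & 0xf);
}

static bool vmxSketchDidGuestChangeTpr(uint8_t bTprBeforeRun, uint8_t const *pbVirtApic)
{
    return pbVirtApic[0x80 /* XAPIC_OFF_TPR */] != bTprBeforeRun;
}
#endif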
6231/**
6232 * First C routine invoked after running guest code using hardware-assisted VMX.
6233 *
6234 * @param pVCpu The cross context virtual CPU structure.
6235 * @param pVmxTransient The VMX-transient structure.
6236 * @param rcVMRun Return code of VMLAUNCH/VMRESUME.
6237 *
6238 * @remarks Called with interrupts disabled, and returns with interrupts enabled!
6239 *
6240 * @remarks No-long-jump zone!!! This function will however re-enable longjmps
6241 * unconditionally when it is safe to do so.
6242 */
6243static void hmR0VmxPostRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, int rcVMRun)
6244{
6245 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB flushing. */
6246 ASMAtomicIncU32(&pVCpu->hmr0.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for EMT poking. */
6247 pVCpu->hm.s.fCtxChanged = 0; /* Exits/longjmps to ring-3 requires saving the guest state. */
6248 pVmxTransient->fVmcsFieldsRead = 0; /* Transient fields need to be read from the VMCS. */
6249 pVmxTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */
6250 pVmxTransient->fVectoringDoublePF = false; /* Vectoring double page-fault needs to be determined later. */
6251
6252 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
6253 if (!(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT))
6254 {
6255 uint64_t uGstTsc;
6256 if (!pVmxTransient->fIsNestedGuest)
6257 uGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
6258 else
6259 {
6260 uint64_t const uNstGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
6261 uGstTsc = CPUMRemoveNestedGuestTscOffset(pVCpu, uNstGstTsc);
6262 }
6263 TMCpuTickSetLastSeen(pVCpu, uGstTsc); /* Update TM with the guest TSC. */
6264 }
6265
6266 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatPreExit, x);
6267 TMNotifyEndOfExecution(pVCpu->CTX_SUFF(pVM), pVCpu, pVCpu->hmr0.s.uTscExit); /* Notify TM that the guest is no longer running. */
6268 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
6269
6270 pVCpu->hmr0.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_REQUIRED; /* Some host state messed up by VMX needs restoring. */
6271 pVmcsInfo->fVmcsState = VMX_V_VMCS_LAUNCH_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */
6272#ifdef VBOX_STRICT
6273 hmR0VmxCheckHostEferMsr(pVmcsInfo); /* Verify that the host EFER MSR wasn't modified. */
6274#endif
6275 Assert(!ASMIntAreEnabled());
6276 ASMSetFlags(pVmxTransient->fEFlags); /* Enable interrupts. */
6277 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6278
6279#ifdef HMVMX_ALWAYS_CLEAN_TRANSIENT
6280 /*
6281 * Clean all the VMCS fields in the transient structure before reading
6282 * anything from the VMCS.
6283 */
6284 pVmxTransient->uExitReason = 0;
6285 pVmxTransient->uExitIntErrorCode = 0;
6286 pVmxTransient->uExitQual = 0;
6287 pVmxTransient->uGuestLinearAddr = 0;
6288 pVmxTransient->uExitIntInfo = 0;
6289 pVmxTransient->cbExitInstr = 0;
6290 pVmxTransient->ExitInstrInfo.u = 0;
6291 pVmxTransient->uEntryIntInfo = 0;
6292 pVmxTransient->uEntryXcptErrorCode = 0;
6293 pVmxTransient->cbEntryInstr = 0;
6294 pVmxTransient->uIdtVectoringInfo = 0;
6295 pVmxTransient->uIdtVectoringErrorCode = 0;
6296#endif
6297
6298 /*
6299 * Save the basic VM-exit reason and check if the VM-entry failed.
6300 * See Intel spec. 24.9.1 "Basic VM-exit Information".
6301 */
6302 uint32_t uExitReason;
6303 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
6304 AssertRC(rc);
6305 pVmxTransient->uExitReason = VMX_EXIT_REASON_BASIC(uExitReason);
6306 pVmxTransient->fVMEntryFailed = VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason);
6307
6308 /*
6309 * Log the VM-exit before logging anything else as otherwise it might be a
6310 * tad confusing what happens before and after the world-switch.
6311 */
6312 HMVMX_LOG_EXIT(pVCpu, uExitReason);
6313
6314 /*
6315 * Remove the TSC_AUX MSR from the auto-load/store MSR area and reset any MSR
6316 * bitmap permissions, if it was added before VM-entry.
6317 */
6318 if (pVmxTransient->fRemoveTscAuxMsr)
6319 {
6320 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX);
6321 pVmxTransient->fRemoveTscAuxMsr = false;
6322 }
6323
6324 /*
6325 * Check if VMLAUNCH/VMRESUME succeeded.
6326 * If this failed, we cause a guru meditation and cease further execution.
6327 */
6328 if (RT_LIKELY(rcVMRun == VINF_SUCCESS))
6329 {
6330 /*
6331 * Update the VM-exit history array here even if the VM-entry failed due to:
6332 * - Invalid guest state.
6333 * - MSR loading.
6334 * - Machine-check event.
6335 *
6336 * In any of the above cases we will still have a "valid" VM-exit reason
6337         * despite @a fVMEntryFailed being true.
6338 *
6339 * See Intel spec. 26.7 "VM-Entry failures during or after loading guest state".
6340 *
6341 * Note! We don't have CS or RIP at this point. Will probably address that later
6342 * by amending the history entry added here.
6343 */
6344 EMHistoryAddExit(pVCpu, EMEXIT_MAKE_FT(EMEXIT_F_KIND_VMX, pVmxTransient->uExitReason & EMEXIT_F_TYPE_MASK),
6345 UINT64_MAX, pVCpu->hmr0.s.uTscExit);
6346
6347 if (RT_LIKELY(!pVmxTransient->fVMEntryFailed))
6348 {
6349 VMMRZCallRing3Enable(pVCpu);
6350 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
6351
6352#ifdef HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
6353 vmxHCReadAllRoFieldsVmcs(pVCpu, pVmxTransient);
6354#endif
6355
6356 /*
6357             * Always import the guest-interruptibility state as we need it while evaluating
6358             * and injecting events on re-entry. We could in *theory* postpone reading it for
6359             * exits that do not involve instruction emulation, but since most exits are
6360 * for instruction emulation (exceptions being external interrupts, shadow
6361 * paging building page faults and EPT violations, and interrupt window stuff)
6362 * this is a reasonable simplification.
6363 *
6364 * We don't import CR0 (when unrestricted guest execution is unavailable) despite
6365 * checking for real-mode while exporting the state because all bits that cause
6366 * mode changes wrt CR0 are intercepted.
6367 *
6368             * Note! This mask _must_ match the default a_fDonePostExit value for the
6369             *       vmxHCImportGuestState template!
6370 */
6371 /** @todo r=bird: consider dropping the INHIBIT_XXX and fetch the state
6372 * explicitly in the exit handlers and injection function. That way we have
6373 * fewer clusters of vmread spread around the code, because the EM history
6374 * executor won't execute very many non-exiting instructions before stopping. */
6375 rc = vmxHCImportGuestState< CPUMCTX_EXTRN_INHIBIT_INT
6376 | CPUMCTX_EXTRN_INHIBIT_NMI
6377#if defined(HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE) || defined(HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE)
6378 | HMVMX_CPUMCTX_EXTRN_ALL
6379#elif defined(HMVMX_ALWAYS_SAVE_GUEST_RFLAGS)
6380 | CPUMCTX_EXTRN_RFLAGS
6381#endif
6382 , 0 /*a_fDoneLocal*/, 0 /*a_fDonePostExit*/>(pVCpu, pVmcsInfo, __FUNCTION__);
6383 AssertRC(rc);
6384
6385 /*
6386 * Sync the TPR shadow with our APIC state.
6387 */
6388 if ( !pVmxTransient->fIsNestedGuest
6389 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW))
6390 {
6391 Assert(pVmcsInfo->pbVirtApic);
6392 if (pVmxTransient->u8GuestTpr != pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR])
6393 {
6394 rc = APICSetTpr(pVCpu, pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR]);
6395 AssertRC(rc);
6396 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
6397 }
6398 }
6399
6400 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6401 Assert( pVmxTransient->fWasGuestDebugStateActive == false
6402 || pVmxTransient->fWasHyperDebugStateActive == false);
6403 return;
6404 }
6405 }
6406#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6407 else if (pVmxTransient->fIsNestedGuest)
6408 AssertMsgFailed(("VMLAUNCH/VMRESUME failed but shouldn't happen when VMLAUNCH/VMRESUME was emulated in IEM!\n"));
6409#endif
6410 else
6411 Log4Func(("VM-entry failure: rcVMRun=%Rrc fVMEntryFailed=%RTbool\n", rcVMRun, pVmxTransient->fVMEntryFailed));
6412
6413 VMMRZCallRing3Enable(pVCpu);
6414}
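/*
 * A minimal illustrative sketch of what the VMX_EXIT_REASON_BASIC and
 * VMX_EXIT_REASON_HAS_ENTRY_FAILED macros used in hmR0VmxPostRunGuest extract
 * from the 32-bit VM-exit reason VMCS field. Per the Intel SDM, bits 15:0 hold
 * the basic exit reason and bit 31 is set when the VM-entry itself failed
 * (invalid guest state, MSR loading or a machine-check event). The "Sketch"
 * names are made up.
 */
#if 0 /* illustrative sketch, not built */
static uint16_t vmxSketchBasicExitReason(uint32_t uExitReason)
{
    return (uint16_t)(uExitReason & UINT32_C(0xffff));  /* Bits 15:0. */
}

static bool vmxSketchEntryFailed(uint32_t uExitReason)
{
    return RT_BOOL(uExitReason & RT_BIT_32(31));        /* Bit 31. */
}
#endif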
6415
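/*
 * A minimal illustrative sketch of the TSC arithmetic behind the
 * TMCpuTickSetLastSeen update in hmR0VmxPostRunGuest. With RDTSC/P not
 * intercepted, the guest observes the host TSC plus the VMCS TSC offset, so the
 * last guest TSC value is reconstructed from the host TSC sampled at VM-exit.
 * For a nested guest the active VMCS carries the combined offset, so the
 * nested-guest's own offset (assuming the nested hypervisor enabled TSC
 * offsetting) is subtracted again to recover the outer guest's TSC, which is
 * roughly what CPUMRemoveNestedGuestTscOffset does. The "Sketch" names are
 * made up.
 */
#if 0 /* illustrative sketch, not built */
static uint64_t vmxSketchGuestTscAtExit(uint64_t uHostTscAtExit, uint64_t uTscOffset)
{
    return uHostTscAtExit + uTscOffset;     /* What a guest RDTSC would have returned at VM-exit. */
}

static uint64_t vmxSketchOuterGuestTsc(uint64_t uNstGstTsc, uint64_t uNstGstTscOffset)
{
    return uNstGstTsc - uNstGstTscOffset;   /* Strip the nested-guest's own offset. */
}
#endif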
6416
6417/**
6418 * Runs the guest code using hardware-assisted VMX the normal way.
6419 *
6420 * @returns VBox status code.
6421 * @param pVCpu The cross context virtual CPU structure.
6422 * @param pcLoops Pointer to the number of executed loops.
6423 */
6424static VBOXSTRICTRC hmR0VmxRunGuestCodeNormal(PVMCPUCC pVCpu, uint32_t *pcLoops)
6425{
6426 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6427 Assert(pcLoops);
6428 Assert(*pcLoops <= cMaxResumeLoops);
6429 Assert(!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6430
6431#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6432 /*
6433 * Switch to the guest VMCS as we may have transitioned from executing the nested-guest
6434 * without leaving ring-0. Otherwise, if we came from ring-3 we would have loaded the
6435 * guest VMCS while entering the VMX ring-0 session.
6436 */
6437 if (pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
6438 {
6439 int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, false /* fSwitchToNstGstVmcs */);
6440 if (RT_SUCCESS(rc))
6441 { /* likely */ }
6442 else
6443 {
6444 LogRelFunc(("Failed to switch to the guest VMCS. rc=%Rrc\n", rc));
6445 return rc;
6446 }
6447 }
6448#endif
6449
6450 VMXTRANSIENT VmxTransient;
6451 RT_ZERO(VmxTransient);
6452 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6453 Assert(!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs);
6454
6455 /* Paranoia. */
6456 Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfo);
6457
6458 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6459 for (;;)
6460 {
6461 Assert(!HMR0SuspendPending());
6462 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6463 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6464
6465 /*
6466         * Preparatory work for running guest code; this may force us to
6467         * return to ring-3.
6468 *
6469 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6470 */
6471 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
6472 if (rcStrict != VINF_SUCCESS)
6473 break;
6474
6475 /* Interrupts are disabled at this point! */
6476 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6477 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6478 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6479 /* Interrupts are re-enabled at this point! */
6480
6481 /*
6482 * Check for errors with running the VM (VMLAUNCH/VMRESUME).
6483 */
6484 if (RT_SUCCESS(rcRun))
6485 { /* very likely */ }
6486 else
6487 {
6488 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6489 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6490 return rcRun;
6491 }
6492
6493 /*
6494 * Profile the VM-exit.
6495 */
6496 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6497 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
6498 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6499 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6500 HMVMX_START_EXIT_DISPATCH_PROF();
6501
6502 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6503
6504 /*
6505 * Handle the VM-exit.
6506 */
6507#ifdef HMVMX_USE_FUNCTION_TABLE
6508 rcStrict = g_aVMExitHandlers[VmxTransient.uExitReason].pfn(pVCpu, &VmxTransient);
6509#else
6510 rcStrict = hmR0VmxHandleExit(pVCpu, &VmxTransient);
6511#endif
6512 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6513 if (rcStrict == VINF_SUCCESS)
6514 {
6515 if (++(*pcLoops) <= cMaxResumeLoops)
6516 continue;
6517 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6518 rcStrict = VINF_EM_RAW_INTERRUPT;
6519 }
6520 break;
6521 }
6522
6523 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6524 return rcStrict;
6525}
6526
6527
6528#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6529/**
6530 * Runs the nested-guest code using hardware-assisted VMX.
6531 *
6532 * @returns VBox status code.
6533 * @param pVCpu The cross context virtual CPU structure.
6534 * @param pcLoops Pointer to the number of executed loops.
6535 *
6536 * @sa hmR0VmxRunGuestCodeNormal.
6537 */
6538static VBOXSTRICTRC hmR0VmxRunGuestCodeNested(PVMCPUCC pVCpu, uint32_t *pcLoops)
6539{
6540 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6541 Assert(pcLoops);
6542 Assert(*pcLoops <= cMaxResumeLoops);
6543 Assert(CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6544
6545 /*
6546 * Switch to the nested-guest VMCS as we may have transitioned from executing the
6547 * guest without leaving ring-0. Otherwise, if we came from ring-3 we would have
6548 * loaded the nested-guest VMCS while entering the VMX ring-0 session.
6549 */
6550 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
6551 {
6552 int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, true /* fSwitchToNstGstVmcs */);
6553 if (RT_SUCCESS(rc))
6554 { /* likely */ }
6555 else
6556 {
6557 LogRelFunc(("Failed to switch to the nested-guest VMCS. rc=%Rrc\n", rc));
6558 return rc;
6559 }
6560 }
6561
6562 VMXTRANSIENT VmxTransient;
6563 RT_ZERO(VmxTransient);
6564 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6565 VmxTransient.fIsNestedGuest = true;
6566 Assert(pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs);
6567
6568 /* Paranoia. */
6569 Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfoNstGst);
6570
6571 /* Setup pointer so PGM/IEM can query VM-exit auxiliary info on demand in ring-0. */
6572 pVCpu->hmr0.s.vmx.pVmxTransient = &VmxTransient;
6573
6574 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6575 for (;;)
6576 {
6577 Assert(!HMR0SuspendPending());
6578 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6579 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6580
6581 /*
6582         * Preparatory work for running nested-guest code; this may force us to
6583         * return to ring-3.
6584 *
6585 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6586 */
6587 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
6588 if (rcStrict != VINF_SUCCESS)
6589 break;
6590
6591 /* Interrupts are disabled at this point! */
6592 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6593 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6594 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6595 /* Interrupts are re-enabled at this point! */
6596
6597 /*
6598 * Check for errors with running the VM (VMLAUNCH/VMRESUME).
6599 */
6600 if (RT_SUCCESS(rcRun))
6601 { /* very likely */ }
6602 else
6603 {
6604 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6605 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6606 rcStrict = rcRun;
6607 break;
6608 }
6609
6610 /*
6611 * Profile the VM-exit.
6612 */
6613 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6614 STAM_COUNTER_INC(&pVCpu->hm.s.StatNestedExitAll);
6615 STAM_COUNTER_INC(&pVCpu->hm.s.aStatNestedExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6616 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6617 HMVMX_START_EXIT_DISPATCH_PROF();
6618
6619 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6620
6621 /*
6622 * Handle the VM-exit.
6623 */
6624 rcStrict = vmxHCHandleExitNested(pVCpu, &VmxTransient);
6625 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6626 if (rcStrict == VINF_SUCCESS)
6627 {
6628 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
6629 {
6630 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
6631 rcStrict = VINF_VMX_VMEXIT;
6632 }
6633 else
6634 {
6635 if (++(*pcLoops) <= cMaxResumeLoops)
6636 continue;
6637 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6638 rcStrict = VINF_EM_RAW_INTERRUPT;
6639 }
6640 }
6641 else
6642 Assert(rcStrict != VINF_VMX_VMEXIT);
6643 break;
6644 }
6645
6646 /* Ensure VM-exit auxiliary info. is no longer available. */
6647 pVCpu->hmr0.s.vmx.pVmxTransient = NULL;
6648
6649 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6650 return rcStrict;
6651}
6652#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
6653
6654
6655/** @name Execution loop for single stepping, DBGF events and expensive Dtrace
6656 * probes.
6657 *
6658 * The following few functions and the associated structure contain the bloat
6659 * necessary for providing detailed debug events and dtrace probes as well as
6660 * reliable host-side single stepping. This works on the principle of
6661 * "subclassing" the normal execution loop and workers. We replace the loop
6662 * method completely and override selected helpers to add necessary adjustments
6663 * to their core operation.
6664 *
6665 * The goal is to keep the "parent" code lean and mean, so as not to sacrifice
6666 * any performance for debug and analysis features.
6667 *
6668 * @{
6669 */
6670
6671/**
6672 * Single steps guest code using hardware-assisted VMX.
6673 *
6674 * This is -not- the same as the guest single-stepping itself (say using EFLAGS.TF)
6675 * but single-stepping through the hypervisor debugger.
6676 *
6677 * @returns Strict VBox status code (i.e. informational status codes too).
6678 * @param pVCpu The cross context virtual CPU structure.
6679 * @param pcLoops Pointer to the number of executed loops.
6680 *
6681 * @note Mostly the same as hmR0VmxRunGuestCodeNormal().
6682 */
6683static VBOXSTRICTRC hmR0VmxRunGuestCodeDebug(PVMCPUCC pVCpu, uint32_t *pcLoops)
6684{
6685 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6686 Assert(pcLoops);
6687 Assert(*pcLoops <= cMaxResumeLoops);
6688
6689 VMXTRANSIENT VmxTransient;
6690 RT_ZERO(VmxTransient);
6691 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6692
6693 /* Set HMCPU indicators. */
6694 bool const fSavedSingleInstruction = pVCpu->hm.s.fSingleInstruction;
6695 pVCpu->hm.s.fSingleInstruction = pVCpu->hm.s.fSingleInstruction || DBGFIsStepping(pVCpu);
6696 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
6697 pVCpu->hmr0.s.fUsingDebugLoop = true;
6698
6699 /* State we keep to help modify and later restore the VMCS fields we alter, and for detecting steps. */
6700 VMXRUNDBGSTATE DbgState;
6701 vmxHCRunDebugStateInit(pVCpu, &VmxTransient, &DbgState);
6702 vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
6703
6704 /*
6705 * The loop.
6706 */
6707 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6708 for (;;)
6709 {
6710 Assert(!HMR0SuspendPending());
6711 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6712 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6713 bool fStepping = pVCpu->hm.s.fSingleInstruction;
6714
6715 /* Set up VM-execution controls the next two can respond to. */
6716 vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
6717
6718 /*
6719 * Preparatory work for running guest code, this may force us to
6720         * Preparatory work for running guest code; this may force us to
6721 *
6722 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6723 */
6724 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, fStepping);
6725 if (rcStrict != VINF_SUCCESS)
6726 break;
6727
6728 /* Interrupts are disabled at this point! */
6729 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6730
6731 /* Override any obnoxious code in the above two calls. */
6732 vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
6733
6734 /*
6735 * Finally execute the guest.
6736 */
6737 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6738
6739 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6740 /* Interrupts are re-enabled at this point! */
6741
6742 /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */
6743 if (RT_SUCCESS(rcRun))
6744 { /* very likely */ }
6745 else
6746 {
6747 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6748 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6749 return rcRun;
6750 }
6751
6752 /* Profile the VM-exit. */
6753 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6754 STAM_COUNTER_INC(&pVCpu->hm.s.StatDebugExitAll);
6755 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6756 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6757 HMVMX_START_EXIT_DISPATCH_PROF();
6758
6759 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6760
6761 /*
6762         * Handle the VM-exit - we quit earlier on certain VM-exits, see vmxHCRunDebugHandleExit().
6763 */
6764 rcStrict = vmxHCRunDebugHandleExit(pVCpu, &VmxTransient, &DbgState);
6765 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6766 if (rcStrict != VINF_SUCCESS)
6767 break;
6768 if (++(*pcLoops) > cMaxResumeLoops)
6769 {
6770 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6771 rcStrict = VINF_EM_RAW_INTERRUPT;
6772 break;
6773 }
6774
6775 /*
6776 * Stepping: Did the RIP change, if so, consider it a single step.
6777 * Otherwise, make sure one of the TFs gets set.
6778 */
6779 if (fStepping)
6780 {
6781 int rc = vmxHCImportGuestStateEx(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
6782 AssertRC(rc);
6783 if ( pVCpu->cpum.GstCtx.rip != DbgState.uRipStart
6784 || pVCpu->cpum.GstCtx.cs.Sel != DbgState.uCsStart)
6785 {
6786 rcStrict = VINF_EM_DBG_STEPPED;
6787 break;
6788 }
6789 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR7);
6790 }
6791
6792 /*
6793 * Update when dtrace settings changes (DBGF kicks us, so no need to check).
6794 */
6795 if (VBOXVMM_GET_SETTINGS_SEQ_NO() != DbgState.uDtraceSettingsSeqNo)
6796 vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
6797
6798         /* Restore all controls applied by vmxHCPreRunGuestDebugStateApply above. */
6799 rcStrict = vmxHCRunDebugStateRevert(pVCpu, &VmxTransient, &DbgState, rcStrict);
6800 Assert(rcStrict == VINF_SUCCESS);
6801 }
6802
6803 /*
6804 * Clear the X86_EFL_TF if necessary.
6805 */
6806 if (pVCpu->hmr0.s.fClearTrapFlag)
6807 {
6808 int rc = vmxHCImportGuestStateEx(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_RFLAGS);
6809 AssertRC(rc);
6810 pVCpu->hmr0.s.fClearTrapFlag = false;
6811 pVCpu->cpum.GstCtx.eflags.Bits.u1TF = 0;
6812 }
6813    /** @todo there seem to be issues with the resume flag when the monitor trap
6814 * flag is pending without being used. Seen early in bios init when
6815 * accessing APIC page in protected mode. */
6816
6817/** @todo we need to do vmxHCRunDebugStateRevert here too, in case we broke
6818 * out of the above loop. */
6819
6820 /* Restore HMCPU indicators. */
6821 pVCpu->hmr0.s.fUsingDebugLoop = false;
6822 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
6823 pVCpu->hm.s.fSingleInstruction = fSavedSingleInstruction;
6824
6825 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6826 return rcStrict;
6827}
6828
6829/** @} */
6830
6831
6832/**
6833 * Checks if any expensive dtrace probes are enabled and we should go to the
6834 * debug loop.
6835 *
6836 * @returns true if we should use debug loop, false if not.
6837 */
6838static bool hmR0VmxAnyExpensiveProbesEnabled(void)
6839{
6840 /* It's probably faster to OR the raw 32-bit counter variables together.
6841 Since the variables are in an array and the probes are next to one
6842 another (more or less), we have good locality. So, better read
6843       eight or nine cache lines every time and only have one conditional, than
6844 128+ conditionals, right? */
6845 return ( VBOXVMM_R0_HMVMX_VMEXIT_ENABLED_RAW() /* expensive too due to context */
6846 | VBOXVMM_XCPT_DE_ENABLED_RAW()
6847 | VBOXVMM_XCPT_DB_ENABLED_RAW()
6848 | VBOXVMM_XCPT_BP_ENABLED_RAW()
6849 | VBOXVMM_XCPT_OF_ENABLED_RAW()
6850 | VBOXVMM_XCPT_BR_ENABLED_RAW()
6851 | VBOXVMM_XCPT_UD_ENABLED_RAW()
6852 | VBOXVMM_XCPT_NM_ENABLED_RAW()
6853 | VBOXVMM_XCPT_DF_ENABLED_RAW()
6854 | VBOXVMM_XCPT_TS_ENABLED_RAW()
6855 | VBOXVMM_XCPT_NP_ENABLED_RAW()
6856 | VBOXVMM_XCPT_SS_ENABLED_RAW()
6857 | VBOXVMM_XCPT_GP_ENABLED_RAW()
6858 | VBOXVMM_XCPT_PF_ENABLED_RAW()
6859 | VBOXVMM_XCPT_MF_ENABLED_RAW()
6860 | VBOXVMM_XCPT_AC_ENABLED_RAW()
6861 | VBOXVMM_XCPT_XF_ENABLED_RAW()
6862 | VBOXVMM_XCPT_VE_ENABLED_RAW()
6863 | VBOXVMM_XCPT_SX_ENABLED_RAW()
6864 | VBOXVMM_INT_SOFTWARE_ENABLED_RAW()
6865 | VBOXVMM_INT_HARDWARE_ENABLED_RAW()
6866 ) != 0
6867 || ( VBOXVMM_INSTR_HALT_ENABLED_RAW()
6868 | VBOXVMM_INSTR_MWAIT_ENABLED_RAW()
6869 | VBOXVMM_INSTR_MONITOR_ENABLED_RAW()
6870 | VBOXVMM_INSTR_CPUID_ENABLED_RAW()
6871 | VBOXVMM_INSTR_INVD_ENABLED_RAW()
6872 | VBOXVMM_INSTR_WBINVD_ENABLED_RAW()
6873 | VBOXVMM_INSTR_INVLPG_ENABLED_RAW()
6874 | VBOXVMM_INSTR_RDTSC_ENABLED_RAW()
6875 | VBOXVMM_INSTR_RDTSCP_ENABLED_RAW()
6876 | VBOXVMM_INSTR_RDPMC_ENABLED_RAW()
6877 | VBOXVMM_INSTR_RDMSR_ENABLED_RAW()
6878 | VBOXVMM_INSTR_WRMSR_ENABLED_RAW()
6879 | VBOXVMM_INSTR_CRX_READ_ENABLED_RAW()
6880 | VBOXVMM_INSTR_CRX_WRITE_ENABLED_RAW()
6881 | VBOXVMM_INSTR_DRX_READ_ENABLED_RAW()
6882 | VBOXVMM_INSTR_DRX_WRITE_ENABLED_RAW()
6883 | VBOXVMM_INSTR_PAUSE_ENABLED_RAW()
6884 | VBOXVMM_INSTR_XSETBV_ENABLED_RAW()
6885 | VBOXVMM_INSTR_SIDT_ENABLED_RAW()
6886 | VBOXVMM_INSTR_LIDT_ENABLED_RAW()
6887 | VBOXVMM_INSTR_SGDT_ENABLED_RAW()
6888 | VBOXVMM_INSTR_LGDT_ENABLED_RAW()
6889 | VBOXVMM_INSTR_SLDT_ENABLED_RAW()
6890 | VBOXVMM_INSTR_LLDT_ENABLED_RAW()
6891 | VBOXVMM_INSTR_STR_ENABLED_RAW()
6892 | VBOXVMM_INSTR_LTR_ENABLED_RAW()
6893 | VBOXVMM_INSTR_GETSEC_ENABLED_RAW()
6894 | VBOXVMM_INSTR_RSM_ENABLED_RAW()
6895 | VBOXVMM_INSTR_RDRAND_ENABLED_RAW()
6896 | VBOXVMM_INSTR_RDSEED_ENABLED_RAW()
6897 | VBOXVMM_INSTR_XSAVES_ENABLED_RAW()
6898 | VBOXVMM_INSTR_XRSTORS_ENABLED_RAW()
6899 | VBOXVMM_INSTR_VMM_CALL_ENABLED_RAW()
6900 | VBOXVMM_INSTR_VMX_VMCLEAR_ENABLED_RAW()
6901 | VBOXVMM_INSTR_VMX_VMLAUNCH_ENABLED_RAW()
6902 | VBOXVMM_INSTR_VMX_VMPTRLD_ENABLED_RAW()
6903 | VBOXVMM_INSTR_VMX_VMPTRST_ENABLED_RAW()
6904 | VBOXVMM_INSTR_VMX_VMREAD_ENABLED_RAW()
6905 | VBOXVMM_INSTR_VMX_VMRESUME_ENABLED_RAW()
6906 | VBOXVMM_INSTR_VMX_VMWRITE_ENABLED_RAW()
6907 | VBOXVMM_INSTR_VMX_VMXOFF_ENABLED_RAW()
6908 | VBOXVMM_INSTR_VMX_VMXON_ENABLED_RAW()
6909 | VBOXVMM_INSTR_VMX_VMFUNC_ENABLED_RAW()
6910 | VBOXVMM_INSTR_VMX_INVEPT_ENABLED_RAW()
6911 | VBOXVMM_INSTR_VMX_INVVPID_ENABLED_RAW()
6912 | VBOXVMM_INSTR_VMX_INVPCID_ENABLED_RAW()
6913 ) != 0
6914 || ( VBOXVMM_EXIT_TASK_SWITCH_ENABLED_RAW()
6915 | VBOXVMM_EXIT_HALT_ENABLED_RAW()
6916 | VBOXVMM_EXIT_MWAIT_ENABLED_RAW()
6917 | VBOXVMM_EXIT_MONITOR_ENABLED_RAW()
6918 | VBOXVMM_EXIT_CPUID_ENABLED_RAW()
6919 | VBOXVMM_EXIT_INVD_ENABLED_RAW()
6920 | VBOXVMM_EXIT_WBINVD_ENABLED_RAW()
6921 | VBOXVMM_EXIT_INVLPG_ENABLED_RAW()
6922 | VBOXVMM_EXIT_RDTSC_ENABLED_RAW()
6923 | VBOXVMM_EXIT_RDTSCP_ENABLED_RAW()
6924 | VBOXVMM_EXIT_RDPMC_ENABLED_RAW()
6925 | VBOXVMM_EXIT_RDMSR_ENABLED_RAW()
6926 | VBOXVMM_EXIT_WRMSR_ENABLED_RAW()
6927 | VBOXVMM_EXIT_CRX_READ_ENABLED_RAW()
6928 | VBOXVMM_EXIT_CRX_WRITE_ENABLED_RAW()
6929 | VBOXVMM_EXIT_DRX_READ_ENABLED_RAW()
6930 | VBOXVMM_EXIT_DRX_WRITE_ENABLED_RAW()
6931 | VBOXVMM_EXIT_PAUSE_ENABLED_RAW()
6932 | VBOXVMM_EXIT_XSETBV_ENABLED_RAW()
6933 | VBOXVMM_EXIT_SIDT_ENABLED_RAW()
6934 | VBOXVMM_EXIT_LIDT_ENABLED_RAW()
6935 | VBOXVMM_EXIT_SGDT_ENABLED_RAW()
6936 | VBOXVMM_EXIT_LGDT_ENABLED_RAW()
6937 | VBOXVMM_EXIT_SLDT_ENABLED_RAW()
6938 | VBOXVMM_EXIT_LLDT_ENABLED_RAW()
6939 | VBOXVMM_EXIT_STR_ENABLED_RAW()
6940 | VBOXVMM_EXIT_LTR_ENABLED_RAW()
6941 | VBOXVMM_EXIT_GETSEC_ENABLED_RAW()
6942 | VBOXVMM_EXIT_RSM_ENABLED_RAW()
6943 | VBOXVMM_EXIT_RDRAND_ENABLED_RAW()
6944 | VBOXVMM_EXIT_RDSEED_ENABLED_RAW()
6945 | VBOXVMM_EXIT_XSAVES_ENABLED_RAW()
6946 | VBOXVMM_EXIT_XRSTORS_ENABLED_RAW()
6947 | VBOXVMM_EXIT_VMM_CALL_ENABLED_RAW()
6948 | VBOXVMM_EXIT_VMX_VMCLEAR_ENABLED_RAW()
6949 | VBOXVMM_EXIT_VMX_VMLAUNCH_ENABLED_RAW()
6950 | VBOXVMM_EXIT_VMX_VMPTRLD_ENABLED_RAW()
6951 | VBOXVMM_EXIT_VMX_VMPTRST_ENABLED_RAW()
6952 | VBOXVMM_EXIT_VMX_VMREAD_ENABLED_RAW()
6953 | VBOXVMM_EXIT_VMX_VMRESUME_ENABLED_RAW()
6954 | VBOXVMM_EXIT_VMX_VMWRITE_ENABLED_RAW()
6955 | VBOXVMM_EXIT_VMX_VMXOFF_ENABLED_RAW()
6956 | VBOXVMM_EXIT_VMX_VMXON_ENABLED_RAW()
6957 | VBOXVMM_EXIT_VMX_VMFUNC_ENABLED_RAW()
6958 | VBOXVMM_EXIT_VMX_INVEPT_ENABLED_RAW()
6959 | VBOXVMM_EXIT_VMX_INVVPID_ENABLED_RAW()
6960 | VBOXVMM_EXIT_VMX_INVPCID_ENABLED_RAW()
6961 | VBOXVMM_EXIT_VMX_EPT_VIOLATION_ENABLED_RAW()
6962 | VBOXVMM_EXIT_VMX_EPT_MISCONFIG_ENABLED_RAW()
6963 | VBOXVMM_EXIT_VMX_VAPIC_ACCESS_ENABLED_RAW()
6964 | VBOXVMM_EXIT_VMX_VAPIC_WRITE_ENABLED_RAW()
6965 ) != 0;
6966}
6967
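/*
 * A minimal illustrative sketch of the branch-reduction trick described in
 * hmR0VmxAnyExpensiveProbesEnabled above: instead of testing every
 * probe-enabled counter with its own conditional, the raw counters are OR-ed
 * into one accumulator and tested once, turning the check into mostly
 * sequential cache-line reads. The "Sketch" name is made up.
 */
#if 0 /* illustrative sketch, not built */
static bool vmxSketchAnyCounterNonZero(uint32_t const *pau32Counters, size_t cCounters)
{
    uint32_t uAccum = 0;
    for (size_t i = 0; i < cCounters; i++)
        uAccum |= pau32Counters[i];         /* Single pass, one conditional at the end. */
    return uAccum != 0;
}
#endif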
6968
6969/**
6970 * Runs the guest using hardware-assisted VMX.
6971 *
6972 * @returns Strict VBox status code (i.e. informational status codes too).
6973 * @param pVCpu The cross context virtual CPU structure.
6974 */
6975VMMR0DECL(VBOXSTRICTRC) VMXR0RunGuestCode(PVMCPUCC pVCpu)
6976{
6977 AssertPtr(pVCpu);
6978 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
6979 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6980 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
6981 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
6982
6983 VBOXSTRICTRC rcStrict;
6984 uint32_t cLoops = 0;
6985 for (;;)
6986 {
6987#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6988 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(pCtx);
6989#else
6990 NOREF(pCtx);
6991 bool const fInNestedGuestMode = false;
6992#endif
6993 if (!fInNestedGuestMode)
6994 {
6995 if ( !pVCpu->hm.s.fUseDebugLoop
6996 && (!VBOXVMM_ANY_PROBES_ENABLED() || !hmR0VmxAnyExpensiveProbesEnabled())
6997 && !DBGFIsStepping(pVCpu)
6998 && !pVCpu->CTX_SUFF(pVM)->dbgf.ro.cEnabledInt3Breakpoints)
6999 rcStrict = hmR0VmxRunGuestCodeNormal(pVCpu, &cLoops);
7000 else
7001 rcStrict = hmR0VmxRunGuestCodeDebug(pVCpu, &cLoops);
7002 }
7003#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
7004 else
7005 rcStrict = hmR0VmxRunGuestCodeNested(pVCpu, &cLoops);
7006
7007 if (rcStrict == VINF_VMX_VMLAUNCH_VMRESUME)
7008 {
7009 Assert(CPUMIsGuestInVmxNonRootMode(pCtx));
7010 continue;
7011 }
7012 if (rcStrict == VINF_VMX_VMEXIT)
7013 {
7014 Assert(!CPUMIsGuestInVmxNonRootMode(pCtx));
7015 continue;
7016 }
7017#endif
7018 break;
7019 }
7020
7021 int const rcLoop = VBOXSTRICTRC_VAL(rcStrict);
7022 switch (rcLoop)
7023 {
7024 case VERR_EM_INTERPRETER: rcStrict = VINF_EM_RAW_EMULATE_INSTR; break;
7025 case VINF_EM_RESET: rcStrict = VINF_EM_TRIPLE_FAULT; break;
7026 }
7027
7028 int rc2 = hmR0VmxExitToRing3(pVCpu, rcStrict);
7029 if (RT_FAILURE(rc2))
7030 {
7031 pVCpu->hm.s.u32HMError = (uint32_t)VBOXSTRICTRC_VAL(rcStrict);
7032 rcStrict = rc2;
7033 }
7034 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
7035 Assert(!VMMR0AssertionIsNotificationSet(pVCpu));
7036 return rcStrict;
7037}
7038