VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HMVMXR0.cpp@103131

Last change on this file since 103131 was 102664, checked in by vboxsync, 10 months ago

VMM/HMVMXR0: Nested VMX: bugref:10318 Logging.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 281.3 KB
1/* $Id: HMVMXR0.cpp 102664 2023-12-21 07:49:12Z vboxsync $ */
2/** @file
3 * HM VMX (Intel VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2012-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_HM
33#define VMCPU_INCL_CPUM_GST_CTX
34#include <iprt/x86.h>
35#include <iprt/asm-amd64-x86.h>
36#include <iprt/thread.h>
37#include <iprt/mem.h>
38#include <iprt/mp.h>
39
40#include <VBox/vmm/pdmapi.h>
41#include <VBox/vmm/dbgf.h>
42#include <VBox/vmm/iem.h>
43#include <VBox/vmm/iom.h>
44#include <VBox/vmm/tm.h>
45#include <VBox/vmm/em.h>
46#include <VBox/vmm/gcm.h>
47#include <VBox/vmm/gim.h>
48#include <VBox/vmm/apic.h>
49#include "HMInternal.h"
50#include <VBox/vmm/vmcc.h>
51#include <VBox/vmm/hmvmxinline.h>
52#include "HMVMXR0.h"
53#include "VMXInternal.h"
54#include "dtrace/VBoxVMM.h"
55
56
57/*********************************************************************************************************************************
58* Defined Constants And Macros *
59*********************************************************************************************************************************/
60#ifdef DEBUG_ramshankar
61# define HMVMX_ALWAYS_SAVE_GUEST_RFLAGS
62# define HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
63# define HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE
64# define HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
65# define HMVMX_ALWAYS_CLEAN_TRANSIENT
66# define HMVMX_ALWAYS_CHECK_GUEST_STATE
67# define HMVMX_ALWAYS_TRAP_ALL_XCPTS
68# define HMVMX_ALWAYS_TRAP_PF
69# define HMVMX_ALWAYS_FLUSH_TLB
70# define HMVMX_ALWAYS_SWAP_EFER
71#endif
72
73/** Enables the fAlwaysInterceptMovDRx related code. */
74#define VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX 1
75
76
77/*********************************************************************************************************************************
78* Structures and Typedefs *
79*********************************************************************************************************************************/
80/**
81 * VMX page allocation information.
82 */
83typedef struct
84{
85 uint32_t fValid; /**< Whether to allocate this page (e.g., based on a CPU feature). */
86 uint32_t uPadding0; /**< Padding to ensure arrays of these structs are aligned to a multiple of 8. */
87 PRTHCPHYS pHCPhys; /**< Where to store the host-physical address of the allocation. */
88 PRTR0PTR ppVirt; /**< Where to store the host-virtual address of the allocation. */
89} VMXPAGEALLOCINFO;
90/** Pointer to VMX page-allocation info. */
91typedef VMXPAGEALLOCINFO *PVMXPAGEALLOCINFO;
92/** Pointer to a const VMX page-allocation info. */
93typedef const VMXPAGEALLOCINFO *PCVMXPAGEALLOCINFO;
94AssertCompileSizeAlignment(VMXPAGEALLOCINFO, 8);
95
96
97/*********************************************************************************************************************************
98* Internal Functions *
99*********************************************************************************************************************************/
100static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient);
101static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo);
102
103
104/*********************************************************************************************************************************
105* Global Variables *
106*********************************************************************************************************************************/
107/** The DR6 value after writing zero to the register.
108 * Set by VMXR0GlobalInit(). */
109static uint64_t g_fDr6Zeroed = 0;
110
111
112/**
113 * Checks if the given MSR is part of the lastbranch-from-IP MSR stack.
114 * @returns @c true if it's part of LBR stack, @c false otherwise.
115 *
116 * @param pVM The cross context VM structure.
117 * @param idMsr The MSR.
118 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
119 * Optional, can be NULL.
120 *
121 * @remarks Must only be called when LBR is enabled.
122 */
123DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchFromMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
124{
125 Assert(pVM->hmr0.s.vmx.fLbr);
126 Assert(pVM->hmr0.s.vmx.idLbrFromIpMsrFirst);
127 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
128 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
129 if (idxMsr < cLbrStack)
130 {
131 if (pidxMsr)
132 *pidxMsr = idxMsr;
133 return true;
134 }
135 return false;
136}
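/*
 * Worked example of the index math above (illustrative only; the 0x680..0x68f
 * from-IP stack is a typical Intel layout and is not taken from this file):
 * with idLbrFromIpMsrFirst=0x680 and idLbrFromIpMsrLast=0x68f, cLbrStack is 16,
 * so idMsr=0x683 yields idxMsr=3 and returns true, while idMsr=0x690 yields
 * idxMsr=16 (>= cLbrStack) and returns false. MSRs below the first one are also
 * rejected, since the unsigned subtraction wraps them to huge indices.
 */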
137
138
139/**
140 * Checks if the given MSR is part of the lastbranch-to-IP MSR stack.
141 * @returns @c true if it's part of LBR stack, @c false otherwise.
142 *
143 * @param pVM The cross context VM structure.
144 * @param idMsr The MSR.
145 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
146 * Optional, can be NULL.
147 *
148 * @remarks Must only be called when LBR is enabled and when lastbranch-to-IP MSRs
149 * are supported by the CPU (see hmR0VmxSetupLbrMsrRange).
150 */
151DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchToMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
152{
153 Assert(pVM->hmr0.s.vmx.fLbr);
154 if (pVM->hmr0.s.vmx.idLbrToIpMsrFirst)
155 {
156 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrToIpMsrLast - pVM->hmr0.s.vmx.idLbrToIpMsrFirst + 1;
157 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
158 if (idxMsr < cLbrStack)
159 {
160 if (pidxMsr)
161 *pidxMsr = idxMsr;
162 return true;
163 }
164 }
165 return false;
166}
167
168
169/**
170 * Gets the active (in use) VMCS info. object for the specified VCPU.
171 *
172 * This is either the guest or nested-guest VMCS info. and need not necessarily
173 * pertain to the "current" VMCS (in the VMX definition of the term). For instance,
174 * if the VM-entry failed due to an invalid-guest state, we may have "cleared" the
175 * current VMCS while returning to ring-3. However, the VMCS info. object for that
176 * VMCS would still be active and returned here so that we could dump the VMCS
177 * fields to ring-3 for diagnostics. This function is thus only used to
178 * distinguish between the nested-guest or guest VMCS.
179 *
180 * @returns The active VMCS information.
181 * @param pVCpu The cross context virtual CPU structure.
182 *
183 * @thread EMT.
184 * @remarks This function may be called with preemption or interrupts disabled!
185 */
186DECLINLINE(PVMXVMCSINFO) hmGetVmxActiveVmcsInfo(PVMCPUCC pVCpu)
187{
188 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
189 return &pVCpu->hmr0.s.vmx.VmcsInfo;
190 return &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
191}
192
193
194/**
195 * Returns whether the VM-exit MSR-store area differs from the VM-exit MSR-load
196 * area.
197 *
198 * @returns @c true if it's different, @c false otherwise.
199 * @param pVmcsInfo The VMCS info. object.
200 */
201DECL_FORCE_INLINE(bool) hmR0VmxIsSeparateExitMsrStoreAreaVmcs(PCVMXVMCSINFO pVmcsInfo)
202{
203 return RT_BOOL( pVmcsInfo->pvGuestMsrStore != pVmcsInfo->pvGuestMsrLoad
204 && pVmcsInfo->pvGuestMsrStore);
205}
206
207
208/**
209 * Sets the given Processor-based VM-execution controls.
210 *
211 * @param pVmxTransient The VMX-transient structure.
212 * @param uProcCtls The Processor-based VM-execution controls to set.
213 */
214static void hmR0VmxSetProcCtlsVmcs(PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
215{
216 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
217 if ((pVmcsInfo->u32ProcCtls & uProcCtls) != uProcCtls)
218 {
219 pVmcsInfo->u32ProcCtls |= uProcCtls;
220 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
221 AssertRC(rc);
222 }
223}
224
225
226/**
227 * Removes the given Processor-based VM-execution controls.
228 *
229 * @param pVCpu The cross context virtual CPU structure.
230 * @param pVmxTransient The VMX-transient structure.
231 * @param uProcCtls The Processor-based VM-execution controls to remove.
232 *
233 * @remarks When executing a nested-guest, this will not remove any of the specified
234 * controls if the nested hypervisor has set any one of them.
235 */
236static void hmR0VmxRemoveProcCtlsVmcs(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
237{
238 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
239 if (pVmcsInfo->u32ProcCtls & uProcCtls)
240 {
241#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
242 if ( !pVmxTransient->fIsNestedGuest
243 || !CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, uProcCtls))
244#else
245 NOREF(pVCpu);
246 if (!pVmxTransient->fIsNestedGuest)
247#endif
248 {
249 pVmcsInfo->u32ProcCtls &= ~uProcCtls;
250 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
251 AssertRC(rc);
252 }
253 }
254}
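/*
 * Illustrative use of the two helpers above (hypothetical at this point in the
 * file): hmR0VmxSetProcCtlsVmcs(pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT) makes
 * guest RDTSC trap, and hmR0VmxRemoveProcCtlsVmcs(pVCpu, pVmxTransient,
 * VMX_PROC_CTLS_RDTSC_EXIT) drops the intercept again -- unless a nested
 * hypervisor still wants it, in which case the removal is skipped as per the
 * remarks above.
 */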
255
256
257/**
258 * Sets the TSC offset for the current VMCS.
259 *
260 * @param uTscOffset The TSC offset to set.
261 * @param pVmcsInfo The VMCS info. object.
262 */
263static void hmR0VmxSetTscOffsetVmcs(PVMXVMCSINFO pVmcsInfo, uint64_t uTscOffset)
264{
265 if (pVmcsInfo->u64TscOffset != uTscOffset)
266 {
267 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, uTscOffset);
268 AssertRC(rc);
269 pVmcsInfo->u64TscOffset = uTscOffset;
270 }
271}
272
273
274/**
275 * Loads the VMCS specified by the VMCS info. object.
276 *
277 * @returns VBox status code.
278 * @param pVmcsInfo The VMCS info. object.
279 *
280 * @remarks Can be called with interrupts disabled.
281 */
282static int hmR0VmxLoadVmcs(PVMXVMCSINFO pVmcsInfo)
283{
284 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
285 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
286
287 return VMXLoadVmcs(pVmcsInfo->HCPhysVmcs);
288}
289
290
291/**
292 * Clears the VMCS specified by the VMCS info. object.
293 *
294 * @returns VBox status code.
295 * @param pVmcsInfo The VMCS info. object.
296 *
297 * @remarks Can be called with interrupts disabled.
298 */
299static int hmR0VmxClearVmcs(PVMXVMCSINFO pVmcsInfo)
300{
301 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
302 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
303
304 int rc = VMXClearVmcs(pVmcsInfo->HCPhysVmcs);
305 if (RT_SUCCESS(rc))
306 pVmcsInfo->fVmcsState = VMX_V_VMCS_LAUNCH_STATE_CLEAR;
307 return rc;
308}
309
310
311/**
312 * Checks whether the MSR belongs to the set of guest MSRs that we restore
313 * lazily while leaving VT-x.
314 *
315 * @returns true if it does, false otherwise.
316 * @param pVCpu The cross context virtual CPU structure.
317 * @param idMsr The MSR to check.
318 */
319static bool hmR0VmxIsLazyGuestMsr(PCVMCPUCC pVCpu, uint32_t idMsr)
320{
321 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
322 {
323 switch (idMsr)
324 {
325 case MSR_K8_LSTAR:
326 case MSR_K6_STAR:
327 case MSR_K8_SF_MASK:
328 case MSR_K8_KERNEL_GS_BASE:
329 return true;
330 }
331 }
332 return false;
333}
334
335
336/**
337 * Loads a set of guest MSRs to allow read/write passthru to the guest.
338 *
339 * The name of this function is slightly confusing. This function does NOT
340 * postpone loading, but loads the MSR right now. "hmR0VmxLazy" is simply a
341 * common prefix for functions dealing with "lazy restoration" of the shared
342 * MSRs.
343 *
344 * @param pVCpu The cross context virtual CPU structure.
345 *
346 * @remarks No-long-jump zone!!!
347 */
348static void hmR0VmxLazyLoadGuestMsrs(PVMCPUCC pVCpu)
349{
350 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
351 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
352
353 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
354 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
355 {
356 /*
357 * If the guest MSRs are not loaded -and- if all the guest MSRs are identical
358 * to the MSRs on the CPU (which are the saved host MSRs, see assertion above) then
359 * we can skip a few MSR writes.
360 *
361 * Otherwise, either 1. they're not loaded but differ from the saved host values on
362 * the CPU, or 2. they're loaded and the guest MSR values in the guest-CPU context
363 * might differ from what's currently loaded in the CPU. In either case, we need to
364 * write the new guest MSR values to the CPU, see @bugref{8728}.
365 */
366 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
367 if ( !(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
368 && pCtx->msrKERNELGSBASE == pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase
369 && pCtx->msrLSTAR == pVCpu->hmr0.s.vmx.u64HostMsrLStar
370 && pCtx->msrSTAR == pVCpu->hmr0.s.vmx.u64HostMsrStar
371 && pCtx->msrSFMASK == pVCpu->hmr0.s.vmx.u64HostMsrSfMask)
372 {
373#ifdef VBOX_STRICT
374 Assert(ASMRdMsr(MSR_K8_KERNEL_GS_BASE) == pCtx->msrKERNELGSBASE);
375 Assert(ASMRdMsr(MSR_K8_LSTAR) == pCtx->msrLSTAR);
376 Assert(ASMRdMsr(MSR_K6_STAR) == pCtx->msrSTAR);
377 Assert(ASMRdMsr(MSR_K8_SF_MASK) == pCtx->msrSFMASK);
378#endif
379 }
380 else
381 {
382 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pCtx->msrKERNELGSBASE);
383 ASMWrMsr(MSR_K8_LSTAR, pCtx->msrLSTAR);
384 ASMWrMsr(MSR_K6_STAR, pCtx->msrSTAR);
385 /* The system call flag mask register isn't as benign and accepting of all
386 values as the above, so mask it to avoid #GP'ing on corrupted input. */
387 Assert(!(pCtx->msrSFMASK & ~(uint64_t)UINT32_MAX));
388 ASMWrMsr(MSR_K8_SF_MASK, pCtx->msrSFMASK & UINT32_MAX);
389 }
390 }
391 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_LOADED_GUEST;
392}
393
394
395/**
396 * Checks if the specified guest MSR is part of the VM-entry MSR-load area.
397 *
398 * @returns @c true if found, @c false otherwise.
399 * @param pVmcsInfo The VMCS info. object.
400 * @param idMsr The MSR to find.
401 */
402static bool hmR0VmxIsAutoLoadGuestMsr(PCVMXVMCSINFO pVmcsInfo, uint32_t idMsr)
403{
404 PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
405 uint32_t const cMsrs = pVmcsInfo->cEntryMsrLoad;
406 Assert(pMsrs);
407 Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE);
408 for (uint32_t i = 0; i < cMsrs; i++)
409 {
410 if (pMsrs[i].u32Msr == idMsr)
411 return true;
412 }
413 return false;
414}
415
416
417/**
418 * Performs lazy restoration of the set of host MSRs if they were previously
419 * loaded with guest MSR values.
420 *
421 * @param pVCpu The cross context virtual CPU structure.
422 *
423 * @remarks No-long-jump zone!!!
424 * @remarks The guest MSRs should have been saved back into the guest-CPU
425 * context by vmxHCImportGuestState()!!!
426 */
427static void hmR0VmxLazyRestoreHostMsrs(PVMCPUCC pVCpu)
428{
429 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
430 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
431
432 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
433 {
434 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
435 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
436 {
437 ASMWrMsr(MSR_K8_LSTAR, pVCpu->hmr0.s.vmx.u64HostMsrLStar);
438 ASMWrMsr(MSR_K6_STAR, pVCpu->hmr0.s.vmx.u64HostMsrStar);
439 ASMWrMsr(MSR_K8_SF_MASK, pVCpu->hmr0.s.vmx.u64HostMsrSfMask);
440 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase);
441 }
442 }
443 pVCpu->hmr0.s.vmx.fLazyMsrs &= ~(VMX_LAZY_MSRS_LOADED_GUEST | VMX_LAZY_MSRS_SAVED_HOST);
444}
445
446
447/**
448 * Sets pfnStartVm to the best suited variant.
449 *
450 * This must be called whenever anything changes relative to the hmR0VmxStartVm
451 * variant selection:
452 * - pVCpu->hm.s.fLoadSaveGuestXcr0
453 * - HM_WSF_IBPB_ENTRY in pVCpu->hmr0.s.fWorldSwitcher
454 * - HM_WSF_IBPB_EXIT in pVCpu->hmr0.s.fWorldSwitcher
455 * - Perhaps: CPUMIsGuestFPUStateActive() (windows only)
456 * - Perhaps: CPUMCTX.fXStateMask (windows only)
457 *
458 * We currently ASSUME that neither HM_WSF_IBPB_ENTRY nor HM_WSF_IBPB_EXIT
459 * can be changed at runtime.
460 */
461static void hmR0VmxUpdateStartVmFunction(PVMCPUCC pVCpu)
462{
463 static const struct CLANGWORKAROUND { PFNHMVMXSTARTVM pfn; } s_aHmR0VmxStartVmFunctions[] =
464 {
465 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
466 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
467 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
468 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
469 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
470 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
471 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
472 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
473 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
474 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
475 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
476 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
477 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
478 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
479 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
480 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
481 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
482 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
483 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
484 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
485 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
486 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
487 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
488 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
489 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
490 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
491 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
492 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
493 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
494 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
495 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
496 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
497 };
498 uintptr_t const idx = (pVCpu->hmr0.s.fLoadSaveGuestXcr0 ? 1 : 0)
499 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_ENTRY ? 2 : 0)
500 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_ENTRY ? 4 : 0)
501 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_ENTRY ? 8 : 0)
502 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_EXIT ? 16 : 0);
503 PFNHMVMXSTARTVM const pfnStartVm = s_aHmR0VmxStartVmFunctions[idx].pfn;
504 if (pVCpu->hmr0.s.vmx.pfnStartVm != pfnStartVm)
505 pVCpu->hmr0.s.vmx.pfnStartVm = pfnStartVm;
506}
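/*
 * Example of the table lookup above (purely illustrative): with
 * fLoadSaveGuestXcr0 set and only HM_WSF_IBPB_ENTRY among the world-switcher
 * flags, idx = 1 | 2 = 3, selecting
 * hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit.
 * The five index bits thus mirror the Xcr0 / IbpbEntry / L1dEntry / MdsEntry /
 * IbpbExit parts of the 32 variant names in the table.
 */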
507
508
509/**
510 * Pushes a 2-byte value onto the real-mode (in virtual-8086 mode) guest's
511 * stack.
512 *
513 * @returns Strict VBox status code (i.e. informational status codes too).
514 * @retval VINF_EM_RESET if pushing a value to the stack caused a triple-fault.
515 * @param pVCpu The cross context virtual CPU structure.
516 * @param uValue The value to push to the guest stack.
517 */
518static VBOXSTRICTRC hmR0VmxRealModeGuestStackPush(PVMCPUCC pVCpu, uint16_t uValue)
519{
520 /*
521 * The stack limit is 0xffff in real-on-virtual 8086 mode. Real-mode with weird stack limits cannot be run in
522 * virtual 8086 mode in VT-x. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers".
523 * See Intel Instruction reference for PUSH and Intel spec. 22.33.1 "Segment Wraparound".
524 */
525 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
526 if (pCtx->sp == 1)
527 return VINF_EM_RESET;
528 pCtx->sp -= sizeof(uint16_t); /* May wrap around which is expected behaviour. */
529 int rc = PGMPhysSimpleWriteGCPhys(pVCpu->CTX_SUFF(pVM), pCtx->ss.u64Base + pCtx->sp, &uValue, sizeof(uint16_t));
530 AssertRC(rc);
531 return rc;
532}
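/*
 * Example of the wraparound behaviour above: with sp=0x0000 the 2-byte value is
 * written at ss.base+0xfffe (sp wraps from 0 to 0xfffe), whereas sp=0x0001
 * leaves no room for a 2-byte push and is treated as a triple-fault
 * (VINF_EM_RESET), matching the segment-wraparound rules referenced above.
 */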
533
534
535/**
536 * Wrapper around VMXWriteVmcs16 taking a pVCpu parameter so VCC doesn't complain about
537 * unreferenced local parameters in the template code...
538 */
539DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t u16Val)
540{
541 RT_NOREF(pVCpu);
542 return VMXWriteVmcs16(uFieldEnc, u16Val);
543}
544
545
546/**
547 * Wrapper around VMXWriteVmcs32 taking a pVCpu parameter so VCC doesn't complain about
548 * unreferenced local parameters in the template code...
549 */
550DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t u32Val)
551{
552 RT_NOREF(pVCpu);
553 return VMXWriteVmcs32(uFieldEnc, u32Val);
554}
555
556
557/**
558 * Wrapper around VMXWriteVmcs64 taking a pVCpu parameter so VCC doesn't complain about
559 * unreferenced local parameters in the template code...
560 */
561DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t u64Val)
562{
563 RT_NOREF(pVCpu);
564 return VMXWriteVmcs64(uFieldEnc, u64Val);
565}
566
567
568/**
569 * Wrapper around VMXReadVmcs16 taking a pVCpu parameter so VCC doesn't complain about
570 * unreferenced local parameters in the template code...
571 */
572DECL_FORCE_INLINE(int) hmR0VmxReadVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t *pu16Val)
573{
574 RT_NOREF(pVCpu);
575 return VMXReadVmcs16(uFieldEnc, pu16Val);
576}
577
578
579/**
580 * Wrapper around VMXReadVmcs32 taking a pVCpu parameter so VCC doesn't complain about
581 * unreferenced local parameters in the template code...
582 */
583DECL_FORCE_INLINE(int) hmR0VmxReadVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t *pu32Val)
584{
585 RT_NOREF(pVCpu);
586 return VMXReadVmcs32(uFieldEnc, pu32Val);
587}
588
589
590/**
591 * Wrapper around VMXReadVmcs64 taking a pVCpu parameter so VCC doesn't complain about
592 * unreferenced local parameters in the template code...
593 */
594DECL_FORCE_INLINE(int) hmR0VmxReadVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t *pu64Val)
595{
596 RT_NOREF(pVCpu);
597 return VMXReadVmcs64(uFieldEnc, pu64Val);
598}
599
600
601/*
602 * Instantiate the code we share with the NEM darwin backend.
603 */
604#define VCPU_2_VMXSTATE(a_pVCpu) (a_pVCpu)->hm.s
605#define VCPU_2_VMXSTATS(a_pVCpu) (a_pVCpu)->hm.s
606
607#define VM_IS_VMX_UNRESTRICTED_GUEST(a_pVM) (a_pVM)->hmr0.s.vmx.fUnrestrictedGuest
608#define VM_IS_VMX_NESTED_PAGING(a_pVM) (a_pVM)->hmr0.s.fNestedPaging
609#define VM_IS_VMX_PREEMPT_TIMER_USED(a_pVM) (a_pVM)->hmr0.s.vmx.fUsePreemptTimer
610#define VM_IS_VMX_LBR(a_pVM) (a_pVM)->hmr0.s.vmx.fLbr
611
612#define VMX_VMCS_WRITE_16(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs16((a_pVCpu), (a_FieldEnc), (a_Val))
613#define VMX_VMCS_WRITE_32(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs32((a_pVCpu), (a_FieldEnc), (a_Val))
614#define VMX_VMCS_WRITE_64(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
615#define VMX_VMCS_WRITE_NW(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
616
617#define VMX_VMCS_READ_16(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs16((a_pVCpu), (a_FieldEnc), (a_pVal))
618#define VMX_VMCS_READ_32(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs32((a_pVCpu), (a_FieldEnc), (a_pVal))
619#define VMX_VMCS_READ_64(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
620#define VMX_VMCS_READ_NW(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
621
622#include "../VMMAll/VMXAllTemplate.cpp.h"
623
624#undef VMX_VMCS_WRITE_16
625#undef VMX_VMCS_WRITE_32
626#undef VMX_VMCS_WRITE_64
627#undef VMX_VMCS_WRITE_NW
628
629#undef VMX_VMCS_READ_16
630#undef VMX_VMCS_READ_32
631#undef VMX_VMCS_READ_64
632#undef VMX_VMCS_READ_NW
633
634#undef VM_IS_VMX_PREEMPT_TIMER_USED
635#undef VM_IS_VMX_NESTED_PAGING
636#undef VM_IS_VMX_UNRESTRICTED_GUEST
637#undef VCPU_2_VMXSTATS
638#undef VCPU_2_VMXSTATE
639
640
641/**
642 * Updates the VM's last error record.
643 *
644 * If there was a VMX instruction error, reads the error data from the VMCS and
645 * updates the VCPU's last error record as well.
646 *
647 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
648 * Can be NULL if @a rc is not VERR_VMX_UNABLE_TO_START_VM or
649 * VERR_VMX_INVALID_VMCS_FIELD.
650 * @param rc The error code.
651 */
652static void hmR0VmxUpdateErrorRecord(PVMCPUCC pVCpu, int rc)
653{
654 if ( rc == VERR_VMX_INVALID_VMCS_FIELD
655 || rc == VERR_VMX_UNABLE_TO_START_VM)
656 {
657 AssertPtrReturnVoid(pVCpu);
658 VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
659 }
660 pVCpu->CTX_SUFF(pVM)->hm.s.ForR3.rcInit = rc;
661}
662
663
664/**
665 * Enters VMX root mode operation on the current CPU.
666 *
667 * @returns VBox status code.
668 * @param pHostCpu The HM physical-CPU structure.
669 * @param pVM The cross context VM structure. Can be
670 * NULL, after a resume.
671 * @param HCPhysCpuPage Physical address of the VMXON region.
672 * @param pvCpuPage Pointer to the VMXON region.
673 */
674static int hmR0VmxEnterRootMode(PHMPHYSCPU pHostCpu, PVMCC pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage)
675{
676 Assert(pHostCpu);
677 Assert(HCPhysCpuPage && HCPhysCpuPage != NIL_RTHCPHYS);
678 Assert(RT_ALIGN_T(HCPhysCpuPage, _4K, RTHCPHYS) == HCPhysCpuPage);
679 Assert(pvCpuPage);
680 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
681
682 if (pVM)
683 {
684 /* Write the VMCS revision identifier to the VMXON region. */
685 *(uint32_t *)pvCpuPage = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
686 }
687
688 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
689 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
690
691 /* Enable the VMX bit in CR4 if necessary. */
692 RTCCUINTREG const uOldCr4 = SUPR0ChangeCR4(X86_CR4_VMXE, RTCCUINTREG_MAX);
693
694 /* Record whether VMXE was already enabled prior to us enabling it above. */
695 pHostCpu->fVmxeAlreadyEnabled = RT_BOOL(uOldCr4 & X86_CR4_VMXE);
696
697 /* Enter VMX root mode. */
698 int rc = VMXEnable(HCPhysCpuPage);
699 if (RT_FAILURE(rc))
700 {
701 /* Restore CR4.VMXE if it was not set prior to our attempt to set it above. */
702 if (!pHostCpu->fVmxeAlreadyEnabled)
703 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
704
705 if (pVM)
706 pVM->hm.s.ForR3.vmx.HCPhysVmxEnableError = HCPhysCpuPage;
707 }
708
709 /* Restore interrupts. */
710 ASMSetFlags(fEFlags);
711 return rc;
712}
713
714
715/**
716 * Exits VMX root mode operation on the current CPU.
717 *
718 * @returns VBox status code.
719 * @param pHostCpu The HM physical-CPU structure.
720 */
721static int hmR0VmxLeaveRootMode(PHMPHYSCPU pHostCpu)
722{
723 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
724
725 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
726 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
727
728 /* If we're for some reason not in VMX root mode, then don't leave it. */
729 RTCCUINTREG const uHostCr4 = ASMGetCR4();
730
731 int rc;
732 if (uHostCr4 & X86_CR4_VMXE)
733 {
734 /* Exit VMX root mode and clear the VMX bit in CR4. */
735 VMXDisable();
736
737 /* Clear CR4.VMXE only if it was clear prior to us setting it. */
738 if (!pHostCpu->fVmxeAlreadyEnabled)
739 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
740
741 rc = VINF_SUCCESS;
742 }
743 else
744 rc = VERR_VMX_NOT_IN_VMX_ROOT_MODE;
745
746 /* Restore interrupts. */
747 ASMSetFlags(fEFlags);
748 return rc;
749}
750
751
752/**
753 * Allocates pages as specified by an array of VMX page-allocation info
754 * objects.
755 *
756 * The pages' contents are zeroed after allocation.
757 *
758 * @returns VBox status code.
759 * @param phMemObj Where to return the handle to the allocation.
760 * @param paAllocInfo The pointer to the first element of the VMX
761 * page-allocation info object array.
762 * @param cEntries The number of elements in the @a paAllocInfo array.
763 */
764static int hmR0VmxPagesAllocZ(PRTR0MEMOBJ phMemObj, PVMXPAGEALLOCINFO paAllocInfo, uint32_t cEntries)
765{
766 *phMemObj = NIL_RTR0MEMOBJ;
767
768 /* Figure out how many pages to allocate. */
769 uint32_t cPages = 0;
770 for (uint32_t iPage = 0; iPage < cEntries; iPage++)
771 cPages += !!paAllocInfo[iPage].fValid;
772
773 /* Allocate the pages. */
774 if (cPages)
775 {
776 size_t const cbPages = cPages << HOST_PAGE_SHIFT;
777 int rc = RTR0MemObjAllocPage(phMemObj, cbPages, false /* fExecutable */);
778 if (RT_FAILURE(rc))
779 return rc;
780
781 /* Zero the contents and assign each page to the corresponding VMX page-allocation entry. */
782 void *pvFirstPage = RTR0MemObjAddress(*phMemObj);
783 RT_BZERO(pvFirstPage, cbPages);
784
785 uint32_t iPage = 0;
786 for (uint32_t i = 0; i < cEntries; i++)
787 if (paAllocInfo[i].fValid)
788 {
789 RTHCPHYS const HCPhysPage = RTR0MemObjGetPagePhysAddr(*phMemObj, iPage);
790 void *pvPage = (void *)((uintptr_t)pvFirstPage + (iPage << X86_PAGE_4K_SHIFT));
791 Assert(HCPhysPage && HCPhysPage != NIL_RTHCPHYS);
792 AssertPtr(pvPage);
793
794 Assert(paAllocInfo[i].pHCPhys); /* Index by entry (i), not by allocated page (iPage), so skipped entries stay untouched. */
795 Assert(paAllocInfo[i].ppVirt);
796 *paAllocInfo[i].pHCPhys = HCPhysPage;
797 *paAllocInfo[i].ppVirt = pvPage;
798
799 /* Move to next page. */
800 ++iPage;
801 }
802
803 /* Make sure all valid (requested) pages have been assigned. */
804 Assert(iPage == cPages);
805 }
806 return VINF_SUCCESS;
807}
808
809
810/**
811 * Frees pages allocated using hmR0VmxPagesAllocZ.
812 *
813 * @param phMemObj Pointer to the memory object handle. Will be set to
814 * NIL.
815 */
816DECL_FORCE_INLINE(void) hmR0VmxPagesFree(PRTR0MEMOBJ phMemObj)
817{
818 /* We can cleanup wholesale since it's all one allocation. */
819 if (*phMemObj != NIL_RTR0MEMOBJ)
820 {
821 RTR0MemObjFree(*phMemObj, true /* fFreeMappings */);
822 *phMemObj = NIL_RTR0MEMOBJ;
823 }
824}
825
826
827/**
828 * Initializes a VMCS info. object.
829 *
830 * @param pVmcsInfo The VMCS info. object.
831 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
832 */
833static void hmR0VmxVmcsInfoInit(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
834{
835 RT_ZERO(*pVmcsInfo);
836 RT_ZERO(*pVmcsInfoShared);
837
838 pVmcsInfo->pShared = pVmcsInfoShared;
839 Assert(pVmcsInfo->hMemObj == NIL_RTR0MEMOBJ);
840 pVmcsInfo->HCPhysVmcs = NIL_RTHCPHYS;
841 pVmcsInfo->HCPhysShadowVmcs = NIL_RTHCPHYS;
842 pVmcsInfo->HCPhysMsrBitmap = NIL_RTHCPHYS;
843 pVmcsInfo->HCPhysGuestMsrLoad = NIL_RTHCPHYS;
844 pVmcsInfo->HCPhysGuestMsrStore = NIL_RTHCPHYS;
845 pVmcsInfo->HCPhysHostMsrLoad = NIL_RTHCPHYS;
846 pVmcsInfo->HCPhysVirtApic = NIL_RTHCPHYS;
847 pVmcsInfo->HCPhysEPTP = NIL_RTHCPHYS;
848 pVmcsInfo->u64VmcsLinkPtr = NIL_RTHCPHYS;
849 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
850 pVmcsInfo->idHostCpuExec = NIL_RTCPUID;
851}
852
853
854/**
855 * Frees the VT-x structures for a VMCS info. object.
856 *
857 * @param pVmcsInfo The VMCS info. object.
858 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
859 */
860static void hmR0VmxVmcsInfoFree(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
861{
862 hmR0VmxPagesFree(&pVmcsInfo->hMemObj);
863 hmR0VmxVmcsInfoInit(pVmcsInfo, pVmcsInfoShared);
864}
865
866
867/**
868 * Allocates the VT-x structures for a VMCS info. object.
869 *
870 * @returns VBox status code.
871 * @param pVCpu The cross context virtual CPU structure.
872 * @param pVmcsInfo The VMCS info. object.
873 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
874 *
875 * @remarks The caller is expected to take care of any and all allocation failures.
876 * This function will not perform any cleanup for failures half-way
877 * through.
878 */
879static int hmR0VmxAllocVmcsInfo(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
880{
881 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
882
883 bool const fMsrBitmaps = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS);
884 bool const fShadowVmcs = !fIsNstGstVmcs ? pVM->hmr0.s.vmx.fUseVmcsShadowing : pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing;
885 Assert(!pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing); /* VMCS shadowing is not yet exposed to the guest. */
886 VMXPAGEALLOCINFO aAllocInfo[] =
887 {
888 { true, 0 /* Unused */, &pVmcsInfo->HCPhysVmcs, &pVmcsInfo->pvVmcs },
889 { true, 0 /* Unused */, &pVmcsInfo->HCPhysGuestMsrLoad, &pVmcsInfo->pvGuestMsrLoad },
890 { true, 0 /* Unused */, &pVmcsInfo->HCPhysHostMsrLoad, &pVmcsInfo->pvHostMsrLoad },
891 { fMsrBitmaps, 0 /* Unused */, &pVmcsInfo->HCPhysMsrBitmap, &pVmcsInfo->pvMsrBitmap },
892 { fShadowVmcs, 0 /* Unused */, &pVmcsInfo->HCPhysShadowVmcs, &pVmcsInfo->pvShadowVmcs },
893 };
894
895 int rc = hmR0VmxPagesAllocZ(&pVmcsInfo->hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
896 if (RT_FAILURE(rc))
897 return rc;
898
899 /*
900 * We use the same page for the VM-entry MSR-load and VM-exit MSR-store areas,
901 * because they contain a symmetric list of guest MSRs to load on VM-entry and store on VM-exit.
902 */
903 AssertCompile(RT_ELEMENTS(aAllocInfo) > 0);
904 Assert(pVmcsInfo->HCPhysGuestMsrLoad != NIL_RTHCPHYS);
905 pVmcsInfo->pvGuestMsrStore = pVmcsInfo->pvGuestMsrLoad;
906 pVmcsInfo->HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrLoad;
907
908 /*
909 * Get the virtual-APIC page rather than allocating it again.
910 */
911 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW)
912 {
913 if (!fIsNstGstVmcs)
914 {
915 if (PDMHasApic(pVM))
916 {
917 rc = APICGetApicPageForCpu(pVCpu, &pVmcsInfo->HCPhysVirtApic, (PRTR0PTR)&pVmcsInfo->pbVirtApic, NULL /*pR3Ptr*/);
918 if (RT_FAILURE(rc))
919 return rc;
920 Assert(pVmcsInfo->pbVirtApic);
921 Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
922 }
923 }
924 else
925 {
926 /* These are set up later while merging the nested-guest VMCS. */
927 Assert(pVmcsInfo->pbVirtApic == NULL);
928 Assert(pVmcsInfo->HCPhysVirtApic == NIL_RTHCPHYS);
929 }
930 }
931
932 return VINF_SUCCESS;
933}
934
935
936/**
937 * Free all VT-x structures for the VM.
938 *
939 * @param pVM The cross context VM structure.
940 */
941static void hmR0VmxStructsFree(PVMCC pVM)
942{
943 hmR0VmxPagesFree(&pVM->hmr0.s.vmx.hMemObj);
944#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
945 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
946 {
947 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsFields);
948 pVM->hmr0.s.vmx.paShadowVmcsFields = NULL;
949 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsRoFields);
950 pVM->hmr0.s.vmx.paShadowVmcsRoFields = NULL;
951 }
952#endif
953
954 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
955 {
956 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
957 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
958#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
959 if (pVM->cpum.ro.GuestFeatures.fVmx)
960 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
961#endif
962 }
963}
964
965
966/**
967 * Allocate all VT-x structures for the VM.
968 *
969 * @returns IPRT status code.
970 * @param pVM The cross context VM structure.
971 *
972 * @remarks This function will clean up on memory allocation failures.
973 */
974static int hmR0VmxStructsAlloc(PVMCC pVM)
975{
976 /*
977 * Sanity check the VMCS size reported by the CPU as we assume 4KB allocations.
978 * The VMCS size cannot be more than 4096 bytes.
979 *
980 * See Intel spec. Appendix A.1 "Basic VMX Information".
981 */
982 uint32_t const cbVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_SIZE);
983 if (cbVmcs <= X86_PAGE_4K_SIZE)
984 { /* likely */ }
985 else
986 {
987 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_INVALID_VMCS_SIZE;
988 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
989 }
990
991 /*
992 * Allocate per-VM VT-x structures.
993 */
994 bool const fVirtApicAccess = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
995 bool const fUseVmcsShadowing = pVM->hmr0.s.vmx.fUseVmcsShadowing;
996 VMXPAGEALLOCINFO aAllocInfo[] =
997 {
998 { fVirtApicAccess, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysApicAccess, (PRTR0PTR)&pVM->hmr0.s.vmx.pbApicAccess },
999 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmreadBitmap, &pVM->hmr0.s.vmx.pvVmreadBitmap },
1000 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmwriteBitmap, &pVM->hmr0.s.vmx.pvVmwriteBitmap },
1001#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1002 { true, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysScratch, (PRTR0PTR)&pVM->hmr0.s.vmx.pbScratch },
1003#endif
1004 };
1005
1006 int rc = hmR0VmxPagesAllocZ(&pVM->hmr0.s.vmx.hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
1007 if (RT_SUCCESS(rc))
1008 {
1009#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1010 /* Allocate the shadow VMCS-fields array. */
1011 if (fUseVmcsShadowing)
1012 {
1013 Assert(!pVM->hmr0.s.vmx.cShadowVmcsFields);
1014 Assert(!pVM->hmr0.s.vmx.cShadowVmcsRoFields);
1015 pVM->hmr0.s.vmx.paShadowVmcsFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
1016 pVM->hmr0.s.vmx.paShadowVmcsRoFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
1017 if (!pVM->hmr0.s.vmx.paShadowVmcsFields || !pVM->hmr0.s.vmx.paShadowVmcsRoFields)
1018 rc = VERR_NO_MEMORY;
1019 }
1020#endif
1021
1022 /*
1023 * Allocate per-VCPU VT-x structures.
1024 */
1025 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus && RT_SUCCESS(rc); idCpu++)
1026 {
1027 /* Allocate the guest VMCS structures. */
1028 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1029 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
1030
1031#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1032 /* Allocate the nested-guest VMCS structures, when the VMX feature is exposed to the guest. */
1033 if (pVM->cpum.ro.GuestFeatures.fVmx && RT_SUCCESS(rc))
1034 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
1035#endif
1036 }
1037 if (RT_SUCCESS(rc))
1038 return VINF_SUCCESS;
1039 }
1040 hmR0VmxStructsFree(pVM);
1041 return rc;
1042}
1043
1044
1045/**
1046 * Pre-initializes non-zero fields in VMX structures that will be allocated.
1047 *
1048 * @param pVM The cross context VM structure.
1049 */
1050static void hmR0VmxStructsInit(PVMCC pVM)
1051{
1052 /* Paranoia. */
1053 Assert(pVM->hmr0.s.vmx.pbApicAccess == NULL);
1054#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1055 Assert(pVM->hmr0.s.vmx.pbScratch == NULL);
1056#endif
1057
1058 /*
1059 * Initialize members up-front so we can cleanup en masse on allocation failures.
1060 */
1061#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1062 pVM->hmr0.s.vmx.HCPhysScratch = NIL_RTHCPHYS;
1063#endif
1064 pVM->hmr0.s.vmx.HCPhysApicAccess = NIL_RTHCPHYS;
1065 pVM->hmr0.s.vmx.HCPhysVmreadBitmap = NIL_RTHCPHYS;
1066 pVM->hmr0.s.vmx.HCPhysVmwriteBitmap = NIL_RTHCPHYS;
1067 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1068 {
1069 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1070 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
1071 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
1072 }
1073}
1074
1075#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1076/**
1077 * Returns whether an MSR at the given MSR-bitmap offset is intercepted or not.
1078 *
1079 * @returns @c true if the MSR is intercepted, @c false otherwise.
1080 * @param pbMsrBitmap The MSR bitmap.
1081 * @param offMsr The MSR byte offset.
1082 * @param iBit The bit offset from the byte offset.
1083 */
1084DECLINLINE(bool) hmR0VmxIsMsrBitSet(uint8_t const *pbMsrBitmap, uint16_t offMsr, int32_t iBit)
1085{
1086 Assert(offMsr + (iBit >> 3) <= X86_PAGE_4K_SIZE);
1087 return ASMBitTest(pbMsrBitmap, (offMsr << 3) + iBit);
1088}
1089#endif
1090
1091/**
1092 * Sets the permission bits for the specified MSR in the given MSR bitmap.
1093 *
1094 * If the passed VMCS is a nested-guest VMCS, this function clears the read/write
1095 * intercept in the MSR bitmap used for hardware-assisted VMX execution of the
1096 * nested-guest only if the nested-guest is itself not intercepting read/write
1097 * access to this MSR.
1098 *
1099 * @param pVCpu The cross context virtual CPU structure.
1100 * @param pVmcsInfo The VMCS info. object.
1101 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1102 * @param idMsr The MSR value.
1103 * @param fMsrpm The MSR permissions (see VMXMSRPM_XXX). This must
1104 * include both a read -and- a write permission!
1105 *
1106 * @sa CPUMGetVmxMsrPermission.
1107 * @remarks Can be called with interrupts disabled.
1108 */
1109static void hmR0VmxSetMsrPermission(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs, uint32_t idMsr, uint32_t fMsrpm)
1110{
1111 uint8_t *pbMsrBitmap = (uint8_t *)pVmcsInfo->pvMsrBitmap;
1112 Assert(pbMsrBitmap);
1113 Assert(VMXMSRPM_IS_FLAG_VALID(fMsrpm));
1114
1115 /*
1116 * MSR-bitmap Layout:
1117 * Byte index MSR range Interpreted as
1118 * 0x000 - 0x3ff 0x00000000 - 0x00001fff Low MSR read bits.
1119 * 0x400 - 0x7ff 0xc0000000 - 0xc0001fff High MSR read bits.
1120 * 0x800 - 0xbff 0x00000000 - 0x00001fff Low MSR write bits.
1121 * 0xc00 - 0xfff 0xc0000000 - 0xc0001fff High MSR write bits.
1122 *
1123 * A bit corresponding to an MSR within the above range causes a VM-exit
1124 * if the bit is 1 on executions of RDMSR/WRMSR. If an MSR falls outside
1125 * the ranges above, accessing it always causes a VM-exit.
1126 *
1127 * See Intel spec. 24.6.9 "MSR-Bitmap Address".
1128 */
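/*
 * Worked example of the layout above: for MSR_K8_LSTAR (0xc0000082) the code
 * below computes offMsr=0x400 and iBit=0x82, so the read-intercept bit sits at
 * byte 0x400 + (0x82 >> 3) = 0x410, bit 2, and the write-intercept bit at byte
 * 0xc10, bit 2. Clearing both bits lets the guest RDMSR/WRMSR LSTAR without a
 * VM-exit.
 */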
1129 uint16_t const offBitmapRead = 0;
1130 uint16_t const offBitmapWrite = 0x800;
1131 uint16_t offMsr;
1132 int32_t iBit;
1133 if (idMsr <= UINT32_C(0x00001fff))
1134 {
1135 offMsr = 0;
1136 iBit = idMsr;
1137 }
1138 else if (idMsr - UINT32_C(0xc0000000) <= UINT32_C(0x00001fff))
1139 {
1140 offMsr = 0x400;
1141 iBit = idMsr - UINT32_C(0xc0000000);
1142 }
1143 else
1144 AssertMsgFailedReturnVoid(("Invalid MSR %#RX32\n", idMsr));
1145
1146 /*
1147 * Set the MSR read permission.
1148 */
1149 uint16_t const offMsrRead = offBitmapRead + offMsr;
1150 Assert(offMsrRead + (iBit >> 3) < offBitmapWrite);
1151 if (fMsrpm & VMXMSRPM_ALLOW_RD)
1152 {
1153#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1154 bool const fClear = !fIsNstGstVmcs ? true
1155 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrRead, iBit);
1156#else
1157 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1158 bool const fClear = true;
1159#endif
1160 if (fClear)
1161 ASMBitClear(pbMsrBitmap, (offMsrRead << 3) + iBit);
1162 }
1163 else
1164 ASMBitSet(pbMsrBitmap, (offMsrRead << 3) + iBit);
1165
1166 /*
1167 * Set the MSR write permission.
1168 */
1169 uint16_t const offMsrWrite = offBitmapWrite + offMsr;
1170 Assert(offMsrWrite + (iBit >> 3) < X86_PAGE_4K_SIZE);
1171 if (fMsrpm & VMXMSRPM_ALLOW_WR)
1172 {
1173#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1174 bool const fClear = !fIsNstGstVmcs ? true
1175 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrWrite, iBit);
1176#else
1177 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1178 bool const fClear = true;
1179#endif
1180 if (fClear)
1181 ASMBitClear(pbMsrBitmap, (offMsrWrite << 3) + iBit);
1182 }
1183 else
1184 ASMBitSet(pbMsrBitmap, (offMsrWrite << 3) + iBit);
1185}
1186
1187
1188/**
1189 * Updates the VMCS with the number of effective MSRs in the auto-load/store MSR
1190 * area.
1191 *
1192 * @returns VBox status code.
1193 * @param pVCpu The cross context virtual CPU structure.
1194 * @param pVmcsInfo The VMCS info. object.
1195 * @param cMsrs The number of MSRs.
1196 */
1197static int hmR0VmxSetAutoLoadStoreMsrCount(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint32_t cMsrs)
1198{
1199 /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */
1200 uint32_t const cMaxSupportedMsrs = VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc);
1201 if (RT_LIKELY(cMsrs < cMaxSupportedMsrs))
1202 {
1203 /* Commit the MSR counts to the VMCS and update the cache. */
1204 if (pVmcsInfo->cEntryMsrLoad != cMsrs)
1205 {
1206 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1207 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cMsrs); AssertRC(rc);
1208 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1209 pVmcsInfo->cEntryMsrLoad = cMsrs;
1210 pVmcsInfo->cExitMsrStore = cMsrs;
1211 pVmcsInfo->cExitMsrLoad = cMsrs;
1212 }
1213 return VINF_SUCCESS;
1214 }
1215
1216 LogRel(("Auto-load/store MSR count exceeded! cMsrs=%u MaxSupported=%u\n", cMsrs, cMaxSupportedMsrs));
1217 pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_GUEST_MSR_STORAGE;
1218 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1219}
1220
1221
1222/**
1223 * Adds a new (or updates the value of an existing) guest/host MSR
1224 * pair to be swapped during the world-switch as part of the
1225 * auto-load/store MSR area in the VMCS.
1226 *
1227 * @returns VBox status code.
1228 * @param pVCpu The cross context virtual CPU structure.
1229 * @param pVmxTransient The VMX-transient structure.
1230 * @param idMsr The MSR.
1231 * @param uGuestMsrValue Value of the guest MSR.
1232 * @param fSetReadWrite Whether to set the guest read/write access of this
1233 * MSR (thus not causing a VM-exit).
1234 * @param fUpdateHostMsr Whether to update the value of the host MSR if
1235 * necessary.
1236 */
1237static int hmR0VmxAddAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr, uint64_t uGuestMsrValue,
1238 bool fSetReadWrite, bool fUpdateHostMsr)
1239{
1240 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1241 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1242 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1243 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1244 uint32_t i;
1245
1246 /* Paranoia. */
1247 Assert(pGuestMsrLoad);
1248
1249#ifndef DEBUG_bird
1250 LogFlowFunc(("pVCpu=%p idMsr=%#RX32 uGuestMsrValue=%#RX64\n", pVCpu, idMsr, uGuestMsrValue));
1251#endif
1252
1253 /* Check if the MSR already exists in the VM-entry MSR-load area. */
1254 for (i = 0; i < cMsrs; i++)
1255 {
1256 if (pGuestMsrLoad[i].u32Msr == idMsr)
1257 break;
1258 }
1259
1260 bool fAdded = false;
1261 if (i == cMsrs)
1262 {
1263 /* The MSR does not exist, bump the MSR count to make room for the new MSR. */
1264 ++cMsrs;
1265 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1266 AssertMsgRCReturn(rc, ("Insufficient space to add MSR to VM-entry MSR-load/store area %u\n", idMsr), rc);
1267
1268 /* Set the guest to read/write this MSR without causing VM-exits. */
1269 if ( fSetReadWrite
1270 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS))
1271 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_ALLOW_RD_WR);
1272
1273 Log4Func(("Added MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1274 fAdded = true;
1275 }
1276
1277 /* Update the MSR value for the newly added or already existing MSR. */
1278 pGuestMsrLoad[i].u32Msr = idMsr;
1279 pGuestMsrLoad[i].u64Value = uGuestMsrValue;
1280
1281 /* Create the corresponding slot in the VM-exit MSR-store area if we use a different page. */
1282 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1283 {
1284 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1285 pGuestMsrStore[i].u32Msr = idMsr;
1286 pGuestMsrStore[i].u64Value = uGuestMsrValue;
1287 }
1288
1289 /* Update the corresponding slot in the host MSR area. */
1290 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1291 Assert(pHostMsr != pVmcsInfo->pvGuestMsrLoad);
1292 Assert(pHostMsr != pVmcsInfo->pvGuestMsrStore);
1293 pHostMsr[i].u32Msr = idMsr;
1294
1295 /*
1296 * Only if the caller requests to update the host MSR value AND we've newly added the
1297 * MSR to the host MSR area do we actually update the value. Otherwise, it will be
1298 * updated by hmR0VmxUpdateAutoLoadHostMsrs().
1299 *
1300 * We do this for performance reasons since reading MSRs may be quite expensive.
1301 */
1302 if (fAdded)
1303 {
1304 if (fUpdateHostMsr)
1305 {
1306 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
1307 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1308 pHostMsr[i].u64Value = ASMRdMsr(idMsr);
1309 }
1310 else
1311 {
1312 /* Someone else can do the work. */
1313 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
1314 }
1315 }
1316 return VINF_SUCCESS;
1317}
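/*
 * Usage sketch (hypothetical values; not a call made at this point in the
 * file): swapping the TSC_AUX MSR across the world-switch would look like
 *     hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX,
 *                                CPUMGetGuestTscAux(pVCpu), true, true);
 * where the last two arguments are fSetReadWrite and fUpdateHostMsr, with the
 * matching hmR0VmxRemoveAutoLoadStoreMsr() call undoing it once the MSR no
 * longer needs swapping.
 */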
1318
1319
1320/**
1321 * Removes a guest/host MSR pair to be swapped during the world-switch from the
1322 * auto-load/store MSR area in the VMCS.
1323 *
1324 * @returns VBox status code.
1325 * @param pVCpu The cross context virtual CPU structure.
1326 * @param pVmxTransient The VMX-transient structure.
1327 * @param idMsr The MSR.
1328 */
1329static int hmR0VmxRemoveAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr)
1330{
1331 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1332 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1333 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1334 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1335
1336#ifndef DEBUG_bird
1337 LogFlowFunc(("pVCpu=%p idMsr=%#RX32\n", pVCpu, idMsr));
1338#endif
1339
1340 for (uint32_t i = 0; i < cMsrs; i++)
1341 {
1342 /* Find the MSR. */
1343 if (pGuestMsrLoad[i].u32Msr == idMsr)
1344 {
1345 /*
1346 * If it's the last MSR, we only need to reduce the MSR count.
1347 * If it's -not- the last MSR, copy the last MSR in place of it and reduce the MSR count.
1348 */
1349 if (i < cMsrs - 1)
1350 {
1351 /* Remove it from the VM-entry MSR-load area. */
1352 pGuestMsrLoad[i].u32Msr = pGuestMsrLoad[cMsrs - 1].u32Msr;
1353 pGuestMsrLoad[i].u64Value = pGuestMsrLoad[cMsrs - 1].u64Value;
1354
1355 /* Remove it from the VM-exit MSR-store area if it's in a different page. */
1356 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1357 {
1358 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1359 Assert(pGuestMsrStore[i].u32Msr == idMsr);
1360 pGuestMsrStore[i].u32Msr = pGuestMsrStore[cMsrs - 1].u32Msr;
1361 pGuestMsrStore[i].u64Value = pGuestMsrStore[cMsrs - 1].u64Value;
1362 }
1363
1364 /* Remove it from the VM-exit MSR-load area. */
1365 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1366 Assert(pHostMsr[i].u32Msr == idMsr);
1367 pHostMsr[i].u32Msr = pHostMsr[cMsrs - 1].u32Msr;
1368 pHostMsr[i].u64Value = pHostMsr[cMsrs - 1].u64Value;
1369 }
1370
1371 /* Reduce the count to reflect the removed MSR and bail. */
1372 --cMsrs;
1373 break;
1374 }
1375 }
1376
1377 /* Update the VMCS if the count changed (meaning the MSR was found and removed). */
1378 if (cMsrs != pVmcsInfo->cEntryMsrLoad)
1379 {
1380 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1381 AssertRCReturn(rc, rc);
1382
1383 /* We're no longer swapping MSRs during the world-switch, intercept guest read/writes to them. */
1384 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1385 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_EXIT_RD | VMXMSRPM_EXIT_WR);
1386
1387 Log4Func(("Removed MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1388 return VINF_SUCCESS;
1389 }
1390
1391 return VERR_NOT_FOUND;
1392}
1393
1394
1395/**
1396 * Updates the value of all host MSRs in the VM-exit MSR-load area.
1397 *
1398 * @param pVCpu The cross context virtual CPU structure.
1399 * @param pVmcsInfo The VMCS info. object.
1400 *
1401 * @remarks No-long-jump zone!!!
1402 */
1403static void hmR0VmxUpdateAutoLoadHostMsrs(PCVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1404{
1405 RT_NOREF(pVCpu);
1406 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1407
1408 PVMXAUTOMSR pHostMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1409 uint32_t const cMsrs = pVmcsInfo->cExitMsrLoad;
1410 Assert(pHostMsrLoad);
1411 Assert(sizeof(*pHostMsrLoad) * cMsrs <= X86_PAGE_4K_SIZE);
1412 LogFlowFunc(("pVCpu=%p cMsrs=%u\n", pVCpu, cMsrs));
1413 for (uint32_t i = 0; i < cMsrs; i++)
1414 {
1415 /*
1416 * Performance hack for the host EFER MSR. We use the cached value rather than re-read it.
1417 * Strict builds will catch mismatches in hmR0VmxCheckAutoLoadStoreMsrs(). See @bugref{7368}.
1418 */
1419 if (pHostMsrLoad[i].u32Msr == MSR_K6_EFER)
1420 pHostMsrLoad[i].u64Value = g_uHmVmxHostMsrEfer;
1421 else
1422 pHostMsrLoad[i].u64Value = ASMRdMsr(pHostMsrLoad[i].u32Msr);
1423 }
1424}
1425
1426
1427/**
1428 * Saves a set of host MSRs to allow read/write passthru access to the guest and
1429 * perform lazy restoration of the host MSRs while leaving VT-x.
1430 *
1431 * @param pVCpu The cross context virtual CPU structure.
1432 *
1433 * @remarks No-long-jump zone!!!
1434 */
1435static void hmR0VmxLazySaveHostMsrs(PVMCPUCC pVCpu)
1436{
1437 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1438
1439 /*
1440 * Note: If you're adding MSRs here, make sure to update the MSR-bitmap accesses in hmR0VmxSetupVmcsProcCtls().
1441 */
1442 if (!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST))
1443 {
1444 Assert(!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)); /* Guest MSRs better not be loaded now. */
1445 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
1446 {
1447 pVCpu->hmr0.s.vmx.u64HostMsrLStar = ASMRdMsr(MSR_K8_LSTAR);
1448 pVCpu->hmr0.s.vmx.u64HostMsrStar = ASMRdMsr(MSR_K6_STAR);
1449 pVCpu->hmr0.s.vmx.u64HostMsrSfMask = ASMRdMsr(MSR_K8_SF_MASK);
1450 pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
1451 }
1452 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_SAVED_HOST;
1453 }
1454}
1455
1456
1457#ifdef VBOX_STRICT
1458
1459/**
1460 * Verifies that our cached host EFER MSR value has not changed since we cached it.
1461 *
1462 * @param pVmcsInfo The VMCS info. object.
1463 */
1464static void hmR0VmxCheckHostEferMsr(PCVMXVMCSINFO pVmcsInfo)
1465{
1466 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1467
1468 if (pVmcsInfo->u32ExitCtls & VMX_EXIT_CTLS_LOAD_EFER_MSR)
1469 {
1470 uint64_t const uHostEferMsr = ASMRdMsr(MSR_K6_EFER);
1471 uint64_t const uHostEferMsrCache = g_uHmVmxHostMsrEfer;
1472 uint64_t uVmcsEferMsrVmcs;
1473 int rc = VMXReadVmcs64(VMX_VMCS64_HOST_EFER_FULL, &uVmcsEferMsrVmcs);
1474 AssertRC(rc);
1475
1476 AssertMsgReturnVoid(uHostEferMsr == uVmcsEferMsrVmcs,
1477 ("EFER Host/VMCS mismatch! host=%#RX64 vmcs=%#RX64\n", uHostEferMsr, uVmcsEferMsrVmcs));
1478 AssertMsgReturnVoid(uHostEferMsr == uHostEferMsrCache,
1479 ("EFER Host/Cache mismatch! host=%#RX64 cache=%#RX64\n", uHostEferMsr, uHostEferMsrCache));
1480 }
1481}
1482
1483
1484/**
1485 * Verifies whether the guest/host MSR pairs in the auto-load/store area in the
1486 * VMCS are correct.
1487 *
1488 * @param pVCpu The cross context virtual CPU structure.
1489 * @param pVmcsInfo The VMCS info. object.
1490 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1491 */
1492static void hmR0VmxCheckAutoLoadStoreMsrs(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
1493{
1494 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1495
1496 /* Read the various MSR-area counts from the VMCS. */
1497 uint32_t cEntryLoadMsrs;
1498 uint32_t cExitStoreMsrs;
1499 uint32_t cExitLoadMsrs;
1500 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &cEntryLoadMsrs); AssertRC(rc);
1501 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &cExitStoreMsrs); AssertRC(rc);
1502 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &cExitLoadMsrs); AssertRC(rc);
1503
1504 /* Verify all the MSR counts are the same. */
1505 Assert(cEntryLoadMsrs == cExitStoreMsrs);
1506 Assert(cExitStoreMsrs == cExitLoadMsrs);
1507 uint32_t const cMsrs = cExitLoadMsrs;
1508
1509 /* Verify the MSR counts do not exceed the maximum count supported by the hardware. */
1510 Assert(cMsrs < VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc));
1511
1512 /* Verify the MSR counts are within the allocated page size. */
1513 Assert(sizeof(VMXAUTOMSR) * cMsrs <= X86_PAGE_4K_SIZE);
1514
1515 /* Verify the relevant contents of the MSR areas match. */
1516 PCVMXAUTOMSR pGuestMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1517 PCVMXAUTOMSR pGuestMsrStore = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1518 PCVMXAUTOMSR pHostMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1519 bool const fSeparateExitMsrStorePage = hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo);
1520 for (uint32_t i = 0; i < cMsrs; i++)
1521 {
1522 /* Verify that the MSRs are paired properly and that the host MSR has the correct value. */
1523 if (fSeparateExitMsrStorePage)
1524 {
1525 AssertMsgReturnVoid(pGuestMsrLoad->u32Msr == pGuestMsrStore->u32Msr,
1526 ("GuestMsrLoad=%#RX32 GuestMsrStore=%#RX32 cMsrs=%u\n",
1527 pGuestMsrLoad->u32Msr, pGuestMsrStore->u32Msr, cMsrs));
1528 }
1529
1530 AssertMsgReturnVoid(pHostMsrLoad->u32Msr == pGuestMsrLoad->u32Msr,
1531 ("HostMsrLoad=%#RX32 GuestMsrLoad=%#RX32 cMsrs=%u\n",
1532 pHostMsrLoad->u32Msr, pGuestMsrLoad->u32Msr, cMsrs));
1533
1534 uint64_t const u64HostMsr = ASMRdMsr(pHostMsrLoad->u32Msr);
1535 AssertMsgReturnVoid(pHostMsrLoad->u64Value == u64HostMsr,
1536 ("u32Msr=%#RX32 VMCS Value=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n",
1537 pHostMsrLoad->u32Msr, pHostMsrLoad->u64Value, u64HostMsr, cMsrs));
1538
1539 /* Verify that cached host EFER MSR matches what's loaded on the CPU. */
1540 bool const fIsEferMsr = RT_BOOL(pHostMsrLoad->u32Msr == MSR_K6_EFER);
1541 AssertMsgReturnVoid(!fIsEferMsr || u64HostMsr == g_uHmVmxHostMsrEfer,
1542 ("Cached=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n", g_uHmVmxHostMsrEfer, u64HostMsr, cMsrs));
1543
1544 /* Verify that the accesses are as expected in the MSR bitmap for auto-load/store MSRs. */
1545 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1546 {
1547 uint32_t const fMsrpm = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, pGuestMsrLoad->u32Msr);
1548 if (fIsEferMsr)
1549 {
1550 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_RD), ("Passthru read for EFER MSR!?\n"));
1551 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_WR), ("Passthru write for EFER MSR!?\n"));
1552 }
1553 else
1554 {
1555 /* Verify LBR MSRs (used only for debugging) are intercepted. We don't passthru these MSRs to the guest yet. */
1556 PCVMCC pVM = pVCpu->CTX_SUFF(pVM);
1557 if ( pVM->hmr0.s.vmx.fLbr
1558 && ( hmR0VmxIsLbrBranchFromMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1559 || hmR0VmxIsLbrBranchToMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1560 || pGuestMsrLoad->u32Msr == pVM->hmr0.s.vmx.idLbrTosMsr))
1561 {
1562 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_EXIT_RD_WR,
1563 ("u32Msr=%#RX32 cMsrs=%u Passthru read/write for LBR MSRs!\n",
1564 pGuestMsrLoad->u32Msr, cMsrs));
1565 }
1566 else if (!fIsNstGstVmcs)
1567 {
1568 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_ALLOW_RD_WR,
1569 ("u32Msr=%#RX32 cMsrs=%u No passthru read/write!\n", pGuestMsrLoad->u32Msr, cMsrs));
1570 }
1571 else
1572 {
1573 /*
1574 * A nested-guest VMCS must -also- allow read/write passthrough for the MSR for us to
1575 * execute a nested-guest with MSR passthrough.
1576 *
1577 * Check if the nested-guest MSR bitmap allows passthrough, and if so, assert that we
1578 * allow passthrough too.
1579 */
1580 void const *pvMsrBitmapNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap;
1581 Assert(pvMsrBitmapNstGst);
1582 uint32_t const fMsrpmNstGst = CPUMGetVmxMsrPermission(pvMsrBitmapNstGst, pGuestMsrLoad->u32Msr);
1583 AssertMsgReturnVoid(fMsrpm == fMsrpmNstGst,
1584 ("u32Msr=%#RX32 cMsrs=%u Permission mismatch fMsrpm=%#x fMsrpmNstGst=%#x!\n",
1585 pGuestMsrLoad->u32Msr, cMsrs, fMsrpm, fMsrpmNstGst));
1586 }
1587 }
1588 }
1589
1590 /* Move to the next MSR. */
1591 pHostMsrLoad++;
1592 pGuestMsrLoad++;
1593 pGuestMsrStore++;
1594 }
1595}
1596
1597#endif /* VBOX_STRICT */
1598
1599/**
1600 * Flushes the TLB using EPT.
1601 *
1602 * @param pVCpu The cross context virtual CPU structure of the calling
1603 * EMT. Can be NULL depending on @a enmTlbFlush.
1604 * @param pVmcsInfo The VMCS info. object. Can be NULL depending on @a
1605 * enmTlbFlush.
1606 * @param enmTlbFlush Type of flush.
1607 *
1608 * @remarks Caller is responsible for making sure this function is called only
1609 * when NestedPaging is supported and providing @a enmTlbFlush that is
1610 * supported by the CPU.
1611 * @remarks Can be called with interrupts disabled.
1612 */
1613static void hmR0VmxFlushEpt(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, VMXTLBFLUSHEPT enmTlbFlush)
1614{
1615 uint64_t au64Descriptor[2];
1616 if (enmTlbFlush == VMXTLBFLUSHEPT_ALL_CONTEXTS)
1617 au64Descriptor[0] = 0;
1618 else
1619 {
1620 Assert(pVCpu);
1621 Assert(pVmcsInfo);
1622 au64Descriptor[0] = pVmcsInfo->HCPhysEPTP;
1623 }
1624 au64Descriptor[1] = 0; /* MBZ. Intel spec. 33.3 "VMX Instructions" */
1625
1626 int rc = VMXR0InvEPT(enmTlbFlush, &au64Descriptor[0]);
1627 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %#x %#RHp failed. rc=%Rrc\n", enmTlbFlush, au64Descriptor[0], rc));
1628
1629 if ( RT_SUCCESS(rc)
1630 && pVCpu)
1631 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
1632}
1633
1634
1635/**
1636 * Flushes the TLB using VPID.
1637 *
1638 * @param pVCpu The cross context virtual CPU structure of the calling
1639 * EMT. Can be NULL depending on @a enmTlbFlush.
1640 * @param enmTlbFlush Type of flush.
1641 * @param GCPtr Virtual address of the page to flush (can be 0 depending
1642 * on @a enmTlbFlush).
1643 *
1644 * @remarks Can be called with interrupts disabled.
1645 */
1646static void hmR0VmxFlushVpid(PVMCPUCC pVCpu, VMXTLBFLUSHVPID enmTlbFlush, RTGCPTR GCPtr)
1647{
1648 Assert(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid);
1649
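    /* The 128-bit INVVPID descriptor: bits 15:0 of quadword 0 hold the VPID (bits 63:16
       are reserved), quadword 1 holds the linear address for individual-address flushes. */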
1650 uint64_t au64Descriptor[2];
1651 if (enmTlbFlush == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1652 {
1653 au64Descriptor[0] = 0;
1654 au64Descriptor[1] = 0;
1655 }
1656 else
1657 {
1658 AssertPtr(pVCpu);
1659 AssertMsg(pVCpu->hmr0.s.uCurrentAsid != 0, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1660 AssertMsg(pVCpu->hmr0.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1661 au64Descriptor[0] = pVCpu->hmr0.s.uCurrentAsid;
1662 au64Descriptor[1] = GCPtr;
1663 }
1664
1665 int rc = VMXR0InvVPID(enmTlbFlush, &au64Descriptor[0]);
1666 AssertMsg(rc == VINF_SUCCESS,
1667 ("VMXR0InvVPID %#x %u %RGv failed with %Rrc\n", enmTlbFlush, pVCpu ? pVCpu->hmr0.s.uCurrentAsid : 0, GCPtr, rc));
1668
1669 if ( RT_SUCCESS(rc)
1670 && pVCpu)
1671 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
1672 NOREF(rc);
1673}
1674
1675
1676/**
1677 * Invalidates a guest page by guest virtual address. Only relevant for EPT/VPID,
1678 * otherwise there is nothing really to invalidate.
1679 *
1680 * @returns VBox status code.
1681 * @param pVCpu The cross context virtual CPU structure.
1682 * @param GCVirt Guest virtual address of the page to invalidate.
1683 */
1684VMMR0DECL(int) VMXR0InvalidatePage(PVMCPUCC pVCpu, RTGCPTR GCVirt)
1685{
1686 AssertPtr(pVCpu);
1687 LogFlowFunc(("pVCpu=%p GCVirt=%RGv\n", pVCpu, GCVirt));
1688
1689 if (!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH))
1690 {
1691 /*
1692 * We must invalidate the guest TLB entry in either case; we cannot ignore it even for
1693 * the EPT case. See @bugref{6043} and @bugref{6177}.
1694 *
1695 * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxFlushTLB*()
1696 * as this function may be called in a loop with individual addresses.
1697 */
1698 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1699 if (pVM->hmr0.s.vmx.fVpid)
1700 {
1701 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
1702 {
1703 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_INDIV_ADDR, GCVirt);
1704 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt);
1705 }
1706 else
1707 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1708 }
1709 else if (pVM->hmr0.s.fNestedPaging)
1710 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1711 }
1712
1713 return VINF_SUCCESS;
1714}
1715
1716
1717/**
1718 * Dummy placeholder for tagged-TLB flush handling before VM-entry. Used in the
1719 * case where neither EPT nor VPID is supported by the CPU.
1720 *
1721 * @param pHostCpu The HM physical-CPU structure.
1722 * @param pVCpu The cross context virtual CPU structure.
1723 *
1724 * @remarks Called with interrupts disabled.
1725 */
1726static void hmR0VmxFlushTaggedTlbNone(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1727{
1728 AssertPtr(pVCpu);
1729 AssertPtr(pHostCpu);
1730
1731 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1732
1733 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1734 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1735 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1736 pVCpu->hmr0.s.fForceTLBFlush = false;
1737 return;
1738}
1739
1740
1741/**
1742 * Flushes the tagged-TLB entries for EPT+VPID CPUs as necessary.
1743 *
1744 * @param pHostCpu The HM physical-CPU structure.
1745 * @param pVCpu The cross context virtual CPU structure.
1746 * @param pVmcsInfo The VMCS info. object.
1747 *
1748 * @remarks All references to "ASID" in this function pertain to "VPID" in Intel's
1749 *          nomenclature. We keep the "ASID" naming here to avoid confusion in compare
1750 *          statements, since the host-CPU copies are named "ASID".
1751 *
1752 * @remarks Called with interrupts disabled.
1753 */
1754static void hmR0VmxFlushTaggedTlbBoth(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1755{
1756#ifdef VBOX_WITH_STATISTICS
1757 bool fTlbFlushed = false;
1758# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { fTlbFlushed = true; } while (0)
1759# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { \
1760 if (!fTlbFlushed) \
1761 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); \
1762 } while (0)
1763#else
1764# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { } while (0)
1765# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { } while (0)
1766#endif
1767
1768 AssertPtr(pVCpu);
1769 AssertPtr(pHostCpu);
1770 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1771
1772 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1773 AssertMsg(pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid,
1774 ("hmR0VmxFlushTaggedTlbBoth cannot be invoked unless NestedPaging & VPID are enabled."
1775 "fNestedPaging=%RTbool fVpid=%RTbool", pVM->hmr0.s.fNestedPaging, pVM->hmr0.s.vmx.fVpid));
1776
1777 /*
1778 * Force a TLB flush for the first world-switch if the current CPU differs from the one we
1779 * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
1780 * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
1781 * cannot reuse the current ASID anymore.
1782 */
1783 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1784 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1785 {
1786 ++pHostCpu->uCurrentAsid;
1787 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1788 {
1789 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0. */
1790 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1791 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1792 }
1793
1794 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1795 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1796 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1797
1798 /*
1799 * Flush by EPT when we get rescheduled to a new host CPU to ensure EPT-only tagged mappings are also
1800 * invalidated. We don't need to flush-by-VPID here as flushing by EPT covers it. See @bugref{6568}.
1801 */
1802 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1803 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1804 HMVMX_SET_TAGGED_TLB_FLUSHED();
1805 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1806 }
1807 else if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) /* Check for explicit TLB flushes. */
1808 {
1809 /*
1810 * Changes to the EPT paging structures by the VMM require flushing by EPT as the CPU
1811 * creates guest-physical (i.e. only EPT-tagged) mappings while traversing the EPT
1812 * tables when EPT is in use. Flushing by VPID only flushes linear (only
1813 * VPID-tagged) and combined (EPT+VPID tagged) mappings but not guest-physical
1814 * mappings, see @bugref{6568}.
1815 *
1816 * See Intel spec. 28.3.2 "Creating and Using Cached Translation Information".
1817 */
1818 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1819 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1820 HMVMX_SET_TAGGED_TLB_FLUSHED();
1821 }
1822 else if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1823 {
1824 /*
1825 * The nested-guest specifies its own guest-physical address to use as the APIC-access
1826 * address which requires flushing the TLB of EPT cached structures.
1827 *
1828 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
1829 */
1830 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1831 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1832 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1833 HMVMX_SET_TAGGED_TLB_FLUSHED();
1834 }
1835
1836
1837 pVCpu->hmr0.s.fForceTLBFlush = false;
1838 HMVMX_UPDATE_FLUSH_SKIPPED_STAT();
1839
1840 Assert(pVCpu->hmr0.s.idLastCpu == pHostCpu->idCpu);
1841 Assert(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes);
1842 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
1843 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
1844 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
1845 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
1846 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
1847 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
1848 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
1849
1850 /* Update VMCS with the VPID. */
1851 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
1852 AssertRC(rc);
1853
1854#undef HMVMX_SET_TAGGED_TLB_FLUSHED
1855}
1856
1857
1858/**
1859 * Flushes the tagged-TLB entries for EPT CPUs as necessary.
1860 *
1861 * @param pHostCpu The HM physical-CPU structure.
1862 * @param pVCpu The cross context virtual CPU structure.
1863 * @param pVmcsInfo The VMCS info. object.
1864 *
1865 * @remarks Called with interrupts disabled.
1866 */
1867static void hmR0VmxFlushTaggedTlbEpt(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1868{
1869 AssertPtr(pVCpu);
1870 AssertPtr(pHostCpu);
1871 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1872 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked without NestedPaging."));
1873 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with VPID."));
1874
1875 /*
1876 * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last.
1877 * A change in the TLB flush count implies the host CPU is online after a suspend/resume.
1878 */
1879 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1880 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1881 {
1882 pVCpu->hmr0.s.fForceTLBFlush = true;
1883 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1884 }
1885
1886 /* Check for explicit TLB flushes. */
1887 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1888 {
1889 pVCpu->hmr0.s.fForceTLBFlush = true;
1890 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1891 }
1892
1893 /* Check for TLB flushes while switching to/from a nested-guest. */
1894 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1895 {
1896 pVCpu->hmr0.s.fForceTLBFlush = true;
1897 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1898 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1899 }
1900
1901 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1902 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1903
1904 if (pVCpu->hmr0.s.fForceTLBFlush)
1905 {
1906 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.enmTlbFlushEpt);
1907 pVCpu->hmr0.s.fForceTLBFlush = false;
1908 }
1909}
1910
1911
1912/**
1913 * Flushes the tagged-TLB entries for VPID CPUs as necessary.
1914 *
1915 * @param pHostCpu The HM physical-CPU structure.
1916 * @param pVCpu The cross context virtual CPU structure.
1917 *
1918 * @remarks Called with interrupts disabled.
1919 */
1920static void hmR0VmxFlushTaggedTlbVpid(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1921{
1922 AssertPtr(pVCpu);
1923 AssertPtr(pHostCpu);
1924 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1925 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTlbVpid cannot be invoked without VPID."));
1926 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTlbVpid cannot be invoked with NestedPaging"));
1927
1928 /*
1929 * Force a TLB flush for the first world switch if the current CPU differs from the one we
1930 * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
1931 * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
1932 * cannot reuse the current ASID anymore.
1933 */
1934 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1935 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1936 {
1937 pVCpu->hmr0.s.fForceTLBFlush = true;
1938 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1939 }
1940
1941 /* Check for explicit TLB flushes. */
1942 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1943 {
1944 /*
1945 * If we ever support VPID flush combinations other than ALL or SINGLE-context (see
1946 * hmR0VmxSetupTaggedTlb()) we would need to explicitly flush in this case (add an
1947 * fExplicitFlush = true here and change the pHostCpu->fFlushAsidBeforeUse check below to
1948 * include fExplicitFlush's too) - an obscure corner case.
1949 */
1950 pVCpu->hmr0.s.fForceTLBFlush = true;
1951 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1952 }
1953
1954 /* Check for TLB flushes while switching to/from a nested-guest. */
1955 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1956 {
1957 pVCpu->hmr0.s.fForceTLBFlush = true;
1958 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1959 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1960 }
1961
1962 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1963 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1964 if (pVCpu->hmr0.s.fForceTLBFlush)
1965 {
1966 ++pHostCpu->uCurrentAsid;
1967 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1968 {
1969 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0 */
1970 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1971 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1972 }
1973
1974 pVCpu->hmr0.s.fForceTLBFlush = false;
1975 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1976 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1977 if (pHostCpu->fFlushAsidBeforeUse)
1978 {
1979 if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_SINGLE_CONTEXT)
1980 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_SINGLE_CONTEXT, 0 /* GCPtr */);
1981 else if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1982 {
1983 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_ALL_CONTEXTS, 0 /* GCPtr */);
1984 pHostCpu->fFlushAsidBeforeUse = false;
1985 }
1986 else
1987 {
1988 /* hmR0VmxSetupTaggedTlb() ensures we never get here. Paranoia. */
1989 AssertMsgFailed(("Unsupported VPID-flush context type.\n"));
1990 }
1991 }
1992 }
1993
1994 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
1995 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
1996 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
1997 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
1998 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
1999 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
2000 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
2001
2002 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
2003 AssertRC(rc);
2004}
2005
2006
2007/**
2008 * Flushes the guest TLB entry based on CPU capabilities.
2009 *
2010 * @param pHostCpu The HM physical-CPU structure.
2011 * @param pVCpu The cross context virtual CPU structure.
2012 * @param pVmcsInfo The VMCS info. object.
2013 *
2014 * @remarks Called with interrupts disabled.
2015 */
2016static void hmR0VmxFlushTaggedTlb(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2017{
2018#ifdef HMVMX_ALWAYS_FLUSH_TLB
2019 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
2020#endif
2021 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2022 switch (pVM->hmr0.s.vmx.enmTlbFlushType)
2023 {
2024 case VMXTLBFLUSHTYPE_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pHostCpu, pVCpu, pVmcsInfo); break;
2025 case VMXTLBFLUSHTYPE_EPT: hmR0VmxFlushTaggedTlbEpt(pHostCpu, pVCpu, pVmcsInfo); break;
2026 case VMXTLBFLUSHTYPE_VPID: hmR0VmxFlushTaggedTlbVpid(pHostCpu, pVCpu); break;
2027 case VMXTLBFLUSHTYPE_NONE: hmR0VmxFlushTaggedTlbNone(pHostCpu, pVCpu); break;
2028 default:
2029 AssertMsgFailed(("Invalid flush-tag function identifier\n"));
2030 break;
2031 }
2032 /* Don't assert that VMCPU_FF_TLB_FLUSH should no longer be pending. It can be set by other EMTs. */
2033}
2034
2035
2036/**
2037 * Sets up the appropriate tagged TLB-flush level and handler for flushing guest
2038 * TLB entries from the host TLB before VM-entry.
2039 *
2040 * @returns VBox status code.
2041 * @param pVM The cross context VM structure.
2042 */
2043static int hmR0VmxSetupTaggedTlb(PVMCC pVM)
2044{
2045 /*
2046 * Determine optimal flush type for nested paging.
2047 * We cannot ignore EPT if no suitable flush type is supported by the CPU, as we've already set up
2048 * unrestricted guest execution (see hmR3InitFinalizeR0()).
2049 */
2050 if (pVM->hmr0.s.fNestedPaging)
2051 {
2052 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT)
2053 {
2054 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT)
2055 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_SINGLE_CONTEXT;
2056 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
2057 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_ALL_CONTEXTS;
2058 else
2059 {
2060 /* Shouldn't happen. EPT is supported but no suitable flush-types supported. */
2061 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2062 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_FLUSH_TYPE_UNSUPPORTED;
2063 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2064 }
2065
2066 /* Make sure the write-back cacheable memory type for EPT is supported. */
2067 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_MEMTYPE_WB)))
2068 {
2069 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2070 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_MEM_TYPE_NOT_WB;
2071 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2072 }
2073
2074 /* EPT requires a page-walk length of 4. */
2075 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_PAGE_WALK_LENGTH_4)))
2076 {
2077 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2078 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_PAGE_WALK_LENGTH_UNSUPPORTED;
2079 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2080 }
2081 }
2082 else
2083 {
2084 /* Shouldn't happen. EPT is supported but INVEPT instruction is not supported. */
2085 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2086 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_INVEPT_UNAVAILABLE;
2087 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2088 }
2089 }
2090
2091 /*
2092 * Determine optimal flush type for VPID.
2093 */
2094 if (pVM->hmr0.s.vmx.fVpid)
2095 {
2096 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID)
2097 {
2098 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT)
2099 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_SINGLE_CONTEXT;
2100 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS)
2101 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_ALL_CONTEXTS;
2102 else
2103 {
2104 /* Neither SINGLE nor ALL-context flush types for VPID are supported by the CPU. Ignore VPID capability. */
2105 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
2106 LogRelFunc(("Only INDIV_ADDR supported. Ignoring VPID.\n"));
2107 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS)
2108 LogRelFunc(("Only SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
2109 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2110 pVM->hmr0.s.vmx.fVpid = false;
2111 }
2112 }
2113 else
2114 {
2115 /* Shouldn't happen. VPID is supported but INVVPID is not supported by the CPU. Ignore VPID capability. */
2116            Log4Func(("VPID supported without INVVPID support. Ignoring VPID.\n"));
2117 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2118 pVM->hmr0.s.vmx.fVpid = false;
2119 }
2120 }
2121
2122 /*
2123 * Setup the handler for flushing tagged-TLBs.
2124 */
2125 if (pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid)
2126 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT_VPID;
2127 else if (pVM->hmr0.s.fNestedPaging)
2128 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT;
2129 else if (pVM->hmr0.s.vmx.fVpid)
2130 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_VPID;
2131 else
2132 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_NONE;
2133
2134
2135 /*
2136 * Copy out the result to ring-3.
2137 */
2138 pVM->hm.s.ForR3.vmx.fVpid = pVM->hmr0.s.vmx.fVpid;
2139 pVM->hm.s.ForR3.vmx.enmTlbFlushType = pVM->hmr0.s.vmx.enmTlbFlushType;
2140 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt;
2141 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid;
2142 return VINF_SUCCESS;
2143}
2144
2145
2146/**
2147 * Sets up the LBR MSR ranges based on the host CPU.
2148 *
2149 * @returns VBox status code.
2150 * @param pVM The cross context VM structure.
2151 *
2152 * @sa nemR3DarwinSetupLbrMsrRange
2153 */
2154static int hmR0VmxSetupLbrMsrRange(PVMCC pVM)
2155{
2156 Assert(pVM->hmr0.s.vmx.fLbr);
2157 uint32_t idLbrFromIpMsrFirst;
2158 uint32_t idLbrFromIpMsrLast;
2159 uint32_t idLbrToIpMsrFirst;
2160 uint32_t idLbrToIpMsrLast;
2161 uint32_t idLbrTosMsr;
2162
2163 /*
2164 * Determine the LBR MSRs supported for this host CPU family and model.
2165 *
2166 * See Intel spec. 17.4.8 "LBR Stack".
2167 * See Intel "Model-Specific Registers" spec.
2168 */
2169 uint32_t const uFamilyModel = (g_CpumHostFeatures.s.uFamily << 8)
2170 | g_CpumHostFeatures.s.uModel;
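    /* The cases below match on (family << 8) | model, e.g. 0x068e is family 6, model 0x8E. */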
2171 switch (uFamilyModel)
2172 {
2173 case 0x0f01: case 0x0f02:
2174 idLbrFromIpMsrFirst = MSR_P4_LASTBRANCH_0;
2175 idLbrFromIpMsrLast = MSR_P4_LASTBRANCH_3;
2176 idLbrToIpMsrFirst = 0x0;
2177 idLbrToIpMsrLast = 0x0;
2178 idLbrTosMsr = MSR_P4_LASTBRANCH_TOS;
2179 break;
2180
2181 case 0x065c: case 0x065f: case 0x064e: case 0x065e: case 0x068e:
2182 case 0x069e: case 0x0655: case 0x0666: case 0x067a: case 0x0667:
2183 case 0x066a: case 0x066c: case 0x067d: case 0x067e:
2184 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2185 idLbrFromIpMsrLast = MSR_LASTBRANCH_31_FROM_IP;
2186 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2187 idLbrToIpMsrLast = MSR_LASTBRANCH_31_TO_IP;
2188 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2189 break;
2190
2191 case 0x063d: case 0x0647: case 0x064f: case 0x0656: case 0x063c:
2192 case 0x0645: case 0x0646: case 0x063f: case 0x062a: case 0x062d:
2193 case 0x063a: case 0x063e: case 0x061a: case 0x061e: case 0x061f:
2194 case 0x062e: case 0x0625: case 0x062c: case 0x062f:
2195 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2196 idLbrFromIpMsrLast = MSR_LASTBRANCH_15_FROM_IP;
2197 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2198 idLbrToIpMsrLast = MSR_LASTBRANCH_15_TO_IP;
2199 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2200 break;
2201
2202 case 0x0617: case 0x061d: case 0x060f:
2203 idLbrFromIpMsrFirst = MSR_CORE2_LASTBRANCH_0_FROM_IP;
2204 idLbrFromIpMsrLast = MSR_CORE2_LASTBRANCH_3_FROM_IP;
2205 idLbrToIpMsrFirst = MSR_CORE2_LASTBRANCH_0_TO_IP;
2206 idLbrToIpMsrLast = MSR_CORE2_LASTBRANCH_3_TO_IP;
2207 idLbrTosMsr = MSR_CORE2_LASTBRANCH_TOS;
2208 break;
2209
2210 /* Atom and related microarchitectures we don't care about:
2211 case 0x0637: case 0x064a: case 0x064c: case 0x064d: case 0x065a:
2212 case 0x065d: case 0x061c: case 0x0626: case 0x0627: case 0x0635:
2213 case 0x0636: */
2214 /* All other CPUs: */
2215 default:
2216 {
2217 LogRelFunc(("Could not determine LBR stack size for the CPU model %#x\n", uFamilyModel));
2218 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_UNKNOWN;
2219 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2220 }
2221 }
2222
2223 /*
2224 * Validate.
2225 */
2226 uint32_t const cLbrStack = idLbrFromIpMsrLast - idLbrFromIpMsrFirst + 1;
2227 PCVMCPU pVCpu0 = VMCC_GET_CPU_0(pVM);
2228 AssertCompile( RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr)
2229 == RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrToIpMsr));
2230 if (cLbrStack > RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr))
2231 {
2232 LogRelFunc(("LBR stack size of the CPU (%u) exceeds our buffer size\n", cLbrStack));
2233 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_OVERFLOW;
2234 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2235 }
2236 NOREF(pVCpu0);
2237
2238 /*
2239 * Update the LBR info. to the VM struct. for use later.
2240 */
2241 pVM->hmr0.s.vmx.idLbrTosMsr = idLbrTosMsr;
2242
2243 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrFirst = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst = idLbrFromIpMsrFirst;
2244 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrLast = pVM->hmr0.s.vmx.idLbrFromIpMsrLast = idLbrFromIpMsrLast;
2245
2246 pVM->hm.s.ForR3.vmx.idLbrToIpMsrFirst = pVM->hmr0.s.vmx.idLbrToIpMsrFirst = idLbrToIpMsrFirst;
2247 pVM->hm.s.ForR3.vmx.idLbrToIpMsrLast = pVM->hmr0.s.vmx.idLbrToIpMsrLast = idLbrToIpMsrLast;
2248 return VINF_SUCCESS;
2249}
2250
2251#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2252
2253/**
2254 * Sets up the shadow VMCS fields arrays.
2255 *
2256 * This function builds arrays of VMCS fields to sync the shadow VMCS later while
2257 * executing the guest.
2258 *
2259 * @returns VBox status code.
2260 * @param pVM The cross context VM structure.
2261 */
2262static int hmR0VmxSetupShadowVmcsFieldsArrays(PVMCC pVM)
2263{
2264 /*
2265 * Paranoia. Ensure we haven't exposed the VMWRITE-All VMX feature to the guest
2266 * when the host does not support it.
2267 */
2268 bool const fGstVmwriteAll = pVM->cpum.ro.GuestFeatures.fVmxVmwriteAll;
2269 if ( !fGstVmwriteAll
2270 || (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL))
2271 { /* likely. */ }
2272 else
2273 {
2274 LogRelFunc(("VMX VMWRITE-All feature exposed to the guest but host CPU does not support it!\n"));
2275 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_GST_HOST_VMWRITE_ALL;
2276 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2277 }
2278
2279 uint32_t const cVmcsFields = RT_ELEMENTS(g_aVmcsFields);
2280 uint32_t cRwFields = 0;
2281 uint32_t cRoFields = 0;
2282 for (uint32_t i = 0; i < cVmcsFields; i++)
2283 {
2284 VMXVMCSFIELD VmcsField;
2285 VmcsField.u = g_aVmcsFields[i];
2286
2287 /*
2288 * We will be writing "FULL" (64-bit) fields while syncing the shadow VMCS.
2289 * Therefore, "HIGH" (32-bit portion of 64-bit) fields must not be included
2290 * in the shadow VMCS fields array as they would be redundant.
2291 *
2292 * If the VMCS field depends on a CPU feature that is not exposed to the guest,
2293 * we must not include it in the shadow VMCS fields array. Guests attempting to
2294 * VMREAD/VMWRITE such VMCS fields would cause a VM-exit and we shall emulate
2295 * the required behavior.
2296 */
2297 if ( VmcsField.n.fAccessType == VMX_VMCSFIELD_ACCESS_FULL
2298 && CPUMIsGuestVmxVmcsFieldValid(pVM, VmcsField.u))
2299 {
2300 /*
2301 * Read-only fields are placed in a separate array so that while syncing shadow
2302 * VMCS fields later (which is more performance critical) we can avoid branches.
2303 *
2304 * However, if the guest can write to all fields (including read-only fields),
2305 * we treat it as a read/write field. Otherwise, writing to these fields would
2306 * cause a VMWRITE instruction error while syncing the shadow VMCS.
2307 */
2308 if ( fGstVmwriteAll
2309 || !VMXIsVmcsFieldReadOnly(VmcsField.u))
2310 pVM->hmr0.s.vmx.paShadowVmcsFields[cRwFields++] = VmcsField.u;
2311 else
2312 pVM->hmr0.s.vmx.paShadowVmcsRoFields[cRoFields++] = VmcsField.u;
2313 }
2314 }
2315
2316 /* Update the counts. */
2317 pVM->hmr0.s.vmx.cShadowVmcsFields = cRwFields;
2318 pVM->hmr0.s.vmx.cShadowVmcsRoFields = cRoFields;
2319 return VINF_SUCCESS;
2320}
2321
2322
2323/**
2324 * Sets up the VMREAD and VMWRITE bitmaps.
2325 *
2326 * @param pVM The cross context VM structure.
2327 */
2328static void hmR0VmxSetupVmreadVmwriteBitmaps(PVMCC pVM)
2329{
2330 /*
2331 * By default, ensure guest attempts to access any VMCS fields cause VM-exits.
2332 */
2333 uint32_t const cbBitmap = X86_PAGE_4K_SIZE;
2334 uint8_t *pbVmreadBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmreadBitmap;
2335 uint8_t *pbVmwriteBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmwriteBitmap;
2336 ASMMemFill32(pbVmreadBitmap, cbBitmap, UINT32_C(0xffffffff));
2337 ASMMemFill32(pbVmwriteBitmap, cbBitmap, UINT32_C(0xffffffff));
2338
2339 /*
2340 * Skip intercepting VMREAD/VMWRITE to guest read/write fields in the
2341 * VMREAD and VMWRITE bitmaps.
2342 */
2343 {
2344 uint32_t const *paShadowVmcsFields = pVM->hmr0.s.vmx.paShadowVmcsFields;
2345 uint32_t const cShadowVmcsFields = pVM->hmr0.s.vmx.cShadowVmcsFields;
2346 for (uint32_t i = 0; i < cShadowVmcsFields; i++)
2347 {
2348 uint32_t const uVmcsField = paShadowVmcsFields[i];
2349 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2350 Assert(uVmcsField >> 3 < cbBitmap);
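            /* The bitmaps are indexed by bits 14:0 of the VMCS field encoding; a clear bit
               lets the guest VMREAD/VMWRITE the field without causing a VM-exit. */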
2351 ASMBitClear(pbVmreadBitmap, uVmcsField & 0x7fff);
2352 ASMBitClear(pbVmwriteBitmap, uVmcsField & 0x7fff);
2353 }
2354 }
2355
2356 /*
2357 * Skip intercepting VMREAD for guest read-only fields in the VMREAD bitmap
2358 * if the host supports VMWRITE to all supported VMCS fields.
2359 */
2360 if (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL)
2361 {
2362 uint32_t const *paShadowVmcsRoFields = pVM->hmr0.s.vmx.paShadowVmcsRoFields;
2363 uint32_t const cShadowVmcsRoFields = pVM->hmr0.s.vmx.cShadowVmcsRoFields;
2364 for (uint32_t i = 0; i < cShadowVmcsRoFields; i++)
2365 {
2366 uint32_t const uVmcsField = paShadowVmcsRoFields[i];
2367 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2368 Assert(uVmcsField >> 3 < cbBitmap);
2369 ASMBitClear(pbVmreadBitmap, uVmcsField & 0x7fff);
2370 }
2371 }
2372}
2373
2374#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
2375
2376/**
2377 * Sets up the virtual-APIC page address for the VMCS.
2378 *
2379 * @param pVmcsInfo The VMCS info. object.
2380 */
2381DECLINLINE(void) hmR0VmxSetupVmcsVirtApicAddr(PCVMXVMCSINFO pVmcsInfo)
2382{
2383 RTHCPHYS const HCPhysVirtApic = pVmcsInfo->HCPhysVirtApic;
2384 Assert(HCPhysVirtApic != NIL_RTHCPHYS);
2385 Assert(!(HCPhysVirtApic & 0xfff)); /* Bits 11:0 MBZ. */
2386 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
2387 AssertRC(rc);
2388}
2389
2390
2391/**
2392 * Sets up the MSR-bitmap address for the VMCS.
2393 *
2394 * @param pVmcsInfo The VMCS info. object.
2395 */
2396DECLINLINE(void) hmR0VmxSetupVmcsMsrBitmapAddr(PCVMXVMCSINFO pVmcsInfo)
2397{
2398 RTHCPHYS const HCPhysMsrBitmap = pVmcsInfo->HCPhysMsrBitmap;
2399 Assert(HCPhysMsrBitmap != NIL_RTHCPHYS);
2400 Assert(!(HCPhysMsrBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2401 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, HCPhysMsrBitmap);
2402 AssertRC(rc);
2403}
2404
2405
2406/**
2407 * Sets up the APIC-access page address for the VMCS.
2408 *
2409 * @param pVCpu The cross context virtual CPU structure.
2410 */
2411DECLINLINE(void) hmR0VmxSetupVmcsApicAccessAddr(PVMCPUCC pVCpu)
2412{
2413 RTHCPHYS const HCPhysApicAccess = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysApicAccess;
2414 Assert(HCPhysApicAccess != NIL_RTHCPHYS);
2415 Assert(!(HCPhysApicAccess & 0xfff)); /* Bits 11:0 MBZ. */
2416 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
2417 AssertRC(rc);
2418}
2419
2420#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2421
2422/**
2423 * Sets up the VMREAD bitmap address for the VMCS.
2424 *
2425 * @param pVCpu The cross context virtual CPU structure.
2426 */
2427DECLINLINE(void) hmR0VmxSetupVmcsVmreadBitmapAddr(PVMCPUCC pVCpu)
2428{
2429 RTHCPHYS const HCPhysVmreadBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmreadBitmap;
2430 Assert(HCPhysVmreadBitmap != NIL_RTHCPHYS);
2431 Assert(!(HCPhysVmreadBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2432 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMREAD_BITMAP_FULL, HCPhysVmreadBitmap);
2433 AssertRC(rc);
2434}
2435
2436
2437/**
2438 * Sets up the VMWRITE bitmap address for the VMCS.
2439 *
2440 * @param pVCpu The cross context virtual CPU structure.
2441 */
2442DECLINLINE(void) hmR0VmxSetupVmcsVmwriteBitmapAddr(PVMCPUCC pVCpu)
2443{
2444 RTHCPHYS const HCPhysVmwriteBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmwriteBitmap;
2445 Assert(HCPhysVmwriteBitmap != NIL_RTHCPHYS);
2446 Assert(!(HCPhysVmwriteBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2447 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMWRITE_BITMAP_FULL, HCPhysVmwriteBitmap);
2448 AssertRC(rc);
2449}
2450
2451#endif
2452
2453/**
2454 * Sets up the VM-entry MSR load, VM-exit MSR-store and VM-exit MSR-load addresses
2455 * in the VMCS.
2456 *
2457 * @returns VBox status code.
2458 * @param pVmcsInfo The VMCS info. object.
2459 */
2460DECLINLINE(int) hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(PVMXVMCSINFO pVmcsInfo)
2461{
2462 RTHCPHYS const HCPhysGuestMsrLoad = pVmcsInfo->HCPhysGuestMsrLoad;
2463 Assert(HCPhysGuestMsrLoad != NIL_RTHCPHYS);
2464 Assert(!(HCPhysGuestMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2465
2466 RTHCPHYS const HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrStore;
2467 Assert(HCPhysGuestMsrStore != NIL_RTHCPHYS);
2468 Assert(!(HCPhysGuestMsrStore & 0xf)); /* Bits 3:0 MBZ. */
2469
2470 RTHCPHYS const HCPhysHostMsrLoad = pVmcsInfo->HCPhysHostMsrLoad;
2471 Assert(HCPhysHostMsrLoad != NIL_RTHCPHYS);
2472 Assert(!(HCPhysHostMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2473
2474 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, HCPhysGuestMsrLoad); AssertRC(rc);
2475 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, HCPhysGuestMsrStore); AssertRC(rc);
2476 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, HCPhysHostMsrLoad); AssertRC(rc);
2477 return VINF_SUCCESS;
2478}
2479
2480
2481/**
2482 * Sets up MSR permissions in the MSR bitmap of a VMCS info. object.
2483 *
2484 * @param pVCpu The cross context virtual CPU structure.
2485 * @param pVmcsInfo The VMCS info. object.
2486 */
2487static void hmR0VmxSetupVmcsMsrPermissions(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2488{
2489 Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS);
2490
2491 /*
2492 * By default, ensure guest attempts to access any MSR cause VM-exits.
2493 * This shall later be relaxed for specific MSRs as necessary.
2494 *
2495 * Note: For nested-guests, the entire bitmap will be merged prior to
2496 * executing the nested-guest using hardware-assisted VMX and hence there
2497 * is no need to perform this operation. See hmR0VmxMergeMsrBitmapNested.
2498 */
2499 Assert(pVmcsInfo->pvMsrBitmap);
2500 ASMMemFill32(pVmcsInfo->pvMsrBitmap, X86_PAGE_4K_SIZE, UINT32_C(0xffffffff));
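    /* The 4K MSR bitmap is split into four 1K regions: read and write bitmaps for the low
       (0x00000000..0x00001fff) and high (0xc0000000..0xc0001fff) MSR ranges. A set bit
       intercepts the corresponding RDMSR/WRMSR with a VM-exit. */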
2501
2502 /*
2503 * The guest can access the following MSRs (read, write) without causing
2504 * VM-exits; they are loaded/stored automatically using fields in the VMCS.
2505 */
2506 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2507 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_CS, VMXMSRPM_ALLOW_RD_WR);
2508 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_ESP, VMXMSRPM_ALLOW_RD_WR);
2509 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_EIP, VMXMSRPM_ALLOW_RD_WR);
2510 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2511 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_FS_BASE, VMXMSRPM_ALLOW_RD_WR);
2512
2513 /*
2514 * The IA32_PRED_CMD and IA32_FLUSH_CMD MSRs are write-only and have no state
2515 * associated with them. We never need to intercept access (writes need to be
2516 * executed without causing a VM-exit, reads will #GP fault anyway).
2517 *
2518 * The IA32_SPEC_CTRL MSR is read/write and has state. We allow the guest to
2519 * read/write it. We swap the guest/host MSR value using the
2520 * auto-load/store MSR area.
2521 */
2522 if (pVM->cpum.ro.GuestFeatures.fIbpb)
2523 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_PRED_CMD, VMXMSRPM_ALLOW_RD_WR);
2524 if (pVM->cpum.ro.GuestFeatures.fFlushCmd)
2525 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_FLUSH_CMD, VMXMSRPM_ALLOW_RD_WR);
2526 if (pVM->cpum.ro.GuestFeatures.fIbrs)
2527 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SPEC_CTRL, VMXMSRPM_ALLOW_RD_WR);
2528
2529 /*
2530 * Allow full read/write access for the following MSRs (mandatory for VT-x)
2531 * required for 64-bit guests.
2532 */
2533 if (pVM->hmr0.s.fAllow64BitGuests)
2534 {
2535 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_LSTAR, VMXMSRPM_ALLOW_RD_WR);
2536 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K6_STAR, VMXMSRPM_ALLOW_RD_WR);
2537 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_SF_MASK, VMXMSRPM_ALLOW_RD_WR);
2538 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_KERNEL_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2539 }
2540
2541 /*
2542 * IA32_EFER MSR is always intercepted, see @bugref{9180#c37}.
2543 */
2544#ifdef VBOX_STRICT
2545 Assert(pVmcsInfo->pvMsrBitmap);
2546 uint32_t const fMsrpmEfer = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, MSR_K6_EFER);
2547 Assert(fMsrpmEfer == VMXMSRPM_EXIT_RD_WR);
2548#endif
2549}
2550
2551
2552/**
2553 * Sets up pin-based VM-execution controls in the VMCS.
2554 *
2555 * @returns VBox status code.
2556 * @param pVCpu The cross context virtual CPU structure.
2557 * @param pVmcsInfo The VMCS info. object.
2558 */
2559static int hmR0VmxSetupVmcsPinCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2560{
2561 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2562 uint32_t fVal = g_HmMsrs.u.vmx.PinCtls.n.allowed0; /* Bits set here must always be set. */
2563 uint32_t const fZap = g_HmMsrs.u.vmx.PinCtls.n.allowed1; /* Bits cleared here must always be cleared. */
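    /* Every control we request must be permitted by allowed1; the (fVal & fZap) != fVal
       check below catches attempts to set bits the CPU does not support. */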
2564
2565 fVal |= VMX_PIN_CTLS_EXT_INT_EXIT /* External interrupts cause a VM-exit. */
2566 | VMX_PIN_CTLS_NMI_EXIT; /* Non-maskable interrupts (NMIs) cause a VM-exit. */
2567
2568 if (g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_VIRT_NMI)
2569 fVal |= VMX_PIN_CTLS_VIRT_NMI; /* Use virtual NMIs and virtual-NMI blocking features. */
2570
2571 /* Enable the VMX-preemption timer. */
2572 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
2573 {
2574 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_PREEMPT_TIMER);
2575 fVal |= VMX_PIN_CTLS_PREEMPT_TIMER;
2576 }
2577
2578#if 0
2579 /* Enable posted-interrupt processing. */
2580 if (pVM->hm.s.fPostedIntrs)
2581 {
2582 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_POSTED_INT);
2583 Assert(g_HmMsrs.u.vmx.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_ACK_EXT_INT);
2584 fVal |= VMX_PIN_CTLS_POSTED_INT;
2585 }
2586#endif
2587
2588 if ((fVal & fZap) != fVal)
2589 {
2590 LogRelFunc(("Invalid pin-based VM-execution controls combo! Cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2591 g_HmMsrs.u.vmx.PinCtls.n.allowed0, fVal, fZap));
2592 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PIN_EXEC;
2593 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2594 }
2595
2596 /* Commit it to the VMCS and update our cache. */
2597 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, fVal);
2598 AssertRC(rc);
2599 pVmcsInfo->u32PinCtls = fVal;
2600
2601 return VINF_SUCCESS;
2602}
2603
2604
2605/**
2606 * Sets up secondary processor-based VM-execution controls in the VMCS.
2607 *
2608 * @returns VBox status code.
2609 * @param pVCpu The cross context virtual CPU structure.
2610 * @param pVmcsInfo The VMCS info. object.
2611 */
2612static int hmR0VmxSetupVmcsProcCtls2(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2613{
2614 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2615 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls2.n.allowed0; /* Bits set here must be set in the VMCS. */
2616 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls2.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2617
2618 /* WBINVD causes a VM-exit. */
2619 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_WBINVD_EXIT)
2620 fVal |= VMX_PROC_CTLS2_WBINVD_EXIT;
2621
2622 /* Enable EPT (aka nested-paging). */
2623 if (pVM->hmr0.s.fNestedPaging)
2624 fVal |= VMX_PROC_CTLS2_EPT;
2625
2626    /* Enable the INVPCID instruction if we expose it to the guest and it is supported
2627       by the hardware. Without this, a guest executing INVPCID would cause a #UD. */
2628 if ( pVM->cpum.ro.GuestFeatures.fInvpcid
2629 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_INVPCID))
2630 fVal |= VMX_PROC_CTLS2_INVPCID;
2631
2632 /* Enable VPID. */
2633 if (pVM->hmr0.s.vmx.fVpid)
2634 fVal |= VMX_PROC_CTLS2_VPID;
2635
2636 /* Enable unrestricted guest execution. */
2637 if (pVM->hmr0.s.vmx.fUnrestrictedGuest)
2638 fVal |= VMX_PROC_CTLS2_UNRESTRICTED_GUEST;
2639
2640#if 0
2641 if (pVM->hm.s.fVirtApicRegs)
2642 {
2643 /* Enable APIC-register virtualization. */
2644 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_APIC_REG_VIRT);
2645 fVal |= VMX_PROC_CTLS2_APIC_REG_VIRT;
2646
2647 /* Enable virtual-interrupt delivery. */
2648 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_INTR_DELIVERY);
2649 fVal |= VMX_PROC_CTLS2_VIRT_INTR_DELIVERY;
2650 }
2651#endif
2652
2653    /* Virtualize APIC accesses if supported by the CPU. The virtual-APIC page is
2654 where the TPR shadow resides. */
2655 /** @todo VIRT_X2APIC support, it's mutually exclusive with this. So must be
2656 * done dynamically. */
2657 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
2658 {
2659 fVal |= VMX_PROC_CTLS2_VIRT_APIC_ACCESS;
2660 hmR0VmxSetupVmcsApicAccessAddr(pVCpu);
2661 }
2662
2663    /* Enable the RDTSCP instruction if we expose it to the guest and it is supported
2664       by the hardware. Without this, a guest executing RDTSCP would cause a #UD. */
2665 if ( pVM->cpum.ro.GuestFeatures.fRdTscP
2666 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_RDTSCP))
2667 fVal |= VMX_PROC_CTLS2_RDTSCP;
2668
2669 /* Enable Pause-Loop exiting. */
2670 if ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT)
2671 && pVM->hm.s.vmx.cPleGapTicks
2672 && pVM->hm.s.vmx.cPleWindowTicks)
2673 {
2674 fVal |= VMX_PROC_CTLS2_PAUSE_LOOP_EXIT;
2675
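        /* PLE_GAP is the maximum TSC delta between successive PAUSEs still treated as one
           spin loop; PLE_WINDOW is the maximum time (in TSC ticks) the guest may spin in
           such a loop before a VM-exit is taken. */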
2676 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, pVM->hm.s.vmx.cPleGapTicks); AssertRC(rc);
2677 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, pVM->hm.s.vmx.cPleWindowTicks); AssertRC(rc);
2678 }
2679
2680 if ((fVal & fZap) != fVal)
2681 {
2682 LogRelFunc(("Invalid secondary processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2683 g_HmMsrs.u.vmx.ProcCtls2.n.allowed0, fVal, fZap));
2684 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC2;
2685 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2686 }
2687
2688 /* Commit it to the VMCS and update our cache. */
2689 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, fVal);
2690 AssertRC(rc);
2691 pVmcsInfo->u32ProcCtls2 = fVal;
2692
2693 return VINF_SUCCESS;
2694}
2695
2696
2697/**
2698 * Sets up processor-based VM-execution controls in the VMCS.
2699 *
2700 * @returns VBox status code.
2701 * @param pVCpu The cross context virtual CPU structure.
2702 * @param pVmcsInfo The VMCS info. object.
2703 */
2704static int hmR0VmxSetupVmcsProcCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2705{
2706 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2707 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls.n.allowed0; /* Bits set here must be set in the VMCS. */
2708 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2709
2710 fVal |= VMX_PROC_CTLS_HLT_EXIT /* HLT causes a VM-exit. */
2711 | VMX_PROC_CTLS_USE_TSC_OFFSETTING /* Use TSC-offsetting. */
2712 | VMX_PROC_CTLS_MOV_DR_EXIT /* MOV DRx causes a VM-exit. */
2713 | VMX_PROC_CTLS_UNCOND_IO_EXIT /* All IO instructions cause a VM-exit. */
2714 | VMX_PROC_CTLS_RDPMC_EXIT /* RDPMC causes a VM-exit. */
2715 | VMX_PROC_CTLS_MONITOR_EXIT /* MONITOR causes a VM-exit. */
2716 | VMX_PROC_CTLS_MWAIT_EXIT; /* MWAIT causes a VM-exit. */
2717
2718    /* We toggle VMX_PROC_CTLS_MOV_DR_EXIT later; check that it is not -always- required to be set or cleared. */
2719 if ( !(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MOV_DR_EXIT)
2720 || (g_HmMsrs.u.vmx.ProcCtls.n.allowed0 & VMX_PROC_CTLS_MOV_DR_EXIT))
2721 {
2722 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_MOV_DRX_EXIT;
2723 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2724 }
2725
2726 /* Without nested paging, INVLPG (also affects INVPCID) and MOV CR3 instructions should cause VM-exits. */
2727 if (!pVM->hmr0.s.fNestedPaging)
2728 {
2729 Assert(!pVM->hmr0.s.vmx.fUnrestrictedGuest);
2730 fVal |= VMX_PROC_CTLS_INVLPG_EXIT
2731 | VMX_PROC_CTLS_CR3_LOAD_EXIT
2732 | VMX_PROC_CTLS_CR3_STORE_EXIT;
2733 }
2734
2735 /* Use TPR shadowing if supported by the CPU. */
2736 if ( PDMHasApic(pVM)
2737 && (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW))
2738 {
2739 fVal |= VMX_PROC_CTLS_USE_TPR_SHADOW; /* CR8 reads from the Virtual-APIC page. */
2740 /* CR8 writes cause a VM-exit based on TPR threshold. */
2741 Assert(!(fVal & VMX_PROC_CTLS_CR8_STORE_EXIT));
2742 Assert(!(fVal & VMX_PROC_CTLS_CR8_LOAD_EXIT));
2743 hmR0VmxSetupVmcsVirtApicAddr(pVmcsInfo);
2744 }
2745 else
2746 {
2747 /* Some 32-bit CPUs do not support CR8 load/store exiting as MOV CR8 is
2748 invalid on 32-bit Intel CPUs. Set this control only for 64-bit guests. */
2749 if (pVM->hmr0.s.fAllow64BitGuests)
2750 fVal |= VMX_PROC_CTLS_CR8_STORE_EXIT /* CR8 reads cause a VM-exit. */
2751 | VMX_PROC_CTLS_CR8_LOAD_EXIT; /* CR8 writes cause a VM-exit. */
2752 }
2753
2754 /* Use MSR-bitmaps if supported by the CPU. */
2755 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2756 {
2757 fVal |= VMX_PROC_CTLS_USE_MSR_BITMAPS;
2758 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
2759 }
2760
2761 /* Use the secondary processor-based VM-execution controls if supported by the CPU. */
2762 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2763 fVal |= VMX_PROC_CTLS_USE_SECONDARY_CTLS;
2764
2765 if ((fVal & fZap) != fVal)
2766 {
2767 LogRelFunc(("Invalid processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2768 g_HmMsrs.u.vmx.ProcCtls.n.allowed0, fVal, fZap));
2769 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC;
2770 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2771 }
2772
2773 /* Commit it to the VMCS and update our cache. */
2774 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, fVal);
2775 AssertRC(rc);
2776 pVmcsInfo->u32ProcCtls = fVal;
2777
2778 /* Set up MSR permissions that don't change through the lifetime of the VM. */
2779 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2780 hmR0VmxSetupVmcsMsrPermissions(pVCpu, pVmcsInfo);
2781
2782 /* Set up secondary processor-based VM-execution controls if the CPU supports it. */
2783 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2784 return hmR0VmxSetupVmcsProcCtls2(pVCpu, pVmcsInfo);
2785
2786 /* Sanity check, should not really happen. */
2787 if (RT_LIKELY(!pVM->hmr0.s.vmx.fUnrestrictedGuest))
2788 { /* likely */ }
2789 else
2790 {
2791 pVCpu->hm.s.u32HMError = VMX_UFC_INVALID_UX_COMBO;
2792 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2793 }
2794
2795 /* Old CPUs without secondary processor-based VM-execution controls would end up here. */
2796 return VINF_SUCCESS;
2797}
2798
2799
2800/**
2801 * Sets up miscellaneous (everything other than Pin, Processor and secondary
2802 * Processor-based VM-execution) control fields in the VMCS.
2803 *
2804 * @returns VBox status code.
2805 * @param pVCpu The cross context virtual CPU structure.
2806 * @param pVmcsInfo The VMCS info. object.
2807 */
2808static int hmR0VmxSetupVmcsMiscCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2809{
2810#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2811 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
2812 {
2813 hmR0VmxSetupVmcsVmreadBitmapAddr(pVCpu);
2814 hmR0VmxSetupVmcsVmwriteBitmapAddr(pVCpu);
2815 }
2816#endif
2817
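    /* The VMCS link pointer is initialized to all-ones (NIL_RTHCPHYS), i.e. no shadow VMCS is linked yet. */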
2818 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
2819 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
2820 AssertRC(rc);
2821
2822 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
2823 if (RT_SUCCESS(rc))
2824 {
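        /* Bits set in the CR0/CR4 guest/host masks are owned by the host: guest reads of
           those bits return the read shadow and guest attempts to change them cause VM-exits. */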
2825 uint64_t const u64Cr0Mask = vmxHCGetFixedCr0Mask(pVCpu);
2826 uint64_t const u64Cr4Mask = vmxHCGetFixedCr4Mask(pVCpu);
2827
2828 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask); AssertRC(rc);
2829 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask); AssertRC(rc);
2830
2831 pVmcsInfo->u64Cr0Mask = u64Cr0Mask;
2832 pVmcsInfo->u64Cr4Mask = u64Cr4Mask;
2833
2834 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fLbr)
2835 {
2836 rc = VMXWriteVmcsNw(VMX_VMCS64_GUEST_DEBUGCTL_FULL, MSR_IA32_DEBUGCTL_LBR);
2837 AssertRC(rc);
2838 }
2839 return VINF_SUCCESS;
2840 }
2841 else
2842 LogRelFunc(("Failed to initialize VMCS auto-load/store MSR addresses. rc=%Rrc\n", rc));
2843 return rc;
2844}
2845
2846
2847/**
2848 * Sets up the initial exception bitmap in the VMCS based on static conditions.
2849 *
2850 * We shall set up those exception intercepts that don't change during the
2851 * lifetime of the VM here. The rest are done dynamically while loading the
2852 * guest state.
2853 *
2854 * @param pVCpu The cross context virtual CPU structure.
2855 * @param pVmcsInfo The VMCS info. object.
2856 */
2857static void hmR0VmxSetupVmcsXcptBitmap(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2858{
2859 /*
2860 * The following exceptions are always intercepted:
2861 *
2862 * #AC - To prevent the guest from hanging the CPU and for dealing with
2863 * split-lock detecting host configs.
2864 * #DB - To maintain the DR6 state even when intercepting DRx reads/writes and
2865 * recursive #DBs can cause a CPU hang.
2866 * #PF - To sync our shadow page tables when nested-paging is not used.
2867 */
2868 bool const fNestedPaging = pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging;
2869 uint32_t const uXcptBitmap = RT_BIT(X86_XCPT_AC)
2870 | RT_BIT(X86_XCPT_DB)
2871 | (fNestedPaging ? 0 : RT_BIT(X86_XCPT_PF));
2872
2873 /* Commit it to the VMCS. */
2874 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap);
2875 AssertRC(rc);
2876
2877 /* Update our cache of the exception bitmap. */
2878 pVmcsInfo->u32XcptBitmap = uXcptBitmap;
2879}
2880
2881
2882#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2883/**
2884 * Sets up the VMCS for executing a nested-guest using hardware-assisted VMX.
2885 *
2886 * @returns VBox status code.
2887 * @param pVmcsInfo The VMCS info. object.
2888 */
2889static int hmR0VmxSetupVmcsCtlsNested(PVMXVMCSINFO pVmcsInfo)
2890{
2891 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
2892 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
2893 AssertRC(rc);
2894
2895 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
2896 if (RT_SUCCESS(rc))
2897 {
2898 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2899 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
2900
2901 /* Paranoia - We've not yet initialized these, they shall be done while merging the VMCS. */
2902 Assert(!pVmcsInfo->u64Cr0Mask);
2903 Assert(!pVmcsInfo->u64Cr4Mask);
2904 return VINF_SUCCESS;
2905 }
2906    LogRelFunc(("Failed to initialize VMCS auto-load/store MSR addresses in the nested-guest VMCS. rc=%Rrc\n", rc));
2907 return rc;
2908}
2909#endif
2910
2911
2912/**
2913 * Selector implementation for the VMX start-VM function (pfnStartVm).
2914 */
2915static DECLCALLBACK(int) hmR0VmxStartVmSelector(PVMXVMCSINFO pVmcsInfo, PVMCPUCC pVCpu, bool fResume)
2916{
2917 hmR0VmxUpdateStartVmFunction(pVCpu);
2918 return pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResume);
2919}
2920
2921
2922/**
2923 * Sets up the VMCS for executing a guest (or nested-guest) using hardware-assisted
2924 * VMX.
2925 *
2926 * @returns VBox status code.
2927 * @param pVCpu The cross context virtual CPU structure.
2928 * @param pVmcsInfo The VMCS info. object.
2929 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
2930 */
2931static int hmR0VmxSetupVmcs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
2932{
2933 Assert(pVmcsInfo->pvVmcs);
2934 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2935
2936 /* Set the CPU specified revision identifier at the beginning of the VMCS structure. */
2937 *(uint32_t *)pVmcsInfo->pvVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
2938 const char * const pszVmcs = fIsNstGstVmcs ? "nested-guest VMCS" : "guest VMCS";
2939
2940 LogFlowFunc(("\n"));
2941
2942 /*
2943 * Initialize the VMCS using VMCLEAR before loading the VMCS.
2944 * See Intel spec. 31.6 "Preparation And Launching A Virtual Machine".
2945 */
2946 int rc = hmR0VmxClearVmcs(pVmcsInfo);
2947 if (RT_SUCCESS(rc))
2948 {
2949 rc = hmR0VmxLoadVmcs(pVmcsInfo);
2950 if (RT_SUCCESS(rc))
2951 {
2952 /*
2953 * Initialize the hardware-assisted VMX execution handler for guest and nested-guest VMCS.
2954 * The host is always 64-bit since we no longer support 32-bit hosts.
2955 * Currently we have just a single handler for all guest modes as well, see @bugref{6208#c73}.
2956 */
2957 if (!fIsNstGstVmcs)
2958 {
2959 rc = hmR0VmxSetupVmcsPinCtls(pVCpu, pVmcsInfo);
2960 if (RT_SUCCESS(rc))
2961 {
2962 rc = hmR0VmxSetupVmcsProcCtls(pVCpu, pVmcsInfo);
2963 if (RT_SUCCESS(rc))
2964 {
2965 rc = hmR0VmxSetupVmcsMiscCtls(pVCpu, pVmcsInfo);
2966 if (RT_SUCCESS(rc))
2967 {
2968 hmR0VmxSetupVmcsXcptBitmap(pVCpu, pVmcsInfo);
2969#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2970 /*
2971 * If a shadow VMCS is allocated for the VMCS info. object, initialize the
2972 * VMCS revision ID and shadow VMCS indicator bit. Also, clear the VMCS
2973 * making it fit for use when VMCS shadowing is later enabled.
2974 */
2975 if (pVmcsInfo->pvShadowVmcs)
2976 {
2977 VMXVMCSREVID VmcsRevId;
2978 VmcsRevId.u = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
2979 VmcsRevId.n.fIsShadowVmcs = 1;
2980 *(uint32_t *)pVmcsInfo->pvShadowVmcs = VmcsRevId.u;
2981 rc = vmxHCClearShadowVmcs(pVmcsInfo);
2982 if (RT_SUCCESS(rc))
2983 { /* likely */ }
2984 else
2985 LogRelFunc(("Failed to initialize shadow VMCS. rc=%Rrc\n", rc));
2986 }
2987#endif
2988 }
2989 else
2990 LogRelFunc(("Failed to setup miscellaneous controls. rc=%Rrc\n", rc));
2991 }
2992 else
2993 LogRelFunc(("Failed to setup processor-based VM-execution controls. rc=%Rrc\n", rc));
2994 }
2995 else
2996 LogRelFunc(("Failed to setup pin-based controls. rc=%Rrc\n", rc));
2997 }
2998 else
2999 {
3000#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3001 rc = hmR0VmxSetupVmcsCtlsNested(pVmcsInfo);
3002 if (RT_SUCCESS(rc))
3003 { /* likely */ }
3004 else
3005 LogRelFunc(("Failed to initialize nested-guest VMCS. rc=%Rrc\n", rc));
3006#else
3007 AssertFailed();
3008#endif
3009 }
3010 }
3011 else
3012 LogRelFunc(("Failed to load the %s. rc=%Rrc\n", pszVmcs, rc));
3013 }
3014 else
3015        LogRelFunc(("Failed to clear the %s. rc=%Rrc\n", pszVmcs, rc));
3016
3017 /* Sync any CPU internal VMCS data back into our VMCS in memory. */
3018 if (RT_SUCCESS(rc))
3019 {
3020 rc = hmR0VmxClearVmcs(pVmcsInfo);
3021 if (RT_SUCCESS(rc))
3022 { /* likely */ }
3023 else
3024            LogRelFunc(("Failed to clear the %s post setup. rc=%Rrc\n", pszVmcs, rc));
3025 }
3026
3027 /*
3028 * Update the last-error record both for failures and success, so we
3029 * can propagate the status code back to ring-3 for diagnostics.
3030 */
3031 hmR0VmxUpdateErrorRecord(pVCpu, rc);
3032 NOREF(pszVmcs);
3033 return rc;
3034}
3035
3036
3037/**
3038 * Does global VT-x initialization (called during module initialization).
3039 *
3040 * @returns VBox status code.
3041 */
3042VMMR0DECL(int) VMXR0GlobalInit(void)
3043{
3044#ifdef HMVMX_USE_FUNCTION_TABLE
3045 AssertCompile(VMX_EXIT_MAX + 1 == RT_ELEMENTS(g_aVMExitHandlers));
3046# ifdef VBOX_STRICT
3047 for (unsigned i = 0; i < RT_ELEMENTS(g_aVMExitHandlers); i++)
3048 Assert(g_aVMExitHandlers[i].pfn);
3049# endif
3050#endif
3051
3052 /*
3053     * For detecting whether DR6.RTM is writable or not (the result is consumed in VMXR0InitVM).
3054 */
3055 RTTHREADPREEMPTSTATE Preempt = RTTHREADPREEMPTSTATE_INITIALIZER;
3056 RTThreadPreemptDisable(&Preempt);
3057 RTCCUINTXREG const fSavedDr6 = ASMGetDR6();
3058 ASMSetDR6(0);
3059 RTCCUINTXREG const fZeroDr6 = ASMGetDR6();
3060 ASMSetDR6(fSavedDr6);
3061 RTThreadPreemptRestore(&Preempt);
3062
3063 g_fDr6Zeroed = fZeroDr6;
3064
3065 return VINF_SUCCESS;
3066}
3067
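/*
 * How the probe above is interpreted (a sketch, mirroring the check in VMXR0InitVM
 * below): if zeroing DR6 reads back as anything other than X86_DR6_RA1_MASK, the
 * DR6.RTM bit is writable on this host and MOV DRx accesses are always intercepted:
 * @code
 *     bool const fDr6RtmWritable = (g_fDr6Zeroed != X86_DR6_RA1_MASK);
 * @endcode
 */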
3068
3069/**
3070 * Does global VT-x termination (called during module termination).
3071 */
3072VMMR0DECL(void) VMXR0GlobalTerm()
3073{
3074 /* Nothing to do currently. */
3075}
3076
3077
3078/**
3079 * Sets up and activates VT-x on the current CPU.
3080 *
3081 * @returns VBox status code.
3082 * @param pHostCpu The HM physical-CPU structure.
3083 * @param pVM The cross context VM structure. Can be
3084 * NULL after a host resume operation.
3085 * @param pvCpuPage Pointer to the VMXON region (can be NULL if @a
3086 * fEnabledByHost is @c true).
3087 * @param HCPhysCpuPage Physical address of the VMXON region (can be 0 if
3088 * @a fEnabledByHost is @c true).
3089 * @param fEnabledByHost Set if SUPR0EnableVTx() or similar was used to
3090 * enable VT-x on the host.
3091 * @param pHwvirtMsrs Pointer to the hardware-virtualization MSRs.
3092 */
3093VMMR0DECL(int) VMXR0EnableCpu(PHMPHYSCPU pHostCpu, PVMCC pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost,
3094 PCSUPHWVIRTMSRS pHwvirtMsrs)
3095{
3096 AssertPtr(pHostCpu);
3097 AssertPtr(pHwvirtMsrs);
3098 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3099
3100 /* Enable VT-x if it's not already enabled by the host. */
3101 if (!fEnabledByHost)
3102 {
3103 int rc = hmR0VmxEnterRootMode(pHostCpu, pVM, HCPhysCpuPage, pvCpuPage);
3104 if (RT_FAILURE(rc))
3105 return rc;
3106 }
3107
3108 /*
3109     * Flush all EPT tagged-TLB entries (in case VirtualBox or any other hypervisor has been
3110 * using EPTPs) so we don't retain any stale guest-physical mappings which won't get
3111 * invalidated when flushing by VPID.
3112 */
3113 if (pHwvirtMsrs->u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
3114 {
3115 hmR0VmxFlushEpt(NULL /* pVCpu */, NULL /* pVmcsInfo */, VMXTLBFLUSHEPT_ALL_CONTEXTS);
3116 pHostCpu->fFlushAsidBeforeUse = false;
3117 }
3118 else
3119 pHostCpu->fFlushAsidBeforeUse = true;
3120
3121 /* Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. */
3122 ++pHostCpu->cTlbFlushes;
3123
3124 return VINF_SUCCESS;
3125}
3126
3127
3128/**
3129 * Deactivates VT-x on the current CPU.
3130 *
3131 * @returns VBox status code.
3132 * @param pHostCpu The HM physical-CPU structure.
3133 * @param pvCpuPage Pointer to the VMXON region.
3134 * @param HCPhysCpuPage Physical address of the VMXON region.
3135 *
3136 * @remarks This function should never be called when SUPR0EnableVTx() or
3137 * similar was used to enable VT-x on the host.
3138 */
3139VMMR0DECL(int) VMXR0DisableCpu(PHMPHYSCPU pHostCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
3140{
3141 RT_NOREF2(pvCpuPage, HCPhysCpuPage);
3142
3143 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3144 return hmR0VmxLeaveRootMode(pHostCpu);
3145}
3146
3147
3148/**
3149 * Does per-VM VT-x initialization.
3150 *
3151 * @returns VBox status code.
3152 * @param pVM The cross context VM structure.
3153 */
3154VMMR0DECL(int) VMXR0InitVM(PVMCC pVM)
3155{
3156 AssertPtr(pVM);
3157 LogFlowFunc(("pVM=%p\n", pVM));
3158
3159 hmR0VmxStructsInit(pVM);
3160 int rc = hmR0VmxStructsAlloc(pVM);
3161 if (RT_FAILURE(rc))
3162 {
3163        LogRelFunc(("Failed to allocate VMX structures. rc=%Rrc\n", rc));
3164 return rc;
3165 }
3166
3167 /* Setup the crash dump page. */
3168#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3169 strcpy((char *)pVM->hmr0.s.vmx.pbScratch, "SCRATCH Magic");
3170 *(uint64_t *)(pVM->hmr0.s.vmx.pbScratch + 16) = UINT64_C(0xdeadbeefdeadbeef);
3171#endif
3172
3173 /*
3174     * Copy out stuff that's for ring-3 and determine the default configuration.
3175 */
3176 pVM->hm.s.ForR3.vmx.u64HostDr6Zeroed = g_fDr6Zeroed;
3177
3178 /* Since we do not emulate RTM, make sure DR6.RTM cannot be cleared by the
3179       guest and so cause confusion there. It appears that the DR6.RTM bit can be
3180 cleared even if TSX-NI is disabled (microcode update / system / whatever). */
3181#ifdef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
3182 if (pVM->hm.s.vmx.fAlwaysInterceptMovDRxCfg == 0)
3183 pVM->hmr0.s.vmx.fAlwaysInterceptMovDRx = g_fDr6Zeroed != X86_DR6_RA1_MASK;
3184 else
3185#endif
3186 pVM->hmr0.s.vmx.fAlwaysInterceptMovDRx = pVM->hm.s.vmx.fAlwaysInterceptMovDRxCfg > 0;
3187 pVM->hm.s.ForR3.vmx.fAlwaysInterceptMovDRx = pVM->hmr0.s.vmx.fAlwaysInterceptMovDRx;
3188
3189 return VINF_SUCCESS;
3190}
3191
3192
3193/**
3194 * Does per-VM VT-x termination.
3195 *
3196 * @returns VBox status code.
3197 * @param pVM The cross context VM structure.
3198 */
3199VMMR0DECL(int) VMXR0TermVM(PVMCC pVM)
3200{
3201 AssertPtr(pVM);
3202 LogFlowFunc(("pVM=%p\n", pVM));
3203
3204#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3205 if (pVM->hmr0.s.vmx.pbScratch)
3206 RT_BZERO(pVM->hmr0.s.vmx.pbScratch, X86_PAGE_4K_SIZE);
3207#endif
3208 hmR0VmxStructsFree(pVM);
3209 return VINF_SUCCESS;
3210}
3211
3212
3213/**
3214 * Sets up the VM for execution using hardware-assisted VMX.
3215 * This function is only called once per-VM during initialization.
3216 *
3217 * @returns VBox status code.
3218 * @param pVM The cross context VM structure.
3219 */
3220VMMR0DECL(int) VMXR0SetupVM(PVMCC pVM)
3221{
3222 AssertPtr(pVM);
3223 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3224
3225 LogFlowFunc(("pVM=%p\n", pVM));
3226
3227 /*
3228 * At least verify if VMX is enabled, since we can't check if we're in VMX root mode or not
3229 * without causing a #GP.
3230 */
3231 RTCCUINTREG const uHostCr4 = ASMGetCR4();
3232 if (RT_LIKELY(uHostCr4 & X86_CR4_VMXE))
3233 { /* likely */ }
3234 else
3235 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
3236
3237 /*
3238 * Check that nested paging is supported if enabled and copy over the flag to the
3239 * ring-0 only structure.
3240 */
3241 bool const fNestedPaging = pVM->hm.s.fNestedPagingCfg;
3242 AssertReturn( !fNestedPaging
3243 || (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_EPT), /** @todo use a ring-0 copy of ProcCtls2.n.allowed1 */
3244 VERR_INCOMPATIBLE_CONFIG);
3245 pVM->hmr0.s.fNestedPaging = fNestedPaging;
3246 pVM->hmr0.s.fAllow64BitGuests = pVM->hm.s.fAllow64BitGuestsCfg;
3247
3248 /*
3249 * Without unrestricted guest execution, pRealModeTSS and pNonPagingModeEPTPageTable *must*
3250 * always be allocated. We no longer support the highly unlikely case of unrestricted guest
3251 * without pRealModeTSS, see hmR3InitFinalizeR0Intel().
3252 */
3253 bool const fUnrestrictedGuest = pVM->hm.s.vmx.fUnrestrictedGuestCfg;
3254 AssertReturn( !fUnrestrictedGuest
3255 || ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_UNRESTRICTED_GUEST)
3256 && fNestedPaging),
3257 VERR_INCOMPATIBLE_CONFIG);
3258 if ( !fUnrestrictedGuest
3259 && ( !pVM->hm.s.vmx.pNonPagingModeEPTPageTable
3260 || !pVM->hm.s.vmx.pRealModeTSS))
3261 {
3262 LogRelFunc(("Invalid real-on-v86 state.\n"));
3263 return VERR_INTERNAL_ERROR;
3264 }
3265 pVM->hmr0.s.vmx.fUnrestrictedGuest = fUnrestrictedGuest;
3266
3267    /* Initialize these always, see hmR3InitFinalizeR0(). */
3268 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NONE;
3269 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NONE;
3270
3271 /* Setup the tagged-TLB flush handlers. */
3272 int rc = hmR0VmxSetupTaggedTlb(pVM);
3273 if (RT_FAILURE(rc))
3274 {
3275 LogRelFunc(("Failed to setup tagged TLB. rc=%Rrc\n", rc));
3276 return rc;
3277 }
3278
3279 /* Determine LBR capabilities. */
3280 pVM->hmr0.s.vmx.fLbr = pVM->hm.s.vmx.fLbrCfg;
3281 if (pVM->hmr0.s.vmx.fLbr)
3282 {
3283 rc = hmR0VmxSetupLbrMsrRange(pVM);
3284 if (RT_FAILURE(rc))
3285 {
3286 LogRelFunc(("Failed to setup LBR MSR range. rc=%Rrc\n", rc));
3287 return rc;
3288 }
3289 }
3290
3291#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3292 /* Setup the shadow VMCS fields array and VMREAD/VMWRITE bitmaps. */
3293 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
3294 {
3295 rc = hmR0VmxSetupShadowVmcsFieldsArrays(pVM);
3296 if (RT_SUCCESS(rc))
3297 hmR0VmxSetupVmreadVmwriteBitmaps(pVM);
3298 else
3299 {
3300 LogRelFunc(("Failed to setup shadow VMCS fields arrays. rc=%Rrc\n", rc));
3301 return rc;
3302 }
3303 }
3304#endif
3305
3306 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
3307 {
3308 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
3309 Log4Func(("pVCpu=%p idCpu=%RU32\n", pVCpu, pVCpu->idCpu));
3310
3311 pVCpu->hmr0.s.vmx.pfnStartVm = hmR0VmxStartVmSelector;
3312
3313 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
3314 if (RT_SUCCESS(rc))
3315 {
3316#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3317 if (pVM->cpum.ro.GuestFeatures.fVmx)
3318 {
3319 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
3320 if (RT_SUCCESS(rc))
3321 { /* likely */ }
3322 else
3323 {
3324 LogRelFunc(("Nested-guest VMCS setup failed. rc=%Rrc\n", rc));
3325 return rc;
3326 }
3327 }
3328#endif
3329 }
3330 else
3331 {
3332 LogRelFunc(("VMCS setup failed. rc=%Rrc\n", rc));
3333 return rc;
3334 }
3335 }
3336
3337 return VINF_SUCCESS;
3338}
3339
3340
3341/**
3342 * Saves the host control registers (CR0, CR3, CR4) into the host-state area in
3343 * the VMCS.
3344 * @returns CR4 for passing along to hmR0VmxExportHostSegmentRegs.
3345 */
3346static uint64_t hmR0VmxExportHostControlRegs(void)
3347{
3348 int rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR0, ASMGetCR0()); AssertRC(rc);
3349 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR3, ASMGetCR3()); AssertRC(rc);
3350 uint64_t uHostCr4 = ASMGetCR4();
3351 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR4, uHostCr4); AssertRC(rc);
3352 return uHostCr4;
3353}
3354
3355
3356/**
3357 * Saves the host segment registers and GDTR, IDTR, (TR, GS and FS bases) into
3358 * the host-state area in the VMCS.
3359 *
3360 * @returns VBox status code.
3361 * @param pVCpu The cross context virtual CPU structure.
3362 * @param uHostCr4 The host CR4 value.
3363 */
3364static int hmR0VmxExportHostSegmentRegs(PVMCPUCC pVCpu, uint64_t uHostCr4)
3365{
3366 /*
3367 * If we've executed guest code using hardware-assisted VMX, the host-state bits
3368 * will be messed up. We should -not- save the messed up state without restoring
3369 * the original host-state, see @bugref{7240}.
3370 *
3371     * This can apparently happen (most likely due to FPU changes), so deal with it rather
3372     * than asserting. It was observed when booting a Solaris 10u10 32-bit guest.
3373 */
3374 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
3375 {
3376 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags,
3377 pVCpu->idCpu));
3378 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
3379 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
3380 }
3381
3382 /*
3383 * Get all the host info.
3384 * ASSUME it is safe to use rdfsbase and friends if the CR4.FSGSBASE bit is set
3385 * without also checking the cpuid bit.
3386 */
3387 uint32_t fRestoreHostFlags;
3388#if RT_INLINE_ASM_EXTERNAL
3389 if (uHostCr4 & X86_CR4_FSGSBASE)
3390 {
3391 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, true /*fHaveFsGsBase*/);
3392 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3393 }
3394 else
3395 {
3396 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, false /*fHaveFsGsBase*/);
3397 fRestoreHostFlags = 0;
3398 }
3399 RTSEL uSelES = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES;
3400 RTSEL uSelDS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS;
3401 RTSEL uSelFS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS;
3402 RTSEL uSelGS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS;
3403#else
3404 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR = ASMGetTR();
3405 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS = ASMGetSS();
3406 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS = ASMGetCS();
3407 ASMGetGDTR((PRTGDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr);
3408 ASMGetIDTR((PRTIDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr);
3409 if (uHostCr4 & X86_CR4_FSGSBASE)
3410 {
3411 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMGetFSBase();
3412 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMGetGSBase();
3413 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3414 }
3415 else
3416 {
3417 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMRdMsr(MSR_K8_FS_BASE);
3418 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMRdMsr(MSR_K8_GS_BASE);
3419 fRestoreHostFlags = 0;
3420 }
3421 RTSEL uSelES, uSelDS, uSelFS, uSelGS;
3422 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS = uSelDS = ASMGetDS();
3423 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES = uSelES = ASMGetES();
3424 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS = uSelFS = ASMGetFS();
3425 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS = uSelGS = ASMGetGS();
3426#endif
3427
3428 /*
3429 * Determine if the host segment registers are suitable for VT-x. Otherwise use zero to
3430 * gain VM-entry and restore them before we get preempted.
3431 *
3432 * See Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers".
3433 */
3434 RTSEL const uSelAll = uSelFS | uSelGS | uSelES | uSelDS;
3435 if (uSelAll & (X86_SEL_RPL | X86_SEL_LDT))
3436 {
3437 if (!(uSelAll & X86_SEL_LDT))
3438 {
3439#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3440 do { \
3441 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3442 if ((a_uVmcsVar) & X86_SEL_RPL) \
3443 { \
3444 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3445 (a_uVmcsVar) = 0; \
3446 } \
3447 } while (0)
3448 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3449 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3450 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3451 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3452#undef VMXLOCAL_ADJUST_HOST_SEG
3453 }
3454 else
3455 {
3456#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3457 do { \
3458 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3459 if ((a_uVmcsVar) & (X86_SEL_RPL | X86_SEL_LDT)) \
3460 { \
3461 if (!((a_uVmcsVar) & X86_SEL_LDT)) \
3462 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3463 else \
3464 { \
3465 uint32_t const fAttr = ASMGetSegAttr(a_uVmcsVar); \
3466 if ((fAttr & X86_DESC_P) && fAttr != UINT32_MAX) \
3467 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3468 } \
3469 (a_uVmcsVar) = 0; \
3470 } \
3471 } while (0)
3472 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3473 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3474 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3475 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3476#undef VMXLOCAL_ADJUST_HOST_SEG
3477 }
3478 }
3479
3480 /* Verification based on Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers" */
3481 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR);
3482 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS);
3483 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_LDT));
3484 Assert(!(uSelDS & X86_SEL_RPL)); Assert(!(uSelDS & X86_SEL_LDT));
3485 Assert(!(uSelES & X86_SEL_RPL)); Assert(!(uSelES & X86_SEL_LDT));
3486 Assert(!(uSelFS & X86_SEL_RPL)); Assert(!(uSelFS & X86_SEL_LDT));
3487 Assert(!(uSelGS & X86_SEL_RPL)); Assert(!(uSelGS & X86_SEL_LDT));
3488
3489 /*
3490     * Determine if we need to manually restore the GDTR and IDTR limits as VT-x zaps
3491 * them to the maximum limit (0xffff) on every VM-exit.
3492 */
3493 if (pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb != 0xffff)
3494 fRestoreHostFlags |= VMX_RESTORE_HOST_GDTR;
3495
3496 /*
3497 * IDT limit is effectively capped at 0xfff. (See Intel spec. 6.14.1 "64-Bit Mode IDT" and
3498 * Intel spec. 6.2 "Exception and Interrupt Vectors".) Therefore if the host has the limit
3499 * as 0xfff, VT-x bloating the limit to 0xffff shouldn't cause any different CPU behavior.
3500     * However, several hosts either insist on 0xfff being the limit (Windows Patch Guard) or
3501     * use the limit for other purposes (darwin puts the CPU ID in there but botches sidt
3502 * alignment in at least one consumer). So, we're only allowing the IDTR.LIMIT to be left
3503 * at 0xffff on hosts where we are sure it won't cause trouble.
3504 */
3505#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS)
3506 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb < 0x0fff)
3507#else
3508 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb != 0xffff)
3509#endif
3510 fRestoreHostFlags |= VMX_RESTORE_HOST_IDTR;
3511
3512 /*
3513 * Host TR base. Verify that TR selector doesn't point past the GDT. Masking off the TI
3514 * and RPL bits is effectively what the CPU does for "scaling by 8". TI is always 0 and
3515 * RPL should be too in most cases.
3516 */
3517 RTSEL const uSelTR = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR;
3518 AssertMsgReturn((uSelTR | X86_SEL_RPL_LDT) <= pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb,
3519 ("TR selector exceeds limit. TR=%RTsel cbGdt=%#x\n", uSelTR, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb),
3520 VERR_VMX_INVALID_HOST_STATE);
3521
3522 PCX86DESCHC pDesc = (PCX86DESCHC)(pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr + (uSelTR & X86_SEL_MASK));
3523 uintptr_t const uTRBase = X86DESC64_BASE(pDesc);
3524
3525 /*
3526 * VT-x unconditionally restores the TR limit to 0x67 and type to 11 (32-bit busy TSS) on
3527 * all VM-exits. The type is the same for 64-bit busy TSS[1]. The limit needs manual
3528 * restoration if the host has something else. Task switching is not supported in 64-bit
3529 * mode[2], but the limit still matters as IOPM is supported in 64-bit mode. Restoring the
3530 * limit lazily while returning to ring-3 is safe because IOPM is not applicable in ring-0.
3531 *
3532 * [1] See Intel spec. 3.5 "System Descriptor Types".
3533 * [2] See Intel spec. 7.2.3 "TSS Descriptor in 64-bit mode".
3534 */
3535 Assert(pDesc->System.u4Type == 11);
3536 if ( pDesc->System.u16LimitLow != 0x67
3537 || pDesc->System.u4LimitHigh)
3538 {
3539 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_TR;
3540
3541 /* If the host has made GDT read-only, we would need to temporarily toggle CR0.WP before writing the GDT. */
3542 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_READ_ONLY)
3543 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_READ_ONLY;
3544 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_NEED_WRITABLE)
3545 {
3546 /* The GDT is read-only but the writable GDT is available. */
3547 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_NEED_WRITABLE;
3548 pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.cb = pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb;
3549 int rc = SUPR0GetCurrentGdtRw(&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.uAddr);
3550 AssertRCReturn(rc, rc);
3551 }
3552 }
3553
3554 pVCpu->hmr0.s.vmx.fRestoreHostFlags = fRestoreHostFlags;
3555
3556 /*
3557 * Do all the VMCS updates in one block to assist nested virtualization.
3558 */
3559 int rc;
3560 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_CS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS); AssertRC(rc);
3561 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_SS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS); AssertRC(rc);
3562 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_DS_SEL, uSelDS); AssertRC(rc);
3563 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_ES_SEL, uSelES); AssertRC(rc);
3564 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_FS_SEL, uSelFS); AssertRC(rc);
3565 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_GS_SEL, uSelGS); AssertRC(rc);
3566 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_TR_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR); AssertRC(rc);
3567 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr); AssertRC(rc);
3568 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_IDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.uAddr); AssertRC(rc);
3569 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_TR_BASE, uTRBase); AssertRC(rc);
3570 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_FS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase); AssertRC(rc);
3571 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase); AssertRC(rc);
3572
3573 return VINF_SUCCESS;
3574}
3575
3576
3577/**
3578 * Exports certain host MSRs in the VM-exit MSR-load area and some in the
3579 * host-state area of the VMCS.
3580 *
3581 * These MSRs will be automatically restored on the host after every successful
3582 * VM-exit.
3583 *
3584 * @param pVCpu The cross context virtual CPU structure.
3585 *
3586 * @remarks No-long-jump zone!!!
3587 */
3588static void hmR0VmxExportHostMsrs(PVMCPUCC pVCpu)
3589{
3590 AssertPtr(pVCpu);
3591
3592 /*
3593 * Save MSRs that we restore lazily (due to preemption or transition to ring-3)
3594 * rather than swapping them on every VM-entry.
3595 */
3596 hmR0VmxLazySaveHostMsrs(pVCpu);
3597
3598 /*
3599 * Host Sysenter MSRs.
3600 */
3601 int rc = VMXWriteVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)); AssertRC(rc);
3602 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP)); AssertRC(rc);
3603 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP)); AssertRC(rc);
3604
3605 /*
3606 * Host EFER MSR.
3607 *
3608 * If the CPU supports the newer VMCS controls for managing EFER, use it. Otherwise it's
3609 * done as part of auto-load/store MSR area in the VMCS, see hmR0VmxExportGuestMsrs().
3610 */
3611 if (g_fHmVmxSupportsVmcsEfer)
3612 {
3613 rc = VMXWriteVmcs64(VMX_VMCS64_HOST_EFER_FULL, g_uHmVmxHostMsrEfer);
3614 AssertRC(rc);
3615 }
3616
3617 /** @todo IA32_PERF_GLOBALCTRL, IA32_PAT also see
3618 * vmxHCExportGuestEntryExitCtls(). */
3619}
3620
3621
3622/**
3623 * Figures out if we need to swap the EFER MSR which is particularly expensive.
3624 *
3625 * We check all relevant bits. For now, that's everything besides LMA/LME, as
3626 * these two bits are handled by VM-entry, see vmxHCExportGuestEntryExitCtls().
3627 *
3628 * @returns true if we need to load guest EFER, false otherwise.
3629 * @param pVCpu The cross context virtual CPU structure.
3630 * @param pVmxTransient The VMX-transient structure.
3631 *
3632 * @remarks Requires EFER, CR4.
3633 * @remarks No-long-jump zone!!!
3634 */
3635static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3636{
3637#ifdef HMVMX_ALWAYS_SWAP_EFER
3638 RT_NOREF2(pVCpu, pVmxTransient);
3639 return true;
3640#else
3641 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
3642 uint64_t const u64HostEfer = g_uHmVmxHostMsrEfer;
3643 uint64_t const u64GuestEfer = pCtx->msrEFER;
3644
3645# ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3646 /*
3647 * For nested-guests, we shall honor swapping the EFER MSR when requested by
3648 * the nested-guest.
3649 */
3650 if ( pVmxTransient->fIsNestedGuest
3651 && ( CPUMIsGuestVmxEntryCtlsSet(pCtx, VMX_ENTRY_CTLS_LOAD_EFER_MSR)
3652 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_SAVE_EFER_MSR)
3653 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_LOAD_EFER_MSR)))
3654 return true;
3655# else
3656 RT_NOREF(pVmxTransient);
3657#endif
3658
3659 /*
3660 * For 64-bit guests, if EFER.SCE bit differs, we need to swap the EFER MSR
3661 * to ensure that the guest's SYSCALL behaviour isn't broken, see @bugref{7386}.
3662 */
3663 if ( CPUMIsGuestInLongModeEx(pCtx)
3664 && (u64GuestEfer & MSR_K6_EFER_SCE) != (u64HostEfer & MSR_K6_EFER_SCE))
3665 return true;
3666
3667 /*
3668 * If the guest uses PAE and EFER.NXE bit differs, we need to swap the EFER MSR
3669 * as it affects guest paging. 64-bit paging implies CR4.PAE as well.
3670 *
3671 * See Intel spec. 4.5 "IA-32e Paging".
3672 * See Intel spec. 4.1.1 "Three Paging Modes".
3673 *
3674 * Verify that we always intercept CR4.PAE and CR0.PG bits, so we don't need to
3675 * import CR4 and CR0 from the VMCS here as those bits are always up to date.
3676 */
3677 Assert(vmxHCGetFixedCr4Mask(pVCpu) & X86_CR4_PAE);
3678 Assert(vmxHCGetFixedCr0Mask(pVCpu) & X86_CR0_PG);
3679 if ( (pCtx->cr4 & X86_CR4_PAE)
3680 && (pCtx->cr0 & X86_CR0_PG))
3681 {
3682 /*
3683 * If nested paging is not used, verify that the guest paging mode matches the
3684 * shadow paging mode which is/will be placed in the VMCS (which is what will
3685 * actually be used while executing the guest and not the CR4 shadow value).
3686 */
3687 AssertMsg( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
3688 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE
3689 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE_NX
3690 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64
3691 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64_NX,
3692 ("enmShadowMode=%u\n", pVCpu->hm.s.enmShadowMode));
3693 if ((u64GuestEfer & MSR_K6_EFER_NXE) != (u64HostEfer & MSR_K6_EFER_NXE))
3694 {
3695 /* Verify that the host is NX capable. */
3696 Assert(g_CpumHostFeatures.s.fNoExecute);
3697 return true;
3698 }
3699 }
3700
3701 return false;
3702#endif
3703}
3704
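/*
 * A concrete illustration of the EFER.SCE case above (made-up values, not code from
 * this file): a 64-bit host typically runs with SYSCALL enabled while a 64-bit guest
 * may have it disabled, which forces swapping the EFER MSR:
 * @code
 *     uint64_t const u64HostEfer  = MSR_K6_EFER_LMA | MSR_K6_EFER_LME | MSR_K6_EFER_NXE | MSR_K6_EFER_SCE;
 *     uint64_t const u64GuestEfer = MSR_K6_EFER_LMA | MSR_K6_EFER_LME | MSR_K6_EFER_NXE;
 *     // (u64GuestEfer & MSR_K6_EFER_SCE) != (u64HostEfer & MSR_K6_EFER_SCE) -> swap EFER.
 * @endcode
 */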
3705
3706/**
3707 * Exports the guest's RSP into the guest-state area in the VMCS.
3708 *
3709 * @param pVCpu The cross context virtual CPU structure.
3710 *
3711 * @remarks No-long-jump zone!!!
3712 */
3713static void hmR0VmxExportGuestRsp(PVMCPUCC pVCpu)
3714{
3715 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RSP)
3716 {
3717 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RSP);
3718
3719 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_RSP, pVCpu->cpum.GstCtx.rsp);
3720 AssertRC(rc);
3721
3722 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RSP);
3723 Log4Func(("rsp=%#RX64\n", pVCpu->cpum.GstCtx.rsp));
3724 }
3725}
3726
3727
3728/**
3729 * Exports the guest hardware-virtualization state.
3730 *
3731 * @returns VBox status code.
3732 * @param pVCpu The cross context virtual CPU structure.
3733 * @param pVmxTransient The VMX-transient structure.
3734 *
3735 * @remarks No-long-jump zone!!!
3736 */
3737static int hmR0VmxExportGuestHwvirtState(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3738{
3739 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_HWVIRT)
3740 {
3741#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3742 /*
3743 * Check if the VMX feature is exposed to the guest and if the host CPU supports
3744 * VMCS shadowing.
3745 */
3746 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
3747 {
3748 /*
3749 * If the nested hypervisor has loaded a current VMCS and is in VMX root mode,
3750 * copy the nested hypervisor's current VMCS into the shadow VMCS and enable
3751 * VMCS shadowing to skip intercepting some or all VMREAD/VMWRITE VM-exits.
3752 *
3753 * We check for VMX root mode here in case the guest executes VMXOFF without
3754 * clearing the current VMCS pointer and our VMXOFF instruction emulation does
3755 * not clear the current VMCS pointer.
3756 */
3757 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3758 if ( CPUMIsGuestInVmxRootMode(&pVCpu->cpum.GstCtx)
3759 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)
3760 && CPUMIsGuestVmxCurrentVmcsValid(&pVCpu->cpum.GstCtx))
3761 {
3762 /* Paranoia. */
3763 Assert(!pVmxTransient->fIsNestedGuest);
3764
3765 /*
3766 * For performance reasons, also check if the nested hypervisor's current VMCS
3767 * was newly loaded or modified before copying it to the shadow VMCS.
3768 */
3769 if (!pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs)
3770 {
3771 int rc = vmxHCCopyNstGstToShadowVmcs(pVCpu, pVmcsInfo);
3772 AssertRCReturn(rc, rc);
3773 pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs = true;
3774 }
3775 vmxHCEnableVmcsShadowing(pVCpu, pVmcsInfo);
3776 }
3777 else
3778 vmxHCDisableVmcsShadowing(pVCpu, pVmcsInfo);
3779 }
3780#else
3781 NOREF(pVmxTransient);
3782#endif
3783 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_HWVIRT);
3784 }
3785 return VINF_SUCCESS;
3786}
3787
3788
3789/**
3790 * Exports the guest debug registers into the guest-state area in the VMCS.
3791 * The guest debug bits are partially shared with the host (e.g. DR6, DR0-3).
3792 *
3793 * This also sets up whether \#DB and MOV DRx accesses cause VM-exits.
3794 *
3795 * @returns VBox status code.
3796 * @param pVCpu The cross context virtual CPU structure.
3797 * @param pVmxTransient The VMX-transient structure.
3798 *
3799 * @remarks No-long-jump zone!!!
3800 */
3801static int hmR0VmxExportSharedDebugState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
3802{
3803 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3804
3805 /** @todo NSTVMX: Figure out what we want to do with nested-guest instruction
3806 * stepping. */
3807 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3808 if (pVmxTransient->fIsNestedGuest)
3809 {
3810 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, CPUMGetGuestDR7(pVCpu));
3811 AssertRC(rc);
3812
3813 /*
3814 * We don't want to always intercept MOV DRx for nested-guests as it causes
3815 * problems when the nested hypervisor isn't intercepting them, see @bugref{10080}.
3816 * Instead, they are strictly only requested when the nested hypervisor intercepts
3817 * them -- handled while merging VMCS controls.
3818 *
3819         * If neither the outer hypervisor nor the nested hypervisor is intercepting MOV DRx,
3820         * then the nested-guest debug state should be actively loaded on the host so that the
3821         * nested-guest reads its own debug registers without causing VM-exits.
3822 */
3823 if ( !(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
3824 && !CPUMIsGuestDebugStateActive(pVCpu))
3825 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3826 return VINF_SUCCESS;
3827 }
3828
3829#ifdef VBOX_STRICT
3830 /* Validate. Intel spec. 26.3.1.1 "Checks on Guest Controls Registers, Debug Registers, MSRs" */
3831 if (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG)
3832 {
3833 /* Validate. Intel spec. 17.2 "Debug Registers", recompiler paranoia checks. */
3834 Assert((pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_MBZ_MASK | X86_DR7_RAZ_MASK)) == 0);
3835 Assert((pVCpu->cpum.GstCtx.dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK);
3836 }
3837#endif
3838
3839 bool fSteppingDB = false;
3840 uint32_t uProcCtls = pVmcsInfo->u32ProcCtls;
3841 if (pVCpu->hm.s.fSingleInstruction)
3842 {
3843 /* If the CPU supports the monitor trap flag, use it for single stepping in DBGF and avoid intercepting #DB. */
3844 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MONITOR_TRAP_FLAG)
3845 {
3846 uProcCtls |= VMX_PROC_CTLS_MONITOR_TRAP_FLAG;
3847 Assert(fSteppingDB == false);
3848 }
3849 else
3850 {
3851 pVCpu->cpum.GstCtx.eflags.u |= X86_EFL_TF;
3852 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_RFLAGS;
3853 pVCpu->hmr0.s.fClearTrapFlag = true;
3854 fSteppingDB = true;
3855 }
3856 }
3857
3858#ifdef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
3859 bool fInterceptMovDRx = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fAlwaysInterceptMovDRx;
3860#else
3861 bool fInterceptMovDRx = false;
3862#endif
3863 uint64_t u64GuestDr7;
3864 if ( fSteppingDB
3865 || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK))
3866 {
3867 /*
3868 * Use the combined guest and host DRx values found in the hypervisor register set
3869 * because the hypervisor debugger has breakpoints active or someone is single stepping
3870 * on the host side without a monitor trap flag.
3871 *
3872 * Note! DBGF expects a clean DR6 state before executing guest code.
3873 */
3874 if (!CPUMIsHyperDebugStateActive(pVCpu))
3875 {
3876 CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */);
3877 Assert(CPUMIsHyperDebugStateActive(pVCpu));
3878 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
3879 }
3880
3881 /* Update DR7 with the hypervisor value (other DRx registers are handled by CPUM one way or another). */
3882 u64GuestDr7 = CPUMGetHyperDR7(pVCpu);
3883 pVCpu->hmr0.s.fUsingHyperDR7 = true;
3884 fInterceptMovDRx = true;
3885 }
3886 else
3887 {
3888 /*
3889 * If the guest has enabled debug registers, we need to load them prior to
3890 * executing guest code so they'll trigger at the right time.
3891 */
3892 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_DR7);
3893 if (pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD))
3894 {
3895 if (!CPUMIsGuestDebugStateActive(pVCpu))
3896 {
3897 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3898 Assert(CPUMIsGuestDebugStateActive(pVCpu));
3899 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
3900 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
3901 }
3902#ifndef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
3903 Assert(!fInterceptMovDRx);
3904#endif
3905 }
3906 else if (!CPUMIsGuestDebugStateActive(pVCpu))
3907 {
3908 /*
3909             * If no debugging is enabled, we'll lazily load DR0-3. Unlike on AMD-V, we
3910 * must intercept #DB in order to maintain a correct DR6 guest value, and
3911 * because we need to intercept it to prevent nested #DBs from hanging the
3912 * CPU, we end up always having to intercept it. See hmR0VmxSetupVmcsXcptBitmap().
3913 */
3914 fInterceptMovDRx = true;
3915 }
3916
3917 /* Update DR7 with the actual guest value. */
3918 u64GuestDr7 = pVCpu->cpum.GstCtx.dr[7];
3919 pVCpu->hmr0.s.fUsingHyperDR7 = false;
3920 }
3921
3922 if (fInterceptMovDRx)
3923 uProcCtls |= VMX_PROC_CTLS_MOV_DR_EXIT;
3924 else
3925 uProcCtls &= ~VMX_PROC_CTLS_MOV_DR_EXIT;
3926
3927 /*
3928 * Update the processor-based VM-execution controls with the MOV-DRx intercepts and the
3929 * monitor-trap flag and update our cache.
3930 */
3931 if (uProcCtls != pVmcsInfo->u32ProcCtls)
3932 {
3933 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls);
3934 AssertRC(rc);
3935 pVmcsInfo->u32ProcCtls = uProcCtls;
3936 }
3937
3938 /*
3939 * Update guest DR7.
3940 */
3941 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, u64GuestDr7);
3942 AssertRC(rc);
3943
3944 /*
3945 * If we have forced EFLAGS.TF to be set because we're single-stepping in the hypervisor debugger,
3946     * we need to clear any interrupt inhibition, as otherwise it causes a VM-entry failure.
3947 *
3948 * See Intel spec. 26.3.1.5 "Checks on Guest Non-Register State".
3949 */
3950 if (fSteppingDB)
3951 {
3952 Assert(pVCpu->hm.s.fSingleInstruction);
3953 Assert(pVCpu->cpum.GstCtx.eflags.Bits.u1TF);
3954
3955 uint32_t fIntrState = 0;
3956 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState);
3957 AssertRC(rc);
3958
3959 if (fIntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS))
3960 {
3961 fIntrState &= ~(VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS);
3962 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState);
3963 AssertRC(rc);
3964 }
3965 }
3966
3967 return VINF_SUCCESS;
3968}
3969
3970
3971/**
3972 * Exports certain guest MSRs into the VM-entry MSR-load and VM-exit MSR-store
3973 * areas.
3974 *
3975 * These MSRs will automatically be loaded to the host CPU on every successful
3976 * VM-entry and stored from the host CPU on every successful VM-exit.
3977 *
3978 * We create/update MSR slots for the host MSRs in the VM-exit MSR-load area. The
3979 * actual host MSR values are not updated here for performance reasons. See
3980 * hmR0VmxExportHostMsrs().
3981 *
3982 * We also export the guest sysenter MSRs into the guest-state area in the VMCS.
3983 *
3984 * @returns VBox status code.
3985 * @param pVCpu The cross context virtual CPU structure.
3986 * @param pVmxTransient The VMX-transient structure.
3987 *
3988 * @remarks No-long-jump zone!!!
3989 */
3990static int hmR0VmxExportGuestMsrs(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3991{
3992 AssertPtr(pVCpu);
3993 AssertPtr(pVmxTransient);
3994
3995 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3996 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
3997
3998 /*
3999     * MSRs for which we use the auto-load/store MSR area in the VMCS.
4000 * For 64-bit hosts, we load/restore them lazily, see hmR0VmxLazyLoadGuestMsrs(),
4001 * nothing to do here. The host MSR values are updated when it's safe in
4002 * hmR0VmxLazySaveHostMsrs().
4003 *
4004     * For nested-guests, the guest's MSRs from the VM-entry MSR-load area are already
4005 * loaded (into the guest-CPU context) by the VMLAUNCH/VMRESUME instruction
4006 * emulation. The merged MSR permission bitmap will ensure that we get VM-exits
4007     * for any MSRs that are not part of the lazy MSRs, so we do not need to place
4008 * those MSRs into the auto-load/store MSR area. Nothing to do here.
4009 */
4010 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_GUEST_AUTO_MSRS)
4011 {
4012 /* No auto-load/store MSRs currently. */
4013 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_GUEST_AUTO_MSRS);
4014 }
4015
4016 /*
4017 * Guest Sysenter MSRs.
4018 */
4019 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_MSR_MASK)
4020 {
4021 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SYSENTER_MSRS);
4022
4023 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_CS_MSR)
4024 {
4025 int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
4026 AssertRC(rc);
4027 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_CS_MSR);
4028 }
4029
4030 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_EIP_MSR)
4031 {
4032 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
4033 AssertRC(rc);
4034 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_EIP_MSR);
4035 }
4036
4037 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_ESP_MSR)
4038 {
4039 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
4040 AssertRC(rc);
4041 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_ESP_MSR);
4042 }
4043 }
4044
4045 /*
4046 * Guest/host EFER MSR.
4047 */
4048 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_EFER_MSR)
4049 {
4050 /* Whether we are using the VMCS to swap the EFER MSR must have been
4051 determined earlier while exporting VM-entry/VM-exit controls. */
4052 Assert(!(ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_ENTRY_EXIT_CTLS));
4053 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_EFER);
4054
4055 if (hmR0VmxShouldSwapEferMsr(pVCpu, pVmxTransient))
4056 {
4057 /*
4058 * EFER.LME is written by software, while EFER.LMA is set by the CPU to (CR0.PG & EFER.LME).
4059 * This means a guest can set EFER.LME=1 while CR0.PG=0 and EFER.LMA can remain 0.
4060 * VT-x requires that "IA-32e mode guest" VM-entry control must be identical to EFER.LMA
4061 * and to CR0.PG. Without unrestricted execution, CR0.PG (used for VT-x, not the shadow)
4062 * must always be 1. This forces us to effectively clear both EFER.LMA and EFER.LME until
4063 * the guest has also set CR0.PG=1. Otherwise, we would run into an invalid-guest state
4064 * during VM-entry.
4065 */
4066 uint64_t uGuestEferMsr = pCtx->msrEFER;
4067 if (!pVM->hmr0.s.vmx.fUnrestrictedGuest)
4068 {
4069 if (!(pCtx->msrEFER & MSR_K6_EFER_LMA))
4070 uGuestEferMsr &= ~MSR_K6_EFER_LME;
4071 else
4072 Assert((pCtx->msrEFER & (MSR_K6_EFER_LMA | MSR_K6_EFER_LME)) == (MSR_K6_EFER_LMA | MSR_K6_EFER_LME));
4073 }
4074
4075 /*
4076 * If the CPU supports VMCS controls for swapping EFER, use it. Otherwise, we have no option
4077 * but to use the auto-load store MSR area in the VMCS for swapping EFER. See @bugref{7368}.
4078 */
4079 if (g_fHmVmxSupportsVmcsEfer)
4080 {
4081 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_EFER_FULL, uGuestEferMsr);
4082 AssertRC(rc);
4083 }
4084 else
4085 {
4086 /*
4087 * We shall use the auto-load/store MSR area only for loading the EFER MSR but we must
4088 * continue to intercept guest read and write accesses to it, see @bugref{7386#c16}.
4089 */
4090 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER, uGuestEferMsr,
4091 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4092 AssertRCReturn(rc, rc);
4093 }
4094
4095 Log4Func(("efer=%#RX64 shadow=%#RX64\n", uGuestEferMsr, pCtx->msrEFER));
4096 }
4097 else if (!g_fHmVmxSupportsVmcsEfer)
4098 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER);
4099
4100 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_EFER_MSR);
4101 }
4102
4103 /*
4104 * Other MSRs.
4105 */
4106 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_OTHER_MSRS)
4107 {
4108 /* Speculation Control (R/W). */
4109 HMVMX_CPUMCTX_ASSERT(pVCpu, HM_CHANGED_GUEST_OTHER_MSRS);
4110 if (pVM->cpum.ro.GuestFeatures.fIbrs)
4111 {
4112 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_IA32_SPEC_CTRL, CPUMGetGuestSpecCtrl(pVCpu),
4113 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4114 AssertRCReturn(rc, rc);
4115 }
4116
4117 /* Last Branch Record. */
4118 if (pVM->hmr0.s.vmx.fLbr)
4119 {
4120 PVMXVMCSINFOSHARED const pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
4121 uint32_t const idFromIpMsrStart = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
4122 uint32_t const idToIpMsrStart = pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
4123 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
4124 Assert(cLbrStack <= 32);
4125 for (uint32_t i = 0; i < cLbrStack; i++)
4126 {
4127 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idFromIpMsrStart + i,
4128 pVmcsInfoShared->au64LbrFromIpMsr[i],
4129 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4130 AssertRCReturn(rc, rc);
4131
4132 /* Some CPUs don't have a Branch-To-IP MSR (P4 and related Xeons). */
4133 if (idToIpMsrStart != 0)
4134 {
4135 rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idToIpMsrStart + i,
4136 pVmcsInfoShared->au64LbrToIpMsr[i],
4137 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4138 AssertRCReturn(rc, rc);
4139 }
4140 }
4141
4142 /* Add LBR top-of-stack MSR (which contains the index to the most recent record). */
4143 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, pVM->hmr0.s.vmx.idLbrTosMsr,
4144 pVmcsInfoShared->u64LbrTosMsr, false /* fSetReadWrite */,
4145 false /* fUpdateHostMsr */);
4146 AssertRCReturn(rc, rc);
4147 }
4148
4149 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_OTHER_MSRS);
4150 }
4151
4152 return VINF_SUCCESS;
4153}
4154
4155
4156/**
4157 * Wrapper for running the guest code in VT-x.
4158 *
4159 * @returns VBox status code, no informational status codes.
4160 * @param pVCpu The cross context virtual CPU structure.
4161 * @param pVmxTransient The VMX-transient structure.
4162 *
4163 * @remarks No-long-jump zone!!!
4164 */
4165DECLINLINE(int) hmR0VmxRunGuest(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
4166{
4167 /* Mark that HM is the keeper of all guest-CPU registers now that we're going to execute guest code. */
4168 pVCpu->cpum.GstCtx.fExtrn |= HMVMX_CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_KEEPER_HM;
4169
4170 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
4171 bool const fResumeVM = RT_BOOL(pVmcsInfo->fVmcsState & VMX_V_VMCS_LAUNCH_STATE_LAUNCHED);
4172#ifdef VBOX_WITH_STATISTICS
4173 if (fResumeVM)
4174 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmResume);
4175 else
4176 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmLaunch);
4177#endif
4178 int rc = pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResumeVM);
4179 AssertMsg(rc <= VINF_SUCCESS, ("%Rrc\n", rc));
4180 return rc;
4181}
4182
4183
4184/**
4185 * Reports world-switch error and dumps some useful debug info.
4186 *
4187 * @param pVCpu The cross context virtual CPU structure.
4188 * @param rcVMRun The return code from VMLAUNCH/VMRESUME.
4189 * @param pVmxTransient The VMX-transient structure (only
4190 * exitReason updated).
4191 */
4192static void hmR0VmxReportWorldSwitchError(PVMCPUCC pVCpu, int rcVMRun, PVMXTRANSIENT pVmxTransient)
4193{
4194 Assert(pVCpu);
4195 Assert(pVmxTransient);
4196 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
4197
4198 Log4Func(("VM-entry failure: %Rrc\n", rcVMRun));
4199 switch (rcVMRun)
4200 {
4201 case VERR_VMX_INVALID_VMXON_PTR:
4202 AssertFailed();
4203 break;
4204 case VINF_SUCCESS: /* VMLAUNCH/VMRESUME succeeded but VM-entry failed... yeah, true story. */
4205 case VERR_VMX_UNABLE_TO_START_VM: /* VMLAUNCH/VMRESUME itself failed. */
4206 {
4207 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &pVCpu->hm.s.vmx.LastError.u32ExitReason);
4208 rc |= VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
4209 AssertRC(rc);
4210 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_QUALIFICATION>(pVCpu, pVmxTransient);
4211
4212 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
4213 /* LastError.idCurrentCpu was already updated in hmR0VmxPreRunGuestCommitted().
4214 Cannot do it here as we may have been long preempted. */
4215
4216#ifdef VBOX_STRICT
4217 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4218 Log4(("uExitReason %#RX32 (VmxTransient %#RX16)\n", pVCpu->hm.s.vmx.LastError.u32ExitReason,
4219 pVmxTransient->uExitReason));
4220 Log4(("Exit Qualification %#RX64\n", pVmxTransient->uExitQual));
4221 Log4(("InstrError %#RX32\n", pVCpu->hm.s.vmx.LastError.u32InstrError));
4222 if (pVCpu->hm.s.vmx.LastError.u32InstrError <= HMVMX_INSTR_ERROR_MAX)
4223 Log4(("InstrError Desc. \"%s\"\n", g_apszVmxInstrErrors[pVCpu->hm.s.vmx.LastError.u32InstrError]));
4224 else
4225 Log4(("InstrError Desc. Range exceeded %u\n", HMVMX_INSTR_ERROR_MAX));
4226 Log4(("Entered host CPU %u\n", pVCpu->hm.s.vmx.LastError.idEnteredCpu));
4227 Log4(("Current host CPU %u\n", pVCpu->hm.s.vmx.LastError.idCurrentCpu));
4228
4229 static struct
4230 {
4231 /** Name of the field to log. */
4232 const char *pszName;
4233 /** The VMCS field. */
4234 uint32_t uVmcsField;
4235 /** Whether host support of this field needs to be checked. */
4236 bool fCheckSupport;
4237 } const s_aVmcsFields[] =
4238 {
4239 { "VMX_VMCS32_CTRL_PIN_EXEC", VMX_VMCS32_CTRL_PIN_EXEC, false },
4240 { "VMX_VMCS32_CTRL_PROC_EXEC", VMX_VMCS32_CTRL_PROC_EXEC, false },
4241 { "VMX_VMCS32_CTRL_PROC_EXEC2", VMX_VMCS32_CTRL_PROC_EXEC2, true },
4242 { "VMX_VMCS32_CTRL_ENTRY", VMX_VMCS32_CTRL_ENTRY, false },
4243 { "VMX_VMCS32_CTRL_EXIT", VMX_VMCS32_CTRL_EXIT, false },
4244 { "VMX_VMCS32_CTRL_CR3_TARGET_COUNT", VMX_VMCS32_CTRL_CR3_TARGET_COUNT, false },
4245 { "VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO", VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, false },
4246 { "VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE", VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, false },
4247 { "VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH", VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, false },
4248 { "VMX_VMCS32_CTRL_TPR_THRESHOLD", VMX_VMCS32_CTRL_TPR_THRESHOLD, false },
4249 { "VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, false },
4250 { "VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, false },
4251 { "VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, false },
4252 { "VMX_VMCS32_CTRL_EXCEPTION_BITMAP", VMX_VMCS32_CTRL_EXCEPTION_BITMAP, false },
4253 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, false },
4254 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, false },
4255 { "VMX_VMCS_CTRL_CR0_MASK", VMX_VMCS_CTRL_CR0_MASK, false },
4256 { "VMX_VMCS_CTRL_CR0_READ_SHADOW", VMX_VMCS_CTRL_CR0_READ_SHADOW, false },
4257 { "VMX_VMCS_CTRL_CR4_MASK", VMX_VMCS_CTRL_CR4_MASK, false },
4258 { "VMX_VMCS_CTRL_CR4_READ_SHADOW", VMX_VMCS_CTRL_CR4_READ_SHADOW, false },
4259 { "VMX_VMCS64_CTRL_EPTP_FULL", VMX_VMCS64_CTRL_EPTP_FULL, true },
4260 { "VMX_VMCS_GUEST_RIP", VMX_VMCS_GUEST_RIP, false },
4261 { "VMX_VMCS_GUEST_RSP", VMX_VMCS_GUEST_RSP, false },
4262 { "VMX_VMCS_GUEST_RFLAGS", VMX_VMCS_GUEST_RFLAGS, false },
4263 { "VMX_VMCS16_VPID", VMX_VMCS16_VPID, true, },
4264 { "VMX_VMCS_HOST_CR0", VMX_VMCS_HOST_CR0, false },
4265 { "VMX_VMCS_HOST_CR3", VMX_VMCS_HOST_CR3, false },
4266 { "VMX_VMCS_HOST_CR4", VMX_VMCS_HOST_CR4, false },
4267 /* The order of selector fields below are fixed! */
4268 { "VMX_VMCS16_HOST_ES_SEL", VMX_VMCS16_HOST_ES_SEL, false },
4269 { "VMX_VMCS16_HOST_CS_SEL", VMX_VMCS16_HOST_CS_SEL, false },
4270 { "VMX_VMCS16_HOST_SS_SEL", VMX_VMCS16_HOST_SS_SEL, false },
4271 { "VMX_VMCS16_HOST_DS_SEL", VMX_VMCS16_HOST_DS_SEL, false },
4272 { "VMX_VMCS16_HOST_FS_SEL", VMX_VMCS16_HOST_FS_SEL, false },
4273 { "VMX_VMCS16_HOST_GS_SEL", VMX_VMCS16_HOST_GS_SEL, false },
4274 { "VMX_VMCS16_HOST_TR_SEL", VMX_VMCS16_HOST_TR_SEL, false },
4275 /* End of ordered selector fields. */
4276 { "VMX_VMCS_HOST_TR_BASE", VMX_VMCS_HOST_TR_BASE, false },
4277 { "VMX_VMCS_HOST_GDTR_BASE", VMX_VMCS_HOST_GDTR_BASE, false },
4278 { "VMX_VMCS_HOST_IDTR_BASE", VMX_VMCS_HOST_IDTR_BASE, false },
4279 { "VMX_VMCS32_HOST_SYSENTER_CS", VMX_VMCS32_HOST_SYSENTER_CS, false },
4280 { "VMX_VMCS_HOST_SYSENTER_EIP", VMX_VMCS_HOST_SYSENTER_EIP, false },
4281 { "VMX_VMCS_HOST_SYSENTER_ESP", VMX_VMCS_HOST_SYSENTER_ESP, false },
4282 { "VMX_VMCS_HOST_RSP", VMX_VMCS_HOST_RSP, false },
4283 { "VMX_VMCS_HOST_RIP", VMX_VMCS_HOST_RIP, false }
4284 };
4285
4286 RTGDTR HostGdtr;
4287 ASMGetGDTR(&HostGdtr);
4288
4289 uint32_t const cVmcsFields = RT_ELEMENTS(s_aVmcsFields);
4290 for (uint32_t i = 0; i < cVmcsFields; i++)
4291 {
4292 uint32_t const uVmcsField = s_aVmcsFields[i].uVmcsField;
4293
4294 bool fSupported;
4295 if (!s_aVmcsFields[i].fCheckSupport)
4296 fSupported = true;
4297 else
4298 {
4299 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4300 switch (uVmcsField)
4301 {
4302 case VMX_VMCS64_CTRL_EPTP_FULL: fSupported = pVM->hmr0.s.fNestedPaging; break;
4303 case VMX_VMCS16_VPID: fSupported = pVM->hmr0.s.vmx.fVpid; break;
4304 case VMX_VMCS32_CTRL_PROC_EXEC2:
4305 fSupported = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
4306 break;
4307 default:
4308 AssertMsgFailedReturnVoid(("Failed to provide VMCS field support for %#RX32\n", uVmcsField));
4309 }
4310 }
4311
4312 if (fSupported)
4313 {
4314 uint8_t const uWidth = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_WIDTH);
4315 switch (uWidth)
4316 {
4317 case VMX_VMCSFIELD_WIDTH_16BIT:
4318 {
4319 uint16_t u16Val;
4320 rc = VMXReadVmcs16(uVmcsField, &u16Val);
4321 AssertRC(rc);
4322 Log4(("%-40s = %#RX16\n", s_aVmcsFields[i].pszName, u16Val));
4323
4324 if ( uVmcsField >= VMX_VMCS16_HOST_ES_SEL
4325 && uVmcsField <= VMX_VMCS16_HOST_TR_SEL)
4326 {
4327 if (u16Val < HostGdtr.cbGdt)
4328 {
4329 /* Order of selectors in s_apszSel is fixed and matches the order in s_aVmcsFields. */
4330 static const char * const s_apszSel[] = { "Host ES", "Host CS", "Host SS", "Host DS",
4331 "Host FS", "Host GS", "Host TR" };
4332 uint8_t const idxSel = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_INDEX);
4333 Assert(idxSel < RT_ELEMENTS(s_apszSel));
4334 PCX86DESCHC pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u16Val & X86_SEL_MASK));
4335 hmR0DumpDescriptor(pDesc, u16Val, s_apszSel[idxSel]);
4336 }
4337 else
4338 Log4((" Selector value exceeds GDT limit!\n"));
4339 }
4340 break;
4341 }
4342
4343 case VMX_VMCSFIELD_WIDTH_32BIT:
4344 {
4345 uint32_t u32Val;
4346 rc = VMXReadVmcs32(uVmcsField, &u32Val);
4347 AssertRC(rc);
4348 Log4(("%-40s = %#RX32\n", s_aVmcsFields[i].pszName, u32Val));
4349 break;
4350 }
4351
4352 case VMX_VMCSFIELD_WIDTH_64BIT:
4353 case VMX_VMCSFIELD_WIDTH_NATURAL:
4354 {
4355 uint64_t u64Val;
4356 rc = VMXReadVmcs64(uVmcsField, &u64Val);
4357 AssertRC(rc);
4358 Log4(("%-40s = %#RX64\n", s_aVmcsFields[i].pszName, u64Val));
4359 break;
4360 }
4361 }
4362 }
4363 }
4364
4365 Log4(("MSR_K6_EFER = %#RX64\n", ASMRdMsr(MSR_K6_EFER)));
4366 Log4(("MSR_K8_CSTAR = %#RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
4367 Log4(("MSR_K8_LSTAR = %#RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
4368 Log4(("MSR_K6_STAR = %#RX64\n", ASMRdMsr(MSR_K6_STAR)));
4369 Log4(("MSR_K8_SF_MASK = %#RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
4370 Log4(("MSR_K8_KERNEL_GS_BASE = %#RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
4371#endif /* VBOX_STRICT */
4372 break;
4373 }
4374
4375 default:
4376 /* Impossible */
4377 AssertMsgFailed(("hmR0VmxReportWorldSwitchError %Rrc (%#x)\n", rcVMRun, rcVMRun));
4378 break;
4379 }
4380}
4381
4382
4383/**
4384 * Sets up the usage of TSC-offsetting and updates the VMCS.
4385 *
4386 * If offsetting is not possible, cause VM-exits on RDTSC(P)s. Also sets up the
4387 * VMX-preemption timer.
4388 *
4389 * @param pVCpu The cross context virtual CPU structure.
4390 * @param pVmxTransient The VMX-transient structure.
4391 * @param idCurrentCpu The current CPU number.
4392 *
4393 * @remarks No-long-jump zone!!!
4394 */
4395static void hmR0VmxUpdateTscOffsettingAndPreemptTimer(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, RTCPUID idCurrentCpu)
4396{
4397 bool fOffsettedTsc;
4398 bool fParavirtTsc;
4399 uint64_t uTscOffset;
4400 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4401
4402 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
4403 {
4404        /* The TMCpuTickGetDeadlineAndTscOffset function is expensive: calling it on
4405           every entry slowed down the bs2-test1 CPUID testcase by ~33% (on a 10980xe). */
4406 uint64_t cTicksToDeadline;
4407 if ( idCurrentCpu == pVCpu->hmr0.s.idLastCpu
4408 && TMVirtualSyncIsCurrentDeadlineVersion(pVM, pVCpu->hmr0.s.vmx.uTscDeadlineVersion))
4409 {
4410 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadline);
4411 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4412 cTicksToDeadline = pVCpu->hmr0.s.vmx.uTscDeadline - SUPReadTsc();
4413 if ((int64_t)cTicksToDeadline > 0)
4414 { /* hopefully */ }
4415 else
4416 {
4417 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadlineExpired);
4418 cTicksToDeadline = 0;
4419 }
4420 }
4421 else
4422 {
4423 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadline);
4424 cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVM, pVCpu, &uTscOffset, &fOffsettedTsc, &fParavirtTsc,
4425 &pVCpu->hmr0.s.vmx.uTscDeadline,
4426 &pVCpu->hmr0.s.vmx.uTscDeadlineVersion);
4427 pVCpu->hmr0.s.vmx.uTscDeadline += cTicksToDeadline;
4428 if (cTicksToDeadline >= 128)
4429 { /* hopefully */ }
4430 else
4431 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadlineExpired);
4432 }
4433
4434 /* Make sure the returned values have sane upper and lower boundaries. */
4435 uint64_t const u64CpuHz = SUPGetCpuHzFromGipBySetIndex(g_pSUPGlobalInfoPage, pVCpu->iHostCpuSet);
4436 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64th of a second, 15.625ms. */ /** @todo r=bird: Once real+virtual timers move to separate thread, we can raise the upper limit (16ms isn't much). ASSUMES working poke cpu function. */
4437        cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 32768); /* 1/32768th of a second, ~30us. */
4438 cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
4439
4440 /** @todo r=ramshankar: We need to find a way to integrate nested-guest
4441 * preemption timers here. We probably need to clamp the preemption timer,
4442 * after converting the timer value to the host. */
4443 uint32_t const cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
4444 int rc = VMXWriteVmcs32(VMX_VMCS32_PREEMPT_TIMER_VALUE, cPreemptionTickCount);
4445 AssertRC(rc);
4446 }
4447 else
4448 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4449
4450 if (fParavirtTsc)
4451 {
4452        /* Currently neither Hyper-V nor KVM needs to update its paravirtualized TSC
4453           information before every VM-entry, so this is disabled for performance reasons. */
4454#if 0
4455 int rc = GIMR0UpdateParavirtTsc(pVM, 0 /* u64Offset */);
4456 AssertRC(rc);
4457#endif
4458 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscParavirt);
4459 }
4460
4461 if ( fOffsettedTsc
4462 && RT_LIKELY(!pVCpu->hmr0.s.fDebugWantRdTscExit))
4463 {
4464 if (pVmxTransient->fIsNestedGuest)
4465 uTscOffset = CPUMApplyNestedGuestTscOffset(pVCpu, uTscOffset);
4466 hmR0VmxSetTscOffsetVmcs(pVmxTransient->pVmcsInfo, uTscOffset);
4467 hmR0VmxRemoveProcCtlsVmcs(pVCpu, pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4468 }
4469 else
4470 {
4471 /* We can't use TSC-offsetting (non-fixed TSC, warp drive active etc.), VM-exit on RDTSC(P). */
4472 hmR0VmxSetProcCtlsVmcs(pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4473 }
4474}
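
/*
 * Illustrative sketch (not compiled): the deadline-to-preemption-timer conversion
 * performed above, pulled out into a standalone helper.  The helper name is made
 * up for this example; the bounds, the shift and the 32-bit cap mirror the code
 * in hmR0VmxUpdateTscOffsettingAndPreemptTimer.
 */
#if 0
static uint32_t hmR0VmxExampleTicksToPreemptTimer(uint64_t cTicksToDeadline, uint64_t u64CpuHz, uint8_t cPreemptTimerShift)
{
    /* Clamp to sane bounds: no more than ~15.6ms and no less than ~30us worth of TSC ticks. */
    cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64);
    cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 32768);

    /* The VMX-preemption timer counts down at the TSC rate divided by 2^cPreemptTimerShift. */
    cTicksToDeadline >>= cPreemptTimerShift;

    /* The VMCS field is only 32 bits wide; keep a little headroom below UINT32_MAX. */
    return (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
}
#endif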
4475
4476
4477/**
4478 * Saves the guest state from the VMCS into the guest-CPU context.
4479 *
4480 * @returns VBox status code.
4481 * @param pVCpu The cross context virtual CPU structure.
4482 * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
4483 */
4484VMMR0DECL(int) VMXR0ImportStateOnDemand(PVMCPUCC pVCpu, uint64_t fWhat)
4485{
4486 AssertPtr(pVCpu);
4487 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4488 return vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, fWhat);
4489}
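
/*
 * Illustrative sketch (not compiled): importing only RIP and RFLAGS on demand.
 * The calling context is hypothetical; the function and the CPUMCTX_EXTRN_XXX
 * flags are the ones used above.
 */
#if 0
{
    int rc = VMXR0ImportStateOnDemand(pVCpu, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS);
    AssertRCReturn(rc, rc);
    /* pVCpu->cpum.GstCtx.rip and pVCpu->cpum.GstCtx.rflags are now up to date. */
}
#endif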
4490
4491
4492/**
4493 * Gets VMX VM-exit auxiliary information.
4494 *
4495 * @returns VBox status code.
4496 * @param pVCpu The cross context virtual CPU structure.
4497 * @param pVmxExitAux Where to store the VM-exit auxiliary info.
4498 * @param fWhat What to fetch, HMVMX_READ_XXX.
4499 */
4500VMMR0DECL(int) VMXR0GetExitAuxInfo(PVMCPUCC pVCpu, PVMXEXITAUX pVmxExitAux, uint32_t fWhat)
4501{
4502 PVMXTRANSIENT pVmxTransient = pVCpu->hmr0.s.vmx.pVmxTransient;
4503 if (RT_LIKELY(pVmxTransient))
4504 {
4505 AssertCompile(sizeof(fWhat) == sizeof(pVmxTransient->fVmcsFieldsRead));
4506
4507 /* The exit reason is always available. */
4508 pVmxExitAux->uReason = pVmxTransient->uExitReason;
4509
4510
4511 if (fWhat & HMVMX_READ_EXIT_QUALIFICATION)
4512 {
4513 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_QUALIFICATION>(pVCpu, pVmxTransient);
4514 pVmxExitAux->u64Qual = pVmxTransient->uExitQual;
4515#ifdef VBOX_STRICT
4516 fWhat &= ~HMVMX_READ_EXIT_QUALIFICATION;
4517#endif
4518 }
4519
4520 if (fWhat & HMVMX_READ_IDT_VECTORING_INFO)
4521 {
4522 vmxHCReadToTransientSlow<HMVMX_READ_IDT_VECTORING_INFO>(pVCpu, pVmxTransient);
4523 pVmxExitAux->uIdtVectoringInfo = pVmxTransient->uIdtVectoringInfo;
4524#ifdef VBOX_STRICT
4525 fWhat &= ~HMVMX_READ_IDT_VECTORING_INFO;
4526#endif
4527 }
4528
4529 if (fWhat & HMVMX_READ_IDT_VECTORING_ERROR_CODE)
4530 {
4531 vmxHCReadToTransientSlow<HMVMX_READ_IDT_VECTORING_ERROR_CODE>(pVCpu, pVmxTransient);
4532 pVmxExitAux->uIdtVectoringErrCode = pVmxTransient->uIdtVectoringErrorCode;
4533#ifdef VBOX_STRICT
4534 fWhat &= ~HMVMX_READ_IDT_VECTORING_ERROR_CODE;
4535#endif
4536 }
4537
4538 if (fWhat & HMVMX_READ_EXIT_INSTR_LEN)
4539 {
4540 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INSTR_LEN>(pVCpu, pVmxTransient);
4541 pVmxExitAux->cbInstr = pVmxTransient->cbExitInstr;
4542#ifdef VBOX_STRICT
4543 fWhat &= ~HMVMX_READ_EXIT_INSTR_LEN;
4544#endif
4545 }
4546
4547 if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_INFO)
4548 {
4549 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INTERRUPTION_INFO>(pVCpu, pVmxTransient);
4550 pVmxExitAux->uExitIntInfo = pVmxTransient->uExitIntInfo;
4551#ifdef VBOX_STRICT
4552 fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_INFO;
4553#endif
4554 }
4555
4556 if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE)
4557 {
4558 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE>(pVCpu, pVmxTransient);
4559 pVmxExitAux->uExitIntErrCode = pVmxTransient->uExitIntErrorCode;
4560#ifdef VBOX_STRICT
4561 fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE;
4562#endif
4563 }
4564
4565 if (fWhat & HMVMX_READ_EXIT_INSTR_INFO)
4566 {
4567 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INSTR_INFO>(pVCpu, pVmxTransient);
4568 pVmxExitAux->InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
4569#ifdef VBOX_STRICT
4570 fWhat &= ~HMVMX_READ_EXIT_INSTR_INFO;
4571#endif
4572 }
4573
4574 if (fWhat & HMVMX_READ_GUEST_LINEAR_ADDR)
4575 {
4576 vmxHCReadToTransientSlow<HMVMX_READ_GUEST_LINEAR_ADDR>(pVCpu, pVmxTransient);
4577 pVmxExitAux->u64GuestLinearAddr = pVmxTransient->uGuestLinearAddr;
4578#ifdef VBOX_STRICT
4579 fWhat &= ~HMVMX_READ_GUEST_LINEAR_ADDR;
4580#endif
4581 }
4582
4583 if (fWhat & HMVMX_READ_GUEST_PHYSICAL_ADDR)
4584 {
4585 vmxHCReadToTransientSlow<HMVMX_READ_GUEST_PHYSICAL_ADDR>(pVCpu, pVmxTransient);
4586 pVmxExitAux->u64GuestPhysAddr = pVmxTransient->uGuestPhysicalAddr;
4587#ifdef VBOX_STRICT
4588 fWhat &= ~HMVMX_READ_GUEST_PHYSICAL_ADDR;
4589#endif
4590 }
4591
4592 if (fWhat & HMVMX_READ_GUEST_PENDING_DBG_XCPTS)
4593 {
4594#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4595 vmxHCReadToTransientSlow<HMVMX_READ_GUEST_PENDING_DBG_XCPTS>(pVCpu, pVmxTransient);
4596 pVmxExitAux->u64GuestPendingDbgXcpts = pVmxTransient->uGuestPendingDbgXcpts;
4597#else
4598 pVmxExitAux->u64GuestPendingDbgXcpts = 0;
4599#endif
4600#ifdef VBOX_STRICT
4601 fWhat &= ~HMVMX_READ_GUEST_PENDING_DBG_XCPTS;
4602#endif
4603 }
4604
4605 AssertMsg(!fWhat, ("fWhat=%#RX32 fVmcsFieldsRead=%#RX32\n", fWhat, pVmxTransient->fVmcsFieldsRead));
4606 return VINF_SUCCESS;
4607 }
4608 return VERR_NOT_AVAILABLE;
4609}
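
/*
 * Illustrative sketch (not compiled): fetching auxiliary VM-exit information,
 * here the exit qualification and the exit instruction length.  The calling
 * context is hypothetical; the HMVMX_READ_XXX flags are the ones handled above.
 */
#if 0
{
    VMXEXITAUX ExitAux;
    int rc = VMXR0GetExitAuxInfo(pVCpu, &ExitAux, HMVMX_READ_EXIT_QUALIFICATION | HMVMX_READ_EXIT_INSTR_LEN);
    if (RT_SUCCESS(rc))
        Log4(("Exit %u: uExitQual=%#RX64 cbInstr=%u\n", ExitAux.uReason, ExitAux.u64Qual, ExitAux.cbInstr));
}
#endif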
4610
4611
4612/**
4613 * Does the necessary state syncing before returning to ring-3 for any reason
4614 * (longjmp, preemption, voluntary exits to ring-3) from VT-x.
4615 *
4616 * @returns VBox status code.
4617 * @param pVCpu The cross context virtual CPU structure.
4618 * @param fImportState Whether to import the guest state from the VMCS back
4619 * to the guest-CPU context.
4620 *
4621 * @remarks No-long-jmp zone!!!
4622 */
4623static int hmR0VmxLeave(PVMCPUCC pVCpu, bool fImportState)
4624{
4625 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4626 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
4627
4628 RTCPUID const idCpu = RTMpCpuId();
4629 Log4Func(("HostCpuId=%u\n", idCpu));
4630
4631 /*
4632 * !!! IMPORTANT !!!
4633     * If you modify code here, check whether VMXR0AssertionCallback() needs to be updated too.
4634 */
4635
4636 /* Save the guest state if necessary. */
4637 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4638 if (fImportState)
4639 {
4640 int rc = vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
4641 AssertRCReturn(rc, rc);
4642 }
4643
4644 /* Restore host FPU state if necessary. We will resync on next R0 reentry. */
4645 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
4646 Assert(!CPUMIsGuestFPUStateActive(pVCpu));
4647
4648 /* Restore host debug registers if necessary. We will resync on next R0 reentry. */
4649#ifdef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
4650 Assert( (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
4651 || pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs
4652 || (!CPUMIsHyperDebugStateActive(pVCpu) && !pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fAlwaysInterceptMovDRx));
4653#else
4654 Assert( (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
4655 || pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs
4656 || !CPUMIsHyperDebugStateActive(pVCpu));
4657#endif
4658 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
4659 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
4660 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
4661
4662 /* Restore host-state bits that VT-x only restores partially. */
4663 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
4664 {
4665 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags, idCpu));
4666 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
4667 }
4668 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
4669
4670 /* Restore the lazy host MSRs as we're leaving VT-x context. */
4671 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
4672 {
4673 /* We shouldn't restore the host MSRs without saving the guest MSRs first. */
4674 if (!fImportState)
4675 {
4676 int rc = vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS);
4677 AssertRCReturn(rc, rc);
4678 }
4679 hmR0VmxLazyRestoreHostMsrs(pVCpu);
4680 Assert(!pVCpu->hmr0.s.vmx.fLazyMsrs);
4681 }
4682 else
4683 pVCpu->hmr0.s.vmx.fLazyMsrs = 0;
4684
4685 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
4686 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
4687
4688 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry);
4689 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatImportGuestState);
4690 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExportGuestState);
4691 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatPreExit);
4692 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitHandling);
4693 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitIO);
4694 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitMovCRx);
4695 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitXcptNmi);
4696 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitVmentry);
4697 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
4698
4699 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
4700
4701    /** @todo This partially defeats the purpose of having preemption hooks.
4702     *        The problem is that deregistering the hooks should be moved to a place
4703     *        that lasts until the EMT is about to be destroyed, not be done every time
4704     *        we leave HM context.
4705     */
4706 int rc = hmR0VmxClearVmcs(pVmcsInfo);
4707 AssertRCReturn(rc, rc);
4708
4709#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4710 /*
4711 * A valid shadow VMCS is made active as part of VM-entry. It is necessary to
4712 * clear a shadow VMCS before allowing that VMCS to become active on another
4713     * logical processor. We may or may not have imported the guest state, which
4714     * clears it, so cover for it here.
4715 *
4716 * See Intel spec. 24.11.1 "Software Use of Virtual-Machine Control Structures".
4717 */
4718 if ( pVmcsInfo->pvShadowVmcs
4719 && pVmcsInfo->fShadowVmcsState != VMX_V_VMCS_LAUNCH_STATE_CLEAR)
4720 {
4721 rc = vmxHCClearShadowVmcs(pVmcsInfo);
4722 AssertRCReturn(rc, rc);
4723 }
4724
4725 /*
4726 * Flag that we need to re-export the host state if we switch to this VMCS before
4727 * executing guest or nested-guest code.
4728 */
4729 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
4730#endif
4731
4732 Log4Func(("Cleared Vmcs. HostCpuId=%u\n", idCpu));
4733 NOREF(idCpu);
4734 return VINF_SUCCESS;
4735}
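
/*
 * Illustrative sketch (not compiled): the ordering rule enforced above for the
 * lazy MSRs - the guest values must be imported from the CPU before the host
 * values are written back, otherwise the guest MSR values would be lost.  The
 * standalone framing is hypothetical; the helpers are the ones called above.
 */
#if 0
{
    PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
    if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
    {
        /* 1. Pull the guest KERNEL_GS_BASE and SYSCALL MSR values into the guest-CPU context. */
        int rc = vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS);
        AssertRCReturn(rc, rc);

        /* 2. Only then reload the host values into the actual MSRs. */
        hmR0VmxLazyRestoreHostMsrs(pVCpu);
    }
}
#endif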
4736
4737
4738/**
4739 * Leaves the VT-x session.
4740 *
4741 * @returns VBox status code.
4742 * @param pVCpu The cross context virtual CPU structure.
4743 *
4744 * @remarks No-long-jmp zone!!!
4745 */
4746static int hmR0VmxLeaveSession(PVMCPUCC pVCpu)
4747{
4748 HM_DISABLE_PREEMPT(pVCpu);
4749 HMVMX_ASSERT_CPU_SAFE(pVCpu);
4750 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
4751 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4752
4753    /* When thread-context hooks are used, we can avoid doing the leave again if we were preempted earlier
4754       and already did this from VMXR0ThreadCtxCallback(). */
4755 if (!pVCpu->hmr0.s.fLeaveDone)
4756 {
4757 int rc2 = hmR0VmxLeave(pVCpu, true /* fImportState */);
4758 AssertRCReturnStmt(rc2, HM_RESTORE_PREEMPT(), rc2);
4759 pVCpu->hmr0.s.fLeaveDone = true;
4760 }
4761 Assert(!pVCpu->cpum.GstCtx.fExtrn);
4762
4763 /*
4764 * !!! IMPORTANT !!!
4765     * If you modify code here, make sure to check whether VMXR0AssertionCallback() needs to be updated too.
4766 */
4767
4768 /* Deregister hook now that we've left HM context before re-enabling preemption. */
4769 /** @todo Deregistering here means we need to VMCLEAR always
4770 * (longjmp/exit-to-r3) in VT-x which is not efficient, eliminate need
4771 * for calling VMMR0ThreadCtxHookDisable here! */
4772 VMMR0ThreadCtxHookDisable(pVCpu);
4773
4774 /* Leave HM context. This takes care of local init (term) and deregistering the longjmp-to-ring-3 callback. */
4775 int rc = HMR0LeaveCpu(pVCpu);
4776 HM_RESTORE_PREEMPT();
4777 return rc;
4778}
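
/*
 * Illustrative sketch (not compiled): the preemption bracket used above.  Work
 * that must stay on one host CPU (clearing the VMCS, restoring host MSRs) is
 * wrapped in HM_DISABLE_PREEMPT/HM_RESTORE_PREEMPT; the body shown here is
 * hypothetical.
 */
#if 0
{
    HM_DISABLE_PREEMPT(pVCpu);
    /* ... per-CPU work that must not be rescheduled, e.g. hmR0VmxLeave() ... */
    HM_RESTORE_PREEMPT();
}
#endif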
4779
4780
4781/**
4782 * Takes the necessary actions before going back to ring-3.
4783 *
4784 * An action requires us to go back to ring-3. This function does the necessary
4785 * steps before we can safely return to ring-3. This is not the same as longjmps
4786 * to ring-3; this is voluntary and prepares the guest so it may continue
4787 * executing outside HM (recompiler/IEM).
4788 *
4789 * @returns VBox status code.
4790 * @param pVCpu The cross context virtual CPU structure.
4791 * @param rcExit The reason for exiting to ring-3. Can be
4792 * VINF_VMM_UNKNOWN_RING3_CALL.
4793 */
4794static int hmR0VmxExitToRing3(PVMCPUCC pVCpu, VBOXSTRICTRC rcExit)
4795{
4796 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
4797
4798 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4799 if (RT_UNLIKELY(rcExit == VERR_VMX_INVALID_VMCS_PTR))
4800 {
4801 VMXGetCurrentVmcs(&pVCpu->hm.s.vmx.LastError.HCPhysCurrentVmcs);
4802 pVCpu->hm.s.vmx.LastError.u32VmcsRev = *(uint32_t *)pVmcsInfo->pvVmcs;
4803 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
4804 /* LastError.idCurrentCpu was updated in hmR0VmxPreRunGuestCommitted(). */
4805 }
4806
4807    /* Please, no longjumps here (a logging flush could longjmp back to ring-3). NO LOGGING BEFORE THIS POINT! */
4808 VMMRZCallRing3Disable(pVCpu);
4809 Log4Func(("rcExit=%d\n", VBOXSTRICTRC_VAL(rcExit)));
4810
4811 /*
4812 * Convert any pending HM events back to TRPM due to premature exits to ring-3.
4813     * We need to do this only on returns to ring-3 and not for longjmps to ring-3.
4814 *
4815 * This is because execution may continue from ring-3 and we would need to inject
4816 * the event from there (hence place it back in TRPM).
4817 */
4818 if (pVCpu->hm.s.Event.fPending)
4819 {
4820 vmxHCPendingEventToTrpmTrap(pVCpu);
4821 Assert(!pVCpu->hm.s.Event.fPending);
4822
4823 /* Clear the events from the VMCS. */
4824 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0); AssertRC(rc);
4825 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, 0); AssertRC(rc);
4826 }
4827#ifdef VBOX_STRICT
4828 /*
4829 * We check for rcExit here since for errors like VERR_VMX_UNABLE_TO_START_VM (which are
4830 * fatal), we don't care about verifying duplicate injection of events. Errors like
4831 * VERR_EM_INTERPRET are converted to their VINF_* counterparts -prior- to calling this
4832     * function, so those should and will be checked below.
4833 */
4834 else if (RT_SUCCESS(rcExit))
4835 {
4836 /*
4837 * Ensure we don't accidentally clear a pending HM event without clearing the VMCS.
4838         * This can be pretty hard to debug otherwise: interrupts might occasionally get
4839         * injected twice, see @bugref{9180#c42}.
4840         *
4841         * However, if the VM-entry failed, the VM-entry interruption-information field would
4842         * be left unmodified as the event would not have been injected into the guest. In
4843         * such cases, don't assert; we're not going to continue guest execution anyway.
4844 */
4845 uint32_t uExitReason;
4846 uint32_t uEntryIntInfo;
4847 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
4848 rc |= VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &uEntryIntInfo);
4849 AssertRC(rc);
4850 AssertMsg(VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason) || !VMX_ENTRY_INT_INFO_IS_VALID(uEntryIntInfo),
4851 ("uExitReason=%#RX32 uEntryIntInfo=%#RX32 rcExit=%d\n", uExitReason, uEntryIntInfo, VBOXSTRICTRC_VAL(rcExit)));
4852 }
4853#endif
4854
4855 /*
4856 * Clear the interrupt-window and NMI-window VMCS controls as we could have got
4857 * a VM-exit with higher priority than interrupt-window or NMI-window VM-exits
4858 * (e.g. TPR below threshold).
4859 */
4860 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
4861 {
4862 Assert(!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs);
4863 vmxHCClearIntWindowExitVmcs(pVCpu, pVmcsInfo);
4864 vmxHCClearNmiWindowExitVmcs(pVCpu, pVmcsInfo);
4865 }
4866
4867    /* If we're emulating an instruction, we shouldn't have any TRPM traps pending,
4868       and if we're injecting an event, we should have a TRPM trap pending. */
4869 AssertMsg(rcExit != VINF_EM_RAW_INJECT_TRPM_EVENT || TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
4870#ifndef DEBUG_bird /* Triggered after firing an NMI against NT4SP1, possibly a triple fault in progress. */
4871 AssertMsg(rcExit != VINF_EM_RAW_EMULATE_INSTR || !TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
4872#endif
4873
4874 /* Save guest state and restore host state bits. */
4875 int rc = hmR0VmxLeaveSession(pVCpu);
4876 AssertRCReturn(rc, rc);
4877 STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
4878
4879 /* Thread-context hooks are unregistered at this point!!! */
4880 /* Ring-3 callback notifications are unregistered at this point!!! */
4881
4882 /* Sync recompiler state. */
4883 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
4884 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR
4885 | CPUM_CHANGED_LDTR
4886 | CPUM_CHANGED_GDTR
4887 | CPUM_CHANGED_IDTR
4888 | CPUM_CHANGED_TR
4889 | CPUM_CHANGED_HIDDEN_SEL_REGS);
4890 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
4891 && CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx))
4892 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
4893
4894 Assert(!pVCpu->hmr0.s.fClearTrapFlag);
4895
4896    /* Update the exit-to-ring-3 reason. */
4897 pVCpu->hm.s.rcLastExitToR3 = VBOXSTRICTRC_VAL(rcExit);
4898
4899 /* On our way back from ring-3 reload the guest state if there is a possibility of it being changed. */
4900 if ( rcExit != VINF_EM_RAW_INTERRUPT
4901 || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
4902 {
4903 Assert(!(pVCpu->cpum.GstCtx.fExtrn & HMVMX_CPUMCTX_EXTRN_ALL));
4904 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
4905 }
4906
4907 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3);
4908 VMMRZCallRing3Enable(pVCpu);
4909 return rc;
4910}
4911
4912
4913/**
4914 * VMMRZCallRing3() callback wrapper which saves the guest state before we
4915 * longjump due to a ring-0 assertion.
4916 *
4917 * @returns VBox status code.
4918 * @param pVCpu The cross context virtual CPU structure.
4919 */
4920VMMR0DECL(int) VMXR0AssertionCallback(PVMCPUCC pVCpu)
4921{
4922 /*
4923 * !!! IMPORTANT !!!
4924     * If you modify code here, check whether hmR0VmxLeave() and hmR0VmxLeaveSession() need to be updated too.
4925 * This is a stripped down version which gets out ASAP, trying to not trigger any further assertions.
4926 */
4927 VMMR0AssertionRemoveNotification(pVCpu);
4928 VMMRZCallRing3Disable(pVCpu);
4929 HM_DISABLE_PREEMPT(pVCpu);
4930
4931 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4932 vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
4933 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
4934 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
4935
4936 /* Restore host-state bits that VT-x only restores partially. */
4937 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
4938 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
4939 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
4940
4941 /* Restore the lazy host MSRs as we're leaving VT-x context. */
4942 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
4943 hmR0VmxLazyRestoreHostMsrs(pVCpu);
4944
4945 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
4946 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
4947 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
4948
4949    /* Clear the current VMCS data back to memory (the shadow VMCS, if any, would have
4950       been cleared as part of importing the guest state above). */
4951 hmR0VmxClearVmcs(pVmcsInfo);
4952
4953 /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */
4954 VMMR0ThreadCtxHookDisable(pVCpu);
4955
4956 /* Leave HM context. This takes care of local init (term). */
4957 HMR0LeaveCpu(pVCpu);
4958 HM_RESTORE_PREEMPT();
4959 return VINF_SUCCESS;
4960}
4961
4962
4963/**
4964 * Enters the VT-x session.
4965 *
4966 * @returns VBox status code.
4967 * @param pVCpu The cross context virtual CPU structure.
4968 */
4969VMMR0DECL(int) VMXR0Enter(PVMCPUCC pVCpu)
4970{
4971 AssertPtr(pVCpu);
4972 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fSupported);
4973 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4974
4975 LogFlowFunc(("pVCpu=%p\n", pVCpu));
4976 Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
4977 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
4978
4979#ifdef VBOX_STRICT
4980 /* At least verify VMX is enabled, since we can't check if we're in VMX root mode without #GP'ing. */
4981 RTCCUINTREG uHostCr4 = ASMGetCR4();
4982 if (!(uHostCr4 & X86_CR4_VMXE))
4983 {
4984 LogRelFunc(("X86_CR4_VMXE bit in CR4 is not set!\n"));
4985 return VERR_VMX_X86_CR4_VMXE_CLEARED;
4986 }
4987#endif
4988
4989 /*
4990 * Do the EMT scheduled L1D and MDS flush here if needed.
4991 */
4992 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
4993 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
4994 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
4995 hmR0MdsClear();
4996
4997 /*
4998 * Load the appropriate VMCS as the current and active one.
4999 */
5000 PVMXVMCSINFO pVmcsInfo;
5001 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx);
5002 if (!fInNestedGuestMode)
5003 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfo;
5004 else
5005 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
5006 int rc = hmR0VmxLoadVmcs(pVmcsInfo);
5007 if (RT_SUCCESS(rc))
5008 {
5009 pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs = fInNestedGuestMode;
5010 pVCpu->hm.s.vmx.fSwitchedToNstGstVmcsCopyForRing3 = fInNestedGuestMode;
5011 pVCpu->hmr0.s.fLeaveDone = false;
5012 Log4Func(("Loaded %s Vmcs. HostCpuId=%u\n", fInNestedGuestMode ? "nested-guest" : "guest", RTMpCpuId()));
5013 }
5014 return rc;
5015}
5016
5017
5018/**
5019 * The thread-context callback.
5020 *
5021 * This is used together with RTThreadCtxHookCreate() on platforms which
5022 * support it, and directly from VMMR0EmtPrepareForBlocking() and
5023 * VMMR0EmtResumeAfterBlocking() on platforms which don't.
5024 *
5025 * @param enmEvent The thread-context event.
5026 * @param pVCpu The cross context virtual CPU structure.
5027 * @param fGlobalInit Whether global VT-x/AMD-V init. was used.
5028 * @thread EMT(pVCpu)
5029 */
5030VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPUCC pVCpu, bool fGlobalInit)
5031{
5032 AssertPtr(pVCpu);
5033 RT_NOREF1(fGlobalInit);
5034
5035 switch (enmEvent)
5036 {
5037 case RTTHREADCTXEVENT_OUT:
5038 {
5039 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5040 VMCPU_ASSERT_EMT(pVCpu);
5041
5042 /* No longjmps (logger flushes, locks) in this fragile context. */
5043 VMMRZCallRing3Disable(pVCpu);
5044 Log4Func(("Preempting: HostCpuId=%u\n", RTMpCpuId()));
5045
5046 /* Restore host-state (FPU, debug etc.) */
5047 if (!pVCpu->hmr0.s.fLeaveDone)
5048 {
5049 /*
5050                 * Do -not- import the guest-state here as we might already be in the middle of importing
5051                 * it; this is especially bad if we're holding the PGM lock (see the comment at the end of vmxHCImportGuestStateEx()).
5052 */
5053 hmR0VmxLeave(pVCpu, false /* fImportState */);
5054 pVCpu->hmr0.s.fLeaveDone = true;
5055 }
5056
5057 /* Leave HM context, takes care of local init (term). */
5058 int rc = HMR0LeaveCpu(pVCpu);
5059 AssertRC(rc);
5060
5061 /* Restore longjmp state. */
5062 VMMRZCallRing3Enable(pVCpu);
5063 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreempt);
5064 break;
5065 }
5066
5067 case RTTHREADCTXEVENT_IN:
5068 {
5069 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5070 VMCPU_ASSERT_EMT(pVCpu);
5071
5072 /* Do the EMT scheduled L1D and MDS flush here if needed. */
5073 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
5074 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
5075 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
5076 hmR0MdsClear();
5077
5078 /* No longjmps here, as we don't want to trigger preemption (& its hook) while resuming. */
5079 VMMRZCallRing3Disable(pVCpu);
5080 Log4Func(("Resumed: HostCpuId=%u\n", RTMpCpuId()));
5081
5082 /* Initialize the bare minimum state required for HM. This takes care of
5083 initializing VT-x if necessary (onlined CPUs, local init etc.) */
5084 int rc = hmR0EnterCpu(pVCpu);
5085 AssertRC(rc);
5086 Assert( (pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
5087 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
5088
5089 /* Load the active VMCS as the current one. */
5090 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5091 rc = hmR0VmxLoadVmcs(pVmcsInfo);
5092 AssertRC(rc);
5093 Log4Func(("Resumed: Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
5094 pVCpu->hmr0.s.fLeaveDone = false;
5095
5096 /* Restore longjmp state. */
5097 VMMRZCallRing3Enable(pVCpu);
5098 break;
5099 }
5100
5101 default:
5102 break;
5103 }
5104}
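
/*
 * Illustrative sketch (not compiled): how the callback above is driven around a
 * point where the EMT may block or be preempted.  The wrapper is hypothetical;
 * the events and VMXR0ThreadCtxCallback() are the real ones.
 */
#if 0
{
    VMXR0ThreadCtxCallback(RTTHREADCTXEVENT_OUT, pVCpu, true /* fGlobalInit */); /* leaving the CPU */
    /* ... the EMT blocks or is preempted here ... */
    VMXR0ThreadCtxCallback(RTTHREADCTXEVENT_IN, pVCpu, true /* fGlobalInit */);  /* back, possibly on another CPU */
}
#endif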
5105
5106
5107/**
5108 * Exports the host state into the VMCS host-state area.
5109 * Sets up the VM-exit MSR-load area.
5110 *
5111 * The CPU state will be loaded from these fields on every successful VM-exit.
5112 *
5113 * @returns VBox status code.
5114 * @param pVCpu The cross context virtual CPU structure.
5115 *
5116 * @remarks No-long-jump zone!!!
5117 */
5118static int hmR0VmxExportHostState(PVMCPUCC pVCpu)
5119{
5120 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5121
5122 int rc = VINF_SUCCESS;
5123 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
5124 {
5125 uint64_t uHostCr4 = hmR0VmxExportHostControlRegs();
5126
5127 rc = hmR0VmxExportHostSegmentRegs(pVCpu, uHostCr4);
5128 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5129
5130 hmR0VmxExportHostMsrs(pVCpu);
5131
5132 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_HOST_CONTEXT;
5133 }
5134 return rc;
5135}
5136
5137
5138/**
5139 * Saves the host state in the VMCS host-state area.
5140 *
5141 * @returns VBox status code.
5142 * @param pVCpu The cross context virtual CPU structure.
5143 *
5144 * @remarks No-long-jump zone!!!
5145 */
5146VMMR0DECL(int) VMXR0ExportHostState(PVMCPUCC pVCpu)
5147{
5148 AssertPtr(pVCpu);
5149 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5150
5151 /*
5152 * Export the host state here while entering HM context.
5153 * When thread-context hooks are used, we might get preempted and have to re-save the host
5154 * state but most of the time we won't be, so do it here before we disable interrupts.
5155 */
5156 return hmR0VmxExportHostState(pVCpu);
5157}
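
/*
 * Illustrative sketch (not compiled): the dirty-flag pattern used above.  The
 * host state is only re-exported when HM_CHANGED_HOST_CONTEXT is pending, e.g.
 * after migrating to another host CPU.  The call site is hypothetical.
 */
#if 0
{
    ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_HOST_CONTEXT); /* mark the host state as stale */
    int rc = VMXR0ExportHostState(pVCpu);                                /* re-exports it and clears the flag */
    AssertRC(rc);
}
#endif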
5158
5159
5160/**
5161 * Exports the guest state into the VMCS guest-state area.
5162 *
5163 * This will typically be done before VM-entry when the guest-CPU state and the
5164 * VMCS state may potentially be out of sync.
5165 *
5166 * Sets up the VM-entry MSR-load and VM-exit MSR-store areas. Sets up the
5167 * VM-entry controls.
5168 * Sets up the appropriate VMX non-root function to execute guest code based on
5169 * the guest CPU mode.
5170 *
5171 * @returns VBox strict status code.
5172 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5173 * without unrestricted guest execution and the VMMDev is not presently
5174 * mapped (e.g. EFI32).
5175 *
5176 * @param pVCpu The cross context virtual CPU structure.
5177 * @param pVmxTransient The VMX-transient structure.
5178 *
5179 * @remarks No-long-jump zone!!!
5180 */
5181static VBOXSTRICTRC hmR0VmxExportGuestState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5182{
5183 AssertPtr(pVCpu);
5184 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5185 LogFlowFunc(("pVCpu=%p\n", pVCpu));
5186
5187 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExportGuestState, x);
5188
5189 /*
5190 * Determine real-on-v86 mode.
5191 * Used when the guest is in real-mode and unrestricted guest execution is not used.
5192 */
5193 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
5194 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUnrestrictedGuest
5195 || !CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx))
5196 pVmcsInfoShared->RealMode.fRealOnV86Active = false;
5197 else
5198 {
5199 Assert(!pVmxTransient->fIsNestedGuest);
5200 pVmcsInfoShared->RealMode.fRealOnV86Active = true;
5201 }
5202
5203 /*
5204 * Any ordering dependency among the sub-functions below must be explicitly stated using comments.
5205 * Ideally, assert that the cross-dependent bits are up-to-date at the point of using it.
5206 */
5207 int rc = vmxHCExportGuestEntryExitCtls(pVCpu, pVmxTransient);
5208 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5209
5210 rc = vmxHCExportGuestCR0(pVCpu, pVmxTransient);
5211 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5212
5213 VBOXSTRICTRC rcStrict = vmxHCExportGuestCR3AndCR4(pVCpu, pVmxTransient);
5214 if (rcStrict == VINF_SUCCESS)
5215 { /* likely */ }
5216 else
5217 {
5218 Assert(rcStrict == VINF_EM_RESCHEDULE_REM || RT_FAILURE_NP(rcStrict));
5219 return rcStrict;
5220 }
5221
5222 rc = vmxHCExportGuestSegRegsXdtr(pVCpu, pVmxTransient);
5223 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5224
5225 rc = hmR0VmxExportGuestMsrs(pVCpu, pVmxTransient);
5226 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5227
5228 vmxHCExportGuestApicTpr(pVCpu, pVmxTransient);
5229 vmxHCExportGuestXcptIntercepts(pVCpu, pVmxTransient);
5230 vmxHCExportGuestRip(pVCpu);
5231 hmR0VmxExportGuestRsp(pVCpu);
5232 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5233
5234 rc = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
5235 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5236
5237    /* Clear any bits that may be set but are exported unconditionally, as well as unused/reserved bits. */
5238 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~( (HM_CHANGED_GUEST_GPRS_MASK & ~HM_CHANGED_GUEST_RSP)
5239 | HM_CHANGED_GUEST_CR2
5240 | (HM_CHANGED_GUEST_DR_MASK & ~HM_CHANGED_GUEST_DR7)
5241 | HM_CHANGED_GUEST_X87
5242 | HM_CHANGED_GUEST_SSE_AVX
5243