VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HMVMXR0.cpp@ 94439

Last change on this file since 94439 was 94011, checked in by vboxsync, 3 years ago

VMM/HMVMXR0: Removed obsolete comment.

1/* $Id: HMVMXR0.cpp 94011 2022-03-01 05:28:19Z vboxsync $ */
2/** @file
3 * HM VMX (Intel VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2012-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_HM
23#define VMCPU_INCL_CPUM_GST_CTX
24#include <iprt/x86.h>
25#include <iprt/asm-amd64-x86.h>
26#include <iprt/thread.h>
27#include <iprt/mem.h>
28#include <iprt/mp.h>
29
30#include <VBox/vmm/pdmapi.h>
31#include <VBox/vmm/dbgf.h>
32#include <VBox/vmm/iem.h>
33#include <VBox/vmm/iom.h>
34#include <VBox/vmm/tm.h>
35#include <VBox/vmm/em.h>
36#include <VBox/vmm/gim.h>
37#include <VBox/vmm/apic.h>
38#include "HMInternal.h"
39#include <VBox/vmm/vmcc.h>
40#include <VBox/vmm/hmvmxinline.h>
41#include "HMVMXR0.h"
42#include "VMXInternal.h"
43#include "dtrace/VBoxVMM.h"
44
45
46/*********************************************************************************************************************************
47* Defined Constants And Macros *
48*********************************************************************************************************************************/
49#ifdef DEBUG_ramshankar
50# define HMVMX_ALWAYS_SAVE_GUEST_RFLAGS
51# define HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
52# define HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE
53# define HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
54# define HMVMX_ALWAYS_CLEAN_TRANSIENT
55# define HMVMX_ALWAYS_CHECK_GUEST_STATE
56# define HMVMX_ALWAYS_TRAP_ALL_XCPTS
57# define HMVMX_ALWAYS_TRAP_PF
58# define HMVMX_ALWAYS_FLUSH_TLB
59# define HMVMX_ALWAYS_SWAP_EFER
60#endif
61
62
63/*********************************************************************************************************************************
64* Structures and Typedefs *
65*********************************************************************************************************************************/
66/**
67 * VMX page allocation information.
68 */
69typedef struct
70{
71 uint32_t fValid; /**< Whether to allocate this page (e.g., based on a CPU feature). */
72 uint32_t uPadding0; /**< Padding to ensure an array of these structs is aligned to a multiple of 8. */
73 PRTHCPHYS pHCPhys; /**< Where to store the host-physical address of the allocation. */
74 PRTR0PTR ppVirt; /**< Where to store the host-virtual address of the allocation. */
75} VMXPAGEALLOCINFO;
76/** Pointer to VMX page-allocation info. */
77typedef VMXPAGEALLOCINFO *PVMXPAGEALLOCINFO;
78/** Pointer to a const VMX page-allocation info. */
79typedef const VMXPAGEALLOCINFO *PCVMXPAGEALLOCINFO;
80AssertCompileSizeAlignment(VMXPAGEALLOCINFO, 8);
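/*
 * Editorial usage sketch (not part of the upstream file): callers build a small array of
 * these entries and hand it to hmR0VmxPagesAllocZ() further below, for example:
 *
 *     VMXPAGEALLOCINFO aAllocInfo[] =
 *     {
 *         { true,        0, &pVmcsInfo->HCPhysVmcs,      &pVmcsInfo->pvVmcs      },
 *         { fMsrBitmaps, 0, &pVmcsInfo->HCPhysMsrBitmap, &pVmcsInfo->pvMsrBitmap },
 *     };
 *     int rc = hmR0VmxPagesAllocZ(&pVmcsInfo->hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
 *
 * Entries whose fValid is false are skipped by the allocator, so optional pages (MSR bitmap,
 * shadow VMCS, etc.) can be described unconditionally.
 */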
81
82
83/*********************************************************************************************************************************
84* Internal Functions *
85*********************************************************************************************************************************/
86static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient);
87static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo);
88
89
90/**
91 * Checks if the given MSR is part of the lastbranch-from-IP MSR stack.
92 * @returns @c true if it's part of LBR stack, @c false otherwise.
93 *
94 * @param pVM The cross context VM structure.
95 * @param idMsr The MSR.
96 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
97 * Optional, can be NULL.
98 *
99 * @remarks Must only be called when LBR is enabled.
100 */
101DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchFromMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
102{
103 Assert(pVM->hmr0.s.vmx.fLbr);
104 Assert(pVM->hmr0.s.vmx.idLbrFromIpMsrFirst);
105 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
106 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
107 if (idxMsr < cLbrStack)
108 {
109 if (pidxMsr)
110 *pidxMsr = idxMsr;
111 return true;
112 }
113 return false;
114}
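/*
 * Editorial worked example: assuming the common Intel layout where the last-branch-from-IP
 * stack starts at MSR 0x680 with 16 entries, idLbrFromIpMsrFirst = 0x680 and
 * idLbrFromIpMsrLast = 0x68f give cLbrStack = 16. Querying idMsr = 0x685 yields idxMsr = 5
 * and returns true, while idMsr = 0x690 yields 16 (not below cLbrStack) and returns false.
 * The unsigned subtraction also rejects MSRs below idLbrFromIpMsrFirst, since those wrap
 * around to a large value.
 */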
115
116
117/**
118 * Checks if the given MSR is part of the lastbranch-to-IP MSR stack.
119 * @returns @c true if it's part of LBR stack, @c false otherwise.
120 *
121 * @param pVM The cross context VM structure.
122 * @param idMsr The MSR.
123 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
124 * Optional, can be NULL.
125 *
126 * @remarks Must only be called when LBR is enabled and when lastbranch-to-IP MSRs
127 * are supported by the CPU (see hmR0VmxSetupLbrMsrRange).
128 */
129DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchToMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
130{
131 Assert(pVM->hmr0.s.vmx.fLbr);
132 if (pVM->hmr0.s.vmx.idLbrToIpMsrFirst)
133 {
134 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrToIpMsrLast - pVM->hmr0.s.vmx.idLbrToIpMsrFirst + 1;
135 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
136 if (idxMsr < cLbrStack)
137 {
138 if (pidxMsr)
139 *pidxMsr = idxMsr;
140 return true;
141 }
142 }
143 return false;
144}
145
146
147/**
148 * Gets the active (in use) VMCS info. object for the specified VCPU.
149 *
150 * This is either the guest or nested-guest VMCS info. object and need not
151 * pertain to the "current" VMCS (in the VMX definition of the term). For instance,
152 * if the VM-entry failed due to an invalid guest state, we may have "cleared" the
153 * current VMCS while returning to ring-3. However, the VMCS info. object for that
154 * VMCS would still be active and is returned here so that we can dump the VMCS
155 * fields to ring-3 for diagnostics. This function is thus only used to
156 * distinguish between the nested-guest and guest VMCS.
157 *
158 * @returns The active VMCS information.
159 * @param pVCpu The cross context virtual CPU structure.
160 *
161 * @thread EMT.
162 * @remarks This function may be called with preemption or interrupts disabled!
163 */
164DECLINLINE(PVMXVMCSINFO) hmGetVmxActiveVmcsInfo(PVMCPUCC pVCpu)
165{
166 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
167 return &pVCpu->hmr0.s.vmx.VmcsInfo;
168 return &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
169}
170
171
172/**
173 * Returns whether the VM-exit MSR-store area differs from the VM-exit MSR-load
174 * area.
175 *
176 * @returns @c true if it's different, @c false otherwise.
177 * @param pVmcsInfo The VMCS info. object.
178 */
179DECL_FORCE_INLINE(bool) hmR0VmxIsSeparateExitMsrStoreAreaVmcs(PCVMXVMCSINFO pVmcsInfo)
180{
181 return RT_BOOL( pVmcsInfo->pvGuestMsrStore != pVmcsInfo->pvGuestMsrLoad
182 && pVmcsInfo->pvGuestMsrStore);
183}
184
185
186/**
187 * Sets the given Processor-based VM-execution controls.
188 *
189 * @param pVmxTransient The VMX-transient structure.
190 * @param uProcCtls The Processor-based VM-execution controls to set.
191 */
192static void hmR0VmxSetProcCtlsVmcs(PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
193{
194 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
195 if ((pVmcsInfo->u32ProcCtls & uProcCtls) != uProcCtls)
196 {
197 pVmcsInfo->u32ProcCtls |= uProcCtls;
198 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
199 AssertRC(rc);
200 }
201}
202
203
204/**
205 * Removes the given Processor-based VM-execution controls.
206 *
207 * @param pVCpu The cross context virtual CPU structure.
208 * @param pVmxTransient The VMX-transient structure.
209 * @param uProcCtls The Processor-based VM-execution controls to remove.
210 *
211 * @remarks When executing a nested-guest, this will not remove any of the specified
212 * controls if the nested hypervisor has set any one of them.
213 */
214static void hmR0VmxRemoveProcCtlsVmcs(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
215{
216 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
217 if (pVmcsInfo->u32ProcCtls & uProcCtls)
218 {
219#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
220 if ( !pVmxTransient->fIsNestedGuest
221 || !CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, uProcCtls))
222#else
223 NOREF(pVCpu);
224 if (!pVmxTransient->fIsNestedGuest)
225#endif
226 {
227 pVmcsInfo->u32ProcCtls &= ~uProcCtls;
228 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
229 AssertRC(rc);
230 }
231 }
232}
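/*
 * Editorial usage sketch: these two helpers are how individual processor-based VM-execution
 * controls are toggled on the current VMCS, e.g. to intercept RDTSC:
 *
 *     hmR0VmxSetProcCtlsVmcs(pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
 *     ...
 *     hmR0VmxRemoveProcCtlsVmcs(pVCpu, pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
 *
 * Both compare against the cached pVmcsInfo->u32ProcCtls, so the VMCS is only written when
 * the effective value actually changes.
 */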
233
234
235/**
236 * Sets the TSC offset for the current VMCS.
237 *
238 * @param uTscOffset The TSC offset to set.
239 * @param pVmcsInfo The VMCS info. object.
240 */
241static void hmR0VmxSetTscOffsetVmcs(PVMXVMCSINFO pVmcsInfo, uint64_t uTscOffset)
242{
243 if (pVmcsInfo->u64TscOffset != uTscOffset)
244 {
245 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, uTscOffset);
246 AssertRC(rc);
247 pVmcsInfo->u64TscOffset = uTscOffset;
248 }
249}
250
251
252/**
253 * Loads the VMCS specified by the VMCS info. object.
254 *
255 * @returns VBox status code.
256 * @param pVmcsInfo The VMCS info. object.
257 *
258 * @remarks Can be called with interrupts disabled.
259 */
260static int hmR0VmxLoadVmcs(PVMXVMCSINFO pVmcsInfo)
261{
262 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
263 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
264
265 int rc = VMXLoadVmcs(pVmcsInfo->HCPhysVmcs);
266 if (RT_SUCCESS(rc))
267 pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_CURRENT;
268 return rc;
269}
270
271
272/**
273 * Clears the VMCS specified by the VMCS info. object.
274 *
275 * @returns VBox status code.
276 * @param pVmcsInfo The VMCS info. object.
277 *
278 * @remarks Can be called with interrupts disabled.
279 */
280static int hmR0VmxClearVmcs(PVMXVMCSINFO pVmcsInfo)
281{
282 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
283 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
284
285 int rc = VMXClearVmcs(pVmcsInfo->HCPhysVmcs);
286 if (RT_SUCCESS(rc))
287 pVmcsInfo->fVmcsState = VMX_V_VMCS_LAUNCH_STATE_CLEAR;
288 return rc;
289}
290
291
292/**
293 * Checks whether the MSR belongs to the set of guest MSRs that we restore
294 * lazily while leaving VT-x.
295 *
296 * @returns true if it does, false otherwise.
297 * @param pVCpu The cross context virtual CPU structure.
298 * @param idMsr The MSR to check.
299 */
300static bool hmR0VmxIsLazyGuestMsr(PCVMCPUCC pVCpu, uint32_t idMsr)
301{
302 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
303 {
304 switch (idMsr)
305 {
306 case MSR_K8_LSTAR:
307 case MSR_K6_STAR:
308 case MSR_K8_SF_MASK:
309 case MSR_K8_KERNEL_GS_BASE:
310 return true;
311 }
312 }
313 return false;
314}
315
316
317/**
317 * Loads a set of guest MSRs to allow read/write passthru to the guest.
319 *
320 * The name of this function is slightly confusing. This function does NOT
321 * postpone loading, but loads the MSR right now. "hmR0VmxLazy" is simply a
322 * common prefix for functions dealing with "lazy restoration" of the shared
323 * MSRs.
324 *
325 * @param pVCpu The cross context virtual CPU structure.
326 *
327 * @remarks No-long-jump zone!!!
328 */
329static void hmR0VmxLazyLoadGuestMsrs(PVMCPUCC pVCpu)
330{
331 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
332 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
333
334 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
335 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
336 {
337 /*
338 * If the guest MSRs are not loaded -and- if all the guest MSRs are identical
339 * to the MSRs on the CPU (which are the saved host MSRs, see assertion above) then
340 * we can skip a few MSR writes.
341 *
342 * Otherwise, it implies either 1. they're not loaded, or 2. they're loaded but the
343 * guest MSR values in the guest-CPU context might be different to what's currently
344 * loaded in the CPU. In either case, we need to write the new guest MSR values to the
345 * CPU, see @bugref{8728}.
346 */
347 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
348 if ( !(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
349 && pCtx->msrKERNELGSBASE == pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase
350 && pCtx->msrLSTAR == pVCpu->hmr0.s.vmx.u64HostMsrLStar
351 && pCtx->msrSTAR == pVCpu->hmr0.s.vmx.u64HostMsrStar
352 && pCtx->msrSFMASK == pVCpu->hmr0.s.vmx.u64HostMsrSfMask)
353 {
354#ifdef VBOX_STRICT
355 Assert(ASMRdMsr(MSR_K8_KERNEL_GS_BASE) == pCtx->msrKERNELGSBASE);
356 Assert(ASMRdMsr(MSR_K8_LSTAR) == pCtx->msrLSTAR);
357 Assert(ASMRdMsr(MSR_K6_STAR) == pCtx->msrSTAR);
358 Assert(ASMRdMsr(MSR_K8_SF_MASK) == pCtx->msrSFMASK);
359#endif
360 }
361 else
362 {
363 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pCtx->msrKERNELGSBASE);
364 ASMWrMsr(MSR_K8_LSTAR, pCtx->msrLSTAR);
365 ASMWrMsr(MSR_K6_STAR, pCtx->msrSTAR);
366 /* The system call flag mask register isn't as benign and accepting of all
367 values as the above, so mask it to avoid #GP'ing on corrupted input. */
368 Assert(!(pCtx->msrSFMASK & ~(uint64_t)UINT32_MAX));
369 ASMWrMsr(MSR_K8_SF_MASK, pCtx->msrSFMASK & UINT32_MAX);
370 }
371 }
372 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_LOADED_GUEST;
373}
374
375
376/**
377 * Checks if the specified guest MSR is part of the VM-entry MSR-load area.
378 *
379 * @returns @c true if found, @c false otherwise.
380 * @param pVmcsInfo The VMCS info. object.
381 * @param idMsr The MSR to find.
382 */
383static bool hmR0VmxIsAutoLoadGuestMsr(PCVMXVMCSINFO pVmcsInfo, uint32_t idMsr)
384{
385 PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
386 uint32_t const cMsrs = pVmcsInfo->cEntryMsrLoad;
387 Assert(pMsrs);
388 Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE);
389 for (uint32_t i = 0; i < cMsrs; i++)
390 {
391 if (pMsrs[i].u32Msr == idMsr)
392 return true;
393 }
394 return false;
395}
396
397
398/**
399 * Performs lazy restoration of the set of host MSRs if they were previously
400 * loaded with guest MSR values.
401 *
402 * @param pVCpu The cross context virtual CPU structure.
403 *
404 * @remarks No-long-jump zone!!!
405 * @remarks The guest MSRs should have been saved back into the guest-CPU
406 * context by hmR0VmxImportGuestState()!!!
407 */
408static void hmR0VmxLazyRestoreHostMsrs(PVMCPUCC pVCpu)
409{
410 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
411 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
412
413 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
414 {
415 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
416 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
417 {
418 ASMWrMsr(MSR_K8_LSTAR, pVCpu->hmr0.s.vmx.u64HostMsrLStar);
419 ASMWrMsr(MSR_K6_STAR, pVCpu->hmr0.s.vmx.u64HostMsrStar);
420 ASMWrMsr(MSR_K8_SF_MASK, pVCpu->hmr0.s.vmx.u64HostMsrSfMask);
421 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase);
422 }
423 }
424 pVCpu->hmr0.s.vmx.fLazyMsrs &= ~(VMX_LAZY_MSRS_LOADED_GUEST | VMX_LAZY_MSRS_SAVED_HOST);
425}
426
427
428/**
429 * Sets pfnStartVm to the best suited variant.
430 *
431 * This must be called whenever anything changes relative to the hmR0VmxStartVm
432 * variant selection:
433 * - pVCpu->hm.s.fLoadSaveGuestXcr0
434 * - HM_WSF_IBPB_ENTRY in pVCpu->hmr0.s.fWorldSwitcher
435 * - HM_WSF_IBPB_EXIT in pVCpu->hmr0.s.fWorldSwitcher
436 * - Perhaps: CPUMIsGuestFPUStateActive() (windows only)
437 * - Perhaps: CPUMCTX.fXStateMask (windows only)
438 *
439 * We currently ASSUME that neither HM_WSF_IBPB_ENTRY nor HM_WSF_IBPB_EXIT
440 * can be changed at runtime.
441 */
442static void hmR0VmxUpdateStartVmFunction(PVMCPUCC pVCpu)
443{
444 static const struct CLANGWORKAROUND { PFNHMVMXSTARTVM pfn; } s_aHmR0VmxStartVmFunctions[] =
445 {
446 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
447 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
448 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
449 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
450 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
451 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
452 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
453 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
454 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
455 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
456 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
457 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
458 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
459 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
460 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
461 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
462 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
463 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
464 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
465 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
466 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
467 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
468 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
469 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
470 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
471 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
472 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
473 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
474 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
475 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
476 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
477 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
478 };
479 uintptr_t const idx = (pVCpu->hmr0.s.fLoadSaveGuestXcr0 ? 1 : 0)
480 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_ENTRY ? 2 : 0)
481 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_ENTRY ? 4 : 0)
482 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_ENTRY ? 8 : 0)
483 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_EXIT ? 16 : 0);
484 PFNHMVMXSTARTVM const pfnStartVm = s_aHmR0VmxStartVmFunctions[idx].pfn;
485 if (pVCpu->hmr0.s.vmx.pfnStartVm != pfnStartVm)
486 pVCpu->hmr0.s.vmx.pfnStartVm = pfnStartVm;
487}
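/*
 * Editorial worked example: a VCPU that must load/save the guest XCR0 and has the
 * IBPB-on-entry and MDS-on-entry mitigations enabled (no L1D flush on entry, no IBPB on
 * exit) computes idx = 1 | 2 | 8 = 11, selecting
 * hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit.
 */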
488
489
490/**
491 * Pushes a 2-byte value onto the real-mode (in virtual-8086 mode) guest's
492 * stack.
493 *
494 * @returns Strict VBox status code (i.e. informational status codes too).
495 * @retval VINF_EM_RESET if pushing a value to the stack caused a triple-fault.
496 * @param pVCpu The cross context virtual CPU structure.
497 * @param uValue The value to push to the guest stack.
498 */
499static VBOXSTRICTRC hmR0VmxRealModeGuestStackPush(PVMCPUCC pVCpu, uint16_t uValue)
500{
501 /*
502 * The stack limit is 0xffff in real-on-virtual 8086 mode. Real-mode with weird stack limits cannot be run in
503 * virtual 8086 mode in VT-x. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers".
504 * See Intel Instruction reference for PUSH and Intel spec. 22.33.1 "Segment Wraparound".
505 */
506 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
507 if (pCtx->sp == 1)
508 return VINF_EM_RESET;
509 pCtx->sp -= sizeof(uint16_t); /* May wrap around which is expected behaviour. */
510 int rc = PGMPhysSimpleWriteGCPhys(pVCpu->CTX_SUFF(pVM), pCtx->ss.u64Base + pCtx->sp, &uValue, sizeof(uint16_t));
511 AssertRC(rc);
512 return rc;
513}
514
515
516/**
517 * Wrapper around VMXWriteVmcs16 taking a pVCpu parameter so VCC doesn't complain about
518 * unreferenced local parameters in the template code...
519 */
520DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t u16Val)
521{
522 RT_NOREF(pVCpu);
523 return VMXWriteVmcs16(uFieldEnc, u16Val);
524}
525
526
527/**
528 * Wrapper around VMXWriteVmcs32 taking a pVCpu parameter so VCC doesn't complain about
529 * unreferenced local parameters in the template code...
530 */
531DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t u32Val)
532{
533 RT_NOREF(pVCpu);
534 return VMXWriteVmcs32(uFieldEnc, u32Val);
535}
536
537
538/**
539 * Wrapper around VMXWriteVmcs64 taking a pVCpu parameter so VCC doesn't complain about
540 * unreferenced local parameters in the template code...
541 */
542DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t u64Val)
543{
544 RT_NOREF(pVCpu);
545 return VMXWriteVmcs64(uFieldEnc, u64Val);
546}
547
548
549/**
550 * Wrapper around VMXReadVmcs16 taking a pVCpu parameter so VCC doesn't complain about
551 * unreferenced local parameters in the template code...
552 */
553DECL_FORCE_INLINE(int) hmR0VmxReadVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t *pu16Val)
554{
555 RT_NOREF(pVCpu);
556 return VMXReadVmcs16(uFieldEnc, pu16Val);
557}
558
559
560/**
561 * Wrapper around VMXReadVmcs32 taking a pVCpu parameter so VCC doesn't complain about
562 * unreferenced local parameters in the template code...
563 */
564DECL_FORCE_INLINE(int) hmR0VmxReadVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t *pu32Val)
565{
566 RT_NOREF(pVCpu);
567 return VMXReadVmcs32(uFieldEnc, pu32Val);
568}
569
570
571/**
572 * Wrapper around VMXReadVmcs64 taking a pVCpu parameter so VCC doesn't complain about
573 * unreferenced local parameters in the template code...
574 */
575DECL_FORCE_INLINE(int) hmR0VmxReadVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t *pu64Val)
576{
577 RT_NOREF(pVCpu);
578 return VMXReadVmcs64(uFieldEnc, pu64Val);
579}
580
581
582/*
583 * Instantiate the code we share with the NEM darwin backend.
584 */
585#define VCPU_2_VMXSTATE(a_pVCpu) (a_pVCpu)->hm.s
586#define VCPU_2_VMXSTATS(a_pVCpu) (a_pVCpu)->hm.s
587
588#define VM_IS_VMX_UNRESTRICTED_GUEST(a_pVM) (a_pVM)->hmr0.s.vmx.fUnrestrictedGuest
589#define VM_IS_VMX_NESTED_PAGING(a_pVM) (a_pVM)->hmr0.s.fNestedPaging
590#define VM_IS_VMX_PREEMPT_TIMER_USED(a_pVM) (a_pVM)->hmr0.s.vmx.fUsePreemptTimer
591#define VM_IS_VMX_LBR(a_pVM) (a_pVM)->hmr0.s.vmx.fLbr
592
593#define VMX_VMCS_WRITE_16(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs16((a_pVCpu), (a_FieldEnc), (a_Val))
594#define VMX_VMCS_WRITE_32(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs32((a_pVCpu), (a_FieldEnc), (a_Val))
595#define VMX_VMCS_WRITE_64(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
596#define VMX_VMCS_WRITE_NW(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
597
598#define VMX_VMCS_READ_16(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs16((a_pVCpu), (a_FieldEnc), (a_pVal))
599#define VMX_VMCS_READ_32(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs32((a_pVCpu), (a_FieldEnc), (a_pVal))
600#define VMX_VMCS_READ_64(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
601#define VMX_VMCS_READ_NW(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
602
603#include "../VMMAll/VMXAllTemplate.cpp.h"
604
605#undef VMX_VMCS_WRITE_16
606#undef VMX_VMCS_WRITE_32
607#undef VMX_VMCS_WRITE_64
608#undef VMX_VMCS_WRITE_NW
609
610#undef VMX_VMCS_READ_16
611#undef VMX_VMCS_READ_32
612#undef VMX_VMCS_READ_64
613#undef VMX_VMCS_READ_NW
614
615#undef VM_IS_VMX_PREEMPT_TIMER_USED
616#undef VM_IS_VMX_NESTED_PAGING
617#undef VM_IS_VMX_UNRESTRICTED_GUEST
618#undef VCPU_2_VMXSTATS
619#undef VCPU_2_VMXSTATE
620
621
622/**
623 * Updates the VM's last error record.
624 *
625 * If there was a VMX instruction error, reads the error data from the VMCS and
626 * updates VCPU's last error record as well.
627 *
628 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
629 * Can be NULL if @a rc is not VERR_VMX_UNABLE_TO_START_VM or
630 * VERR_VMX_INVALID_VMCS_FIELD.
631 * @param rc The error code.
632 */
633static void hmR0VmxUpdateErrorRecord(PVMCPUCC pVCpu, int rc)
634{
635 if ( rc == VERR_VMX_INVALID_VMCS_FIELD
636 || rc == VERR_VMX_UNABLE_TO_START_VM)
637 {
638 AssertPtrReturnVoid(pVCpu);
639 VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
640 }
641 pVCpu->CTX_SUFF(pVM)->hm.s.ForR3.rcInit = rc;
642}
643
644
645/**
646 * Enters VMX root mode operation on the current CPU.
647 *
648 * @returns VBox status code.
649 * @param pHostCpu The HM physical-CPU structure.
650 * @param pVM The cross context VM structure. Can be
651 * NULL, after a resume.
652 * @param HCPhysCpuPage Physical address of the VMXON region.
653 * @param pvCpuPage Pointer to the VMXON region.
654 */
655static int hmR0VmxEnterRootMode(PHMPHYSCPU pHostCpu, PVMCC pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage)
656{
657 Assert(pHostCpu);
658 Assert(HCPhysCpuPage && HCPhysCpuPage != NIL_RTHCPHYS);
659 Assert(RT_ALIGN_T(HCPhysCpuPage, _4K, RTHCPHYS) == HCPhysCpuPage);
660 Assert(pvCpuPage);
661 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
662
663 if (pVM)
664 {
665 /* Write the VMCS revision identifier to the VMXON region. */
666 *(uint32_t *)pvCpuPage = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
667 }
668
669 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
670 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
671
672 /* Enable the VMX bit in CR4 if necessary. */
673 RTCCUINTREG const uOldCr4 = SUPR0ChangeCR4(X86_CR4_VMXE, RTCCUINTREG_MAX);
674
675 /* Record whether VMXE was already enabled prior to us enabling it above. */
676 pHostCpu->fVmxeAlreadyEnabled = RT_BOOL(uOldCr4 & X86_CR4_VMXE);
677
678 /* Enter VMX root mode. */
679 int rc = VMXEnable(HCPhysCpuPage);
680 if (RT_FAILURE(rc))
681 {
682 /* Restore CR4.VMXE if it was not set prior to our attempt to set it above. */
683 if (!pHostCpu->fVmxeAlreadyEnabled)
684 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
685
686 if (pVM)
687 pVM->hm.s.ForR3.vmx.HCPhysVmxEnableError = HCPhysCpuPage;
688 }
689
690 /* Restore interrupts. */
691 ASMSetFlags(fEFlags);
692 return rc;
693}
694
695
696/**
697 * Exits VMX root mode operation on the current CPU.
698 *
699 * @returns VBox status code.
700 * @param pHostCpu The HM physical-CPU structure.
701 */
702static int hmR0VmxLeaveRootMode(PHMPHYSCPU pHostCpu)
703{
704 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
705
706 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
707 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
708
709 /* If we're for some reason not in VMX root mode, then don't leave it. */
710 RTCCUINTREG const uHostCr4 = ASMGetCR4();
711
712 int rc;
713 if (uHostCr4 & X86_CR4_VMXE)
714 {
715 /* Exit VMX root mode and clear the VMX bit in CR4. */
716 VMXDisable();
717
718 /* Clear CR4.VMXE only if it was clear prior to us setting it. */
719 if (!pHostCpu->fVmxeAlreadyEnabled)
720 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
721
722 rc = VINF_SUCCESS;
723 }
724 else
725 rc = VERR_VMX_NOT_IN_VMX_ROOT_MODE;
726
727 /* Restore interrupts. */
728 ASMSetFlags(fEFlags);
729 return rc;
730}
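/*
 * Editorial sequence sketch: a host CPU is brought into and out of VMX operation with the
 * pair of functions above, conceptually:
 *
 *     rc = hmR0VmxEnterRootMode(pHostCpu, pVM, HCPhysCpuPage, pvCpuPage);  // CR4.VMXE + VMXON
 *     // ... run guest code via the VMLAUNCH/VMRESUME paths ...
 *     rc = hmR0VmxLeaveRootMode(pHostCpu);                                 // VMXOFF
 *
 * CR4.VMXE is only cleared on the way out if it was not already set before we enabled it
 * (see fVmxeAlreadyEnabled), so we do not disturb anything else that may rely on it.
 */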
731
732
733/**
734 * Allocates pages as specified by an array of VMX page-allocation info
735 * objects.
736 *
737 * The pages' contents are zeroed after allocation.
738 *
739 * @returns VBox status code.
740 * @param phMemObj Where to return the handle to the allocation.
741 * @param paAllocInfo The pointer to the first element of the VMX
742 * page-allocation info object array.
743 * @param cEntries The number of elements in the @a paAllocInfo array.
744 */
745static int hmR0VmxPagesAllocZ(PRTR0MEMOBJ phMemObj, PVMXPAGEALLOCINFO paAllocInfo, uint32_t cEntries)
746{
747 *phMemObj = NIL_RTR0MEMOBJ;
748
749 /* Figure out how many pages to allocate. */
750 uint32_t cPages = 0;
751 for (uint32_t iPage = 0; iPage < cEntries; iPage++)
752 cPages += !!paAllocInfo[iPage].fValid;
753
754 /* Allocate the pages. */
755 if (cPages)
756 {
757 size_t const cbPages = cPages << HOST_PAGE_SHIFT;
758 int rc = RTR0MemObjAllocPage(phMemObj, cbPages, false /* fExecutable */);
759 if (RT_FAILURE(rc))
760 return rc;
761
762 /* Zero the contents and assign each page to the corresponding VMX page-allocation entry. */
763 void *pvFirstPage = RTR0MemObjAddress(*phMemObj);
764 RT_BZERO(pvFirstPage, cbPages);
765
766 uint32_t iPage = 0;
767 for (uint32_t i = 0; i < cEntries; i++)
768 if (paAllocInfo[i].fValid)
769 {
770 RTHCPHYS const HCPhysPage = RTR0MemObjGetPagePhysAddr(*phMemObj, iPage);
771 void *pvPage = (void *)((uintptr_t)pvFirstPage + (iPage << X86_PAGE_4K_SHIFT));
772 Assert(HCPhysPage && HCPhysPage != NIL_RTHCPHYS);
773 AssertPtr(pvPage);
774
775 Assert(paAllocInfo[i].pHCPhys);
776 Assert(paAllocInfo[i].ppVirt);
777 *paAllocInfo[i].pHCPhys = HCPhysPage;
778 *paAllocInfo[i].ppVirt = pvPage;
779
780 /* Move to next page. */
781 ++iPage;
782 }
783
784 /* Make sure all valid (requested) pages have been assigned. */
785 Assert(iPage == cPages);
786 }
787 return VINF_SUCCESS;
788}
789
790
791/**
792 * Frees pages allocated using hmR0VmxPagesAllocZ.
793 *
794 * @param phMemObj Pointer to the memory object handle. Will be set to
795 * NIL.
796 */
797DECL_FORCE_INLINE(void) hmR0VmxPagesFree(PRTR0MEMOBJ phMemObj)
798{
799 /* We can cleanup wholesale since it's all one allocation. */
800 if (*phMemObj != NIL_RTR0MEMOBJ)
801 {
802 RTR0MemObjFree(*phMemObj, true /* fFreeMappings */);
803 *phMemObj = NIL_RTR0MEMOBJ;
804 }
805}
806
807
808/**
809 * Initializes a VMCS info. object.
810 *
811 * @param pVmcsInfo The VMCS info. object.
812 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
813 */
814static void hmR0VmxVmcsInfoInit(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
815{
816 RT_ZERO(*pVmcsInfo);
817 RT_ZERO(*pVmcsInfoShared);
818
819 pVmcsInfo->pShared = pVmcsInfoShared;
820 Assert(pVmcsInfo->hMemObj == NIL_RTR0MEMOBJ);
821 pVmcsInfo->HCPhysVmcs = NIL_RTHCPHYS;
822 pVmcsInfo->HCPhysShadowVmcs = NIL_RTHCPHYS;
823 pVmcsInfo->HCPhysMsrBitmap = NIL_RTHCPHYS;
824 pVmcsInfo->HCPhysGuestMsrLoad = NIL_RTHCPHYS;
825 pVmcsInfo->HCPhysGuestMsrStore = NIL_RTHCPHYS;
826 pVmcsInfo->HCPhysHostMsrLoad = NIL_RTHCPHYS;
827 pVmcsInfo->HCPhysVirtApic = NIL_RTHCPHYS;
828 pVmcsInfo->HCPhysEPTP = NIL_RTHCPHYS;
829 pVmcsInfo->u64VmcsLinkPtr = NIL_RTHCPHYS;
830 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
831 pVmcsInfo->idHostCpuExec = NIL_RTCPUID;
832}
833
834
835/**
836 * Frees the VT-x structures for a VMCS info. object.
837 *
838 * @param pVmcsInfo The VMCS info. object.
839 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
840 */
841static void hmR0VmxVmcsInfoFree(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
842{
843 hmR0VmxPagesFree(&pVmcsInfo->hMemObj);
844 hmR0VmxVmcsInfoInit(pVmcsInfo, pVmcsInfoShared);
845}
846
847
848/**
849 * Allocates the VT-x structures for a VMCS info. object.
850 *
851 * @returns VBox status code.
852 * @param pVCpu The cross context virtual CPU structure.
853 * @param pVmcsInfo The VMCS info. object.
854 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
855 *
856 * @remarks The caller is expected to take care of any and all allocation failures.
857 * This function will not perform any cleanup for failures half-way
858 * through.
859 */
860static int hmR0VmxAllocVmcsInfo(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
861{
862 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
863
864 bool const fMsrBitmaps = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS);
865 bool const fShadowVmcs = !fIsNstGstVmcs ? pVM->hmr0.s.vmx.fUseVmcsShadowing : pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing;
866 Assert(!pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing); /* VMCS shadowing is not yet exposed to the guest. */
867 VMXPAGEALLOCINFO aAllocInfo[] =
868 {
869 { true, 0 /* Unused */, &pVmcsInfo->HCPhysVmcs, &pVmcsInfo->pvVmcs },
870 { true, 0 /* Unused */, &pVmcsInfo->HCPhysGuestMsrLoad, &pVmcsInfo->pvGuestMsrLoad },
871 { true, 0 /* Unused */, &pVmcsInfo->HCPhysHostMsrLoad, &pVmcsInfo->pvHostMsrLoad },
872 { fMsrBitmaps, 0 /* Unused */, &pVmcsInfo->HCPhysMsrBitmap, &pVmcsInfo->pvMsrBitmap },
873 { fShadowVmcs, 0 /* Unused */, &pVmcsInfo->HCPhysShadowVmcs, &pVmcsInfo->pvShadowVmcs },
874 };
875
876 int rc = hmR0VmxPagesAllocZ(&pVmcsInfo->hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
877 if (RT_FAILURE(rc))
878 return rc;
879
880 /*
881 * We use the same page for the VM-entry MSR-load and VM-exit MSR-store areas, because
882 * they contain a symmetric list of guest MSRs to load on VM-entry and store on VM-exit.
883 */
884 AssertCompile(RT_ELEMENTS(aAllocInfo) > 0);
885 Assert(pVmcsInfo->HCPhysGuestMsrLoad != NIL_RTHCPHYS);
886 pVmcsInfo->pvGuestMsrStore = pVmcsInfo->pvGuestMsrLoad;
887 pVmcsInfo->HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrLoad;
888
889 /*
890 * Get the virtual-APIC page rather than allocating it again.
891 */
892 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW)
893 {
894 if (!fIsNstGstVmcs)
895 {
896 if (PDMHasApic(pVM))
897 {
898 rc = APICGetApicPageForCpu(pVCpu, &pVmcsInfo->HCPhysVirtApic, (PRTR0PTR)&pVmcsInfo->pbVirtApic, NULL /*pR3Ptr*/);
899 if (RT_FAILURE(rc))
900 return rc;
901 Assert(pVmcsInfo->pbVirtApic);
902 Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
903 }
904 }
905 else
906 {
907 pVmcsInfo->pbVirtApic = &pVCpu->cpum.GstCtx.hwvirt.vmx.abVirtApicPage[0];
908 pVmcsInfo->HCPhysVirtApic = GVMMR0ConvertGVMPtr2HCPhys(pVM, pVmcsInfo->pbVirtApic);
909 Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
910 }
911 }
912
913 return VINF_SUCCESS;
914}
915
916
917/**
918 * Free all VT-x structures for the VM.
919 *
920 * @returns IPRT status code.
921 * @param pVM The cross context VM structure.
922 */
923static void hmR0VmxStructsFree(PVMCC pVM)
924{
925 hmR0VmxPagesFree(&pVM->hmr0.s.vmx.hMemObj);
926#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
927 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
928 {
929 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsFields);
930 pVM->hmr0.s.vmx.paShadowVmcsFields = NULL;
931 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsRoFields);
932 pVM->hmr0.s.vmx.paShadowVmcsRoFields = NULL;
933 }
934#endif
935
936 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
937 {
938 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
939 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
940#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
941 if (pVM->cpum.ro.GuestFeatures.fVmx)
942 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
943#endif
944 }
945}
946
947
948/**
949 * Allocate all VT-x structures for the VM.
950 *
951 * @returns IPRT status code.
952 * @param pVM The cross context VM structure.
953 *
954 * @remarks This function will clean up on memory allocation failures.
955 */
956static int hmR0VmxStructsAlloc(PVMCC pVM)
957{
958 /*
959 * Sanity check the VMCS size reported by the CPU as we assume 4KB allocations.
960 * The VMCS size cannot be more than 4096 bytes.
961 *
962 * See Intel spec. Appendix A.1 "Basic VMX Information".
963 */
964 uint32_t const cbVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_SIZE);
965 if (cbVmcs <= X86_PAGE_4K_SIZE)
966 { /* likely */ }
967 else
968 {
969 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_INVALID_VMCS_SIZE;
970 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
971 }
972
973 /*
974 * Allocate per-VM VT-x structures.
975 */
976 bool const fVirtApicAccess = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
977 bool const fUseVmcsShadowing = pVM->hmr0.s.vmx.fUseVmcsShadowing;
978 VMXPAGEALLOCINFO aAllocInfo[] =
979 {
980 { fVirtApicAccess, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysApicAccess, (PRTR0PTR)&pVM->hmr0.s.vmx.pbApicAccess },
981 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmreadBitmap, &pVM->hmr0.s.vmx.pvVmreadBitmap },
982 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmwriteBitmap, &pVM->hmr0.s.vmx.pvVmwriteBitmap },
983#ifdef VBOX_WITH_CRASHDUMP_MAGIC
984 { true, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysScratch, (PRTR0PTR)&pVM->hmr0.s.vmx.pbScratch },
985#endif
986 };
987
988 int rc = hmR0VmxPagesAllocZ(&pVM->hmr0.s.vmx.hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
989 if (RT_SUCCESS(rc))
990 {
991#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
992 /* Allocate the shadow VMCS-fields array. */
993 if (fUseVmcsShadowing)
994 {
995 Assert(!pVM->hmr0.s.vmx.cShadowVmcsFields);
996 Assert(!pVM->hmr0.s.vmx.cShadowVmcsRoFields);
997 pVM->hmr0.s.vmx.paShadowVmcsFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
998 pVM->hmr0.s.vmx.paShadowVmcsRoFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
999 if (!pVM->hmr0.s.vmx.paShadowVmcsFields || !pVM->hmr0.s.vmx.paShadowVmcsRoFields)
1000 rc = VERR_NO_MEMORY;
1001 }
1002#endif
1003
1004 /*
1005 * Allocate per-VCPU VT-x structures.
1006 */
1007 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus && RT_SUCCESS(rc); idCpu++)
1008 {
1009 /* Allocate the guest VMCS structures. */
1010 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1011 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
1012
1013#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1014 /* Allocate the nested-guest VMCS structures, when the VMX feature is exposed to the guest. */
1015 if (pVM->cpum.ro.GuestFeatures.fVmx && RT_SUCCESS(rc))
1016 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
1017#endif
1018 }
1019 if (RT_SUCCESS(rc))
1020 return VINF_SUCCESS;
1021 }
1022 hmR0VmxStructsFree(pVM);
1023 return rc;
1024}
1025
1026
1027/**
1028 * Pre-initializes non-zero fields in VMX structures that will be allocated.
1029 *
1030 * @param pVM The cross context VM structure.
1031 */
1032static void hmR0VmxStructsInit(PVMCC pVM)
1033{
1034 /* Paranoia. */
1035 Assert(pVM->hmr0.s.vmx.pbApicAccess == NULL);
1036#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1037 Assert(pVM->hmr0.s.vmx.pbScratch == NULL);
1038#endif
1039
1040 /*
1041 * Initialize members up-front so we can cleanup en masse on allocation failures.
1042 */
1043#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1044 pVM->hmr0.s.vmx.HCPhysScratch = NIL_RTHCPHYS;
1045#endif
1046 pVM->hmr0.s.vmx.HCPhysApicAccess = NIL_RTHCPHYS;
1047 pVM->hmr0.s.vmx.HCPhysVmreadBitmap = NIL_RTHCPHYS;
1048 pVM->hmr0.s.vmx.HCPhysVmwriteBitmap = NIL_RTHCPHYS;
1049 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1050 {
1051 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1052 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
1053 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
1054 }
1055}
1056
1057#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1058/**
1059 * Returns whether an MSR at the given MSR-bitmap offset is intercepted or not.
1060 *
1061 * @returns @c true if the MSR is intercepted, @c false otherwise.
1062 * @param pbMsrBitmap The MSR bitmap.
1063 * @param offMsr The MSR byte offset.
1064 * @param iBit The bit offset from the byte offset.
1065 */
1066DECLINLINE(bool) hmR0VmxIsMsrBitSet(uint8_t const *pbMsrBitmap, uint16_t offMsr, int32_t iBit)
1067{
1068 Assert(offMsr + (iBit >> 3) <= X86_PAGE_4K_SIZE);
1069 return ASMBitTest(pbMsrBitmap + offMsr, iBit);
1070}
1071#endif
1072
1073/**
1074 * Sets the permission bits for the specified MSR in the given MSR bitmap.
1075 *
1076 * If the passed VMCS is a nested-guest VMCS, this function ensures that the
1077 * read/write intercept is cleared from the MSR bitmap used for hardware-assisted
1078 * VMX execution of the nested-guest, but only if the nested-guest is also not
1079 * intercepting read/write access to this MSR.
1080 *
1081 * @param pVCpu The cross context virtual CPU structure.
1082 * @param pVmcsInfo The VMCS info. object.
1083 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1084 * @param idMsr The MSR value.
1085 * @param fMsrpm The MSR permissions (see VMXMSRPM_XXX). This must
1086 * include both a read -and- a write permission!
1087 *
1088 * @sa CPUMGetVmxMsrPermission.
1089 * @remarks Can be called with interrupts disabled.
1090 */
1091static void hmR0VmxSetMsrPermission(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs, uint32_t idMsr, uint32_t fMsrpm)
1092{
1093 uint8_t *pbMsrBitmap = (uint8_t *)pVmcsInfo->pvMsrBitmap;
1094 Assert(pbMsrBitmap);
1095 Assert(VMXMSRPM_IS_FLAG_VALID(fMsrpm));
1096
1097 /*
1098 * MSR-bitmap Layout:
1099 * Byte index MSR range Interpreted as
1100 * 0x000 - 0x3ff 0x00000000 - 0x00001fff Low MSR read bits.
1101 * 0x400 - 0x7ff 0xc0000000 - 0xc0001fff High MSR read bits.
1102 * 0x800 - 0xbff 0x00000000 - 0x00001fff Low MSR write bits.
1103 * 0xc00 - 0xfff 0xc0000000 - 0xc0001fff High MSR write bits.
1104 *
1105 * A bit corresponding to an MSR within the above range causes a VM-exit
1106 * if the bit is 1 on executions of RDMSR/WRMSR. If an MSR falls outside
1107 * these ranges, it always causes a VM-exit.
1108 *
1109 * See Intel spec. 24.6.9 "MSR-Bitmap Address".
1110 */
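 /*
  * Editorial worked example: for MSR_K8_LSTAR (0xc0000082) the code below computes
  * offMsr = 0x400 and iBit = 0x82, so the read-intercept bit lives at byte
  * 0x400 + (0x82 >> 3) = 0x410, bit 2, and the matching write-intercept bit at byte
  * 0xc10, bit 2.
  */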
1111 uint16_t const offBitmapRead = 0;
1112 uint16_t const offBitmapWrite = 0x800;
1113 uint16_t offMsr;
1114 int32_t iBit;
1115 if (idMsr <= UINT32_C(0x00001fff))
1116 {
1117 offMsr = 0;
1118 iBit = idMsr;
1119 }
1120 else if (idMsr - UINT32_C(0xc0000000) <= UINT32_C(0x00001fff))
1121 {
1122 offMsr = 0x400;
1123 iBit = idMsr - UINT32_C(0xc0000000);
1124 }
1125 else
1126 AssertMsgFailedReturnVoid(("Invalid MSR %#RX32\n", idMsr));
1127
1128 /*
1129 * Set the MSR read permission.
1130 */
1131 uint16_t const offMsrRead = offBitmapRead + offMsr;
1132 Assert(offMsrRead + (iBit >> 3) < offBitmapWrite);
1133 if (fMsrpm & VMXMSRPM_ALLOW_RD)
1134 {
1135#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1136 bool const fClear = !fIsNstGstVmcs ? true
1137 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrRead, iBit);
1138#else
1139 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1140 bool const fClear = true;
1141#endif
1142 if (fClear)
1143 ASMBitClear(pbMsrBitmap + offMsrRead, iBit);
1144 }
1145 else
1146 ASMBitSet(pbMsrBitmap + offMsrRead, iBit);
1147
1148 /*
1149 * Set the MSR write permission.
1150 */
1151 uint16_t const offMsrWrite = offBitmapWrite + offMsr;
1152 Assert(offMsrWrite + (iBit >> 3) < X86_PAGE_4K_SIZE);
1153 if (fMsrpm & VMXMSRPM_ALLOW_WR)
1154 {
1155#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1156 bool const fClear = !fIsNstGstVmcs ? true
1157 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrWrite, iBit);
1158#else
1159 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1160 bool const fClear = true;
1161#endif
1162 if (fClear)
1163 ASMBitClear(pbMsrBitmap + offMsrWrite, iBit);
1164 }
1165 else
1166 ASMBitSet(pbMsrBitmap + offMsrWrite, iBit);
1167}
1168
1169
1170/**
1171 * Updates the VMCS with the number of effective MSRs in the auto-load/store MSR
1172 * area.
1173 *
1174 * @returns VBox status code.
1175 * @param pVCpu The cross context virtual CPU structure.
1176 * @param pVmcsInfo The VMCS info. object.
1177 * @param cMsrs The number of MSRs.
1178 */
1179static int hmR0VmxSetAutoLoadStoreMsrCount(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint32_t cMsrs)
1180{
1181 /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */
1182 uint32_t const cMaxSupportedMsrs = VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc);
1183 if (RT_LIKELY(cMsrs < cMaxSupportedMsrs))
1184 {
1185 /* Commit the MSR counts to the VMCS and update the cache. */
1186 if (pVmcsInfo->cEntryMsrLoad != cMsrs)
1187 {
1188 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1189 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cMsrs); AssertRC(rc);
1190 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1191 pVmcsInfo->cEntryMsrLoad = cMsrs;
1192 pVmcsInfo->cExitMsrStore = cMsrs;
1193 pVmcsInfo->cExitMsrLoad = cMsrs;
1194 }
1195 return VINF_SUCCESS;
1196 }
1197
1198 LogRel(("Auto-load/store MSR count exceeded! cMsrs=%u MaxSupported=%u\n", cMsrs, cMaxSupportedMsrs));
1199 pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_GUEST_MSR_STORAGE;
1200 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1201}
1202
1203
1204/**
1205 * Adds a new (or updates the value of an existing) guest/host MSR
1206 * pair to be swapped during the world-switch as part of the
1207 * auto-load/store MSR area in the VMCS.
1208 *
1209 * @returns VBox status code.
1210 * @param pVCpu The cross context virtual CPU structure.
1211 * @param pVmxTransient The VMX-transient structure.
1212 * @param idMsr The MSR.
1213 * @param uGuestMsrValue Value of the guest MSR.
1214 * @param fSetReadWrite Whether to set the guest read/write access of this
1215 * MSR (thus not causing a VM-exit).
1216 * @param fUpdateHostMsr Whether to update the value of the host MSR if
1217 * necessary.
1218 */
1219static int hmR0VmxAddAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr, uint64_t uGuestMsrValue,
1220 bool fSetReadWrite, bool fUpdateHostMsr)
1221{
1222 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1223 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1224 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1225 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1226 uint32_t i;
1227
1228 /* Paranoia. */
1229 Assert(pGuestMsrLoad);
1230
1231#ifndef DEBUG_bird
1232 LogFlowFunc(("pVCpu=%p idMsr=%#RX32 uGuestMsrValue=%#RX64\n", pVCpu, idMsr, uGuestMsrValue));
1233#endif
1234
1235 /* Check if the MSR already exists in the VM-entry MSR-load area. */
1236 for (i = 0; i < cMsrs; i++)
1237 {
1238 if (pGuestMsrLoad[i].u32Msr == idMsr)
1239 break;
1240 }
1241
1242 bool fAdded = false;
1243 if (i == cMsrs)
1244 {
1245 /* The MSR does not exist, bump the MSR count to make room for the new MSR. */
1246 ++cMsrs;
1247 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1248 AssertMsgRCReturn(rc, ("Insufficient space to add MSR to VM-entry MSR-load/store area %u\n", idMsr), rc);
1249
1250 /* Set the guest to read/write this MSR without causing VM-exits. */
1251 if ( fSetReadWrite
1252 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS))
1253 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_ALLOW_RD_WR);
1254
1255 Log4Func(("Added MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1256 fAdded = true;
1257 }
1258
1259 /* Update the MSR value for the newly added or already existing MSR. */
1260 pGuestMsrLoad[i].u32Msr = idMsr;
1261 pGuestMsrLoad[i].u64Value = uGuestMsrValue;
1262
1263 /* Create the corresponding slot in the VM-exit MSR-store area if we use a different page. */
1264 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1265 {
1266 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1267 pGuestMsrStore[i].u32Msr = idMsr;
1268 pGuestMsrStore[i].u64Value = uGuestMsrValue;
1269 }
1270
1271 /* Update the corresponding slot in the host MSR area. */
1272 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1273 Assert(pHostMsr != pVmcsInfo->pvGuestMsrLoad);
1274 Assert(pHostMsr != pVmcsInfo->pvGuestMsrStore);
1275 pHostMsr[i].u32Msr = idMsr;
1276
1277 /*
1278 * Only if the caller requests to update the host MSR value AND we've newly added the
1279 * MSR to the host MSR area do we actually update the value. Otherwise, it will be
1280 * updated by hmR0VmxUpdateAutoLoadHostMsrs().
1281 *
1282 * We do this for performance reasons since reading MSRs may be quite expensive.
1283 */
1284 if (fAdded)
1285 {
1286 if (fUpdateHostMsr)
1287 {
1288 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
1289 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1290 pHostMsr[i].u64Value = ASMRdMsr(idMsr);
1291 }
1292 else
1293 {
1294 /* Someone else can do the work. */
1295 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
1296 }
1297 }
1298 return VINF_SUCCESS;
1299}
1300
1301
1302/**
1303 * Removes a guest/host MSR pair to be swapped during the world-switch from the
1304 * auto-load/store MSR area in the VMCS.
1305 *
1306 * @returns VBox status code.
1307 * @param pVCpu The cross context virtual CPU structure.
1308 * @param pVmxTransient The VMX-transient structure.
1309 * @param idMsr The MSR.
1310 */
1311static int hmR0VmxRemoveAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr)
1312{
1313 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1314 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1315 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1316 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1317
1318#ifndef DEBUG_bird
1319 LogFlowFunc(("pVCpu=%p idMsr=%#RX32\n", pVCpu, idMsr));
1320#endif
1321
1322 for (uint32_t i = 0; i < cMsrs; i++)
1323 {
1324 /* Find the MSR. */
1325 if (pGuestMsrLoad[i].u32Msr == idMsr)
1326 {
1327 /*
1328 * If it's the last MSR, we only need to reduce the MSR count.
1329 * If it's -not- the last MSR, copy the last MSR in place of it and reduce the MSR count.
1330 */
1331 if (i < cMsrs - 1)
1332 {
1333 /* Remove it from the VM-entry MSR-load area. */
1334 pGuestMsrLoad[i].u32Msr = pGuestMsrLoad[cMsrs - 1].u32Msr;
1335 pGuestMsrLoad[i].u64Value = pGuestMsrLoad[cMsrs - 1].u64Value;
1336
1337 /* Remove it from the VM-exit MSR-store area if it's in a different page. */
1338 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1339 {
1340 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1341 Assert(pGuestMsrStore[i].u32Msr == idMsr);
1342 pGuestMsrStore[i].u32Msr = pGuestMsrStore[cMsrs - 1].u32Msr;
1343 pGuestMsrStore[i].u64Value = pGuestMsrStore[cMsrs - 1].u64Value;
1344 }
1345
1346 /* Remove it from the VM-exit MSR-load area. */
1347 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1348 Assert(pHostMsr[i].u32Msr == idMsr);
1349 pHostMsr[i].u32Msr = pHostMsr[cMsrs - 1].u32Msr;
1350 pHostMsr[i].u64Value = pHostMsr[cMsrs - 1].u64Value;
1351 }
1352
1353 /* Reduce the count to reflect the removed MSR and bail. */
1354 --cMsrs;
1355 break;
1356 }
1357 }
1358
1359 /* Update the VMCS if the count changed (meaning the MSR was found and removed). */
1360 if (cMsrs != pVmcsInfo->cEntryMsrLoad)
1361 {
1362 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1363 AssertRCReturn(rc, rc);
1364
1365 /* We're no longer swapping MSRs during the world-switch, intercept guest read/writes to them. */
1366 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1367 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_EXIT_RD | VMXMSRPM_EXIT_WR);
1368
1369 Log4Func(("Removed MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1370 return VINF_SUCCESS;
1371 }
1372
1373 return VERR_NOT_FOUND;
1374}
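/*
 * Editorial usage sketch: a guest/host MSR pair is typically swapped via these helpers
 * roughly as follows (the last two arguments being fSetReadWrite and fUpdateHostMsr):
 *
 *     rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER, pCtx->msrEFER,
 *                                     false, false);
 *     ...
 *     rc = hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER);
 *
 * EFER is one of the MSRs handled this way when the dedicated VM-entry/VM-exit EFER
 * controls cannot be used; see hmR0VmxShouldSwapEferMsr().
 */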
1375
1376
1377/**
1378 * Updates the value of all host MSRs in the VM-exit MSR-load area.
1379 *
1380 * @param pVCpu The cross context virtual CPU structure.
1381 * @param pVmcsInfo The VMCS info. object.
1382 *
1383 * @remarks No-long-jump zone!!!
1384 */
1385static void hmR0VmxUpdateAutoLoadHostMsrs(PCVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1386{
1387 RT_NOREF(pVCpu);
1388 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1389
1390 PVMXAUTOMSR pHostMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1391 uint32_t const cMsrs = pVmcsInfo->cExitMsrLoad;
1392 Assert(pHostMsrLoad);
1393 Assert(sizeof(*pHostMsrLoad) * cMsrs <= X86_PAGE_4K_SIZE);
1394 LogFlowFunc(("pVCpu=%p cMsrs=%u\n", pVCpu, cMsrs));
1395 for (uint32_t i = 0; i < cMsrs; i++)
1396 {
1397 /*
1398 * Performance hack for the host EFER MSR. We use the cached value rather than re-read it.
1399 * Strict builds will catch mismatches in hmR0VmxCheckAutoLoadStoreMsrs(). See @bugref{7368}.
1400 */
1401 if (pHostMsrLoad[i].u32Msr == MSR_K6_EFER)
1402 pHostMsrLoad[i].u64Value = g_uHmVmxHostMsrEfer;
1403 else
1404 pHostMsrLoad[i].u64Value = ASMRdMsr(pHostMsrLoad[i].u32Msr);
1405 }
1406}
1407
1408
1409/**
1410 * Saves a set of host MSRs to allow read/write passthru access to the guest and
1411 * to perform lazy restoration of the host MSRs while leaving VT-x.
1412 *
1413 * @param pVCpu The cross context virtual CPU structure.
1414 *
1415 * @remarks No-long-jump zone!!!
1416 */
1417static void hmR0VmxLazySaveHostMsrs(PVMCPUCC pVCpu)
1418{
1419 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1420
1421 /*
1422 * Note: If you're adding MSRs here, make sure to update the MSR-bitmap accesses in hmR0VmxSetupVmcsProcCtls().
1423 */
1424 if (!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST))
1425 {
1426 Assert(!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)); /* Guest MSRs better not be loaded now. */
1427 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
1428 {
1429 pVCpu->hmr0.s.vmx.u64HostMsrLStar = ASMRdMsr(MSR_K8_LSTAR);
1430 pVCpu->hmr0.s.vmx.u64HostMsrStar = ASMRdMsr(MSR_K6_STAR);
1431 pVCpu->hmr0.s.vmx.u64HostMsrSfMask = ASMRdMsr(MSR_K8_SF_MASK);
1432 pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
1433 }
1434 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_SAVED_HOST;
1435 }
1436}
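/*
 * Editorial lifecycle note: the three "lazy" MSR helpers in this file form a small state
 * machine over pVCpu->hmr0.s.vmx.fLazyMsrs:
 *
 *     hmR0VmxLazySaveHostMsrs()    -> sets VMX_LAZY_MSRS_SAVED_HOST   (host values cached)
 *     hmR0VmxLazyLoadGuestMsrs()   -> sets VMX_LAZY_MSRS_LOADED_GUEST (guest values on CPU)
 *     hmR0VmxLazyRestoreHostMsrs() -> clears both flags               (host values restored)
 *
 * so the expensive MSR writes only happen when the guest values actually diverge from what
 * is currently on the CPU.
 */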
1437
1438
1439#ifdef VBOX_STRICT
1440
1441/**
1442 * Verifies that our cached host EFER MSR value has not changed since we cached it.
1443 *
1444 * @param pVmcsInfo The VMCS info. object.
1445 */
1446static void hmR0VmxCheckHostEferMsr(PCVMXVMCSINFO pVmcsInfo)
1447{
1448 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1449
1450 if (pVmcsInfo->u32ExitCtls & VMX_EXIT_CTLS_LOAD_EFER_MSR)
1451 {
1452 uint64_t const uHostEferMsr = ASMRdMsr(MSR_K6_EFER);
1453 uint64_t const uHostEferMsrCache = g_uHmVmxHostMsrEfer;
1454 uint64_t uVmcsEferMsrVmcs;
1455 int rc = VMXReadVmcs64(VMX_VMCS64_HOST_EFER_FULL, &uVmcsEferMsrVmcs);
1456 AssertRC(rc);
1457
1458 AssertMsgReturnVoid(uHostEferMsr == uVmcsEferMsrVmcs,
1459 ("EFER Host/VMCS mismatch! host=%#RX64 vmcs=%#RX64\n", uHostEferMsr, uVmcsEferMsrVmcs));
1460 AssertMsgReturnVoid(uHostEferMsr == uHostEferMsrCache,
1461 ("EFER Host/Cache mismatch! host=%#RX64 cache=%#RX64\n", uHostEferMsr, uHostEferMsrCache));
1462 }
1463}
1464
1465
1466/**
1467 * Verifies whether the guest/host MSR pairs in the auto-load/store area in the
1468 * VMCS are correct.
1469 *
1470 * @param pVCpu The cross context virtual CPU structure.
1471 * @param pVmcsInfo The VMCS info. object.
1472 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1473 */
1474static void hmR0VmxCheckAutoLoadStoreMsrs(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
1475{
1476 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1477
1478 /* Read the various MSR-area counts from the VMCS. */
1479 uint32_t cEntryLoadMsrs;
1480 uint32_t cExitStoreMsrs;
1481 uint32_t cExitLoadMsrs;
1482 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &cEntryLoadMsrs); AssertRC(rc);
1483 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &cExitStoreMsrs); AssertRC(rc);
1484 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &cExitLoadMsrs); AssertRC(rc);
1485
1486 /* Verify all the MSR counts are the same. */
1487 Assert(cEntryLoadMsrs == cExitStoreMsrs);
1488 Assert(cExitStoreMsrs == cExitLoadMsrs);
1489 uint32_t const cMsrs = cExitLoadMsrs;
1490
1491 /* Verify the MSR counts do not exceed the maximum count supported by the hardware. */
1492 Assert(cMsrs < VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc));
1493
1494 /* Verify the MSR counts are within the allocated page size. */
1495 Assert(sizeof(VMXAUTOMSR) * cMsrs <= X86_PAGE_4K_SIZE);
1496
1497 /* Verify the relevant contents of the MSR areas match. */
1498 PCVMXAUTOMSR pGuestMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1499 PCVMXAUTOMSR pGuestMsrStore = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1500 PCVMXAUTOMSR pHostMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1501 bool const fSeparateExitMsrStorePage = hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo);
1502 for (uint32_t i = 0; i < cMsrs; i++)
1503 {
1504 /* Verify that the MSRs are paired properly and that the host MSR has the correct value. */
1505 if (fSeparateExitMsrStorePage)
1506 {
1507 AssertMsgReturnVoid(pGuestMsrLoad->u32Msr == pGuestMsrStore->u32Msr,
1508 ("GuestMsrLoad=%#RX32 GuestMsrStore=%#RX32 cMsrs=%u\n",
1509 pGuestMsrLoad->u32Msr, pGuestMsrStore->u32Msr, cMsrs));
1510 }
1511
1512 AssertMsgReturnVoid(pHostMsrLoad->u32Msr == pGuestMsrLoad->u32Msr,
1513 ("HostMsrLoad=%#RX32 GuestMsrLoad=%#RX32 cMsrs=%u\n",
1514 pHostMsrLoad->u32Msr, pGuestMsrLoad->u32Msr, cMsrs));
1515
1516 uint64_t const u64HostMsr = ASMRdMsr(pHostMsrLoad->u32Msr);
1517 AssertMsgReturnVoid(pHostMsrLoad->u64Value == u64HostMsr,
1518 ("u32Msr=%#RX32 VMCS Value=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n",
1519 pHostMsrLoad->u32Msr, pHostMsrLoad->u64Value, u64HostMsr, cMsrs));
1520
1521 /* Verify that cached host EFER MSR matches what's loaded on the CPU. */
1522 bool const fIsEferMsr = RT_BOOL(pHostMsrLoad->u32Msr == MSR_K6_EFER);
1523 AssertMsgReturnVoid(!fIsEferMsr || u64HostMsr == g_uHmVmxHostMsrEfer,
1524 ("Cached=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n", g_uHmVmxHostMsrEfer, u64HostMsr, cMsrs));
1525
1526 /* Verify that the accesses are as expected in the MSR bitmap for auto-load/store MSRs. */
1527 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1528 {
1529 uint32_t const fMsrpm = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, pGuestMsrLoad->u32Msr);
1530 if (fIsEferMsr)
1531 {
1532 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_RD), ("Passthru read for EFER MSR!?\n"));
1533 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_WR), ("Passthru write for EFER MSR!?\n"));
1534 }
1535 else
1536 {
1537 /* Verify LBR MSRs (used only for debugging) are intercepted. We don't passthru these MSRs to the guest yet. */
1538 PCVMCC pVM = pVCpu->CTX_SUFF(pVM);
1539 if ( pVM->hmr0.s.vmx.fLbr
1540 && ( hmR0VmxIsLbrBranchFromMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1541 || hmR0VmxIsLbrBranchToMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1542 || pGuestMsrLoad->u32Msr == pVM->hmr0.s.vmx.idLbrTosMsr))
1543 {
1544 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_EXIT_RD_WR,
1545 ("u32Msr=%#RX32 cMsrs=%u Passthru read/write for LBR MSRs!\n",
1546 pGuestMsrLoad->u32Msr, cMsrs));
1547 }
1548 else if (!fIsNstGstVmcs)
1549 {
1550 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_ALLOW_RD_WR,
1551 ("u32Msr=%#RX32 cMsrs=%u No passthru read/write!\n", pGuestMsrLoad->u32Msr, cMsrs));
1552 }
1553 else
1554 {
1555 /*
1556 * A nested-guest VMCS must -also- allow read/write passthrough for the MSR for us to
1557 * execute a nested-guest with MSR passthrough.
1558 *
1559 * Check if the nested-guest MSR bitmap allows passthrough, and if so, assert that we
1560 * allow passthrough too.
1561 */
1562 void const *pvMsrBitmapNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap;
1563 Assert(pvMsrBitmapNstGst);
1564 uint32_t const fMsrpmNstGst = CPUMGetVmxMsrPermission(pvMsrBitmapNstGst, pGuestMsrLoad->u32Msr);
1565 AssertMsgReturnVoid(fMsrpm == fMsrpmNstGst,
1566 ("u32Msr=%#RX32 cMsrs=%u Permission mismatch fMsrpm=%#x fMsrpmNstGst=%#x!\n",
1567 pGuestMsrLoad->u32Msr, cMsrs, fMsrpm, fMsrpmNstGst));
1568 }
1569 }
1570 }
1571
1572 /* Move to the next MSR. */
1573 pHostMsrLoad++;
1574 pGuestMsrLoad++;
1575 pGuestMsrStore++;
1576 }
1577}
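
/*
 * Illustrative sketch (disabled): the layout of one entry in the VMX auto-load/store MSR
 * areas that the strict-build check above walks. The fields mirror the VMXAUTOMSR layout
 * used in this file (32-bit MSR index, 32-bit reserved, 64-bit value); the EXAMPLE* names
 * below are made up for illustration only and are not part of the build.
 */
#if 0
typedef struct EXAMPLEAUTOMSR
{
    uint32_t u32Msr;        /* MSR index, e.g. MSR_K8_LSTAR. */
    uint32_t u32Reserved;   /* Reserved, must be zero. */
    uint64_t u64Value;      /* Value loaded on VM-entry or stored on VM-exit. */
} EXAMPLEAUTOMSR;

/* Walk cMsrs entries the same way hmR0VmxCheckAutoLoadStoreMsrs() walks the real areas. */
static void exampleDumpAutoMsrArea(EXAMPLEAUTOMSR const *paMsrs, uint32_t cMsrs)
{
    for (uint32_t i = 0; i < cMsrs; i++)
        LogRel(("MSR[%u]: idx=%#RX32 val=%#RX64\n", i, paMsrs[i].u32Msr, paMsrs[i].u64Value));
}
#endif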
1578
1579#endif /* VBOX_STRICT */
1580
1581/**
1582 * Flushes the TLB using EPT.
1583 *
1585 * @param pVCpu The cross context virtual CPU structure of the calling
1586 * EMT. Can be NULL depending on @a enmTlbFlush.
1587 * @param pVmcsInfo The VMCS info. object. Can be NULL depending on @a
1588 * enmTlbFlush.
1589 * @param enmTlbFlush Type of flush.
1590 *
1591 * @remarks Caller is responsible for making sure this function is called only
1592 * when NestedPaging is supported and providing @a enmTlbFlush that is
1593 * supported by the CPU.
1594 * @remarks Can be called with interrupts disabled.
1595 */
1596static void hmR0VmxFlushEpt(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, VMXTLBFLUSHEPT enmTlbFlush)
1597{
1598 uint64_t au64Descriptor[2];
1599 if (enmTlbFlush == VMXTLBFLUSHEPT_ALL_CONTEXTS)
1600 au64Descriptor[0] = 0;
1601 else
1602 {
1603 Assert(pVCpu);
1604 Assert(pVmcsInfo);
1605 au64Descriptor[0] = pVmcsInfo->HCPhysEPTP;
1606 }
1607 au64Descriptor[1] = 0; /* MBZ. Intel spec. 33.3 "VMX Instructions" */
1608
1609 int rc = VMXR0InvEPT(enmTlbFlush, &au64Descriptor[0]);
1610 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %#x %#RHp failed. rc=%Rrc\n", enmTlbFlush, au64Descriptor[0], rc));
1611
1612 if ( RT_SUCCESS(rc)
1613 && pVCpu)
1614 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
1615}
1616
1617
1618/**
1619 * Flushes the TLB using VPID.
1620 *
1622 * @param pVCpu The cross context virtual CPU structure of the calling
1623 * EMT. Can be NULL depending on @a enmTlbFlush.
1624 * @param enmTlbFlush Type of flush.
1625 * @param GCPtr Virtual address of the page to flush (can be 0 depending
1626 * on @a enmTlbFlush).
1627 *
1628 * @remarks Can be called with interrupts disabled.
1629 */
1630static void hmR0VmxFlushVpid(PVMCPUCC pVCpu, VMXTLBFLUSHVPID enmTlbFlush, RTGCPTR GCPtr)
1631{
1632 Assert(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid);
1633
1634 uint64_t au64Descriptor[2];
1635 if (enmTlbFlush == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1636 {
1637 au64Descriptor[0] = 0;
1638 au64Descriptor[1] = 0;
1639 }
1640 else
1641 {
1642 AssertPtr(pVCpu);
1643 AssertMsg(pVCpu->hmr0.s.uCurrentAsid != 0, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1644 AssertMsg(pVCpu->hmr0.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1645 au64Descriptor[0] = pVCpu->hmr0.s.uCurrentAsid;
1646 au64Descriptor[1] = GCPtr;
1647 }
1648
1649 int rc = VMXR0InvVPID(enmTlbFlush, &au64Descriptor[0]);
1650 AssertMsg(rc == VINF_SUCCESS,
1651 ("VMXR0InvVPID %#x %u %RGv failed with %Rrc\n", enmTlbFlush, pVCpu ? pVCpu->hmr0.s.uCurrentAsid : 0, GCPtr, rc));
1652
1653 if ( RT_SUCCESS(rc)
1654 && pVCpu)
1655 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
1656 NOREF(rc);
1657}
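
/*
 * Illustrative sketch (disabled): the 128-bit INVVPID descriptor that hmR0VmxFlushVpid()
 * above builds. Per the Intel SDM only bits 15:0 of the first qword carry the VPID, the
 * remaining bits are reserved and must be zero, and the second qword holds the linear
 * address used by individual-address flushes. The example function name is made up.
 */
#if 0
static void exampleBuildInvvpidDescriptor(uint16_t uVpid, RTGCPTR GCPtr, uint64_t au64Desc[2])
{
    au64Desc[0] = uVpid;    /* Bits 15:0 = VPID, bits 63:16 reserved (zero). */
    au64Desc[1] = GCPtr;    /* Linear address; ignored by ALL/SINGLE-context flush types. */
}
#endif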
1658
1659
1660/**
1661 * Invalidates a guest page by guest virtual address. Only relevant for EPT/VPID,
1662 * otherwise there is nothing really to invalidate.
1663 *
1664 * @returns VBox status code.
1665 * @param pVCpu The cross context virtual CPU structure.
1666 * @param GCVirt Guest virtual address of the page to invalidate.
1667 */
1668VMMR0DECL(int) VMXR0InvalidatePage(PVMCPUCC pVCpu, RTGCPTR GCVirt)
1669{
1670 AssertPtr(pVCpu);
1671 LogFlowFunc(("pVCpu=%p GCVirt=%RGv\n", pVCpu, GCVirt));
1672
1673 if (!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH))
1674 {
1675 /*
1676         * We must invalidate the guest TLB entry in either case; we cannot ignore it even for
1677 * the EPT case. See @bugref{6043} and @bugref{6177}.
1678 *
1679 * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxFlushTLB*()
1680         * as this function may be called in a loop with individual addresses.
1681 */
1682 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1683 if (pVM->hmr0.s.vmx.fVpid)
1684 {
1685 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
1686 {
1687 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_INDIV_ADDR, GCVirt);
1688 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt);
1689 }
1690 else
1691 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1692 }
1693 else if (pVM->hmr0.s.fNestedPaging)
1694 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1695 }
1696
1697 return VINF_SUCCESS;
1698}
1699
1700
1701/**
1702 * Dummy placeholder for tagged-TLB flush handling before VM-entry. Used in the
1703 * case where neither EPT nor VPID is supported by the CPU.
1704 *
1705 * @param pHostCpu The HM physical-CPU structure.
1706 * @param pVCpu The cross context virtual CPU structure.
1707 *
1708 * @remarks Called with interrupts disabled.
1709 */
1710static void hmR0VmxFlushTaggedTlbNone(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1711{
1712 AssertPtr(pVCpu);
1713 AssertPtr(pHostCpu);
1714
1715 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1716
1717 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1718 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1719 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1720 pVCpu->hmr0.s.fForceTLBFlush = false;
1721 return;
1722}
1723
1724
1725/**
1726 * Flushes the tagged-TLB entries for EPT+VPID CPUs as necessary.
1727 *
1728 * @param pHostCpu The HM physical-CPU structure.
1729 * @param pVCpu The cross context virtual CPU structure.
1730 * @param pVmcsInfo The VMCS info. object.
1731 *
1732 * @remarks All references to "ASID" in this function pertain to "VPID" in Intel's
1733 *          nomenclature. This is to avoid confusion in comparisons, since the
1734 *          host-CPU copies are named "ASID".
1735 *
1736 * @remarks Called with interrupts disabled.
1737 */
1738static void hmR0VmxFlushTaggedTlbBoth(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1739{
1740#ifdef VBOX_WITH_STATISTICS
1741 bool fTlbFlushed = false;
1742# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { fTlbFlushed = true; } while (0)
1743# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { \
1744 if (!fTlbFlushed) \
1745 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); \
1746 } while (0)
1747#else
1748# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { } while (0)
1749# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { } while (0)
1750#endif
1751
1752 AssertPtr(pVCpu);
1753 AssertPtr(pHostCpu);
1754 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1755
1756 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1757 AssertMsg(pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid,
1758 ("hmR0VmxFlushTaggedTlbBoth cannot be invoked unless NestedPaging & VPID are enabled."
1759 "fNestedPaging=%RTbool fVpid=%RTbool", pVM->hmr0.s.fNestedPaging, pVM->hmr0.s.vmx.fVpid));
1760
1761 /*
1762 * Force a TLB flush for the first world-switch if the current CPU differs from the one we
1763 * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
1764 * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
1765 * cannot reuse the current ASID anymore.
1766 */
1767 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1768 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1769 {
1770 ++pHostCpu->uCurrentAsid;
1771 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1772 {
1773 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0. */
1774 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1775 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1776 }
1777
1778 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1779 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1780 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1781
1782 /*
1783 * Flush by EPT when we get rescheduled to a new host CPU to ensure EPT-only tagged mappings are also
1784 * invalidated. We don't need to flush-by-VPID here as flushing by EPT covers it. See @bugref{6568}.
1785 */
1786 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1787 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1788 HMVMX_SET_TAGGED_TLB_FLUSHED();
1789 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1790 }
1791 else if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) /* Check for explicit TLB flushes. */
1792 {
1793 /*
1794         * Changes to the EPT paging structures by the VMM require flushing-by-EPT as the CPU
1795 * creates guest-physical (ie. only EPT-tagged) mappings while traversing the EPT
1796 * tables when EPT is in use. Flushing-by-VPID will only flush linear (only
1797 * VPID-tagged) and combined (EPT+VPID tagged) mappings but not guest-physical
1798 * mappings, see @bugref{6568}.
1799 *
1800 * See Intel spec. 28.3.2 "Creating and Using Cached Translation Information".
1801 */
1802 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1803 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1804 HMVMX_SET_TAGGED_TLB_FLUSHED();
1805 }
1806 else if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1807 {
1808 /*
1809 * The nested-guest specifies its own guest-physical address to use as the APIC-access
1810 * address which requires flushing the TLB of EPT cached structures.
1811 *
1812 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
1813 */
1814 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1815 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1816 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1817 HMVMX_SET_TAGGED_TLB_FLUSHED();
1818 }
1819
1820
1821 pVCpu->hmr0.s.fForceTLBFlush = false;
1822 HMVMX_UPDATE_FLUSH_SKIPPED_STAT();
1823
1824 Assert(pVCpu->hmr0.s.idLastCpu == pHostCpu->idCpu);
1825 Assert(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes);
1826 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
1827 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
1828 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
1829 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
1830 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
1831 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
1832 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
1833
1834 /* Update VMCS with the VPID. */
1835 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
1836 AssertRC(rc);
1837
1838#undef HMVMX_SET_TAGGED_TLB_FLUSHED
1839}
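
/*
 * Illustrative sketch (disabled): the per-host-CPU ASID/VPID assignment policy used by
 * hmR0VmxFlushTaggedTlbBoth() and hmR0VmxFlushTaggedTlbVpid() above, reduced to its core.
 * ASID 0 is reserved for the host, so on wraparound we restart at 1 and bump the flush
 * generation so every vCPU scheduled on this host CPU picks a fresh VPID and flushes it
 * before first use. The example type and function names are made up for illustration.
 */
#if 0
typedef struct EXAMPLEHOSTCPU
{
    uint32_t uCurrentAsid;      /* Last ASID handed out on this host CPU. */
    uint32_t cTlbFlushes;       /* Flush generation; bumping it invalidates all cached vCPU ASIDs. */
    bool     fFlushBeforeUse;   /* Whether newly assigned ASIDs must be flushed before first use. */
} EXAMPLEHOSTCPU;

static uint32_t exampleAssignAsid(EXAMPLEHOSTCPU *pHostCpu, uint32_t uMaxAsid)
{
    if (++pHostCpu->uCurrentAsid >= uMaxAsid)
    {
        pHostCpu->uCurrentAsid    = 1;      /* Wrap around to 1; the host uses 0. */
        pHostCpu->cTlbFlushes++;            /* Invalidate every vCPU's cached ASID. */
        pHostCpu->fFlushBeforeUse = true;   /* New ASIDs must be flushed before use. */
    }
    return pHostCpu->uCurrentAsid;
}
#endif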
1840
1841
1842/**
1843 * Flushes the tagged-TLB entries for EPT CPUs as necessary.
1844 *
1845 * @param pHostCpu The HM physical-CPU structure.
1846 * @param pVCpu The cross context virtual CPU structure.
1847 * @param pVmcsInfo The VMCS info. object.
1848 *
1849 * @remarks Called with interrupts disabled.
1850 */
1851static void hmR0VmxFlushTaggedTlbEpt(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1852{
1853 AssertPtr(pVCpu);
1854 AssertPtr(pHostCpu);
1855 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1856 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked without NestedPaging."));
1857 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with VPID."));
1858
1859 /*
1860 * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last.
1861 * A change in the TLB flush count implies the host CPU is online after a suspend/resume.
1862 */
1863 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1864 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1865 {
1866 pVCpu->hmr0.s.fForceTLBFlush = true;
1867 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1868 }
1869
1870 /* Check for explicit TLB flushes. */
1871 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1872 {
1873 pVCpu->hmr0.s.fForceTLBFlush = true;
1874 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1875 }
1876
1877 /* Check for TLB flushes while switching to/from a nested-guest. */
1878 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1879 {
1880 pVCpu->hmr0.s.fForceTLBFlush = true;
1881 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1882 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1883 }
1884
1885 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1886 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1887
1888 if (pVCpu->hmr0.s.fForceTLBFlush)
1889 {
1890 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.enmTlbFlushEpt);
1891 pVCpu->hmr0.s.fForceTLBFlush = false;
1892 }
1893}
1894
1895
1896/**
1897 * Flushes the tagged-TLB entries for VPID CPUs as necessary.
1898 *
1899 * @param pHostCpu The HM physical-CPU structure.
1900 * @param pVCpu The cross context virtual CPU structure.
1901 *
1902 * @remarks Called with interrupts disabled.
1903 */
1904static void hmR0VmxFlushTaggedTlbVpid(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1905{
1906 AssertPtr(pVCpu);
1907 AssertPtr(pHostCpu);
1908 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1909 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTlbVpid cannot be invoked without VPID."));
1910 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTlbVpid cannot be invoked with NestedPaging"));
1911
1912 /*
1913 * Force a TLB flush for the first world switch if the current CPU differs from the one we
1914 * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
1915 * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
1916 * cannot reuse the current ASID anymore.
1917 */
1918 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1919 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1920 {
1921 pVCpu->hmr0.s.fForceTLBFlush = true;
1922 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1923 }
1924
1925 /* Check for explicit TLB flushes. */
1926 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1927 {
1928 /*
1929 * If we ever support VPID flush combinations other than ALL or SINGLE-context (see
1930 * hmR0VmxSetupTaggedTlb()) we would need to explicitly flush in this case (add an
1931 * fExplicitFlush = true here and change the pHostCpu->fFlushAsidBeforeUse check below to
1932 * include fExplicitFlush's too) - an obscure corner case.
1933 */
1934 pVCpu->hmr0.s.fForceTLBFlush = true;
1935 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1936 }
1937
1938 /* Check for TLB flushes while switching to/from a nested-guest. */
1939 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1940 {
1941 pVCpu->hmr0.s.fForceTLBFlush = true;
1942 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1943 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1944 }
1945
1946 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1947 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1948 if (pVCpu->hmr0.s.fForceTLBFlush)
1949 {
1950 ++pHostCpu->uCurrentAsid;
1951 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1952 {
1953 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0 */
1954 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1955 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1956 }
1957
1958 pVCpu->hmr0.s.fForceTLBFlush = false;
1959 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1960 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1961 if (pHostCpu->fFlushAsidBeforeUse)
1962 {
1963 if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_SINGLE_CONTEXT)
1964 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_SINGLE_CONTEXT, 0 /* GCPtr */);
1965 else if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1966 {
1967 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_ALL_CONTEXTS, 0 /* GCPtr */);
1968 pHostCpu->fFlushAsidBeforeUse = false;
1969 }
1970 else
1971 {
1972 /* hmR0VmxSetupTaggedTlb() ensures we never get here. Paranoia. */
1973 AssertMsgFailed(("Unsupported VPID-flush context type.\n"));
1974 }
1975 }
1976 }
1977
1978 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
1979 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
1980 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
1981 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
1982 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
1983 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
1984 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
1985
1986 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
1987 AssertRC(rc);
1988}
1989
1990
1991/**
1992 * Flushes the guest TLB entry based on CPU capabilities.
1993 *
1994 * @param pHostCpu The HM physical-CPU structure.
1995 * @param pVCpu The cross context virtual CPU structure.
1996 * @param pVmcsInfo The VMCS info. object.
1997 *
1998 * @remarks Called with interrupts disabled.
1999 */
2000static void hmR0VmxFlushTaggedTlb(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2001{
2002#ifdef HMVMX_ALWAYS_FLUSH_TLB
2003 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
2004#endif
2005 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2006 switch (pVM->hmr0.s.vmx.enmTlbFlushType)
2007 {
2008 case VMXTLBFLUSHTYPE_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pHostCpu, pVCpu, pVmcsInfo); break;
2009 case VMXTLBFLUSHTYPE_EPT: hmR0VmxFlushTaggedTlbEpt(pHostCpu, pVCpu, pVmcsInfo); break;
2010 case VMXTLBFLUSHTYPE_VPID: hmR0VmxFlushTaggedTlbVpid(pHostCpu, pVCpu); break;
2011 case VMXTLBFLUSHTYPE_NONE: hmR0VmxFlushTaggedTlbNone(pHostCpu, pVCpu); break;
2012 default:
2013 AssertMsgFailed(("Invalid flush-tag function identifier\n"));
2014 break;
2015 }
2016 /* Don't assert that VMCPU_FF_TLB_FLUSH should no longer be pending. It can be set by other EMTs. */
2017}
2018
2019
2020/**
2021 * Sets up the appropriate tagged TLB-flush level and handler for flushing guest
2022 * TLB entries from the host TLB before VM-entry.
2023 *
2024 * @returns VBox status code.
2025 * @param pVM The cross context VM structure.
2026 */
2027static int hmR0VmxSetupTaggedTlb(PVMCC pVM)
2028{
2029 /*
2030 * Determine optimal flush type for nested paging.
2031     * We cannot ignore EPT if no suitable flush type is supported by the CPU, as we've already set up
2032 * unrestricted guest execution (see hmR3InitFinalizeR0()).
2033 */
2034 if (pVM->hmr0.s.fNestedPaging)
2035 {
2036 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT)
2037 {
2038 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT)
2039 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_SINGLE_CONTEXT;
2040 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
2041 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_ALL_CONTEXTS;
2042 else
2043 {
2044 /* Shouldn't happen. EPT is supported but no suitable flush-types supported. */
2045 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2046 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_FLUSH_TYPE_UNSUPPORTED;
2047 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2048 }
2049
2050 /* Make sure the write-back cacheable memory type for EPT is supported. */
2051 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_MEMTYPE_WB)))
2052 {
2053 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2054 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_MEM_TYPE_NOT_WB;
2055 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2056 }
2057
2058 /* EPT requires a page-walk length of 4. */
2059 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_PAGE_WALK_LENGTH_4)))
2060 {
2061 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2062 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_PAGE_WALK_LENGTH_UNSUPPORTED;
2063 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2064 }
2065 }
2066 else
2067 {
2068 /* Shouldn't happen. EPT is supported but INVEPT instruction is not supported. */
2069 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2070 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_INVEPT_UNAVAILABLE;
2071 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2072 }
2073 }
2074
2075 /*
2076 * Determine optimal flush type for VPID.
2077 */
2078 if (pVM->hmr0.s.vmx.fVpid)
2079 {
2080 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID)
2081 {
2082 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT)
2083 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_SINGLE_CONTEXT;
2084 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS)
2085 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_ALL_CONTEXTS;
2086 else
2087 {
2088                 /* Neither SINGLE- nor ALL-context flush types for VPID are supported by the CPU. Ignore the VPID capability. */
2089 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
2090 LogRelFunc(("Only INDIV_ADDR supported. Ignoring VPID.\n"));
2091 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS)
2092 LogRelFunc(("Only SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
2093 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2094 pVM->hmr0.s.vmx.fVpid = false;
2095 }
2096 }
2097 else
2098 {
2099 /* Shouldn't happen. VPID is supported but INVVPID is not supported by the CPU. Ignore VPID capability. */
2100             Log4Func(("VPID supported without INVVPID support. Ignoring VPID.\n"));
2101 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2102 pVM->hmr0.s.vmx.fVpid = false;
2103 }
2104 }
2105
2106 /*
2107     * Set up the handler for flushing tagged TLBs.
2108 */
2109 if (pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid)
2110 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT_VPID;
2111 else if (pVM->hmr0.s.fNestedPaging)
2112 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT;
2113 else if (pVM->hmr0.s.vmx.fVpid)
2114 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_VPID;
2115 else
2116 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_NONE;
2117
2118
2119 /*
2120 * Copy out the result to ring-3.
2121 */
2122 pVM->hm.s.ForR3.vmx.fVpid = pVM->hmr0.s.vmx.fVpid;
2123 pVM->hm.s.ForR3.vmx.enmTlbFlushType = pVM->hmr0.s.vmx.enmTlbFlushType;
2124 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt;
2125 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid;
2126 return VINF_SUCCESS;
2127}
2128
2129
2130/**
2131 * Sets up the LBR MSR ranges based on the host CPU.
2132 *
2133 * @returns VBox status code.
2134 * @param pVM The cross context VM structure.
2135 *
2136 * @sa nemR3DarwinSetupLbrMsrRange
2137 */
2138static int hmR0VmxSetupLbrMsrRange(PVMCC pVM)
2139{
2140 Assert(pVM->hmr0.s.vmx.fLbr);
2141 uint32_t idLbrFromIpMsrFirst;
2142 uint32_t idLbrFromIpMsrLast;
2143 uint32_t idLbrToIpMsrFirst;
2144 uint32_t idLbrToIpMsrLast;
2145 uint32_t idLbrTosMsr;
2146
2147 /*
2148 * Determine the LBR MSRs supported for this host CPU family and model.
2149 *
2150 * See Intel spec. 17.4.8 "LBR Stack".
2151 * See Intel "Model-Specific Registers" spec.
2152 */
2153 uint32_t const uFamilyModel = (pVM->cpum.ro.HostFeatures.uFamily << 8)
2154 | pVM->cpum.ro.HostFeatures.uModel;
2155 switch (uFamilyModel)
2156 {
2157 case 0x0f01: case 0x0f02:
2158 idLbrFromIpMsrFirst = MSR_P4_LASTBRANCH_0;
2159 idLbrFromIpMsrLast = MSR_P4_LASTBRANCH_3;
2160 idLbrToIpMsrFirst = 0x0;
2161 idLbrToIpMsrLast = 0x0;
2162 idLbrTosMsr = MSR_P4_LASTBRANCH_TOS;
2163 break;
2164
2165 case 0x065c: case 0x065f: case 0x064e: case 0x065e: case 0x068e:
2166 case 0x069e: case 0x0655: case 0x0666: case 0x067a: case 0x0667:
2167 case 0x066a: case 0x066c: case 0x067d: case 0x067e:
2168 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2169 idLbrFromIpMsrLast = MSR_LASTBRANCH_31_FROM_IP;
2170 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2171 idLbrToIpMsrLast = MSR_LASTBRANCH_31_TO_IP;
2172 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2173 break;
2174
2175 case 0x063d: case 0x0647: case 0x064f: case 0x0656: case 0x063c:
2176 case 0x0645: case 0x0646: case 0x063f: case 0x062a: case 0x062d:
2177 case 0x063a: case 0x063e: case 0x061a: case 0x061e: case 0x061f:
2178 case 0x062e: case 0x0625: case 0x062c: case 0x062f:
2179 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2180 idLbrFromIpMsrLast = MSR_LASTBRANCH_15_FROM_IP;
2181 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2182 idLbrToIpMsrLast = MSR_LASTBRANCH_15_TO_IP;
2183 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2184 break;
2185
2186 case 0x0617: case 0x061d: case 0x060f:
2187 idLbrFromIpMsrFirst = MSR_CORE2_LASTBRANCH_0_FROM_IP;
2188 idLbrFromIpMsrLast = MSR_CORE2_LASTBRANCH_3_FROM_IP;
2189 idLbrToIpMsrFirst = MSR_CORE2_LASTBRANCH_0_TO_IP;
2190 idLbrToIpMsrLast = MSR_CORE2_LASTBRANCH_3_TO_IP;
2191 idLbrTosMsr = MSR_CORE2_LASTBRANCH_TOS;
2192 break;
2193
2194 /* Atom and related microarchitectures we don't care about:
2195 case 0x0637: case 0x064a: case 0x064c: case 0x064d: case 0x065a:
2196 case 0x065d: case 0x061c: case 0x0626: case 0x0627: case 0x0635:
2197 case 0x0636: */
2198 /* All other CPUs: */
2199 default:
2200 {
2201 LogRelFunc(("Could not determine LBR stack size for the CPU model %#x\n", uFamilyModel));
2202 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_UNKNOWN;
2203 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2204 }
2205 }
2206
2207 /*
2208 * Validate.
2209 */
2210 uint32_t const cLbrStack = idLbrFromIpMsrLast - idLbrFromIpMsrFirst + 1;
2211 PCVMCPU pVCpu0 = VMCC_GET_CPU_0(pVM);
2212 AssertCompile( RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr)
2213 == RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrToIpMsr));
2214 if (cLbrStack > RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr))
2215 {
2216 LogRelFunc(("LBR stack size of the CPU (%u) exceeds our buffer size\n", cLbrStack));
2217 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_OVERFLOW;
2218 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2219 }
2220 NOREF(pVCpu0);
2221
2222 /*
2223     * Update the LBR info in the VM structure for later use.
2224 */
2225 pVM->hmr0.s.vmx.idLbrTosMsr = idLbrTosMsr;
2226
2227 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrFirst = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst = idLbrFromIpMsrFirst;
2228 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrLast = pVM->hmr0.s.vmx.idLbrFromIpMsrLast = idLbrFromIpMsrLast;
2229
2230 pVM->hm.s.ForR3.vmx.idLbrToIpMsrFirst = pVM->hmr0.s.vmx.idLbrToIpMsrFirst = idLbrToIpMsrFirst;
2231 pVM->hm.s.ForR3.vmx.idLbrToIpMsrLast = pVM->hmr0.s.vmx.idLbrToIpMsrLast = idLbrToIpMsrLast;
2232 return VINF_SUCCESS;
2233}
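
/*
 * Illustrative sketch (disabled): how the (family << 8) | model key used in the switch
 * above is formed. For instance, a host CPU reporting family 0x06 and model 0x8e yields
 * the key 0x068e, which selects the 32-entry MSR_LASTBRANCH_* stack. The example function
 * name and values are only meant to demonstrate the encoding.
 */
#if 0
static uint32_t exampleLbrFamilyModelKey(uint8_t uFamily, uint8_t uModel)
{
    return ((uint32_t)uFamily << 8) | uModel;   /* e.g. (0x06 << 8) | 0x8e == 0x068e */
}
#endif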
2234
2235
2236#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2237/**
2238 * Sets up the shadow VMCS fields arrays.
2239 *
2240 * This function builds arrays of VMCS fields to sync the shadow VMCS later while
2241 * executing the guest.
2242 *
2243 * @returns VBox status code.
2244 * @param pVM The cross context VM structure.
2245 */
2246static int hmR0VmxSetupShadowVmcsFieldsArrays(PVMCC pVM)
2247{
2248 /*
2249 * Paranoia. Ensure we haven't exposed the VMWRITE-All VMX feature to the guest
2250 * when the host does not support it.
2251 */
2252 bool const fGstVmwriteAll = pVM->cpum.ro.GuestFeatures.fVmxVmwriteAll;
2253 if ( !fGstVmwriteAll
2254 || (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL))
2255 { /* likely. */ }
2256 else
2257 {
2258 LogRelFunc(("VMX VMWRITE-All feature exposed to the guest but host CPU does not support it!\n"));
2259 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_GST_HOST_VMWRITE_ALL;
2260 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2261 }
2262
2263 uint32_t const cVmcsFields = RT_ELEMENTS(g_aVmcsFields);
2264 uint32_t cRwFields = 0;
2265 uint32_t cRoFields = 0;
2266 for (uint32_t i = 0; i < cVmcsFields; i++)
2267 {
2268 VMXVMCSFIELD VmcsField;
2269 VmcsField.u = g_aVmcsFields[i];
2270
2271 /*
2272 * We will be writing "FULL" (64-bit) fields while syncing the shadow VMCS.
2273 * Therefore, "HIGH" (32-bit portion of 64-bit) fields must not be included
2274 * in the shadow VMCS fields array as they would be redundant.
2275 *
2276 * If the VMCS field depends on a CPU feature that is not exposed to the guest,
2277 * we must not include it in the shadow VMCS fields array. Guests attempting to
2278 * VMREAD/VMWRITE such VMCS fields would cause a VM-exit and we shall emulate
2279 * the required behavior.
2280 */
2281 if ( VmcsField.n.fAccessType == VMX_VMCSFIELD_ACCESS_FULL
2282 && CPUMIsGuestVmxVmcsFieldValid(pVM, VmcsField.u))
2283 {
2284 /*
2285 * Read-only fields are placed in a separate array so that while syncing shadow
2286 * VMCS fields later (which is more performance critical) we can avoid branches.
2287 *
2288 * However, if the guest can write to all fields (including read-only fields),
2289 * we treat it a as read/write field. Otherwise, writing to these fields would
2290 * cause a VMWRITE instruction error while syncing the shadow VMCS.
2291 */
2292 if ( fGstVmwriteAll
2293 || !VMXIsVmcsFieldReadOnly(VmcsField.u))
2294 pVM->hmr0.s.vmx.paShadowVmcsFields[cRwFields++] = VmcsField.u;
2295 else
2296 pVM->hmr0.s.vmx.paShadowVmcsRoFields[cRoFields++] = VmcsField.u;
2297 }
2298 }
2299
2300 /* Update the counts. */
2301 pVM->hmr0.s.vmx.cShadowVmcsFields = cRwFields;
2302 pVM->hmr0.s.vmx.cShadowVmcsRoFields = cRoFields;
2303 return VINF_SUCCESS;
2304}
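
/*
 * Illustrative sketch (disabled): the VMCS field encoding bits consulted above via
 * VMXVMCSFIELD. Per the Intel SDM field-encoding appendix, bit 0 is the access type
 * (0 = full, 1 = high 32 bits of a 64-bit field), bits 9:1 the index, bits 11:10 the
 * field type and bits 14:13 the field width. The example union below is a simplified
 * stand-in for illustration, not the definition used by this file.
 */
#if 0
typedef union EXAMPLEVMCSFIELD
{
    uint32_t u;
    struct
    {
        uint32_t fAccessType : 1;   /* 0 = FULL, 1 = HIGH. */
        uint32_t uIndex      : 9;   /* Field index. */
        uint32_t uType       : 2;   /* 0 = control, 1 = VM-exit info (read-only), 2 = guest state, 3 = host state. */
        uint32_t uReserved0  : 1;
        uint32_t uWidth      : 2;   /* 0 = 16-bit, 1 = 64-bit, 2 = 32-bit, 3 = natural width. */
        uint32_t uReserved1  : 17;
    } n;
} EXAMPLEVMCSFIELD;
#endif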
2305
2306
2307/**
2308 * Sets up the VMREAD and VMWRITE bitmaps.
2309 *
2310 * @param pVM The cross context VM structure.
2311 */
2312static void hmR0VmxSetupVmreadVmwriteBitmaps(PVMCC pVM)
2313{
2314 /*
2315 * By default, ensure guest attempts to access any VMCS fields cause VM-exits.
2316 */
2317 uint32_t const cbBitmap = X86_PAGE_4K_SIZE;
2318 uint8_t *pbVmreadBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmreadBitmap;
2319 uint8_t *pbVmwriteBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmwriteBitmap;
2320 ASMMemFill32(pbVmreadBitmap, cbBitmap, UINT32_C(0xffffffff));
2321 ASMMemFill32(pbVmwriteBitmap, cbBitmap, UINT32_C(0xffffffff));
2322
2323 /*
2324 * Skip intercepting VMREAD/VMWRITE to guest read/write fields in the
2325 * VMREAD and VMWRITE bitmaps.
2326 */
2327 {
2328 uint32_t const *paShadowVmcsFields = pVM->hmr0.s.vmx.paShadowVmcsFields;
2329 uint32_t const cShadowVmcsFields = pVM->hmr0.s.vmx.cShadowVmcsFields;
2330 for (uint32_t i = 0; i < cShadowVmcsFields; i++)
2331 {
2332 uint32_t const uVmcsField = paShadowVmcsFields[i];
2333 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2334 Assert(uVmcsField >> 3 < cbBitmap);
2335 ASMBitClear(pbVmreadBitmap + (uVmcsField >> 3), uVmcsField & 7);
2336 ASMBitClear(pbVmwriteBitmap + (uVmcsField >> 3), uVmcsField & 7);
2337 }
2338 }
2339
2340 /*
2341 * Skip intercepting VMREAD for guest read-only fields in the VMREAD bitmap
2342 * if the host supports VMWRITE to all supported VMCS fields.
2343 */
2344 if (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL)
2345 {
2346 uint32_t const *paShadowVmcsRoFields = pVM->hmr0.s.vmx.paShadowVmcsRoFields;
2347 uint32_t const cShadowVmcsRoFields = pVM->hmr0.s.vmx.cShadowVmcsRoFields;
2348 for (uint32_t i = 0; i < cShadowVmcsRoFields; i++)
2349 {
2350 uint32_t const uVmcsField = paShadowVmcsRoFields[i];
2351 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2352 Assert(uVmcsField >> 3 < cbBitmap);
2353 ASMBitClear(pbVmreadBitmap + (uVmcsField >> 3), uVmcsField & 7);
2354 }
2355 }
2356}
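
/*
 * Illustrative sketch (disabled): how a VMCS field encoding indexes the 4K VMREAD/VMWRITE
 * bitmaps initialized above. The field encoding selects a byte (encoding >> 3) and a bit
 * within it (encoding & 7); a clear bit means the VMREAD/VMWRITE is not intercepted. The
 * field value 0x4002 below is only a hypothetical example encoding.
 */
#if 0
static bool exampleIsVmreadIntercepted(uint8_t const *pbVmreadBitmap, uint32_t uVmcsField)
{
    /* E.g. uVmcsField = 0x4002: byte 0x800, bit 2. */
    return RT_BOOL(pbVmreadBitmap[uVmcsField >> 3] & RT_BIT(uVmcsField & 7));
}
#endif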
2357#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
2358
2359
2360/**
2361 * Sets up the virtual-APIC page address for the VMCS.
2362 *
2363 * @param pVmcsInfo The VMCS info. object.
2364 */
2365DECLINLINE(void) hmR0VmxSetupVmcsVirtApicAddr(PCVMXVMCSINFO pVmcsInfo)
2366{
2367 RTHCPHYS const HCPhysVirtApic = pVmcsInfo->HCPhysVirtApic;
2368 Assert(HCPhysVirtApic != NIL_RTHCPHYS);
2369 Assert(!(HCPhysVirtApic & 0xfff)); /* Bits 11:0 MBZ. */
2370 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
2371 AssertRC(rc);
2372}
2373
2374
2375/**
2376 * Sets up the MSR-bitmap address for the VMCS.
2377 *
2378 * @param pVmcsInfo The VMCS info. object.
2379 */
2380DECLINLINE(void) hmR0VmxSetupVmcsMsrBitmapAddr(PCVMXVMCSINFO pVmcsInfo)
2381{
2382 RTHCPHYS const HCPhysMsrBitmap = pVmcsInfo->HCPhysMsrBitmap;
2383 Assert(HCPhysMsrBitmap != NIL_RTHCPHYS);
2384 Assert(!(HCPhysMsrBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2385 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, HCPhysMsrBitmap);
2386 AssertRC(rc);
2387}
2388
2389
2390/**
2391 * Sets up the APIC-access page address for the VMCS.
2392 *
2393 * @param pVCpu The cross context virtual CPU structure.
2394 */
2395DECLINLINE(void) hmR0VmxSetupVmcsApicAccessAddr(PVMCPUCC pVCpu)
2396{
2397 RTHCPHYS const HCPhysApicAccess = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysApicAccess;
2398 Assert(HCPhysApicAccess != NIL_RTHCPHYS);
2399 Assert(!(HCPhysApicAccess & 0xfff)); /* Bits 11:0 MBZ. */
2400 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
2401 AssertRC(rc);
2402}
2403
2404#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2405
2406/**
2407 * Sets up the VMREAD bitmap address for the VMCS.
2408 *
2409 * @param pVCpu The cross context virtual CPU structure.
2410 */
2411DECLINLINE(void) hmR0VmxSetupVmcsVmreadBitmapAddr(PVMCPUCC pVCpu)
2412{
2413 RTHCPHYS const HCPhysVmreadBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmreadBitmap;
2414 Assert(HCPhysVmreadBitmap != NIL_RTHCPHYS);
2415 Assert(!(HCPhysVmreadBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2416 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMREAD_BITMAP_FULL, HCPhysVmreadBitmap);
2417 AssertRC(rc);
2418}
2419
2420
2421/**
2422 * Sets up the VMWRITE bitmap address for the VMCS.
2423 *
2424 * @param pVCpu The cross context virtual CPU structure.
2425 */
2426DECLINLINE(void) hmR0VmxSetupVmcsVmwriteBitmapAddr(PVMCPUCC pVCpu)
2427{
2428 RTHCPHYS const HCPhysVmwriteBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmwriteBitmap;
2429 Assert(HCPhysVmwriteBitmap != NIL_RTHCPHYS);
2430 Assert(!(HCPhysVmwriteBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2431 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMWRITE_BITMAP_FULL, HCPhysVmwriteBitmap);
2432 AssertRC(rc);
2433}
2434
2435#endif
2436
2437/**
2438 * Sets up the VM-entry MSR load, VM-exit MSR-store and VM-exit MSR-load addresses
2439 * in the VMCS.
2440 *
2441 * @returns VBox status code.
2442 * @param pVmcsInfo The VMCS info. object.
2443 */
2444DECLINLINE(int) hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(PVMXVMCSINFO pVmcsInfo)
2445{
2446 RTHCPHYS const HCPhysGuestMsrLoad = pVmcsInfo->HCPhysGuestMsrLoad;
2447 Assert(HCPhysGuestMsrLoad != NIL_RTHCPHYS);
2448 Assert(!(HCPhysGuestMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2449
2450 RTHCPHYS const HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrStore;
2451 Assert(HCPhysGuestMsrStore != NIL_RTHCPHYS);
2452 Assert(!(HCPhysGuestMsrStore & 0xf)); /* Bits 3:0 MBZ. */
2453
2454 RTHCPHYS const HCPhysHostMsrLoad = pVmcsInfo->HCPhysHostMsrLoad;
2455 Assert(HCPhysHostMsrLoad != NIL_RTHCPHYS);
2456 Assert(!(HCPhysHostMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2457
2458 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, HCPhysGuestMsrLoad); AssertRC(rc);
2459 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, HCPhysGuestMsrStore); AssertRC(rc);
2460 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, HCPhysHostMsrLoad); AssertRC(rc);
2461 return VINF_SUCCESS;
2462}
2463
2464
2465/**
2466 * Sets up MSR permissions in the MSR bitmap of a VMCS info. object.
2467 *
2468 * @param pVCpu The cross context virtual CPU structure.
2469 * @param pVmcsInfo The VMCS info. object.
2470 */
2471static void hmR0VmxSetupVmcsMsrPermissions(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2472{
2473 Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS);
2474
2475 /*
2476 * By default, ensure guest attempts to access any MSR cause VM-exits.
2477 * This shall later be relaxed for specific MSRs as necessary.
2478 *
2479 * Note: For nested-guests, the entire bitmap will be merged prior to
2480 * executing the nested-guest using hardware-assisted VMX and hence there
2481 * is no need to perform this operation. See hmR0VmxMergeMsrBitmapNested.
2482 */
2483 Assert(pVmcsInfo->pvMsrBitmap);
2484 ASMMemFill32(pVmcsInfo->pvMsrBitmap, X86_PAGE_4K_SIZE, UINT32_C(0xffffffff));
2485
2486 /*
2487 * The guest can access the following MSRs (read, write) without causing
2488 * VM-exits; they are loaded/stored automatically using fields in the VMCS.
2489 */
2490 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2491 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_CS, VMXMSRPM_ALLOW_RD_WR);
2492 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_ESP, VMXMSRPM_ALLOW_RD_WR);
2493 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_EIP, VMXMSRPM_ALLOW_RD_WR);
2494 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2495 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_FS_BASE, VMXMSRPM_ALLOW_RD_WR);
2496
2497 /*
2498     * The IA32_PRED_CMD and IA32_FLUSH_CMD MSRs are write-only and have no state
2499     * associated with them. We never need to intercept access (writes need to be
2500 * executed without causing a VM-exit, reads will #GP fault anyway).
2501 *
2502 * The IA32_SPEC_CTRL MSR is read/write and has state. We allow the guest to
2503 * read/write them. We swap the guest/host MSR value using the
2504 * auto-load/store MSR area.
2505 */
2506 if (pVM->cpum.ro.GuestFeatures.fIbpb)
2507 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_PRED_CMD, VMXMSRPM_ALLOW_RD_WR);
2508 if (pVM->cpum.ro.GuestFeatures.fFlushCmd)
2509 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_FLUSH_CMD, VMXMSRPM_ALLOW_RD_WR);
2510 if (pVM->cpum.ro.GuestFeatures.fIbrs)
2511 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SPEC_CTRL, VMXMSRPM_ALLOW_RD_WR);
2512
2513 /*
2514 * Allow full read/write access for the following MSRs (mandatory for VT-x)
2515 * required for 64-bit guests.
2516 */
2517 if (pVM->hmr0.s.fAllow64BitGuests)
2518 {
2519 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_LSTAR, VMXMSRPM_ALLOW_RD_WR);
2520 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K6_STAR, VMXMSRPM_ALLOW_RD_WR);
2521 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_SF_MASK, VMXMSRPM_ALLOW_RD_WR);
2522 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_KERNEL_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2523 }
2524
2525 /*
2526 * IA32_EFER MSR is always intercepted, see @bugref{9180#c37}.
2527 */
2528#ifdef VBOX_STRICT
2529 Assert(pVmcsInfo->pvMsrBitmap);
2530 uint32_t const fMsrpmEfer = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, MSR_K6_EFER);
2531 Assert(fMsrpmEfer == VMXMSRPM_EXIT_RD_WR);
2532#endif
2533}
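
/*
 * Illustrative sketch (disabled): the 4K MSR-bitmap layout manipulated above. Per the
 * Intel SDM the page is split into four 1KB quadrants - read-low (MSRs 0..0x1fff) at
 * offset 0, read-high (MSRs 0xc0000000..0xc0001fff) at 0x400, write-low at 0x800 and
 * write-high at 0xc00 - with one bit per MSR, where a set bit forces a VM-exit. The
 * example function name is made up for illustration and only covers the read side.
 */
#if 0
static bool exampleIsMsrReadIntercepted(uint8_t const *pbMsrBitmap, uint32_t idMsr)
{
    uint32_t offQuadrant;
    uint32_t iBit;
    if (idMsr <= 0x00001fff)
    {
        offQuadrant = 0x000;                    /* Read bitmap for low MSRs. */
        iBit        = idMsr;
    }
    else
    {
        Assert(idMsr >= 0xc0000000 && idMsr <= 0xc0001fff);
        offQuadrant = 0x400;                    /* Read bitmap for high MSRs. */
        iBit        = idMsr - 0xc0000000;
    }
    return RT_BOOL(pbMsrBitmap[offQuadrant + (iBit >> 3)] & RT_BIT(iBit & 7));
}
#endif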
2534
2535
2536/**
2537 * Sets up pin-based VM-execution controls in the VMCS.
2538 *
2539 * @returns VBox status code.
2540 * @param pVCpu The cross context virtual CPU structure.
2541 * @param pVmcsInfo The VMCS info. object.
2542 */
2543static int hmR0VmxSetupVmcsPinCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2544{
2545 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2546 uint32_t fVal = g_HmMsrs.u.vmx.PinCtls.n.allowed0; /* Bits set here must always be set. */
2547 uint32_t const fZap = g_HmMsrs.u.vmx.PinCtls.n.allowed1; /* Bits cleared here must always be cleared. */
2548
2549 fVal |= VMX_PIN_CTLS_EXT_INT_EXIT /* External interrupts cause a VM-exit. */
2550 | VMX_PIN_CTLS_NMI_EXIT; /* Non-maskable interrupts (NMIs) cause a VM-exit. */
2551
2552 if (g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_VIRT_NMI)
2553 fVal |= VMX_PIN_CTLS_VIRT_NMI; /* Use virtual NMIs and virtual-NMI blocking features. */
2554
2555 /* Enable the VMX-preemption timer. */
2556 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
2557 {
2558 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_PREEMPT_TIMER);
2559 fVal |= VMX_PIN_CTLS_PREEMPT_TIMER;
2560 }
2561
2562#if 0
2563 /* Enable posted-interrupt processing. */
2564 if (pVM->hm.s.fPostedIntrs)
2565 {
2566 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_POSTED_INT);
2567 Assert(g_HmMsrs.u.vmx.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_ACK_EXT_INT);
2568 fVal |= VMX_PIN_CTLS_POSTED_INT;
2569 }
2570#endif
2571
2572 if ((fVal & fZap) != fVal)
2573 {
2574 LogRelFunc(("Invalid pin-based VM-execution controls combo! Cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2575 g_HmMsrs.u.vmx.PinCtls.n.allowed0, fVal, fZap));
2576 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PIN_EXEC;
2577 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2578 }
2579
2580 /* Commit it to the VMCS and update our cache. */
2581 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, fVal);
2582 AssertRC(rc);
2583 pVmcsInfo->u32PinCtls = fVal;
2584
2585 return VINF_SUCCESS;
2586}
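
/*
 * Illustrative sketch (disabled): the allowed0/allowed1 capability pattern used by the
 * control setup routines in this file. Bits set in allowed0 must be 1 in the VMCS (hence
 * fVal starts from it) and bits clear in allowed1 ("fZap") must be 0, so requesting a
 * control the CPU cannot set shows up as (fVal & fZap) != fVal. The example function name
 * is made up for illustration.
 */
#if 0
static bool exampleIsCtlsComboValid(uint32_t fAllowed0, uint32_t fAllowed1, uint32_t fRequested)
{
    uint32_t const fVal = fAllowed0 | fRequested;   /* Mandatory one-bits plus what we want. */
    uint32_t const fZap = fAllowed1;                /* Everything outside this must stay zero. */
    return (fVal & fZap) == fVal;
}
#endif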
2587
2588
2589/**
2590 * Sets up secondary processor-based VM-execution controls in the VMCS.
2591 *
2592 * @returns VBox status code.
2593 * @param pVCpu The cross context virtual CPU structure.
2594 * @param pVmcsInfo The VMCS info. object.
2595 */
2596static int hmR0VmxSetupVmcsProcCtls2(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2597{
2598 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2599 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls2.n.allowed0; /* Bits set here must be set in the VMCS. */
2600 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls2.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2601
2602 /* WBINVD causes a VM-exit. */
2603 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_WBINVD_EXIT)
2604 fVal |= VMX_PROC_CTLS2_WBINVD_EXIT;
2605
2606 /* Enable EPT (aka nested-paging). */
2607 if (pVM->hmr0.s.fNestedPaging)
2608 fVal |= VMX_PROC_CTLS2_EPT;
2609
2610     /* Enable the INVPCID instruction if we expose it to the guest and it is supported
2611        by the hardware. Without this, a guest executing INVPCID would cause a #UD. */
2612 if ( pVM->cpum.ro.GuestFeatures.fInvpcid
2613 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_INVPCID))
2614 fVal |= VMX_PROC_CTLS2_INVPCID;
2615
2616 /* Enable VPID. */
2617 if (pVM->hmr0.s.vmx.fVpid)
2618 fVal |= VMX_PROC_CTLS2_VPID;
2619
2620 /* Enable unrestricted guest execution. */
2621 if (pVM->hmr0.s.vmx.fUnrestrictedGuest)
2622 fVal |= VMX_PROC_CTLS2_UNRESTRICTED_GUEST;
2623
2624#if 0
2625 if (pVM->hm.s.fVirtApicRegs)
2626 {
2627 /* Enable APIC-register virtualization. */
2628 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_APIC_REG_VIRT);
2629 fVal |= VMX_PROC_CTLS2_APIC_REG_VIRT;
2630
2631 /* Enable virtual-interrupt delivery. */
2632 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_INTR_DELIVERY);
2633 fVal |= VMX_PROC_CTLS2_VIRT_INTR_DELIVERY;
2634 }
2635#endif
2636
2637     /* Virtualize APIC accesses if supported by the CPU. The virtual-APIC page is
2638 where the TPR shadow resides. */
2639 /** @todo VIRT_X2APIC support, it's mutually exclusive with this. So must be
2640 * done dynamically. */
2641 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
2642 {
2643 fVal |= VMX_PROC_CTLS2_VIRT_APIC_ACCESS;
2644 hmR0VmxSetupVmcsApicAccessAddr(pVCpu);
2645 }
2646
2647     /* Enable the RDTSCP instruction if we expose it to the guest and it is supported
2648        by the hardware. Without this, a guest executing RDTSCP would cause a #UD. */
2649 if ( pVM->cpum.ro.GuestFeatures.fRdTscP
2650 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_RDTSCP))
2651 fVal |= VMX_PROC_CTLS2_RDTSCP;
2652
2653 /* Enable Pause-Loop exiting. */
2654 if ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT)
2655 && pVM->hm.s.vmx.cPleGapTicks
2656 && pVM->hm.s.vmx.cPleWindowTicks)
2657 {
2658 fVal |= VMX_PROC_CTLS2_PAUSE_LOOP_EXIT;
2659
2660 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, pVM->hm.s.vmx.cPleGapTicks); AssertRC(rc);
2661 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, pVM->hm.s.vmx.cPleWindowTicks); AssertRC(rc);
2662 }
2663
2664 if ((fVal & fZap) != fVal)
2665 {
2666 LogRelFunc(("Invalid secondary processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2667 g_HmMsrs.u.vmx.ProcCtls2.n.allowed0, fVal, fZap));
2668 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC2;
2669 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2670 }
2671
2672 /* Commit it to the VMCS and update our cache. */
2673 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, fVal);
2674 AssertRC(rc);
2675 pVmcsInfo->u32ProcCtls2 = fVal;
2676
2677 return VINF_SUCCESS;
2678}
2679
2680
2681/**
2682 * Sets up processor-based VM-execution controls in the VMCS.
2683 *
2684 * @returns VBox status code.
2685 * @param pVCpu The cross context virtual CPU structure.
2686 * @param pVmcsInfo The VMCS info. object.
2687 */
2688static int hmR0VmxSetupVmcsProcCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2689{
2690 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2691 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls.n.allowed0; /* Bits set here must be set in the VMCS. */
2692 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2693
2694 fVal |= VMX_PROC_CTLS_HLT_EXIT /* HLT causes a VM-exit. */
2695 | VMX_PROC_CTLS_USE_TSC_OFFSETTING /* Use TSC-offsetting. */
2696 | VMX_PROC_CTLS_MOV_DR_EXIT /* MOV DRx causes a VM-exit. */
2697 | VMX_PROC_CTLS_UNCOND_IO_EXIT /* All IO instructions cause a VM-exit. */
2698 | VMX_PROC_CTLS_RDPMC_EXIT /* RDPMC causes a VM-exit. */
2699 | VMX_PROC_CTLS_MONITOR_EXIT /* MONITOR causes a VM-exit. */
2700 | VMX_PROC_CTLS_MWAIT_EXIT; /* MWAIT causes a VM-exit. */
2701
2702     /* We toggle VMX_PROC_CTLS_MOV_DR_EXIT later; check that it isn't -always- required to be set or cleared. */
2703 if ( !(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MOV_DR_EXIT)
2704 || (g_HmMsrs.u.vmx.ProcCtls.n.allowed0 & VMX_PROC_CTLS_MOV_DR_EXIT))
2705 {
2706 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_MOV_DRX_EXIT;
2707 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2708 }
2709
2710 /* Without nested paging, INVLPG (also affects INVPCID) and MOV CR3 instructions should cause VM-exits. */
2711 if (!pVM->hmr0.s.fNestedPaging)
2712 {
2713 Assert(!pVM->hmr0.s.vmx.fUnrestrictedGuest);
2714 fVal |= VMX_PROC_CTLS_INVLPG_EXIT
2715 | VMX_PROC_CTLS_CR3_LOAD_EXIT
2716 | VMX_PROC_CTLS_CR3_STORE_EXIT;
2717 }
2718
2719 /* Use TPR shadowing if supported by the CPU. */
2720 if ( PDMHasApic(pVM)
2721 && (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW))
2722 {
2723 fVal |= VMX_PROC_CTLS_USE_TPR_SHADOW; /* CR8 reads from the Virtual-APIC page. */
2724 /* CR8 writes cause a VM-exit based on TPR threshold. */
2725 Assert(!(fVal & VMX_PROC_CTLS_CR8_STORE_EXIT));
2726 Assert(!(fVal & VMX_PROC_CTLS_CR8_LOAD_EXIT));
2727 hmR0VmxSetupVmcsVirtApicAddr(pVmcsInfo);
2728 }
2729 else
2730 {
2731 /* Some 32-bit CPUs do not support CR8 load/store exiting as MOV CR8 is
2732 invalid on 32-bit Intel CPUs. Set this control only for 64-bit guests. */
2733 if (pVM->hmr0.s.fAllow64BitGuests)
2734 fVal |= VMX_PROC_CTLS_CR8_STORE_EXIT /* CR8 reads cause a VM-exit. */
2735 | VMX_PROC_CTLS_CR8_LOAD_EXIT; /* CR8 writes cause a VM-exit. */
2736 }
2737
2738 /* Use MSR-bitmaps if supported by the CPU. */
2739 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2740 {
2741 fVal |= VMX_PROC_CTLS_USE_MSR_BITMAPS;
2742 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
2743 }
2744
2745 /* Use the secondary processor-based VM-execution controls if supported by the CPU. */
2746 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2747 fVal |= VMX_PROC_CTLS_USE_SECONDARY_CTLS;
2748
2749 if ((fVal & fZap) != fVal)
2750 {
2751 LogRelFunc(("Invalid processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2752 g_HmMsrs.u.vmx.ProcCtls.n.allowed0, fVal, fZap));
2753 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC;
2754 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2755 }
2756
2757 /* Commit it to the VMCS and update our cache. */
2758 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, fVal);
2759 AssertRC(rc);
2760 pVmcsInfo->u32ProcCtls = fVal;
2761
2762 /* Set up MSR permissions that don't change through the lifetime of the VM. */
2763 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2764 hmR0VmxSetupVmcsMsrPermissions(pVCpu, pVmcsInfo);
2765
2766 /* Set up secondary processor-based VM-execution controls if the CPU supports it. */
2767 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2768 return hmR0VmxSetupVmcsProcCtls2(pVCpu, pVmcsInfo);
2769
2770 /* Sanity check, should not really happen. */
2771 if (RT_LIKELY(!pVM->hmr0.s.vmx.fUnrestrictedGuest))
2772 { /* likely */ }
2773 else
2774 {
2775 pVCpu->hm.s.u32HMError = VMX_UFC_INVALID_UX_COMBO;
2776 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2777 }
2778
2779 /* Old CPUs without secondary processor-based VM-execution controls would end up here. */
2780 return VINF_SUCCESS;
2781}
2782
2783
2784/**
2785 * Sets up miscellaneous (everything other than Pin, Processor and secondary
2786 * Processor-based VM-execution) control fields in the VMCS.
2787 *
2788 * @returns VBox status code.
2789 * @param pVCpu The cross context virtual CPU structure.
2790 * @param pVmcsInfo The VMCS info. object.
2791 */
2792static int hmR0VmxSetupVmcsMiscCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2793{
2794#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2795 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
2796 {
2797 hmR0VmxSetupVmcsVmreadBitmapAddr(pVCpu);
2798 hmR0VmxSetupVmcsVmwriteBitmapAddr(pVCpu);
2799 }
2800#endif
2801
2802 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
2803 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
2804 AssertRC(rc);
2805
2806 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
2807 if (RT_SUCCESS(rc))
2808 {
2809 uint64_t const u64Cr0Mask = vmxHCGetFixedCr0Mask(pVCpu);
2810 uint64_t const u64Cr4Mask = vmxHCGetFixedCr4Mask(pVCpu);
2811
2812 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask); AssertRC(rc);
2813 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask); AssertRC(rc);
2814
2815 pVmcsInfo->u64Cr0Mask = u64Cr0Mask;
2816 pVmcsInfo->u64Cr4Mask = u64Cr4Mask;
2817
2818 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fLbr)
2819 {
2820 rc = VMXWriteVmcsNw(VMX_VMCS64_GUEST_DEBUGCTL_FULL, MSR_IA32_DEBUGCTL_LBR);
2821 AssertRC(rc);
2822 }
2823 return VINF_SUCCESS;
2824 }
2825 else
2826 LogRelFunc(("Failed to initialize VMCS auto-load/store MSR addresses. rc=%Rrc\n", rc));
2827 return rc;
2828}
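
/*
 * Illustrative sketch (disabled): what the CR0/CR4 guest/host masks written above mean.
 * For every bit set in the mask the CPU treats that bit as host-owned: guest reads of
 * CR0/CR4 return the corresponding read-shadow bit, and guest writes that would set such
 * a bit to a value different from its read shadow cause a VM-exit. Bits clear in the mask
 * remain guest-owned. The example function name is made up for illustration.
 */
#if 0
static uint64_t exampleGuestVisibleCr0(uint64_t uGuestCr0, uint64_t uCr0Mask, uint64_t uCr0ReadShadow)
{
    /* Host-owned bits come from the read shadow, guest-owned bits from the real CR0. */
    return (uCr0ReadShadow & uCr0Mask) | (uGuestCr0 & ~uCr0Mask);
}
#endif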
2829
2830
2831/**
2832 * Sets up the initial exception bitmap in the VMCS based on static conditions.
2833 *
2834 * We shall set up those exception intercepts that don't change during the
2835 * lifetime of the VM here. The rest are done dynamically while loading the
2836 * guest state.
2837 *
2838 * @param pVCpu The cross context virtual CPU structure.
2839 * @param pVmcsInfo The VMCS info. object.
2840 */
2841static void hmR0VmxSetupVmcsXcptBitmap(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2842{
2843 /*
2844 * The following exceptions are always intercepted:
2845 *
2846 * #AC - To prevent the guest from hanging the CPU and for dealing with
2847 * split-lock detecting host configs.
2848 * #DB - To maintain the DR6 state even when intercepting DRx reads/writes and
2849 * recursive #DBs can cause a CPU hang.
2850 * #PF - To sync our shadow page tables when nested-paging is not used.
2851 */
2852 bool const fNestedPaging = pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging;
2853 uint32_t const uXcptBitmap = RT_BIT(X86_XCPT_AC)
2854 | RT_BIT(X86_XCPT_DB)
2855 | (fNestedPaging ? 0 : RT_BIT(X86_XCPT_PF));
2856
2857 /* Commit it to the VMCS. */
2858 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap);
2859 AssertRC(rc);
2860
2861 /* Update our cache of the exception bitmap. */
2862 pVmcsInfo->u32XcptBitmap = uXcptBitmap;
2863}
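
/*
 * Illustrative sketch (disabled): the exception bitmap committed above uses one bit per
 * exception vector, so intercepting or querying a vector is a plain bit operation. The
 * example below checks whether #GP (vector 13) would currently be intercepted; the
 * function name is made up for illustration.
 */
#if 0
static bool exampleIsXcptIntercepted(uint32_t uXcptBitmap, uint8_t uVector)
{
    return RT_BOOL(uXcptBitmap & RT_BIT_32(uVector));   /* e.g. uVector = X86_XCPT_GP (13). */
}
#endif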
2864
2865
2866#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2867/**
2868 * Sets up the VMCS for executing a nested-guest using hardware-assisted VMX.
2869 *
2870 * @returns VBox status code.
2871 * @param pVmcsInfo The VMCS info. object.
2872 */
2873static int hmR0VmxSetupVmcsCtlsNested(PVMXVMCSINFO pVmcsInfo)
2874{
2875 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
2876 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
2877 AssertRC(rc);
2878
2879 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
2880 if (RT_SUCCESS(rc))
2881 {
2882 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2883 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
2884
2885 /* Paranoia - We've not yet initialized these, they shall be done while merging the VMCS. */
2886 Assert(!pVmcsInfo->u64Cr0Mask);
2887 Assert(!pVmcsInfo->u64Cr4Mask);
2888 return VINF_SUCCESS;
2889 }
2890 LogRelFunc(("Failed to set up the VMCS link pointer in the nested-guest VMCS. rc=%Rrc\n", rc));
2891 return rc;
2892}
2893#endif
2894
2895
2896/**
2897 * Selector FNHMVMXSTARTVM implementation.
2898 */
2899static DECLCALLBACK(int) hmR0VmxStartVmSelector(PVMXVMCSINFO pVmcsInfo, PVMCPUCC pVCpu, bool fResume)
2900{
2901 hmR0VmxUpdateStartVmFunction(pVCpu);
2902 return pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResume);
2903}
2904
2905
2906/**
2907 * Sets up the VMCS for executing a guest (or nested-guest) using hardware-assisted
2908 * VMX.
2909 *
2910 * @returns VBox status code.
2911 * @param pVCpu The cross context virtual CPU structure.
2912 * @param pVmcsInfo The VMCS info. object.
2913 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
2914 */
2915static int hmR0VmxSetupVmcs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
2916{
2917 Assert(pVmcsInfo->pvVmcs);
2918 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2919
2920 /* Set the CPU specified revision identifier at the beginning of the VMCS structure. */
2921 *(uint32_t *)pVmcsInfo->pvVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
2922 const char * const pszVmcs = fIsNstGstVmcs ? "nested-guest VMCS" : "guest VMCS";
2923
2924 LogFlowFunc(("\n"));
2925
2926 /*
2927 * Initialize the VMCS using VMCLEAR before loading the VMCS.
2928 * See Intel spec. 31.6 "Preparation And Launching A Virtual Machine".
2929 */
2930 int rc = hmR0VmxClearVmcs(pVmcsInfo);
2931 if (RT_SUCCESS(rc))
2932 {
2933 rc = hmR0VmxLoadVmcs(pVmcsInfo);
2934 if (RT_SUCCESS(rc))
2935 {
2936 /*
2937 * Initialize the hardware-assisted VMX execution handler for guest and nested-guest VMCS.
2938 * The host is always 64-bit since we no longer support 32-bit hosts.
2939 * Currently we have just a single handler for all guest modes as well, see @bugref{6208#c73}.
2940 */
2941 if (!fIsNstGstVmcs)
2942 {
2943 rc = hmR0VmxSetupVmcsPinCtls(pVCpu, pVmcsInfo);
2944 if (RT_SUCCESS(rc))
2945 {
2946 rc = hmR0VmxSetupVmcsProcCtls(pVCpu, pVmcsInfo);
2947 if (RT_SUCCESS(rc))
2948 {
2949 rc = hmR0VmxSetupVmcsMiscCtls(pVCpu, pVmcsInfo);
2950 if (RT_SUCCESS(rc))
2951 {
2952 hmR0VmxSetupVmcsXcptBitmap(pVCpu, pVmcsInfo);
2953#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2954 /*
2955 * If a shadow VMCS is allocated for the VMCS info. object, initialize the
2956 * VMCS revision ID and shadow VMCS indicator bit. Also, clear the VMCS
2957 * making it fit for use when VMCS shadowing is later enabled.
2958 */
2959 if (pVmcsInfo->pvShadowVmcs)
2960 {
2961 VMXVMCSREVID VmcsRevId;
2962 VmcsRevId.u = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
2963 VmcsRevId.n.fIsShadowVmcs = 1;
2964 *(uint32_t *)pVmcsInfo->pvShadowVmcs = VmcsRevId.u;
2965 rc = vmxHCClearShadowVmcs(pVmcsInfo);
2966 if (RT_SUCCESS(rc))
2967 { /* likely */ }
2968 else
2969 LogRelFunc(("Failed to initialize shadow VMCS. rc=%Rrc\n", rc));
2970 }
2971#endif
2972 }
2973 else
2974 LogRelFunc(("Failed to setup miscellaneous controls. rc=%Rrc\n", rc));
2975 }
2976 else
2977 LogRelFunc(("Failed to setup processor-based VM-execution controls. rc=%Rrc\n", rc));
2978 }
2979 else
2980 LogRelFunc(("Failed to setup pin-based controls. rc=%Rrc\n", rc));
2981 }
2982 else
2983 {
2984#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2985 rc = hmR0VmxSetupVmcsCtlsNested(pVmcsInfo);
2986 if (RT_SUCCESS(rc))
2987 { /* likely */ }
2988 else
2989 LogRelFunc(("Failed to initialize nested-guest VMCS. rc=%Rrc\n", rc));
2990#else
2991 AssertFailed();
2992#endif
2993 }
2994 }
2995 else
2996 LogRelFunc(("Failed to load the %s. rc=%Rrc\n", rc, pszVmcs));
2997 }
2998 else
2999 LogRelFunc(("Failed to clear the %s. rc=%Rrc\n", rc, pszVmcs));
3000
3001 /* Sync any CPU internal VMCS data back into our VMCS in memory. */
3002 if (RT_SUCCESS(rc))
3003 {
3004 rc = hmR0VmxClearVmcs(pVmcsInfo);
3005 if (RT_SUCCESS(rc))
3006 { /* likely */ }
3007 else
3008 LogRelFunc(("Failed to clear the %s post setup. rc=%Rrc\n", rc, pszVmcs));
3009 }
3010
3011 /*
3012 * Update the last-error record both for failures and success, so we
3013 * can propagate the status code back to ring-3 for diagnostics.
3014 */
3015 hmR0VmxUpdateErrorRecord(pVCpu, rc);
3016 NOREF(pszVmcs);
3017 return rc;
3018}
3019
3020
3021/**
3022 * Does global VT-x initialization (called during module initialization).
3023 *
3024 * @returns VBox status code.
3025 */
3026VMMR0DECL(int) VMXR0GlobalInit(void)
3027{
3028#ifdef HMVMX_USE_FUNCTION_TABLE
3029 AssertCompile(VMX_EXIT_MAX + 1 == RT_ELEMENTS(g_aVMExitHandlers));
3030# ifdef VBOX_STRICT
3031 for (unsigned i = 0; i < RT_ELEMENTS(g_aVMExitHandlers); i++)
3032 Assert(g_aVMExitHandlers[i].pfn);
3033# endif
3034#endif
3035 return VINF_SUCCESS;
3036}
3037
3038
3039/**
3040 * Does global VT-x termination (called during module termination).
3041 */
3042VMMR0DECL(void) VMXR0GlobalTerm()
3043{
3044 /* Nothing to do currently. */
3045}
3046
3047
3048/**
3049 * Sets up and activates VT-x on the current CPU.
3050 *
3051 * @returns VBox status code.
3052 * @param pHostCpu The HM physical-CPU structure.
3053 * @param pVM The cross context VM structure. Can be
3054 * NULL after a host resume operation.
3055 * @param pvCpuPage Pointer to the VMXON region (can be NULL if @a
3056 * fEnabledByHost is @c true).
3057 * @param HCPhysCpuPage Physical address of the VMXON region (can be 0 if
3058 * @a fEnabledByHost is @c true).
3059 * @param fEnabledByHost Set if SUPR0EnableVTx() or similar was used to
3060 * enable VT-x on the host.
3061 * @param pHwvirtMsrs Pointer to the hardware-virtualization MSRs.
3062 */
3063VMMR0DECL(int) VMXR0EnableCpu(PHMPHYSCPU pHostCpu, PVMCC pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost,
3064 PCSUPHWVIRTMSRS pHwvirtMsrs)
3065{
3066 AssertPtr(pHostCpu);
3067 AssertPtr(pHwvirtMsrs);
3068 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3069
3070 /* Enable VT-x if it's not already enabled by the host. */
3071 if (!fEnabledByHost)
3072 {
3073 int rc = hmR0VmxEnterRootMode(pHostCpu, pVM, HCPhysCpuPage, pvCpuPage);
3074 if (RT_FAILURE(rc))
3075 return rc;
3076 }
3077
3078 /*
3079 * Flush all EPT tagged-TLB entries (in case VirtualBox or any other hypervisor has been
3080 * using EPTPs) so we don't retain any stale guest-physical mappings which won't get
3081 * invalidated when flushing by VPID.
3082 */
3083 if (pHwvirtMsrs->u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
3084 {
3085 hmR0VmxFlushEpt(NULL /* pVCpu */, NULL /* pVmcsInfo */, VMXTLBFLUSHEPT_ALL_CONTEXTS);
3086 pHostCpu->fFlushAsidBeforeUse = false;
3087 }
3088 else
3089 pHostCpu->fFlushAsidBeforeUse = true;
3090
3091 /* Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. */
3092 ++pHostCpu->cTlbFlushes;
3093
3094 return VINF_SUCCESS;
3095}
3096
3097
3098/**
3099 * Deactivates VT-x on the current CPU.
3100 *
3101 * @returns VBox status code.
3102 * @param pHostCpu The HM physical-CPU structure.
3103 * @param pvCpuPage Pointer to the VMXON region.
3104 * @param HCPhysCpuPage Physical address of the VMXON region.
3105 *
3106 * @remarks This function should never be called when SUPR0EnableVTx() or
3107 * similar was used to enable VT-x on the host.
3108 */
3109VMMR0DECL(int) VMXR0DisableCpu(PHMPHYSCPU pHostCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
3110{
3111 RT_NOREF2(pvCpuPage, HCPhysCpuPage);
3112
3113 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3114 return hmR0VmxLeaveRootMode(pHostCpu);
3115}
3116
3117
3118/**
3119 * Does per-VM VT-x initialization.
3120 *
3121 * @returns VBox status code.
3122 * @param pVM The cross context VM structure.
3123 */
3124VMMR0DECL(int) VMXR0InitVM(PVMCC pVM)
3125{
3126 AssertPtr(pVM);
3127 LogFlowFunc(("pVM=%p\n", pVM));
3128
3129 hmR0VmxStructsInit(pVM);
3130 int rc = hmR0VmxStructsAlloc(pVM);
3131 if (RT_FAILURE(rc))
3132 {
3133 LogRelFunc(("Failed to allocated VMX structures. rc=%Rrc\n", rc));
3134 return rc;
3135 }
3136
3137 /* Setup the crash dump page. */
3138#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3139 strcpy((char *)pVM->hmr0.s.vmx.pbScratch, "SCRATCH Magic");
3140 *(uint64_t *)(pVM->hmr0.s.vmx.pbScratch + 16) = UINT64_C(0xdeadbeefdeadbeef);
3141#endif
3142 return VINF_SUCCESS;
3143}
3144
3145
3146/**
3147 * Does per-VM VT-x termination.
3148 *
3149 * @returns VBox status code.
3150 * @param pVM The cross context VM structure.
3151 */
3152VMMR0DECL(int) VMXR0TermVM(PVMCC pVM)
3153{
3154 AssertPtr(pVM);
3155 LogFlowFunc(("pVM=%p\n", pVM));
3156
3157#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3158 if (pVM->hmr0.s.vmx.pbScratch)
3159 RT_BZERO(pVM->hmr0.s.vmx.pbScratch, X86_PAGE_4K_SIZE);
3160#endif
3161 hmR0VmxStructsFree(pVM);
3162 return VINF_SUCCESS;
3163}
3164
3165
3166/**
3167 * Sets up the VM for execution using hardware-assisted VMX.
3168 * This function is only called once per-VM during initialization.
3169 *
3170 * @returns VBox status code.
3171 * @param pVM The cross context VM structure.
3172 */
3173VMMR0DECL(int) VMXR0SetupVM(PVMCC pVM)
3174{
3175 AssertPtr(pVM);
3176 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3177
3178 LogFlowFunc(("pVM=%p\n", pVM));
3179
3180 /*
3181 * At least verify if VMX is enabled, since we can't check if we're in VMX root mode or not
3182 * without causing a #GP.
3183 */
3184 RTCCUINTREG const uHostCr4 = ASMGetCR4();
3185 if (RT_LIKELY(uHostCr4 & X86_CR4_VMXE))
3186 { /* likely */ }
3187 else
3188 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
3189
3190 /*
3191 * Check that nested paging is supported if enabled and copy over the flag to the
3192 * ring-0 only structure.
3193 */
3194 bool const fNestedPaging = pVM->hm.s.fNestedPagingCfg;
3195 AssertReturn( !fNestedPaging
3196 || (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_EPT), /** @todo use a ring-0 copy of ProcCtls2.n.allowed1 */
3197 VERR_INCOMPATIBLE_CONFIG);
3198 pVM->hmr0.s.fNestedPaging = fNestedPaging;
3199 pVM->hmr0.s.fAllow64BitGuests = pVM->hm.s.fAllow64BitGuestsCfg;
3200
3201 /*
3202 * Without unrestricted guest execution, pRealModeTSS and pNonPagingModeEPTPageTable *must*
3203 * always be allocated. We no longer support the highly unlikely case of unrestricted guest
3204 * without pRealModeTSS, see hmR3InitFinalizeR0Intel().
3205 */
3206 bool const fUnrestrictedGuest = pVM->hm.s.vmx.fUnrestrictedGuestCfg;
3207 AssertReturn( !fUnrestrictedGuest
3208 || ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_UNRESTRICTED_GUEST)
3209 && fNestedPaging),
3210 VERR_INCOMPATIBLE_CONFIG);
3211 if ( !fUnrestrictedGuest
3212 && ( !pVM->hm.s.vmx.pNonPagingModeEPTPageTable
3213 || !pVM->hm.s.vmx.pRealModeTSS))
3214 {
3215 LogRelFunc(("Invalid real-on-v86 state.\n"));
3216 return VERR_INTERNAL_ERROR;
3217 }
3218 pVM->hmr0.s.vmx.fUnrestrictedGuest = fUnrestrictedGuest;
3219
3220 /* Initialize these always, see hmR3InitFinalizeR0(). */
3221 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NONE;
3222 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NONE;
3223
3224 /* Setup the tagged-TLB flush handlers. */
3225 int rc = hmR0VmxSetupTaggedTlb(pVM);
3226 if (RT_FAILURE(rc))
3227 {
3228 LogRelFunc(("Failed to setup tagged TLB. rc=%Rrc\n", rc));
3229 return rc;
3230 }
3231
3232 /* Determine LBR capabilities. */
3233 pVM->hmr0.s.vmx.fLbr = pVM->hm.s.vmx.fLbrCfg;
3234 if (pVM->hmr0.s.vmx.fLbr)
3235 {
3236 rc = hmR0VmxSetupLbrMsrRange(pVM);
3237 if (RT_FAILURE(rc))
3238 {
3239 LogRelFunc(("Failed to setup LBR MSR range. rc=%Rrc\n", rc));
3240 return rc;
3241 }
3242 }
3243
3244#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3245 /* Setup the shadow VMCS fields array and VMREAD/VMWRITE bitmaps. */
3246 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
3247 {
3248 rc = hmR0VmxSetupShadowVmcsFieldsArrays(pVM);
3249 if (RT_SUCCESS(rc))
3250 hmR0VmxSetupVmreadVmwriteBitmaps(pVM);
3251 else
3252 {
3253 LogRelFunc(("Failed to setup shadow VMCS fields arrays. rc=%Rrc\n", rc));
3254 return rc;
3255 }
3256 }
3257#endif
3258
3259 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
3260 {
3261 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
3262 Log4Func(("pVCpu=%p idCpu=%RU32\n", pVCpu, pVCpu->idCpu));
3263
3264 pVCpu->hmr0.s.vmx.pfnStartVm = hmR0VmxStartVmSelector;
3265
3266 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
3267 if (RT_SUCCESS(rc))
3268 {
3269#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3270 if (pVM->cpum.ro.GuestFeatures.fVmx)
3271 {
3272 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
3273 if (RT_SUCCESS(rc))
3274 { /* likely */ }
3275 else
3276 {
3277 LogRelFunc(("Nested-guest VMCS setup failed. rc=%Rrc\n", rc));
3278 return rc;
3279 }
3280 }
3281#endif
3282 }
3283 else
3284 {
3285 LogRelFunc(("VMCS setup failed. rc=%Rrc\n", rc));
3286 return rc;
3287 }
3288 }
3289
3290 return VINF_SUCCESS;
3291}
3292
3293
3294/**
3295 * Saves the host control registers (CR0, CR3, CR4) into the host-state area in
3296 * the VMCS.
3297 * @returns CR4 for passing along to hmR0VmxExportHostSegmentRegs.
3298 */
3299static uint64_t hmR0VmxExportHostControlRegs(void)
3300{
3301 int rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR0, ASMGetCR0()); AssertRC(rc);
3302 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR3, ASMGetCR3()); AssertRC(rc);
3303 uint64_t uHostCr4 = ASMGetCR4();
3304 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR4, uHostCr4); AssertRC(rc);
3305 return uHostCr4;
3306}
3307
3308
3309/**
3310 * Saves the host segment registers and GDTR, IDTR, (TR, GS and FS bases) into
3311 * the host-state area in the VMCS.
3312 *
3313 * @returns VBox status code.
3314 * @param pVCpu The cross context virtual CPU structure.
3315 * @param uHostCr4 The host CR4 value.
3316 */
3317static int hmR0VmxExportHostSegmentRegs(PVMCPUCC pVCpu, uint64_t uHostCr4)
3318{
3319 /*
3320 * If we've executed guest code using hardware-assisted VMX, the host-state bits
3321 * will be messed up. We should -not- save the messed up state without restoring
3322 * the original host-state, see @bugref{7240}.
3323 *
3324 * This apparently can happen (most likely due to FPU changes); deal with it rather than
3325 * asserting. This was observed while booting a Solaris 10u10 32-bit guest.
3326 */
3327 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
3328 {
3329 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags,
3330 pVCpu->idCpu));
3331 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
3332 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
3333 }
3334
3335 /*
3336 * Get all the host info.
3337 * ASSUME it is safe to use rdfsbase and friends if the CR4.FSGSBASE bit is set
3338 * without also checking the cpuid bit.
3339 */
3340 uint32_t fRestoreHostFlags;
3341#if RT_INLINE_ASM_EXTERNAL
3342 if (uHostCr4 & X86_CR4_FSGSBASE)
3343 {
3344 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, true /*fHaveFsGsBase*/);
3345 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3346 }
3347 else
3348 {
3349 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, false /*fHaveFsGsBase*/);
3350 fRestoreHostFlags = 0;
3351 }
3352 RTSEL uSelES = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES;
3353 RTSEL uSelDS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS;
3354 RTSEL uSelFS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS;
3355 RTSEL uSelGS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS;
3356#else
3357 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR = ASMGetTR();
3358 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS = ASMGetSS();
3359 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS = ASMGetCS();
3360 ASMGetGDTR((PRTGDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr);
3361 ASMGetIDTR((PRTIDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr);
3362 if (uHostCr4 & X86_CR4_FSGSBASE)
3363 {
3364 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMGetFSBase();
3365 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMGetGSBase();
3366 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3367 }
3368 else
3369 {
3370 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMRdMsr(MSR_K8_FS_BASE);
3371 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMRdMsr(MSR_K8_GS_BASE);
3372 fRestoreHostFlags = 0;
3373 }
3374 RTSEL uSelES, uSelDS, uSelFS, uSelGS;
3375 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS = uSelDS = ASMGetDS();
3376 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES = uSelES = ASMGetES();
3377 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS = uSelFS = ASMGetFS();
3378 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS = uSelGS = ASMGetGS();
3379#endif
3380
3381 /*
3382 * Determine if the host segment registers are suitable for VT-x. Otherwise use zero to
3383 * gain VM-entry and restore them before we get preempted.
3384 *
3385 * See Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers".
3386 */
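 /* OR all the data segment selectors together: if none of them has the RPL or TI (LDT)
 bits set, they are already acceptable as VMX host-state selectors and need no adjustment. */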
3387 RTSEL const uSelAll = uSelFS | uSelGS | uSelES | uSelDS;
3388 if (uSelAll & (X86_SEL_RPL | X86_SEL_LDT))
3389 {
3390 if (!(uSelAll & X86_SEL_LDT))
3391 {
3392#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3393 do { \
3394 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3395 if ((a_uVmcsVar) & X86_SEL_RPL) \
3396 { \
3397 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3398 (a_uVmcsVar) = 0; \
3399 } \
3400 } while (0)
3401 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3402 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3403 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3404 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3405#undef VMXLOCAL_ADJUST_HOST_SEG
3406 }
3407 else
3408 {
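 /* This variant also handles LDT-based selectors: those are only marked for restoration
 when their descriptor is actually present/usable; in all cases 0 is loaded into the VMCS. */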
3409#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3410 do { \
3411 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3412 if ((a_uVmcsVar) & (X86_SEL_RPL | X86_SEL_LDT)) \
3413 { \
3414 if (!((a_uVmcsVar) & X86_SEL_LDT)) \
3415 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3416 else \
3417 { \
3418 uint32_t const fAttr = ASMGetSegAttr(a_uVmcsVar); \
3419 if ((fAttr & X86_DESC_P) && fAttr != UINT32_MAX) \
3420 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3421 } \
3422 (a_uVmcsVar) = 0; \
3423 } \
3424 } while (0)
3425 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3426 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3427 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3428 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3429#undef VMXLOCAL_ADJUST_HOST_SEG
3430 }
3431 }
3432
3433 /* Verification based on Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers" */
3434 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR);
3435 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS);
3436 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_LDT));
3437 Assert(!(uSelDS & X86_SEL_RPL)); Assert(!(uSelDS & X86_SEL_LDT));
3438 Assert(!(uSelES & X86_SEL_RPL)); Assert(!(uSelES & X86_SEL_LDT));
3439 Assert(!(uSelFS & X86_SEL_RPL)); Assert(!(uSelFS & X86_SEL_LDT));
3440 Assert(!(uSelGS & X86_SEL_RPL)); Assert(!(uSelGS & X86_SEL_LDT));
3441
3442 /*
3443 * Determine if we need to manually restore the GDTR and IDTR limits as VT-x zaps
3444 * them to the maximum limit (0xffff) on every VM-exit.
3445 */
3446 if (pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb != 0xffff)
3447 fRestoreHostFlags |= VMX_RESTORE_HOST_GDTR;
3448
3449 /*
3450 * IDT limit is effectively capped at 0xfff. (See Intel spec. 6.14.1 "64-Bit Mode IDT" and
3451 * Intel spec. 6.2 "Exception and Interrupt Vectors".) Therefore if the host has the limit
3452 * as 0xfff, VT-x bloating the limit to 0xffff shouldn't cause any different CPU behavior.
3453 * However, several hosts either insist on 0xfff being the limit (Windows Patch Guard) or
3454 * use the limit for other purposes (darwin puts the CPU ID in there but botches sidt
3455 * alignment in at least one consumer). So, we're only allowing the IDTR.LIMIT to be left
3456 * at 0xffff on hosts where we are sure it won't cause trouble.
3457 */
3458#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS)
3459 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb < 0x0fff)
3460#else
3461 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb != 0xffff)
3462#endif
3463 fRestoreHostFlags |= VMX_RESTORE_HOST_IDTR;
3464
3465 /*
3466 * Host TR base. Verify that TR selector doesn't point past the GDT. Masking off the TI
3467 * and RPL bits is effectively what the CPU does for "scaling by 8". TI is always 0 and
3468 * RPL should be too in most cases.
3469 */
3470 RTSEL const uSelTR = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR;
3471 AssertMsgReturn((uSelTR | X86_SEL_RPL_LDT) <= pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb,
3472 ("TR selector exceeds limit. TR=%RTsel cbGdt=%#x\n", uSelTR, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb),
3473 VERR_VMX_INVALID_HOST_STATE);
3474
3475 PCX86DESCHC pDesc = (PCX86DESCHC)(pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr + (uSelTR & X86_SEL_MASK));
3476 uintptr_t const uTRBase = X86DESC64_BASE(pDesc);
3477
3478 /*
3479 * VT-x unconditionally restores the TR limit to 0x67 and type to 11 (32-bit busy TSS) on
3480 * all VM-exits. The type is the same for 64-bit busy TSS[1]. The limit needs manual
3481 * restoration if the host has something else. Task switching is not supported in 64-bit
3482 * mode[2], but the limit still matters as IOPM is supported in 64-bit mode. Restoring the
3483 * limit lazily while returning to ring-3 is safe because IOPM is not applicable in ring-0.
3484 *
3485 * [1] See Intel spec. 3.5 "System Descriptor Types".
3486 * [2] See Intel spec. 7.2.3 "TSS Descriptor in 64-bit mode".
3487 */
3488 Assert(pDesc->System.u4Type == 11);
3489 if ( pDesc->System.u16LimitLow != 0x67
3490 || pDesc->System.u4LimitHigh)
3491 {
3492 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_TR;
3493
3494 /* If the host has made GDT read-only, we would need to temporarily toggle CR0.WP before writing the GDT. */
3495 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_READ_ONLY)
3496 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_READ_ONLY;
3497 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_NEED_WRITABLE)
3498 {
3499 /* The GDT is read-only but the writable GDT is available. */
3500 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_NEED_WRITABLE;
3501 pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.cb = pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb;
3502 int rc = SUPR0GetCurrentGdtRw(&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.uAddr);
3503 AssertRCReturn(rc, rc);
3504 }
3505 }
3506
3507 pVCpu->hmr0.s.vmx.fRestoreHostFlags = fRestoreHostFlags;
3508
3509 /*
3510 * Do all the VMCS updates in one block to assist nested virtualization.
3511 */
3512 int rc;
3513 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_CS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS); AssertRC(rc);
3514 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_SS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS); AssertRC(rc);
3515 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_DS_SEL, uSelDS); AssertRC(rc);
3516 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_ES_SEL, uSelES); AssertRC(rc);
3517 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_FS_SEL, uSelFS); AssertRC(rc);
3518 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_GS_SEL, uSelGS); AssertRC(rc);
3519 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_TR_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR); AssertRC(rc);
3520 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr); AssertRC(rc);
3521 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_IDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.uAddr); AssertRC(rc);
3522 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_TR_BASE, uTRBase); AssertRC(rc);
3523 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_FS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase); AssertRC(rc);
3524 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase); AssertRC(rc);
3525
3526 return VINF_SUCCESS;
3527}
3528
3529
3530/**
3531 * Exports certain host MSRs in the VM-exit MSR-load area and some in the
3532 * host-state area of the VMCS.
3533 *
3534 * These MSRs will be automatically restored on the host after every successful
3535 * VM-exit.
3536 *
3537 * @param pVCpu The cross context virtual CPU structure.
3538 *
3539 * @remarks No-long-jump zone!!!
3540 */
3541static void hmR0VmxExportHostMsrs(PVMCPUCC pVCpu)
3542{
3543 AssertPtr(pVCpu);
3544
3545 /*
3546 * Save MSRs that we restore lazily (due to preemption or transition to ring-3)
3547 * rather than swapping them on every VM-entry.
3548 */
3549 hmR0VmxLazySaveHostMsrs(pVCpu);
3550
3551 /*
3552 * Host Sysenter MSRs.
3553 */
3554 int rc = VMXWriteVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)); AssertRC(rc);
3555 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP)); AssertRC(rc);
3556 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP)); AssertRC(rc);
3557
3558 /*
3559 * Host EFER MSR.
3560 *
3561 * If the CPU supports the newer VMCS controls for managing EFER, use it. Otherwise it's
3562 * done as part of auto-load/store MSR area in the VMCS, see hmR0VmxExportGuestMsrs().
3563 */
3564 if (g_fHmVmxSupportsVmcsEfer)
3565 {
3566 rc = VMXWriteVmcs64(VMX_VMCS64_HOST_EFER_FULL, g_uHmVmxHostMsrEfer);
3567 AssertRC(rc);
3568 }
3569
3570 /** @todo IA32_PERF_GLOBALCTRL, IA32_PAT also see
3571 * hmR0VmxExportGuestEntryExitCtls(). */
3572}
3573
3574
3575/**
3576 * Figures out if we need to swap the EFER MSR which is particularly expensive.
3577 *
3578 * We check all relevant bits. For now, that's everything besides LMA/LME, as
3579 * these two bits are handled by VM-entry, see hmR0VmxExportGuestEntryExitCtls().
3580 *
3581 * @returns true if we need to load guest EFER, false otherwise.
3582 * @param pVCpu The cross context virtual CPU structure.
3583 * @param pVmxTransient The VMX-transient structure.
3584 *
3585 * @remarks Requires EFER, CR4.
3586 * @remarks No-long-jump zone!!!
3587 */
3588static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3589{
3590#ifdef HMVMX_ALWAYS_SWAP_EFER
3591 RT_NOREF2(pVCpu, pVmxTransient);
3592 return true;
3593#else
3594 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
3595 uint64_t const u64HostEfer = g_uHmVmxHostMsrEfer;
3596 uint64_t const u64GuestEfer = pCtx->msrEFER;
3597
3598# ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3599 /*
3600 * For nested-guests, we shall honor swapping the EFER MSR when requested by
3601 * the nested-guest.
3602 */
3603 if ( pVmxTransient->fIsNestedGuest
3604 && ( CPUMIsGuestVmxEntryCtlsSet(pCtx, VMX_ENTRY_CTLS_LOAD_EFER_MSR)
3605 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_SAVE_EFER_MSR)
3606 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_LOAD_EFER_MSR)))
3607 return true;
3608# else
3609 RT_NOREF(pVmxTransient);
3610#endif
3611
3612 /*
3613 * For 64-bit guests, if EFER.SCE bit differs, we need to swap the EFER MSR
3614 * to ensure that the guest's SYSCALL behaviour isn't broken, see @bugref{7386}.
3615 */
3616 if ( CPUMIsGuestInLongModeEx(pCtx)
3617 && (u64GuestEfer & MSR_K6_EFER_SCE) != (u64HostEfer & MSR_K6_EFER_SCE))
3618 return true;
3619
3620 /*
3621 * If the guest uses PAE and EFER.NXE bit differs, we need to swap the EFER MSR
3622 * as it affects guest paging. 64-bit paging implies CR4.PAE as well.
3623 *
3624 * See Intel spec. 4.5 "IA-32e Paging".
3625 * See Intel spec. 4.1.1 "Three Paging Modes".
3626 *
3627 * Verify that we always intercept CR4.PAE and CR0.PG bits, so we don't need to
3628 * import CR4 and CR0 from the VMCS here as those bits are always up to date.
3629 */
3630 Assert(vmxHCGetFixedCr4Mask(pVCpu) & X86_CR4_PAE);
3631 Assert(vmxHCGetFixedCr0Mask(pVCpu) & X86_CR0_PG);
3632 if ( (pCtx->cr4 & X86_CR4_PAE)
3633 && (pCtx->cr0 & X86_CR0_PG))
3634 {
3635 /*
3636 * If nested paging is not used, verify that the guest paging mode matches the
3637 * shadow paging mode which is/will be placed in the VMCS (which is what will
3638 * actually be used while executing the guest and not the CR4 shadow value).
3639 */
3640 AssertMsg( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
3641 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE
3642 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE_NX
3643 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64
3644 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64_NX,
3645 ("enmShadowMode=%u\n", pVCpu->hm.s.enmShadowMode));
3646 if ((u64GuestEfer & MSR_K6_EFER_NXE) != (u64HostEfer & MSR_K6_EFER_NXE))
3647 {
3648 /* Verify that the host is NX capable. */
3649 Assert(pVCpu->CTX_SUFF(pVM)->cpum.ro.HostFeatures.fNoExecute);
3650 return true;
3651 }
3652 }
3653
3654 return false;
3655#endif
3656}
3657
3658
3659/**
3660 * Exports the guest's RSP into the guest-state area in the VMCS.
3661 *
3662 * @param pVCpu The cross context virtual CPU structure.
3663 *
3664 * @remarks No-long-jump zone!!!
3665 */
3666static void hmR0VmxExportGuestRsp(PVMCPUCC pVCpu)
3667{
3668 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RSP)
3669 {
3670 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RSP);
3671
3672 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_RSP, pVCpu->cpum.GstCtx.rsp);
3673 AssertRC(rc);
3674
3675 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RSP);
3676 Log4Func(("rsp=%#RX64\n", pVCpu->cpum.GstCtx.rsp));
3677 }
3678}
3679
3680
3681/**
3682 * Exports the guest hardware-virtualization state.
3683 *
3684 * @returns VBox status code.
3685 * @param pVCpu The cross context virtual CPU structure.
3686 * @param pVmxTransient The VMX-transient structure.
3687 *
3688 * @remarks No-long-jump zone!!!
3689 */
3690static int hmR0VmxExportGuestHwvirtState(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3691{
3692 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_HWVIRT)
3693 {
3694#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3695 /*
3696 * Check if the VMX feature is exposed to the guest and if the host CPU supports
3697 * VMCS shadowing.
3698 */
3699 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
3700 {
3701 /*
3702 * If the nested hypervisor has loaded a current VMCS and is in VMX root mode,
3703 * copy the nested hypervisor's current VMCS into the shadow VMCS and enable
3704 * VMCS shadowing to skip intercepting some or all VMREAD/VMWRITE VM-exits.
3705 *
3706 * We check for VMX root mode here in case the guest executes VMXOFF without
3707 * clearing the current VMCS pointer and our VMXOFF instruction emulation does
3708 * not clear the current VMCS pointer.
3709 */
3710 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3711 if ( CPUMIsGuestInVmxRootMode(&pVCpu->cpum.GstCtx)
3712 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)
3713 && CPUMIsGuestVmxCurrentVmcsValid(&pVCpu->cpum.GstCtx))
3714 {
3715 /* Paranoia. */
3716 Assert(!pVmxTransient->fIsNestedGuest);
3717
3718 /*
3719 * For performance reasons, also check if the nested hypervisor's current VMCS
3720 * was newly loaded or modified before copying it to the shadow VMCS.
3721 */
3722 if (!pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs)
3723 {
3724 int rc = vmxHCCopyNstGstToShadowVmcs(pVCpu, pVmcsInfo);
3725 AssertRCReturn(rc, rc);
3726 pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs = true;
3727 }
3728 vmxHCEnableVmcsShadowing(pVCpu, pVmcsInfo);
3729 }
3730 else
3731 vmxHCDisableVmcsShadowing(pVCpu, pVmcsInfo);
3732 }
3733#else
3734 NOREF(pVmxTransient);
3735#endif
3736 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_HWVIRT);
3737 }
3738 return VINF_SUCCESS;
3739}
3740
3741
3742/**
3743 * Exports the guest debug registers into the guest-state area in the VMCS.
3744 * The guest debug bits are partially shared with the host (e.g. DR6, DR0-3).
3745 *
3746 * This also sets up whether \#DB and MOV DRx accesses cause VM-exits.
3747 *
3748 * @returns VBox status code.
3749 * @param pVCpu The cross context virtual CPU structure.
3750 * @param pVmxTransient The VMX-transient structure.
3751 *
3752 * @remarks No-long-jump zone!!!
3753 */
3754static int hmR0VmxExportSharedDebugState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
3755{
3756 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3757
3758 /** @todo NSTVMX: Figure out what we want to do with nested-guest instruction
3759 * stepping. */
3760 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3761 if (pVmxTransient->fIsNestedGuest)
3762 {
3763 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, CPUMGetGuestDR7(pVCpu));
3764 AssertRC(rc);
3765
3766 /*
3767 * We don't want to always intercept MOV DRx for nested-guests as it causes
3768 * problems when the nested hypervisor isn't intercepting them, see @bugref{10080}.
3769 * Instead, they are strictly only requested when the nested hypervisor intercepts
3770 * them -- handled while merging VMCS controls.
3771 *
3772 * If neither the outer nor the nested-hypervisor is intercepting MOV DRx,
3773 * then the nested-guest debug state should be actively loaded on the host so that the
3774 * nested-guest reads its own debug registers without causing VM-exits.
3775 */
3776 if ( !(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
3777 && !CPUMIsGuestDebugStateActive(pVCpu))
3778 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3779 return VINF_SUCCESS;
3780 }
3781
3782#ifdef VBOX_STRICT
3783 /* Validate. Intel spec. 26.3.1.1 "Checks on Guest Controls Registers, Debug Registers, MSRs" */
3784 if (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG)
3785 {
3786 /* Validate. Intel spec. 17.2 "Debug Registers", recompiler paranoia checks. */
3787 Assert((pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_MBZ_MASK | X86_DR7_RAZ_MASK)) == 0);
3788 Assert((pVCpu->cpum.GstCtx.dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK);
3789 }
3790#endif
3791
3792 bool fSteppingDB = false;
3793 bool fInterceptMovDRx = false;
3794 uint32_t uProcCtls = pVmcsInfo->u32ProcCtls;
3795 if (pVCpu->hm.s.fSingleInstruction)
3796 {
3797 /* If the CPU supports the monitor trap flag, use it for single stepping in DBGF and avoid intercepting #DB. */
3798 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MONITOR_TRAP_FLAG)
3799 {
3800 uProcCtls |= VMX_PROC_CTLS_MONITOR_TRAP_FLAG;
3801 Assert(fSteppingDB == false);
3802 }
3803 else
3804 {
3805 pVCpu->cpum.GstCtx.eflags.u32 |= X86_EFL_TF;
3806 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_RFLAGS;
3807 pVCpu->hmr0.s.fClearTrapFlag = true;
3808 fSteppingDB = true;
3809 }
3810 }
3811
3812 uint64_t u64GuestDr7;
3813 if ( fSteppingDB
3814 || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK))
3815 {
3816 /*
3817 * Use the combined guest and host DRx values found in the hypervisor register set
3818 * because the hypervisor debugger has breakpoints active or someone is single stepping
3819 * on the host side without a monitor trap flag.
3820 *
3821 * Note! DBGF expects a clean DR6 state before executing guest code.
3822 */
3823 if (!CPUMIsHyperDebugStateActive(pVCpu))
3824 {
3825 CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */);
3826 Assert(CPUMIsHyperDebugStateActive(pVCpu));
3827 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
3828 }
3829
3830 /* Update DR7 with the hypervisor value (other DRx registers are handled by CPUM one way or another). */
3831 u64GuestDr7 = CPUMGetHyperDR7(pVCpu);
3832 pVCpu->hmr0.s.fUsingHyperDR7 = true;
3833 fInterceptMovDRx = true;
3834 }
3835 else
3836 {
3837 /*
3838 * If the guest has enabled debug registers, we need to load them prior to
3839 * executing guest code so they'll trigger at the right time.
3840 */
3841 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_DR7);
3842 if (pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD))
3843 {
3844 if (!CPUMIsGuestDebugStateActive(pVCpu))
3845 {
3846 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3847 Assert(CPUMIsGuestDebugStateActive(pVCpu));
3848 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
3849 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
3850 }
3851 Assert(!fInterceptMovDRx);
3852 }
3853 else if (!CPUMIsGuestDebugStateActive(pVCpu))
3854 {
3855 /*
3856 * If no debugging is enabled, we'll lazily load DR0-3. Unlike on AMD-V, we must
3857 * intercept #DB in order to maintain a correct guest DR6 value; and since we need
3858 * to intercept it anyway to prevent nested #DBs from hanging the CPU, we end up
3859 * always having to intercept it. See hmR0VmxSetupVmcsXcptBitmap().
3860 */
3861 fInterceptMovDRx = true;
3862 }
3863
3864 /* Update DR7 with the actual guest value. */
3865 u64GuestDr7 = pVCpu->cpum.GstCtx.dr[7];
3866 pVCpu->hmr0.s.fUsingHyperDR7 = false;
3867 }
3868
3869 if (fInterceptMovDRx)
3870 uProcCtls |= VMX_PROC_CTLS_MOV_DR_EXIT;
3871 else
3872 uProcCtls &= ~VMX_PROC_CTLS_MOV_DR_EXIT;
3873
3874 /*
3875 * Update the processor-based VM-execution controls with the MOV-DRx intercepts and the
3876 * monitor-trap flag and update our cache.
3877 */
3878 if (uProcCtls != pVmcsInfo->u32ProcCtls)
3879 {
3880 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls);
3881 AssertRC(rc);
3882 pVmcsInfo->u32ProcCtls = uProcCtls;
3883 }
3884
3885 /*
3886 * Update guest DR7.
3887 */
3888 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, u64GuestDr7);
3889 AssertRC(rc);
3890
3891 /*
3892 * If we have forced EFLAGS.TF to be set because we're single-stepping in the hypervisor debugger,
3893 * we need to clear any interrupt inhibition, as otherwise it causes a VM-entry failure.
3894 *
3895 * See Intel spec. 26.3.1.5 "Checks on Guest Non-Register State".
3896 */
3897 if (fSteppingDB)
3898 {
3899 Assert(pVCpu->hm.s.fSingleInstruction);
3900 Assert(pVCpu->cpum.GstCtx.eflags.Bits.u1TF);
3901
3902 uint32_t fIntrState = 0;
3903 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState);
3904 AssertRC(rc);
3905
3906 if (fIntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS))
3907 {
3908 fIntrState &= ~(VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS);
3909 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState);
3910 AssertRC(rc);
3911 }
3912 }
3913
3914 return VINF_SUCCESS;
3915}
3916
3917
3918/**
3919 * Exports certain guest MSRs into the VM-entry MSR-load and VM-exit MSR-store
3920 * areas.
3921 *
3922 * These MSRs will automatically be loaded to the host CPU on every successful
3923 * VM-entry and stored from the host CPU on every successful VM-exit.
3924 *
3925 * We create/update MSR slots for the host MSRs in the VM-exit MSR-load area. The
3926 * actual host MSR values are -not- updated here for performance reasons. See
3927 * hmR0VmxExportHostMsrs().
3928 *
3929 * We also export the guest Sysenter MSRs into the guest-state area in the VMCS.
3930 *
3931 * @returns VBox status code.
3932 * @param pVCpu The cross context virtual CPU structure.
3933 * @param pVmxTransient The VMX-transient structure.
3934 *
3935 * @remarks No-long-jump zone!!!
3936 */
3937static int hmR0VmxExportGuestMsrs(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3938{
3939 AssertPtr(pVCpu);
3940 AssertPtr(pVmxTransient);
3941
3942 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3943 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
3944
3945 /*
3946 * MSRs for which we use the auto-load/store MSR area in the VMCS.
3947 * For 64-bit hosts, we load/restore them lazily, see hmR0VmxLazyLoadGuestMsrs(),
3948 * nothing to do here. The host MSR values are updated when it's safe in
3949 * hmR0VmxLazySaveHostMsrs().
3950 *
3951 * For nested-guests, the guest's MSRs from the VM-entry MSR-load area are already
3952 * loaded (into the guest-CPU context) by the VMLAUNCH/VMRESUME instruction
3953 * emulation. The merged MSR permission bitmap will ensure that we get VM-exits
3954 * for any MSRs that are not part of the lazy MSRs, so we do not need to place
3955 * those MSRs into the auto-load/store MSR area. Nothing to do here.
3956 */
3957 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_GUEST_AUTO_MSRS)
3958 {
3959 /* No auto-load/store MSRs currently. */
3960 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_GUEST_AUTO_MSRS);
3961 }
3962
3963 /*
3964 * Guest Sysenter MSRs.
3965 */
3966 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_MSR_MASK)
3967 {
3968 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SYSENTER_MSRS);
3969
3970 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_CS_MSR)
3971 {
3972 int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
3973 AssertRC(rc);
3974 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_CS_MSR);
3975 }
3976
3977 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_EIP_MSR)
3978 {
3979 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
3980 AssertRC(rc);
3981 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_EIP_MSR);
3982 }
3983
3984 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_ESP_MSR)
3985 {
3986 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
3987 AssertRC(rc);
3988 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_ESP_MSR);
3989 }
3990 }
3991
3992 /*
3993 * Guest/host EFER MSR.
3994 */
3995 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_EFER_MSR)
3996 {
3997 /* Whether we are using the VMCS to swap the EFER MSR must have been
3998 determined earlier while exporting VM-entry/VM-exit controls. */
3999 Assert(!(ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_ENTRY_EXIT_CTLS));
4000 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_EFER);
4001
4002 if (hmR0VmxShouldSwapEferMsr(pVCpu, pVmxTransient))
4003 {
4004 /*
4005 * EFER.LME is written by software, while EFER.LMA is set by the CPU to (CR0.PG & EFER.LME).
4006 * This means a guest can set EFER.LME=1 while CR0.PG=0 and EFER.LMA can remain 0.
4007 * VT-x requires that "IA-32e mode guest" VM-entry control must be identical to EFER.LMA
4008 * and to CR0.PG. Without unrestricted execution, CR0.PG (used for VT-x, not the shadow)
4009 * must always be 1. This forces us to effectively clear both EFER.LMA and EFER.LME until
4010 * the guest has also set CR0.PG=1. Otherwise, we would run into an invalid-guest state
4011 * during VM-entry.
4012 */
4013 uint64_t uGuestEferMsr = pCtx->msrEFER;
4014 if (!pVM->hmr0.s.vmx.fUnrestrictedGuest)
4015 {
4016 if (!(pCtx->msrEFER & MSR_K6_EFER_LMA))
4017 uGuestEferMsr &= ~MSR_K6_EFER_LME;
4018 else
4019 Assert((pCtx->msrEFER & (MSR_K6_EFER_LMA | MSR_K6_EFER_LME)) == (MSR_K6_EFER_LMA | MSR_K6_EFER_LME));
4020 }
4021
4022 /*
4023 * If the CPU supports VMCS controls for swapping EFER, use it. Otherwise, we have no option
4024 * but to use the auto-load store MSR area in the VMCS for swapping EFER. See @bugref{7368}.
4025 */
4026 if (g_fHmVmxSupportsVmcsEfer)
4027 {
4028 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_EFER_FULL, uGuestEferMsr);
4029 AssertRC(rc);
4030 }
4031 else
4032 {
4033 /*
4034 * We shall use the auto-load/store MSR area only for loading the EFER MSR but we must
4035 * continue to intercept guest read and write accesses to it, see @bugref{7386#c16}.
4036 */
4037 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER, uGuestEferMsr,
4038 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4039 AssertRCReturn(rc, rc);
4040 }
4041
4042 Log4Func(("efer=%#RX64 shadow=%#RX64\n", uGuestEferMsr, pCtx->msrEFER));
4043 }
4044 else if (!g_fHmVmxSupportsVmcsEfer)
4045 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER);
4046
4047 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_EFER_MSR);
4048 }
4049
4050 /*
4051 * Other MSRs.
4052 */
4053 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_OTHER_MSRS)
4054 {
4055 /* Speculation Control (R/W). */
4056 HMVMX_CPUMCTX_ASSERT(pVCpu, HM_CHANGED_GUEST_OTHER_MSRS);
4057 if (pVM->cpum.ro.GuestFeatures.fIbrs)
4058 {
4059 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_IA32_SPEC_CTRL, CPUMGetGuestSpecCtrl(pVCpu),
4060 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4061 AssertRCReturn(rc, rc);
4062 }
4063
4064 /* Last Branch Record. */
4065 if (pVM->hmr0.s.vmx.fLbr)
4066 {
4067 PVMXVMCSINFOSHARED const pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
4068 uint32_t const idFromIpMsrStart = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
4069 uint32_t const idToIpMsrStart = pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
4070 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
4071 Assert(cLbrStack <= 32);
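 /* Add each LBR from/to-IP MSR pair to the auto-load/store area so the guest's LBR stack
 is loaded on VM-entry and saved back on VM-exit. */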
4072 for (uint32_t i = 0; i < cLbrStack; i++)
4073 {
4074 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idFromIpMsrStart + i,
4075 pVmcsInfoShared->au64LbrFromIpMsr[i],
4076 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4077 AssertRCReturn(rc, rc);
4078
4079 /* Some CPUs don't have a Branch-To-IP MSR (P4 and related Xeons). */
4080 if (idToIpMsrStart != 0)
4081 {
4082 rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idToIpMsrStart + i,
4083 pVmcsInfoShared->au64LbrToIpMsr[i],
4084 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4085 AssertRCReturn(rc, rc);
4086 }
4087 }
4088
4089 /* Add LBR top-of-stack MSR (which contains the index to the most recent record). */
4090 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, pVM->hmr0.s.vmx.idLbrTosMsr,
4091 pVmcsInfoShared->u64LbrTosMsr, false /* fSetReadWrite */,
4092 false /* fUpdateHostMsr */);
4093 AssertRCReturn(rc, rc);
4094 }
4095
4096 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_OTHER_MSRS);
4097 }
4098
4099 return VINF_SUCCESS;
4100}
4101
4102
4103/**
4104 * Wrapper for running the guest code in VT-x.
4105 *
4106 * @returns VBox status code, no informational status codes.
4107 * @param pVCpu The cross context virtual CPU structure.
4108 * @param pVmxTransient The VMX-transient structure.
4109 *
4110 * @remarks No-long-jump zone!!!
4111 */
4112DECLINLINE(int) hmR0VmxRunGuest(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
4113{
4114 /* Mark that HM is the keeper of all guest-CPU registers now that we're going to execute guest code. */
4115 pVCpu->cpum.GstCtx.fExtrn |= HMVMX_CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_KEEPER_HM;
4116
4117 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
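 /* Use VMRESUME if this VMCS is already in the "launched" state; otherwise the entry must
 be done with VMLAUNCH. */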
4118 bool const fResumeVM = RT_BOOL(pVmcsInfo->fVmcsState & VMX_V_VMCS_LAUNCH_STATE_LAUNCHED);
4119#ifdef VBOX_WITH_STATISTICS
4120 if (fResumeVM)
4121 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmResume);
4122 else
4123 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmLaunch);
4124#endif
4125 int rc = pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResumeVM);
4126 AssertMsg(rc <= VINF_SUCCESS, ("%Rrc\n", rc));
4127 return rc;
4128}
4129
4130
4131/**
4132 * Reports world-switch error and dumps some useful debug info.
4133 *
4134 * @param pVCpu The cross context virtual CPU structure.
4135 * @param rcVMRun The return code from VMLAUNCH/VMRESUME.
4136 * @param pVmxTransient The VMX-transient structure (only
4137 * exitReason updated).
4138 */
4139static void hmR0VmxReportWorldSwitchError(PVMCPUCC pVCpu, int rcVMRun, PVMXTRANSIENT pVmxTransient)
4140{
4141 Assert(pVCpu);
4142 Assert(pVmxTransient);
4143 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
4144
4145 Log4Func(("VM-entry failure: %Rrc\n", rcVMRun));
4146 switch (rcVMRun)
4147 {
4148 case VERR_VMX_INVALID_VMXON_PTR:
4149 AssertFailed();
4150 break;
4151 case VINF_SUCCESS: /* VMLAUNCH/VMRESUME succeeded but VM-entry failed... yeah, true story. */
4152 case VERR_VMX_UNABLE_TO_START_VM: /* VMLAUNCH/VMRESUME itself failed. */
4153 {
4154 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &pVCpu->hm.s.vmx.LastError.u32ExitReason);
4155 rc |= VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
4156 AssertRC(rc);
4157 vmxHCReadExitQualVmcs(pVCpu, pVmxTransient);
4158
4159 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
4160 /* LastError.idCurrentCpu was already updated in hmR0VmxPreRunGuestCommitted().
4161 Cannot do it here as we may have been long preempted. */
4162
4163#ifdef VBOX_STRICT
4164 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4165 Log4(("uExitReason %#RX32 (VmxTransient %#RX16)\n", pVCpu->hm.s.vmx.LastError.u32ExitReason,
4166 pVmxTransient->uExitReason));
4167 Log4(("Exit Qualification %#RX64\n", pVmxTransient->uExitQual));
4168 Log4(("InstrError %#RX32\n", pVCpu->hm.s.vmx.LastError.u32InstrError));
4169 if (pVCpu->hm.s.vmx.LastError.u32InstrError <= HMVMX_INSTR_ERROR_MAX)
4170 Log4(("InstrError Desc. \"%s\"\n", g_apszVmxInstrErrors[pVCpu->hm.s.vmx.LastError.u32InstrError]));
4171 else
4172 Log4(("InstrError Desc. Range exceeded %u\n", HMVMX_INSTR_ERROR_MAX));
4173 Log4(("Entered host CPU %u\n", pVCpu->hm.s.vmx.LastError.idEnteredCpu));
4174 Log4(("Current host CPU %u\n", pVCpu->hm.s.vmx.LastError.idCurrentCpu));
4175
4176 static struct
4177 {
4178 /** Name of the field to log. */
4179 const char *pszName;
4180 /** The VMCS field. */
4181 uint32_t uVmcsField;
4182 /** Whether host support of this field needs to be checked. */
4183 bool fCheckSupport;
4184 } const s_aVmcsFields[] =
4185 {
4186 { "VMX_VMCS32_CTRL_PIN_EXEC", VMX_VMCS32_CTRL_PIN_EXEC, false },
4187 { "VMX_VMCS32_CTRL_PROC_EXEC", VMX_VMCS32_CTRL_PROC_EXEC, false },
4188 { "VMX_VMCS32_CTRL_PROC_EXEC2", VMX_VMCS32_CTRL_PROC_EXEC2, true },
4189 { "VMX_VMCS32_CTRL_ENTRY", VMX_VMCS32_CTRL_ENTRY, false },
4190 { "VMX_VMCS32_CTRL_EXIT", VMX_VMCS32_CTRL_EXIT, false },
4191 { "VMX_VMCS32_CTRL_CR3_TARGET_COUNT", VMX_VMCS32_CTRL_CR3_TARGET_COUNT, false },
4192 { "VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO", VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, false },
4193 { "VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE", VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, false },
4194 { "VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH", VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, false },
4195 { "VMX_VMCS32_CTRL_TPR_THRESHOLD", VMX_VMCS32_CTRL_TPR_THRESHOLD, false },
4196 { "VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, false },
4197 { "VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, false },
4198 { "VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, false },
4199 { "VMX_VMCS32_CTRL_EXCEPTION_BITMAP", VMX_VMCS32_CTRL_EXCEPTION_BITMAP, false },
4200 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, false },
4201 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, false },
4202 { "VMX_VMCS_CTRL_CR0_MASK", VMX_VMCS_CTRL_CR0_MASK, false },
4203 { "VMX_VMCS_CTRL_CR0_READ_SHADOW", VMX_VMCS_CTRL_CR0_READ_SHADOW, false },
4204 { "VMX_VMCS_CTRL_CR4_MASK", VMX_VMCS_CTRL_CR4_MASK, false },
4205 { "VMX_VMCS_CTRL_CR4_READ_SHADOW", VMX_VMCS_CTRL_CR4_READ_SHADOW, false },
4206 { "VMX_VMCS64_CTRL_EPTP_FULL", VMX_VMCS64_CTRL_EPTP_FULL, true },
4207 { "VMX_VMCS_GUEST_RIP", VMX_VMCS_GUEST_RIP, false },
4208 { "VMX_VMCS_GUEST_RSP", VMX_VMCS_GUEST_RSP, false },
4209 { "VMX_VMCS_GUEST_RFLAGS", VMX_VMCS_GUEST_RFLAGS, false },
4210 { "VMX_VMCS16_VPID", VMX_VMCS16_VPID, true, },
4211 { "VMX_VMCS_HOST_CR0", VMX_VMCS_HOST_CR0, false },
4212 { "VMX_VMCS_HOST_CR3", VMX_VMCS_HOST_CR3, false },
4213 { "VMX_VMCS_HOST_CR4", VMX_VMCS_HOST_CR4, false },
4214 /* The order of selector fields below are fixed! */
4215 { "VMX_VMCS16_HOST_ES_SEL", VMX_VMCS16_HOST_ES_SEL, false },
4216 { "VMX_VMCS16_HOST_CS_SEL", VMX_VMCS16_HOST_CS_SEL, false },
4217 { "VMX_VMCS16_HOST_SS_SEL", VMX_VMCS16_HOST_SS_SEL, false },
4218 { "VMX_VMCS16_HOST_DS_SEL", VMX_VMCS16_HOST_DS_SEL, false },
4219 { "VMX_VMCS16_HOST_FS_SEL", VMX_VMCS16_HOST_FS_SEL, false },
4220 { "VMX_VMCS16_HOST_GS_SEL", VMX_VMCS16_HOST_GS_SEL, false },
4221 { "VMX_VMCS16_HOST_TR_SEL", VMX_VMCS16_HOST_TR_SEL, false },
4222 /* End of ordered selector fields. */
4223 { "VMX_VMCS_HOST_TR_BASE", VMX_VMCS_HOST_TR_BASE, false },
4224 { "VMX_VMCS_HOST_GDTR_BASE", VMX_VMCS_HOST_GDTR_BASE, false },
4225 { "VMX_VMCS_HOST_IDTR_BASE", VMX_VMCS_HOST_IDTR_BASE, false },
4226 { "VMX_VMCS32_HOST_SYSENTER_CS", VMX_VMCS32_HOST_SYSENTER_CS, false },
4227 { "VMX_VMCS_HOST_SYSENTER_EIP", VMX_VMCS_HOST_SYSENTER_EIP, false },
4228 { "VMX_VMCS_HOST_SYSENTER_ESP", VMX_VMCS_HOST_SYSENTER_ESP, false },
4229 { "VMX_VMCS_HOST_RSP", VMX_VMCS_HOST_RSP, false },
4230 { "VMX_VMCS_HOST_RIP", VMX_VMCS_HOST_RIP, false }
4231 };
4232
4233 RTGDTR HostGdtr;
4234 ASMGetGDTR(&HostGdtr);
4235
4236 uint32_t const cVmcsFields = RT_ELEMENTS(s_aVmcsFields);
4237 for (uint32_t i = 0; i < cVmcsFields; i++)
4238 {
4239 uint32_t const uVmcsField = s_aVmcsFields[i].uVmcsField;
4240
4241 bool fSupported;
4242 if (!s_aVmcsFields[i].fCheckSupport)
4243 fSupported = true;
4244 else
4245 {
4246 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4247 switch (uVmcsField)
4248 {
4249 case VMX_VMCS64_CTRL_EPTP_FULL: fSupported = pVM->hmr0.s.fNestedPaging; break;
4250 case VMX_VMCS16_VPID: fSupported = pVM->hmr0.s.vmx.fVpid; break;
4251 case VMX_VMCS32_CTRL_PROC_EXEC2:
4252 fSupported = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
4253 break;
4254 default:
4255 AssertMsgFailedReturnVoid(("Failed to provide VMCS field support for %#RX32\n", uVmcsField));
4256 }
4257 }
4258
4259 if (fSupported)
4260 {
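 /* The VMCS field encoding includes its access width (bits 14:13); use it to issue a
 VMREAD of the right size. */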
4261 uint8_t const uWidth = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_WIDTH);
4262 switch (uWidth)
4263 {
4264 case VMX_VMCSFIELD_WIDTH_16BIT:
4265 {
4266 uint16_t u16Val;
4267 rc = VMXReadVmcs16(uVmcsField, &u16Val);
4268 AssertRC(rc);
4269 Log4(("%-40s = %#RX16\n", s_aVmcsFields[i].pszName, u16Val));
4270
4271 if ( uVmcsField >= VMX_VMCS16_HOST_ES_SEL
4272 && uVmcsField <= VMX_VMCS16_HOST_TR_SEL)
4273 {
4274 if (u16Val < HostGdtr.cbGdt)
4275 {
4276 /* Order of selectors in s_apszSel is fixed and matches the order in s_aVmcsFields. */
4277 static const char * const s_apszSel[] = { "Host ES", "Host CS", "Host SS", "Host DS",
4278 "Host FS", "Host GS", "Host TR" };
4279 uint8_t const idxSel = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_INDEX);
4280 Assert(idxSel < RT_ELEMENTS(s_apszSel));
4281 PCX86DESCHC pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u16Val & X86_SEL_MASK));
4282 hmR0DumpDescriptor(pDesc, u16Val, s_apszSel[idxSel]);
4283 }
4284 else
4285 Log4((" Selector value exceeds GDT limit!\n"));
4286 }
4287 break;
4288 }
4289
4290 case VMX_VMCSFIELD_WIDTH_32BIT:
4291 {
4292 uint32_t u32Val;
4293 rc = VMXReadVmcs32(uVmcsField, &u32Val);
4294 AssertRC(rc);
4295 Log4(("%-40s = %#RX32\n", s_aVmcsFields[i].pszName, u32Val));
4296 break;
4297 }
4298
4299 case VMX_VMCSFIELD_WIDTH_64BIT:
4300 case VMX_VMCSFIELD_WIDTH_NATURAL:
4301 {
4302 uint64_t u64Val;
4303 rc = VMXReadVmcs64(uVmcsField, &u64Val);
4304 AssertRC(rc);
4305 Log4(("%-40s = %#RX64\n", s_aVmcsFields[i].pszName, u64Val));
4306 break;
4307 }
4308 }
4309 }
4310 }
4311
4312 Log4(("MSR_K6_EFER = %#RX64\n", ASMRdMsr(MSR_K6_EFER)));
4313 Log4(("MSR_K8_CSTAR = %#RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
4314 Log4(("MSR_K8_LSTAR = %#RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
4315 Log4(("MSR_K6_STAR = %#RX64\n", ASMRdMsr(MSR_K6_STAR)));
4316 Log4(("MSR_K8_SF_MASK = %#RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
4317 Log4(("MSR_K8_KERNEL_GS_BASE = %#RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
4318#endif /* VBOX_STRICT */
4319 break;
4320 }
4321
4322 default:
4323 /* Impossible */
4324 AssertMsgFailed(("hmR0VmxReportWorldSwitchError %Rrc (%#x)\n", rcVMRun, rcVMRun));
4325 break;
4326 }
4327}
4328
4329
4330/**
4331 * Sets up the usage of TSC-offsetting and updates the VMCS.
4332 *
4333 * If offsetting is not possible, cause VM-exits on RDTSC(P)s. Also sets up the
4334 * VMX-preemption timer.
4335 *
4336 * @returns VBox status code.
4337 * @param pVCpu The cross context virtual CPU structure.
4338 * @param pVmxTransient The VMX-transient structure.
4339 * @param idCurrentCpu The current CPU number.
4340 *
4341 * @remarks No-long-jump zone!!!
4342 */
4343static void hmR0VmxUpdateTscOffsettingAndPreemptTimer(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, RTCPUID idCurrentCpu)
4344{
4345 bool fOffsettedTsc;
4346 bool fParavirtTsc;
4347 uint64_t uTscOffset;
4348 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4349 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4350
4351 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
4352 {
4353 /* The TMCpuTickGetDeadlineAndTscOffset function is expensive: calling it on
4354 every entry slowed down the bs2-test1 CPUID testcase by ~33% (on a 10980xe). */
4355 uint64_t cTicksToDeadline;
4356 if ( idCurrentCpu == pVCpu->hmr0.s.idLastCpu
4357 && TMVirtualSyncIsCurrentDeadlineVersion(pVM, pVCpu->hmr0.s.vmx.uTscDeadlineVersion))
4358 {
4359 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadline);
4360 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4361 cTicksToDeadline = pVCpu->hmr0.s.vmx.uTscDeadline - SUPReadTsc();
4362 if ((int64_t)cTicksToDeadline > 0)
4363 { /* hopefully */ }
4364 else
4365 {
4366 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadlineExpired);
4367 cTicksToDeadline = 0;
4368 }
4369 }
4370 else
4371 {
4372 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadline);
4373 cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVM, pVCpu, &uTscOffset, &fOffsettedTsc, &fParavirtTsc,
4374 &pVCpu->hmr0.s.vmx.uTscDeadline,
4375 &pVCpu->hmr0.s.vmx.uTscDeadlineVersion);
4376 pVCpu->hmr0.s.vmx.uTscDeadline += cTicksToDeadline;
4377 if (cTicksToDeadline >= 128)
4378 { /* hopefully */ }
4379 else
4380 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadlineExpired);
4381 }
4382
4383 /* Make sure the returned values have sane upper and lower boundaries. */
4384 uint64_t const u64CpuHz = SUPGetCpuHzFromGipBySetIndex(g_pSUPGlobalInfoPage, pVCpu->iHostCpuSet);
4385 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64th of a second, 15.625ms. */ /** @todo r=bird: Once real+virtual timers move to separate thread, we can raise the upper limit (16ms isn't much). ASSUMES working poke cpu function. */
4386 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 32768); /* 1/32768th of a second, ~30us. */
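 /* The shift converts TSC ticks into VMX-preemption timer ticks; the timer counts down at
 the TSC rate divided by 2^cPreemptTimerShift (as reported by IA32_VMX_MISC[4:0]). */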
4387 cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
4388
4389 /** @todo r=ramshankar: We need to find a way to integrate nested-guest
4390 * preemption timers here. We probably need to clamp the preemption timer,
4391 * after converting the timer value to the host. */
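 /* The VMX-preemption timer value field is only 32 bits wide; clamp (with a small safety
 margin) to avoid truncation when converting from the 64-bit tick count. */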
4392 uint32_t const cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
4393 int rc = VMXWriteVmcs32(VMX_VMCS32_PREEMPT_TIMER_VALUE, cPreemptionTickCount);
4394 AssertRC(rc);
4395 }
4396 else
4397 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4398
4399 if (fParavirtTsc)
4400 {
4401 /* Currently neither Hyper-V nor KVM needs to update its paravirt. TSC
4402 information before every VM-entry, hence disable it for performance's sake. */
4403#if 0
4404 int rc = GIMR0UpdateParavirtTsc(pVM, 0 /* u64Offset */);
4405 AssertRC(rc);
4406#endif
4407 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscParavirt);
4408 }
4409
4410 if ( fOffsettedTsc
4411 && RT_LIKELY(!pVCpu->hmr0.s.fDebugWantRdTscExit))
4412 {
4413 if (pVmxTransient->fIsNestedGuest)
4414 uTscOffset = CPUMApplyNestedGuestTscOffset(pVCpu, uTscOffset);
4415 hmR0VmxSetTscOffsetVmcs(pVmcsInfo, uTscOffset);
4416 hmR0VmxRemoveProcCtlsVmcs(pVCpu, pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4417 }
4418 else
4419 {
4420 /* We can't use TSC-offsetting (non-fixed TSC, warp drive active etc.), VM-exit on RDTSC(P). */
4421 hmR0VmxSetProcCtlsVmcs(pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4422 }
4423}
4424
4425
4426/**
4427 * Worker for VMXR0ImportStateOnDemand.
4428 *
4429 * @returns VBox status code.
4430 * @param pVCpu The cross context virtual CPU structure.
4431 * @param pVmcsInfo The VMCS info. object.
4432 * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
4433 */
4434static int hmR0VmxImportGuestState(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint64_t fWhat)
4435{
4436 int rc = VINF_SUCCESS;
4437 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4438 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
4439 uint32_t u32Val;
4440
4441 /*
4442     * Note! This is a hack to work around a mysterious BSOD observed with release builds
4443 * on Windows 10 64-bit hosts. Profile and debug builds are not affected and
4444 * neither are other host platforms.
4445 *
4446     *       Committing this temporarily as it prevents the BSOD.
4447 *
4448 * Update: This is very likely a compiler optimization bug, see @bugref{9180}.
4449 */
4450#ifdef RT_OS_WINDOWS
4451 if (pVM == 0 || pVM == (void *)(uintptr_t)-1)
4452 return VERR_HM_IPE_1;
4453#endif
4454
4455 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatImportGuestState, x);
4456
4457 /*
4458 * We disable interrupts to make the updating of the state and in particular
4459     * the fExtrn modification atomic with respect to preemption hooks.
4460 */
4461 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
4462
4463 fWhat &= pCtx->fExtrn;
4464 if (fWhat)
4465 {
4466 do
4467 {
4468 if (fWhat & CPUMCTX_EXTRN_RIP)
4469 vmxHCImportGuestRip(pVCpu);
4470
4471 if (fWhat & CPUMCTX_EXTRN_RFLAGS)
4472 vmxHCImportGuestRFlags(pVCpu, pVmcsInfo);
4473
4474 if (fWhat & (CPUMCTX_EXTRN_INHIBIT_INT | CPUMCTX_EXTRN_INHIBIT_NMI))
4475 vmxHCImportGuestIntrState(pVCpu, pVmcsInfo);
4476
4477 if (fWhat & CPUMCTX_EXTRN_RSP)
4478 {
4479 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_RSP, &pCtx->rsp);
4480 AssertRC(rc);
4481 }
4482
4483 if (fWhat & CPUMCTX_EXTRN_SREG_MASK)
4484 {
4485 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmcsInfo->pShared;
4486 bool const fRealOnV86Active = pVmcsInfoShared->RealMode.fRealOnV86Active;
4487 if (fWhat & CPUMCTX_EXTRN_CS)
4488 {
4489 vmxHCImportGuestSegReg(pVCpu, X86_SREG_CS);
4490 vmxHCImportGuestRip(pVCpu);
4491 if (fRealOnV86Active)
4492 pCtx->cs.Attr.u = pVmcsInfoShared->RealMode.AttrCS.u;
4493 EMHistoryUpdatePC(pVCpu, pCtx->cs.u64Base + pCtx->rip, true /* fFlattened */);
4494 }
4495 if (fWhat & CPUMCTX_EXTRN_SS)
4496 {
4497 vmxHCImportGuestSegReg(pVCpu, X86_SREG_SS);
4498 if (fRealOnV86Active)
4499 pCtx->ss.Attr.u = pVmcsInfoShared->RealMode.AttrSS.u;
4500 }
4501 if (fWhat & CPUMCTX_EXTRN_DS)
4502 {
4503 vmxHCImportGuestSegReg(pVCpu, X86_SREG_DS);
4504 if (fRealOnV86Active)
4505 pCtx->ds.Attr.u = pVmcsInfoShared->RealMode.AttrDS.u;
4506 }
4507 if (fWhat & CPUMCTX_EXTRN_ES)
4508 {
4509 vmxHCImportGuestSegReg(pVCpu, X86_SREG_ES);
4510 if (fRealOnV86Active)
4511 pCtx->es.Attr.u = pVmcsInfoShared->RealMode.AttrES.u;
4512 }
4513 if (fWhat & CPUMCTX_EXTRN_FS)
4514 {
4515 vmxHCImportGuestSegReg(pVCpu, X86_SREG_FS);
4516 if (fRealOnV86Active)
4517 pCtx->fs.Attr.u = pVmcsInfoShared->RealMode.AttrFS.u;
4518 }
4519 if (fWhat & CPUMCTX_EXTRN_GS)
4520 {
4521 vmxHCImportGuestSegReg(pVCpu, X86_SREG_GS);
4522 if (fRealOnV86Active)
4523 pCtx->gs.Attr.u = pVmcsInfoShared->RealMode.AttrGS.u;
4524 }
4525 }
4526
4527 if (fWhat & CPUMCTX_EXTRN_TABLE_MASK)
4528 {
4529 if (fWhat & CPUMCTX_EXTRN_LDTR)
4530 vmxHCImportGuestLdtr(pVCpu);
4531
4532 if (fWhat & CPUMCTX_EXTRN_GDTR)
4533 {
4534 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_GDTR_BASE, &pCtx->gdtr.pGdt); AssertRC(rc);
4535 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val); AssertRC(rc);
4536 pCtx->gdtr.cbGdt = u32Val;
4537 }
4538
4539 /* Guest IDTR. */
4540 if (fWhat & CPUMCTX_EXTRN_IDTR)
4541 {
4542 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_IDTR_BASE, &pCtx->idtr.pIdt); AssertRC(rc);
4543 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val); AssertRC(rc);
4544 pCtx->idtr.cbIdt = u32Val;
4545 }
4546
4547 /* Guest TR. */
4548 if (fWhat & CPUMCTX_EXTRN_TR)
4549 {
4550                    /* Real-mode emulation using virtual-8086 mode has the fake TSS (pRealModeTSS) in TR,
4551                       so we don't need to import it. */
4552 if (!pVmcsInfo->pShared->RealMode.fRealOnV86Active)
4553 vmxHCImportGuestTr(pVCpu);
4554 }
4555 }
4556
4557 if (fWhat & CPUMCTX_EXTRN_DR7)
4558 {
4559 if (!pVCpu->hmr0.s.fUsingHyperDR7)
4560 {
4561 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_DR7, &pCtx->dr[7]);
4562 AssertRC(rc);
4563 }
4564 }
4565
4566 if (fWhat & CPUMCTX_EXTRN_SYSENTER_MSRS)
4567 {
4568 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, &pCtx->SysEnter.eip); AssertRC(rc);
4569 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, &pCtx->SysEnter.esp); AssertRC(rc);
4570 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, &u32Val); AssertRC(rc);
4571 pCtx->SysEnter.cs = u32Val;
4572 }
4573
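            /* The following MSRs are lazily swapped: when VMX_LAZY_MSRS_LOADED_GUEST is set the
               guest values are still loaded in the hardware MSRs, so read them back with RDMSR. */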
4574 if (fWhat & CPUMCTX_EXTRN_KERNEL_GS_BASE)
4575 {
4576 if ( pVM->hmr0.s.fAllow64BitGuests
4577 && (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST))
4578 pCtx->msrKERNELGSBASE = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
4579 }
4580
4581 if (fWhat & CPUMCTX_EXTRN_SYSCALL_MSRS)
4582 {
4583 if ( pVM->hmr0.s.fAllow64BitGuests
4584 && (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST))
4585 {
4586 pCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR);
4587 pCtx->msrSTAR = ASMRdMsr(MSR_K6_STAR);
4588 pCtx->msrSFMASK = ASMRdMsr(MSR_K8_SF_MASK);
4589 }
4590 }
4591
4592 if (fWhat & (CPUMCTX_EXTRN_TSC_AUX | CPUMCTX_EXTRN_OTHER_MSRS))
4593 {
4594 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmcsInfo->pShared;
4595 PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
4596 uint32_t const cMsrs = pVmcsInfo->cExitMsrStore;
4597 Assert(pMsrs);
4598 Assert(cMsrs <= VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc));
4599 Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE);
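                /* Each entry in the VM-exit MSR-store area holds the guest value the CPU saved
                   there on VM-exit; copy the ones we recognize back into the guest-CPU context. */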
4600 for (uint32_t i = 0; i < cMsrs; i++)
4601 {
4602 uint32_t const idMsr = pMsrs[i].u32Msr;
4603 switch (idMsr)
4604 {
4605 case MSR_K8_TSC_AUX: CPUMSetGuestTscAux(pVCpu, pMsrs[i].u64Value); break;
4606 case MSR_IA32_SPEC_CTRL: CPUMSetGuestSpecCtrl(pVCpu, pMsrs[i].u64Value); break;
4607 case MSR_K6_EFER: /* Can't be changed without causing a VM-exit */ break;
4608 default:
4609 {
4610 uint32_t idxLbrMsr;
4611 if (pVM->hmr0.s.vmx.fLbr)
4612 {
4613 if (hmR0VmxIsLbrBranchFromMsr(pVM, idMsr, &idxLbrMsr))
4614 {
4615 Assert(idxLbrMsr < RT_ELEMENTS(pVmcsInfoShared->au64LbrFromIpMsr));
4616 pVmcsInfoShared->au64LbrFromIpMsr[idxLbrMsr] = pMsrs[i].u64Value;
4617 break;
4618 }
4619 if (hmR0VmxIsLbrBranchToMsr(pVM, idMsr, &idxLbrMsr))
4620 {
4621                                    Assert(idxLbrMsr < RT_ELEMENTS(pVmcsInfoShared->au64LbrToIpMsr));
4622 pVmcsInfoShared->au64LbrToIpMsr[idxLbrMsr] = pMsrs[i].u64Value;
4623 break;
4624 }
4625 if (idMsr == pVM->hmr0.s.vmx.idLbrTosMsr)
4626 {
4627 pVmcsInfoShared->u64LbrTosMsr = pMsrs[i].u64Value;
4628 break;
4629 }
4630 /* Fallthru (no break) */
4631 }
4632 pCtx->fExtrn = 0;
4633 pVCpu->hm.s.u32HMError = pMsrs->u32Msr;
4634 ASMSetFlags(fEFlags);
4635 AssertMsgFailed(("Unexpected MSR in auto-load/store area. idMsr=%#RX32 cMsrs=%u\n", idMsr, cMsrs));
4636 return VERR_HM_UNEXPECTED_LD_ST_MSR;
4637 }
4638 }
4639 }
4640 }
4641
4642 if (fWhat & CPUMCTX_EXTRN_CR_MASK)
4643 {
4644 if (fWhat & CPUMCTX_EXTRN_CR0)
4645 {
4646 uint64_t u64Cr0;
4647 uint64_t u64Shadow;
4648 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR0, &u64Cr0); AssertRC(rc);
4649 rc = VMXReadVmcsNw(VMX_VMCS_CTRL_CR0_READ_SHADOW, &u64Shadow); AssertRC(rc);
4650#ifndef VBOX_WITH_NESTED_HWVIRT_VMX
4651 u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
4652 | (u64Shadow & pVmcsInfo->u64Cr0Mask);
4653#else
4654 if (!CPUMIsGuestInVmxNonRootMode(pCtx))
4655 {
4656 u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
4657 | (u64Shadow & pVmcsInfo->u64Cr0Mask);
4658 }
4659 else
4660 {
4661 /*
4662 * We've merged the guest and nested-guest's CR0 guest/host mask while executing
4663 * the nested-guest using hardware-assisted VMX. Accordingly we need to
4664 * re-construct CR0. See @bugref{9180#c95} for details.
4665 */
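                    /* Rough reading of the merge below: bits outside the merged guest/host mask come
                       from the hardware CR0, bits owned by the nested hypervisor come from the
                       nested-guest VMCS, and the bits only we own come from our CR0 read shadow. */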
4666 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
4667 PVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
4668 u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
4669 | (pVmcsNstGst->u64GuestCr0.u & pVmcsNstGst->u64Cr0Mask.u)
4670 | (u64Shadow & (pVmcsInfoGst->u64Cr0Mask & ~pVmcsNstGst->u64Cr0Mask.u));
4671 }
4672#endif
4673 VMMRZCallRing3Disable(pVCpu); /* May call into PGM which has Log statements. */
4674 CPUMSetGuestCR0(pVCpu, u64Cr0);
4675 VMMRZCallRing3Enable(pVCpu);
4676 }
4677
4678 if (fWhat & CPUMCTX_EXTRN_CR4)
4679 {
4680 uint64_t u64Cr4;
4681 uint64_t u64Shadow;
4682 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR4, &u64Cr4); AssertRC(rc);
4683 rc |= VMXReadVmcsNw(VMX_VMCS_CTRL_CR4_READ_SHADOW, &u64Shadow); AssertRC(rc);
4684#ifndef VBOX_WITH_NESTED_HWVIRT_VMX
4685 u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
4686 | (u64Shadow & pVmcsInfo->u64Cr4Mask);
4687#else
4688 if (!CPUMIsGuestInVmxNonRootMode(pCtx))
4689 {
4690 u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
4691 | (u64Shadow & pVmcsInfo->u64Cr4Mask);
4692 }
4693 else
4694 {
4695 /*
4696 * We've merged the guest and nested-guest's CR4 guest/host mask while executing
4697 * the nested-guest using hardware-assisted VMX. Accordingly we need to
4698 * re-construct CR4. See @bugref{9180#c95} for details.
4699 */
4700 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
4701 PVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
4702 u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
4703 | (pVmcsNstGst->u64GuestCr4.u & pVmcsNstGst->u64Cr4Mask.u)
4704 | (u64Shadow & (pVmcsInfoGst->u64Cr4Mask & ~pVmcsNstGst->u64Cr4Mask.u));
4705 }
4706#endif
4707 pCtx->cr4 = u64Cr4;
4708 }
4709
4710 if (fWhat & CPUMCTX_EXTRN_CR3)
4711 {
4712 /* CR0.PG bit changes are always intercepted, so it's up to date. */
4713 if ( pVM->hmr0.s.vmx.fUnrestrictedGuest
4714 || ( pVM->hmr0.s.fNestedPaging
4715 && CPUMIsGuestPagingEnabledEx(pCtx)))
4716 {
4717 uint64_t u64Cr3;
4718 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR3, &u64Cr3); AssertRC(rc);
4719 if (pCtx->cr3 != u64Cr3)
4720 {
4721 pCtx->cr3 = u64Cr3;
4722 VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3);
4723 }
4724
4725 /*
4726 * If the guest is in PAE mode, sync back the PDPE's into the guest state.
4727 * CR4.PAE, CR0.PG, EFER MSR changes are always intercepted, so they're up to date.
4728 */
4729 if (CPUMIsGuestInPAEModeEx(pCtx))
4730 {
4731 X86PDPE aPaePdpes[4];
4732 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &aPaePdpes[0].u); AssertRC(rc);
4733 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &aPaePdpes[1].u); AssertRC(rc);
4734 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &aPaePdpes[2].u); AssertRC(rc);
4735 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &aPaePdpes[3].u); AssertRC(rc);
4736 if (memcmp(&aPaePdpes[0], &pCtx->aPaePdpes[0], sizeof(aPaePdpes)))
4737 {
4738 memcpy(&pCtx->aPaePdpes[0], &aPaePdpes[0], sizeof(aPaePdpes));
4739 /* PGM now updates PAE PDPTEs while updating CR3. */
4740 VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3);
4741 }
4742 }
4743 }
4744 }
4745 }
4746
4747#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4748 if (fWhat & CPUMCTX_EXTRN_HWVIRT)
4749 {
4750 if ( (pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING)
4751 && !CPUMIsGuestInVmxNonRootMode(pCtx))
4752 {
4753 Assert(CPUMIsGuestInVmxRootMode(pCtx));
4754 rc = vmxHCCopyShadowToNstGstVmcs(pVCpu, pVmcsInfo);
4755 if (RT_SUCCESS(rc))
4756 { /* likely */ }
4757 else
4758 break;
4759 }
4760 }
4761#endif
4762 } while (0);
4763
4764 if (RT_SUCCESS(rc))
4765 {
4766 /* Update fExtrn. */
4767 pCtx->fExtrn &= ~fWhat;
4768
4769 /* If everything has been imported, clear the HM keeper bit. */
4770 if (!(pCtx->fExtrn & HMVMX_CPUMCTX_EXTRN_ALL))
4771 {
4772 pCtx->fExtrn &= ~CPUMCTX_EXTRN_KEEPER_HM;
4773 Assert(!pCtx->fExtrn);
4774 }
4775 }
4776 }
4777 else
4778 AssertMsg(!pCtx->fExtrn || (pCtx->fExtrn & HMVMX_CPUMCTX_EXTRN_ALL), ("%#RX64\n", pCtx->fExtrn));
4779
4780 /*
4781 * Restore interrupts.
4782 */
4783 ASMSetFlags(fEFlags);
4784
4785    STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatImportGuestState, x);
4786
4787 if (RT_SUCCESS(rc))
4788 { /* likely */ }
4789 else
4790 return rc;
4791
4792 /*
4793 * Honor any pending CR3 updates.
4794 *
4795 * Consider this scenario: VM-exit -> VMMRZCallRing3Enable() -> do stuff that causes a longjmp -> VMXR0CallRing3Callback()
4796 * -> VMMRZCallRing3Disable() -> hmR0VmxImportGuestState() -> Sets VMCPU_FF_HM_UPDATE_CR3 pending -> return from the longjmp
4797 * -> continue with VM-exit handling -> hmR0VmxImportGuestState() and here we are.
4798 *
4799 * The reason for such complicated handling is because VM-exits that call into PGM expect CR3 to be up-to-date and thus
4800 * if any CR3-saves -before- the VM-exit (longjmp) postponed the CR3 update via the force-flag, any VM-exit handler that
4801 * calls into PGM when it re-saves CR3 will end up here and we call PGMUpdateCR3(). This is why the code below should
4802 * -NOT- check if CPUMCTX_EXTRN_CR3 is set!
4803 *
4804 * The longjmp exit path can't check these CR3 force-flags and call code that takes a lock again. We cover for it here.
4805 *
4806 * The force-flag is checked first as it's cheaper for potential superfluous calls to this function.
4807 */
4808 if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3)
4809 && VMMRZCallRing3IsEnabled(pVCpu))
4810 {
4811 Assert(!(ASMAtomicUoReadU64(&pCtx->fExtrn) & CPUMCTX_EXTRN_CR3));
4812 PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
4813 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
4814 }
4815
4816 return VINF_SUCCESS;
4817}
4818
4819
4820/**
4821 * Saves the guest state from the VMCS into the guest-CPU context.
4822 *
4823 * @returns VBox status code.
4824 * @param pVCpu The cross context virtual CPU structure.
4825 * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
4826 */
4827VMMR0DECL(int) VMXR0ImportStateOnDemand(PVMCPUCC pVCpu, uint64_t fWhat)
4828{
4829 AssertPtr(pVCpu);
4830 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4831 return hmR0VmxImportGuestState(pVCpu, pVmcsInfo, fWhat);
4832}
4833
4834
4835/**
4836 * Gets VMX VM-exit auxiliary information.
4837 *
4838 * @returns VBox status code.
4839 * @param pVCpu The cross context virtual CPU structure.
4840 * @param pVmxExitAux Where to store the VM-exit auxiliary info.
4841 * @param fWhat What to fetch, HMVMX_READ_XXX.
4842 */
4843VMMR0DECL(int) VMXR0GetExitAuxInfo(PVMCPUCC pVCpu, PVMXEXITAUX pVmxExitAux, uint32_t fWhat)
4844{
4845 PVMXTRANSIENT pVmxTransient = pVCpu->hmr0.s.vmx.pVmxTransient;
4846 if (RT_LIKELY(pVmxTransient))
4847 {
4848 AssertCompile(sizeof(fWhat) == sizeof(pVmxTransient->fVmcsFieldsRead));
4849 fWhat &= ~pVmxTransient->fVmcsFieldsRead;
4850
4851 /* The exit reason is always available. */
4852 pVmxExitAux->uReason = pVmxTransient->uExitReason;
4853
4854 if (fWhat & HMVMX_READ_EXIT_QUALIFICATION)
4855 {
4856 vmxHCReadExitQualVmcs(pVCpu, pVmxTransient);
4857 fWhat &= ~HMVMX_READ_EXIT_QUALIFICATION;
4858 pVmxExitAux->u64Qual = pVmxTransient->uExitQual;
4859 }
4860
4861 if (fWhat & HMVMX_READ_IDT_VECTORING_INFO)
4862 {
4863 vmxHCReadIdtVectoringInfoVmcs(pVCpu, pVmxTransient);
4864 fWhat &= ~HMVMX_READ_IDT_VECTORING_INFO;
4865 pVmxExitAux->uIdtVectoringInfo = pVmxTransient->uIdtVectoringInfo;
4866 }
4867
4868 if (fWhat & HMVMX_READ_IDT_VECTORING_ERROR_CODE)
4869 {
4870 vmxHCReadIdtVectoringErrorCodeVmcs(pVCpu, pVmxTransient);
4871 fWhat &= ~HMVMX_READ_IDT_VECTORING_ERROR_CODE;
4872 pVmxExitAux->uIdtVectoringErrCode = pVmxTransient->uIdtVectoringErrorCode;
4873 }
4874
4875 if (fWhat & HMVMX_READ_EXIT_INSTR_LEN)
4876 {
4877 vmxHCReadExitInstrLenVmcs(pVCpu, pVmxTransient);
4878 fWhat &= ~HMVMX_READ_EXIT_INSTR_LEN;
4879 pVmxExitAux->cbInstr = pVmxTransient->cbExitInstr;
4880 }
4881
4882 if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_INFO)
4883 {
4884 vmxHCReadExitIntInfoVmcs(pVCpu, pVmxTransient);
4885 fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_INFO;
4886 pVmxExitAux->uExitIntInfo = pVmxTransient->uExitIntInfo;
4887 }
4888
4889 if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE)
4890 {
4891 vmxHCReadExitIntErrorCodeVmcs(pVCpu, pVmxTransient);
4892 fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE;
4893 pVmxExitAux->uExitIntErrCode = pVmxTransient->uExitIntErrorCode;
4894 }
4895
4896 if (fWhat & HMVMX_READ_EXIT_INSTR_INFO)
4897 {
4898 vmxHCReadExitInstrInfoVmcs(pVCpu, pVmxTransient);
4899 fWhat &= ~HMVMX_READ_EXIT_INSTR_INFO;
4900 pVmxExitAux->InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
4901 }
4902
4903 if (fWhat & HMVMX_READ_GUEST_LINEAR_ADDR)
4904 {
4905 vmxHCReadGuestLinearAddrVmcs(pVCpu, pVmxTransient);
4906 fWhat &= ~HMVMX_READ_GUEST_LINEAR_ADDR;
4907 pVmxExitAux->u64GuestLinearAddr = pVmxTransient->uGuestLinearAddr;
4908 }
4909
4910 if (fWhat & HMVMX_READ_GUEST_PHYSICAL_ADDR)
4911 {
4912 vmxHCReadGuestPhysicalAddrVmcs(pVCpu, pVmxTransient);
4913 fWhat &= ~HMVMX_READ_GUEST_PHYSICAL_ADDR;
4914 pVmxExitAux->u64GuestPhysAddr = pVmxTransient->uGuestPhysicalAddr;
4915 }
4916
4917 if (fWhat & HMVMX_READ_GUEST_PENDING_DBG_XCPTS)
4918 {
4919 vmxHCReadGuestPendingDbgXctps(pVCpu, pVmxTransient);
4920 fWhat &= ~HMVMX_READ_GUEST_PENDING_DBG_XCPTS;
4921 pVmxExitAux->u64GuestPendingDbgXcpts = pVmxTransient->uGuestPendingDbgXcpts;
4922 }
4923
4924 AssertMsg(!fWhat, ("fWhat=%#RX32 fVmcsFieldsRead=%#RX32\n", fWhat, pVmxTransient->fVmcsFieldsRead));
4925 return VINF_SUCCESS;
4926 }
4927 return VERR_NOT_AVAILABLE;
4928}
4929
4930
4931/**
4932 * Does the necessary state syncing before returning to ring-3 for any reason
4933 * (longjmp, preemption, voluntary exits to ring-3) from VT-x.
4934 *
4935 * @returns VBox status code.
4936 * @param pVCpu The cross context virtual CPU structure.
4937 * @param fImportState Whether to import the guest state from the VMCS back
4938 * to the guest-CPU context.
4939 *
4940 * @remarks No-long-jmp zone!!!
4941 */
4942static int hmR0VmxLeave(PVMCPUCC pVCpu, bool fImportState)
4943{
4944 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4945 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
4946
4947 RTCPUID const idCpu = RTMpCpuId();
4948 Log4Func(("HostCpuId=%u\n", idCpu));
4949
4950 /*
4951 * !!! IMPORTANT !!!
4952 * If you modify code here, check whether VMXR0CallRing3Callback() needs to be updated too.
4953 */
4954
4955 /* Save the guest state if necessary. */
4956 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4957 if (fImportState)
4958 {
4959 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
4960 AssertRCReturn(rc, rc);
4961 }
4962
4963 /* Restore host FPU state if necessary. We will resync on next R0 reentry. */
4964 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
4965 Assert(!CPUMIsGuestFPUStateActive(pVCpu));
4966
4967 /* Restore host debug registers if necessary. We will resync on next R0 reentry. */
4968#ifdef VBOX_STRICT
4969 if (CPUMIsHyperDebugStateActive(pVCpu))
4970 Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT);
4971#endif
4972 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
4973 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
4974 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
4975
4976 /* Restore host-state bits that VT-x only restores partially. */
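    /* (Presumably things such as GDTR/IDTR limits and selector bases that the CPU does not fully
       reload from the VMCS host-state area on VM-exit; see VMXRestoreHostState().) */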
4977 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
4978 {
4979 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags, idCpu));
4980 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
4981 }
4982 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
4983
4984 /* Restore the lazy host MSRs as we're leaving VT-x context. */
4985 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
4986 {
4987 /* We shouldn't restore the host MSRs without saving the guest MSRs first. */
4988 if (!fImportState)
4989 {
4990 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS);
4991 AssertRCReturn(rc, rc);
4992 }
4993 hmR0VmxLazyRestoreHostMsrs(pVCpu);
4994 Assert(!pVCpu->hmr0.s.vmx.fLazyMsrs);
4995 }
4996 else
4997 pVCpu->hmr0.s.vmx.fLazyMsrs = 0;
4998
4999 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
5000 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
5001
5002 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry);
5003 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatImportGuestState);
5004 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExportGuestState);
5005 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatPreExit);
5006 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitHandling);
5007 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitIO);
5008 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitMovCRx);
5009 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitXcptNmi);
5010 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitVmentry);
5011 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
5012
5013 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
5014
5015 /** @todo This partially defeats the purpose of having preemption hooks.
5016     * The problem is that deregistering the hooks should be moved to a place that
5017     * lasts until the EMT is about to be destroyed, not be done every time we leave
5018     * HM context.
5019 */
5020 int rc = hmR0VmxClearVmcs(pVmcsInfo);
5021 AssertRCReturn(rc, rc);
5022
5023#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5024 /*
5025 * A valid shadow VMCS is made active as part of VM-entry. It is necessary to
5026 * clear a shadow VMCS before allowing that VMCS to become active on another
5027 * logical processor. We may or may not be importing guest state which clears
5028 * it, so cover for it here.
5029 *
5030 * See Intel spec. 24.11.1 "Software Use of Virtual-Machine Control Structures".
5031 */
5032 if ( pVmcsInfo->pvShadowVmcs
5033 && pVmcsInfo->fShadowVmcsState != VMX_V_VMCS_LAUNCH_STATE_CLEAR)
5034 {
5035 rc = vmxHCClearShadowVmcs(pVmcsInfo);
5036 AssertRCReturn(rc, rc);
5037 }
5038
5039 /*
5040 * Flag that we need to re-export the host state if we switch to this VMCS before
5041 * executing guest or nested-guest code.
5042 */
5043 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
5044#endif
5045
5046 Log4Func(("Cleared Vmcs. HostCpuId=%u\n", idCpu));
5047 NOREF(idCpu);
5048 return VINF_SUCCESS;
5049}
5050
5051
5052/**
5053 * Leaves the VT-x session.
5054 *
5055 * @returns VBox status code.
5056 * @param pVCpu The cross context virtual CPU structure.
5057 *
5058 * @remarks No-long-jmp zone!!!
5059 */
5060static int hmR0VmxLeaveSession(PVMCPUCC pVCpu)
5061{
5062 HM_DISABLE_PREEMPT(pVCpu);
5063 HMVMX_ASSERT_CPU_SAFE(pVCpu);
5064 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5065 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5066
5067 /* When thread-context hooks are used, we can avoid doing the leave again if we had been preempted before
5068 and done this from the VMXR0ThreadCtxCallback(). */
5069 if (!pVCpu->hmr0.s.fLeaveDone)
5070 {
5071 int rc2 = hmR0VmxLeave(pVCpu, true /* fImportState */);
5072 AssertRCReturnStmt(rc2, HM_RESTORE_PREEMPT(), rc2);
5073 pVCpu->hmr0.s.fLeaveDone = true;
5074 }
5075 Assert(!pVCpu->cpum.GstCtx.fExtrn);
5076
5077 /*
5078 * !!! IMPORTANT !!!
5079 * If you modify code here, make sure to check whether VMXR0CallRing3Callback() needs to be updated too.
5080 */
5081
5082 /* Deregister hook now that we've left HM context before re-enabling preemption. */
5083 /** @todo Deregistering here means we need to VMCLEAR always
5084 * (longjmp/exit-to-r3) in VT-x which is not efficient, eliminate need
5085 * for calling VMMR0ThreadCtxHookDisable here! */
5086 VMMR0ThreadCtxHookDisable(pVCpu);
5087
5088 /* Leave HM context. This takes care of local init (term) and deregistering the longjmp-to-ring-3 callback. */
5089 int rc = HMR0LeaveCpu(pVCpu);
5090 HM_RESTORE_PREEMPT();
5091 return rc;
5092}
5093
5094
5095/**
5096 * Takes the necessary actions before going back to ring-3.
5097 *
5098 * An action requires us to go back to ring-3. This function does the necessary
5099 * steps before we can safely return to ring-3. This is not the same as longjmps
5100 * to ring-3; this is voluntary and prepares the guest so it may continue
5101 * executing outside HM (recompiler/IEM).
5102 *
5103 * @returns VBox status code.
5104 * @param pVCpu The cross context virtual CPU structure.
5105 * @param rcExit The reason for exiting to ring-3. Can be
5106 * VINF_VMM_UNKNOWN_RING3_CALL.
5107 */
5108static int hmR0VmxExitToRing3(PVMCPUCC pVCpu, VBOXSTRICTRC rcExit)
5109{
5110 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5111
5112 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5113 if (RT_UNLIKELY(rcExit == VERR_VMX_INVALID_VMCS_PTR))
5114 {
5115 VMXGetCurrentVmcs(&pVCpu->hm.s.vmx.LastError.HCPhysCurrentVmcs);
5116 pVCpu->hm.s.vmx.LastError.u32VmcsRev = *(uint32_t *)pVmcsInfo->pvVmcs;
5117 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
5118 /* LastError.idCurrentCpu was updated in hmR0VmxPreRunGuestCommitted(). */
5119 }
5120
5121    /* Please, no longjumps here (any logging flush must not jump back to ring-3). NO LOGGING BEFORE THIS POINT! */
5122 VMMRZCallRing3Disable(pVCpu);
5123 Log4Func(("rcExit=%d\n", VBOXSTRICTRC_VAL(rcExit)));
5124
5125 /*
5126 * Convert any pending HM events back to TRPM due to premature exits to ring-3.
5127     * We need to do this only on returns to ring-3 and not for longjmps to ring-3.
5128 *
5129 * This is because execution may continue from ring-3 and we would need to inject
5130 * the event from there (hence place it back in TRPM).
5131 */
5132 if (pVCpu->hm.s.Event.fPending)
5133 {
5134 vmxHCPendingEventToTrpmTrap(pVCpu);
5135 Assert(!pVCpu->hm.s.Event.fPending);
5136
5137 /* Clear the events from the VMCS. */
5138 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0); AssertRC(rc);
5139 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, 0); AssertRC(rc);
5140 }
5141#ifdef VBOX_STRICT
5142 /*
5143 * We check for rcExit here since for errors like VERR_VMX_UNABLE_TO_START_VM (which are
5144 * fatal), we don't care about verifying duplicate injection of events. Errors like
5145 * VERR_EM_INTERPRET are converted to their VINF_* counterparts -prior- to calling this
5146 * function so those should and will be checked below.
5147 */
5148 else if (RT_SUCCESS(rcExit))
5149 {
5150 /*
5151 * Ensure we don't accidentally clear a pending HM event without clearing the VMCS.
5152 * This can be pretty hard to debug otherwise, interrupts might get injected twice
5153 * occasionally, see @bugref{9180#c42}.
5154 *
5155 * However, if the VM-entry failed, any VM entry-interruption info. field would
5156 * be left unmodified as the event would not have been injected to the guest. In
5157 * such cases, don't assert, we're not going to continue guest execution anyway.
5158 */
5159 uint32_t uExitReason;
5160 uint32_t uEntryIntInfo;
5161 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
5162 rc |= VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &uEntryIntInfo);
5163 AssertRC(rc);
5164 AssertMsg(VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason) || !VMX_ENTRY_INT_INFO_IS_VALID(uEntryIntInfo),
5165 ("uExitReason=%#RX32 uEntryIntInfo=%#RX32 rcExit=%d\n", uExitReason, uEntryIntInfo, VBOXSTRICTRC_VAL(rcExit)));
5166 }
5167#endif
5168
5169 /*
5170 * Clear the interrupt-window and NMI-window VMCS controls as we could have got
5171 * a VM-exit with higher priority than interrupt-window or NMI-window VM-exits
5172 * (e.g. TPR below threshold).
5173 */
5174 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
5175 {
5176 vmxHCClearIntWindowExitVmcs(pVCpu, pVmcsInfo);
5177 vmxHCClearNmiWindowExitVmcs(pVCpu, pVmcsInfo);
5178 }
5179
5180 /* If we're emulating an instruction, we shouldn't have any TRPM traps pending
5181 and if we're injecting an event we should have a TRPM trap pending. */
5182 AssertMsg(rcExit != VINF_EM_RAW_INJECT_TRPM_EVENT || TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
5183#ifndef DEBUG_bird /* Triggered after firing an NMI against NT4SP1, possibly a triple fault in progress. */
5184 AssertMsg(rcExit != VINF_EM_RAW_EMULATE_INSTR || !TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
5185#endif
5186
5187 /* Save guest state and restore host state bits. */
5188 int rc = hmR0VmxLeaveSession(pVCpu);
5189 AssertRCReturn(rc, rc);
5190 STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
5191
5192 /* Thread-context hooks are unregistered at this point!!! */
5193 /* Ring-3 callback notifications are unregistered at this point!!! */
5194
5195 /* Sync recompiler state. */
5196 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
5197 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR
5198 | CPUM_CHANGED_LDTR
5199 | CPUM_CHANGED_GDTR
5200 | CPUM_CHANGED_IDTR
5201 | CPUM_CHANGED_TR
5202 | CPUM_CHANGED_HIDDEN_SEL_REGS);
5203 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
5204 && CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx))
5205 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
5206
5207 Assert(!pVCpu->hmr0.s.fClearTrapFlag);
5208
5209    /* Update the exit-to-ring-3 reason. */
5210 pVCpu->hm.s.rcLastExitToR3 = VBOXSTRICTRC_VAL(rcExit);
5211
5212 /* On our way back from ring-3 reload the guest state if there is a possibility of it being changed. */
5213 if ( rcExit != VINF_EM_RAW_INTERRUPT
5214 || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
5215 {
5216 Assert(!(pVCpu->cpum.GstCtx.fExtrn & HMVMX_CPUMCTX_EXTRN_ALL));
5217 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
5218 }
5219
5220 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3);
5221 VMMRZCallRing3Enable(pVCpu);
5222 return rc;
5223}
5224
5225
5226/**
5227 * VMMRZCallRing3() callback wrapper which saves the guest state before we
5228 * longjump due to a ring-0 assertion.
5229 *
5230 * @returns VBox status code.
5231 * @param pVCpu The cross context virtual CPU structure.
5232 */
5233VMMR0DECL(int) VMXR0AssertionCallback(PVMCPUCC pVCpu)
5234{
5235 /*
5236 * !!! IMPORTANT !!!
5237 * If you modify code here, check whether hmR0VmxLeave() and hmR0VmxLeaveSession() needs to be updated too.
5238 * This is a stripped down version which gets out ASAP, trying to not trigger any further assertions.
5239 */
5240 VMMR0AssertionRemoveNotification(pVCpu);
5241 VMMRZCallRing3Disable(pVCpu);
5242 HM_DISABLE_PREEMPT(pVCpu);
5243
5244 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5245 vmxHCImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
5246 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
5247 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
5248
5249 /* Restore host-state bits that VT-x only restores partially. */
5250 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
5251 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
5252 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
5253
5254 /* Restore the lazy host MSRs as we're leaving VT-x context. */
5255 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
5256 hmR0VmxLazyRestoreHostMsrs(pVCpu);
5257
5258 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
5259 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
5260 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
5261
5262    /* Clear the current VMCS data back to memory (the shadow VMCS, if any, would have been
5263       cleared as part of importing the guest state above). */
5264 hmR0VmxClearVmcs(pVmcsInfo);
5265
5266 /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */
5267 VMMR0ThreadCtxHookDisable(pVCpu);
5268
5269 /* Leave HM context. This takes care of local init (term). */
5270 HMR0LeaveCpu(pVCpu);
5271 HM_RESTORE_PREEMPT();
5272 return VINF_SUCCESS;
5273}
5274
5275
5276/**
5277 * Enters the VT-x session.
5278 *
5279 * @returns VBox status code.
5280 * @param pVCpu The cross context virtual CPU structure.
5281 */
5282VMMR0DECL(int) VMXR0Enter(PVMCPUCC pVCpu)
5283{
5284 AssertPtr(pVCpu);
5285 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fSupported);
5286 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5287
5288 LogFlowFunc(("pVCpu=%p\n", pVCpu));
5289 Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
5290 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
5291
5292#ifdef VBOX_STRICT
5293 /* At least verify VMX is enabled, since we can't check if we're in VMX root mode without #GP'ing. */
5294 RTCCUINTREG uHostCr4 = ASMGetCR4();
5295 if (!(uHostCr4 & X86_CR4_VMXE))
5296 {
5297 LogRelFunc(("X86_CR4_VMXE bit in CR4 is not set!\n"));
5298 return VERR_VMX_X86_CR4_VMXE_CLEARED;
5299 }
5300#endif
5301
5302 /*
5303 * Do the EMT scheduled L1D and MDS flush here if needed.
5304 */
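    /* The HM_WSF_*_SCHED flags below appear to request the mitigation flush once per scheduling of
       the EMT onto a CPU rather than on every VM-entry, which is handled elsewhere. */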
5305 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
5306 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
5307 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
5308 hmR0MdsClear();
5309
5310 /*
5311 * Load the appropriate VMCS as the current and active one.
5312 */
5313 PVMXVMCSINFO pVmcsInfo;
5314 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx);
5315 if (!fInNestedGuestMode)
5316 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfo;
5317 else
5318 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
5319 int rc = hmR0VmxLoadVmcs(pVmcsInfo);
5320 if (RT_SUCCESS(rc))
5321 {
5322 pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs = fInNestedGuestMode;
5323 pVCpu->hm.s.vmx.fSwitchedToNstGstVmcsCopyForRing3 = fInNestedGuestMode;
5324 pVCpu->hmr0.s.fLeaveDone = false;
5325 Log4Func(("Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
5326 }
5327 return rc;
5328}
5329
5330
5331/**
5332 * The thread-context callback.
5333 *
5334 * This is used together with RTThreadCtxHookCreate() on platforms which
5335 * support it, and directly from VMMR0EmtPrepareForBlocking() and
5336 * VMMR0EmtResumeAfterBlocking() on platforms which don't.
5337 *
5338 * @param enmEvent The thread-context event.
5339 * @param pVCpu The cross context virtual CPU structure.
5340 * @param fGlobalInit Whether global VT-x/AMD-V init. was used.
5341 * @thread EMT(pVCpu)
5342 */
5343VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPUCC pVCpu, bool fGlobalInit)
5344{
5345 AssertPtr(pVCpu);
5346 RT_NOREF1(fGlobalInit);
5347
5348 switch (enmEvent)
5349 {
5350 case RTTHREADCTXEVENT_OUT:
5351 {
5352 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5353 VMCPU_ASSERT_EMT(pVCpu);
5354
5355 /* No longjmps (logger flushes, locks) in this fragile context. */
5356 VMMRZCallRing3Disable(pVCpu);
5357 Log4Func(("Preempting: HostCpuId=%u\n", RTMpCpuId()));
5358
5359 /* Restore host-state (FPU, debug etc.) */
5360 if (!pVCpu->hmr0.s.fLeaveDone)
5361 {
5362 /*
5363 * Do -not- import the guest-state here as we might already be in the middle of importing
5364 * it, esp. bad if we're holding the PGM lock, see comment in hmR0VmxImportGuestState().
5365 */
5366 hmR0VmxLeave(pVCpu, false /* fImportState */);
5367 pVCpu->hmr0.s.fLeaveDone = true;
5368 }
5369
5370 /* Leave HM context, takes care of local init (term). */
5371 int rc = HMR0LeaveCpu(pVCpu);
5372 AssertRC(rc);
5373
5374 /* Restore longjmp state. */
5375 VMMRZCallRing3Enable(pVCpu);
5376 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreempt);
5377 break;
5378 }
5379
5380 case RTTHREADCTXEVENT_IN:
5381 {
5382 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5383 VMCPU_ASSERT_EMT(pVCpu);
5384
5385 /* Do the EMT scheduled L1D and MDS flush here if needed. */
5386 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
5387 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
5388 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
5389 hmR0MdsClear();
5390
5391 /* No longjmps here, as we don't want to trigger preemption (& its hook) while resuming. */
5392 VMMRZCallRing3Disable(pVCpu);
5393 Log4Func(("Resumed: HostCpuId=%u\n", RTMpCpuId()));
5394
5395 /* Initialize the bare minimum state required for HM. This takes care of
5396 initializing VT-x if necessary (onlined CPUs, local init etc.) */
5397 int rc = hmR0EnterCpu(pVCpu);
5398 AssertRC(rc);
5399 Assert( (pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
5400 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
5401
5402 /* Load the active VMCS as the current one. */
5403 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5404 rc = hmR0VmxLoadVmcs(pVmcsInfo);
5405 AssertRC(rc);
5406 Log4Func(("Resumed: Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
5407 pVCpu->hmr0.s.fLeaveDone = false;
5408
5409 /* Restore longjmp state. */
5410 VMMRZCallRing3Enable(pVCpu);
5411 break;
5412 }
5413
5414 default:
5415 break;
5416 }
5417}
5418
5419
5420/**
5421 * Exports the host state into the VMCS host-state area.
5422 * Sets up the VM-exit MSR-load area.
5423 *
5424 * The CPU state will be loaded from these fields on every successful VM-exit.
5425 *
5426 * @returns VBox status code.
5427 * @param pVCpu The cross context virtual CPU structure.
5428 *
5429 * @remarks No-long-jump zone!!!
5430 */
5431static int hmR0VmxExportHostState(PVMCPUCC pVCpu)
5432{
5433 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5434
5435 int rc = VINF_SUCCESS;
5436 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
5437 {
5438 uint64_t uHostCr4 = hmR0VmxExportHostControlRegs();
5439
5440 rc = hmR0VmxExportHostSegmentRegs(pVCpu, uHostCr4);
5441 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5442
5443 hmR0VmxExportHostMsrs(pVCpu);
5444
5445 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_HOST_CONTEXT;
5446 }
5447 return rc;
5448}
5449
5450
5451/**
5452 * Saves the host state in the VMCS host-state.
5453 *
5454 * @returns VBox status code.
5455 * @param pVCpu The cross context virtual CPU structure.
5456 *
5457 * @remarks No-long-jump zone!!!
5458 */
5459VMMR0DECL(int) VMXR0ExportHostState(PVMCPUCC pVCpu)
5460{
5461 AssertPtr(pVCpu);
5462 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5463
5464 /*
5465 * Export the host state here while entering HM context.
5466 * When thread-context hooks are used, we might get preempted and have to re-save the host
5467 * state but most of the time we won't be, so do it here before we disable interrupts.
5468 */
5469 return hmR0VmxExportHostState(pVCpu);
5470}
5471
5472
5473/**
5474 * Exports the guest state into the VMCS guest-state area.
5475 *
5476 * This will typically be done before VM-entry when the guest-CPU state and the
5477 * VMCS state may potentially be out of sync.
5478 *
5479 * Sets up the VM-entry MSR-load and VM-exit MSR-store areas. Sets up the
5480 * VM-entry controls.
5481 * Sets up the appropriate VMX non-root function to execute guest code based on
5482 * the guest CPU mode.
5483 *
5484 * @returns VBox strict status code.
5485 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5486 * without unrestricted guest execution and the VMMDev is not presently
5487 * mapped (e.g. EFI32).
5488 *
5489 * @param pVCpu The cross context virtual CPU structure.
5490 * @param pVmxTransient The VMX-transient structure.
5491 *
5492 * @remarks No-long-jump zone!!!
5493 */
5494static VBOXSTRICTRC hmR0VmxExportGuestState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5495{
5496 AssertPtr(pVCpu);
5497 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5498 LogFlowFunc(("pVCpu=%p\n", pVCpu));
5499
5500 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExportGuestState, x);
5501
5502 /*
5503 * Determine real-on-v86 mode.
5504 * Used when the guest is in real-mode and unrestricted guest execution is not used.
5505 */
5506 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
5507 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUnrestrictedGuest
5508 || !CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx))
5509 pVmcsInfoShared->RealMode.fRealOnV86Active = false;
5510 else
5511 {
5512 Assert(!pVmxTransient->fIsNestedGuest);
5513 pVmcsInfoShared->RealMode.fRealOnV86Active = true;
5514 }
5515
5516 /*
5517 * Any ordering dependency among the sub-functions below must be explicitly stated using comments.
5518     * Ideally, assert that the cross-dependent bits are up-to-date at the point of use.
5519 */
5520 int rc = vmxHCExportGuestEntryExitCtls(pVCpu, pVmxTransient);
5521 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5522
5523 rc = vmxHCExportGuestCR0(pVCpu, pVmxTransient);
5524 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5525
5526 VBOXSTRICTRC rcStrict = vmxHCExportGuestCR3AndCR4(pVCpu, pVmxTransient);
5527 if (rcStrict == VINF_SUCCESS)
5528 { /* likely */ }
5529 else
5530 {
5531 Assert(rcStrict == VINF_EM_RESCHEDULE_REM || RT_FAILURE_NP(rcStrict));
5532 return rcStrict;
5533 }
5534
5535 rc = vmxHCExportGuestSegRegsXdtr(pVCpu, pVmxTransient);
5536 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5537
5538 rc = hmR0VmxExportGuestMsrs(pVCpu, pVmxTransient);
5539 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5540
5541 vmxHCExportGuestApicTpr(pVCpu, pVmxTransient);
5542 vmxHCExportGuestXcptIntercepts(pVCpu, pVmxTransient);
5543 vmxHCExportGuestRip(pVCpu);
5544 hmR0VmxExportGuestRsp(pVCpu);
5545 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5546
5547 rc = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
5548 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5549
5550    /* Clear any bits that may be set but are exported unconditionally, as well as unused/reserved bits. */
5551 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~( (HM_CHANGED_GUEST_GPRS_MASK & ~HM_CHANGED_GUEST_RSP)
5552 | HM_CHANGED_GUEST_CR2
5553 | (HM_CHANGED_GUEST_DR_MASK & ~HM_CHANGED_GUEST_DR7)
5554 | HM_CHANGED_GUEST_X87
5555 | HM_CHANGED_GUEST_SSE_AVX
5556 | HM_CHANGED_GUEST_OTHER_XSAVE
5557 | HM_CHANGED_GUEST_XCRx
5558 | HM_CHANGED_GUEST_KERNEL_GS_BASE /* Part of lazy or auto load-store MSRs. */
5559 | HM_CHANGED_GUEST_SYSCALL_MSRS /* Part of lazy or auto load-store MSRs. */
5560 | HM_CHANGED_GUEST_TSC_AUX
5561 | HM_CHANGED_GUEST_OTHER_MSRS
5562 | (HM_CHANGED_KEEPER_STATE_MASK & ~HM_CHANGED_VMX_MASK)));
5563
5564 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExportGuestState, x);
5565 return rc;
5566}
5567
5568
5569/**
5570 * Exports the state shared between the host and guest into the VMCS.
5571 *
5572 * @param pVCpu The cross context virtual CPU structure.
5573 * @param pVmxTransient The VMX-transient structure.
5574 *
5575 * @remarks No-long-jump zone!!!
5576 */
5577static void hmR0VmxExportSharedState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5578{
5579 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5580 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5581
5582 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_DR_MASK)
5583 {
5584 int rc = hmR0VmxExportSharedDebugState(pVCpu, pVmxTransient);
5585 AssertRC(rc);
5586 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_DR_MASK;
5587
5588        /* Loading shared debug bits might have changed the eflags.TF bit for debugging purposes. */
5589 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_RFLAGS)
5590 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5591 }
5592
5593 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_GUEST_LAZY_MSRS)
5594 {
5595 hmR0VmxLazyLoadGuestMsrs(pVCpu);
5596 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_VMX_GUEST_LAZY_MSRS;
5597 }
5598
5599 AssertMsg(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE),
5600 ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
5601}
5602
5603
5604/**
5605 * Worker for loading the guest-state bits in the inner VT-x execution loop.
5606 *
5607 * @returns Strict VBox status code (i.e. informational status codes too).
5608 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5609 * without unrestricted guest execution and the VMMDev is not presently
5610 * mapped (e.g. EFI32).
5611 *
5612 * @param pVCpu The cross context virtual CPU structure.
5613 * @param pVmxTransient The VMX-transient structure.
5614 *
5615 * @remarks No-long-jump zone!!!
5616 */
5617static VBOXSTRICTRC hmR0VmxExportGuestStateOptimal(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5618{
5619 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5620 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5621
5622#ifdef HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
5623 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
5624#endif
5625
5626 /*
5627 * For many VM-exits only RIP/RSP/RFLAGS (and HWVIRT state when executing a nested-guest)
5628     * change. First try to export only these without going through all other changed-flag checks.
5629 */
5630 VBOXSTRICTRC rcStrict;
5631 uint64_t const fCtxMask = HM_CHANGED_ALL_GUEST & ~HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE;
5632 uint64_t const fMinimalMask = HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT;
5633 uint64_t const fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
5634
5635    /* If only RIP/RSP/RFLAGS/HWVIRT changed, export only those (quicker, happens more often). */
5636 if ( (fCtxChanged & fMinimalMask)
5637 && !(fCtxChanged & (fCtxMask & ~fMinimalMask)))
5638 {
5639 vmxHCExportGuestRip(pVCpu);
5640 hmR0VmxExportGuestRsp(pVCpu);
5641 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5642 rcStrict = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
5643 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportMinimal);
5644 }
5645 /* If anything else also changed, go through the full export routine and export as required. */
5646 else if (fCtxChanged & fCtxMask)
5647 {
5648 rcStrict = hmR0VmxExportGuestState(pVCpu, pVmxTransient);
5649 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
5650 { /* likely */}
5651 else
5652 {
5653 AssertMsg(rcStrict == VINF_EM_RESCHEDULE_REM, ("Failed to export guest state! rc=%Rrc\n",
5654 VBOXSTRICTRC_VAL(rcStrict)));
5655 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5656 return rcStrict;
5657 }
5658 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportFull);
5659 }
5660 /* Nothing changed, nothing to load here. */
5661 else
5662 rcStrict = VINF_SUCCESS;
5663
5664#ifdef VBOX_STRICT
5665 /* All the guest state bits should be loaded except maybe the host context and/or the shared host/guest bits. */
5666 uint64_t const fCtxChangedCur = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
5667 AssertMsg(!(fCtxChangedCur & fCtxMask), ("fCtxChangedCur=%#RX64\n", fCtxChangedCur));
5668#endif
5669 return rcStrict;
5670}
5671
5672
5673/**
5674 * Maps the APIC-access page for virtualizing APIC accesses.
5675 *
5676 * This can cause longjumps to ring-3 due to the acquisition of the PGM lock. Hence,
5677 * this is not done as part of exporting guest state; see @bugref{8721}.
5678 *
5679 * @returns VBox status code.
5680 * @param pVCpu The cross context virtual CPU structure.
5681 * @param GCPhysApicBase The guest-physical address of the APIC access page.
5682 */
5683static int hmR0VmxMapHCApicAccessPage(PVMCPUCC pVCpu, RTGCPHYS GCPhysApicBase)
5684{
5685 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
5686 Assert(GCPhysApicBase);
5687
5688 LogFunc(("Mappping HC APIC-access page at %#RGp\n", GCPhysApicBase));
5689
5690 /* Unalias the existing mapping. */
5691 int rc = PGMHandlerPhysicalReset(pVM, GCPhysApicBase);
5692 AssertRCReturn(rc, rc);
5693
5694 /* Map the HC APIC-access page in place of the MMIO page, also updates the shadow page tables if necessary. */
5695 Assert(pVM->hmr0.s.vmx.HCPhysApicAccess != NIL_RTHCPHYS);
5696 rc = IOMR0MmioMapMmioHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hmr0.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
5697 AssertRCReturn(rc, rc);
5698
5699 return VINF_SUCCESS;
5700}
5701
5702
5703/**
5704 * Worker function passed to RTMpOnSpecific() that is to be called on the target
5705 * CPU.
5706 *
5707 * @param idCpu The ID for the CPU the function is called on.
5708 * @param pvUser1 Null, not used.
5709 * @param pvUser2 Null, not used.
5710 */
5711static DECLCALLBACK(void) hmR0DispatchHostNmi(RTCPUID idCpu, void *pvUser1, void *pvUser2)
5712{
5713 RT_NOREF3(idCpu, pvUser1, pvUser2);
5714 VMXDispatchHostNmi();
5715}
5716
5717
5718/**
5719 * Dispatches an NMI on the host CPU that received it.
5720 *
5721 * @returns VBox status code.
5722 * @param pVCpu The cross context virtual CPU structure.
5723 * @param pVmcsInfo The VMCS info. object corresponding to the VMCS that was
5724 * executing when receiving the host NMI in VMX non-root
5725 * operation.
5726 */
5727static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
5728{
5729 RTCPUID const idCpu = pVmcsInfo->idHostCpuExec;
5730 Assert(idCpu != NIL_RTCPUID);
5731
5732 /*
5733 * We don't want to delay dispatching the NMI any more than we have to. However,
5734 * we have already chosen -not- to dispatch NMIs when interrupts were still disabled
5735 * after executing guest or nested-guest code for the following reasons:
5736 *
5737     *   - We would need to perform VMREADs with interrupts disabled, which is orders of
5738     *     magnitude worse when we run as a nested hypervisor without VMCS shadowing
5739     *     support from the host hypervisor.
5740 *
5741 * - It affects the common VM-exit scenario and keeps interrupts disabled for a
5742 * longer period of time just for handling an edge case like host NMIs which do
5743 * not occur nearly as frequently as other VM-exits.
5744 *
5745 * Let's cover the most likely scenario first. Check if we are on the target CPU
5746 * and dispatch the NMI right away. This should be much faster than calling into
5747 * RTMpOnSpecific() machinery.
5748 */
5749 bool fDispatched = false;
5750 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
5751 if (idCpu == RTMpCpuId())
5752 {
5753 VMXDispatchHostNmi();
5754 fDispatched = true;
5755 }
5756 ASMSetFlags(fEFlags);
5757 if (fDispatched)
5758 {
5759 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC);
5760 return VINF_SUCCESS;
5761 }
5762
5763 /*
5764 * RTMpOnSpecific() waits until the worker function has run on the target CPU. So
5765 * there should be no race or recursion even if we are unlucky enough to be preempted
5766 * (to the target CPU) without dispatching the host NMI above.
5767 */
5768 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGCIpi);
5769 return RTMpOnSpecific(idCpu, &hmR0DispatchHostNmi, NULL /* pvUser1 */, NULL /* pvUser2 */);
5770}
5771
5772
5773#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5774/**
5775 * Merges the guest MSR bitmap with the nested-guest MSR bitmap in preparation for executing the
5776 * nested-guest using hardware-assisted VMX.
5777 *
5778 * @param pVCpu The cross context virtual CPU structure.
5779 * @param pVmcsInfoNstGst The nested-guest VMCS info. object.
5780 * @param pVmcsInfoGst The guest VMCS info. object.
5781 */
5782static void hmR0VmxMergeMsrBitmapNested(PCVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfoNstGst, PCVMXVMCSINFO pVmcsInfoGst)
5783{
5784 uint32_t const cbMsrBitmap = X86_PAGE_4K_SIZE;
5785 uint64_t *pu64MsrBitmap = (uint64_t *)pVmcsInfoNstGst->pvMsrBitmap;
5786 Assert(pu64MsrBitmap);
5787
5788 /*
5789 * We merge the guest MSR bitmap with the nested-guest MSR bitmap such that any
5790 * MSR that is intercepted by the guest is also intercepted while executing the
5791 * nested-guest using hardware-assisted VMX.
5792 *
5793 * Note! If the nested-guest is not using an MSR bitmap, every MSR must cause a
5794 * nested-guest VM-exit even if the outer guest is not intercepting some
5795 * MSRs. We cannot assume the caller has initialized the nested-guest
5796 * MSR bitmap in this case.
5797 *
5798 * The nested hypervisor may also switch whether it uses MSR bitmaps for
5799     *       each of its VM-entries, hence initializing it once per VM while setting
5800 * up the nested-guest VMCS is not sufficient.
5801 */
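    /* Note: in the MSR bitmaps a set bit means "intercept the access", so the OR in the loop below
       yields the union of both sets of intercepts, while filling the bitmap with all-ones makes
       every MSR access cause a VM-exit. */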
5802 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5803 if (pVmcsNstGst->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
5804 {
5805 uint64_t const *pu64MsrBitmapNstGst = (uint64_t const *)&pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap[0];
5806 uint64_t const *pu64MsrBitmapGst = (uint64_t const *)pVmcsInfoGst->pvMsrBitmap;
5807 Assert(pu64MsrBitmapNstGst);
5808 Assert(pu64MsrBitmapGst);
5809
5810 /** @todo Detect and use EVEX.POR? */
5811 uint32_t const cFrags = cbMsrBitmap / sizeof(uint64_t);
5812 for (uint32_t i = 0; i < cFrags; i++)
5813 pu64MsrBitmap[i] = pu64MsrBitmapNstGst[i] | pu64MsrBitmapGst[i];
5814 }
5815 else
5816 ASMMemFill32(pu64MsrBitmap, cbMsrBitmap, UINT32_C(0xffffffff));
5817}
5818
5819
5820/**
5821 * Merges the guest VMCS into the nested-guest VMCS controls in preparation for
5822 * hardware-assisted VMX execution of the nested-guest.
5823 *
5824 * For a guest, we don't modify these controls once we set up the VMCS and hence
5825 * this function is never called.
5826 *
5827 * For nested-guests, since the nested hypervisor provides these controls on every
5828 * nested-guest VM-entry and could potentially change them every time, we need to
5829 * merge them before every nested-guest VM-entry.
5830 *
5831 * @returns VBox status code.
5832 * @param pVCpu The cross context virtual CPU structure.
5833 */
5834static int hmR0VmxMergeVmcsNested(PVMCPUCC pVCpu)
5835{
5836 PVMCC const pVM = pVCpu->CTX_SUFF(pVM);
5837 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
5838 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5839
5840 /*
5841 * Merge the controls with the requirements of the guest VMCS.
5842 *
5843 * We do not need to validate the nested-guest VMX features specified in the nested-guest
5844 * VMCS with the features supported by the physical CPU as it's already done by the
5845 * VMLAUNCH/VMRESUME instruction emulation.
5846 *
5847 * This is because the VMX features exposed by CPUM (through CPUID/MSRs) to the guest are
5848 * derived from the VMX features supported by the physical CPU.
5849 */
5850
5851 /* Pin-based VM-execution controls. */
5852 uint32_t const u32PinCtls = pVmcsNstGst->u32PinCtls | pVmcsInfoGst->u32PinCtls;
5853
5854 /* Processor-based VM-execution controls. */
5855 uint32_t u32ProcCtls = (pVmcsNstGst->u32ProcCtls & ~VMX_PROC_CTLS_USE_IO_BITMAPS)
5856 | (pVmcsInfoGst->u32ProcCtls & ~( VMX_PROC_CTLS_INT_WINDOW_EXIT
5857 | VMX_PROC_CTLS_NMI_WINDOW_EXIT
5858 | VMX_PROC_CTLS_MOV_DR_EXIT
5859 | VMX_PROC_CTLS_USE_TPR_SHADOW
5860 | VMX_PROC_CTLS_MONITOR_TRAP_FLAG));
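    /* The guest controls masked out above (interrupt/NMI-window exiting, MOV-DR exiting, TPR
       shadow, monitor trap flag) are presumably controls that HM toggles dynamically as needed,
       so they are not inherited statically from the outer guest VMCS here. */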
5861
5862 /* Secondary processor-based VM-execution controls. */
5863 uint32_t const u32ProcCtls2 = (pVmcsNstGst->u32ProcCtls2 & ~VMX_PROC_CTLS2_VPID)
5864 | (pVmcsInfoGst->u32ProcCtls2 & ~( VMX_PROC_CTLS2_VIRT_APIC_ACCESS
5865 | VMX_PROC_CTLS2_INVPCID
5866 | VMX_PROC_CTLS2_VMCS_SHADOWING
5867 | VMX_PROC_CTLS2_RDTSCP
5868 | VMX_PROC_CTLS2_XSAVES_XRSTORS
5869 | VMX_PROC_CTLS2_APIC_REG_VIRT
5870 | VMX_PROC_CTLS2_VIRT_INT_DELIVERY
5871 | VMX_PROC_CTLS2_VMFUNC));
5872
5873 /*
5874 * VM-entry controls:
5875     * These controls contain state that depends on the nested-guest state (primarily
5876     * the EFER MSR) and are thus not constant between VMLAUNCH/VMRESUME and the nested-guest
5877     * VM-exit. Although the nested hypervisor cannot change them, we need to in order to
5878     * properly continue executing the nested-guest if the EFER MSR changes but does not
5879     * cause a nested-guest VM-exit.
5880 *
5881 * VM-exit controls:
5882 * These controls specify the host state on return. We cannot use the controls from
5883     * the nested hypervisor state as-is, as they would contain the guest state rather than
5884 * the host state. Since the host state is subject to change (e.g. preemption, trips
5885 * to ring-3, longjmp and rescheduling to a different host CPU) they are not constant
5886 * through VMLAUNCH/VMRESUME and the nested-guest VM-exit.
5887 *
5888 * VM-entry MSR-load:
5889 * The guest MSRs from the VM-entry MSR-load area are already loaded into the guest-CPU
5890 * context by the VMLAUNCH/VMRESUME instruction emulation.
5891 *
5892 * VM-exit MSR-store:
5893 * The VM-exit emulation will take care of populating the MSRs from the guest-CPU context
5894 * back into the VM-exit MSR-store area.
5895 *
5896 * VM-exit MSR-load areas:
5897 * This must contain the real host MSRs with hardware-assisted VMX execution. Hence, we
5898 * can entirely ignore what the nested hypervisor wants to load here.
5899 */
5900
5901 /*
5902 * Exception bitmap.
5903 *
5904 * We could remove #UD from the guest bitmap and merge it with the nested-guest bitmap
5905 * here (and avoid doing anything while exporting nested-guest state), but to keep the
5906     * code more flexible if intercepting exceptions becomes more dynamic in the future, we do
5907 * it as part of exporting the nested-guest state.
5908 */
5909 uint32_t const u32XcptBitmap = pVmcsNstGst->u32XcptBitmap | pVmcsInfoGst->u32XcptBitmap;
5910
5911 /*
5912 * CR0/CR4 guest/host mask.
5913 *
5914 * Modifications by the nested-guest to CR0/CR4 bits owned by the host and the guest must
5915 * cause VM-exits, so we need to merge them here.
5916 */
5917 uint64_t const u64Cr0Mask = pVmcsNstGst->u64Cr0Mask.u | pVmcsInfoGst->u64Cr0Mask;
5918 uint64_t const u64Cr4Mask = pVmcsNstGst->u64Cr4Mask.u | pVmcsInfoGst->u64Cr4Mask;
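     /*
      * Reminder on the guest/host mask semantics: bits set in the mask are host-owned;
      * guest reads of such a bit return the read shadow, and guest writes that would
      * change it relative to the read shadow cause a VM-exit. OR-ing the two masks thus
      * keeps every bit that either the nested hypervisor or we ourselves care about
      * intercepted while executing the nested-guest.
      */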
5919
5920 /*
5921 * Page-fault error-code mask and match.
5922 *
5923 * Although we require unrestricted guest execution (and thereby nested-paging) for
5924 * hardware-assisted VMX execution of nested-guests and thus the outer guest doesn't
5925 * normally intercept #PFs, it might intercept them for debugging purposes.
5926 *
5927 * If the outer guest is not intercepting #PFs, we can use the nested-guest #PF filters.
5928 * If the outer guest is intercepting #PFs, we must intercept all #PFs.
5929 */
5930 uint32_t u32XcptPFMask;
5931 uint32_t u32XcptPFMatch;
5932 if (!(pVmcsInfoGst->u32XcptBitmap & RT_BIT(X86_XCPT_PF)))
5933 {
5934 u32XcptPFMask = pVmcsNstGst->u32XcptPFMask;
5935 u32XcptPFMatch = pVmcsNstGst->u32XcptPFMatch;
5936 }
5937 else
5938 {
5939 u32XcptPFMask = 0;
5940 u32XcptPFMatch = 0;
5941 }
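     /*
      * For reference (Intel SDM, page-fault handling with the exception bitmap): with the
      * #PF bit set in the exception bitmap, a #PF causes a VM-exit iff
      *
      *   (uPfErrCode & u32XcptPFMask) == u32XcptPFMatch      -- uPfErrCode is illustrative only
      *
      * (and iff it does NOT match when the #PF bit is clear). Hence the zero mask/match
      * chosen above for the outer-guest-intercepts-#PF case makes every #PF match, i.e.
      * all #PFs are intercepted.
      */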
5942
5943 /*
5944 * Pause-Loop exiting.
5945 */
5946 /** @todo r=bird: given that both pVM->hm.s.vmx.cPleGapTicks and
 5947 * pVM->hm.s.vmx.cPleWindowTicks default to zero, I cannot see how
5948 * this will work... */
5949 uint32_t const cPleGapTicks = RT_MIN(pVM->hm.s.vmx.cPleGapTicks, pVmcsNstGst->u32PleGap);
5950 uint32_t const cPleWindowTicks = RT_MIN(pVM->hm.s.vmx.cPleWindowTicks, pVmcsNstGst->u32PleWindow);
5951
5952 /*
5953 * Pending debug exceptions.
5954 * Currently just copy whatever the nested-guest provides us.
5955 */
5956 uint64_t const uPendingDbgXcpts = pVmcsNstGst->u64GuestPendingDbgXcpts.u;
5957
5958 /*
5959 * I/O Bitmap.
5960 *
5961 * We do not use the I/O bitmap that may be provided by the nested hypervisor as we always
5962 * intercept all I/O port accesses.
5963 */
5964 Assert(u32ProcCtls & VMX_PROC_CTLS_UNCOND_IO_EXIT);
5965 Assert(!(u32ProcCtls & VMX_PROC_CTLS_USE_IO_BITMAPS));
5966
5967 /*
5968 * VMCS shadowing.
5969 *
5970 * We do not yet expose VMCS shadowing to the guest and thus VMCS shadowing should not be
5971 * enabled while executing the nested-guest.
5972 */
5973 Assert(!(u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING));
5974
5975 /*
5976 * APIC-access page.
5977 */
5978 RTHCPHYS HCPhysApicAccess;
5979 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5980 {
5981 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
5982 RTGCPHYS const GCPhysApicAccess = pVmcsNstGst->u64AddrApicAccess.u;
5983
5984 /** @todo NSTVMX: This is not really correct but currently is required to make
 5985 * things work. We need to re-enable the page handler when we fall back to
5986 * IEM execution of the nested-guest! */
5987 PGMHandlerPhysicalPageTempOff(pVM, GCPhysApicAccess, GCPhysApicAccess);
5988
5989 void *pvPage;
5990 PGMPAGEMAPLOCK PgLockApicAccess;
5991 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysApicAccess, &pvPage, &PgLockApicAccess);
5992 if (RT_SUCCESS(rc))
5993 {
5994 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysApicAccess, &HCPhysApicAccess);
5995 AssertMsgRCReturn(rc, ("Failed to get host-physical address for APIC-access page at %#RGp\n", GCPhysApicAccess), rc);
5996
5997 /** @todo Handle proper releasing of page-mapping lock later. */
5998 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockApicAccess);
5999 }
6000 else
6001 return rc;
6002 }
6003 else
6004 HCPhysApicAccess = 0;
6005
6006 /*
6007 * Virtual-APIC page and TPR threshold.
6008 */
6009 RTHCPHYS HCPhysVirtApic;
6010 uint32_t u32TprThreshold;
6011 if (u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
6012 {
6013 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW);
6014 RTGCPHYS const GCPhysVirtApic = pVmcsNstGst->u64AddrVirtApic.u;
6015
6016 void *pvPage;
6017 PGMPAGEMAPLOCK PgLockVirtApic;
6018 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysVirtApic, &pvPage, &PgLockVirtApic);
6019 if (RT_SUCCESS(rc))
6020 {
6021 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysVirtApic, &HCPhysVirtApic);
6022 AssertMsgRCReturn(rc, ("Failed to get host-physical address for virtual-APIC page at %#RGp\n", GCPhysVirtApic), rc);
6023
6024 /** @todo Handle proper releasing of page-mapping lock later. */
6025 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockVirtApic);
6026 }
6027 else
6028 return rc;
6029
6030 u32TprThreshold = pVmcsNstGst->u32TprThreshold;
6031 }
6032 else
6033 {
6034 HCPhysVirtApic = 0;
6035 u32TprThreshold = 0;
6036
6037 /*
 6038 * We must make sure CR8 reads/writes cause VM-exits when TPR shadowing is not
 6039 * used by the nested hypervisor. Preventing MMIO accesses to the physical APIC
 6040 * is taken care of by EPT/shadow paging.
6041 */
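         /* CR8 is only accessible from 64-bit mode, so forcing CR8 load/store exits is
            only needed when 64-bit guests are allowed. */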
6042 if (pVM->hmr0.s.fAllow64BitGuests)
6043 u32ProcCtls |= VMX_PROC_CTLS_CR8_STORE_EXIT
6044 | VMX_PROC_CTLS_CR8_LOAD_EXIT;
6045 }
6046
6047 /*
6048 * Validate basic assumptions.
6049 */
6050 PVMXVMCSINFO pVmcsInfoNstGst = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
6051 Assert(pVM->hmr0.s.vmx.fUnrestrictedGuest);
6052 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
6053 Assert(hmGetVmxActiveVmcsInfo(pVCpu) == pVmcsInfoNstGst);
6054
6055 /*
6056 * Commit it to the nested-guest VMCS.
6057 */
6058 int rc = VINF_SUCCESS;
6059 if (pVmcsInfoNstGst->u32PinCtls != u32PinCtls)
6060 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, u32PinCtls);
6061 if (pVmcsInfoNstGst->u32ProcCtls != u32ProcCtls)
6062 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, u32ProcCtls);
6063 if (pVmcsInfoNstGst->u32ProcCtls2 != u32ProcCtls2)
6064 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, u32ProcCtls2);
6065 if (pVmcsInfoNstGst->u32XcptBitmap != u32XcptBitmap)
6066 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32XcptBitmap);
6067 if (pVmcsInfoNstGst->u64Cr0Mask != u64Cr0Mask)
6068 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask);
6069 if (pVmcsInfoNstGst->u64Cr4Mask != u64Cr4Mask)
6070 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask);
6071 if (pVmcsInfoNstGst->u32XcptPFMask != u32XcptPFMask)
6072 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, u32XcptPFMask);
6073 if (pVmcsInfoNstGst->u32XcptPFMatch != u32XcptPFMatch)
6074 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, u32XcptPFMatch);
6075 if ( !(u32ProcCtls & VMX_PROC_CTLS_PAUSE_EXIT)
6076 && (u32ProcCtls2 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT))
6077 {
6078 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT);
6079 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, cPleGapTicks);
6080 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, cPleWindowTicks);
6081 }
6082 if (u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
6083 {
6084 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold);
6085 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
6086 }
6087 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
6088 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
6089 rc |= VMXWriteVmcsNw(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, uPendingDbgXcpts);
6090 AssertRC(rc);
6091
6092 /*
6093 * Update the nested-guest VMCS cache.
6094 */
6095 pVmcsInfoNstGst->u32PinCtls = u32PinCtls;
6096 pVmcsInfoNstGst->u32ProcCtls = u32ProcCtls;
6097 pVmcsInfoNstGst->u32ProcCtls2 = u32ProcCtls2;
6098 pVmcsInfoNstGst->u32XcptBitmap = u32XcptBitmap;
6099 pVmcsInfoNstGst->u64Cr0Mask = u64Cr0Mask;
6100 pVmcsInfoNstGst->u64Cr4Mask = u64Cr4Mask;
6101 pVmcsInfoNstGst->u32XcptPFMask = u32XcptPFMask;
6102 pVmcsInfoNstGst->u32XcptPFMatch = u32XcptPFMatch;
6103 pVmcsInfoNstGst->HCPhysVirtApic = HCPhysVirtApic;
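     /* The cached values above allow the next merge to skip the VMWRITEs for fields that
        have not changed (see the comparisons in the commit block further up). */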
6104
6105 /*
6106 * We need to flush the TLB if we are switching the APIC-access page address.
6107 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
6108 */
6109 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
6110 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = true;
6111
6112 /*
6113 * MSR bitmap.
6114 *
6115 * The MSR bitmap address has already been initialized while setting up the nested-guest
 6116 * VMCS; here we need to merge the MSR bitmaps.
6117 */
6118 if (u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
6119 hmR0VmxMergeMsrBitmapNested(pVCpu, pVmcsInfoNstGst, pVmcsInfoGst);
6120
6121 return VINF_SUCCESS;
6122}
6123#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
6124
6125
6126/**
6127 * Does the preparations before executing guest code in VT-x.
6128 *
6129 * This may cause longjmps to ring-3 and may even result in rescheduling to the
 6130 * recompiler/IEM. We must be cautious about committing guest-state information
 6131 * into the VMCS here, since doing so assumes we will assuredly execute the
 6132 * guest in VT-x mode.
6133 *
6134 * If we fall back to the recompiler/IEM after updating the VMCS and clearing
6135 * the common-state (TRPM/forceflags), we must undo those changes so that the
6136 * recompiler/IEM can (and should) use them when it resumes guest execution.
6137 * Otherwise such operations must be done when we can no longer exit to ring-3.
6138 *
6139 * @returns Strict VBox status code (i.e. informational status codes too).
6140 * @retval VINF_SUCCESS if we can proceed with running the guest, interrupts
6141 * have been disabled.
6142 * @retval VINF_VMX_VMEXIT if a nested-guest VM-exit occurs (e.g., while evaluating
6143 * pending events).
6144 * @retval VINF_EM_RESET if a triple-fault occurs while injecting a
6145 * double-fault into the guest.
6146 * @retval VINF_EM_DBG_STEPPED if @a fStepping is true and an event was
6147 * dispatched directly.
6148 * @retval VINF_* scheduling changes, we have to go back to ring-3.
6149 *
6150 * @param pVCpu The cross context virtual CPU structure.
6151 * @param pVmxTransient The VMX-transient structure.
6152 * @param fStepping Whether we are single-stepping the guest in the
6153 * hypervisor debugger. Makes us ignore some of the reasons
6154 * for returning to ring-3, and return VINF_EM_DBG_STEPPED
6155 * if event dispatching took place.
6156 */
6157static VBOXSTRICTRC hmR0VmxPreRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, bool fStepping)
6158{
6159 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6160
6161 Log4Func(("fIsNested=%RTbool fStepping=%RTbool\n", pVmxTransient->fIsNestedGuest, fStepping));
6162
6163#ifdef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
6164 if (pVmxTransient->fIsNestedGuest)
6165 {
6166 RT_NOREF2(pVCpu, fStepping);
6167 Log2Func(("Rescheduling to IEM due to nested-hwvirt or forced IEM exec -> VINF_EM_RESCHEDULE_REM\n"));
6168 return VINF_EM_RESCHEDULE_REM;
6169 }
6170#endif
6171
6172 /*
6173 * Check and process force flag actions, some of which might require us to go back to ring-3.
6174 */
6175 VBOXSTRICTRC rcStrict = vmxHCCheckForceFlags(pVCpu, pVmxTransient->fIsNestedGuest, fStepping);
6176 if (rcStrict == VINF_SUCCESS)
6177 {
6178 /* FFs don't get set all the time. */
6179#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6180 if ( pVmxTransient->fIsNestedGuest
6181 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
6182 {
6183 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
6184 return VINF_VMX_VMEXIT;
6185 }
6186#endif
6187 }
6188 else
6189 return rcStrict;
6190
6191 /*
6192 * Virtualize memory-mapped accesses to the physical APIC (may take locks).
6193 */
6194 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
6195 if ( !pVCpu->hm.s.vmx.u64GstMsrApicBase
6196 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
6197 && PDMHasApic(pVM))
6198 {
6199 /* Get the APIC base MSR from the virtual APIC device. */
6200 uint64_t const uApicBaseMsr = APICGetBaseMsrNoCheck(pVCpu);
6201
6202 /* Map the APIC access page. */
6203 int rc = hmR0VmxMapHCApicAccessPage(pVCpu, uApicBaseMsr & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
6204 AssertRCReturn(rc, rc);
6205
6206 /* Update the per-VCPU cache of the APIC base MSR corresponding to the mapped APIC access page. */
6207 pVCpu->hm.s.vmx.u64GstMsrApicBase = uApicBaseMsr;
6208 }
6209
6210#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6211 /*
6212 * Merge guest VMCS controls with the nested-guest VMCS controls.
6213 *
6214 * Even if we have not executed the guest prior to this (e.g. when resuming from a
6215 * saved state), we should be okay with merging controls as we initialize the
 6216 * guest VMCS controls as part of the VM setup phase.
6217 */
6218 if ( pVmxTransient->fIsNestedGuest
6219 && !pVCpu->hm.s.vmx.fMergedNstGstCtls)
6220 {
6221 int rc = hmR0VmxMergeVmcsNested(pVCpu);
6222 AssertRCReturn(rc, rc);
6223 pVCpu->hm.s.vmx.fMergedNstGstCtls = true;
6224 }
6225#endif
6226
6227 /*
6228 * Evaluate events to be injected into the guest.
6229 *
6230 * Events in TRPM can be injected without inspecting the guest state.
 6231 * If any new events (interrupts/NMI) are currently pending, we try to set up the
 6232 * guest to cause a VM-exit the next time it is ready to receive the event.
6233 */
6234 if (TRPMHasTrap(pVCpu))
6235 vmxHCTrpmTrapToPendingEvent(pVCpu);
6236
6237 uint32_t fIntrState;
6238 rcStrict = vmxHCEvaluatePendingEvent(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->fIsNestedGuest,
6239 &fIntrState);
6240
6241#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6242 /*
 6243 * If something failed while evaluating pending events (unlikely) or if we were
 6244 * preparing to run a nested-guest but performed a nested-guest VM-exit instead, we should bail.
6245 */
6246 if (rcStrict != VINF_SUCCESS)
6247 return rcStrict;
6248 if ( pVmxTransient->fIsNestedGuest
6249 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
6250 {
6251 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
6252 return VINF_VMX_VMEXIT;
6253 }
6254#else
6255 Assert(rcStrict == VINF_SUCCESS);
6256#endif
6257
6258 /*
6259 * Event injection may take locks (currently the PGM lock for real-on-v86 case) and thus
6260 * needs to be done with longjmps or interrupts + preemption enabled. Event injection might
6261 * also result in triple-faulting the VM.
6262 *
6263 * With nested-guests, the above does not apply since unrestricted guest execution is a
6264 * requirement. Regardless, we do this here to avoid duplicating code elsewhere.
6265 */
6266 rcStrict = vmxHCInjectPendingEvent(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->fIsNestedGuest,
6267 fIntrState, fStepping);
6268 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
6269 { /* likely */ }
6270 else
6271 {
6272 AssertMsg(rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping),
6273 ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
6274 return rcStrict;
6275 }
6276
6277 /*
6278 * A longjump might result in importing CR3 even for VM-exits that don't necessarily
 6279 * import CR3 themselves. We need to update it here, as even the vmxHCInjectPendingEvent()
 6280 * call above may lazily import guest-CPU state on demand, causing the force flag
 6281 * checked below to be set.
6282 */
6283 if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
6284 {
6285 Assert(!(ASMAtomicUoReadU64(&pVCpu->cpum.GstCtx.fExtrn) & CPUMCTX_EXTRN_CR3));
6286 int rc2 = PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
6287 AssertMsgReturn(rc2 == VINF_SUCCESS || rc2 == VINF_PGM_SYNC_CR3,
6288 ("%Rrc\n", rc2), RT_FAILURE_NP(rc2) ? rc2 : VERR_IPE_UNEXPECTED_INFO_STATUS);
6289 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
6290 }
6291
6292#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6293 /* Paranoia. */
6294 Assert(!pVmxTransient->fIsNestedGuest || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6295#endif
6296
6297 /*
6298 * No longjmps to ring-3 from this point on!!!
6299 * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic.
6300 * This also disables flushing of the R0-logger instance (if any).
6301 */
6302 VMMRZCallRing3Disable(pVCpu);
6303
6304 /*
6305 * Export the guest state bits.
6306 *
6307 * We cannot perform longjmps while loading the guest state because we do not preserve the
6308 * host/guest state (although the VMCS will be preserved) across longjmps which can cause
6309 * CPU migration.
6310 *
 6311 * If we are injecting events into a real-on-v86 mode guest, we would have updated RIP and some segment
 6312 * registers. Hence, exporting the guest state needs to be done -after- injecting events.
6313 */
6314 rcStrict = hmR0VmxExportGuestStateOptimal(pVCpu, pVmxTransient);
6315 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
6316 { /* likely */ }
6317 else
6318 {
6319 VMMRZCallRing3Enable(pVCpu);
6320 return rcStrict;
6321 }
6322
6323 /*
6324 * We disable interrupts so that we don't miss any interrupts that would flag preemption
6325 * (IPI/timers etc.) when thread-context hooks aren't used and we've been running with
6326 * preemption disabled for a while. Since this is purely to aid the
 6327 * RTThreadPreemptIsPending() code, it doesn't matter that it may temporarily re-enable and
 6328 * disable interrupts on NT.
 6329 *
 6330 * We need to check for force-flags that could've possibly been altered since we last
6331 * checked them (e.g. by PDMGetInterrupt() leaving the PDM critical section,
6332 * see @bugref{6398}).
6333 *
6334 * We also check a couple of other force-flags as a last opportunity to get the EMT back
6335 * to ring-3 before executing guest code.
6336 */
6337 pVmxTransient->fEFlags = ASMIntDisableFlags();
6338
6339 if ( ( !VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
6340 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
6341 || ( fStepping /* Optimized for the non-stepping case, so a bit of unnecessary work when stepping. */
6342 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK & ~(VMCPU_FF_TIMER | VMCPU_FF_PDM_CRITSECT))) )
6343 {
6344 if (!RTThreadPreemptIsPending(NIL_RTTHREAD))
6345 {
6346#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6347 /*
 6348 * If we are executing a nested-guest, make sure we intercept subsequent
 6349 * events. The event we are injecting might be part of VM-entry. This is mainly to keep
6350 * the VM-exit instruction emulation happy.
6351 */
6352 if (pVmxTransient->fIsNestedGuest)
6353 CPUMSetGuestVmxInterceptEvents(&pVCpu->cpum.GstCtx, true);
6354#endif
6355
6356 /*
6357 * We've injected any pending events. This is really the point of no return (to ring-3).
6358 *
6359 * Note! The caller expects to continue with interrupts & longjmps disabled on successful
6360 * returns from this function, so do -not- enable them here.
6361 */
6362 pVCpu->hm.s.Event.fPending = false;
6363 return VINF_SUCCESS;
6364 }
6365
6366 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPendingHostIrq);
6367 rcStrict = VINF_EM_RAW_INTERRUPT;
6368 }
6369 else
6370 {
6371 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
6372 rcStrict = VINF_EM_RAW_TO_R3;
6373 }
6374
6375 ASMSetFlags(pVmxTransient->fEFlags);
6376 VMMRZCallRing3Enable(pVCpu);
6377
6378 return rcStrict;
6379}
6380
6381
6382/**
6383 * Final preparations before executing guest code using hardware-assisted VMX.
6384 *
6385 * We can no longer get preempted to a different host CPU and there are no returns
 6386 * to ring-3. We ignore any errors that may happen from this point (e.g. VMWRITE
 6387 * failures); this function is not intended to fail except in the face of unrecoverable
 6388 * hardware errors.
6389 *
6390 * @param pVCpu The cross context virtual CPU structure.
6391 * @param pVmxTransient The VMX-transient structure.
6392 *
6393 * @remarks Called with preemption disabled.
6394 * @remarks No-long-jump zone!!!
6395 */
6396static void hmR0VmxPreRunGuestCommitted(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
6397{
6398 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6399 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6400 Assert(!pVCpu->hm.s.Event.fPending);
6401
6402 /*
6403 * Indicate start of guest execution and where poking EMT out of guest-context is recognized.
6404 */
6405 VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
6406 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
6407
6408 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
6409 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
6410 PHMPHYSCPU pHostCpu = hmR0GetCurrentCpu();
6411 RTCPUID const idCurrentCpu = pHostCpu->idCpu;
6412
6413 if (!CPUMIsGuestFPUStateActive(pVCpu))
6414 {
6415 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestFpuState, x);
6416 if (CPUMR0LoadGuestFPU(pVM, pVCpu) == VINF_CPUM_HOST_CR0_MODIFIED)
6417 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT;
6418 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestFpuState, x);
6419 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadGuestFpu);
6420 }
6421
6422 /*
6423 * Re-export the host state bits as we may've been preempted (only happens when
6424 * thread-context hooks are used or when the VM start function changes) or if
 6425 * the host CR0 was modified while loading the guest FPU state above.
6426 *
6427 * The 64-on-32 switcher saves the (64-bit) host state into the VMCS and if we
6428 * changed the switcher back to 32-bit, we *must* save the 32-bit host state here,
6429 * see @bugref{8432}.
6430 *
6431 * This may also happen when switching to/from a nested-guest VMCS without leaving
6432 * ring-0.
6433 */
6434 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
6435 {
6436 hmR0VmxExportHostState(pVCpu);
6437 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportHostState);
6438 }
6439 Assert(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT));
6440
6441 /*
6442 * Export the state shared between host and guest (FPU, debug, lazy MSRs).
6443 */
6444 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)
6445 hmR0VmxExportSharedState(pVCpu, pVmxTransient);
6446 AssertMsg(!pVCpu->hm.s.fCtxChanged, ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
6447
6448 /*
6449 * Store status of the shared guest/host debug state at the time of VM-entry.
6450 */
6451 pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu);
6452 pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu);
6453
6454 /*
6455 * Always cache the TPR-shadow if the virtual-APIC page exists, thereby skipping
6456 * more than one conditional check. The post-run side of our code shall determine
 6457 * if it needs to sync the virtual APIC TPR with the TPR shadow.
6458 */
6459 if (pVmcsInfo->pbVirtApic)
6460 pVmxTransient->u8GuestTpr = pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR];
6461
6462 /*
6463 * Update the host MSRs values in the VM-exit MSR-load area.
6464 */
6465 if (!pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs)
6466 {
6467 if (pVmcsInfo->cExitMsrLoad > 0)
6468 hmR0VmxUpdateAutoLoadHostMsrs(pVCpu, pVmcsInfo);
6469 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = true;
6470 }
6471
6472 /*
6473 * Evaluate if we need to intercept guest RDTSC/P accesses. Set up the
6474 * VMX-preemption timer based on the next virtual sync clock deadline.
6475 */
6476 if ( !pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer
6477 || idCurrentCpu != pVCpu->hmr0.s.idLastCpu)
6478 {
6479 hmR0VmxUpdateTscOffsettingAndPreemptTimer(pVCpu, pVmxTransient, idCurrentCpu);
6480 pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = true;
6481 }
6482
6483 /* Record statistics of how often we use TSC offsetting as opposed to intercepting RDTSC/P. */
6484 bool const fIsRdtscIntercepted = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT);
6485 if (!fIsRdtscIntercepted)
6486 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
6487 else
6488 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
6489
6490 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB flushing, set this across the world switch. */
6491 hmR0VmxFlushTaggedTlb(pHostCpu, pVCpu, pVmcsInfo); /* Invalidate the appropriate guest entries from the TLB. */
6492 Assert(idCurrentCpu == pVCpu->hmr0.s.idLastCpu);
6493 pVCpu->hm.s.vmx.LastError.idCurrentCpu = idCurrentCpu; /* Record the error reporting info. with the current host CPU. */
6494 pVmcsInfo->idHostCpuState = idCurrentCpu; /* Record the CPU for which the host-state has been exported. */
6495 pVmcsInfo->idHostCpuExec = idCurrentCpu; /* Record the CPU on which we shall execute. */
6496
6497 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
6498
6499 TMNotifyStartOfExecution(pVM, pVCpu); /* Notify TM to resume its clocks when TSC is tied to execution,
6500 as we're about to start executing the guest. */
6501
6502 /*
6503 * Load the guest TSC_AUX MSR when we are not intercepting RDTSCP.
6504 *
 6505 * This is done this late because updating the TSC offsetting/preemption timer above
 6506 * figures out whether we can skip intercepting RDTSCP by calculating the number of
6507 * host CPU ticks till the next virtual sync deadline (for the dynamic case).
6508 */
6509 if ( (pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_RDTSCP)
6510 && !fIsRdtscIntercepted)
6511 {
6512 vmxHCImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_TSC_AUX);
6513
6514 /* NB: Because we call hmR0VmxAddAutoLoadStoreMsr with fUpdateHostMsr=true,
6515 it's safe even after hmR0VmxUpdateAutoLoadHostMsrs has already been done. */
6516 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX, CPUMGetGuestTscAux(pVCpu),
6517 true /* fSetReadWrite */, true /* fUpdateHostMsr */);
6518 AssertRC(rc);
6519 Assert(!pVmxTransient->fRemoveTscAuxMsr);
6520 pVmxTransient->fRemoveTscAuxMsr = true;
6521 }
6522
6523#ifdef VBOX_STRICT
6524 Assert(pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs);
6525 hmR0VmxCheckAutoLoadStoreMsrs(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest);
6526 hmR0VmxCheckHostEferMsr(pVmcsInfo);
6527 AssertRC(vmxHCCheckCachedVmcsCtls(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest));
6528#endif
6529
6530#ifdef HMVMX_ALWAYS_CHECK_GUEST_STATE
6531 /** @todo r=ramshankar: We can now probably use iemVmxVmentryCheckGuestState here.
6532 * Add a PVMXMSRS parameter to it, so that IEM can look at the host MSRs,
6533 * see @bugref{9180#c54}. */
6534 uint32_t const uInvalidReason = hmR0VmxCheckGuestState(pVCpu, pVmcsInfo);
6535 if (uInvalidReason != VMX_IGS_REASON_NOT_FOUND)
6536 Log4(("hmR0VmxCheckGuestState returned %#x\n", uInvalidReason));
6537#endif
6538}
6539
6540
6541/**
6542 * First C routine invoked after running guest code using hardware-assisted VMX.
6543 *
6544 * @param pVCpu The cross context virtual CPU structure.
6545 * @param pVmxTransient The VMX-transient structure.
6546 * @param rcVMRun Return code of VMLAUNCH/VMRESUME.
6547 *
6548 * @remarks Called with interrupts disabled, and returns with interrupts enabled!
6549 *
6550 * @remarks No-long-jump zone!!! This function will however re-enable longjmps
6551 * unconditionally when it is safe to do so.
6552 */
6553static void hmR0VmxPostRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, int rcVMRun)
6554{
6555 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB flushing. */
6556 ASMAtomicIncU32(&pVCpu->hmr0.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for EMT poking. */
6557 pVCpu->hm.s.fCtxChanged = 0; /* Exits/longjmps to ring-3 requires saving the guest state. */
6558 pVmxTransient->fVmcsFieldsRead = 0; /* Transient fields need to be read from the VMCS. */
6559 pVmxTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */
6560 pVmxTransient->fVectoringDoublePF = false; /* Vectoring double page-fault needs to be determined later. */
6561
6562 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
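     /* When RDTSC/P exits are not taken the CPU applies the TSC offset to guest reads, so
        the last guest TSC value seen is the host TSC at VM-exit plus the offset (for
        nested-guests the nested TSC offset is peeled off again to arrive at the outer
        guest's TSC). */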
6563 if (!(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT))
6564 {
6565 uint64_t uGstTsc;
6566 if (!pVmxTransient->fIsNestedGuest)
6567 uGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
6568 else
6569 {
6570 uint64_t const uNstGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
6571 uGstTsc = CPUMRemoveNestedGuestTscOffset(pVCpu, uNstGstTsc);
6572 }
6573 TMCpuTickSetLastSeen(pVCpu, uGstTsc); /* Update TM with the guest TSC. */
6574 }
6575
6576 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatPreExit, x);
6577 TMNotifyEndOfExecution(pVCpu->CTX_SUFF(pVM), pVCpu, pVCpu->hmr0.s.uTscExit); /* Notify TM that the guest is no longer running. */
6578 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
6579
6580 pVCpu->hmr0.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_REQUIRED; /* Some host state messed up by VMX needs restoring. */
6581 pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */
6582#ifdef VBOX_STRICT
6583 hmR0VmxCheckHostEferMsr(pVmcsInfo); /* Verify that the host EFER MSR wasn't modified. */
6584#endif
6585 Assert(!ASMIntAreEnabled());
6586 ASMSetFlags(pVmxTransient->fEFlags); /* Enable interrupts. */
6587 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6588
6589#ifdef HMVMX_ALWAYS_CLEAN_TRANSIENT
6590 /*
6591 * Clean all the VMCS fields in the transient structure before reading
6592 * anything from the VMCS.
6593 */
6594 pVmxTransient->uExitReason = 0;
6595 pVmxTransient->uExitIntErrorCode = 0;
6596 pVmxTransient->uExitQual = 0;
6597 pVmxTransient->uGuestLinearAddr = 0;
6598 pVmxTransient->uExitIntInfo = 0;
6599 pVmxTransient->cbExitInstr = 0;
6600 pVmxTransient->ExitInstrInfo.u = 0;
6601 pVmxTransient->uEntryIntInfo = 0;
6602 pVmxTransient->uEntryXcptErrorCode = 0;
6603 pVmxTransient->cbEntryInstr = 0;
6604 pVmxTransient->uIdtVectoringInfo = 0;
6605 pVmxTransient->uIdtVectoringErrorCode = 0;
6606#endif
6607
6608 /*
6609 * Save the basic VM-exit reason and check if the VM-entry failed.
6610 * See Intel spec. 24.9.1 "Basic VM-exit Information".
6611 */
6612 uint32_t uExitReason;
6613 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
6614 AssertRC(rc);
6615 pVmxTransient->uExitReason = VMX_EXIT_REASON_BASIC(uExitReason);
6616 pVmxTransient->fVMEntryFailed = VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason);
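     /* The exit-reason field packs the basic exit reason in its low bits and the
        "VM-entry failure" flag in its most significant bit; the two macros above extract
        exactly those. */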
6617
6618 /*
6619 * Log the VM-exit before logging anything else as otherwise it might be a
6620 * tad confusing what happens before and after the world-switch.
6621 */
6622 HMVMX_LOG_EXIT(pVCpu, uExitReason);
6623
6624 /*
6625 * Remove the TSC_AUX MSR from the auto-load/store MSR area and reset any MSR
6626 * bitmap permissions, if it was added before VM-entry.
6627 */
6628 if (pVmxTransient->fRemoveTscAuxMsr)
6629 {
6630 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX);
6631 pVmxTransient->fRemoveTscAuxMsr = false;
6632 }
6633
6634 /*
6635 * Check if VMLAUNCH/VMRESUME succeeded.
6636 * If this failed, we cause a guru meditation and cease further execution.
6637 */
6638 if (RT_LIKELY(rcVMRun == VINF_SUCCESS))
6639 {
6640 /*
6641 * Update the VM-exit history array here even if the VM-entry failed due to:
6642 * - Invalid guest state.
6643 * - MSR loading.
6644 * - Machine-check event.
6645 *
6646 * In any of the above cases we will still have a "valid" VM-exit reason
 6647 * despite @a fVMEntryFailed being true.
6648 *
6649 * See Intel spec. 26.7 "VM-Entry failures during or after loading guest state".
6650 *
6651 * Note! We don't have CS or RIP at this point. Will probably address that later
6652 * by amending the history entry added here.
6653 */
6654 EMHistoryAddExit(pVCpu, EMEXIT_MAKE_FT(EMEXIT_F_KIND_VMX, pVmxTransient->uExitReason & EMEXIT_F_TYPE_MASK),
6655 UINT64_MAX, pVCpu->hmr0.s.uTscExit);
6656
6657 if (RT_LIKELY(!pVmxTransient->fVMEntryFailed))
6658 {
6659 VMMRZCallRing3Enable(pVCpu);
6660 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
6661
6662#ifdef HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
6663 hmR0VmxReadAllRoFieldsVmcs(pVmxTransient);
6664#endif
6665
6666 /*
6667 * Always import the guest-interruptibility state as we need it while evaluating
6668 * injecting events on re-entry.
6669 *
6670 * We don't import CR0 (when unrestricted guest execution is unavailable) despite
6671 * checking for real-mode while exporting the state because all bits that cause
6672 * mode changes wrt CR0 are intercepted.
6673 */
6674 uint64_t const fImportMask = CPUMCTX_EXTRN_INHIBIT_INT
6675 | CPUMCTX_EXTRN_INHIBIT_NMI
6676#if defined(HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE) || defined(HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE)
6677 | HMVMX_CPUMCTX_EXTRN_ALL
6678#elif defined(HMVMX_ALWAYS_SAVE_GUEST_RFLAGS)
6679 | CPUMCTX_EXTRN_RFLAGS
6680#endif
6681 ;
6682 rc = vmxHCImportGuestState(pVCpu, pVmcsInfo, fImportMask);
6683 AssertRC(rc);
6684
6685 /*
6686 * Sync the TPR shadow with our APIC state.
6687 */
6688 if ( !pVmxTransient->fIsNestedGuest
6689 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW))
6690 {
6691 Assert(pVmcsInfo->pbVirtApic);
6692 if (pVmxTransient->u8GuestTpr != pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR])
6693 {
6694 rc = APICSetTpr(pVCpu, pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR]);
6695 AssertRC(rc);
6696 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
6697 }
6698 }
6699
6700 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6701 Assert( pVmxTransient->fWasGuestDebugStateActive == false
6702 || pVmxTransient->fWasHyperDebugStateActive == false);
6703 return;
6704 }
6705 }
6706#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6707 else if (pVmxTransient->fIsNestedGuest)
6708 AssertMsgFailed(("VMLAUNCH/VMRESUME failed but shouldn't happen when VMLAUNCH/VMRESUME was emulated in IEM!\n"));
6709#endif
6710 else
6711 Log4Func(("VM-entry failure: rcVMRun=%Rrc fVMEntryFailed=%RTbool\n", rcVMRun, pVmxTransient->fVMEntryFailed));
6712
6713 VMMRZCallRing3Enable(pVCpu);
6714}
6715
6716
6717/**
6718 * Runs the guest code using hardware-assisted VMX the normal way.
6719 *
6720 * @returns VBox status code.
6721 * @param pVCpu The cross context virtual CPU structure.
6722 * @param pcLoops Pointer to the number of executed loops.
6723 */
6724static VBOXSTRICTRC hmR0VmxRunGuestCodeNormal(PVMCPUCC pVCpu, uint32_t *pcLoops)
6725{
6726 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6727 Assert(pcLoops);
6728 Assert(*pcLoops <= cMaxResumeLoops);
6729 Assert(!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6730
6731#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6732 /*
6733 * Switch to the guest VMCS as we may have transitioned from executing the nested-guest
6734 * without leaving ring-0. Otherwise, if we came from ring-3 we would have loaded the
6735 * guest VMCS while entering the VMX ring-0 session.
6736 */
6737 if (pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
6738 {
6739 int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, false /* fSwitchToNstGstVmcs */);
6740 if (RT_SUCCESS(rc))
6741 { /* likely */ }
6742 else
6743 {
6744 LogRelFunc(("Failed to switch to the guest VMCS. rc=%Rrc\n", rc));
6745 return rc;
6746 }
6747 }
6748#endif
6749
6750 VMXTRANSIENT VmxTransient;
6751 RT_ZERO(VmxTransient);
6752 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6753
6754 /* Paranoia. */
6755 Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfo);
6756
6757 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6758 for (;;)
6759 {
6760 Assert(!HMR0SuspendPending());
6761 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6762 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6763
6764 /*
 6765 * Preparatory work for running guest code; this may force us to
6766 * return to ring-3.
6767 *
6768 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6769 */
6770 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
6771 if (rcStrict != VINF_SUCCESS)
6772 break;
6773
6774 /* Interrupts are disabled at this point! */
6775 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6776 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6777 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6778 /* Interrupts are re-enabled at this point! */
6779
6780 /*
6781 * Check for errors with running the VM (VMLAUNCH/VMRESUME).
6782 */
6783 if (RT_SUCCESS(rcRun))
6784 { /* very likely */ }
6785 else
6786 {
6787 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6788 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6789 return rcRun;
6790 }
6791
6792 /*
6793 * Profile the VM-exit.
6794 */
6795 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6796 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
6797 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6798 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6799 HMVMX_START_EXIT_DISPATCH_PROF();
6800
6801 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6802
6803 /*
6804 * Handle the VM-exit.
6805 */
6806#ifdef HMVMX_USE_FUNCTION_TABLE
6807 rcStrict = g_aVMExitHandlers[VmxTransient.uExitReason].pfn(pVCpu, &VmxTransient);
6808#else
6809 rcStrict = hmR0VmxHandleExit(pVCpu, &VmxTransient);
6810#endif
6811 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6812 if (rcStrict == VINF_SUCCESS)
6813 {
6814 if (++(*pcLoops) <= cMaxResumeLoops)
6815 continue;
6816 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6817 rcStrict = VINF_EM_RAW_INTERRUPT;
6818 }
6819 break;
6820 }
6821
6822 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6823 return rcStrict;
6824}
6825
6826
6827#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6828/**
6829 * Runs the nested-guest code using hardware-assisted VMX.
6830 *
6831 * @returns VBox status code.
6832 * @param pVCpu The cross context virtual CPU structure.
6833 * @param pcLoops Pointer to the number of executed loops.
6834 *
6835 * @sa hmR0VmxRunGuestCodeNormal.
6836 */
6837static VBOXSTRICTRC hmR0VmxRunGuestCodeNested(PVMCPUCC pVCpu, uint32_t *pcLoops)
6838{
6839 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6840 Assert(pcLoops);
6841 Assert(*pcLoops <= cMaxResumeLoops);
6842 Assert(CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6843
6844 /*
6845 * Switch to the nested-guest VMCS as we may have transitioned from executing the
6846 * guest without leaving ring-0. Otherwise, if we came from ring-3 we would have
6847 * loaded the nested-guest VMCS while entering the VMX ring-0 session.
6848 */
6849 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
6850 {
6851 int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, true /* fSwitchToNstGstVmcs */);
6852 if (RT_SUCCESS(rc))
6853 { /* likely */ }
6854 else
6855 {
6856 LogRelFunc(("Failed to switch to the nested-guest VMCS. rc=%Rrc\n", rc));
6857 return rc;
6858 }
6859 }
6860
6861 VMXTRANSIENT VmxTransient;
6862 RT_ZERO(VmxTransient);
6863 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6864 VmxTransient.fIsNestedGuest = true;
6865
6866 /* Paranoia. */
6867 Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfoNstGst);
6868
6869 /* Setup pointer so PGM/IEM can query VM-exit auxiliary info. on demand in ring-0. */
6870 pVCpu->hmr0.s.vmx.pVmxTransient = &VmxTransient;
6871
6872 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6873 for (;;)
6874 {
6875 Assert(!HMR0SuspendPending());
6876 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6877 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6878
6879 /*
 6880 * Preparatory work for running nested-guest code; this may force us to
6881 * return to ring-3.
6882 *
6883 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6884 */
6885 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
6886 if (rcStrict != VINF_SUCCESS)
6887 break;
6888
6889 /* Interrupts are disabled at this point! */
6890 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6891 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6892 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6893 /* Interrupts are re-enabled at this point! */
6894
6895 /*
6896 * Check for errors with running the VM (VMLAUNCH/VMRESUME).
6897 */
6898 if (RT_SUCCESS(rcRun))
6899 { /* very likely */ }
6900 else
6901 {
6902 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6903 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6904 rcStrict = rcRun;
6905 break;
6906 }
6907
6908 /*
6909 * Undo temporary disabling of the APIC-access page monitoring we did in hmR0VmxMergeVmcsNested.
6910 * This is needed for NestedTrap0eHandler (and IEM) to cause nested-guest APIC-access VM-exits.
6911 */
6912 if (VmxTransient.pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
6913 {
6914 PVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
6915 RTGCPHYS const GCPhysApicAccess = pVmcsNstGst->u64AddrApicAccess.u;
6916 PGMHandlerPhysicalReset(pVCpu->CTX_SUFF(pVM), GCPhysApicAccess);
6917 }
6918
6919 /*
6920 * Profile the VM-exit.
6921 */
6922 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6923 STAM_COUNTER_INC(&pVCpu->hm.s.StatNestedExitAll);
6924 STAM_COUNTER_INC(&pVCpu->hm.s.aStatNestedExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6925 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6926 HMVMX_START_EXIT_DISPATCH_PROF();
6927
6928 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6929
6930 /*
6931 * Handle the VM-exit.
6932 */
6933 rcStrict = vmxHCHandleExitNested(pVCpu, &VmxTransient);
6934 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6935 if (rcStrict == VINF_SUCCESS)
6936 {
6937 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
6938 {
6939 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
6940 rcStrict = VINF_VMX_VMEXIT;
6941 }
6942 else
6943 {
6944 if (++(*pcLoops) <= cMaxResumeLoops)
6945 continue;
6946 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6947 rcStrict = VINF_EM_RAW_INTERRUPT;
6948 }
6949 }
6950 else
6951 Assert(rcStrict != VINF_VMX_VMEXIT);
6952 break;
6953 }
6954
6955 /* Ensure VM-exit auxiliary info. is no longer available. */
6956 pVCpu->hmr0.s.vmx.pVmxTransient = NULL;
6957
6958 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6959 return rcStrict;
6960}
6961#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
6962
6963
6964/** @name Execution loop for single stepping, DBGF events and expensive Dtrace
6965 * probes.
6966 *
 6967 * The following few functions and associated structure contain the bloat
6968 * necessary for providing detailed debug events and dtrace probes as well as
6969 * reliable host side single stepping. This works on the principle of
6970 * "subclassing" the normal execution loop and workers. We replace the loop
6971 * method completely and override selected helpers to add necessary adjustments
6972 * to their core operation.
6973 *
6974 * The goal is to keep the "parent" code lean and mean, so as not to sacrifice
6975 * any performance for debug and analysis features.
6976 *
6977 * @{
6978 */
6979
6980/**
6981 * Single steps guest code using hardware-assisted VMX.
6982 *
6983 * This is -not- the same as the guest single-stepping itself (say using EFLAGS.TF)
6984 * but single-stepping through the hypervisor debugger.
6985 *
6986 * @returns Strict VBox status code (i.e. informational status codes too).
6987 * @param pVCpu The cross context virtual CPU structure.
6988 * @param pcLoops Pointer to the number of executed loops.
6989 *
6990 * @note Mostly the same as hmR0VmxRunGuestCodeNormal().
6991 */
6992static VBOXSTRICTRC hmR0VmxRunGuestCodeDebug(PVMCPUCC pVCpu, uint32_t *pcLoops)
6993{
6994 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6995 Assert(pcLoops);
6996 Assert(*pcLoops <= cMaxResumeLoops);
6997
6998 VMXTRANSIENT VmxTransient;
6999 RT_ZERO(VmxTransient);
7000 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
7001
7002 /* Set HMCPU indicators. */
7003 bool const fSavedSingleInstruction = pVCpu->hm.s.fSingleInstruction;
7004 pVCpu->hm.s.fSingleInstruction = pVCpu->hm.s.fSingleInstruction || DBGFIsStepping(pVCpu);
7005 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
7006 pVCpu->hmr0.s.fUsingDebugLoop = true;
7007
7008 /* State we keep to help modify and later restore the VMCS fields we alter, and for detecting steps. */
7009 VMXRUNDBGSTATE DbgState;
7010 vmxHCRunDebugStateInit(pVCpu, &VmxTransient, &DbgState);
7011 vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
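     /* The debug state tracks which additional VM-exits and intercepts are forced on for
        single-stepping, DBGF events and dtrace probes, so they can be (re-)applied each
        iteration below and reverted again afterwards. */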
7012
7013 /*
7014 * The loop.
7015 */
7016 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
7017 for (;;)
7018 {
7019 Assert(!HMR0SuspendPending());
7020 HMVMX_ASSERT_CPU_SAFE(pVCpu);
7021 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
7022 bool fStepping = pVCpu->hm.s.fSingleInstruction;
7023
7024 /* Set up VM-execution controls the next two can respond to. */
7025 vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
7026
7027 /*
 7028 * Preparatory work for running guest code; this may force us to
7029 * return to ring-3.
7030 *
7031 * Warning! This bugger disables interrupts on VINF_SUCCESS!
7032 */
7033 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, fStepping);
7034 if (rcStrict != VINF_SUCCESS)
7035 break;
7036
7037 /* Interrupts are disabled at this point! */
7038 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
7039
7040 /* Override any obnoxious code in the above two calls. */
7041 vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
7042
7043 /*
7044 * Finally execute the guest.
7045 */
7046 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
7047
7048 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
7049 /* Interrupts are re-enabled at this point! */
7050
7051 /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */
7052 if (RT_SUCCESS(rcRun))
7053 { /* very likely */ }
7054 else
7055 {
7056 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
7057 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
7058 return rcRun;
7059 }
7060
7061 /* Profile the VM-exit. */
7062 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
7063 STAM_COUNTER_INC(&pVCpu->hm.s.StatDebugExitAll);
7064 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
7065 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
7066 HMVMX_START_EXIT_DISPATCH_PROF();
7067
7068 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
7069
7070 /*
 7071 * Handle the VM-exit - we quit earlier on certain VM-exits, see vmxHCRunDebugHandleExit().
7072 */
7073 rcStrict = vmxHCRunDebugHandleExit(pVCpu, &VmxTransient, &DbgState);
7074 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
7075 if (rcStrict != VINF_SUCCESS)
7076 break;
7077 if (++(*pcLoops) > cMaxResumeLoops)
7078 {
7079 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
7080 rcStrict = VINF_EM_RAW_INTERRUPT;
7081 break;
7082 }
7083
7084 /*
 7085 * Stepping: Did the RIP (or CS) change? If so, consider it a single step.
7086 * Otherwise, make sure one of the TFs gets set.
7087 */
7088 if (fStepping)
7089 {
7090 int rc = hmR0VmxImportGuestState(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
7091 AssertRC(rc);
7092 if ( pVCpu->cpum.GstCtx.rip != DbgState.uRipStart
7093 || pVCpu->cpum.GstCtx.cs.Sel != DbgState.uCsStart)
7094 {
7095 rcStrict = VINF_EM_DBG_STEPPED;
7096 break;
7097 }
7098 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR7);
7099 }
7100
7101 /*
 7102 * Update when the dtrace settings change (DBGF kicks us, so no need to check).
7103 */
7104 if (VBOXVMM_GET_SETTINGS_SEQ_NO() != DbgState.uDtraceSettingsSeqNo)
7105 vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
7106
 7107 /* Restore all controls applied by vmxHCPreRunGuestDebugStateApply above. */
7108 rcStrict = vmxHCRunDebugStateRevert(pVCpu, &VmxTransient, &DbgState, rcStrict);
7109 Assert(rcStrict == VINF_SUCCESS);
7110 }
7111
7112 /*
7113 * Clear the X86_EFL_TF if necessary.
7114 */
7115 if (pVCpu->hmr0.s.fClearTrapFlag)
7116 {
7117 int rc = hmR0VmxImportGuestState(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_RFLAGS);
7118 AssertRC(rc);
7119 pVCpu->hmr0.s.fClearTrapFlag = false;
7120 pVCpu->cpum.GstCtx.eflags.Bits.u1TF = 0;
7121 }
 7122 /** @todo there seem to be issues with the resume flag when the monitor trap
7123 * flag is pending without being used. Seen early in bios init when
7124 * accessing APIC page in protected mode. */
7125
7126/** @todo we need to do vmxHCRunDebugStateRevert here too, in case we broke
7127 * out of the above loop. */
7128
7129 /* Restore HMCPU indicators. */
7130 pVCpu->hmr0.s.fUsingDebugLoop = false;
7131 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
7132 pVCpu->hm.s.fSingleInstruction = fSavedSingleInstruction;
7133
7134 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
7135 return rcStrict;
7136}
7137
7138/** @} */
7139
7140
7141/**
7142 * Checks if any expensive dtrace probes are enabled and we should go to the
7143 * debug loop.
7144 *
7145 * @returns true if we should use debug loop, false if not.
7146 */
7147static bool hmR0VmxAnyExpensiveProbesEnabled(void)
7148{
7149 /* It's probably faster to OR the raw 32-bit counter variables together.
7150 Since the variables are in an array and the probes are next to one
 7151 another (more or less), we have good locality. So, better to read
 7152 eight or nine cache lines every time and only have one conditional, than
7153 128+ conditionals, right? */
7154 return ( VBOXVMM_R0_HMVMX_VMEXIT_ENABLED_RAW() /* expensive too due to context */
7155 | VBOXVMM_XCPT_DE_ENABLED_RAW()
7156 | VBOXVMM_XCPT_DB_ENABLED_RAW()
7157 | VBOXVMM_XCPT_BP_ENABLED_RAW()
7158 | VBOXVMM_XCPT_OF_ENABLED_RAW()
7159 | VBOXVMM_XCPT_BR_ENABLED_RAW()
7160 | VBOXVMM_XCPT_UD_ENABLED_RAW()
7161 | VBOXVMM_XCPT_NM_ENABLED_RAW()
7162 | VBOXVMM_XCPT_DF_ENABLED_RAW()
7163 | VBOXVMM_XCPT_TS_ENABLED_RAW()
7164 | VBOXVMM_XCPT_NP_ENABLED_RAW()
7165 | VBOXVMM_XCPT_SS_ENABLED_RAW()
7166 | VBOXVMM_XCPT_GP_ENABLED_RAW()
7167 | VBOXVMM_XCPT_PF_ENABLED_RAW()
7168 | VBOXVMM_XCPT_MF_ENABLED_RAW()
7169 | VBOXVMM_XCPT_AC_ENABLED_RAW()
7170 | VBOXVMM_XCPT_XF_ENABLED_RAW()
7171 | VBOXVMM_XCPT_VE_ENABLED_RAW()
7172 | VBOXVMM_XCPT_SX_ENABLED_RAW()
7173 | VBOXVMM_INT_SOFTWARE_ENABLED_RAW()
7174 | VBOXVMM_INT_HARDWARE_ENABLED_RAW()
7175 ) != 0
7176 || ( VBOXVMM_INSTR_HALT_ENABLED_RAW()
7177 | VBOXVMM_INSTR_MWAIT_ENABLED_RAW()
7178 | VBOXVMM_INSTR_MONITOR_ENABLED_RAW()
7179 | VBOXVMM_INSTR_CPUID_ENABLED_RAW()
7180 | VBOXVMM_INSTR_INVD_ENABLED_RAW()
7181 | VBOXVMM_INSTR_WBINVD_ENABLED_RAW()
7182 | VBOXVMM_INSTR_INVLPG_ENABLED_RAW()
7183 | VBOXVMM_INSTR_RDTSC_ENABLED_RAW()
7184 | VBOXVMM_INSTR_RDTSCP_ENABLED_RAW()
7185 | VBOXVMM_INSTR_RDPMC_ENABLED_RAW()
7186 | VBOXVMM_INSTR_RDMSR_ENABLED_RAW()
7187 | VBOXVMM_INSTR_WRMSR_ENABLED_RAW()
7188 | VBOXVMM_INSTR_CRX_READ_ENABLED_RAW()
7189 | VBOXVMM_INSTR_CRX_WRITE_ENABLED_RAW()
7190 | VBOXVMM_INSTR_DRX_READ_ENABLED_RAW()
7191 | VBOXVMM_INSTR_DRX_WRITE_ENABLED_RAW()
7192 | VBOXVMM_INSTR_PAUSE_ENABLED_RAW()
7193 | VBOXVMM_INSTR_XSETBV_ENABLED_RAW()
7194 | VBOXVMM_INSTR_SIDT_ENABLED_RAW()
7195 | VBOXVMM_INSTR_LIDT_ENABLED_RAW()
7196 | VBOXVMM_INSTR_SGDT_ENABLED_RAW()
7197 | VBOXVMM_INSTR_LGDT_ENABLED_RAW()
7198 | VBOXVMM_INSTR_SLDT_ENABLED_RAW()
7199 | VBOXVMM_INSTR_LLDT_ENABLED_RAW()
7200 | VBOXVMM_INSTR_STR_ENABLED_RAW()
7201 | VBOXVMM_INSTR_LTR_ENABLED_RAW()
7202 | VBOXVMM_INSTR_GETSEC_ENABLED_RAW()
7203 | VBOXVMM_INSTR_RSM_ENABLED_RAW()
7204 | VBOXVMM_INSTR_RDRAND_ENABLED_RAW()
7205 | VBOXVMM_INSTR_RDSEED_ENABLED_RAW()
7206 | VBOXVMM_INSTR_XSAVES_ENABLED_RAW()
7207 | VBOXVMM_INSTR_XRSTORS_ENABLED_RAW()
7208 | VBOXVMM_INSTR_VMM_CALL_ENABLED_RAW()
7209 | VBOXVMM_INSTR_VMX_VMCLEAR_ENABLED_RAW()
7210 | VBOXVMM_INSTR_VMX_VMLAUNCH_ENABLED_RAW()
7211 | VBOXVMM_INSTR_VMX_VMPTRLD_ENABLED_RAW()
7212 | VBOXVMM_INSTR_VMX_VMPTRST_ENABLED_RAW()
7213 | VBOXVMM_INSTR_VMX_VMREAD_ENABLED_RAW()
7214 | VBOXVMM_INSTR_VMX_VMRESUME_ENABLED_RAW()
7215 | VBOXVMM_INSTR_VMX_VMWRITE_ENABLED_RAW()
7216 | VBOXVMM_INSTR_VMX_VMXOFF_ENABLED_RAW()
7217 | VBOXVMM_INSTR_VMX_VMXON_ENABLED_RAW()
7218 | VBOXVMM_INSTR_VMX_VMFUNC_ENABLED_RAW()
7219 | VBOXVMM_INSTR_VMX_INVEPT_ENABLED_RAW()
7220 | VBOXVMM_INSTR_VMX_INVVPID_ENABLED_RAW()
7221 | VBOXVMM_INSTR_VMX_INVPCID_ENABLED_RAW()
7222 ) != 0
7223 || ( VBOXVMM_EXIT_TASK_SWITCH_ENABLED_RAW()
7224 | VBOXVMM_EXIT_HALT_ENABLED_RAW()
7225 | VBOXVMM_EXIT_MWAIT_ENABLED_RAW()
7226 | VBOXVMM_EXIT_MONITOR_ENABLED_RAW()
7227 | VBOXVMM_EXIT_CPUID_ENABLED_RAW()
7228 | VBOXVMM_EXIT_INVD_ENABLED_RAW()
7229 | VBOXVMM_EXIT_WBINVD_ENABLED_RAW()
7230 | VBOXVMM_EXIT_INVLPG_ENABLED_RAW()
7231 | VBOXVMM_EXIT_RDTSC_ENABLED_RAW()
7232 | VBOXVMM_EXIT_RDTSCP_ENABLED_RAW()
7233 | VBOXVMM_EXIT_RDPMC_ENABLED_RAW()
7234 | VBOXVMM_EXIT_RDMSR_ENABLED_RAW()
7235 | VBOXVMM_EXIT_WRMSR_ENABLED_RAW()
7236 | VBOXVMM_EXIT_CRX_READ_ENABLED_RAW()
7237 | VBOXVMM_EXIT_CRX_WRITE_ENABLED_RAW()
7238 | VBOXVMM_EXIT_DRX_READ_ENABLED_RAW()
7239 | VBOXVMM_EXIT_DRX_WRITE_ENABLED_RAW()
7240 | VBOXVMM_EXIT_PAUSE_ENABLED_RAW()
7241 | VBOXVMM_EXIT_XSETBV_ENABLED_RAW()
7242 | VBOXVMM_EXIT_SIDT_ENABLED_RAW()
7243 | VBOXVMM_EXIT_LIDT_ENABLED_RAW()
7244 | VBOXVMM_EXIT_SGDT_ENABLED_RAW()
7245 | VBOXVMM_EXIT_LGDT_ENABLED_RAW()
7246 | VBOXVMM_EXIT_SLDT_ENABLED_RAW()
7247 | VBOXVMM_EXIT_LLDT_ENABLED_RAW()
7248 | VBOXVMM_EXIT_STR_ENABLED_RAW()
7249 | VBOXVMM_EXIT_LTR_ENABLED_RAW()
7250 | VBOXVMM_EXIT_GETSEC_ENABLED_RAW()
7251 | VBOXVMM_EXIT_RSM_ENABLED_RAW()
7252 | VBOXVMM_EXIT_RDRAND_ENABLED_RAW()
7253 | VBOXVMM_EXIT_RDSEED_ENABLED_RAW()
7254 | VBOXVMM_EXIT_XSAVES_ENABLED_RAW()
7255 | VBOXVMM_EXIT_XRSTORS_ENABLED_RAW()
7256 | VBOXVMM_EXIT_VMM_CALL_ENABLED_RAW()
7257 | VBOXVMM_EXIT_VMX_VMCLEAR_ENABLED_RAW()
7258 | VBOXVMM_EXIT_VMX_VMLAUNCH_ENABLED_RAW()
7259 | VBOXVMM_EXIT_VMX_VMPTRLD_ENABLED_RAW()
7260 | VBOXVMM_EXIT_VMX_VMPTRST_ENABLED_RAW()
7261 | VBOXVMM_EXIT_VMX_VMREAD_ENABLED_RAW()
7262 | VBOXVMM_EXIT_VMX_VMRESUME_ENABLED_RAW()
7263 | VBOXVMM_EXIT_VMX_VMWRITE_ENABLED_RAW()
7264 | VBOXVMM_EXIT_VMX_VMXOFF_ENABLED_RAW()
7265 | VBOXVMM_EXIT_VMX_VMXON_ENABLED_RAW()
7266 | VBOXVMM_EXIT_VMX_VMFUNC_ENABLED_RAW()
7267 | VBOXVMM_EXIT_VMX_INVEPT_ENABLED_RAW()
7268 | VBOXVMM_EXIT_VMX_INVVPID_ENABLED_RAW()
7269 | VBOXVMM_EXIT_VMX_INVPCID_ENABLED_RAW()
7270 | VBOXVMM_EXIT_VMX_EPT_VIOLATION_ENABLED_RAW()
7271 | VBOXVMM_EXIT_VMX_EPT_MISCONFIG_ENABLED_RAW()
7272 | VBOXVMM_EXIT_VMX_VAPIC_ACCESS_ENABLED_RAW()
7273 | VBOXVMM_EXIT_VMX_VAPIC_WRITE_ENABLED_RAW()
7274 ) != 0;
7275}
7276
7277
7278/**
7279 * Runs the guest using hardware-assisted VMX.
7280 *
7281 * @returns Strict VBox status code (i.e. informational status codes too).
7282 * @param pVCpu The cross context virtual CPU structure.
7283 */
7284VMMR0DECL(VBOXSTRICTRC) VMXR0RunGuestCode(PVMCPUCC pVCpu)
7285{
7286 AssertPtr(pVCpu);
7287 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
7288 Assert(VMMRZCallRing3IsEnabled(pVCpu));
7289 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
7290 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
7291
7292 VBOXSTRICTRC rcStrict;
7293 uint32_t cLoops = 0;
7294 for (;;)
7295 {
7296#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
7297 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(pCtx);
7298#else
7299 NOREF(pCtx);
7300 bool const fInNestedGuestMode = false;
7301#endif
7302 if (!fInNestedGuestMode)
7303 {
7304 if ( !pVCpu->hm.s.fUseDebugLoop
7305 && (!VBOXVMM_ANY_PROBES_ENABLED() || !hmR0VmxAnyExpensiveProbesEnabled())
7306 && !DBGFIsStepping(pVCpu)
7307 && !pVCpu->CTX_SUFF(pVM)->dbgf.ro.cEnabledInt3Breakpoints)
7308 rcStrict = hmR0VmxRunGuestCodeNormal(pVCpu, &cLoops);
7309 else
7310 rcStrict = hmR0VmxRunGuestCodeDebug(pVCpu, &cLoops);
7311 }
7312#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
7313 else
7314 rcStrict = hmR0VmxRunGuestCodeNested(pVCpu, &cLoops);
7315
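        /* An emulated VMLAUNCH/VMRESUME lands us here with VINF_VMX_VMLAUNCH_VMRESUME, in
           which case we go around the loop again and take the nested-guest path; likewise,
           a nested-guest VM-exit returns VINF_VMX_VMEXIT and we go around again to resume
           the outer guest. */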
7316 if (rcStrict == VINF_VMX_VMLAUNCH_VMRESUME)
7317 {
7318 Assert(CPUMIsGuestInVmxNonRootMode(pCtx));
7319 continue;
7320 }
7321 if (rcStrict == VINF_VMX_VMEXIT)
7322 {
7323 Assert(!CPUMIsGuestInVmxNonRootMode(pCtx));
7324 continue;
7325 }
7326#endif
7327 break;
7328 }
7329
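    /* Translate a couple of status codes into what ring-3 expects: VERR_EM_INTERPRETER
       becomes a plain emulate-instruction request, while VINF_EM_RESET (e.g. from a triple
       fault while injecting events) becomes VINF_EM_TRIPLE_FAULT. */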
7330 int const rcLoop = VBOXSTRICTRC_VAL(rcStrict);
7331 switch (rcLoop)
7332 {
7333 case VERR_EM_INTERPRETER: rcStrict = VINF_EM_RAW_EMULATE_INSTR; break;
7334 case VINF_EM_RESET: rcStrict = VINF_EM_TRIPLE_FAULT; break;
7335 }
7336
7337 int rc2 = hmR0VmxExitToRing3(pVCpu, rcStrict);
7338 if (RT_FAILURE(rc2))
7339 {
7340 pVCpu->hm.s.u32HMError = (uint32_t)VBOXSTRICTRC_VAL(rcStrict);
7341 rcStrict = rc2;
7342 }
7343 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
7344 Assert(!VMMR0AssertionIsNotificationSet(pVCpu));
7345 return rcStrict;
7346}
7347