VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp@ 100735

Last change on this file since 100735 was 100734, checked in by vboxsync, 22 months ago

VMM/IEM: Generate TBs for invalid instruction encodings as well. This involved special-casing recompiler call generation for C instruction implementation functions that don't take any extra arguments, so that we can catch all the deeply hidden IEMOP_RAISE_INVALID_OPCODE_RET invocations and similar. Also had to clean up hacky decoding of effective-address-related opcode bytes for undefined opcodes, introducing IEM_OPCODE_SKIP_RM_EFF_ADDR_BYTES(a_bRm) to hide the ugliness. bugref:10369

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 68.8 KB
Line 
1/* $Id: IEMAllThrdRecompiler.cpp 100734 2023-07-29 02:04:22Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) :
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) :
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/x86.h>
86
87#ifndef TST_IEM_CHECK_MC
88# include "IEMInline.h"
89# include "IEMOpHlp.h"
90# include "IEMMc.h"
91#endif
92
93#include "IEMThreadedFunctions.h"
94
95
96/*
97 * Narrow down configs here to avoid wasting time on unused configs.
98 */
99
100#ifndef IEM_WITH_CODE_TLB
101# error The code TLB must be enabled for the recompiler.
102#endif
103
104#ifndef IEM_WITH_DATA_TLB
105# error The data TLB must be enabled for the recompiler.
106#endif
107
108#ifndef IEM_WITH_SETJMP
109# error The setjmp approach must be enabled for the recompiler.
110#endif
111
112
113/*********************************************************************************************************************************
114* Internal Functions *
115*********************************************************************************************************************************/
116static bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb);
117static VBOXSTRICTRC iemThreadedTbExec(PVMCPUCC pVCpu, PIEMTB pTb);
118
119
120/*********************************************************************************************************************************
121* Defined Constants And Macros *
122*********************************************************************************************************************************/
123#define g_apfnOneByteMap g_apfnIemThreadedRecompilerOneByteMap
124
125
126/*
127 * Override IEM_MC_CALC_RM_EFF_ADDR to use iemOpHlpCalcRmEffAddrJmpEx and produce uEffAddrInfo.
128 */
129#undef IEM_MC_CALC_RM_EFF_ADDR
130#ifndef IEM_WITH_SETJMP
131# define IEM_MC_CALC_RM_EFF_ADDR(a_GCPtrEff, a_bRm, a_cbImmAndRspOffset) \
132 uint64_t uEffAddrInfo; \
133 IEM_MC_RETURN_ON_FAILURE(iemOpHlpCalcRmEffAddrJmpEx(pVCpu, (a_bRm), (a_cbImmAndRspOffset), &(a_GCPtrEff), &uEffAddrInfo))
134#else
135# define IEM_MC_CALC_RM_EFF_ADDR(a_GCPtrEff, a_bRm, a_cbImmAndRspOffset) \
136 uint64_t uEffAddrInfo; \
137 ((a_GCPtrEff) = iemOpHlpCalcRmEffAddrJmpEx(pVCpu, (a_bRm), (a_cbImmAndRspOffset), &uEffAddrInfo))
138#endif
139
140/*
141 * Likewise override IEM_OPCODE_SKIP_RM_EFF_ADDR_BYTES so we fetch all the opcodes.
142 */
143#undef IEM_OPCODE_SKIP_RM_EFF_ADDR_BYTES
144#define IEM_OPCODE_SKIP_RM_EFF_ADDR_BYTES(a_bRm) do { \
145 uint64_t uEffAddrInfo; \
146 (void)iemOpHlpCalcRmEffAddrJmpEx(pVCpu, bRm, 0, &uEffAddrInfo); \
147 } while (0)
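/* Note: the uEffAddrInfo value filled in by iemOpHlpCalcRmEffAddrJmpEx packs the
   32-bit displacement into bits 31:0 and the SIB byte into bits 39:32 (see the
   function's documentation below).  Illustrative example: for a mov eax, [esp+8]
   encoding (8B 44 24 08) the value would be UINT64_C(0x24) << 32 | 0x08. */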
148
149/*
150 * Override the IEM_MC_REL_JMP_S*_AND_FINISH macros to check for zero byte jumps.
151 */
152#undef IEM_MC_REL_JMP_S8_AND_FINISH
153#define IEM_MC_REL_JMP_S8_AND_FINISH(a_i8) do { \
154 Assert(pVCpu->iem.s.fTbBranched != 0); \
155 if ((a_i8) == 0) \
156 pVCpu->iem.s.fTbBranched |= IEMBRANCHED_F_ZERO; \
157 return iemRegRipRelativeJumpS8AndFinishClearingRF(pVCpu, IEM_GET_INSTR_LEN(pVCpu), (a_i8), pVCpu->iem.s.enmEffOpSize); \
158 } while (0)
159
160#undef IEM_MC_REL_JMP_S16_AND_FINISH
161#define IEM_MC_REL_JMP_S16_AND_FINISH(a_i16) do { \
162 Assert(pVCpu->iem.s.fTbBranched != 0); \
163 if ((a_i16) == 0) \
164 pVCpu->iem.s.fTbBranched |= IEMBRANCHED_F_ZERO; \
165 return iemRegRipRelativeJumpS16AndFinishClearingRF(pVCpu, IEM_GET_INSTR_LEN(pVCpu), (a_i16)); \
166 } while (0)
167
168#undef IEM_MC_REL_JMP_S32_AND_FINISH
169#define IEM_MC_REL_JMP_S32_AND_FINISH(a_i32) do { \
170 Assert(pVCpu->iem.s.fTbBranched != 0); \
171 if ((a_i32) == 0) \
172 pVCpu->iem.s.fTbBranched |= IEMBRANCHED_F_ZERO; \
173 return iemRegRipRelativeJumpS32AndFinishClearingRF(pVCpu, IEM_GET_INSTR_LEN(pVCpu), (a_i32), pVCpu->iem.s.enmEffOpSize); \
174 } while (0)
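/* A zero displacement lands on the next instruction, so IEMBRANCHED_F_ZERO lets
   iemThreadedCompileBeginEmitCallsComplications() keep extending the current
   opcode range rather than opening a new one (see the "zero byte jump" handling
   in that function). */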
175
176
177/*
178 * Emit call macros.
179 */
180#define IEM_MC2_BEGIN_EMIT_CALLS() \
181 { \
182 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3; \
183 uint8_t const cbInstrMc2 = IEM_GET_INSTR_LEN(pVCpu); \
184 AssertMsg(pVCpu->iem.s.offOpcode == cbInstrMc2, \
185 ("%u vs %u (%04x:%08RX64)\n", pVCpu->iem.s.offOpcode, cbInstrMc2, \
186 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip)); \
187 \
188 /* No page crossing, right? */ \
189 uint16_t const offOpcodeMc2 = pTb->cbOpcodes; \
190 uint8_t const idxRangeMc2 = pTb->cRanges - 1; \
191 if ( !pVCpu->iem.s.fTbCrossedPage \
192 && !pVCpu->iem.s.fTbCheckOpcodes \
193 && !pVCpu->iem.s.fTbBranched \
194 && !(pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)) \
195 { \
196 /** @todo Custom copy function, given range is 1 thru 15 bytes. */ \
197 memcpy(&pTb->pabOpcodes[offOpcodeMc2], pVCpu->iem.s.abOpcode, pVCpu->iem.s.offOpcode); \
198 pTb->cbOpcodes = offOpcodeMc2 + pVCpu->iem.s.offOpcode; \
199 pTb->aRanges[idxRangeMc2].cbOpcodes += cbInstrMc2; \
200 Assert(pTb->cbOpcodes <= pTb->cbOpcodesAllocated); \
201 } \
202 else if (iemThreadedCompileBeginEmitCallsComplications(pVCpu, pTb)) \
203 { /* likely */ } \
204 else \
205 return VINF_IEM_RECOMPILE_END_TB; \
206 \
207 do { } while (0)
208#define IEM_MC2_EMIT_CALL_0(a_enmFunction) do { \
209 IEMTHREADEDFUNCS const enmFunctionCheck = a_enmFunction; RT_NOREF(enmFunctionCheck); \
210 \
211 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++]; \
212 pCall->enmFunction = a_enmFunction; \
213 pCall->offOpcode = offOpcodeMc2; \
214 pCall->cbOpcode = cbInstrMc2; \
215 pCall->idxRange = idxRangeMc2; \
216 pCall->auParams[0] = 0; \
217 pCall->auParams[1] = 0; \
218 pCall->auParams[2] = 0; \
219 } while (0)
220#define IEM_MC2_EMIT_CALL_1(a_enmFunction, a_uArg0) do { \
221 IEMTHREADEDFUNCS const enmFunctionCheck = a_enmFunction; RT_NOREF(enmFunctionCheck); \
222 uint64_t const uArg0Check = (a_uArg0); RT_NOREF(uArg0Check); \
223 \
224 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++]; \
225 pCall->enmFunction = a_enmFunction; \
226 pCall->offOpcode = offOpcodeMc2; \
227 pCall->cbOpcode = cbInstrMc2; \
228 pCall->idxRange = idxRangeMc2; \
229 pCall->auParams[0] = a_uArg0; \
230 pCall->auParams[1] = 0; \
231 pCall->auParams[2] = 0; \
232 } while (0)
233#define IEM_MC2_EMIT_CALL_2(a_enmFunction, a_uArg0, a_uArg1) do { \
234 IEMTHREADEDFUNCS const enmFunctionCheck = a_enmFunction; RT_NOREF(enmFunctionCheck); \
235 uint64_t const uArg0Check = (a_uArg0); RT_NOREF(uArg0Check); \
236 uint64_t const uArg1Check = (a_uArg1); RT_NOREF(uArg1Check); \
237 \
238 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++]; \
239 pCall->enmFunction = a_enmFunction; \
240 pCall->offOpcode = offOpcodeMc2; \
241 pCall->cbOpcode = cbInstrMc2; \
242 pCall->idxRange = idxRangeMc2; \
243 pCall->auParams[0] = a_uArg0; \
244 pCall->auParams[1] = a_uArg1; \
245 pCall->auParams[2] = 0; \
246 } while (0)
247#define IEM_MC2_EMIT_CALL_3(a_enmFunction, a_uArg0, a_uArg1, a_uArg2) do { \
248 IEMTHREADEDFUNCS const enmFunctionCheck = a_enmFunction; RT_NOREF(enmFunctionCheck); \
249 uint64_t const uArg0Check = (a_uArg0); RT_NOREF(uArg0Check); \
250 uint64_t const uArg1Check = (a_uArg1); RT_NOREF(uArg1Check); \
251 uint64_t const uArg2Check = (a_uArg2); RT_NOREF(uArg2Check); \
252 \
253 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++]; \
254 pCall->enmFunction = a_enmFunction; \
255 pCall->offOpcode = offOpcodeMc2; \
256 pCall->cbOpcode = cbInstrMc2; \
257 pCall->idxRange = idxRangeMc2; \
258 pCall->auParams[0] = a_uArg0; \
259 pCall->auParams[1] = a_uArg1; \
260 pCall->auParams[2] = a_uArg2; \
261 } while (0)
262#define IEM_MC2_END_EMIT_CALLS(a_fCImplFlags) \
263 Assert(pTb->cInstructions <= pTb->Thrd.cCalls); \
264 if (pTb->cInstructions < 255) \
265 pTb->cInstructions++; \
266 uint32_t const fCImplFlagsMc2 = (a_fCImplFlags); \
267 RT_NOREF(fCImplFlagsMc2); \
268 } while (0)
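/* Typical emit sequence, as used by iemThreadedRecompilerMcDeferToCImpl0 below:
 *
 *      IEM_MC2_BEGIN_EMIT_CALLS();
 *      IEM_MC2_EMIT_CALL_2(kIemThreadedFunc_DeferToCImpl0, (uintptr_t)pfnCImpl, IEM_GET_INSTR_LEN(pVCpu));
 *      IEM_MC2_END_EMIT_CALLS(fFlags);
 *
 * BEGIN copies/validates the opcode bytes for the current instruction, each
 * EMIT_CALL_N appends one IEMTHRDEDCALLENTRY to the TB's call table, and END
 * bumps the TB instruction counter. */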
269
270
271/*
272 * IEM_MC_DEFER_TO_CIMPL_0 is easily wrapped up.
273 *
274 * Doing so will also take care of IEMOP_RAISE_DIVIDE_ERROR, IEMOP_RAISE_INVALID_LOCK_PREFIX,
275 * IEMOP_RAISE_INVALID_OPCODE and their users.
276 */
277#undef IEM_MC_DEFER_TO_CIMPL_0_RET
278#define IEM_MC_DEFER_TO_CIMPL_0_RET(a_fFlags, a_pfnCImpl) \
279 return iemThreadedRecompilerMcDeferToCImpl0(pVCpu, a_fFlags, a_pfnCImpl)
280
281DECLINLINE(VBOXSTRICTRC) iemThreadedRecompilerMcDeferToCImpl0(PVMCPUCC pVCpu, uint32_t fFlags, PFNIEMCIMPL0 pfnCImpl)
282{
283 Log8(("CImpl0: %04x:%08RX64 LB %#x: %#x %p\n",
284 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, IEM_GET_INSTR_LEN(pVCpu), fFlags, pfnCImpl));
285
286 IEM_MC2_BEGIN_EMIT_CALLS();
287 IEM_MC2_EMIT_CALL_2(kIemThreadedFunc_DeferToCImpl0, (uintptr_t)pfnCImpl, IEM_GET_INSTR_LEN(pVCpu));
288 IEM_MC2_END_EMIT_CALLS(fFlags);
289
290 /* We have to repeat work normally done by kdCImplFlags and
291 ThreadedFunctionVariation.emitThreadedCallStmts here. */
292 if (fFlags & (IEM_CIMPL_F_END_TB | IEM_CIMPL_F_MODE | IEM_CIMPL_F_VMEXIT | IEM_CIMPL_F_BRANCH_FAR | IEM_CIMPL_F_REP))
293 pVCpu->iem.s.fEndTb = true;
294
295 AssertCompile(IEM_CIMPL_F_BRANCH_DIRECT == IEMBRANCHED_F_DIRECT);
296 AssertCompile(IEM_CIMPL_F_BRANCH_INDIRECT == IEMBRANCHED_F_INDIRECT);
297 AssertCompile(IEM_CIMPL_F_BRANCH_RELATIVE == IEMBRANCHED_F_RELATIVE);
298 AssertCompile(IEM_CIMPL_F_BRANCH_CONDITIONAL == IEMBRANCHED_F_CONDITIONAL);
299 AssertCompile(IEM_CIMPL_F_BRANCH_FAR == IEMBRANCHED_F_FAR);
300 if (fFlags & IEM_CIMPL_F_BRANCH_ANY)
301 pVCpu->iem.s.fTbBranched = fFlags & (IEM_CIMPL_F_BRANCH_ANY | IEM_CIMPL_F_BRANCH_FAR | IEM_CIMPL_F_BRANCH_CONDITIONAL);
302
303 return pfnCImpl(pVCpu, IEM_GET_INSTR_LEN(pVCpu));
304}
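/* Note: the helper above both records the threaded call in the TB under
   compilation and immediately invokes pfnCImpl, i.e. the instruction is
   interpreted while being compiled (see iemThreadedCompile further down). */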
305
306/**
307 * Calculates the effective address of a ModR/M memory operand, extended version
308 * for use in the recompilers.
309 *
310 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
311 *
312 * May longjmp on internal error.
313 *
314 * @return The effective address.
315 * @param pVCpu The cross context virtual CPU structure of the calling thread.
316 * @param bRm The ModRM byte.
317 * @param cbImmAndRspOffset - First byte: The size of any immediate
318 * following the effective address opcode bytes
319 * (only for RIP relative addressing).
320 * - Second byte: RSP displacement (for POP [ESP]).
321 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
322 * SIB byte (bits 39:32).
323 *
324 * @note This must be defined in a source file with matching
325 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
326 * or implemented differently...
327 */
328RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
329{
330 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
331# define SET_SS_DEF() \
332 do \
333 { \
334 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
335 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
336 } while (0)
337
338 if (!IEM_IS_64BIT_CODE(pVCpu))
339 {
340/** @todo Check the effective address size crap! */
341 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
342 {
343 uint16_t u16EffAddr;
344
345 /* Handle the disp16 form with no registers first. */
346 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
347 {
348 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
349 *puInfo = u16EffAddr;
350 }
351 else
352 {
353 /* Get the displacement. */
354 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
355 {
356 case 0: u16EffAddr = 0; break;
357 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
358 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
359 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
360 }
361 *puInfo = u16EffAddr;
362
363 /* Add the base and index registers to the disp. */
364 switch (bRm & X86_MODRM_RM_MASK)
365 {
366 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
367 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
368 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
369 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
370 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
371 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
372 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
373 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
374 }
375 }
376
377 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
378 return u16EffAddr;
379 }
380
381 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
382 uint32_t u32EffAddr;
383 uint64_t uInfo;
384
385 /* Handle the disp32 form with no registers first. */
386 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
387 {
388 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
389 uInfo = u32EffAddr;
390 }
391 else
392 {
393 /* Get the register (or SIB) value. */
394 uInfo = 0;
395 switch ((bRm & X86_MODRM_RM_MASK))
396 {
397 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
398 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
399 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
400 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
401 case 4: /* SIB */
402 {
403 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
404 uInfo = (uint64_t)bSib << 32;
405
406 /* Get the index and scale it. */
407 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
408 {
409 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
410 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
411 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
412 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
413 case 4: u32EffAddr = 0; /*none */ break;
414 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
415 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
416 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
417 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
418 }
419 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
420
421 /* add base */
422 switch (bSib & X86_SIB_BASE_MASK)
423 {
424 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
425 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
426 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
427 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
428 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
429 case 5:
430 if ((bRm & X86_MODRM_MOD_MASK) != 0)
431 {
432 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
433 SET_SS_DEF();
434 }
435 else
436 {
437 uint32_t u32Disp;
438 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
439 u32EffAddr += u32Disp;
440 uInfo |= u32Disp;
441 }
442 break;
443 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
444 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
445 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
446 }
447 break;
448 }
449 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
450 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
451 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
452 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
453 }
454
455 /* Get and add the displacement. */
456 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
457 {
458 case 0:
459 break;
460 case 1:
461 {
462 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
463 u32EffAddr += i8Disp;
464 uInfo |= (uint32_t)(int32_t)i8Disp;
465 break;
466 }
467 case 2:
468 {
469 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
470 u32EffAddr += u32Disp;
471 uInfo |= u32Disp;
472 break;
473 }
474 default:
475 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
476 }
477 }
478
479 *puInfo = uInfo;
480 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
481 return u32EffAddr;
482 }
483
484 uint64_t u64EffAddr;
485 uint64_t uInfo;
486
487 /* Handle the rip+disp32 form with no registers first. */
488 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
489 {
490 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
491 uInfo = (uint32_t)u64EffAddr;
492 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
493 }
494 else
495 {
496 /* Get the register (or SIB) value. */
497 uInfo = 0;
498 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
499 {
500 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
501 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
502 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
503 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
504 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
505 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
506 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
507 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
508 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
509 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
510 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
511 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
512 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
513 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
514 /* SIB */
515 case 4:
516 case 12:
517 {
518 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
519 uInfo = (uint64_t)bSib << 32;
520
521 /* Get the index and scale it. */
522 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
523 {
524 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
525 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
526 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
527 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
528 case 4: u64EffAddr = 0; /*none */ break;
529 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
530 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
531 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
532 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
533 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
534 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
535 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
536 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
537 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
538 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
539 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
540 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
541 }
542 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
543
544 /* add base */
545 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
546 {
547 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
548 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
549 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
550 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
551 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
552 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
553 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
554 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
555 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
556 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
557 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
558 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
559 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
560 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
561 /* complicated encodings */
562 case 5:
563 case 13:
564 if ((bRm & X86_MODRM_MOD_MASK) != 0)
565 {
566 if (!pVCpu->iem.s.uRexB)
567 {
568 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
569 SET_SS_DEF();
570 }
571 else
572 u64EffAddr += pVCpu->cpum.GstCtx.r13;
573 }
574 else
575 {
576 uint32_t u32Disp;
577 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
578 u64EffAddr += (int32_t)u32Disp;
579 uInfo |= u32Disp;
580 }
581 break;
582 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
583 }
584 break;
585 }
586 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
587 }
588
589 /* Get and add the displacement. */
590 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
591 {
592 case 0:
593 break;
594 case 1:
595 {
596 int8_t i8Disp;
597 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
598 u64EffAddr += i8Disp;
599 uInfo |= (uint32_t)(int32_t)i8Disp;
600 break;
601 }
602 case 2:
603 {
604 uint32_t u32Disp;
605 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
606 u64EffAddr += (int32_t)u32Disp;
607 uInfo |= u32Disp;
608 break;
609 }
610 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
611 }
612
613 }
614
615 *puInfo = uInfo;
616 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
617 {
618 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
619 return u64EffAddr;
620 }
621 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
622 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
623 return u64EffAddr & UINT32_MAX;
624}
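/* Worked 16-bit example (illustrative): bRm=0x46 is mod=01 rm=110, i.e. disp8[BP].
   The code above fetches the sign-extended disp8, stores it in *puInfo, adds BP
   and applies the SS segment default via SET_SS_DEF(). */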
625
626
627/**
628 * Helper for indicating that we've branched.
629 */
630DECL_FORCE_INLINE(void) iemThreadedSetBranched(PVMCPUCC pVCpu, uint8_t fTbBranched)
631{
632 pVCpu->iem.s.fTbBranched = fTbBranched;
633 pVCpu->iem.s.GCPhysTbBranchSrcBuf = pVCpu->iem.s.GCPhysInstrBuf;
634 pVCpu->iem.s.GCVirtTbBranchSrcBuf = pVCpu->iem.s.uInstrBufPc;
635}
636
637
638/*
639 * Include the "annotated" IEMAllInst*.cpp.h files.
640 */
641#include "IEMThreadedInstructions.cpp.h"
642
643/*
644 * Translation block management.
645 */
646typedef struct IEMTBCACHE
647{
648 uint32_t cHash;
649 uint32_t uHashMask;
650 PIEMTB apHash[_64K];
651} IEMTBCACHE;
652
653static IEMTBCACHE g_TbCache = { _64K, 0xffff, }; /**< Quick and dirty. */
654
655#define IEMTBCACHE_HASH(a_paCache, a_fTbFlags, a_GCPhysPc) \
656 ( ((uint32_t)(a_GCPhysPc) ^ (a_fTbFlags)) & (a_paCache)->uHashMask)
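/* Example with hypothetical values: GCPhysPc=0x0010a2c0 and fTbFlags=0x0003 give
   the index (0xa2c0 ^ 0x0003) & 0xffff = 0xa2c3, i.e. the low 16 bits of the
   physical PC xor'ed with the TB flags. */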
657
658
659/**
660 * Allocate a translation block for threaded recompilation.
661 *
662 * @returns Pointer to the translation block on success, NULL on failure.
663 * @param pVM The cross context virtual machine structure.
664 * @param pVCpu The cross context virtual CPU structure of the calling
665 * thread.
666 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
667 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
668 */
669static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
670{
671 /*
672 * Just using the heap for now. Will make this more efficient and
673 * complicated later, don't worry. :-)
674 */
675 PIEMTB pTb = (PIEMTB)RTMemAlloc(sizeof(IEMTB));
676 if (pTb)
677 {
678 unsigned const cCalls = 128;
679 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
680 if (pTb->Thrd.paCalls)
681 {
682 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16); /* This will be reallocated later. */
683 if (pTb->pabOpcodes)
684 {
685 pTb->Thrd.cAllocated = cCalls;
686 pTb->cbOpcodesAllocated = cCalls * 16;
687 pTb->Thrd.cCalls = 0;
688 pTb->cbOpcodes = 0;
689 pTb->pNext = NULL;
690 RTListInit(&pTb->LocalList);
691 pTb->GCPhysPc = GCPhysPc;
692 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
693 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
694 pTb->cInstructions = 0;
695
696 /* Init the first opcode range. */
697 pTb->cRanges = 1;
698 pTb->aRanges[0].cbOpcodes = 0;
699 pTb->aRanges[0].offOpcodes = 0;
700 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
701 pTb->aRanges[0].u2Unused = 0;
702 pTb->aRanges[0].idxPhysPage = 0;
703 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
704 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
705
706 pVCpu->iem.s.cTbAllocs++;
707 return pTb;
708 }
709 RTMemFree(pTb->Thrd.paCalls);
710 }
711 RTMemFree(pTb);
712 }
713 RT_NOREF(pVM);
714 return NULL;
715}
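/* Sizing note: with cCalls=128 the initial TB gets 128 call entries and
   128 * 16 = 2048 opcode bytes, i.e. room for one maximum-length (15 byte)
   instruction per call entry before reallocation would be needed. */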
716
717
718/**
719 * Frees pTb.
720 *
721 * @param pVM The cross context virtual machine structure.
722 * @param pVCpu The cross context virtual CPU structure of the calling
723 * thread.
724 * @param pTb The translation block to free.
725 */
726static void iemThreadedTbFree(PVMCC pVM, PVMCPUCC pVCpu, PIEMTB pTb)
727{
728 RT_NOREF(pVM);
729 AssertPtr(pTb);
730
731 AssertCompile(IEMTB_F_STATE_OBSOLETE == IEMTB_F_STATE_MASK);
732 pTb->fFlags |= IEMTB_F_STATE_OBSOLETE; /* works, both bits set */
733
734 /* Unlink it from the hash table: */
735 uint32_t const idxHash = IEMTBCACHE_HASH(&g_TbCache, pTb->fFlags, pTb->GCPhysPc);
736 PIEMTB pTbCur = g_TbCache.apHash[idxHash];
737 if (pTbCur == pTb)
738 g_TbCache.apHash[idxHash] = pTb->pNext;
739 else
740 while (pTbCur)
741 {
742 PIEMTB const pNextTb = pTbCur->pNext;
743 if (pNextTb == pTb)
744 {
745 pTbCur->pNext = pTb->pNext;
746 break;
747 }
748 pTbCur = pNextTb;
749 }
750
751 /* Free it. */
752 RTMemFree(pTb->Thrd.paCalls);
753 pTb->Thrd.paCalls = NULL;
754
755 RTMemFree(pTb->pabOpcodes);
756 pTb->pabOpcodes = NULL;
757
758 RTMemFree(pTb);
759 pVCpu->iem.s.cTbFrees++;
760}
761
762
763/**
764 * Called by opcode verifier functions when they detect a problem.
765 */
766void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb)
767{
768 iemThreadedTbFree(pVCpu->CTX_SUFF(pVM), pVCpu, pTb);
769}
770
771
772static PIEMTB iemThreadedTbLookup(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
773{
774 uint32_t const fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags | IEMTB_F_STATE_READY;
775 uint32_t const idxHash = IEMTBCACHE_HASH(&g_TbCache, fFlags, GCPhysPc);
776 Log10(("TB lookup: idxHash=%#x fFlags=%#x GCPhysPc=%RGp\n", idxHash, fFlags, GCPhysPc));
777 PIEMTB pTb = g_TbCache.apHash[idxHash];
778 while (pTb)
779 {
780 if (pTb->GCPhysPc == GCPhysPc)
781 {
782 if (pTb->fFlags == fFlags)
783 {
784 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
785 {
786#ifdef VBOX_WITH_STATISTICS
787 pVCpu->iem.s.cTbLookupHits++;
788#endif
789 return pTb;
790 }
791 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
792 }
793 else
794 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
795 }
796 else
797 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
798
799 pTb = pTb->pNext;
800 }
801 RT_NOREF(pVM);
802 pVCpu->iem.s.cTbLookupMisses++;
803 return pTb;
804}
805
806
807static void iemThreadedTbAdd(PVMCC pVM, PVMCPUCC pVCpu, PIEMTB pTb)
808{
809 uint32_t const idxHash = IEMTBCACHE_HASH(&g_TbCache, pTb->fFlags, pTb->GCPhysPc);
810 pTb->pNext = g_TbCache.apHash[idxHash];
811 g_TbCache.apHash[idxHash] = pTb;
812 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedInstr, pTb->cInstructions);
813 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
814 if (LogIs12Enabled())
815 {
816 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
817 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, idxHash, pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
818 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
819 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
820 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
821 pTb->aRanges[idxRange].idxPhysPage == 0
822 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
823 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
824 }
825 RT_NOREF(pVM);
826}
827
828
829/*
830 * Real code.
831 */
832
833#ifdef LOG_ENABLED
834/**
835 * Logs the current instruction.
836 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
837 * @param pszFunction The IEM function doing the execution.
838 */
839static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction) RT_NOEXCEPT
840{
841# ifdef IN_RING3
842 if (LogIs2Enabled())
843 {
844 char szInstr[256];
845 uint32_t cbInstr = 0;
846 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
847 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
848 szInstr, sizeof(szInstr), &cbInstr);
849
850 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
851 Log2(("**** %s fExec=%x pTb=%p\n"
852 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
853 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
854 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
855 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
856 " %s\n"
857 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3,
858 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
859 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
860 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
861 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
862 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
863 szInstr));
864
865 if (LogIs3Enabled())
866 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL);
867 }
868 else
869# endif
870 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
871 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
872}
873#endif /* LOG_ENABLED */
874
875
876static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
877{
878 RT_NOREF(pVM, pVCpu);
879 return rcStrict;
880}
881
882
883/**
884 * Initializes the decoder state when compiling TBs.
885 *
886 * This presumes that fExec has already been initialized.
887 *
888 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so fixes
889 * may need to be applied to them as well.
890 *
891 * @param pVCpu The cross context virtual CPU structure of the calling
892 * thread.
893 * @param fReInit Clear for the first call for a TB, set for subsequent calls
894 * from inside the compile loop where we can skip a couple of
895 * things.
896 */
897DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit)
898{
899 /* ASSUMES: That iemInitExec was already called and that anyone changing
900 CPU state affecting the fExec bits since then will have updated fExec! */
901 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
902 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
903
904 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
905
906 /* Decoder state: */
907 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
908 pVCpu->iem.s.enmEffAddrMode = enmMode;
909 if (enmMode != IEMMODE_64BIT)
910 {
911 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
912 pVCpu->iem.s.enmEffOpSize = enmMode;
913 }
914 else
915 {
916 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
917 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
918 }
919 pVCpu->iem.s.fPrefixes = 0;
920 pVCpu->iem.s.uRexReg = 0;
921 pVCpu->iem.s.uRexB = 0;
922 pVCpu->iem.s.uRexIndex = 0;
923 pVCpu->iem.s.idxPrefix = 0;
924 pVCpu->iem.s.uVex3rdReg = 0;
925 pVCpu->iem.s.uVexLength = 0;
926 pVCpu->iem.s.fEvexStuff = 0;
927 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
928 pVCpu->iem.s.offModRm = 0;
929 pVCpu->iem.s.iNextMapping = 0;
930
931 if (!fReInit)
932 {
933 pVCpu->iem.s.cActiveMappings = 0;
934 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
935 pVCpu->iem.s.fEndTb = false;
936 pVCpu->iem.s.fTbCheckOpcodes = false;
937 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
938 pVCpu->iem.s.fTbCrossedPage = false;
939 }
940 else
941 {
942 Assert(pVCpu->iem.s.cActiveMappings == 0);
943 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
944 Assert(pVCpu->iem.s.fEndTb == false);
945 Assert(pVCpu->iem.s.fTbCrossedPage == false);
946 }
947
948#ifdef DBGFTRACE_ENABLED
949 switch (IEM_GET_CPU_MODE(pVCpu))
950 {
951 case IEMMODE_64BIT:
952 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
953 break;
954 case IEMMODE_32BIT:
955 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
956 break;
957 case IEMMODE_16BIT:
958 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
959 break;
960 }
961#endif
962}
963
964
965/**
966 * Initializes the opcode fetcher when starting the compilation.
967 *
968 * @param pVCpu The cross context virtual CPU structure of the calling
969 * thread.
970 */
971DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
972{
973 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
974#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
975 pVCpu->iem.s.offOpcode = 0;
976#else
977 RT_NOREF(pVCpu);
978#endif
979}
980
981
982/**
983 * Re-initializes the opcode fetcher between instructions while compiling.
984 *
985 * @param pVCpu The cross context virtual CPU structure of the calling
986 * thread.
987 */
988DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
989{
990 if (pVCpu->iem.s.pbInstrBuf)
991 {
992 uint64_t off = pVCpu->cpum.GstCtx.rip;
993 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
994 off += pVCpu->cpum.GstCtx.cs.u64Base;
995 off -= pVCpu->iem.s.uInstrBufPc;
996 if (off < pVCpu->iem.s.cbInstrBufTotal)
997 {
998 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
999 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
1000 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
1001 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
1002 else
1003 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
1004 }
1005 else
1006 {
1007 pVCpu->iem.s.pbInstrBuf = NULL;
1008 pVCpu->iem.s.offInstrNextByte = 0;
1009 pVCpu->iem.s.offCurInstrStart = 0;
1010 pVCpu->iem.s.cbInstrBuf = 0;
1011 pVCpu->iem.s.cbInstrBufTotal = 0;
1012 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1013 }
1014 }
1015 else
1016 {
1017 pVCpu->iem.s.offInstrNextByte = 0;
1018 pVCpu->iem.s.offCurInstrStart = 0;
1019 pVCpu->iem.s.cbInstrBuf = 0;
1020 pVCpu->iem.s.cbInstrBufTotal = 0;
1021#ifdef VBOX_STRICT
1022 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1023#endif
1024 }
1025#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1026 pVCpu->iem.s.offOpcode = 0;
1027#endif
1028}
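/* The "+ 15" above clamps cbInstrBuf so it covers at most one maximum-length
   x86 instruction (15 bytes) from the new instruction start without exceeding
   cbInstrBufTotal. */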
1029
1030
1031DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
1032{
1033 switch (cbInstr)
1034 {
1035 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
1036 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
1037 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
1038 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
1039 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
1040 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
1041 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
1042 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
1043 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
1044 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
1045 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
1046 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
1047 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
1048 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
1049 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
1050 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
1051 }
1052}
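/* The switch above is a fully unrolled, fall-through copy for instruction
   lengths of 1 through 15 bytes, avoiding a memcpy call for these tiny,
   known-bounded copies. */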
1053
1054
1055/**
1056 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
1057 *
1058 * - CS LIM check required.
1059 * - Must recheck opcode bytes.
1060 * - Previous instruction branched.
1061 * - TLB load detected, probably due to page crossing.
1062 *
1063 * @returns true if everything went well, false if we're out of space in the TB
1064 * (e.g. opcode ranges).
1065 * @param pVCpu The cross context virtual CPU structure of the calling
1066 * thread.
1067 * @param pTb The translation block being compiled.
1068 */
1069static bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
1070{
1071 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
1072
1073 /*
1074 * Prepare the call now, even before we know if we can accept the instruction in this TB.
1075 * This allows us to amend parameters w/o making every case suffer.
1076 */
1077 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
1078 uint16_t const offOpcode = pTb->cbOpcodes;
1079 uint8_t idxRange = pTb->cRanges - 1;
1080
1081 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
1082 pCall->offOpcode = offOpcode;
1083 pCall->idxRange = idxRange;
1084 pCall->cbOpcode = cbInstr;
1085 pCall->auParams[0] = cbInstr;
1086 pCall->auParams[1] = idxRange;
1087 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
1088
1089/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
1090 * gotten onto. If we do, stop */
1091
1092 /*
1093 * Case 1: We've branched (RIP changed).
1094 *
1095 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
1096 * Req: 1 extra range, no extra phys.
1097 *
1098 * Sub-case 1b: Different page but no page boundary crossing, so TLB load
1099 * necessary (fTbCrossedPage is true).
1100 * Req: 1 extra range, probably 1 extra phys page entry.
1101 *
1102 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
1103 * but in addition we cross into the following page and require
1104 * another TLB load.
1105 * Req: 2 extra ranges, probably 2 extra phys page entries.
1106 *
1107 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
1108 * the following page (thus fTbCrossedPage is true).
1109 * Req: 2 extra ranges, probably 1 extra phys page entry.
1110 *
1111 * Note! The setting of fTbCrossedPage is done by iemOpcodeFetchBytesJmp, but
1112 * it may trigger "spuriously" from the CPU point of view because of
1113 * physical page changes that'll invalidate the physical TLB and trigger a
1114 * call to the function. In theory this shouldn't be a big deal, just a bit of
1115 * performance loss as we'll pick the LoadingTlb variants.
1116 *
1117 * Note! We do not currently optimize branching to the next instruction (sorry
1118 * 32-bit PIC code). We could maybe do that in the branching code that
1119 * sets (or not) fTbBranched.
1120 */
1121 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
1122 * variant in win 3.1 code and the call variant in 32-bit linux PIC
1123 * code. This'll require filtering out far jmps and calls, as they
1124 * load CS which should technically be considered indirect since the
1125 * GDT/LDT entry's base address can be modified independently from
1126 * the code. */
1127 if (pVCpu->iem.s.fTbBranched != 0)
1128 {
1129 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
1130 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
1131 {
1132 /* 1a + 1b - instruction fully within the branched to page. */
1133 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
1134 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
1135
1136 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
1137 {
1138 /* Check that we've got a free range. */
1139 idxRange += 1;
1140 if (idxRange < RT_ELEMENTS(pTb->aRanges))
1141 { /* likely */ }
1142 else
1143 {
1144 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1145 return false;
1146 }
1147 pCall->idxRange = idxRange;
1148 pCall->auParams[1] = idxRange;
1149 pCall->auParams[2] = 0;
1150
1151 /* Check that we've got a free page slot. */
1152 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
1153 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
1154 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
1155 pTb->aRanges[idxRange].idxPhysPage = 0;
1156 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
1157 || pTb->aGCPhysPages[0] == GCPhysNew)
1158 {
1159 pTb->aGCPhysPages[0] = GCPhysNew;
1160 pTb->aRanges[idxRange].idxPhysPage = 1;
1161 }
1162 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
1163 || pTb->aGCPhysPages[1] == GCPhysNew)
1164 {
1165 pTb->aGCPhysPages[1] = GCPhysNew;
1166 pTb->aRanges[idxRange].idxPhysPage = 2;
1167 }
1168 else
1169 {
1170 Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1171 return false;
1172 }
1173
1174 /* Finish setting up the new range. */
1175 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
1176 pTb->aRanges[idxRange].offOpcodes = offOpcode;
1177 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
1178 pTb->aRanges[idxRange].u2Unused = 0;
1179 pTb->cRanges++;
1180 }
1181 else
1182 {
1183 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1184 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
1185 }
1186
1187 /* Determine which function we need to load & check.
1188 Note! For jumps to a new page, we'll set both fTbBranched and
1189 fTbCrossedPage to avoid unnecessary TLB work for intra
1190 page branching */
1191 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
1192 || pVCpu->iem.s.fTbCrossedPage)
1193 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1194 ? kIemThreadedFunc_CheckCsLimAndOpcodesLoadingTlb
1195 : kIemThreadedFunc_CheckOpcodesLoadingTlb;
1196 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
1197 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1198 ? kIemThreadedFunc_CheckCsLimAndPcAndOpcodes
1199 : kIemThreadedFunc_CheckPcAndOpcodes;
1200 else
1201 {
1202 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
1203 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1204 ? kIemThreadedFunc_CheckCsLimAndOpcodes
1205 : kIemThreadedFunc_CheckOpcodes;
1206 }
1207 }
1208 else
1209 {
1210 /* 1c + 1d - instruction crosses pages. */
1211 Assert(pVCpu->iem.s.offCurInstrStart < 0);
1212 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
1213
1214 /* Lazy bird: Check that this isn't case 1c, since we've already
1215 loaded the first physical address. End the TB and
1216 make it a case 2b instead.
1217
1218 Hmm. Too much bother to detect, so just do the same
1219 with case 1d as well. */
1220#if 0 /** @todo get back to this later when we've got the actual branch code in
1221 * place. */
1222 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
1223
1224 /* Check that we've got two free ranges. */
1225 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
1226 { /* likely */ }
1227 else
1228 return false;
1229 idxRange += 1;
1230 pCall->idxRange = idxRange;
1231 pCall->auParams[1] = idxRange;
1232 pCall->auParams[2] = 0;
1233
1234 /* ... */
1235
1236#else
1237 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1238 return false;
1239#endif
1240 }
1241 }
1242
1243 /*
1244 * Case 2: Page crossing.
1245 *
1246 * Sub-case 2a: The instruction starts on the first byte in the next page.
1247 *
1248 * Sub-case 2b: The instruction has opcode bytes in both the current and
1249 * following page.
1250 *
1251 * Both cases requires a new range table entry and probably a new physical
1252 * page entry. The difference is in which functions to emit and whether to
1253 * add bytes to the current range.
1254 */
1255 else if (pVCpu->iem.s.fTbCrossedPage)
1256 {
1257 /* Check that we've got a free range. */
1258 idxRange += 1;
1259 if (idxRange < RT_ELEMENTS(pTb->aRanges))
1260 { /* likely */ }
1261 else
1262 {
1263 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1264 return false;
1265 }
1266
1267 /* Check that we've got a free page slot. */
1268 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
1269 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
1270 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
1271 pTb->aRanges[idxRange].idxPhysPage = 0;
1272 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
1273 || pTb->aGCPhysPages[0] == GCPhysNew)
1274 {
1275 pTb->aGCPhysPages[0] = GCPhysNew;
1276 pTb->aRanges[idxRange].idxPhysPage = 1;
1277 }
1278 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
1279 || pTb->aGCPhysPages[1] == GCPhysNew)
1280 {
1281 pTb->aGCPhysPages[1] = GCPhysNew;
1282 pTb->aRanges[idxRange].idxPhysPage = 2;
1283 }
1284 else
1285 {
1286 Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1287 return false;
1288 }
1289
1290 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
1291 {
1292 Assert(pVCpu->iem.s.offCurInstrStart == 0);
1293 pCall->idxRange = idxRange;
1294 pCall->auParams[1] = idxRange;
1295 pCall->auParams[2] = 0;
1296
1297 /* Finish setting up the new range. */
1298 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
1299 pTb->aRanges[idxRange].offOpcodes = offOpcode;
1300 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
1301 pTb->aRanges[idxRange].u2Unused = 0;
1302 pTb->cRanges++;
1303
1304 /* Determine which function we need to load & check. */
1305 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1306 ? kIemThreadedFunc_CheckCsLimAndOpcodesOnNewPageLoadingTlb
1307 : kIemThreadedFunc_CheckOpcodesOnNewPageLoadingTlb;
1308 }
1309 else
1310 {
1311 Assert(pVCpu->iem.s.offCurInstrStart < 0);
1312 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
1313 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
1314 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
1315
1316 /* We're good. Split the instruction over the old and new range table entries. */
1317 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
1318
1319 pTb->aRanges[idxRange].offPhysPage = 0;
1320 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
1321 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
1322 pTb->aRanges[idxRange].u2Unused = 0;
1323 pTb->cRanges++;
1324
1325 /* Determine which function we need to load & check. */
1326 if (pVCpu->iem.s.fTbCheckOpcodes)
1327 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1328 ? kIemThreadedFunc_CheckCsLimAndOpcodesAcrossPageLoadingTlb
1329 : kIemThreadedFunc_CheckOpcodesAcrossPageLoadingTlb;
1330 else
1331 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1332 ? kIemThreadedFunc_CheckCsLimAndOpcodesOnNextPageLoadingTlb
1333 : kIemThreadedFunc_CheckOpcodesOnNextPageLoadingTlb;
1334 }
1335 }
1336
1337 /*
1338 * Regular case: No new range required.
1339 */
1340 else
1341 {
1342 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
1343 if (pVCpu->iem.s.fTbCheckOpcodes)
1344 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1345 ? kIemThreadedFunc_CheckCsLimAndOpcodes
1346 : kIemThreadedFunc_CheckOpcodes;
1347 else
1348 pCall->enmFunction = kIemThreadedFunc_CheckCsLim;
1349
1350 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
1351 pTb->cbOpcodes = offOpcode + cbInstr;
1352 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
1353 Assert(pTb->cbOpcodes <= pTb->cbOpcodesAllocated);
1354 }
1355
1356 /*
1357 * Commit the call.
1358 */
1359 pTb->Thrd.cCalls++;
1360
1361 /*
1362 * Clear state.
1363 */
1364 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1365 pVCpu->iem.s.fTbCrossedPage = false;
1366 pVCpu->iem.s.fTbCheckOpcodes = false;
1367
1368 /*
1369 * Copy opcode bytes.
1370 */
1371 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
1372 pTb->cbOpcodes = offOpcode + cbInstr;
1373 Assert(pTb->cbOpcodes <= pTb->cbOpcodesAllocated);
1374
1375 return true;
1376}
1377
1378
1379
1380/**
1381 * Compiles a new TB and executes it.
1382 *
1383 * We combine compilation and execution here as it makes it simpler code flow
1384 * in the main loop and it allows interpreting while compiling if we want to
1385 * explore that option.
1386 *
1387 * @returns Strict VBox status code.
1388 * @param pVM The cross context virtual machine structure.
1389 * @param pVCpu The cross context virtual CPU structure of the calling
1390 * thread.
1391 * @param GCPhysPc The physical address corresponding to the current
1392 * RIP+CS.BASE.
1393 * @param fExtraFlags Extra translation block flags: IEMTB_F_TYPE_THREADED and
1394 * maybe IEMTB_F_CS_LIM_CHECKS.
1395 */
1396static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1397{
1398 /*
1399 * Allocate a new translation block.
1400 */
1401 PIEMTB pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags | IEMTB_F_STATE_COMPILING);
1402 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
1403
1404 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
1405 functions may get at it. */
1406 pVCpu->iem.s.pCurTbR3 = pTb;
1407
1408 /*
1409 * Now for the recompilation. (This mimics IEMExecLots in many ways.)
1410 */
1411 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/);
1412 iemThreadedCompileInitOpcodeFetching(pVCpu);
1413 VBOXSTRICTRC rcStrict;
1414 for (;;)
1415 {
1416 /* Process the next instruction. */
1417#ifdef LOG_ENABLED
1418 iemThreadedLogCurInstr(pVCpu, "CC");
1419 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
1420 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
1421#endif
1422 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
1423 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
1424
1425 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
1426 if ( rcStrict == VINF_SUCCESS
1427 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
1428 && !pVCpu->iem.s.fEndTb)
1429 {
1430 Assert(pTb->Thrd.cCalls > cCallsPrev);
1431 Assert(cCallsPrev - pTb->Thrd.cCalls < 5);
1432
1433 pVCpu->iem.s.cInstructions++;
1434 }
1435 else
1436 {
1437 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
1438 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
1439 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
1440 rcStrict = VINF_SUCCESS;
1441
1442 if (pTb->Thrd.cCalls > 0)
1443 {
1444 if (cCallsPrev != pTb->Thrd.cCalls)
1445 pVCpu->iem.s.cInstructions++;
1446 break;
1447 }
1448
1449 pVCpu->iem.s.pCurTbR3 = NULL;
1450 iemThreadedTbFree(pVM, pVCpu, pTb);
1451 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
1452 }
1453
1454 /* Still space in the TB? */
1455 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
1456 && pTb->cbOpcodes + 16 <= pTb->cbOpcodesAllocated)
1457 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/);
1458 else
1459 {
1460 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes - full\n",
1461 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes));
1462 break;
1463 }
1464 iemThreadedCompileReInitOpcodeFetching(pVCpu);
1465 }
1466
1467 /*
1468 * Complete the TB and link it.
1469 */
1470 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_STATE_MASK) | IEMTB_F_STATE_READY;
1471 iemThreadedTbAdd(pVM, pVCpu, pTb);
1472
1473#ifdef IEM_COMPILE_ONLY_MODE
1474 /*
1475 * Execute the translation block.
1476 */
1477#endif
1478
1479 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
1480}
1481
1482
1483/**
1484 * Executes a translation block.
1485 *
1486 * @returns Strict VBox status code.
1487 * @param pVCpu The cross context virtual CPU structure of the calling
1488 * thread.
1489 * @param pTb The translation block to execute.
1490 */
1491static VBOXSTRICTRC iemThreadedTbExec(PVMCPUCC pVCpu, PIEMTB pTb)
1492{
1493 /* Check the opcodes in the first page before starting execution. */
1494 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
1495 Assert(pTb->aRanges[0].cbOpcodes <= pVCpu->iem.s.cbInstrBufTotal - pVCpu->iem.s.offInstrNextByte);
1496 if (memcmp(pTb->pabOpcodes, &pVCpu->iem.s.pbInstrBuf[pTb->aRanges[0].offPhysPage], pTb->aRanges[0].cbOpcodes) == 0)
1497 { /* likely */ }
1498 else
1499 {
1500 Log7(("TB obsolete: %p GCPhys=%RGp\n", pTb, pTb->GCPhysPc));
1501 iemThreadedTbFree(pVCpu->pVMR3, pVCpu, pTb);
1502 return VINF_SUCCESS;
1503 }
1504
1505 /* Set the current TB so CIMPL functions may get at it. */
1506 pVCpu->iem.s.pCurTbR3 = pTb;
1507 pVCpu->iem.s.cTbExec++;
1508
1509 /* The execution loop. */
1510 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
1511 uint32_t cCallsLeft = pTb->Thrd.cCalls;
1512 while (cCallsLeft-- > 0)
1513 {
1514#ifdef LOG_ENABLED
1515 iemThreadedLogCurInstr(pVCpu, "EX");
1516 Log9(("%04x:%08RX64: #%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1517 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->enmFunction, g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
1518#endif
1519 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
1520 pCallEntry->auParams[0],
1521 pCallEntry->auParams[1],
1522 pCallEntry->auParams[2]);
1523
1524 if (RT_LIKELY( rcStrict == VINF_SUCCESS
1525 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
1526 pCallEntry++;
1527 else
1528 {
1529 pVCpu->iem.s.pCurTbR3 = NULL;
1530
1531 /* Some status codes are just to get us out of this loop and
1532 continue in a different translation block. */
1533 if (rcStrict == VINF_IEM_REEXEC_MODE_CHANGED)
1534 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
1535 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
1536 }
1537 }
1538
1539 pVCpu->iem.s.pCurTbR3 = NULL;
1540 return VINF_SUCCESS;
1541}
1542
1543
1544/**
1545 * This is called when the PC doesn't match the current pbInstrBuf.
1546 *
1547 * Upon return, we're ready for opcode fetching. But please note that
1548 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
1549 * MMIO or unassigned).
1550 */
1551static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
1552{
1553 pVCpu->iem.s.pbInstrBuf = NULL;
1554 pVCpu->iem.s.offCurInstrStart = 0;
1555 pVCpu->iem.s.offInstrNextByte = 0;
1556 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
1557 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
1558}
1559
1560
1561/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
1562DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
1563{
1564 /*
1565 * Set uCurTbStartPc to RIP and calc the effective PC.
1566 */
1567 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
1568 pVCpu->iem.s.uCurTbStartPc = uPc;
1569 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
1570 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
1571
1572 /*
1573 * Advance within the current buffer (PAGE) when possible.
1574 */
1575 if (pVCpu->iem.s.pbInstrBuf)
1576 {
1577 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
1578 if (off < pVCpu->iem.s.cbInstrBufTotal)
1579 {
1580 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
1581 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
1582 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
1583 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
1584 else
1585 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
1586
1587 return pVCpu->iem.s.GCPhysInstrBuf + off;
1588 }
1589 }
1590 return iemGetPcWithPhysAndCodeMissed(pVCpu);
1591}
1592
1593
1594/**
1595 * Determines the extra IEMTB_F_XXX flags.
1596 *
1597 * @returns IEMTB_F_TYPE_THREADED and maybe IEMTB_F_CS_LIM_CHECKS.
1598 * @param pVCpu The cross context virtual CPU structure of the calling
1599 * thread.
1600 */
1601DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
1602{
1603 /*
1604 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
1605 * likely to go invalid before the end of the translation block.
1606 */
1607 if (IEM_IS_64BIT_CODE(pVCpu))
1608 return IEMTB_F_TYPE_THREADED;
1609
1610 if (RT_LIKELY( pVCpu->cpum.GstCtx.eip < pVCpu->cpum.GstCtx.cs.u32Limit
1611 && pVCpu->cpum.GstCtx.eip - pVCpu->cpum.GstCtx.cs.u32Limit >= X86_PAGE_SIZE))
1612 return IEMTB_F_TYPE_THREADED;
1613
1614 return IEMTB_F_TYPE_THREADED | IEMTB_F_CS_LIM_CHECKS;
1615}
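/* In 64-bit mode the CS limit does not apply to code fetches, hence the early
   IEMTB_F_TYPE_THREADED return; only 16-bit and 32-bit code near or beyond the
   CS limit gets the extra IEMTB_F_CS_LIM_CHECKS work. */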
1616
1617
1618VMMDECL(VBOXSTRICTRC) IEMExecRecompilerThreaded(PVMCC pVM, PVMCPUCC pVCpu)
1619{
1620 /*
1621 * See if there is an interrupt pending in TRPM, inject it if we can.
1622 */
1623 if (!TRPMHasTrap(pVCpu))
1624 { /* likely */ }
1625 else
1626 {
1627 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
1628 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
1629 { /*likely */ }
1630 else
1631 return rcStrict;
1632 }
1633
1634 /*
1635 * Init the execution environment.
1636 */
1637 iemInitExec(pVCpu, 0 /*fExecOpts*/);
1638
1639 /*
1640 * Run-loop.
1641 *
1642 * If we're using setjmp/longjmp we combine all the catching here to avoid
1643 * having to call setjmp for each block we're executing.
1644 */
1645 for (;;)
1646 {
1647 PIEMTB pTb = NULL;
1648 VBOXSTRICTRC rcStrict;
1649 IEM_TRY_SETJMP(pVCpu, rcStrict)
1650 {
1651 uint32_t const cPollRate = 511; /* EM.cpp passes 4095 to IEMExecLots, so an eighth of that seems reasonable for now. */
1652 for (uint32_t iIterations = 0; ; iIterations++)
1653 {
1654 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
1655 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
1656 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
1657
1658 pTb = iemThreadedTbLookup(pVM, pVCpu, GCPhysPc, fExtraFlags);
1659 if (pTb)
1660 rcStrict = iemThreadedTbExec(pVCpu, pTb);
1661 else
1662 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
1663 if (rcStrict == VINF_SUCCESS)
1664 {
1665 Assert(pVCpu->iem.s.cActiveMappings == 0);
1666
1667 uint64_t fCpu = pVCpu->fLocalForcedActions;
1668 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
1669 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
1670 | VMCPU_FF_TLB_FLUSH
1671 | VMCPU_FF_UNHALT );
1672 if (RT_LIKELY( ( !fCpu
1673 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
1674 && !pVCpu->cpum.GstCtx.rflags.Bits.u1IF) )
1675 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
1676 {
1677 if (RT_LIKELY( (iIterations & cPollRate) != 0
1678 || !TMTimerPollBool(pVM, pVCpu)))
1679 {
1680
1681 }
1682 else
1683 return VINF_SUCCESS;
1684 }
1685 else
1686 return VINF_SUCCESS;
1687 }
1688 else
1689 return rcStrict;
1690 }
1691 }
1692 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
1693 {
1694 pVCpu->iem.s.cLongJumps++;
1695 if (pVCpu->iem.s.cActiveMappings > 0)
1696 iemMemRollback(pVCpu);
1697
1698 /* If pTb isn't NULL we're in iemThreadedTbExec. */
1699 if (!pTb)
1700 {
1701 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
1702 pTb = pVCpu->iem.s.pCurTbR3;
1703 if (pTb)
1704 {
1705 /* If the pCurTbR3 block is in compiling state, we're in iemThreadedCompile,
1706 otherwise it's iemThreadedTbExec inside iemThreadedCompile (compile option). */
1707 if ((pTb->fFlags & IEMTB_F_STATE_MASK) == IEMTB_F_STATE_COMPILING)
1708 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
1709 }
1710 }
1711 return rcStrict;
1712 }
1713 IEM_CATCH_LONGJMP_END(pVCpu);
1714 }
1715}
1716