VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp

Last change on this file was 104407, checked in by vboxsync, 2024-04-23

VMM/IEM: Adjusted the TB exit statistics a bit more, adding a few new ones, making more of the release stats that don't go into the TB, and organizing them to try to avoid counting the same exit more than once. bugref:10376 bugref:10653

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
1/* $Id: IEMAllThrdRecompiler.cpp 104407 2024-04-23 23:16:04Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) : TB calls being emitted.
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
96# include "IEMN8veRecompiler.h"
97#endif
98
99
100/*
101 * Narrow down configs here to avoid wasting time on unused configs.
102 */
103
104#ifndef IEM_WITH_CODE_TLB
105# error The code TLB must be enabled for the recompiler.
106#endif
107
108#ifndef IEM_WITH_DATA_TLB
109# error The data TLB must be enabled for the recompiler.
110#endif
111
112#ifndef IEM_WITH_SETJMP
113# error The setjmp approach must be enabled for the recompiler.
114#endif
115
116
117
118/**
119 * Calculates the effective address of a ModR/M memory operand, extended version
120 * for use in the recompilers.
121 *
122 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
123 *
124 * May longjmp on internal error.
125 *
126 * @return The effective address.
127 * @param pVCpu The cross context virtual CPU structure of the calling thread.
128 * @param bRm The ModRM byte.
129 * @param cbImmAndRspOffset - First byte: The size of any immediate
130 * following the effective address opcode bytes
131 * (only for RIP relative addressing).
132 * - Second byte: RSP displacement (for POP [ESP]).
133 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
134 * SIB byte (bits 39:32).
135 *
136 * @note This must be defined in a source file with matching
137 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
138 * or implemented differently...
139 */
140RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
141{
142 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
143# define SET_SS_DEF() \
144 do \
145 { \
146 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
147 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
148 } while (0)
149
150 if (!IEM_IS_64BIT_CODE(pVCpu))
151 {
152/** @todo Check the effective address size crap! */
153 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
154 {
155 uint16_t u16EffAddr;
156
157 /* Handle the disp16 form with no registers first. */
158 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
159 {
160 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
161 *puInfo = u16EffAddr;
162 }
163 else
164 {
165 /* Get the displacement. */
166 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
167 {
168 case 0: u16EffAddr = 0; break;
169 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
170 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
171 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
172 }
173 *puInfo = u16EffAddr;
174
175 /* Add the base and index registers to the disp. */
176 switch (bRm & X86_MODRM_RM_MASK)
177 {
178 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
179 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
180 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
181 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
182 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
183 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
184 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
185 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
186 }
187 }
188
189 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
190 return u16EffAddr;
191 }
192
193 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
194 uint32_t u32EffAddr;
195 uint64_t uInfo;
196
197 /* Handle the disp32 form with no registers first. */
198 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
199 {
200 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
201 uInfo = u32EffAddr;
202 }
203 else
204 {
205 /* Get the register (or SIB) value. */
206 uInfo = 0;
207 switch ((bRm & X86_MODRM_RM_MASK))
208 {
209 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
210 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
211 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
212 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
213 case 4: /* SIB */
214 {
215 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
216 uInfo = (uint64_t)bSib << 32;
217
218 /* Get the index and scale it. */
219 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
220 {
221 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
222 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
223 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
224 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
225 case 4: u32EffAddr = 0; /*none */ break;
226 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
227 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
228 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
229 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
230 }
231 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
232
233 /* add base */
234 switch (bSib & X86_SIB_BASE_MASK)
235 {
236 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
237 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
238 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
239 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
240 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
241 case 5:
242 if ((bRm & X86_MODRM_MOD_MASK) != 0)
243 {
244 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
245 SET_SS_DEF();
246 }
247 else
248 {
249 uint32_t u32Disp;
250 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
251 u32EffAddr += u32Disp;
252 uInfo |= u32Disp;
253 }
254 break;
255 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
256 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
257 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
258 }
259 break;
260 }
261 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
262 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
263 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
264 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
265 }
266
267 /* Get and add the displacement. */
268 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
269 {
270 case 0:
271 break;
272 case 1:
273 {
274 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
275 u32EffAddr += i8Disp;
276 uInfo |= (uint32_t)(int32_t)i8Disp;
277 break;
278 }
279 case 2:
280 {
281 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
282 u32EffAddr += u32Disp;
283 uInfo |= u32Disp;
284 break;
285 }
286 default:
287 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
288 }
289 }
290
291 *puInfo = uInfo;
292 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
293 return u32EffAddr;
294 }
295
296 uint64_t u64EffAddr;
297 uint64_t uInfo;
298
299 /* Handle the rip+disp32 form with no registers first. */
300 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
301 {
302 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
303 uInfo = (uint32_t)u64EffAddr;
304 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
305 }
306 else
307 {
308 /* Get the register (or SIB) value. */
309 uInfo = 0;
310 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
311 {
312 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
313 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
314 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
315 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
316 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
317 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
318 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
319 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
320 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
321 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
322 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
323 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
324 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
325 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
326 /* SIB */
327 case 4:
328 case 12:
329 {
330 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
331 uInfo = (uint64_t)bSib << 32;
332
333 /* Get the index and scale it. */
334 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
335 {
336 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
337 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
338 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
339 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
340 case 4: u64EffAddr = 0; /*none */ break;
341 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
342 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
343 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
344 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
345 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
346 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
347 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
348 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
349 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
350 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
351 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
352 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
353 }
354 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
355
356 /* add base */
357 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
358 {
359 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
360 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
361 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
362 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
363 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
364 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
365 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
366 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
367 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
368 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
369 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
370 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
371 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
372 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
373 /* complicated encodings */
374 case 5:
375 case 13:
376 if ((bRm & X86_MODRM_MOD_MASK) != 0)
377 {
378 if (!pVCpu->iem.s.uRexB)
379 {
380 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
381 SET_SS_DEF();
382 }
383 else
384 u64EffAddr += pVCpu->cpum.GstCtx.r13;
385 }
386 else
387 {
388 uint32_t u32Disp;
389 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
390 u64EffAddr += (int32_t)u32Disp;
391 uInfo |= u32Disp;
392 }
393 break;
394 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
395 }
396 break;
397 }
398 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
399 }
400
401 /* Get and add the displacement. */
402 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
403 {
404 case 0:
405 break;
406 case 1:
407 {
408 int8_t i8Disp;
409 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
410 u64EffAddr += i8Disp;
411 uInfo |= (uint32_t)(int32_t)i8Disp;
412 break;
413 }
414 case 2:
415 {
416 uint32_t u32Disp;
417 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
418 u64EffAddr += (int32_t)u32Disp;
419 uInfo |= u32Disp;
420 break;
421 }
422 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
423 }
424
425 }
426
427 *puInfo = uInfo;
428 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
429 {
430 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
431 return u64EffAddr;
432 }
433 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
434 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
435 return u64EffAddr & UINT32_MAX;
436}
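/*
 * Illustrative usage sketch (hypothetical caller; cbImm and cbRspDisp are
 * placeholder names).  The packing follows the parameter documentation above
 * and the (cbImmAndRspOffset & 0xff) / (cbImmAndRspOffset >> 8) uses in the
 * function body:
 *
 *     uint64_t uInfo;
 *     uint32_t const cbImmAndRspOffset = (uint32_t)cbImm              - bits 7:0:  immediate size (RIP relative only)
 *                                      | ((uint32_t)cbRspDisp << 8);  - bits 15:8: RSP displacement (POP [ESP])
 *     RTGCPTR const GCPtrEff = iemOpHlpCalcRmEffAddrJmpEx(pVCpu, bRm, cbImmAndRspOffset, &uInfo);
 *     uint32_t const u32Disp = (uint32_t)uInfo;           - displacement, bits 31:0
 *     uint8_t  const bSib    = (uint8_t)(uInfo >> 32);    - SIB byte, bits 39:32
 */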
437
438
439/*********************************************************************************************************************************
440* Translation Block Cache. *
441*********************************************************************************************************************************/
442
443/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
444static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
445{
446 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
447 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
448 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
449 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
450 if (cMsSinceUse1 != cMsSinceUse2)
451 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
452 if (pTb1->cUsed != pTb2->cUsed)
453 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
454 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
455 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
456 return 0;
457}
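/*
 * Resulting sort order: most recently used first, ties broken by higher use
 * count, then native TBs ahead of threaded ones - i.e. the front of the sorted
 * array is what iemTbCacheAddWithPruning keeps.
 */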
458
459#ifdef VBOX_STRICT
460/**
461 * Assertion helper that checks a collisions list count.
462 */
463static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
464{
465 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
466 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
467 while (pTb)
468 {
469 pTb = pTb->pNext;
470 cLeft--;
471 }
472 AssertMsg(cLeft == 0,
473 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
474 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
475}
476#endif
477
478
479DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
480{
481 STAM_PROFILE_START(&pTbCache->StatPrune, a);
482
483 /*
484 * First convert the collision list to an array.
485 */
486 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
487 uintptr_t cInserted = 0;
488 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
489
490 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
491
492 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
493 {
494 apSortedTbs[cInserted++] = pTbCollision;
495 pTbCollision = pTbCollision->pNext;
496 }
497
498 /* Free any excess (impossible). */
499 if (RT_LIKELY(!pTbCollision))
500 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
501 else
502 do
503 {
504 PIEMTB pTbToFree = pTbCollision;
505 pTbCollision = pTbToFree->pNext;
506 iemTbAllocatorFree(pVCpu, pTbToFree);
507 } while (pTbCollision);
508
509 /*
510 * Sort it by most recently used and usage count.
511 */
512 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
513
514 /* We keep half the list for now. Perhaps a bit aggressive... */
515 uintptr_t const cKeep = cInserted / 2;
516
517 /* First free up the TBs we don't wish to keep (before creating the new
518 list because otherwise the free code will scan the list for each one
519 without ever finding it). */
520 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
521 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
522
523 /* Then chain the new TB together with the ones we like to keep of the
524 existing ones and insert this list into the hash table. */
525 pTbCollision = pTb;
526 for (uintptr_t idx = 0; idx < cKeep; idx++)
527 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
528 pTbCollision->pNext = NULL;
529
530 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
531#ifdef VBOX_STRICT
532 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
533#endif
534
535 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
536}
537
538
539static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
540{
541 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
542 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
543 if (!pTbOldHead)
544 {
545 pTb->pNext = NULL;
546 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
547 }
548 else
549 {
550 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
551 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
552 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
553 {
554 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
555 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
556#ifdef VBOX_STRICT
557 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
558#endif
559 }
560 else
561 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
562 }
563}
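/*
 * Note on the bucket encoding used above: each apHash[] entry packs both the
 * head TB pointer and the chain length (see the IEMTBCACHE_PTR_MAKE,
 * IEMTBCACHE_PTR_GET_TB and IEMTBCACHE_PTR_GET_COUNT macros).  Once a chain
 * would exceed IEMTBCACHE_PTR_MAX_COUNT entries, iemTbCacheAddWithPruning
 * sorts the colliding TBs and keeps only the better half plus the new TB.
 */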
564
565
566/**
567 * Unlinks @a pTb from the hash table if found in it.
568 *
569 * @returns true if unlinked, false if not present.
570 * @param pTbCache The hash table.
571 * @param pTb The TB to remove.
572 */
573static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
574{
575 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
576 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
577 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
578
579 /*
580 * At the head of the collision list?
581 */
582 if (pTbHash == pTb)
583 {
584 if (!pTb->pNext)
585 pTbCache->apHash[idxHash] = NULL;
586 else
587 {
588 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
589 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
590#ifdef VBOX_STRICT
591 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
592#endif
593 }
594 return true;
595 }
596
597 /*
598 * Search the collision list.
599 */
600 PIEMTB const pTbHead = pTbHash;
601 while (pTbHash)
602 {
603 PIEMTB const pNextTb = pTbHash->pNext;
604 if (pNextTb == pTb)
605 {
606 pTbHash->pNext = pTb->pNext;
607 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
608#ifdef VBOX_STRICT
609 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
610#endif
611 return true;
612 }
613 pTbHash = pNextTb;
614 }
615 return false;
616}
617
618
619/**
620 * Looks up a TB for the given PC and flags in the cache.
621 *
622 * @returns Pointer to TB on success, NULL if not found.
623 * @param pVCpu The cross context virtual CPU structure of the
624 * calling thread.
625 * @param pTbCache The translation block cache.
626 * @param GCPhysPc The PC to look up a TB for.
627 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
628 * the lookup.
629 * @thread EMT(pVCpu)
630 */
631static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
632 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP /** @todo r=bird: no longjumping here, right? iemNativeRecompile is noexcept. */
633{
634 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
635
636 /*
637 * First consult the lookup table entry.
638 */
639 PIEMTB * const ppTbLookup = pVCpu->iem.s.ppTbLookupEntryR3;
640 PIEMTB pTb = *ppTbLookup;
641 if (pTb)
642 {
643 if (pTb->GCPhysPc == GCPhysPc)
644 {
645 if ( (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_NATIVE)
646 || (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_THREADED) )
647 {
648 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
649 {
650 STAM_COUNTER_INC(&pTbCache->cLookupHitsViaTbLookupTable);
651 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
652 pTb->cUsed++;
653#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
654 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
655 {
656 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
657 return pTb;
658 }
659 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p) - recompiling\n", fFlags, GCPhysPc, pTb, ppTbLookup));
660 return iemNativeRecompile(pVCpu, pTb);
661#else
662 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
663 return pTb;
664#endif
665 }
666 }
667 }
668 }
669
670 /*
671 * Then consult the hash table.
672 */
673 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
674#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
675 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
676#endif
677 pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
678 while (pTb)
679 {
680 if (pTb->GCPhysPc == GCPhysPc)
681 {
682 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
683 {
684 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
685 {
686 STAM_COUNTER_INC(&pTbCache->cLookupHits);
687 AssertMsg(cLeft > 0, ("%d\n", cLeft));
688
689 *ppTbLookup = pTb;
690 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
691 pTb->cUsed++;
692#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
693 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
694 {
695 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
696 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
697 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
698 return pTb;
699 }
700 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d) - recompiling\n",
701 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
702 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
703 return iemNativeRecompile(pVCpu, pTb);
704#else
705 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
706 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
707 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
708 return pTb;
709#endif
710 }
711 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
712 }
713 else
714 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
715 }
716 else
717 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
718
719 pTb = pTb->pNext;
720#ifdef VBOX_STRICT
721 cLeft--;
722#endif
723 }
724 AssertMsg(cLeft == 0, ("%d\n", cLeft));
725 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
726 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: NULL - (%p L %d)\n", fFlags, GCPhysPc, idxHash,
727 IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]), IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
728 return pTb;
729}
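/*
 * Lookup strategy recap: the dedicated lookup table entry pointed to by
 * IEMCPU::ppTbLookupEntryR3 is consulted first; only if that doesn't match the
 * requested GCPhysPc/flags/CS attributes do we hash into apHash[] and walk the
 * collision chain.  A hit on either path updates msLastUsed and cUsed, which
 * the pruning code in the TB allocator keys off, and may trigger native
 * recompilation once cUsed reaches uTbNativeRecompileAtUsedCount.
 */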
730
731
732/*********************************************************************************************************************************
733* Translation Block Allocator.
734*********************************************************************************************************************************/
735/*
736 * Translation block allocation management.
737 */
738
739#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
740# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
741 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
742# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
743 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
744# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
745 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
746#else
747# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
748 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
749# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
750 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
751# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
752 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
753#endif
754/** Makes a TB index from a chunk index and TB index within that chunk. */
755#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
756 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
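/*
 * Worked example of the index math (hypothetical power-of-two configuration
 * with cTbsPerChunk = 8192, i.e. cChunkShift = 13 and fChunkMask = 0x1fff):
 *
 *     idxTb = 20000:  IEMTBALLOC_IDX_TO_CHUNK()          -> 20000 >> 13      = 2
 *                     IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK() -> 20000 & 0x1fff   = 3616
 *     IEMTBALLOC_IDX_MAKE(pTbAllocator, 2, 3616)         -> (2 << 13) + 3616 = 20000
 *
 * The non-power-of-two variants arrive at the same values using division and
 * subtraction instead of shift and mask.
 */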
757
758
759/**
760 * Initializes the TB allocator and cache for an EMT.
761 *
762 * @returns VBox status code.
763 * @param pVM The VM handle.
764 * @param cInitialTbs The initial number of translation blocks to
765 * preallocate.
766 * @param cMaxTbs The max number of translation blocks allowed.
767 * @param cbInitialExec The initial size of the executable memory allocator.
768 * @param cbMaxExec The max size of the executable memory allocator.
769 * @param cbChunkExec The chunk size for executable memory allocator. Zero
770 * or UINT32_MAX for automatically determining this.
771 * @thread EMT
772 */
773DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
774 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
775{
776 PVMCPUCC pVCpu = VMMGetCpu(pVM);
777 Assert(!pVCpu->iem.s.pTbCacheR3);
778 Assert(!pVCpu->iem.s.pTbAllocatorR3);
779
780 /*
781 * Calculate the chunk size of the TB allocator.
782 * The minimum chunk size is 2MiB.
783 */
784 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
785 uint32_t cbPerChunk = _2M;
786 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
787#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
788 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
789 uint8_t cChunkShift = 21 - cTbShift;
790 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
791#endif
792 for (;;)
793 {
794 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
795 break;
796 cbPerChunk *= 2;
797 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
798#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
799 cChunkShift += 1;
800#endif
801 }
802
803 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
804 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
805 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
806
807 cMaxTbs = cMaxChunks * cTbsPerChunk;
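    /*
     * Worked example of the geometry above (purely hypothetical sizeof(IEMTB)
     * of 512 bytes): a 2 MiB chunk then holds 4096 TBs.  If cMaxTbs exceeds
     * what RT_ELEMENTS(aChunks) such chunks can hold, cbPerChunk is doubled
     * (and cChunkShift bumped) until it fits, and cMaxTbs is then rounded up
     * to a whole number of chunks.
     */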
808
809 /*
810 * Allocate and initialize it.
811 */
812 uint32_t const c64BitWords = RT_ALIGN_32(cMaxTbs, 64) / 64;
813 size_t const cbTbAllocator = RT_UOFFSETOF_DYN(IEMTBALLOCATOR, bmAllocated[c64BitWords]);
814 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(cbTbAllocator);
815 if (!pTbAllocator)
816 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
817 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
818 cbTbAllocator, cMaxTbs, pVCpu->idCpu);
819 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
820 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
821 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
822 pTbAllocator->cbPerChunk = cbPerChunk;
823 pTbAllocator->cMaxTbs = cMaxTbs;
824#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
825 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
826 pTbAllocator->cChunkShift = cChunkShift;
827 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
828#endif
829
830 memset(pTbAllocator->bmAllocated, 0xff, c64BitWords * sizeof(uint64_t)); /* Mark all as allocated, clear as chunks are added. */
831 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
832
833 /*
834 * Allocate the initial chunks.
835 */
836 for (uint32_t idxChunk = 0; ; idxChunk++)
837 {
838 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
839 if (!paTbs)
840 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
841 "Failed to initial %zu bytes for the #%u chunk of TBs for VCpu #%u",
842 cbPerChunk, idxChunk, pVCpu->idCpu);
843
844 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
845 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
846 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
847 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
848 pTbAllocator->cTotalTbs += cTbsPerChunk;
849
850 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
851 break;
852 }
853
854 /*
855 * Calculate the size of the hash table. We double the max TB count and
856 * round it up to the nearest power of two.
857 */
858 uint32_t cCacheEntries = cMaxTbs * 2;
859 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
860 {
861 uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
862 cCacheEntries = RT_BIT_32(iBitTop);
863 Assert(cCacheEntries >= cMaxTbs * 2);
864 }
865
866 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
867 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
868 if (!pTbCache)
869 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
870 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
871 cbTbCache, cCacheEntries, pVCpu->idCpu);
872
873 /*
874 * Initialize it (assumes zeroed by the allocator).
875 */
876 pTbCache->uMagic = IEMTBCACHE_MAGIC;
877 pTbCache->cHash = cCacheEntries;
878 pTbCache->uHashMask = cCacheEntries - 1;
879 Assert(pTbCache->cHash > pTbCache->uHashMask);
880 pVCpu->iem.s.pTbCacheR3 = pTbCache;
881
882 /*
883 * Initialize the native executable memory allocator.
884 */
885#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
886 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
887 AssertLogRelRCReturn(rc, rc);
888#else
889 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
890#endif
891
892 return VINF_SUCCESS;
893}
894
895
896/**
897 * Inner free worker.
898 */
899static void iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator,
900 PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
901{
902 Assert(idxChunk < pTbAllocator->cAllocatedChunks);
903 Assert(idxInChunk < pTbAllocator->cTbsPerChunk);
904 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
905 Assert(ASMBitTest(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk)));
906#ifdef VBOX_STRICT
907 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
908 Assert(pTbOther != pTb);
909#endif
910
911 /*
912 * Unlink the TB from the hash table.
913 */
914 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
915
916 /*
917 * Free the TB itself.
918 */
919 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
920 {
921 case IEMTB_F_TYPE_THREADED:
922 pTbAllocator->cThreadedTbs -= 1;
923 RTMemFree(pTb->Thrd.paCalls);
924 break;
925#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
926 case IEMTB_F_TYPE_NATIVE:
927 pTbAllocator->cNativeTbs -= 1;
928 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
929 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
930 pTb->Native.paInstructions = NULL; /* required by iemExecMemAllocatorPrune */
931 break;
932#endif
933 default:
934 AssertFailed();
935 }
936
937 RTMemFree(IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0)); /* Frees both the TB lookup table and opcode bytes. */
938
939 pTb->pNext = NULL;
940 pTb->fFlags = 0;
941 pTb->GCPhysPc = UINT64_MAX;
942 pTb->Gen.uPtr = 0;
943 pTb->Gen.uData = 0;
944 pTb->cTbLookupEntries = 0;
945 pTb->cbOpcodes = 0;
946 pTb->pabOpcodes = NULL;
947
948 ASMBitClear(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk));
949 Assert(pTbAllocator->cInUseTbs > 0);
950
951 pTbAllocator->cInUseTbs -= 1;
952 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
953}
954
955
956/**
957 * Frees the given TB.
958 *
959 * @param pVCpu The cross context virtual CPU structure of the calling
960 * thread.
961 * @param pTb The translation block to free.
962 * @thread EMT(pVCpu)
963 */
964DECLHIDDEN(void) iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
965{
966 /*
967 * Validate state.
968 */
969 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
970 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
971 uint8_t const idxChunk = pTb->idxAllocChunk;
972 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
973 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
974 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
975
976 /*
977 * Invalidate the TB lookup pointer and call the inner worker.
978 */
979 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
980 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
981}
982
983
984/**
985 * Schedules a TB for freeing when it's no longer being executed and/or part of
986 * the caller's call stack.
987 *
988 * The TB will be removed from the translation block cache, though, so it isn't
989 * possible to execute it again, and the IEMTB::pNext member can be used to link
990 * it together with other TBs awaiting freeing.
991 *
992 * @param pVCpu The cross context virtual CPU structure of the calling
993 * thread.
994 * @param pTb The translation block to schedule for freeing.
995 */
996static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
997{
998 /*
999 * Validate state.
1000 */
1001 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1002 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1003 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
1004 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
1005 Assert(ASMBitTest(&pTbAllocator->bmAllocated,
1006 IEMTBALLOC_IDX_MAKE(pTbAllocator, pTb->idxAllocChunk,
1007 (uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs))));
1008 Assert( (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE
1009 || (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1010#ifdef VBOX_STRICT
1011 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
1012 Assert(pTbOther != pTb);
1013#endif
1014
1015 /*
1016 * Remove it from the cache and prepend it to the allocator's todo list.
1017 *
1018 * Note! It could still be in various lookup tables, so we trash the GCPhys
1019 * and CS attribs to ensure it won't be reused.
1020 */
1021 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
1022 pTb->GCPhysPc = NIL_RTGCPHYS;
1023 pTb->x86.fAttr = UINT16_MAX;
1024
1025 pTb->pNext = pTbAllocator->pDelayedFreeHead;
1026 pTbAllocator->pDelayedFreeHead = pTb;
1027}
1028
1029
1030/**
1031 * Processes the delayed frees.
1032 *
1033 * This is called by the allocator function as well as the native recompile
1034 * function before making any TB or executable memory allocations respectively.
1035 */
1036void iemTbAllocatorProcessDelayedFrees(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator)
1037{
1038 /** @todo r=bird: these have already been removed from the cache,
1039 * iemTbAllocatorFree/Inner redoes that, which is a waste of time. */
1040 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
1041 pTbAllocator->pDelayedFreeHead = NULL;
1042 while (pTb)
1043 {
1044 PIEMTB const pTbNext = pTb->pNext;
1045 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
1046 iemTbAllocatorFree(pVCpu, pTb);
1047 pTb = pTbNext;
1048 }
1049}
1050
1051
1052/**
1053 * Grow the translation block allocator with another chunk.
1054 */
1055static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
1056{
1057 /*
1058 * Validate state.
1059 */
1060 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1061 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1062 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1063 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
1064 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1065
1066 /*
1067 * Allocate a new chunk and add it to the allocator.
1068 */
1069 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1070 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1071 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1072
1073 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1074 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1075 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1076 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
1077 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1078 pTbAllocator->cTotalTbs += cTbsPerChunk;
1079 pTbAllocator->iStartHint = idxChunk * cTbsPerChunk;
1080
1081 return VINF_SUCCESS;
1082}
1083
1084
1085/**
1086 * Allocates a TB from an allocator with a free block.
1087 *
1088 * This is common code to both the fast and slow allocator code paths.
1089 */
1090DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1091{
1092 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1093
1094 int idxTb;
1095 if (pTbAllocator->iStartHint < pTbAllocator->cTotalTbs)
1096 idxTb = ASMBitNextClear(pTbAllocator->bmAllocated,
1097 pTbAllocator->cTotalTbs,
1098 pTbAllocator->iStartHint & ~(uint32_t)63);
1099 else
1100 idxTb = -1;
1101 if (idxTb < 0)
1102 {
1103 idxTb = ASMBitFirstClear(pTbAllocator->bmAllocated, pTbAllocator->cTotalTbs);
1104 AssertLogRelReturn(idxTb >= 0, NULL);
1105 }
1106 Assert((uint32_t)idxTb < pTbAllocator->cTotalTbs);
1107 ASMBitSet(pTbAllocator->bmAllocated, idxTb);
1108
1109 /** @todo shift/mask optimization for power of two IEMTB sizes. */
1110 uint32_t const idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTb);
1111 uint32_t const idxTbInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTb, idxChunk);
1112 PIEMTB const pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxTbInChunk];
1113 Assert(pTb->idxAllocChunk == idxChunk);
1114
1115 pTbAllocator->cInUseTbs += 1;
1116 if (fThreaded)
1117 pTbAllocator->cThreadedTbs += 1;
1118 else
1119 pTbAllocator->cNativeTbs += 1;
1120 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1121 return pTb;
1122}
1123
1124
1125/**
1126 * Slow path for iemTbAllocatorAlloc.
1127 */
1128static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1129{
1130 /*
1131 * With some luck we can add another chunk.
1132 */
1133 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1134 {
1135 int rc = iemTbAllocatorGrow(pVCpu);
1136 if (RT_SUCCESS(rc))
1137 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1138 }
1139
1140 /*
1141 * We have to prune stuff. Sigh.
1142 *
1143 * This requires scanning for older TBs and kicking them out. Not sure how to
1144 * best do this as we don't want to maintain any list of TBs ordered by last
1145 * usage time. But one reasonably simple approach would be that each time we
1146 * get here we continue a sequential scan of the allocation chunks,
1147 * considering just a smallish number of TBs and freeing a fixed portion of
1148 * them. Say, we consider the next 128 TBs, freeing the least recently used
1149 * out of each group of 4 TBs, resulting in 32 free TBs.
1150 */
1151 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1152 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1153 uint32_t const cTbsToPrune = 128;
1154 uint32_t const cTbsPerGroup = 4;
1155 uint32_t cFreedTbs = 0;
1156#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1157 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1158#else
1159 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1160#endif
1161 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1162 idxTbPruneFrom = 0;
1163 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1164 {
1165 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1166 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1167 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1168 uint32_t cMsAge = msNow - pTb->msLastUsed;
1169 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1170
1171 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1172 {
1173#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1174 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1175 { /* likely */ }
1176 else
1177 {
1178 idxInChunk2 = 0;
1179 idxChunk2 += 1;
1180 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1181 idxChunk2 = 0;
1182 }
1183#endif
1184 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1185 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1186 if ( cMsAge2 > cMsAge
1187 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1188 {
1189 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1190 pTb = pTb2;
1191 idxChunk = idxChunk2;
1192 idxInChunk = idxInChunk2;
1193 cMsAge = cMsAge2;
1194 }
1195 }
1196
1197 /* Free the TB. */
1198 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1199 cFreedTbs++; /* paranoia */
1200 }
1201 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1202 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1203
1204 /* Flush the TB lookup entry pointer. */
1205 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1206
1207 /*
1208 * Allocate a TB from the ones we've pruned.
1209 */
1210 if (cFreedTbs)
1211 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1212 return NULL;
1213}
1214
1215
1216/**
1217 * Allocate a translation block.
1218 *
1219 * @returns Pointer to block on success, NULL if we're out and are unable to
1220 * free up an existing one (very unlikely once implemented).
1221 * @param pVCpu The cross context virtual CPU structure of the calling
1222 * thread.
1223 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1224 * For statistics.
1225 */
1226DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1227{
1228 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1229 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1230
1231 /* Free any pending TBs before we proceed. */
1232 if (!pTbAllocator->pDelayedFreeHead)
1233 { /* probably likely */ }
1234 else
1235 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1236
1237 /* If the allocator is full, take the slow code path. */
1238 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1239 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1240 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1241}
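/*
 * Allocation lifecycle sketch (illustrative only; the compile step that fills
 * in the TB lives elsewhere in this file):
 *
 *     PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true);        - fThreaded=true for a threaded TB
 *     ... fill in opcodes, call entries and ranges ...
 *     iemTbCacheAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);   - make it findable via iemTbCacheLookup
 *     ...
 *     iemTbAllocatorFree(pVCpu, pTb);                       - or iemTbAlloctorScheduleForFree() while it
 *                                                             may still be on the execution call stack
 */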
1242
1243
1244/**
1245 * This is called when we're out of space for native TBs.
1246 *
1247 * This uses a variation on the pruning in iemTbAllocatorAllocSlow.
1248 * The difference is that we only prune native TBs and will only free any if
1249 * there are at least two in a group. The conditions under which we're called are
1250 * different - there will probably be free TBs in the table when we're called.
1251 * Therefore we increase the group size and max scan length, though we'll stop
1252 * scanning once we've reached the requested size (@a cNeededInstrs) and freed
1253 * up at least 8 TBs.
1254 */
1255void iemTbAllocatorFreeupNativeSpace(PVMCPUCC pVCpu, uint32_t cNeededInstrs)
1256{
1257 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1258 AssertReturnVoid(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1259
1260 STAM_REL_PROFILE_START(&pTbAllocator->StatPruneNative, a);
1261
1262 /*
1263 * Flush the delayed free list before we start freeing TBs indiscriminately.
1264 */
1265 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1266
1267 /*
1268 * Scan and free TBs.
1269 */
1270 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1271 uint32_t const cTbsToPrune = 128 * 8;
1272 uint32_t const cTbsPerGroup = 4 * 4;
1273 uint32_t cFreedTbs = 0;
1274 uint32_t cMaxInstrs = 0;
1275 uint32_t idxTbPruneFrom = pTbAllocator->iPruneNativeFrom & ~(uint32_t)(cTbsPerGroup - 1);
1276 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1277 {
1278 if (idxTbPruneFrom >= pTbAllocator->cTotalTbs)
1279 idxTbPruneFrom = 0;
1280 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1281 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1282 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1283 uint32_t cMsAge = pTb->fFlags & IEMTB_F_TYPE_NATIVE ? msNow - pTb->msLastUsed : msNow;
1284 uint8_t cNativeTbs = (pTb->fFlags & IEMTB_F_TYPE_NATIVE) != 0;
1285
1286 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1287 {
1288 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1289 { /* likely */ }
1290 else
1291 {
1292 idxInChunk2 = 0;
1293 idxChunk2 += 1;
1294 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1295 idxChunk2 = 0;
1296 }
1297 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1298 if (pTb2->fFlags & IEMTB_F_TYPE_NATIVE)
1299 {
1300 cNativeTbs += 1;
1301 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1302 if ( cMsAge2 > cMsAge
1303 || ( cMsAge2 == cMsAge
1304 && ( pTb2->cUsed < pTb->cUsed
1305 || ( pTb2->cUsed == pTb->cUsed
1306 && pTb2->Native.cInstructions > pTb->Native.cInstructions)))
1307 || !(pTb->fFlags & IEMTB_F_TYPE_NATIVE))
1308 {
1309 pTb = pTb2;
1310 idxChunk = idxChunk2;
1311 idxInChunk = idxInChunk2;
1312 cMsAge = cMsAge2;
1313 }
1314 }
1315 }
1316
1317 /* Free the TB if we found at least two native ones in this group. */
1318 if (cNativeTbs >= 2)
1319 {
1320 cMaxInstrs = RT_MAX(cMaxInstrs, pTb->Native.cInstructions);
1321 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1322 cFreedTbs++;
1323 if (cFreedTbs >= 8 && cMaxInstrs >= cNeededInstrs)
1324 break;
1325 }
1326 }
1327 pTbAllocator->iPruneNativeFrom = idxTbPruneFrom;
1328
1329 STAM_REL_PROFILE_STOP(&pTbAllocator->StatPruneNative, a);
1330}
1331
1332
1333/*********************************************************************************************************************************
1334* Threaded Recompiler Core *
1335*********************************************************************************************************************************/
1336/**
1337 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
1338 * @returns pszBuf.
1339 * @param fFlags The flags.
1340 * @param pszBuf The output buffer.
1341 * @param cbBuf The output buffer size. At least 32 bytes.
1342 */
1343DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
1344{
1345 Assert(cbBuf >= 32);
1346 static RTSTRTUPLE const s_aModes[] =
1347 {
1348 /* [00] = */ { RT_STR_TUPLE("16BIT") },
1349 /* [01] = */ { RT_STR_TUPLE("32BIT") },
1350 /* [02] = */ { RT_STR_TUPLE("!2!") },
1351 /* [03] = */ { RT_STR_TUPLE("!3!") },
1352 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
1353 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
1354 /* [06] = */ { RT_STR_TUPLE("!6!") },
1355 /* [07] = */ { RT_STR_TUPLE("!7!") },
1356 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
1357 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
1358 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
1359 /* [0b] = */ { RT_STR_TUPLE("!b!") },
1360 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
1361 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
1362 /* [0e] = */ { RT_STR_TUPLE("!e!") },
1363 /* [0f] = */ { RT_STR_TUPLE("!f!") },
1364 /* [10] = */ { RT_STR_TUPLE("!10!") },
1365 /* [11] = */ { RT_STR_TUPLE("!11!") },
1366 /* [12] = */ { RT_STR_TUPLE("!12!") },
1367 /* [13] = */ { RT_STR_TUPLE("!13!") },
1368 /* [14] = */ { RT_STR_TUPLE("!14!") },
1369 /* [15] = */ { RT_STR_TUPLE("!15!") },
1370 /* [16] = */ { RT_STR_TUPLE("!16!") },
1371 /* [17] = */ { RT_STR_TUPLE("!17!") },
1372 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
1373 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
1374 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
1375 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
1376 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
1377 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
1378 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
1379 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
1380 };
1381 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
1382 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
1383 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
1384
1385 pszBuf[off++] = ' ';
1386 pszBuf[off++] = 'C';
1387 pszBuf[off++] = 'P';
1388 pszBuf[off++] = 'L';
1389 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
1390 Assert(off < 32);
1391
1392 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
1393
1394 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
1395 {
1396 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
1397 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
1398 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
1399 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
1400 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
1401 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
1402 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
1403 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
1404 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
1405 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
1406 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
1407 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
1408 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
1409 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
1410 };
1411 if (fFlags)
1412 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1413 if (s_aFlags[i].fFlag & fFlags)
1414 {
1415 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
1416 pszBuf[off++] = ' ';
1417 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
1418 off += s_aFlags[i].cchName;
1419 fFlags &= ~s_aFlags[i].fFlag;
1420 if (!fFlags)
1421 break;
1422 }
1423 pszBuf[off] = '\0';
1424
1425 return pszBuf;
1426}
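/*
 * Usage sketch (mirrors the call in iemThreadedDisassembleTb below):
 *
 *     char szBuf[512];
 *     pHlp->pfnPrintf(pHlp, "fFlags=%#010x %s\n", pTb->fFlags,
 *                     iemTbFlagsToString(pTb->fFlags, szBuf, sizeof(szBuf)));
 */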
1427
1428
1429/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
1430static DECLCALLBACK(int) iemThreadedDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
1431{
1432 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
1433 pDis->cbCachedInstr += cbMaxRead;
1434 RT_NOREF(cbMinRead);
1435 return VERR_NO_DATA;
1436}
1437
1438
1439/**
1440 * Worker for iemThreadedDisassembleTb.
1441 */
1442static void iemThreadedDumpLookupTable(PCIEMTB pTb, PCDBGFINFOHLP pHlp, unsigned idxFirst, unsigned cEntries,
1443 const char *pszLeadText = " TB Lookup:") RT_NOEXCEPT
1444{
1445 if (idxFirst + cEntries <= pTb->cTbLookupEntries)
1446 {
1447 PIEMTB * const papTbLookup = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxFirst);
1448 pHlp->pfnPrintf(pHlp, "%s", pszLeadText);
1449 for (uint8_t iLookup = 0; iLookup < cEntries; iLookup++)
1450 {
1451 PIEMTB pLookupTb = papTbLookup[iLookup];
1452 if (pLookupTb)
1453 pHlp->pfnPrintf(pHlp, "%c%p (%s)", iLookup ? ',' : ' ', pLookupTb,
1454 (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED ? "threaded"
1455 : (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? "native"
1456 : "invalid");
1457 else
1458 pHlp->pfnPrintf(pHlp, "%cNULL", iLookup ? ',' : ' ');
1459 }
1460 pHlp->pfnPrintf(pHlp, "\n");
1461 }
1462 else
1463 {
1464 pHlp->pfnPrintf(pHlp, " !!Bogus TB lookup info: idxFirst=%#x L %u > cTbLookupEntries=%#x!!\n",
1465 idxFirst, cEntries, pTb->cTbLookupEntries);
1466 AssertMsgFailed(("idxFirst=%#x L %u > cTbLookupEntries=%#x\n", idxFirst, cEntries, pTb->cTbLookupEntries));
1467 }
1468}
1469
1470
1471DECLHIDDEN(void) iemThreadedDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
1472{
1473 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1474
1475 char szDisBuf[512];
1476
1477 /*
1478 * Print TB info.
1479 */
1480 pHlp->pfnPrintf(pHlp,
1481 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u cTbLookupEntries=%u\n"
1482 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
1483 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges, pTb->cTbLookupEntries,
1484 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
1485
1486 /*
1487 * This disassembly is driven by the debug info which follows the native
1488 * code and indicates when it starts with the next guest instructions,
1489 * where labels are and such things.
1490 */
1491 DISSTATE Dis;
1492 PCIEMTHRDEDCALLENTRY const paCalls = pTb->Thrd.paCalls;
1493 uint32_t const cCalls = pTb->Thrd.cCalls;
1494 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
1495 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
1496 : DISCPUMODE_64BIT;
1497 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
1498 uint8_t idxRange = UINT8_MAX;
1499 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
1500 uint32_t offRange = 0;
1501 uint32_t offOpcodes = 0;
1502 uint32_t const cbOpcodes = pTb->cbOpcodes;
1503 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
1504 bool fTbLookupSeen0 = false;
1505
1506 for (uint32_t iCall = 0; iCall < cCalls; iCall++)
1507 {
1508 /*
1509 * New opcode range?
1510 */
1511 if ( idxRange == UINT8_MAX
1512 || idxRange >= cRanges
1513 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
1514 {
1515 idxRange += 1;
1516 if (idxRange < cRanges)
1517 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
1518 else
1519 continue;
1520 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
1521 + (pTb->aRanges[idxRange].idxPhysPage == 0
1522 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1523 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
1524 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
1525 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
1526 pTb->aRanges[idxRange].idxPhysPage);
1527 GCPhysPc += offRange;
1528 }
1529
1530 /*
1531 * Disassemble another guest instruction?
1532 */
1533 if ( paCalls[iCall].offOpcode != offOpcodes
1534 && paCalls[iCall].cbOpcode > 0
1535 && (uint32_t)(cbOpcodes - paCalls[iCall].offOpcode) <= cbOpcodes /* paranoia^2 */ )
1536 {
1537 offOpcodes = paCalls[iCall].offOpcode;
1538 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
1539 uint32_t cbInstr = 1;
1540 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
1541 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
1542 iemThreadedDisasReadBytesDummy, NULL, &Dis, &cbInstr);
1543 if (RT_SUCCESS(rc))
1544 {
1545 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
1546 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
1547 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
1548 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
1549 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
1550 }
1551 else
1552 {
1553 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
1554 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
1555 cbInstr = paCalls[iCall].cbOpcode;
1556 }
1557 GCPhysPc += cbInstr;
1558 offRange += cbInstr;
1559 }
1560
1561 /*
1562 * Dump call details.
1563 */
1564 pHlp->pfnPrintf(pHlp,
1565 " Call #%u to %s (%u args)\n",
1566 iCall, g_apszIemThreadedFunctions[paCalls[iCall].enmFunction],
1567 g_acIemThreadedFunctionUsedArgs[paCalls[iCall].enmFunction]);
1568 if (paCalls[iCall].uTbLookup != 0)
1569 {
1570 uint8_t const idxFirst = IEM_TB_LOOKUP_TAB_GET_IDX(paCalls[iCall].uTbLookup);
1571 fTbLookupSeen0 = idxFirst == 0;
1572 iemThreadedDumpLookupTable(pTb, pHlp, idxFirst, IEM_TB_LOOKUP_TAB_GET_SIZE(paCalls[iCall].uTbLookup));
1573 }
1574
1575 /*
1576 * Snoop fExec.
1577 */
1578 switch (paCalls[iCall].enmFunction)
1579 {
1580 default:
1581 break;
1582 case kIemThreadedFunc_BltIn_CheckMode:
1583 fExec = paCalls[iCall].auParams[0];
1584 break;
1585 }
1586 }
1587
1588 if (!fTbLookupSeen0)
1589 iemThreadedDumpLookupTable(pTb, pHlp, 0, 1, " Fallback TB Lookup:");
1590}
1591
1592
1593
1594/**
1595 * Allocate a translation block for threaded recompilation.
1596 *
1597 * This is allocated with a maxed-out call table and storage for opcode bytes,
1598 * because it's only supposed to be called once per EMT to allocate the TB
1599 * pointed to by IEMCPU::pThrdCompileTbR3.
1600 *
1601 * @returns Pointer to the translation block on success, NULL on failure.
1602 * @param pVM The cross context virtual machine structure.
1603 * @param pVCpu The cross context virtual CPU structure of the calling
1604 * thread.
1605 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1606 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1607 */
1608static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1609{
1610 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1611 if (pTb)
1612 {
1613 unsigned const cCalls = 256;
1614 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1615 if (pTb->Thrd.paCalls)
1616 {
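            /* Note: 16 opcode bytes per call entry, which should comfortably cover
               the 15 byte maximum x86 instruction length for each decoded instruction. */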
1617 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1618 if (pTb->pabOpcodes)
1619 {
1620 pVCpu->iem.s.cbOpcodesAllocated = cCalls * 16;
1621 pTb->Thrd.cAllocated = cCalls;
1622 pTb->Thrd.cCalls = 0;
1623 pTb->cbOpcodes = 0;
1624 pTb->pNext = NULL;
1625 pTb->cUsed = 0;
1626 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1627 pTb->idxAllocChunk = UINT8_MAX;
1628 pTb->GCPhysPc = GCPhysPc;
1629 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1630 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1631 pTb->cInstructions = 0;
1632 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1633
1634 /* Init the first opcode range. */
1635 pTb->cRanges = 1;
1636 pTb->aRanges[0].cbOpcodes = 0;
1637 pTb->aRanges[0].offOpcodes = 0;
1638 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1639 pTb->aRanges[0].u2Unused = 0;
1640 pTb->aRanges[0].idxPhysPage = 0;
1641 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1642 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1643
1644 return pTb;
1645 }
1646 RTMemFree(pTb->Thrd.paCalls);
1647 }
1648 RTMemFree(pTb);
1649 }
1650 RT_NOREF(pVM);
1651 return NULL;
1652}
1653
1654
1655/**
1656 * Called on the TB that is dedicated for recompilation before it's reused.
1657 *
1658 * @param pVCpu The cross context virtual CPU structure of the calling
1659 * thread.
1660 * @param pTb The translation block to reuse.
1661 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1662 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1663 */
1664static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1665{
1666 pTb->GCPhysPc = GCPhysPc;
1667 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1668 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1669 pTb->Thrd.cCalls = 0;
1670 pTb->cbOpcodes = 0;
1671 pTb->cInstructions = 0;
1672 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1673
1674 /* Init the first opcode range. */
1675 pTb->cRanges = 1;
1676 pTb->aRanges[0].cbOpcodes = 0;
1677 pTb->aRanges[0].offOpcodes = 0;
1678 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1679 pTb->aRanges[0].u2Unused = 0;
1680 pTb->aRanges[0].idxPhysPage = 0;
1681 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1682 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1683}
1684
1685
1686/**
1687 * Used to duplicate a threaded translation block after recompilation is done.
1688 *
1689 * @returns Pointer to the translation block on success, NULL on failure.
1690 * @param pVM The cross context virtual machine structure.
1691 * @param pVCpu The cross context virtual CPU structure of the calling
1692 * thread.
1693 * @param pTbSrc The TB to duplicate.
1694 */
1695static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1696{
1697 /*
1698 * Just using the heap for now. Will make this more efficient and
1699 * complicated later, don't worry. :-)
1700 */
1701 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1702 if (pTb)
1703 {
1704 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1705 memcpy(pTb, pTbSrc, sizeof(*pTb));
1706 pTb->idxAllocChunk = idxAllocChunk;
1707
1708 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1709 Assert(cCalls > 0);
1710 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1711 if (pTb->Thrd.paCalls)
1712 {
1713 size_t const cbTbLookup = pTbSrc->cTbLookupEntries * sizeof(PIEMTB);
1714 Assert(cbTbLookup > 0);
1715 size_t const cbOpcodes = pTbSrc->cbOpcodes;
1716 Assert(cbOpcodes > 0);
1717 size_t const cbBoth = cbTbLookup + RT_ALIGN_Z(cbOpcodes, sizeof(PIEMTB));
1718 uint8_t * const pbBoth = (uint8_t *)RTMemAlloc(cbBoth);
1719 if (pbBoth)
1720 {
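                /* The TB lookup table and the opcode bytes share a single allocation:
                   the (zeroed) lookup table occupies the front of the block and
                   pabOpcodes points just past it. */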
1721 RT_BZERO(pbBoth, cbTbLookup);
1722 pTb->pabOpcodes = (uint8_t *)memcpy(&pbBoth[cbTbLookup], pTbSrc->pabOpcodes, cbOpcodes);
1723 pTb->Thrd.cAllocated = cCalls;
1724 pTb->pNext = NULL;
1725 pTb->cUsed = 0;
1726 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1727 pTb->fFlags = pTbSrc->fFlags;
1728
1729 return pTb;
1730 }
1731 RTMemFree(pTb->Thrd.paCalls);
1732 }
1733 iemTbAllocatorFree(pVCpu, pTb);
1734 }
1735 RT_NOREF(pVM);
1736 return NULL;
1737
1738}
1739
1740
1741/**
1742 * Adds the given TB to the hash table.
1743 *
1744 * @param pVCpu The cross context virtual CPU structure of the calling
1745 * thread.
1746 * @param pTbCache The cache to add it to.
1747 * @param pTb The translation block to add.
1748 */
1749static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1750{
1751 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1752
1753 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbInstr, pTb->cInstructions);
1754 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbLookupEntries, pTb->cTbLookupEntries);
1755 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1756 if (LogIs12Enabled())
1757 {
1758 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1759 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1760 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1761 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1762 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1763 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1764 pTb->aRanges[idxRange].idxPhysPage == 0
1765 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1766 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1767 }
1768}
1769
1770
1771/**
1772 * Called by opcode verifier functions when they detect a problem.
1773 */
1774void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1775{
1776 /* We cannot free the TB if it is still the current one (!fSafeToFree), because:
1777 - A threaded TB will have its current call entry accessed
1778 to update pVCpu->iem.s.cInstructions.
1779 - A native TB will have code left to execute. */
1780 if (fSafeToFree)
1781 iemTbAllocatorFree(pVCpu, pTb);
1782 else
1783 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1784}
1785
1786
1787/*
1788 * Real code.
1789 */
1790
1791#ifdef LOG_ENABLED
1792/**
1793 * Logs the current instruction.
1794 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1795 * @param pszFunction The IEM function doing the execution.
1796 * @param idxInstr The instruction number in the block.
1797 */
1798static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
1799{
1800# ifdef IN_RING3
1801 if (LogIs2Enabled())
1802 {
1803 char szInstr[256];
1804 uint32_t cbInstr = 0;
1805 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1806 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1807 szInstr, sizeof(szInstr), &cbInstr);
1808
1809 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1810 Log2(("**** %s fExec=%x pTb=%p cUsed=%u #%u\n"
1811 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1812 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1813 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1814 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1815 " %s\n"
1816 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, pVCpu->iem.s.pCurTbR3 ? pVCpu->iem.s.pCurTbR3->cUsed : 0, idxInstr,
1817 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1818 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1819 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1820 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1821 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1822 szInstr));
1823
1824 /*if (LogIs3Enabled()) - this outputs an insane amount of stuff, so disabled.
1825 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL); */
1826 }
1827 else
1828# endif
1829 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1830 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1831}
1832#endif /* LOG_ENABLED */
1833
1834
1835#if 0
1836static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1837{
1838 RT_NOREF(pVM, pVCpu);
1839 return rcStrict;
1840}
1841#endif
1842
1843
1844/**
1845 * Initializes the decoder state when compiling TBs.
1846 *
1847 * This presumes that fExec has already been initialized.
1848 *
1849 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so fixes
1850 * applied here may need to be applied to them as well.
1851 *
1852 * @param pVCpu The cross context virtual CPU structure of the calling
1853 * thread.
1854 * @param fReInit Clear for the first call for a TB, set for subsequent
1855 * calls from inside the compile loop where we can skip a
1856 * couple of things.
1857 * @param fExtraFlags The extra translation block flags when @a fReInit is
1858 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
1859 * checked.
1860 */
1861DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1862{
1863 /* ASSUMES: That iemInitExec was already called and that anyone changing
1864 CPU state affecting the fExec bits since then will have updated fExec! */
1865 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1866 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1867
1868 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1869
1870 /* Decoder state: */
1871 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1872 pVCpu->iem.s.enmEffAddrMode = enmMode;
1873 if (enmMode != IEMMODE_64BIT)
1874 {
1875 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1876 pVCpu->iem.s.enmEffOpSize = enmMode;
1877 }
1878 else
1879 {
1880 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1881 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1882 }
1883 pVCpu->iem.s.fPrefixes = 0;
1884 pVCpu->iem.s.uRexReg = 0;
1885 pVCpu->iem.s.uRexB = 0;
1886 pVCpu->iem.s.uRexIndex = 0;
1887 pVCpu->iem.s.idxPrefix = 0;
1888 pVCpu->iem.s.uVex3rdReg = 0;
1889 pVCpu->iem.s.uVexLength = 0;
1890 pVCpu->iem.s.fEvexStuff = 0;
1891 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
1892 pVCpu->iem.s.offModRm = 0;
1893 pVCpu->iem.s.iNextMapping = 0;
1894
1895 if (!fReInit)
1896 {
1897 pVCpu->iem.s.cActiveMappings = 0;
1898 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
1899 pVCpu->iem.s.fEndTb = false;
1900 pVCpu->iem.s.fTbCheckOpcodes = true; /* (check opcodes before executing the first instruction) */
1901 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1902 pVCpu->iem.s.fTbCrossedPage = false;
1903 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
1904 pVCpu->iem.s.fTbCurInstrIsSti = false;
1905 /* Force RF clearing and TF checking on first instruction in the block
1906 as we don't really know what came before and should assume the worst: */
1907 pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
1908 }
1909 else
1910 {
1911 Assert(pVCpu->iem.s.cActiveMappings == 0);
1912 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
1913 Assert(pVCpu->iem.s.fEndTb == false);
1914 Assert(pVCpu->iem.s.fTbCrossedPage == false);
1915 pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
1916 }
1917 pVCpu->iem.s.fTbCurInstr = 0;
1918
1919#ifdef DBGFTRACE_ENABLED
1920 switch (IEM_GET_CPU_MODE(pVCpu))
1921 {
1922 case IEMMODE_64BIT:
1923 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
1924 break;
1925 case IEMMODE_32BIT:
1926 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1927 break;
1928 case IEMMODE_16BIT:
1929 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1930 break;
1931 }
1932#endif
1933}
1934
1935
1936/**
1937 * Initializes the opcode fetcher when starting the compilation.
1938 *
1939 * @param pVCpu The cross context virtual CPU structure of the calling
1940 * thread.
1941 */
1942DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
1943{
1944 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
1945#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1946 pVCpu->iem.s.offOpcode = 0;
1947#else
1948 RT_NOREF(pVCpu);
1949#endif
1950}
1951
1952
1953/**
1954 * Re-initializes the opcode fetcher between instructions while compiling.
1955 *
1956 * @param pVCpu The cross context virtual CPU structure of the calling
1957 * thread.
1958 */
1959DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
1960{
1961 if (pVCpu->iem.s.pbInstrBuf)
1962 {
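        /* Calculate the new flat PC's offset into the cached instruction buffer; if it
           is still within the buffer we only need to update the fetch offsets, otherwise
           we drop the buffer state so the next opcode fetch has to re-establish it. */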
1963 uint64_t off = pVCpu->cpum.GstCtx.rip;
1964 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
1965 off += pVCpu->cpum.GstCtx.cs.u64Base;
1966 off -= pVCpu->iem.s.uInstrBufPc;
1967 if (off < pVCpu->iem.s.cbInstrBufTotal)
1968 {
1969 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
1970 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
1971 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
1972 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
1973 else
1974 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
1975 }
1976 else
1977 {
1978 pVCpu->iem.s.pbInstrBuf = NULL;
1979 pVCpu->iem.s.offInstrNextByte = 0;
1980 pVCpu->iem.s.offCurInstrStart = 0;
1981 pVCpu->iem.s.cbInstrBuf = 0;
1982 pVCpu->iem.s.cbInstrBufTotal = 0;
1983 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1984 }
1985 }
1986 else
1987 {
1988 pVCpu->iem.s.offInstrNextByte = 0;
1989 pVCpu->iem.s.offCurInstrStart = 0;
1990 pVCpu->iem.s.cbInstrBuf = 0;
1991 pVCpu->iem.s.cbInstrBufTotal = 0;
1992#ifdef VBOX_STRICT
1993 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1994#endif
1995 }
1996#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1997 pVCpu->iem.s.offOpcode = 0;
1998#endif
1999}
2000
2001#ifdef LOG_ENABLED
2002
2003/**
2004 * Inserts a NOP call.
2005 *
2006 * This is for debugging.
2007 *
2008 * @returns true on success, false if we're out of call entries.
2009 * @param pTb The translation block being compiled.
2010 */
2011bool iemThreadedCompileEmitNop(PIEMTB pTb)
2012{
2013 /* Emit the call. */
2014 uint32_t const idxCall = pTb->Thrd.cCalls;
2015 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2016 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2017 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2018 pCall->enmFunction = kIemThreadedFunc_BltIn_Nop;
2019 pCall->idxInstr = pTb->cInstructions - 1;
2020 pCall->cbOpcode = 0;
2021 pCall->offOpcode = 0;
2022 pCall->uTbLookup = 0;
2023 pCall->uUnused0 = 0;
2024 pCall->auParams[0] = 0;
2025 pCall->auParams[1] = 0;
2026 pCall->auParams[2] = 0;
2027 return true;
2028}
2029
2030
2031/**
2032 * Called by iemThreadedCompile if CPU state logging is desired.
2033 *
2034 * @returns true on success, false if we're out of call entries.
2035 * @param pTb The translation block being compiled.
2036 */
2037bool iemThreadedCompileEmitLogCpuState(PIEMTB pTb)
2038{
2039 /* Emit the call. */
2040 uint32_t const idxCall = pTb->Thrd.cCalls;
2041 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2042 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2043 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2044 pCall->enmFunction = kIemThreadedFunc_BltIn_LogCpuState;
2045 pCall->idxInstr = pTb->cInstructions - 1;
2046 pCall->cbOpcode = 0;
2047 pCall->offOpcode = 0;
2048 pCall->uTbLookup = 0;
2049 pCall->uUnused0 = 0;
2050 pCall->auParams[0] = RT_MAKE_U16(pCall->idxInstr, idxCall); /* currently not used, but whatever */
2051 pCall->auParams[1] = 0;
2052 pCall->auParams[2] = 0;
2053 return true;
2054}
2055
2056#endif /* LOG_ENABLED */
2057
2058DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
2059{
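    /* Unrolled copy of up to 15 opcode bytes (the maximum x86 instruction length),
       falling through from the longest case down to a single byte. */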
2060 switch (cbInstr)
2061 {
2062 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
2063 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
2064 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
2065 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
2066 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
2067 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
2068 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
2069 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
2070 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
2071 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
2072 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
2073 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
2074 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
2075 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
2076 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
2077 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
2078 }
2079}
2080
2081
2082/**
2083 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
2084 *
2085 * - CS LIM check required.
2086 * - Must recheck opcode bytes.
2087 * - Previous instruction branched.
2088 * - TLB load detected, probably due to page crossing.
2089 *
2090 * @returns true if everything went well, false if we're out of space in the TB
2091 * (e.g. opcode ranges) or we need to start doing CS.LIM checks.
2092 * @param pVCpu The cross context virtual CPU structure of the calling
2093 * thread.
2094 * @param pTb The translation block being compiled.
2095 */
2096bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
2097{
2098 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2099 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
2100#if 0
2101 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
2102 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
2103#endif
2104
2105 /*
2106 * If we're not in 64-bit mode and not already checking CS.LIM, we need to
2107 * see whether we have to start checking.
2108 */
2109 bool fConsiderCsLimChecking;
2110 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
2111 if ( fMode == IEM_F_MODE_X86_64BIT
2112 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
2113 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
2114 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
2115 fConsiderCsLimChecking = false; /* already enabled or not needed */
2116 else
2117 {
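        /* If there is at least a page plus a maximum instruction length of headroom
           between EIP and the limit (adjusted for the page offset of CS.BASE), we can
           carry on without CS.LIM checks for now; otherwise we end the TB here,
           presumably so the next one is compiled with IEMTB_F_CS_LIM_CHECKS set. */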
2118 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2119 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2120 fConsiderCsLimChecking = true; /* likely */
2121 else
2122 {
2123 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
2124 return false;
2125 }
2126 }
2127
2128 /*
2129 * Prepare the call now, even before we know if we can accept the instruction in this TB.
2130 * This allows us to amend parameters w/o making every case suffer.
2131 */
2132 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
2133 uint16_t const offOpcode = pTb->cbOpcodes;
2134 uint8_t idxRange = pTb->cRanges - 1;
2135
2136 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
2137 pCall->idxInstr = pTb->cInstructions;
2138 pCall->cbOpcode = cbInstr;
2139 pCall->offOpcode = offOpcode;
2140 pCall->uTbLookup = 0;
2141 pCall->uUnused0 = 0;
2142 pCall->auParams[0] = (uint32_t)cbInstr
2143 | (uint32_t)(pVCpu->iem.s.fExec << 8) /* liveness: Enough of fExec for IEM_F_MODE_X86_IS_FLAT. */
2144 /* The upper dword is sometimes used for cbStartPage. */;
2145 pCall->auParams[1] = idxRange;
2146 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
2147
2148/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
2149 * gotten onto. If we do, stop */
2150
2151 /*
2152 * Case 1: We've branched (RIP changed).
2153 *
2154 * Loop check: If the new PC (GCPhysPC) is within an opcode range of this
2155 * TB, end the TB here as it is most likely a loop and if it
2156 * made sense to unroll it, the guest code compiler should've
2157 * done it already.
2158 *
2159 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
2160 * Req: 1 extra range, no extra phys.
2161 *
2162 * Sub-case 1b: Different page but no page boundary crossing, so TLB load
2163 * necessary (fTbCrossedPage is true).
2164 * Req: 1 extra range, probably 1 extra phys page entry.
2165 *
2166 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
2167 * but in addition we cross into the following page and require
2168 * another TLB load.
2169 * Req: 2 extra ranges, probably 2 extra phys page entries.
2170 *
2171 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
2172 * the following page (thus fTbCrossedPage is true).
2173 * Req: 2 extra ranges, probably 1 extra phys page entry.
2174 *
2175 * Note! The setting of fTbCrossedPage is done by iemOpcodeFetchBytesJmp, but
2176 * it may trigger "spuriously" from the CPU's point of view because of
2177 * physical page changes that invalidate the physical TLB and trigger a
2178 * call to the function. In theory this isn't a big deal, just a bit of
2179 * performance loss as we'll pick the LoadingTlb variants.
2180 *
2181 * Note! We do not currently optimize branching to the next instruction (sorry
2182 * 32-bit PIC code). We could maybe do that in the branching code that
2183 * sets (or not) fTbBranched.
2184 */
2185 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
2186 * variant in win 3.1 code and the call variant in 32-bit linux PIC
2187 * code. This'll require filtering out far jmps and calls, as they
2188 * load CS which should technically be considered indirect since the
2189 * GDT/LDT entry's base address can be modified independently from
2190 * the code. */
2191 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
2192 {
2193 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
2194 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
2195 {
2196 /* 1a + 1b - instruction fully within the branched to page. */
2197 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
2198 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
2199
2200 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
2201 {
2202 /* Check that we've got a free range. */
2203 idxRange += 1;
2204 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2205 { /* likely */ }
2206 else
2207 {
2208 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2209 return false;
2210 }
2211 pCall->auParams[1] = idxRange;
2212 pCall->auParams[2] = 0;
2213
2214 /* Check that we've got a free page slot. */
2215 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2216 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
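                /* Note: idxPhysPage is left at UINT8_MAX when the branch target page is
                   new to this TB, in which case the loop check below can be skipped as
                   no earlier range can reference the page. */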
2217 uint8_t idxPhysPage;
2218 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2219 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 0;
2220 else if (pTb->aGCPhysPages[0] == NIL_RTGCPHYS)
2221 {
2222 pTb->aGCPhysPages[0] = GCPhysNew;
2223 pTb->aRanges[idxRange].idxPhysPage = 1;
2224 idxPhysPage = UINT8_MAX;
2225 }
2226 else if (pTb->aGCPhysPages[0] == GCPhysNew)
2227 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 1;
2228 else if (pTb->aGCPhysPages[1] == NIL_RTGCPHYS)
2229 {
2230 pTb->aGCPhysPages[1] = GCPhysNew;
2231 pTb->aRanges[idxRange].idxPhysPage = 2;
2232 idxPhysPage = UINT8_MAX;
2233 }
2234 else if (pTb->aGCPhysPages[1] == GCPhysNew)
2235 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 2;
2236 else
2237 {
2238 Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2239 return false;
2240 }
2241
2242 /* Loop check: We weave the loop check in here to optimize the lookup. */
2243 if (idxPhysPage != UINT8_MAX)
2244 {
2245 uint32_t const offPhysPc = pVCpu->iem.s.offCurInstrStart;
2246 for (uint8_t idxLoopRange = 0; idxLoopRange < idxRange; idxLoopRange++)
2247 if ( pTb->aRanges[idxLoopRange].idxPhysPage == idxPhysPage
2248 && offPhysPc - (uint32_t)pTb->aRanges[idxLoopRange].offPhysPage
2249 < (uint32_t)pTb->aRanges[idxLoopRange].cbOpcodes)
2250 {
2251 Log8(("%04x:%08RX64: loop detected after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2252 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbLoopInTbDetected);
2253 return false;
2254 }
2255 }
2256
2257 /* Finish setting up the new range. */
2258 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2259 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2260 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2261 pTb->aRanges[idxRange].u2Unused = 0;
2262 pTb->cRanges++;
2263 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
2264 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
2265 pTb->aRanges[idxRange].offOpcodes));
2266 }
2267 else
2268 {
2269 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2270 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2271 }
2272
2273 /* Determine which function we need to load & check.
2274 Note! For jumps to a new page, we'll set both fTbBranched and
2275 fTbCrossedPage to avoid unnecessary TLB work for intra
2276 page branching */
2277 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
2278 || pVCpu->iem.s.fTbCrossedPage)
2279 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2280 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
2281 : !fConsiderCsLimChecking
2282 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
2283 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
2284 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
2285 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2286 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
2287 : !fConsiderCsLimChecking
2288 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
2289 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
2290 else
2291 {
2292 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
2293 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2294 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2295 : !fConsiderCsLimChecking
2296 ? kIemThreadedFunc_BltIn_CheckOpcodes
2297 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
2298 }
2299 }
2300 else
2301 {
2302 /* 1c + 1d - instruction crosses pages. */
2303 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2304 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2305
2306 /* Lazy bird: Check that this isn't case 1c, since we've already
2307 loaded the first physical address. End the TB and
2308 make it a case 2b instead.
2309
2310 Hmm. Too much bother to detect, so just do the same
2311 with case 1d as well. */
2312#if 0 /** @todo get back to this later when we've got the actual branch code in
2313 * place. */
2314 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2315
2316 /* Check that we've got two free ranges. */
2317 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
2318 { /* likely */ }
2319 else
2320 return false;
2321 idxRange += 1;
2322 pCall->auParams[1] = idxRange;
2323 pCall->auParams[2] = 0;
2324
2325 /* ... */
2326
2327#else
2328 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2329 return false;
2330#endif
2331 }
2332 }
2333
2334 /*
2335 * Case 2: Page crossing.
2336 *
2337 * Sub-case 2a: The instruction starts on the first byte in the next page.
2338 *
2339 * Sub-case 2b: The instruction has opcode bytes in both the current and
2340 * following page.
2341 *
2342 * Both cases requires a new range table entry and probably a new physical
2343 * page entry. The difference is in which functions to emit and whether to
2344 * add bytes to the current range.
2345 */
2346 else if (pVCpu->iem.s.fTbCrossedPage)
2347 {
2348 /* Check that we've got a free range. */
2349 idxRange += 1;
2350 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2351 { /* likely */ }
2352 else
2353 {
2354 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2355 return false;
2356 }
2357
2358 /* Check that we've got a free page slot. */
2359 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2360 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2361 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2362 pTb->aRanges[idxRange].idxPhysPage = 0;
2363 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2364 || pTb->aGCPhysPages[0] == GCPhysNew)
2365 {
2366 pTb->aGCPhysPages[0] = GCPhysNew;
2367 pTb->aRanges[idxRange].idxPhysPage = 1;
2368 }
2369 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2370 || pTb->aGCPhysPages[1] == GCPhysNew)
2371 {
2372 pTb->aGCPhysPages[1] = GCPhysNew;
2373 pTb->aRanges[idxRange].idxPhysPage = 2;
2374 }
2375 else
2376 {
2377 Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2378 return false;
2379 }
2380
2381 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
2382 {
2383 Assert(pVCpu->iem.s.offCurInstrStart == 0);
2384 pCall->auParams[1] = idxRange;
2385 pCall->auParams[2] = 0;
2386
2387 /* Finish setting up the new range. */
2388 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2389 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2390 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2391 pTb->aRanges[idxRange].u2Unused = 0;
2392 pTb->cRanges++;
2393 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2394 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2395 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2396
2397 /* Determine which function we need to load & check. */
2398 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2399 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
2400 : !fConsiderCsLimChecking
2401 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
2402 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
2403 }
2404 else
2405 {
2406 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2407 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2408 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2409 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
2410
2411 /* We're good. Split the instruction over the old and new range table entries. */
2412 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
2413
2414 pTb->aRanges[idxRange].offPhysPage = 0;
2415 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
2416 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
2417 pTb->aRanges[idxRange].u2Unused = 0;
2418 pTb->cRanges++;
2419 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2420 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2421 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2422
2423 /* Determine which function we need to load & check. */
2424 if (pVCpu->iem.s.fTbCheckOpcodes)
2425 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2426 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
2427 : !fConsiderCsLimChecking
2428 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
2429 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
2430 else
2431 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2432 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
2433 : !fConsiderCsLimChecking
2434 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
2435 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
2436 }
2437 }
2438
2439 /*
2440 * Regular case: No new range required.
2441 */
2442 else
2443 {
2444 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
2445 if (pVCpu->iem.s.fTbCheckOpcodes)
2446 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2447 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2448 : kIemThreadedFunc_BltIn_CheckOpcodes;
2449 else
2450 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
2451
2452 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2453 pTb->cbOpcodes = offOpcode + cbInstr;
2454 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2455 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2456 }
2457
2458 /*
2459 * Commit the call.
2460 */
2461 pTb->Thrd.cCalls++;
2462
2463 /*
2464 * Clear state.
2465 */
2466 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2467 pVCpu->iem.s.fTbCrossedPage = false;
2468 pVCpu->iem.s.fTbCheckOpcodes = false;
2469
2470 /*
2471 * Copy opcode bytes.
2472 */
2473 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2474 pTb->cbOpcodes = offOpcode + cbInstr;
2475 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2476
2477 return true;
2478}
2479
2480
2481/**
2482 * Worker for iemThreadedCompileBeginEmitCallsComplications and
2483 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
2484 *
2485 * @returns true if anything is pending, false if not.
2486 * @param pVCpu The cross context virtual CPU structure of the calling
2487 * thread.
2488 */
2489DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
2490{
2491 uint64_t fCpu = pVCpu->fLocalForcedActions;
2492 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
2493#if 1
2494 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
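    /* Nothing is considered pending when either no relevant force flags are set at
       all, or only APIC/PIC interrupts are pending while IF is clear or an interrupt
       shadow is in effect. */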
2495 if (RT_LIKELY( !fCpu
2496 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2497 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2498 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
2499 return false;
2500 return true;
2501#else
2502 return false;
2503#endif
2504
2505}
2506
2507
2508/**
2509 * Called by iemThreadedCompile when a block requires a mode check.
2510 *
2511 * @returns true if we should continue, false if we're out of call entries.
2512 * @param pVCpu The cross context virtual CPU structure of the calling
2513 * thread.
2514 * @param pTb The translation block being compiled.
2515 */
2516static bool iemThreadedCompileEmitCheckMode(PVMCPUCC pVCpu, PIEMTB pTb)
2517{
2518 /* Emit the call. */
2519 uint32_t const idxCall = pTb->Thrd.cCalls;
2520 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2521 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2522 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2523 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckMode;
2524 pCall->idxInstr = pTb->cInstructions - 1;
2525 pCall->cbOpcode = 0;
2526 pCall->offOpcode = 0;
2527 pCall->uTbLookup = 0;
2528 pCall->uUnused0 = 0;
2529 pCall->auParams[0] = pVCpu->iem.s.fExec;
2530 pCall->auParams[1] = 0;
2531 pCall->auParams[2] = 0;
2532 LogFunc(("%04x:%08RX64 fExec=%#x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->iem.s.fExec));
2533 return true;
2534}
2535
2536
2537/**
2538 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2539 * set.
2540 *
2541 * @returns true if we should continue, false if an IRQ is deliverable or a
2542 * relevant force flag is pending.
2543 * @param pVCpu The cross context virtual CPU structure of the calling
2544 * thread.
2545 * @param pTb The translation block being compiled.
2546 * @sa iemThreadedCompileCheckIrq
2547 */
2548bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2549{
2550 /*
2551 * Skip this if we've already emitted a call after the previous instruction
2552 * or if it's the first call, as we're always checking FFs between blocks.
2553 */
2554 uint32_t const idxCall = pTb->Thrd.cCalls;
2555 if ( idxCall > 0
2556 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2557 {
2558 /* Emit the call. */
2559 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2560 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2561 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2562 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2563 pCall->idxInstr = pTb->cInstructions;
2564 pCall->offOpcode = 0;
2565 pCall->cbOpcode = 0;
2566 pCall->uTbLookup = 0;
2567 pCall->uUnused0 = 0;
2568 pCall->auParams[0] = 0;
2569 pCall->auParams[1] = 0;
2570 pCall->auParams[2] = 0;
2571 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2572
2573 /* Reset the IRQ check value. */
2574 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2575
2576 /*
2577 * Check for deliverable IRQs and pending force flags.
2578 */
2579 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2580 }
2581 return true; /* continue */
2582}
2583
2584
2585/**
2586 * Emits an IRQ check call and checks for pending IRQs.
2587 *
2588 * @returns true if we should continue, false if an IRQ is deliverable or a
2589 * relevant force flag is pending.
2590 * @param pVCpu The cross context virtual CPU structure of the calling
2591 * thread.
2592 * @param pTb The translation block.
2593 * @sa iemThreadedCompileBeginEmitCallsComplications
2594 */
2595static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2596{
2597 /* Check again in a little bit, unless it is immediately following an STI
2598 in which case we *must* check immediately after the next instruction
2599 as well in case it's executed with interrupt inhibition. We could
2600 otherwise miss the interrupt window. See the irq2 wait2 variant in
2601 bs3-timers-1 which is doing sti + sti + cli. */
2602 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2603 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2604 else
2605 {
2606 pVCpu->iem.s.fTbCurInstrIsSti = false;
2607 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2608 }
2609 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2610
2611 /*
2612 * Emit the call.
2613 */
2614 AssertReturn(pTb->Thrd.cCalls < pTb->Thrd.cAllocated, false);
2615 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++];
2616 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2617 pCall->idxInstr = pTb->cInstructions;
2618 pCall->offOpcode = 0;
2619 pCall->cbOpcode = 0;
2620 pCall->uTbLookup = 0;
2621 pCall->uUnused0 = 0;
2622 pCall->auParams[0] = 0;
2623 pCall->auParams[1] = 0;
2624 pCall->auParams[2] = 0;
2625
2626 /*
2627 * Check for deliverable IRQs and pending force flags.
2628 */
2629 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2630}
2631
2632
2633/**
2634 * Compiles a new TB and executes it.
2635 *
2636 * We combine compilation and execution here as it makes for simpler code flow
2637 * in the main loop and it allows interpreting while compiling if we want to
2638 * explore that option.
2639 *
2640 * @returns Strict VBox status code.
2641 * @param pVM The cross context virtual machine structure.
2642 * @param pVCpu The cross context virtual CPU structure of the calling
2643 * thread.
2644 * @param GCPhysPc The physical address corresponding to the current
2645 * RIP+CS.BASE.
2646 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2647 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2648 */
2649static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2650{
2651 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2652 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2653
2654 /*
2655 * Get the TB we use for the recompiling. This is a maxed-out TB of
2656 * which we'll make a more efficient copy when we're done compiling.
2657 */
2658 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2659 if (pTb)
2660 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2661 else
2662 {
2663 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2664 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2665 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2666 }
2667
2668 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2669 functions may get at it. */
2670 pVCpu->iem.s.pCurTbR3 = pTb;
2671
2672#if 0
2673 /* Make sure the CheckIrq condition matches the one in EM. */
2674 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2675 const uint32_t cZeroCalls = 1;
2676#else
2677 const uint32_t cZeroCalls = 0;
2678#endif
2679
2680 /*
2681 * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2682 */
2683 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2684 iemThreadedCompileInitOpcodeFetching(pVCpu);
2685 VBOXSTRICTRC rcStrict;
2686 for (;;)
2687 {
2688 /* Process the next instruction. */
2689#ifdef LOG_ENABLED
2690 iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
2691 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2692 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2693 Assert(uCsLog != 0 || uRipLog > 0x400 || !IEM_IS_REAL_OR_V86_MODE(pVCpu)); /* Detect executing RM interrupt table. */
2694#endif
2695 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2696 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2697
2698 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2699#if 0
2700 for (unsigned i = cCallsPrev; i < pTb->Thrd.cCalls; i++)
2701 Log8(("-> %#u/%u - %d %s\n", i, pTb->Thrd.paCalls[i].idxInstr, pTb->Thrd.paCalls[i].enmFunction,
2702 g_apszIemThreadedFunctions[pTb->Thrd.paCalls[i].enmFunction]));
2703#endif
2704 if ( rcStrict == VINF_SUCCESS
2705 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2706 && !pVCpu->iem.s.fEndTb)
2707 {
2708 Assert(pTb->Thrd.cCalls > cCallsPrev);
2709 Assert(cCallsPrev - pTb->Thrd.cCalls < 5);
2710
2711 pVCpu->iem.s.cInstructions++;
2712
2713 /* Check for mode change _after_ certain CIMPL calls, so check that
2714 we continue executing with the same mode value. */
2715 if (!(pVCpu->iem.s.fTbCurInstr & (IEM_CIMPL_F_MODE | IEM_CIMPL_F_XCPT | IEM_CIMPL_F_VMEXIT)))
2716 { /* probable */ }
2717 else if (RT_LIKELY(iemThreadedCompileEmitCheckMode(pVCpu, pTb)))
2718 { /* extremely likely */ }
2719 else
2720 break;
2721
2722#if defined(LOG_ENABLED) && 0 /* for debugging */
2723 //iemThreadedCompileEmitNop(pTb);
2724 iemThreadedCompileEmitLogCpuState(pTb);
2725#endif
2726 }
2727 else
2728 {
2729 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2730 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2731 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2732 rcStrict = VINF_SUCCESS;
2733
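            /* If we managed to emit any calls, break out and save the TB below;
               otherwise there is nothing worth caching and we just bail out. */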
2734 if (pTb->Thrd.cCalls > cZeroCalls)
2735 {
2736 if (cCallsPrev != pTb->Thrd.cCalls)
2737 pVCpu->iem.s.cInstructions++;
2738 break;
2739 }
2740
2741 pVCpu->iem.s.pCurTbR3 = NULL;
2742 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2743 }
2744
2745 /* Check for IRQs? */
2746 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2747 pVCpu->iem.s.cInstrTillIrqCheck--;
2748 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2749 break;
2750
2751 /* Still space in the TB? */
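        /* (Keep headroom for one more instruction: up to 5 call entries and 16 opcode
            bytes; the 127 limit presumably keeps the TB lookup index within 7 bits.) */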
2752 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2753 && pTb->cbOpcodes + 16 <= pVCpu->iem.s.cbOpcodesAllocated
2754 && pTb->cTbLookupEntries < 127)
2755 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2756 else
2757 {
2758 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes, %u TB lookup entries - full\n",
2759 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes, pTb->cTbLookupEntries));
2760 break;
2761 }
2762 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2763 }
2764
2765 /*
2766 * Reserve lookup space for the final call entry if necessary.
2767 */
2768 PIEMTHRDEDCALLENTRY pFinalCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls - 1];
2769 if (pTb->Thrd.cCalls > 1)
2770 {
2771 if (pFinalCall->uTbLookup == 0)
2772 {
2773 pFinalCall->uTbLookup = IEM_TB_LOOKUP_TAB_MAKE(pTb->cTbLookupEntries, 0);
2774 pTb->cTbLookupEntries += 1;
2775 }
2776 }
2777 else if (pFinalCall->uTbLookup != 0)
2778 {
2779 Assert(pTb->cTbLookupEntries > 1);
2780 pFinalCall->uTbLookup -= 1;
2781 pTb->cTbLookupEntries -= 1;
2782 }
2783
2784 /*
2785 * Duplicate the TB into a completed one and link it.
2786 */
2787 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2788 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2789
2790 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
2791
2792#ifdef IEM_COMPILE_ONLY_MODE
2793 /*
2794 * Execute the translation block.
2795 */
2796#endif
2797
2798 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2799}
2800
2801
2802
2803/*********************************************************************************************************************************
2804* Recompiled Execution Core *
2805*********************************************************************************************************************************/
2806
2807/** Helper for iemTbExec. */
2808DECL_FORCE_INLINE(PIEMTB *) iemTbGetTbLookupEntryWithRip(PCIEMTB pTb, uint8_t uTbLookup, uint64_t uRip)
2809{
2810 uint8_t const idx = IEM_TB_LOOKUP_TAB_GET_IDX_WITH_RIP(uTbLookup, uRip);
2811 Assert(idx < pTb->cTbLookupEntries);
2812 return IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idx);
2813}
2814
2815
2816/**
2817 * Executes a translation block.
2818 *
2819 * @returns Strict VBox status code.
2820 * @param pVCpu The cross context virtual CPU structure of the calling
2821 * thread.
2822 * @param pTb The translation block to execute.
2823 */
2824static VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
2825{
2826 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
2827
2828 /*
2829 * Set the current TB so CIMPL functions may get at it.
2830 */
2831 pVCpu->iem.s.pCurTbR3 = pTb;
2832 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0);
2833
2834 /*
2835 * Execute the block.
2836 */
2837#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
2838 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
2839 {
2840 pVCpu->iem.s.cTbExecNative++;
2841# ifdef LOG_ENABLED
2842 iemThreadedLogCurInstr(pVCpu, "EXn", 0);
2843# endif
2844
2845# ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
2846# ifdef RT_ARCH_AMD64
2847 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu);
2848# else
2849 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu, &pVCpu->cpum.GstCtx);
2850# endif
2851# else
2852# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
2853 AssertCompileMemberOffset(VMCPUCC, iem.s.pvTbFramePointerR3, 0x7c8); /* This is assumed in iemNativeTbEntry */
2854# endif
2855# ifdef RT_ARCH_AMD64
2856 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, (uintptr_t)pTb->Native.paInstructions);
2857# else
2858 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, &pVCpu->cpum.GstCtx, (uintptr_t)pTb->Native.paInstructions);
2859# endif
2860# endif
2861
2862# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
2863 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
2864# endif
2865# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
2866 Assert(pVCpu->iem.s.fSkippingEFlags == 0);
2867# endif
2868 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2869 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2870 { /* likely */ }
2871 else
2872 {
2873 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
2874 pVCpu->iem.s.pCurTbR3 = NULL;
2875
2876 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2877 only to break out of TB execution early. */
2878 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2879 {
2880 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreak);
2881 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2882 }
2883
2884 /* VINF_IEM_REEXEC_BREAK_FF should be treated as VINF_SUCCESS as it's
2885 only to break out of TB execution early due to pending FFs. */
2886 if (rcStrict == VINF_IEM_REEXEC_BREAK_FF)
2887 {
2888 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreakFF);
2889 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2890 }
2891
2892 /* VINF_IEM_REEXEC_FINISH_WITH_FLAGS needs to receive special treatment
2893 and be converted to VINF_SUCCESS or whatever is appropriate. */
2894 if (rcStrict == VINF_IEM_REEXEC_FINISH_WITH_FLAGS)
2895 {
2896 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnWithFlags);
2897 return iemExecStatusCodeFiddling(pVCpu, iemFinishInstructionWithFlagsSet(pVCpu, VINF_SUCCESS));
2898 }
2899
2900 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnOtherStatus);
2901 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2902 }
2903 }
2904 else
2905#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
2906 {
2907 /*
2908 * The threaded execution loop.
2909 */
2910 pVCpu->iem.s.cTbExecThreaded++;
2911#ifdef LOG_ENABLED
2912 uint64_t uRipPrev = UINT64_MAX;
2913#endif
2914 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
2915 uint32_t cCallsLeft = pTb->Thrd.cCalls;
2916 while (cCallsLeft-- > 0)
2917 {
2918#ifdef LOG_ENABLED
2919 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
2920 {
2921 uRipPrev = pVCpu->cpum.GstCtx.rip;
2922 iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
2923 }
2924 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
2925 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
2926 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
2927#endif
2928#ifdef VBOX_WITH_STATISTICS
2929 AssertCompile(RT_ELEMENTS(pVCpu->iem.s.acThreadedFuncStats) >= kIemThreadedFunc_End);
2930 pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction] += 1;
2931#endif
2932 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
2933 pCallEntry->auParams[0],
2934 pCallEntry->auParams[1],
2935 pCallEntry->auParams[2]);
2936 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2937 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2938 pCallEntry++;
2939 else
2940 {
2941 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
2942 pVCpu->iem.s.pCurTbR3 = NULL;
2943 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaks);
2944 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry->uTbLookup, pVCpu->cpum.GstCtx.rip);
2945
2946 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2947 only to break out of TB execution early. */
2948 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2949 {
2950#ifdef VBOX_WITH_STATISTICS
2951 if (pCallEntry->uTbLookup)
2952 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithLookup);
2953 else
2954 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithoutLookup);
2955#endif
2956 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2957 }
2958 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2959 }
2960 }
2961
2962 /* Update the lookup entry. */
2963 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry[-1].uTbLookup, pVCpu->cpum.GstCtx.rip);
2964 }
2965
2966 pVCpu->iem.s.cInstructions += pTb->cInstructions;
2967 pVCpu->iem.s.pCurTbR3 = NULL;
2968 return VINF_SUCCESS;
2969}
2970
2971
2972/**
2973 * This is called when the PC doesn't match the current pbInstrBuf.
2974 *
2975 * Upon return, we're ready for opcode fetching. But please note that
2976 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
2977 * MMIO or unassigned).
2978 */
2979static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
2980{
2981 pVCpu->iem.s.pbInstrBuf = NULL;
2982 pVCpu->iem.s.offCurInstrStart = 0;
2983 pVCpu->iem.s.offInstrNextByte = 0;
2984 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
2985 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
2986}
2987
2988
2989/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
2990DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
2991{
2992 /*
2993 * Set uCurTbStartPc to RIP and calc the effective PC.
2994 */
2995 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
2996 pVCpu->iem.s.uCurTbStartPc = uPc;
2997 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2998 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
2999
3000 /*
3001 * Advance within the current buffer (PAGE) when possible.
3002 */
3003 if (pVCpu->iem.s.pbInstrBuf)
3004 {
3005 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
3006 if (off < pVCpu->iem.s.cbInstrBufTotal)
3007 {
3008 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
3009 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
3010 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
3011 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
3012 else
3013 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
3014
3015 return pVCpu->iem.s.GCPhysInstrBuf + off;
3016 }
3017 }
3018 return iemGetPcWithPhysAndCodeMissed(pVCpu);
3019}
3020
3021
3022/**
3023 * Determines the extra IEMTB_F_XXX flags.
3024 *
3025 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
3026 * IEMTB_F_CS_LIM_CHECKS (or zero).
3027 * @param pVCpu The cross context virtual CPU structure of the calling
3028 * thread.
3029 */
3030DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
3031{
3032 uint32_t fRet = 0;
3033
3034 /*
3035 * Determine the inhibit bits.
3036 */
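        /* This relies on the IEMTB_F_INHIBIT_XXX bits sharing positions with the
           corresponding inhibit bits kept in rflags.uBoth, so the typical
           no-inhibit case is a single test. */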
3037 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (IEMTB_F_INHIBIT_SHADOW | IEMTB_F_INHIBIT_NMI)))
3038 { /* typical */ }
3039 else
3040 {
3041 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
3042 fRet |= IEMTB_F_INHIBIT_SHADOW;
3043 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
3044 fRet |= IEMTB_F_INHIBIT_NMI;
3045 }
3046
3047 /*
3048 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
3049 * likely to go invalid before the end of the translation block.
3050 */
3051 if (IEM_IS_64BIT_CODE(pVCpu))
3052 return fRet;
3053
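        /* Only add the check when the CS limit is within a page plus 16 bytes of
           the current EIP (adjusted for the page offset of the CS base); further
           away than that, the code covered by this TB should not be able to run
           into the limit. */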
3054 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
3055 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
3056 return fRet;
3057 return fRet | IEMTB_F_CS_LIM_CHECKS;
3058}
3059
3060
3061VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu)
3062{
3063 /*
3064 * See if there is an interrupt pending in TRPM, inject it if we can.
3065 */
3066 if (!TRPMHasTrap(pVCpu))
3067 { /* likely */ }
3068 else
3069 {
3070 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
3071 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
3072            { /* likely */ }
3073 else
3074 return rcStrict;
3075 }
3076
3077 /*
3078 * Init the execution environment.
3079 */
3080#if 1 /** @todo this seems like a good idea, however if we ever share memory
3081 * directly with other threads on the host, it isn't necessarily... */
3082 if (pVM->cCpus == 1)
3083 iemInitExec(pVCpu, IEM_F_X86_DISREGARD_LOCK /*fExecOpts*/);
3084 else
3085#endif
3086 iemInitExec(pVCpu, 0 /*fExecOpts*/);
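        /* Lazily seed the 32-bit millisecond timestamp used for cheap timer
           polling; it is normally only zero the first time we get here. */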
3087 if (RT_LIKELY(pVCpu->iem.s.msRecompilerPollNow != 0))
3088 { }
3089 else
3090 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(TMVirtualGetNoCheck(pVM) / RT_NS_1MS);
3091 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
3092
3093 /*
3094 * Run-loop.
3095 *
3096 * If we're using setjmp/longjmp we combine all the catching here to avoid
3097 * having to call setjmp for each block we're executing.
3098 */
3099 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
3100 for (;;)
3101 {
3102 VBOXSTRICTRC rcStrict;
3103 IEM_TRY_SETJMP(pVCpu, rcStrict)
3104 {
3105                uint32_t const cPollRate = 511; /* EM.cpp passes 4095 to IEMExecLots, so an eighth of that seems reasonable for now. */
3106 for (uint32_t iIterations = 0; ; iIterations++)
3107 {
3108 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
3109 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
3110 if (RT_LIKELY(pVCpu->iem.s.pbInstrBuf != NULL))
3111 {
3112 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
3113 PIEMTB const pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
3114 if (pTb)
3115 rcStrict = iemTbExec(pVCpu, pTb);
3116 else
3117 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
3118 }
3119 else
3120 {
3121 /* This can only happen if the current PC cannot be translated into a
3122 host pointer, which means we're in MMIO or unmapped memory... */
3123#if defined(VBOX_STRICT) && defined(IN_RING3)
3124 rcStrict = DBGFSTOP(pVM);
3125 if (rcStrict != VINF_SUCCESS && rcStrict != VERR_DBGF_NOT_ATTACHED)
3126 return rcStrict;
3127#endif
3128 rcStrict = IEMExecLots(pVCpu, 2048, cPollRate, NULL);
3129 }
3130 if (rcStrict == VINF_SUCCESS)
3131 {
3132 Assert(pVCpu->iem.s.cActiveMappings == 0);
3133
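                        /* Check for pending force flags that require dropping back
                           to EM, masking out the ones that can safely be left for
                           later. */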
3134 uint64_t fCpu = pVCpu->fLocalForcedActions;
3135 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
3136 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
3137 | VMCPU_FF_TLB_FLUSH
3138 | VMCPU_FF_UNHALT );
3139 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
3140 if (RT_LIKELY( ( !fCpu
3141 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
3142 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
3143 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
3144 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
3145 {
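                            /* cPollRate is 2^n - 1, so the timers are only polled on
                               every 512th iteration, keeping the common case cheap. */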
3146 if (RT_LIKELY( (iIterations & cPollRate) != 0
3147 || !TMTimerPollBoolWith32BitMilliTS(pVM, pVCpu, &pVCpu->iem.s.msRecompilerPollNow)))
3148 { /* likely */ }
3149 else
3150 return VINF_SUCCESS;
3151 }
3152 else
3153 return VINF_SUCCESS;
3154 }
3155 else
3156 return rcStrict;
3157 }
3158 }
3159 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
3160 {
3161 pVCpu->iem.s.cLongJumps++;
3162#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3163 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
3164#endif
3165 if (pVCpu->iem.s.cActiveMappings > 0)
3166 iemMemRollback(pVCpu);
3167
3168#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
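                /* If the longjmp came out of a native TB, count the exit and, when
                   instruction counting is enabled, credit the instructions executed
                   so far. */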
3169 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
3170 if (pTb && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
3171 {
3172 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitLongJump);
3173# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3174 Assert(pVCpu->iem.s.idxTbCurInstr < pTb->cInstructions);
3175 pVCpu->iem.s.cInstructions += pVCpu->iem.s.idxTbCurInstr;
3176# endif
3177 }
3178#endif
3179
3180#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
3181 /* If pTb isn't NULL we're in iemTbExec. */
3182 if (!pTb)
3183 {
3184            /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode. */
3185 pTb = pVCpu->iem.s.pCurTbR3;
3186 if (pTb)
3187 {
3188 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
3189 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
3190 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
3191 }
3192 }
3193#endif
3194 pVCpu->iem.s.pCurTbR3 = NULL;
3195 return rcStrict;
3196 }
3197 IEM_CATCH_LONGJMP_END(pVCpu);
3198 }
3199}
3200