VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h

Last change on this file was r104420, checked in by vboxsync on 2024-04-24

VMM/IEM: Add native emitters for the IEM_MC_REL_CALL_S16_AND_FINISH/IEM_MC_REL_CALL_S32_AND_FINISH/IEM_MC_REL_CALL_S64_AND_FINISH, IEM_MC_IND_CALL_U16_AND_FINISH/IEM_MC_IND_CALL_U32_AND_FINISH/IEM_MC_IND_CALL_U64_AND_FINISH and IEM_MC_RETN_AND_FINISH IEM MC statements, bugref:10376

1/* $Id: IEMAllN8veRecompFuncs.h 104420 2024-04-24 14:34:11Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down configs here to avoid wasting time on unused configs.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81
82
83/*********************************************************************************************************************************
84* Code emitters for flushing pending guest register writes and sanity checks *
85*********************************************************************************************************************************/
86
87#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
88# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
89DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
90{
91 /* Compare the shadow with the context value, they should match. */
92 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
93 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
94 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
95 return off;
96}
97# endif
98#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
99
100/**
101 * Flushes delayed write of a specific guest register.
102 *
103 * This must be called prior to calling CImpl functions and any helpers that use
104 * the guest state (like raising exceptions) and such.
105 *
106 * This optimization has not yet been implemented. The first target would be
107 * RIP updates, since these are the most common ones.
108 */
109DECL_INLINE_THROW(uint32_t)
110iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
111{
112#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
113 /* If, for whatever reason, it is possible to reference the PC register at some point, we need to do the writeback here first. */
114#endif
115
116#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
117#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
118 if ( enmClass == kIemNativeGstRegRef_EFlags
119 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
120 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
121#else
122 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
123#endif
124
125 if ( enmClass == kIemNativeGstRegRef_Gpr
126 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
127 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
128#endif
129
130#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
131 if ( enmClass == kIemNativeGstRegRef_XReg
132 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
133 {
134 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
135 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
136 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
137
138 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
139 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
140 }
141#endif
142 RT_NOREF(pReNative, enmClass, idxReg);
143 return off;
144}
145
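/*
 * Hypothetical call-site sketch (illustration only, not taken from the sources):
 * an emitter handing out a reference to guest RAX would flush any delayed write
 * to it first:
 *
 *      off = iemNativeRegFlushPendingSpecificWrite(pReNative, off,
 *                                                  kIemNativeGstRegRef_Gpr, X86_GREG_xAX);
 */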
146
147
148/*********************************************************************************************************************************
149* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
150*********************************************************************************************************************************/
151
152#undef IEM_MC_BEGIN /* unused */
153#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
154 { \
155 Assert(pReNative->Core.bmVars == 0); \
156 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
157 Assert(pReNative->Core.bmStack == 0); \
158 pReNative->fMc = (a_fMcFlags); \
159 pReNative->fCImpl = (a_fCImplFlags); \
160 pReNative->cArgsX = (a_cArgsIncludingHidden)
161
162/** We have to get to the end in recompilation mode, as otherwise we won't
163 * generate code for all the IEM_MC_IF_XXX branches. */
164#define IEM_MC_END() \
165 iemNativeVarFreeAll(pReNative); \
166 } return off
167
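/*
 * Illustration only: an MC block recompiler function body built from these two
 * macros expands roughly as sketched below (the trailing semicolon after
 * IEM_MC_BEGIN_EX comes from the instruction body, the closing brace and the
 * 'return off' from IEM_MC_END):
 *
 *      {
 *          Assert(pReNative->Core.bmVars == 0);
 *          ...
 *          pReNative->fMc    = fMcFlags;
 *          pReNative->fCImpl = fCImplFlags;
 *          pReNative->cArgsX = cArgsIncludingHidden;
 *          ... emitter statements, each updating 'off' ...
 *          iemNativeVarFreeAll(pReNative);
 *      } return off;
 */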
168
169
170/*********************************************************************************************************************************
171* Native Emitter Support. *
172*********************************************************************************************************************************/
173
174#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
175
176#define IEM_MC_NATIVE_ELSE() } else {
177
178#define IEM_MC_NATIVE_ENDIF() } ((void)0)
179
180
181#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
182 off = a_fnEmitter(pReNative, off)
183
184#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
185 off = a_fnEmitter(pReNative, off, (a0))
186
187#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
188 off = a_fnEmitter(pReNative, off, (a0), (a1))
189
190#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
191 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
192
193#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
194 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
195
196#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
197 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
198
199#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
200 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
201
202#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
203 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
204
205#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
206 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
207
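/*
 * Illustrative usage sketch only (the emitter and assembly helper names are
 * made up); note that the braces are supplied by the macros themselves:
 *
 *      IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
 *          IEM_MC_NATIVE_EMIT_2(iemNativeEmitHypotheticalOpU32, idxVarDst, idxVarSrc);
 *      IEM_MC_NATIVE_ELSE()
 *          IEM_MC_CALL_VOID_AIMPL_2(iemAImpl_hypothetical_op_u32, puDst, uSrc);
 *      IEM_MC_NATIVE_ENDIF();
 */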
208
209#ifndef RT_ARCH_AMD64
210# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
211#else
212/** @note This is a naive approach that ASSUMES that the register isn't
213 * allocated, so it only works safely for the first allocation(s) in
214 * an MC block. */
215# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
216 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
217
218DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off);
219
220DECL_INLINE_THROW(uint32_t)
221iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
222{
223 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
224 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
225 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
226
227# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
228 /* Must flush the register if it holds pending writes. */
229 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
230 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
231 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
232# endif
233
234 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off);
235 return off;
236}
237
238#endif /* RT_ARCH_AMD64 */
239
240
241
242/*********************************************************************************************************************************
243* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
244*********************************************************************************************************************************/
245
246#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
247 pReNative->fMc = 0; \
248 pReNative->fCImpl = (a_fFlags); \
249 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
250
251
252#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
253 pReNative->fMc = 0; \
254 pReNative->fCImpl = (a_fFlags); \
255 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
256
257DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
258 uint8_t idxInstr, uint64_t a_fGstShwFlush,
259 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
260{
261 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
262}
263
264
265#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
266 pReNative->fMc = 0; \
267 pReNative->fCImpl = (a_fFlags); \
268 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
269 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
270
271DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
272 uint8_t idxInstr, uint64_t a_fGstShwFlush,
273 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
274{
275 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
276}
277
278
279#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
280 pReNative->fMc = 0; \
281 pReNative->fCImpl = (a_fFlags); \
282 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
283 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
284
285DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
286 uint8_t idxInstr, uint64_t a_fGstShwFlush,
287 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
288 uint64_t uArg2)
289{
290 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
291}
292
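/*
 * Illustration only: each IEM_MC_DEFER_TO_CIMPL_<N>_RET_THREADED variant in this
 * section ends the MC block right away, expanding to roughly
 *
 *      pReNative->fMc    = 0;
 *      pReNative->fCImpl = fFlags;
 *      return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, fGstShwFlush,
 *                                    (uintptr_t)pfnCImpl, cbInstr, N, a0, a1, a2);
 *
 * where the unused trailing arguments are passed as zero.
 */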
293
294
295/*********************************************************************************************************************************
296* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
297*********************************************************************************************************************************/
298
299/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
300 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
301DECL_INLINE_THROW(uint32_t)
302iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
303{
304 /*
305 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
306 * return with a special status code and make the execution loop deal with
307 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
308 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
309 * could continue w/o interruption, it probably will drop into the
310 * debugger, so it is not worth the effort of trying to service it here and we
311 * just lump it in with the handling of the others.
312 *
313 * To simplify the code and the register state management even more (wrt
314 * immediate in the AND operation), we always update the flags and skip the
315 * extra check and its associated conditional jump.
316 */
317 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
318 <= UINT32_MAX);
319#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
320 AssertMsg( pReNative->idxCurCall == 0
321 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
322 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
323#endif
324
325 /*
326 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
327 * any pending register writes must be flushed.
328 */
329 off = iemNativeRegFlushPendingWrites(pReNative, off);
330
331 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
332 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
333 true /*fSkipLivenessAssert*/);
334 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
335 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
336 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
337 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
338 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
339
340 /* Free but don't flush the EFLAGS register. */
341 iemNativeRegFreeTmp(pReNative, idxEflReg);
342
343 return off;
344}
345
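/*
 * Guest-level pseudo-code of the sequence emitted above (illustration only):
 *
 *      eflags = ctx.eflags;
 *      if (eflags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
 *          goto ReturnWithFlags;   // leaves the TB with a special status code
 *      eflags &= ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
 *      ctx.eflags = eflags;
 */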
346
347/** Emits the finish-with-status tail; a dummy (no-op) when a_rcNormal is VINF_SUCCESS. */
348template<int const a_rcNormal>
349DECL_FORCE_INLINE(uint32_t)
350iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
351{
352 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
353 if (a_rcNormal != VINF_SUCCESS)
354 {
355#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
356 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
357#else
358 RT_NOREF_PV(pCallEntry);
359#endif
360
361 /* As this code returns from the TB, any pending register writes must be flushed. */
362 off = iemNativeRegFlushPendingWrites(pReNative, off);
363
364 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
365 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
366 Assert(idxTbLookupFirst < pReNative->pTbOrg->cTbLookupEntries);
367 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pReNative->pTbOrg, idxTbLookupFirst);
368 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
369 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
370 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
371
372 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
373 }
374 return off;
375}
376
377
378#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
379 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
380 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
381
382#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
383 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
384 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
385 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
386
387/** Same as iemRegAddToRip64AndFinishingNoFlags. */
388DECL_INLINE_THROW(uint32_t)
389iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
390{
391#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
392# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
393 if (!pReNative->Core.offPc)
394 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
395# endif
396
397 /* Allocate a temporary PC register. */
398 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
399
400 /* Perform the addition and store the result. */
401 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
402 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
403
404 /* Free but don't flush the PC register. */
405 iemNativeRegFreeTmp(pReNative, idxPcReg);
406#endif
407
408#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
409 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
410
411 pReNative->Core.offPc += cbInstr;
412# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
413 off = iemNativePcAdjustCheck(pReNative, off);
414# endif
415 if (pReNative->cCondDepth)
416 off = iemNativeEmitPcWriteback(pReNative, off);
417 else
418 pReNative->Core.cInstrPcUpdateSkipped++;
419#endif
420
421 return off;
422}
423
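/*
 * Rough guest-level equivalent of the code emitted above (illustration only):
 *
 *      pVCpu->cpum.GstCtx.rip += cbInstr;
 *
 * With IEMNATIVE_WITH_DELAYED_PC_UPDATING the addition is instead accumulated in
 * pReNative->Core.offPc and only written back when required (e.g. inside a
 * conditional block).  The EIP/IP variants below do the same on the narrower
 * register widths.
 */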
424
425#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
426 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
427 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
428
429#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
430 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
431 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
432 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
433
434/** Same as iemRegAddToEip32AndFinishingNoFlags. */
435DECL_INLINE_THROW(uint32_t)
436iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
437{
438#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
439# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
440 if (!pReNative->Core.offPc)
441 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
442# endif
443
444 /* Allocate a temporary PC register. */
445 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
446
447 /* Perform the addition and store the result. */
448 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
449 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
450
451 /* Free but don't flush the PC register. */
452 iemNativeRegFreeTmp(pReNative, idxPcReg);
453#endif
454
455#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
456 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
457
458 pReNative->Core.offPc += cbInstr;
459# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
460 off = iemNativePcAdjustCheck(pReNative, off);
461# endif
462 if (pReNative->cCondDepth)
463 off = iemNativeEmitPcWriteback(pReNative, off);
464 else
465 pReNative->Core.cInstrPcUpdateSkipped++;
466#endif
467
468 return off;
469}
470
471
472#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
473 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
474 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
475
476#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
477 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
478 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
479 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
480
481/** Same as iemRegAddToIp16AndFinishingNoFlags. */
482DECL_INLINE_THROW(uint32_t)
483iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
484{
485#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
486# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
487 if (!pReNative->Core.offPc)
488 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
489# endif
490
491 /* Allocate a temporary PC register. */
492 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
493
494 /* Perform the addition and store the result. */
495 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
496 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
497 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
498
499 /* Free but don't flush the PC register. */
500 iemNativeRegFreeTmp(pReNative, idxPcReg);
501#endif
502
503#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
504 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
505
506 pReNative->Core.offPc += cbInstr;
507# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
508 off = iemNativePcAdjustCheck(pReNative, off);
509# endif
510 if (pReNative->cCondDepth)
511 off = iemNativeEmitPcWriteback(pReNative, off);
512 else
513 pReNative->Core.cInstrPcUpdateSkipped++;
514#endif
515
516 return off;
517}
518
519
520
521/*********************************************************************************************************************************
522* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
523*********************************************************************************************************************************/
524
525#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
526 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
527 (a_enmEffOpSize), pCallEntry->idxInstr); \
528 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
529
530#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
531 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
532 (a_enmEffOpSize), pCallEntry->idxInstr); \
533 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
534 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
535
536#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
537 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
538 IEMMODE_16BIT, pCallEntry->idxInstr); \
539 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
540
541#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
542 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
543 IEMMODE_16BIT, pCallEntry->idxInstr); \
544 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
545 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
546
547#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
548 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
549 IEMMODE_64BIT, pCallEntry->idxInstr); \
550 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
551
552#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
553 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
554 IEMMODE_64BIT, pCallEntry->idxInstr); \
555 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
556 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
557
558/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
559 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
560 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
561DECL_INLINE_THROW(uint32_t)
562iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
563 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
564{
565 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
566
567 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
568 off = iemNativeRegFlushPendingWrites(pReNative, off);
569
570#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
571 Assert(pReNative->Core.offPc == 0);
572
573 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
574#endif
575
576 /* Allocate a temporary PC register. */
577 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
578
579 /* Perform the addition. */
580 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
581
582 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
583 {
584 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
585 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
586 }
587 else
588 {
589 /* Just truncate the result to 16-bit IP. */
590 Assert(enmEffOpSize == IEMMODE_16BIT);
591 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
592 }
593 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
594
595 /* Free but don't flush the PC register. */
596 iemNativeRegFreeTmp(pReNative, idxPcReg);
597
598 return off;
599}
600
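/*
 * Rough guest-level equivalent of the emitted code (illustration only):
 *
 *      uint64_t uNewRip = pVCpu->cpum.GstCtx.rip + cbInstr + offDisp;
 *      if (enmEffOpSize == IEMMODE_64BIT)
 *          // raise #GP(0) and exit the TB unless uNewRip is canonical
 *      else
 *          uNewRip &= UINT16_MAX;   // 16-bit operand size
 *      pVCpu->cpum.GstCtx.rip = uNewRip;
 */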
601
602#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
603 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
604 (a_enmEffOpSize), pCallEntry->idxInstr); \
605 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
606
607#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
608 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
609 (a_enmEffOpSize), pCallEntry->idxInstr); \
610 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
611 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
612
613#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
614 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
615 IEMMODE_16BIT, pCallEntry->idxInstr); \
616 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
617
618#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
619 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
620 IEMMODE_16BIT, pCallEntry->idxInstr); \
621 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
622 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
623
624#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
625 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
626 IEMMODE_32BIT, pCallEntry->idxInstr); \
627 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
628
629#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
630 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
631 IEMMODE_32BIT, pCallEntry->idxInstr); \
632 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
633 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
634
635/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
636 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
637 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
638DECL_INLINE_THROW(uint32_t)
639iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
640 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
641{
642 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
643
644 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
645 off = iemNativeRegFlushPendingWrites(pReNative, off);
646
647#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
648 Assert(pReNative->Core.offPc == 0);
649
650 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
651#endif
652
653 /* Allocate a temporary PC register. */
654 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
655
656 /* Perform the addition. */
657 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
658
659 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
660 if (enmEffOpSize == IEMMODE_16BIT)
661 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
662
663 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
664/** @todo we can skip this in 32-bit FLAT mode. */
665 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
666
667 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
668
669 /* Free but don't flush the PC register. */
670 iemNativeRegFreeTmp(pReNative, idxPcReg);
671
672 return off;
673}
674
675
676#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
677 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
678 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
679
680#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
681 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
682 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
683 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
684
685#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
686 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
687 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
688
689#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
690 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
691 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
692 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
693
694#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
695 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
696 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
697
698#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
699 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
700 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
701 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry)
702
703/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
704DECL_INLINE_THROW(uint32_t)
705iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
706 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
707{
708 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
709 off = iemNativeRegFlushPendingWrites(pReNative, off);
710
711#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
712 Assert(pReNative->Core.offPc == 0);
713
714 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
715#endif
716
717 /* Allocate a temporary PC register. */
718 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
719
720 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
721 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
722 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
723 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
724 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
725
726 /* Free but don't flush the PC register. */
727 iemNativeRegFreeTmp(pReNative, idxPcReg);
728
729 return off;
730}
731
732
733
734/*********************************************************************************************************************************
735* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
736*********************************************************************************************************************************/
737
738/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
739#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
740 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
741
742/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
743#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
744 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
745
746/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
747#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
748 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
749
750/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
751 * clears flags. */
752#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
753 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
754 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
755
756/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
757 * clears flags. */
758#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
759 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
760 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
761
762/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
763 * clears flags. */
764#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
765 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
766 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
767
768#undef IEM_MC_SET_RIP_U16_AND_FINISH
769
770
771/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
772#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
773 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
774
775/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
776#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
777 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
778
779/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
780 * clears flags. */
781#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
782 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
783 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
784
785/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
786 * and clears flags. */
787#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
788 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
789 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
790
791#undef IEM_MC_SET_RIP_U32_AND_FINISH
792
793
794/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
795#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
796 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
797
798/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
799 * and clears flags. */
800#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
801 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
802 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
803
804#undef IEM_MC_SET_RIP_U64_AND_FINISH
805
806
807/** Same as iemRegRipJumpU16AndFinishNoFlags,
808 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
809DECL_INLINE_THROW(uint32_t)
810iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
811 uint8_t idxInstr, uint8_t cbVar)
812{
813 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
814 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
815
816 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
817 off = iemNativeRegFlushPendingWrites(pReNative, off);
818
819#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
820 Assert(pReNative->Core.offPc == 0);
821
822 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
823#endif
824
825 /* Get a register with the new PC loaded from idxVarPc.
826 Note! This ASSUMES that the high bits of the GPR are zeroed. */
827 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
828
829 /* Check limit (may #GP(0) + exit TB). */
830 if (!f64Bit)
831/** @todo we can skip this test in FLAT 32-bit mode. */
832 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
833 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
834 else if (cbVar > sizeof(uint32_t))
835 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
836
837 /* Store the result. */
838 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
839
840 iemNativeVarRegisterRelease(pReNative, idxVarPc);
841 /** @todo implicitly free the variable? */
842
843 return off;
844}
845
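/*
 * Rough guest-level equivalent of the emitted code (illustration only):
 *
 *      if (!f64Bit)
 *          // raise #GP(0) and exit the TB unless the new IP/EIP is within the CS limit
 *      else if (cbVar > sizeof(uint32_t))
 *          // raise #GP(0) and exit the TB unless the new RIP is canonical
 *      pVCpu->cpum.GstCtx.rip = uNewPc;   // upper bits assumed zero by the caller
 */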
846
847
848/*********************************************************************************************************************************
849* Emitters for changing PC/RIP/EIP/IP with an indirect call (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters). *
850*********************************************************************************************************************************/
851
852/** @todo These helpers naturally belong to the stack push API, but we already need them up here (we could of course move
853 * this below the stack emitters, but then it would not be close to the rest of the PC/RIP handling...). */
854DECL_FORCE_INLINE_THROW(uint32_t)
855iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
856{
857 /* Use16BitSp: */
858#ifdef RT_ARCH_AMD64
859 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
860 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
861#else
862 /* sub regeff, regrsp, #cbMem */
863 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
864 /* and regeff, regeff, #0xffff */
865 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
866 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
867 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0. */
868 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
869#endif
870 return off;
871}
872
873
874DECL_FORCE_INLINE(uint32_t)
875iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
876{
877 /* Use32BitSp: */
878 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
879 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
880 return off;
881}
882
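/*
 * The two helpers above compute the post-push stack pointer (illustration only):
 *   - Use16BitSp: SP -= cbMem, touching only bits 15:0 of RSP; idxRegEffSp gets
 *     the new SP zero-extended.
 *   - Use32BitSp: ESP -= cbMem; idxRegEffSp gets the new ESP zero-extended.
 */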
883
884DECL_INLINE_THROW(uint32_t)
885iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
886 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
887{
888 /*
889 * Assert sanity.
890 */
891#ifdef VBOX_STRICT
892 if (RT_BYTE2(cBitsVarAndFlat) != 0)
893 {
894 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
895 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
896 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
897 Assert( pfnFunction
898 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
899 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
900 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
901 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
902 : UINT64_C(0xc000b000a0009000) ));
903 }
904 else
905 Assert( pfnFunction
906 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
907 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
908 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
909 : UINT64_C(0xc000b000a0009000) ));
910#endif
911
912#ifdef VBOX_STRICT
913 /*
914 * Check that the fExec flags we've got make sense.
915 */
916 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
917#endif
918
919 /*
920 * To keep things simple we have to commit any pending writes first as we
921 * may end up making calls.
922 */
923 /** @todo we could postpone this till we make the call and reload the
924 * registers after returning from the call. Not sure if that's sensible or
925 * not, though. */
926 off = iemNativeRegFlushPendingWrites(pReNative, off);
927
928 /*
929 * First we calculate the new RSP and the effective stack pointer value.
930 * For 64-bit mode and flat 32-bit these two are the same.
931 * (Code structure is very similar to that of PUSH)
932 */
933 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
934 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
935 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
936 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
937 ? cbMem : sizeof(uint16_t);
938 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
939 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
940 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
941 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
942 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
943 if (cBitsFlat != 0)
944 {
945 Assert(idxRegEffSp == idxRegRsp);
946 Assert(cBitsFlat == 32 || cBitsFlat == 64);
947 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
948 if (cBitsFlat == 64)
949 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
950 else
951 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
952 }
953 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
954 {
955 Assert(idxRegEffSp != idxRegRsp);
956 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
957 kIemNativeGstRegUse_ReadOnly);
958#ifdef RT_ARCH_AMD64
959 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
960#else
961 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
962#endif
963 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
964 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
965 offFixupJumpToUseOtherBitSp = off;
966 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
967 {
968 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
969 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
970 }
971 else
972 {
973 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
974 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
975 }
976 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
977 }
978 /* SpUpdateEnd: */
979 uint32_t const offLabelSpUpdateEnd = off;
980
981 /*
982 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
983 * we're skipping lookup).
984 */
985 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
986 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
987 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
988 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
989 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
990 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
991 : UINT32_MAX;
992 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
993
994
995 if (!TlbState.fSkip)
996 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
997 else
998 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
999
1000 /*
1001 * Use16BitSp:
1002 */
1003 if (cBitsFlat == 0)
1004 {
1005#ifdef RT_ARCH_AMD64
1006 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1007#else
1008 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1009#endif
1010 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1011 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1012 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1013 else
1014 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1015 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1016 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1017 }
1018
1019 /*
1020 * TlbMiss:
1021 *
1022 * Call helper to do the pushing.
1023 */
1024 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1025
1026#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1027 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1028#else
1029 RT_NOREF(idxInstr);
1030#endif
1031
1032 /* Save variables in volatile registers. */
1033 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1034 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1035 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1036 | (RT_BIT_32(idxRegPc));
1037 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1038
1039 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1040 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1041 {
1042 /* Swap them using ARG0 as temp register: */
1043 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1044 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1045 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1046 }
1047 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1048 {
1049 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1050 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1051
1052 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1053 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1054 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1055 }
1056 else
1057 {
1058 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1059 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1060
1061 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1062 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1063 }
1064
1065 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1066 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1067
1068 /* Done setting up parameters, make the call. */
1069 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1070
1071 /* Restore variables and guest shadow registers to volatile registers. */
1072 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1073 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1074
1075#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1076 if (!TlbState.fSkip)
1077 {
1078 /* end of TlbMiss - Jump to the done label. */
1079 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1080 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1081
1082 /*
1083 * TlbLookup:
1084 */
1085 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
1086 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1087
1088 /*
1089 * Emit code to do the actual storing / fetching.
1090 */
1091 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1092# ifdef VBOX_WITH_STATISTICS
1093 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1094 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1095# endif
1096 switch (cbMemAccess)
1097 {
1098 case 2:
1099 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1100 break;
1101 case 4:
1102 if (!fIsIntelSeg)
1103 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1104 else
1105 {
1106 /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
1107 PUSH FS in real mode, so we have to try to emulate that here.
1108 We borrow the now unused idxReg1 from the TLB lookup code here. */
1109 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
1110 kIemNativeGstReg_EFlags);
1111 if (idxRegEfl != UINT8_MAX)
1112 {
1113#ifdef RT_ARCH_AMD64
1114 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
1115 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1116 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1117#else
1118 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
1119 off, TlbState.idxReg1, idxRegEfl,
1120 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1121#endif
1122 iemNativeRegFreeTmp(pReNative, idxRegEfl);
1123 }
1124 else
1125 {
1126 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
1127 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
1128 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1129 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1130 }
1131 /* ASSUMES the upper half of idxRegPc is ZERO. */
1132 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegPc);
1133 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
1134 }
1135 break;
1136 case 8:
1137 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1138 break;
1139 default:
1140 AssertFailed();
1141 }
1142
1143 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1144 TlbState.freeRegsAndReleaseVars(pReNative);
1145
1146 /*
1147 * TlbDone:
1148 *
1149 * Commit the new RSP value.
1150 */
1151 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1152 }
1153#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1154
1155#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1156 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
1157#endif
1158 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1159 if (idxRegEffSp != idxRegRsp)
1160 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1161
1162 return off;
1163}
1164
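/*
 * Summary of the helper above (illustration only): it pushes the value in
 * idxRegPc onto the guest stack for the call emitters in this section by
 *   1. computing the new RSP and the effective stack address (SS.ATTR.D selects
 *      16-bit vs 32-bit SP unless we're in flat 32-bit or 64-bit mode),
 *   2. doing a data TLB lookup for a write of cbMemAccess bytes, falling back to
 *      the iemNativeHlpStackStore/StackFlatStore helpers on a miss,
 *   3. storing the PC value (with the Intel real-mode segment push quirk for
 *      segment operands), and
 *   4. committing the updated RSP (written straight to CPUMCTX unless delayed
 *      register writeback is enabled).
 */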
1165
1166/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1167#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1168 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1169
1170/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1171 * clears flags. */
1172#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1173 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1174 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1175
1176/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1177#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1178 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1179
1180/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1181 * clears flags. */
1182#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1183 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1184 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1185
1186#undef IEM_MC_IND_CALL_U16_AND_FINISH
1187
1188
1189/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1190#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1191 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1192
1193/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1194 * clears flags. */
1195#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1196 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1197 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1198
1199#undef IEM_MC_IND_CALL_U32_AND_FINISH
1200
1201
1202/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1203 * an extra parameter, for use in 64-bit code. */
1204#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1205 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1206
1207
1208/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1209 * an extra parameter, for use in 64-bit code and we need to check and clear
1210 * flags. */
1211#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1212 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1213 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1214
1215#undef IEM_MC_IND_CALL_U64_AND_FINISH
1216
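/*
 * Rough outline of the native code emitted by iemNativeEmitRipIndirectCallNoFlags()
 * below (illustrative pseudo code only, not the actual instruction sequence):
 *
 *      uNewPc   = value of the a_uXXNewIP variable (high GPR bits assumed zero);
 *      check uNewPc against the CS limit / canonicality (may raise #GP(0) and exit the TB);
 *      uRetAddr = rip + a_cbInstr, truncated to the operand size;
 *      push uRetAddr onto the guest stack (16/32/64-bit store helper);
 *      rip      = uNewPc;
 */
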
1217/** Common worker for the IEM_MC_IND_CALL_U16/U32/U64_AND_FINISH_THREADED_PCxx
1218 * variants above. */
1219DECL_INLINE_THROW(uint32_t)
1220iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1221 uint8_t idxInstr, uint8_t cbVar)
1222{
1223 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1224 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1225
1226 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1227 off = iemNativeRegFlushPendingWrites(pReNative, off);
1228
1229#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1230 Assert(pReNative->Core.offPc == 0);
1231
1232 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1233#endif
1234
1235 /* Get a register with the new PC loaded from idxVarPc.
1236 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1237 uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1238
1239 /* Check limit (may #GP(0) + exit TB). */
1240 if (!f64Bit)
1241/** @todo we can skip this test in FLAT 32-bit mode. */
1242 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1243 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1244 else if (cbVar > sizeof(uint32_t))
1245 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1246
1247#if 1
1248 /* Allocate a temporary PC register, we don't want it shadowed. */
1249 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1250 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1251#else
1252 /* Allocate a temporary PC register. */
1253 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1254 true /*fNoVolatileRegs*/);
1255#endif
1256
1257 /* Compute the return address (old PC + cbInstr) and push it to the guest stack. */
1258 /** @todo Flat variants for PC32 variants. */
1259 switch (cbVar)
1260 {
1261 case sizeof(uint16_t):
1262 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1263 /* Truncate the result to 16-bit IP. */
1264 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1265 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1266 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1267 break;
1268 case sizeof(uint32_t):
1269 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1270 /** @todo In FLAT mode we can use the flat variant. */
1271 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1272 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1273 break;
1274 case sizeof(uint64_t):
1275 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1276 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1277 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1278 break;
1279 default:
1280 AssertFailed();
1281 }
1282
1283 /* RSP got changed, so do this again. */
1284 off = iemNativeRegFlushPendingWrites(pReNative, off);
1285
1286 /* Store the result. */
1287 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1288
1289#if 1
1290 /* Need to transfer the shadow information to the new RIP register. */
1291 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
1292#else
1293 /* Sync the new PC. */
1294 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
1295#endif
1296 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1297 iemNativeRegFreeTmp(pReNative, idxPcReg);
1298 /** @todo implicitly free the variable? */
1299
1300 return off;
1301}
1302
1303
1304/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1305 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1306#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1307 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1308
1309/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1310 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
1311 * flags. */
1312#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
1313 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
1314 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1315
1316/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1317 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1318#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
1319 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1320
1321/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1322 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1323 * flags. */
1324#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
1325 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
1326 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1327
1328/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1329 * an extra parameter, for use in 64-bit code. */
1330#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
1331 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1332
1333/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1334 * an extra parameter, for use in 64-bit code and we need to check and clear
1335 * flags. */
1336#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
1337 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
1338 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1339
1340#undef IEM_MC_REL_CALL_S16_AND_FINISH
1341
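/*
 * Rough outline of what iemNativeEmitRipRelativeCallS16NoFlags() below emits
 * (illustrative pseudo code only):
 *
 *      uRetAddr = (ip + a_cbInstr) & 0xffff;
 *      uNewIp   = (uRetAddr + a_i16) & 0xffff;
 *      check uNewIp against the CS limit (may raise #GP(0) and exit the TB);
 *      push16(uRetAddr);
 *      rip      = uNewIp;
 */
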
1342/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1343 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1344DECL_INLINE_THROW(uint32_t)
1345iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
1346 uint8_t idxInstr)
1347{
1348 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1349 off = iemNativeRegFlushPendingWrites(pReNative, off);
1350
1351#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1352 Assert(pReNative->Core.offPc == 0);
1353
1354 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1355#endif
1356
1357 /* Allocate a temporary PC register. */
1358 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1359 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1360 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1361
1362 /* Calculate the new RIP. */
1363 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1364 /* Truncate the result to 16-bit IP. */
1365 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
1366 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1367 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1368
1369 /* Truncate the result to 16-bit IP. */
1370 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
1371
1372 /* Check limit (may #GP(0) + exit TB). */
1373 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1374
1375 /* Push the return address to the guest stack. */
1376 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1377 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1378
1379 /* RSP got changed, so flush again. */
1380 off = iemNativeRegFlushPendingWrites(pReNative, off);
1381
1382 /* Store the result. */
1383 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1384
1385 /* Need to transfer the shadow information to the new RIP register. */
1386 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1387 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1388 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1389
1390 return off;
1391}
1392
1393
1394/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1395 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1396#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
1397 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
1398
1399/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1400 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1401 * flags. */
1402#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
1403 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
1404 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1405
1406#undef IEM_MC_REL_CALL_S32_AND_FINISH
1407
1408/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1409 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1410DECL_INLINE_THROW(uint32_t)
1411iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
1412 uint8_t idxInstr)
1413{
1414 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1415 off = iemNativeRegFlushPendingWrites(pReNative, off);
1416
1417#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1418 Assert(pReNative->Core.offPc == 0);
1419
1420 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1421#endif
1422
1423 /* Allocate a temporary PC register. */
1424 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1425 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1426 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1427
1428 /* Update the EIP to get the return address. */
1429 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1430
1431 /* Load address, add the displacement and check it against the CS segment limit, raising #GP(0) + exit TB if it's beyond. */
1432 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1433 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1434 /** @todo we can skip this test in FLAT 32-bit mode. */
1435 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1436
1437 /* Push the return address to the guest stack. */
1438 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
1439 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1440 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1441
1442 /* RSP got changed, so do this again. */
1443 off = iemNativeRegFlushPendingWrites(pReNative, off);
1444
1445 /* Store the result. */
1446 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1447
1448 /* Need to transfer the shadow information to the new RIP register. */
1449 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1450 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1451 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1452
1453 return off;
1454}
1455
1456
1457/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1458 * an extra parameter, for use in 64-bit code. */
1459#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
1460 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
1461
1462/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1463 * an extra parameter, for use in 64-bit code and we need to check and clear
1464 * flags. */
1465#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
1466 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
1467 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1468
1469#undef IEM_MC_REL_CALL_S64_AND_FINISH
1470
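/*
 * Rough outline of what iemNativeEmitRip64RelativeCallNoFlags() below emits
 * (illustrative pseudo code only):
 *
 *      uRetAddr = rip + a_cbInstr;
 *      uNewRip  = uRetAddr + a_i64;
 *      check that uNewRip is canonical (may raise #GP(0) and exit the TB);
 *      push64(uRetAddr);
 *      rip      = uNewRip;
 */
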
1471/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1472 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1473DECL_INLINE_THROW(uint32_t)
1474iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
1475 uint8_t idxInstr)
1476{
1477 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1478 off = iemNativeRegFlushPendingWrites(pReNative, off);
1479
1480#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1481 Assert(pReNative->Core.offPc == 0);
1482
1483 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1484#endif
1485
1486 /* Allocate a temporary PC register. */
1487 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1488 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1489 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1490
1491 /* Update the RIP to get the return address. */
1492 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
1493
1494 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1495 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
1496 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
1497 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1498
1499 /* Push the return address to the guest stack. */
1500 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1501 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1502
1503 /* RSP got changed, so do this again. */
1504 off = iemNativeRegFlushPendingWrites(pReNative, off);
1505
1506 /* Store the result. */
1507 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1508
1509 /* Need to transfer the shadow information to the new RIP register. */
1510 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1511 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1512 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1513
1514 return off;
1515}
1516
1517
1518/*********************************************************************************************************************************
1519* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters). *
1520*********************************************************************************************************************************/
1521
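/**
 * Helper for iemNativeEmitRetn for 16-bit stack pointers: loads SP into
 * idxRegEffSp and advances the low 16 bits of idxRegRsp by cbMem (the size of
 * the popped return address) plus cbPopAdd (the RETN immediate), leaving the
 * upper RSP bits untouched.  idxRegTmp is only needed on ARM64.
 */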
1522DECL_FORCE_INLINE_THROW(uint32_t)
1523iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1524 uint16_t cbPopAdd, uint8_t idxRegTmp)
1525{
1526 /* Use16BitSp: */
1527#ifdef RT_ARCH_AMD64
1528 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1529 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1530 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
1531 RT_NOREF(idxRegTmp);
1532#elif defined(RT_ARCH_ARM64)
1533 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
1534 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
1535 /* add tmp, regrsp, #(cbMem + cbPopAdd) */
1536 uint16_t const cbCombined = cbMem + cbPopAdd;
1537 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
1538 if (cbCombined >= RT_BIT_32(12))
1539 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
1540 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
1541 /* and tmp, tmp, #0xffff */
1542 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1543 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
1544 /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
1545 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
1546#else
1547# error "Port me"
1548#endif
1549 return off;
1550}
1551
1552
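/**
 * Helper for iemNativeEmitRetn for 32-bit stack pointers: loads ESP into
 * idxRegEffSp and adds cbMem + cbPopAdd to the 32-bit idxRegRsp value.
 */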
1553DECL_FORCE_INLINE_THROW(uint32_t)
1554iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1555 uint16_t cbPopAdd)
1556{
1557 /* Use32BitSp: */
1558 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1559 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
1560 return off;
1561}
1562
1563
1564/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
1565#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr) \
1566 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, IEMMODE_16BIT, pCallEntry->idxInstr)
1567
1568/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
1569#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1570 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1571
1572/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
1573#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1574 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), true /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1575
1576/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
1577 * clears flags. */
1578#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16Pop, a_cbInstr) \
1579 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr); \
1580 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1581
1582/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
1583 * clears flags. */
1584#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1585 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1586 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1587
1588/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
1589 * clears flags. */
1590#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1591 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1592 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1593
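/*
 * The near return operation implemented by iemNativeEmitRetn() below, as
 * illustrative pseudo code (the SP size used depends on SS.D / CPU mode):
 *
 *      uNewPc = pop();             // 16, 32 or 64 bits according to a_enmEffOpSize
 *      rsp   += a_u16Pop;          // the optional RETN immediate operand
 *      check uNewPc (CS limit or canonicality) - may raise #GP(0) and exit the TB;
 *      rip    = uNewPc;
 */
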
1594/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
1595DECL_INLINE_THROW(uint32_t)
1596iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPop, bool f64Bit,
1597 IEMMODE enmEffOpSize, uint8_t idxInstr)
1598{
1599 RT_NOREF(cbInstr);
1600
1601#ifdef VBOX_STRICT
1602 /*
1603 * Check that the fExec flags we've got make sense.
1604 */
1605 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1606#endif
1607
1608 /*
1609 * To keep things simple we have to commit any pending writes first as we
1610 * may end up making calls.
1611 */
1612 off = iemNativeRegFlushPendingWrites(pReNative, off);
1613
1614 /*
1615 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
1616 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
1617 * directly as the effective stack pointer.
1618 * (Code structure is very similar to that of PUSH)
1619 *
1620 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
1621 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
1622 * aren't commonly used (or useful) and thus not in need of optimizing.
1623 *
1624 * Note! For non-FLAT modes the guest RSP is not allocated for update but rather for calculation,
1625 * as the shadowed register would otherwise remain modified even if the return address throws a \#GP(0)
1626 * due to being outside the CS limit, causing a wrong stack pointer value in the guest (see
1627 * the near return testcase in bs3-cpu-basic-2). If no exception is thrown, the shadowing is transferred
1628 * to the new register returned by iemNativeRegAllocTmpForGuestReg() at the end.
1629 */
1630 uint8_t const cbMem = enmEffOpSize == IEMMODE_64BIT
1631 ? sizeof(uint64_t)
1632 : enmEffOpSize == IEMMODE_32BIT
1633 ? sizeof(uint32_t)
1634 : sizeof(uint16_t);
1635 bool const fFlat = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) && enmEffOpSize != IEMMODE_16BIT; /* see note */
1636 uintptr_t const pfnFunction = fFlat
1637 ? enmEffOpSize == IEMMODE_64BIT
1638 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
1639 : (uintptr_t)iemNativeHlpStackFlatFetchU32
1640 : enmEffOpSize == IEMMODE_32BIT
1641 ? (uintptr_t)iemNativeHlpStackFetchU32
1642 : (uintptr_t)iemNativeHlpStackFetchU16;
1643 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1644 fFlat ? kIemNativeGstRegUse_ForUpdate : kIemNativeGstRegUse_Calculation,
1645 true /*fNoVolatileRegs*/);
1646 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1647 /** @todo can do a better job picking the register here. For cbMem >= 4 this
1648 * will be the resulting register value. */
1649 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
1650
1651 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1652 if (fFlat)
1653 Assert(idxRegEffSp == idxRegRsp);
1654 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1655 {
1656 Assert(idxRegEffSp != idxRegRsp);
1657 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1658 kIemNativeGstRegUse_ReadOnly);
1659#ifdef RT_ARCH_AMD64
1660 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1661#else
1662 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1663#endif
1664 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1665 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1666 offFixupJumpToUseOtherBitSp = off;
1667 if (enmEffOpSize == IEMMODE_32BIT)
1668 {
1669 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1670 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
1671 }
1672 else
1673 {
1674 Assert(enmEffOpSize == IEMMODE_16BIT);
1675 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1676 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
1677 idxRegMemResult);
1678 }
1679 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1680 }
1681 /* SpUpdateEnd: */
1682 uint32_t const offLabelSpUpdateEnd = off;
1683
1684 /*
1685 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1686 * we're skipping lookup).
1687 */
1688 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
1689 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
1690 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1691 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1692 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1693 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1694 : UINT32_MAX;
1695
1696 if (!TlbState.fSkip)
1697 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1698 else
1699 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1700
1701 /*
1702 * Use16BitSp:
1703 */
1704 if (!fFlat)
1705 {
1706#ifdef RT_ARCH_AMD64
1707 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1708#else
1709 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1710#endif
1711 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1712 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1713 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
1714 idxRegMemResult);
1715 else
1716 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
1717 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1718 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1719 }
1720
1721 /*
1722 * TlbMiss:
1723 *
1724 * Call helper to do the popping (stack fetch).
1725 */
1726 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1727
1728#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1729 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1730#else
1731 RT_NOREF(idxInstr);
1732#endif
1733
1734 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1735 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1736 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
1737 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1738
1739
1740 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
1741 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1742 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1743
1744 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1745 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1746
1747 /* Done setting up parameters, make the call. */
1748 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1749
1750 /* Move the return register content to idxRegMemResult. */
1751 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
1752 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
1753
1754 /* Restore variables and guest shadow registers to volatile registers. */
1755 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1756 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1757
1758#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1759 if (!TlbState.fSkip)
1760 {
1761 /* end of TlbMiss - Jump to the done label. */
1762 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1763 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1764
1765 /*
1766 * TlbLookup:
1767 */
1768 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
1769 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1770
1771 /*
1772 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
1773 */
1774 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1775# ifdef VBOX_WITH_STATISTICS
1776 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1777 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1778# endif
1779 switch (cbMem)
1780 {
1781 case 2:
1782 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1783 break;
1784 case 4:
1785 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1786 break;
1787 case 8:
1788 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1789 break;
1790 default:
1791 AssertFailed();
1792 }
1793
1794 TlbState.freeRegsAndReleaseVars(pReNative);
1795
1796 /*
1797 * TlbDone:
1798 *
1799 * Set the new RSP value (FLAT accesses need to calculate it first) and
1800 * commit the popped register value.
1801 */
1802 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1803 }
1804#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1805
1806 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
1807 if (!f64Bit)
1808/** @todo we can skip this test in FLAT 32-bit mode. */
1809 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
1810 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1811 else if (enmEffOpSize == IEMMODE_64BIT)
1812 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
1813
1814 /* Complete RSP calculation for FLAT mode. */
1815 if (idxRegEffSp == idxRegRsp)
1816 {
1817 if (enmEffOpSize == IEMMODE_64BIT)
1818 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPop);
1819 else
1820 {
1821 Assert(enmEffOpSize == IEMMODE_32BIT);
1822 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPop);
1823 }
1824 }
1825
1826 /* Commit the result and clear any current guest shadows for RIP. */
1827 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
1828 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1829 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
1830
1831 /* Need to transfer the shadowing information to the host register containing the updated value now. */
1832 if (!fFlat)
1833 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
1834
1835 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1836 if (idxRegEffSp != idxRegRsp)
1837 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1838 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1839 return off;
1840}
1841
1842
1843/*********************************************************************************************************************************
1844* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
1845*********************************************************************************************************************************/
1846
1847#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
1848 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
1849
1850/**
1851 * Emits code to check if a \#NM exception should be raised.
1852 *
1853 * @returns New code buffer offset, UINT32_MAX on failure.
1854 * @param pReNative The native recompile state.
1855 * @param off The code buffer offset.
1856 * @param idxInstr The current instruction.
1857 */
1858DECL_INLINE_THROW(uint32_t)
1859iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1860{
1861#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1862 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
1863
1864 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
1865 {
1866#endif
1867 /*
1868 * Make sure we don't have any outstanding guest register writes as we may
1869 * raise an #NM and all guest registers must be up to date in CPUMCTX.
1870 */
1871 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
1872 off = iemNativeRegFlushPendingWrites(pReNative, off);
1873
1874#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1875 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1876#else
1877 RT_NOREF(idxInstr);
1878#endif
1879
1880 /* Allocate a temporary CR0 register. */
1881 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
1882 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
1883
1884 /*
1885 * if (cr0 & (X86_CR0_EM | X86_CR0_TS) != 0)
1886 * return raisexcpt();
1887 */
1888 /* Test and jump. */
1889 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
1890
1891 /* Free but don't flush the CR0 register. */
1892 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1893
1894#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1895 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
1896 }
1897 else
1898 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
1899#endif
1900
1901 return off;
1902}
1903
1904
1905#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
1906 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
1907
1908/**
1909 * Emits code to check if a \#NM exception should be raised for WAIT-style instructions (CR0.MP and CR0.TS both set).
1910 *
1911 * @returns New code buffer offset, UINT32_MAX on failure.
1912 * @param pReNative The native recompile state.
1913 * @param off The code buffer offset.
1914 * @param idxInstr The current instruction.
1915 */
1916DECL_INLINE_THROW(uint32_t)
1917iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1918{
1919#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1920 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
1921
1922 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
1923 {
1924#endif
1925 /*
1926 * Make sure we don't have any outstanding guest register writes as we may
1927 * raise an #NM and all guest registers must be up to date in CPUMCTX.
1928 */
1929 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
1930 off = iemNativeRegFlushPendingWrites(pReNative, off);
1931
1932#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1933 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1934#else
1935 RT_NOREF(idxInstr);
1936#endif
1937
1938 /* Allocate a temporary CR0 register. */
1939 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_Calculation);
1940
1941 /*
1942 * if (cr0 & (X86_CR0_MP | X86_CR0_TS) == (X86_CR0_MP | X86_CR0_TS))
1943 * return raisexcpt();
1944 */
1945 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
1946 /* Test and jump. */
1947 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS, kIemNativeLabelType_RaiseNm);
1948
1949 /* Free the CR0 register. */
1950 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1951
1952#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1953 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
1954 }
1955 else
1956 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
1957#endif
1958
1959 return off;
1960}
1961
1962
1963#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
1964 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
1965
1966/**
1967 * Emits code to check if a \#MF exception should be raised.
1968 *
1969 * @returns New code buffer offset, UINT32_MAX on failure.
1970 * @param pReNative The native recompile state.
1971 * @param off The code buffer offset.
1972 * @param idxInstr The current instruction.
1973 */
1974DECL_INLINE_THROW(uint32_t)
1975iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1976{
1977 /*
1978 * Make sure we don't have any outstanding guest register writes as we may
1979 * raise an #MF and all guest registers must be up to date in CPUMCTX.
1980 */
1981 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
1982 off = iemNativeRegFlushPendingWrites(pReNative, off);
1983
1984#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1985 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1986#else
1987 RT_NOREF(idxInstr);
1988#endif
1989
1990 /* Allocate a temporary FSW register. */
1991 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
1992 uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
1993
1994 /*
1995 * if (FSW & X86_FSW_ES != 0)
1996 * return raisexcpt();
1997 */
1998 /* Test and jump. */
1999 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT, idxLabelRaiseMf);
2000
2001 /* Free but don't flush the FSW register. */
2002 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2003
2004 return off;
2005}
2006
2007
2008#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2009 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2010
2011/**
2012 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2013 *
2014 * @returns New code buffer offset, UINT32_MAX on failure.
2015 * @param pReNative The native recompile state.
2016 * @param off The code buffer offset.
2017 * @param idxInstr The current instruction.
2018 */
2019DECL_INLINE_THROW(uint32_t)
2020iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2021{
2022#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2023 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2024
2025 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2026 {
2027#endif
2028 /*
2029 * Make sure we don't have any outstanding guest register writes as we may
2030 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2031 */
2032 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2033 off = iemNativeRegFlushPendingWrites(pReNative, off);
2034
2035#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2036 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2037#else
2038 RT_NOREF(idxInstr);
2039#endif
2040
2041 /* Allocate a temporary CR0 and CR4 register. */
2042 uint8_t const idxLabelRaiseSseRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseRelated);
2043 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2044 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2045 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2046
2047 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2048#ifdef RT_ARCH_AMD64
2049 /*
2050 * We do a modified test here:
2051 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
2052 * else { goto RaiseSseRelated; }
2053 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2054 * all targets except the 386, and since the 386 doesn't support SSE
2055 * anyway, this should be a safe assumption.
2056 */
2057 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
2058 //pCodeBuf[off++] = 0xcc;
2059 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2060 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2061 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2062 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2063 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2064 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseSseRelated, kIemNativeInstrCond_ne);
2065
2066#elif defined(RT_ARCH_ARM64)
2067 /*
2068 * We do a modified test here:
2069 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2070 * else { goto RaiseSseRelated; }
2071 */
2072 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
2073 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2074 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2075 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2076 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2077 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2078 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2079 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2080 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2081 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2082 idxLabelRaiseSseRelated);
2083
2084#else
2085# error "Port me!"
2086#endif
2087
2088 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2089 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2090 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2091 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2092
2093#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2094 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2095 }
2096 else
2097 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2098#endif
2099
2100 return off;
2101}
2102
2103
2104#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2105 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2106
2107/**
2108 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2109 *
2110 * @returns New code buffer offset, UINT32_MAX on failure.
2111 * @param pReNative The native recompile state.
2112 * @param off The code buffer offset.
2113 * @param idxInstr The current instruction.
2114 */
2115DECL_INLINE_THROW(uint32_t)
2116iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2117{
2118#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2119 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2120
2121 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2122 {
2123#endif
2124 /*
2125 * Make sure we don't have any outstanding guest register writes as we may
2126 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2127 */
2128 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2129 off = iemNativeRegFlushPendingWrites(pReNative, off);
2130
2131#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2132 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2133#else
2134 RT_NOREF(idxInstr);
2135#endif
2136
2137 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2138 uint8_t const idxLabelRaiseAvxRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseAvxRelated);
2139 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2140 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2141 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2142 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2143
2144 /*
2145 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2146 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2147 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2148 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2149 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2150 * { likely }
2151 * else { goto RaiseAvxRelated; }
2152 */
2153#ifdef RT_ARCH_AMD64
2154 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2155 | (((cr4 >> X86_CR4_OSXSAVE_BIT) & 1) << 1)
2156 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2157 ^ 0x1a) ) { likely }
2158 else { goto RaiseAvxRelated; } */
2159 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
2160 //pCodeBuf[off++] = 0xcc;
2161 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2162 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2163 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2164 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2165 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2166 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2167 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2168 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2169 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2170 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2171 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseAvxRelated, kIemNativeInstrCond_ne);
2172
2173#elif defined(RT_ARCH_ARM64)
2174 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSXSAVE_BIT) & 1)) ^ 7) << 1)
2175 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2176 else { goto RaiseAvxRelated; } */
2177 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
2178 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2179 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2180 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2181 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2182 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2183 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2184 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2185 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2186 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2187 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2188 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2189 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2190 idxLabelRaiseAvxRelated);
2191
2192#else
2193# error "Port me!"
2194#endif
2195
2196 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2197 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2198 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2199 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2200#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2201 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2202 }
2203 else
2204 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2205#endif
2206
2207 return off;
2208}
2209
2210
2211#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2212#define IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
2213 off = iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(pReNative, off, pCallEntry->idxInstr)
2214
2215/** Emits code for IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT. */
2216DECL_INLINE_THROW(uint32_t)
2217iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2218{
2219 /*
2220 * Make sure we don't have any outstanding guest register writes as we may
2221 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
2222 */
2223 off = iemNativeRegFlushPendingWrites(pReNative, off);
2224
2225#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2226 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2227#else
2228 RT_NOREF(idxInstr);
2229#endif
2230
2231 uint8_t const idxLabelRaiseSseAvxFpRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseAvxFpRelated);
2232 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
2233 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
2234
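    /* The check emitted below is equivalent to this C fragment (illustrative only,
       fXcpts/fXcptMasks are not real variables):
            fXcpts     = MXCSR & X86_MXCSR_XCPT_FLAGS;
            fXcptMasks = (MXCSR & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT;
            if (fXcpts & ~fXcptMasks) goto RaiseSseAvxFpRelated; */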
2235 /* mov tmp, varmxcsr */
2236 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
2237 /* tmp &= X86_MXCSR_XCPT_MASK */
2238 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
2239 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
2240 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
2241 /* tmp = ~tmp */
2242 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
2243 /* tmp &= mxcsr */
2244 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
2245 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
2246 idxLabelRaiseSseAvxFpRelated);
2247
2248 /* Free but don't flush the MXCSR register. */
2249 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
2250 iemNativeRegFreeTmp(pReNative, idxRegTmp);
2251
2252 return off;
2253}
2254#endif
2255
2256
2257#define IEM_MC_RAISE_DIVIDE_ERROR() \
2258 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
2259
2260/**
2261 * Emits code to raise a \#DE.
2262 *
2263 * @returns New code buffer offset, UINT32_MAX on failure.
2264 * @param pReNative The native recompile state.
2265 * @param off The code buffer offset.
2266 * @param idxInstr The current instruction.
2267 */
2268DECL_INLINE_THROW(uint32_t)
2269iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2270{
2271 /*
2272 * Make sure we don't have any outstanding guest register writes as we are
2273 * about to raise a \#DE and all guest registers must be up to date in CPUMCTX. */
2274 off = iemNativeRegFlushPendingWrites(pReNative, off);
2275
2276#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2277 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2278#else
2279 RT_NOREF(idxInstr);
2280#endif
2281
2282 uint8_t const idxLabelRaiseDe = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseDe);
2283
2284 /* Raise the \#DE exception unconditionally. */
2285 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseDe);
2286
2287 return off;
2288}
2289
2290
2291#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2292 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2293
2294/**
2295 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2296 *
2297 * @returns New code buffer offset, UINT32_MAX on failure.
2298 * @param pReNative The native recompile state.
2299 * @param off The code buffer offset.
2300 * @param idxInstr The current instruction.
2301 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2302 * @param cbAlign The alignment in bytes to check against.
2303 */
2304DECL_INLINE_THROW(uint32_t)
2305iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint8_t idxVarEffAddr, uint8_t cbAlign)
2306{
2307 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2308 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2309
2310 /*
2311 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2312 */
2313 off = iemNativeRegFlushPendingWrites(pReNative, off);
2314
2315#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2316 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2317#else
2318 RT_NOREF(idxInstr);
2319#endif
2320
2321 uint8_t const idxLabelRaiseGp0 = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseGp0);
2322 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2323
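    /* if (EffAddr & (cbAlign - 1)) -> RaiseGp0; (ASSUMES cbAlign is a power of two) */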
2324 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxVarReg, cbAlign - 1, idxLabelRaiseGp0);
2325
2326 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2327 return off;
2328}
2329
2330
2331/*********************************************************************************************************************************
2332* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2333*********************************************************************************************************************************/
2334
2335/**
2336 * Pushes an IEM_MC_IF_XXX onto the condition stack.
2337 *
2338 * @returns Pointer to the condition stack entry on success, NULL on failure
2339 * (too many nestings)
2340 */
2341DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
2342{
2343#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2344 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
2345#endif
2346
2347 uint32_t const idxStack = pReNative->cCondDepth;
2348 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
2349
2350 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
2351 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
2352
2353 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
2354 pEntry->fInElse = false;
2355 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
2356 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
2357
2358 return pEntry;
2359}
2360
2361
2362/**
2363 * Start of the if-block, snapshotting the register and variable state.
2364 */
2365DECL_INLINE_THROW(void)
2366iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
2367{
2368 Assert(offIfBlock != UINT32_MAX);
2369 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2370 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2371 Assert(!pEntry->fInElse);
2372
2373 /* Define the start of the IF block if requested or for disassembly purposes. */
2374 if (idxLabelIf != UINT32_MAX)
2375 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
2376#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2377 else
2378 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
2379#else
2380 RT_NOREF(offIfBlock);
2381#endif
2382
2383#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2384 Assert(pReNative->Core.offPc == 0);
2385#endif
2386
2387 /* Copy the initial state so we can restore it in the 'else' block. */
2388 pEntry->InitialState = pReNative->Core;
2389}
2390
2391
2392#define IEM_MC_ELSE() } while (0); \
2393 off = iemNativeEmitElse(pReNative, off); \
2394 do {
2395
2396/** Emits code related to IEM_MC_ELSE. */
2397DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2398{
2399 /* Check sanity and get the conditional stack entry. */
2400 Assert(off != UINT32_MAX);
2401 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2402 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2403 Assert(!pEntry->fInElse);
2404
2405#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2406 /* Writeback any dirty shadow registers. */
2407 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
2408 * in one of the branches and leave guest registers already dirty before the start of the if
2409 * block alone. */
2410 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2411#endif
2412
2413 /* Jump to the endif */
2414 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
2415
2416 /* Define the else label and enter the else part of the condition. */
2417 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2418 pEntry->fInElse = true;
2419
2420#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2421 Assert(pReNative->Core.offPc == 0);
2422#endif
2423
2424 /* Snapshot the core state so we can do a merge at the endif and restore
2425 the snapshot we took at the start of the if-block. */
2426 pEntry->IfFinalState = pReNative->Core;
2427 pReNative->Core = pEntry->InitialState;
2428
2429 return off;
2430}
2431
2432
2433#define IEM_MC_ENDIF() } while (0); \
2434 off = iemNativeEmitEndIf(pReNative, off)
2435
2436/** Emits code related to IEM_MC_ENDIF. */
2437DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2438{
2439 /* Check sanity and get the conditional stack entry. */
2440 Assert(off != UINT32_MAX);
2441 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2442 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2443
2444#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2445 Assert(pReNative->Core.offPc == 0);
2446#endif
2447#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2448 /* Writeback any dirty shadow registers (else branch). */
2449 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
2450 * in one of the branches and leave guest registers already dirty before the start of the if
2451 * block alone. */
2452 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2453#endif
2454
2455 /*
2456 * Now we have to find common ground with the core state at the end of the
2457 * other branch. Use the smallest common denominator and just drop anything
2458 * that isn't the same in both states.
2459 */
2460 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
2461 * which is why we're doing this at the end of the else-block.
2462 * But we'd need more info about the future for that to be worth the effort. */
2463 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
2464#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2465 Assert( pOther->bmGstRegShadowDirty == 0
2466 && pReNative->Core.bmGstRegShadowDirty == 0);
2467#endif
2468
2469 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
2470 {
2471 /* shadow guest stuff first. */
2472 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
2473 if (fGstRegs)
2474 {
2475 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
2476 do
2477 {
2478 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2479 fGstRegs &= ~RT_BIT_64(idxGstReg);
2480
2481 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
2482 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
2483 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
2484 {
2485 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
2486 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
2487
2488#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2489 /* Writeback any dirty shadow registers we are about to unshadow. */
2490 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxHstReg);
2491#endif
2492 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
2493 }
2494 } while (fGstRegs);
2495 }
2496 else
2497 {
2498 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
2499#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2500 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
2501#endif
2502 }
2503
2504 /* Check variables next. For now we must require them to be identical
2505 or stuff we can recreate. */
2506 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
2507 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
2508 if (fVars)
2509 {
2510 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
2511 do
2512 {
2513 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
2514 fVars &= ~RT_BIT_32(idxVar);
2515
2516 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
2517 {
2518 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
2519 continue;
2520 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2521 {
2522 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2523 if (idxHstReg != UINT8_MAX)
2524 {
2525 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2526 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2527 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
2528 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2529 }
2530 continue;
2531 }
2532 }
2533 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
2534 continue;
2535
2536 /* Irreconcilable, so drop it. */
2537 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2538 if (idxHstReg != UINT8_MAX)
2539 {
2540 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2541 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2542 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
2543 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2544 }
2545 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2546 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
2547 } while (fVars);
2548 }
2549
2550 /* Finally, check that the host register allocations match. */
2551 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
2552 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
2553 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
2554 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
2555 }
2556
2557 /*
2558 * Define the endif label and maybe the else one if we're still in the 'if' part.
2559 */
2560 if (!pEntry->fInElse)
2561 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2562 else
2563 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
2564 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
2565
2566 /* Pop the conditional stack. */
2567 pReNative->cCondDepth -= 1;
2568
2569 return off;
2570}
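/*
 * Note on the reconciliation above: it is bookkeeping plus, at most, dirty
 * shadow writebacks.  The goal is a register allocator state that is identical
 * no matter which branch was taken at runtime, so guest shadow copies and
 * variable/register associations that only exist in one branch are simply
 * forgotten (after flushing any dirty shadows), while a mismatch in the host
 * register allocation bitmap itself is fatal and raises
 * VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED.
 */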
2571
2572
2573#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
2574 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
2575 do {
2576
2577/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
2578DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2579{
2580 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2581 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2582
2583 /* Get the eflags. */
2584 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2585 kIemNativeGstRegUse_ReadOnly);
2586
2587 /* Test and jump. */
2588 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2589
2590 /* Free but don't flush the EFlags register. */
2591 iemNativeRegFreeTmp(pReNative, idxEflReg);
2592
2593 /* Make a copy of the core state now as we start the if-block. */
2594 iemNativeCondStartIfBlock(pReNative, off);
2595
2596 return off;
2597}
2598
2599
2600#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
2601 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
2602 do {
2603
2604/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
2605DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2606{
2607 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2608 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2609
2610 /* Get the eflags. */
2611 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2612 kIemNativeGstRegUse_ReadOnly);
2613
2614 /* Test and jump. */
2615 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2616
2617 /* Free but don't flush the EFlags register. */
2618 iemNativeRegFreeTmp(pReNative, idxEflReg);
2619
2620 /* Make a copy of the core state now as we start the if-block. */
2621 iemNativeCondStartIfBlock(pReNative, off);
2622
2623 return off;
2624}
2625
2626
2627#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
2628 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
2629 do {
2630
2631/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
2632DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
2633{
2634 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2635 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2636
2637 /* Get the eflags. */
2638 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2639 kIemNativeGstRegUse_ReadOnly);
2640
2641 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2642 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2643
2644 /* Test and jump. */
2645 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2646
2647 /* Free but don't flush the EFlags register. */
2648 iemNativeRegFreeTmp(pReNative, idxEflReg);
2649
2650 /* Make a copy of the core state now as we start the if-block. */
2651 iemNativeCondStartIfBlock(pReNative, off);
2652
2653 return off;
2654}
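/*
 * Illustrative sketch (typical MC usage, not generated output): a block written as
 *
 *      IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
 *          ...
 *      } IEM_MC_ELSE() {
 *          ...
 *      } IEM_MC_ENDIF();
 *
 * expands through the macros above into roughly
 *
 *      off = iemNativeEmitIfEflagsBitSet(pReNative, off, X86_EFL_ZF); do { ...
 *      } while (0); off = iemNativeEmitElse(pReNative, off);         do { ...
 *      } while (0); off = iemNativeEmitEndIf(pReNative, off);
 *
 * so every IF pushes a condition stack entry, ELSE restores the snapshot taken
 * at the start of the if-block, and ENDIF reconciles the two states and pops
 * the entry again.
 */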
2655
2656
2657#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
2658 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
2659 do {
2660
2661/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
2662DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
2663{
2664 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2665 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2666
2667 /* Get the eflags. */
2668 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2669 kIemNativeGstRegUse_ReadOnly);
2670
2671 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2672 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2673
2674 /* Test and jump. */
2675 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2676
2677 /* Free but don't flush the EFlags register. */
2678 iemNativeRegFreeTmp(pReNative, idxEflReg);
2679
2680 /* Make a copy of the core state now as we start the if-block. */
2681 iemNativeCondStartIfBlock(pReNative, off);
2682
2683 return off;
2684}
2685
2686
2687#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
2688 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
2689 do {
2690
2691#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
2692 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
2693 do {
2694
2695/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
2696DECL_INLINE_THROW(uint32_t)
2697iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2698 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
2699{
2700 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
2701 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2702
2703 /* Get the eflags. */
2704 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2705 kIemNativeGstRegUse_ReadOnly);
2706
2707 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
2708 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
2709
2710 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
2711 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
2712 Assert(iBitNo1 != iBitNo2);
2713
2714#ifdef RT_ARCH_AMD64
2715 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
2716
2717 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2718 if (iBitNo1 > iBitNo2)
2719 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
2720 else
2721 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
2722 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2723
2724#elif defined(RT_ARCH_ARM64)
2725 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2726 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2727
2728 /* and tmpreg, eflreg, #1<<iBitNo1 */
2729 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
2730
2731 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
2732 if (iBitNo1 > iBitNo2)
2733 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2734 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
2735 else
2736 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2737 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
2738
2739 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2740
2741#else
2742# error "Port me"
2743#endif
2744
2745 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
2746 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
2747 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
2748
2749 /* Free but don't flush the EFlags and tmp registers. */
2750 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2751 iemNativeRegFreeTmp(pReNative, idxEflReg);
2752
2753 /* Make a copy of the core state now as we start the if-block. */
2754 iemNativeCondStartIfBlock(pReNative, off);
2755
2756 return off;
2757}
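/*
 * Worked example (illustrative): for something like
 * IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF), with SF at bit 7 and OF at
 * bit 11, the AMD64 path above computes
 *
 *      tmp   = eflags & X86_EFL_SF;    // isolate SF (bit 7)
 *      tmp <<= 11 - 7;                 // move it into the OF position
 *      tmp  ^= eflags;                 // bit 11 of tmp is now SF ^ OF
 *
 * so the final bit test branches to the else-label when the two flags are
 * equal and falls through into the if-block when they differ.
 */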
2758
2759
2760#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
2761 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
2762 do {
2763
2764#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
2765 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
2766 do {
2767
2768/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
2769 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
2770DECL_INLINE_THROW(uint32_t)
2771iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
2772 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
2773{
2774 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
2775 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2776
2777 /* We need an if-block label for the inverted (OR) variant. */
2778 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
2779 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
2780
2781 /* Get the eflags. */
2782 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2783 kIemNativeGstRegUse_ReadOnly);
2784
2785 /* Translate the flag masks to bit numbers. */
2786 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2787 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2788
2789 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
2790 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
2791 Assert(iBitNo1 != iBitNo);
2792
2793 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
2794 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
2795 Assert(iBitNo2 != iBitNo);
2796 Assert(iBitNo2 != iBitNo1);
2797
2798#ifdef RT_ARCH_AMD64
2799 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
2800#elif defined(RT_ARCH_ARM64)
2801 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2802#endif
2803
2804 /* Check for the lone bit first. */
2805 if (!fInverted)
2806 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2807 else
2808 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
2809
2810 /* Then extract and compare the other two bits. */
2811#ifdef RT_ARCH_AMD64
2812 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2813 if (iBitNo1 > iBitNo2)
2814 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
2815 else
2816 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
2817 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2818
2819#elif defined(RT_ARCH_ARM64)
2820 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2821
2822 /* and tmpreg, eflreg, #1<<iBitNo1 */
2823 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
2824
2825 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
2826 if (iBitNo1 > iBitNo2)
2827 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2828 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
2829 else
2830 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2831 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
2832
2833 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2834
2835#else
2836# error "Port me"
2837#endif
2838
2839 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
2840 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
2841 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
2842
2843 /* Free but don't flush the EFlags and tmp registers. */
2844 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2845 iemNativeRegFreeTmp(pReNative, idxEflReg);
2846
2847 /* Make a copy of the core state now as we start the if-block. */
2848 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
2849
2850 return off;
2851}
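/*
 * Illustrative example: a JLE-style check such as
 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) uses
 * the inverted variant: the lone ZF test above jumps straight to the if-label
 * when the bit is set, and the SF/OF comparison then sends the SF == OF case
 * to the else-label, leaving SF != OF to fall through into the if-block.
 */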
2852
2853
2854#define IEM_MC_IF_CX_IS_NZ() \
2855 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
2856 do {
2857
2858/** Emits code for IEM_MC_IF_CX_IS_NZ. */
2859DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2860{
2861 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2862
2863 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2864 kIemNativeGstRegUse_ReadOnly);
2865 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
2866 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2867
2868 iemNativeCondStartIfBlock(pReNative, off);
2869 return off;
2870}
2871
2872
2873#define IEM_MC_IF_ECX_IS_NZ() \
2874 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
2875 do {
2876
2877#define IEM_MC_IF_RCX_IS_NZ() \
2878 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
2879 do {
2880
2881/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
2882DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
2883{
2884 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2885
2886 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2887 kIemNativeGstRegUse_ReadOnly);
2888 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
2889 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2890
2891 iemNativeCondStartIfBlock(pReNative, off);
2892 return off;
2893}
2894
2895
2896#define IEM_MC_IF_CX_IS_NOT_ONE() \
2897 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
2898 do {
2899
2900/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
2901DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2902{
2903 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2904
2905 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2906 kIemNativeGstRegUse_ReadOnly);
2907#ifdef RT_ARCH_AMD64
2908 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2909#else
2910 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2911 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
2912 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2913#endif
2914 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2915
2916 iemNativeCondStartIfBlock(pReNative, off);
2917 return off;
2918}
2919
2920
2921#define IEM_MC_IF_ECX_IS_NOT_ONE() \
2922 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
2923 do {
2924
2925#define IEM_MC_IF_RCX_IS_NOT_ONE() \
2926 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
2927 do {
2928
2929/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
2930DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
2931{
2932 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2933
2934 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2935 kIemNativeGstRegUse_ReadOnly);
2936 if (f64Bit)
2937 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2938 else
2939 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2940 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2941
2942 iemNativeCondStartIfBlock(pReNative, off);
2943 return off;
2944}
2945
2946
2947#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
2948 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
2949 do {
2950
2951#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
2952 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
2953 do {
2954
2955/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
2956 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
2957DECL_INLINE_THROW(uint32_t)
2958iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
2959{
2960 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2961 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2962
2963 /* We have to load both RCX and EFLAGS before we can start branching,
2964 otherwise we'll end up in the else-block with an inconsistent
2965 register allocator state.
2966 Doing EFLAGS first as it's more likely to be loaded, right? */
2967 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2968 kIemNativeGstRegUse_ReadOnly);
2969 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2970 kIemNativeGstRegUse_ReadOnly);
2971
2972 /** @todo we could reduce this to a single branch instruction by spending a
2973 * temporary register and some setnz stuff. Not sure if loops are
2974 * worth it. */
2975 /* Check CX. */
2976#ifdef RT_ARCH_AMD64
2977 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2978#else
2979 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2980 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
2981 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2982#endif
2983
2984 /* Check the EFlags bit. */
2985 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2986 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2987 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
2988 !fCheckIfSet /*fJmpIfSet*/);
2989
2990 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2991 iemNativeRegFreeTmp(pReNative, idxEflReg);
2992
2993 iemNativeCondStartIfBlock(pReNative, off);
2994 return off;
2995}
2996
2997
2998#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
2999 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
3000 do {
3001
3002#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3003 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
3004 do {
3005
3006#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3007 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
3008 do {
3009
3010#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3011 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
3012 do {
3013
3014/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3015 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3016 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3017 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3018DECL_INLINE_THROW(uint32_t)
3019iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3020 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
3021{
3022 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3023 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3024
3025 /* We have to load both RCX and EFLAGS before we can start branching,
3026 otherwise we'll end up in the else-block with an inconsistent
3027 register allocator state.
3028 Doing EFLAGS first as it's more likely to be loaded, right? */
3029 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3030 kIemNativeGstRegUse_ReadOnly);
3031 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3032 kIemNativeGstRegUse_ReadOnly);
3033
3034 /** @todo we could reduce this to a single branch instruction by spending a
3035 * temporary register and some setnz stuff. Not sure if loops are
3036 * worth it. */
3037 /* Check RCX/ECX. */
3038 if (f64Bit)
3039 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3040 else
3041 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3042
3043 /* Check the EFlags bit. */
3044 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3045 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3046 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3047 !fCheckIfSet /*fJmpIfSet*/);
3048
3049 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3050 iemNativeRegFreeTmp(pReNative, idxEflReg);
3051
3052 iemNativeCondStartIfBlock(pReNative, off);
3053 return off;
3054}
3055
3056
3057#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3058 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3059 do {
3060
3061/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3062DECL_INLINE_THROW(uint32_t)
3063iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3064{
3065 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3066
3067 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3068 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3069 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3070 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3071
3072 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3073
3074 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3075
3076 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3077
3078 iemNativeCondStartIfBlock(pReNative, off);
3079 return off;
3080}
3081
3082
3083#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3084 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3085 do {
3086
3087/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3088DECL_INLINE_THROW(uint32_t)
3089iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3090{
3091 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3092 Assert(iGReg < 16);
3093
3094 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3095 kIemNativeGstRegUse_ReadOnly);
3096
3097 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3098
3099 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3100
3101 iemNativeCondStartIfBlock(pReNative, off);
3102 return off;
3103}
3104
3105
3106
3107/*********************************************************************************************************************************
3108* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3109*********************************************************************************************************************************/
3110
3111#define IEM_MC_NOREF(a_Name) \
3112 RT_NOREF_PV(a_Name)
3113
3114#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3115 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3116
3117#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3118 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3119
3120#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3121 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3122
3123#define IEM_MC_LOCAL(a_Type, a_Name) \
3124 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3125
3126#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3127 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3128
3129#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3130 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
3131
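/*
 * Illustrative note (typical MC usage, assumed for the example): declarations like
 *
 *      IEM_MC_ARG(uint16_t *, pu16Dst,  0);
 *      IEM_MC_ARG(uint16_t,   u16Src,   1);
 *      IEM_MC_ARG(uint32_t *, pEFlags,  2);
 *      IEM_MC_LOCAL(uint16_t, u16Tmp);
 *
 * become plain variable-index allocations via the macros above; only the
 * sizeof() of the declared type and the argument number survive into the
 * recompiler state at this point.
 */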
3132
3133/**
3134 * Sets the host register for @a idxVar to @a idxReg.
3135 *
3136 * The register must not be allocated. Any guest register shadowing will be
3137 * implicitly dropped by this call.
3138 *
3139 * The variable must not have any register associated with it (causes
3140 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3141 * implied.
3142 *
3143 * @returns idxReg
3144 * @param pReNative The recompiler state.
3145 * @param idxVar The variable.
3146 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3147 * @param off For recording in debug info.
3148 *
3149 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3150 */
3151DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
3152{
3153 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3154 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3155 Assert(!pVar->fRegAcquired);
3156 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3157 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3158 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3159
3160 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3161 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3162
3163 iemNativeVarSetKindToStack(pReNative, idxVar);
3164 pVar->idxReg = idxReg;
3165
3166 return idxReg;
3167}
3168
3169
3170/**
3171 * A convenient helper function.
3172 * Variant of iemNativeVarRegisterSet that also marks the register as acquired.
3173DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3174 uint8_t idxReg, uint32_t *poff)
3175{
3176 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
3177 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
3178 return idxReg;
3179}
3180
3181
3182/**
3183 * This is called by IEM_MC_END() to clean up all variables.
3184 */
3185DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
3186{
3187 uint32_t const bmVars = pReNative->Core.bmVars;
3188 if (bmVars != 0)
3189 iemNativeVarFreeAllSlow(pReNative, bmVars);
3190 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
3191 Assert(pReNative->Core.bmStack == 0);
3192}
3193
3194
3195#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
3196
3197/**
3198 * This is called by IEM_MC_FREE_LOCAL.
3199 */
3200DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3201{
3202 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3203 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
3204 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3205}
3206
3207
3208#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
3209
3210/**
3211 * This is called by IEM_MC_FREE_ARG.
3212 */
3213DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3214{
3215 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3216 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
3217 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3218}
3219
3220
3221#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
3222
3223/**
3224 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
3225 */
3226DECL_INLINE_THROW(uint32_t)
3227iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
3228{
3229 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
3230 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
3231 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3232 Assert( pVarDst->cbVar == sizeof(uint16_t)
3233 || pVarDst->cbVar == sizeof(uint32_t));
3234
3235 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
3236 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
3237 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
3238 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
3239 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3240
3241 Assert(pVarDst->cbVar < pVarSrc->cbVar);
3242
3243 /*
3244 * Special case for immediates.
3245 */
3246 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
3247 {
3248 switch (pVarDst->cbVar)
3249 {
3250 case sizeof(uint16_t):
3251 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
3252 break;
3253 case sizeof(uint32_t):
3254 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
3255 break;
3256 default: AssertFailed(); break;
3257 }
3258 }
3259 else
3260 {
3261 /*
3262 * The generic solution for now.
3263 */
3264 /** @todo optimize this by having the python script make sure the source
3265 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
3266 * statement. Then we could just transfer the register assignments. */
3267 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
3268 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
3269 switch (pVarDst->cbVar)
3270 {
3271 case sizeof(uint16_t):
3272 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
3273 break;
3274 case sizeof(uint32_t):
3275 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
3276 break;
3277 default: AssertFailed(); break;
3278 }
3279 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
3280 iemNativeVarRegisterRelease(pReNative, idxVarDst);
3281 }
3282 return off;
3283}
3284
3285
3286
3287/*********************************************************************************************************************************
3288* Emitters for IEM_MC_CALL_CIMPL_XXX *
3289*********************************************************************************************************************************/
3290
3291/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
3292DECL_INLINE_THROW(uint32_t)
3293iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
3294 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
3295
3296{
3297 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
3298
3299#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3300 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
3301 when a call clobbers any of the relevant control registers. */
3302# if 1
3303 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
3304 {
3305 /* Likely as long as call+ret are done via cimpl. */
3306 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
3307 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
3308 }
3309 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
3310 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3311 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
3312 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3313 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
3314 else
3315 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3316 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3317 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3318
3319# else
3320 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
3321 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3322 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
3323 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3324 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
3325 || pfnCImpl == (uintptr_t)iemCImpl_callf
3326 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
3327 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
3328 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3329 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3330 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3331# endif
3332#endif
3333
3334 /*
3335 * Do all the call setup and cleanup.
3336 */
3337 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
3338
3339 /*
3340 * Load the two or three hidden arguments.
3341 */
3342#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
3343 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3344 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3345 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
3346#else
3347 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3348 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
3349#endif
3350
3351 /*
3352 * Make the call and check the return code.
3353 *
3354 * Shadow PC copies are always flushed here, other stuff depends on flags.
3355 * Segment and general purpose registers are explicitly flushed via the
3356 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
3357 * macros.
3358 */
3359 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
3360#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
3361 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3362#endif
3363 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
3364 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
3365 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
3366 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
3367
3368 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3369}
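/*
 * Note: IEM_CIMPL_HIDDEN_ARGS mirrors the hidden-argument loading above, i.e.
 * three on Windows/AMD64 with VBOXSTRICTRC_STRICT_ENABLED (the VBOXSTRICTRC
 * shadow slot, pVCpu and cbInstr) and two otherwise (pVCpu and cbInstr), so
 * the explicit a0..a4 arguments of the CIMPL macros below always start at
 * argument number IEM_CIMPL_HIDDEN_ARGS.
 */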
3370
3371
3372#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
3373 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
3374
3375/** Emits code for IEM_MC_CALL_CIMPL_1. */
3376DECL_INLINE_THROW(uint32_t)
3377iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3378 uintptr_t pfnCImpl, uint8_t idxArg0)
3379{
3380 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3381 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
3382}
3383
3384
3385#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
3386 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
3387
3388/** Emits code for IEM_MC_CALL_CIMPL_2. */
3389DECL_INLINE_THROW(uint32_t)
3390iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3391 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
3392{
3393 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3394 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3395 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
3396}
3397
3398
3399#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
3400 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3401 (uintptr_t)a_pfnCImpl, a0, a1, a2)
3402
3403/** Emits code for IEM_MC_CALL_CIMPL_3. */
3404DECL_INLINE_THROW(uint32_t)
3405iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3406 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3407{
3408 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3409 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3410 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3411 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
3412}
3413
3414
3415#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
3416 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3417 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
3418
3419/** Emits code for IEM_MC_CALL_CIMPL_4. */
3420DECL_INLINE_THROW(uint32_t)
3421iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3422 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3423{
3424 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3425 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3426 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3427 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3428 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
3429}
3430
3431
3432#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
3433 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3434 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
3435
3436/** Emits code for IEM_MC_CALL_CIMPL_5. */
3437DECL_INLINE_THROW(uint32_t)
3438iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3439 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
3440{
3441 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3442 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3443 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3444 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3445 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
3446 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
3447}
3448
3449
3450/** Recompiler debugging: Flush guest register shadow copies. */
3451#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
3452
3453
3454
3455/*********************************************************************************************************************************
3456* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
3457*********************************************************************************************************************************/
3458
3459/**
3460 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
3461 */
3462DECL_INLINE_THROW(uint32_t)
3463iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3464 uintptr_t pfnAImpl, uint8_t cArgs)
3465{
3466 if (idxVarRc != UINT8_MAX)
3467 {
3468 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
3469 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
3470 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3471 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3472 }
3473
3474 /*
3475 * Do all the call setup and cleanup.
3476 *
3477 * Only pending guest register writes held in call-volatile registers need flushing here,
3478 * since assembly helpers can't throw and don't access anything living in CPUMCTX; they
3479 * only access their parameters. iemNativeEmitCallCommon() always flushes the call-volatile
3480 * registers regardless of the fFlushPendingWrites parameter.
3481 */
3482 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
3483
3484 /*
3485 * Make the call and update the return code variable if we've got one.
3486 */
3487 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
3488 if (idxVarRc != UINT8_MAX)
3489 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
3490
3491 return off;
3492}
3493
3494
3495
3496#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
3497 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
3498
3499#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
3500 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
3501
3502/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
3503DECL_INLINE_THROW(uint32_t)
3504iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
3505{
3506 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
3507}
3508
3509
3510#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
3511 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
3512
3513#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
3514 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
3515
3516/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
3517DECL_INLINE_THROW(uint32_t)
3518iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
3519{
3520 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3521 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
3522}
3523
3524
3525#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
3526 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
3527
3528#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
3529 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
3530
3531/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
3532DECL_INLINE_THROW(uint32_t)
3533iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3534 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
3535{
3536 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3537 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3538 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
3539}
3540
3541
3542#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
3543 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
3544
3545#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
3546 IEM_MC_LOCAL(a_rcType, a_rc); \
3547 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
3548
3549/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
3550DECL_INLINE_THROW(uint32_t)
3551iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3552 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3553{
3554 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3555 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3556 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3557 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
3558}
3559
3560
3561#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
3562 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3563
3564#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
3565 IEM_MC_LOCAL(a_rcType, a_rc); \
3566 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3567
3568/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
3569DECL_INLINE_THROW(uint32_t)
3570iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3571 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3572{
3573 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3574 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3575 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3576 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
3577 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
3578}
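/*
 * Illustrative sketch (assumed/typical usage): a binary ALU instruction body
 * would combine the argument and call macros along the lines of
 *
 *      IEM_MC_ARG(uint16_t *, pu16Dst,  0);
 *      IEM_MC_ARG(uint16_t,   u16Src,   1);
 *      IEM_MC_ARG(uint32_t *, pEFlags,  2);
 *      ...
 *      IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
 *
 * which the emitters above turn into a native call to the assembly helper with
 * the three arguments marshalled into the calling-convention registers.
 */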
3579
3580
3581
3582/*********************************************************************************************************************************
3583* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
3584*********************************************************************************************************************************/
3585
3586#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
3587 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
3588
3589#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
3590 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
3591
3592#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
3593 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
3594
3595#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
3596 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
3597
3598
3599/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
3600 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
3601DECL_INLINE_THROW(uint32_t)
3602iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
3603{
3604 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3605 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3606 Assert(iGRegEx < 20);
3607
3608 /* Same discussion as in iemNativeEmitFetchGregU16 */
3609 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3610 kIemNativeGstRegUse_ReadOnly);
3611
3612 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3613 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3614
3615 /* The value is zero-extended to the full 64-bit host register width. */
3616 if (iGRegEx < 16)
3617 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3618 else
3619 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
3620
3621 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3622 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3623 return off;
3624}
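/*
 * Note: iGRegEx values 16 thru 19 are how the threaded functions encode the
 * legacy high-byte registers (AH, CH, DH, BH), hence the Assert(iGRegEx < 20)
 * and the iemNativeEmitLoadGprFromGpr8Hi path above, which fetches bits 8..15
 * of the underlying full register (iGRegEx & 15).
 */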
3625
3626
3627#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
3628 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
3629
3630#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
3631 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
3632
3633#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
3634 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
3635
3636/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
3637DECL_INLINE_THROW(uint32_t)
3638iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
3639{
3640 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3641 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
3642 Assert(iGRegEx < 20);
3643
3644 /* Same discussion as in iemNativeEmitFetchGregU16 */
3645 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3646 kIemNativeGstRegUse_ReadOnly);
3647
3648 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3649 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3650
3651 if (iGRegEx < 16)
3652 {
3653 switch (cbSignExtended)
3654 {
3655 case sizeof(uint16_t):
3656 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3657 break;
3658 case sizeof(uint32_t):
3659 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3660 break;
3661 case sizeof(uint64_t):
3662 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3663 break;
3664 default: AssertFailed(); break;
3665 }
3666 }
3667 else
3668 {
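        /* High-byte source (AH/CH/DH/BH): first move bits 15:8 of the guest
           register down into bits 7:0 of the destination, then sign-extend
           that low byte in place to the requested width. */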
3669 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
3670 switch (cbSignExtended)
3671 {
3672 case sizeof(uint16_t):
3673 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3674 break;
3675 case sizeof(uint32_t):
3676 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3677 break;
3678 case sizeof(uint64_t):
3679 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3680 break;
3681 default: AssertFailed(); break;
3682 }
3683 }
3684
3685 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3686 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3687 return off;
3688}
3689
3690
3691
3692#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
3693 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
3694
3695#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
3696 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
3697
3698#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
3699 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
3700
3701/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
3702DECL_INLINE_THROW(uint32_t)
3703iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
3704{
3705 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3706 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3707 Assert(iGReg < 16);
3708
3709 /*
3710 * We can either just load the low 16-bit of the GPR into a host register
3711 * for the variable, or we can do so via a shadow copy host register. The
3712 * latter will avoid having to reload it if it's being stored later, but
3713 * will waste a host register if it isn't touched again. Since we don't
3714 * know what's going to happen, we choose the latter for now.
3715 */
3716 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3717 kIemNativeGstRegUse_ReadOnly);
3718
3719 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3720 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3721 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3722 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3723
3724 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3725 return off;
3726}
3727
3728
3729#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
3730 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
3731
3732#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
3733 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
3734
3735/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
3736DECL_INLINE_THROW(uint32_t)
3737iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
3738{
3739 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3740 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
3741 Assert(iGReg < 16);
3742
3743 /*
3744 * We can either just load the low 16-bit of the GPR into a host register
3745 * for the variable, or we can do so via a shadow copy host register. The
3746 * latter will avoid having to reload it if it's being stored later, but
3747 * will waste a host register if it isn't touched again. Since we don't
3748 * know what's going to happen, we choose the latter for now.
3749 */
3750 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3751 kIemNativeGstRegUse_ReadOnly);
3752
3753 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3754 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3755 if (cbSignExtended == sizeof(uint32_t))
3756 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3757 else
3758 {
3759 Assert(cbSignExtended == sizeof(uint64_t));
3760 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3761 }
3762 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3763
3764 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3765 return off;
3766}
3767
3768
3769#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
3770 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
3771
3772#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
3773 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
3774
3775/** Emits code for IEM_MC_FETCH_GREG_U32. */
3776DECL_INLINE_THROW(uint32_t)
3777iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
3778{
3779 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3780 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3781 Assert(iGReg < 16);
3782
3783 /*
3784 * We can either just load the low 32-bit of the GPR into a host register
3785 * for the variable, or we can do so via a shadow copy host register. The
3786 * latter will avoid having to reload it if it's being stored later, but
3787 * will waste a host register if it isn't touched again. Since we don't
3788 * know what's going to happen, we choose the latter for now.
3789 */
3790 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3791 kIemNativeGstRegUse_ReadOnly);
3792
3793 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3794 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3795 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
3796 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3797
3798 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3799 return off;
3800}
3801
3802
3803#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
3804 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
3805
3806/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
3807DECL_INLINE_THROW(uint32_t)
3808iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
3809{
3810 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3811 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
3812 Assert(iGReg < 16);
3813
3814 /*
3815 * We can either just load the low 32-bit of the GPR into a host register
3816 * for the variable, or we can do so via a shadow copy host register. The
3817 * latter will avoid having to reload it if it's being stored later, but
3818 * will waste a host register if it isn't touched again. Since we don't
3819 * know what's going to happen, we choose the latter for now.
3820 */
3821 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3822 kIemNativeGstRegUse_ReadOnly);
3823
3824 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3825 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3826 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
3827 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3828
3829 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3830 return off;
3831}
3832
3833
3834#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
3835 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
3836
3837#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
3838 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
3839
3840/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
3841 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
3842DECL_INLINE_THROW(uint32_t)
3843iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
3844{
3845 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3846 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
3847 Assert(iGReg < 16);
3848
3849 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3850 kIemNativeGstRegUse_ReadOnly);
3851
3852 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3853 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3854 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
3855 /** @todo name the register a shadow one already? */
3856 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3857
3858 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3859 return off;
3860}
3861
3862
3863#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3864#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
3865 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
3866
3867/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
3868DECL_INLINE_THROW(uint32_t)
3869iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
3870{
3871 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3872 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
3873 Assert(iGRegLo < 16 && iGRegHi < 16);
3874
3875 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
3876 kIemNativeGstRegUse_ReadOnly);
3877 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
3878 kIemNativeGstRegUse_ReadOnly);
3879
3880 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3881 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
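    /* Copy the two guest GPRs into the low and high 64-bit lanes of the
       128-bit SIMD register backing the variable. */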
3882 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
3883 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
3884
3885 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
3886 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
3887 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
3888 return off;
3889}
3890#endif
3891
3892
3893/*********************************************************************************************************************************
3894* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
3895*********************************************************************************************************************************/
3896
3897#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
3898 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
3899
3900/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
3901DECL_INLINE_THROW(uint32_t)
3902iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
3903{
3904 Assert(iGRegEx < 20);
3905 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3906 kIemNativeGstRegUse_ForUpdate);
3907#ifdef RT_ARCH_AMD64
3908 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
3909
3910 /* To the lowest byte of the register: mov r8, imm8 */
3911 if (iGRegEx < 16)
3912 {
3913 if (idxGstTmpReg >= 8)
3914 pbCodeBuf[off++] = X86_OP_REX_B;
3915 else if (idxGstTmpReg >= 4)
3916 pbCodeBuf[off++] = X86_OP_REX;
3917 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
3918 pbCodeBuf[off++] = u8Value;
3919 }
3920 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
3921 else if (idxGstTmpReg < 4)
3922 {
3923 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
3924 pbCodeBuf[off++] = u8Value;
3925 }
3926 else
3927 {
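        /* The guest GPR lives in a host register whose bits 15:8 cannot be
           addressed directly (only AH/CH/DH/BH exist, and those cannot be
           encoded together with a REX prefix).  So rotate the 64-bit register
           right by 8 to bring those bits into the low byte, patch the low
           byte, then rotate back. */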
3928 /* ror reg64, 8 */
3929 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
3930 pbCodeBuf[off++] = 0xc1;
3931 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3932 pbCodeBuf[off++] = 8;
3933
3934 /* mov reg8, imm8 */
3935 if (idxGstTmpReg >= 8)
3936 pbCodeBuf[off++] = X86_OP_REX_B;
3937 else if (idxGstTmpReg >= 4)
3938 pbCodeBuf[off++] = X86_OP_REX;
3939 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
3940 pbCodeBuf[off++] = u8Value;
3941
3942 /* rol reg64, 8 */
3943 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
3944 pbCodeBuf[off++] = 0xc1;
3945 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3946 pbCodeBuf[off++] = 8;
3947 }
3948
3949#elif defined(RT_ARCH_ARM64)
3950 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
3951 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3952 if (iGRegEx < 16)
3953 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
3954 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
3955 else
3956 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
3957 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
3958 iemNativeRegFreeTmp(pReNative, idxImmReg);
3959
3960#else
3961# error "Port me!"
3962#endif
3963
3964 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3965
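    /* Writeback note: without delayed register writeback the updated value
       must be flushed to cpum.GstCtx right here; with
       IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK the allocator presumably
       keeps the shadow marked dirty and emits the store later. */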
3966#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3967 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
3968#endif
3969
3970 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3971 return off;
3972}
3973
3974
3975#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
3976 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
3977
3978/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
3979DECL_INLINE_THROW(uint32_t)
3980iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
3981{
3982 Assert(iGRegEx < 20);
3983 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3984
3985 /*
3986 * If it's a constant value (unlikely) we treat this as a
3987 * IEM_MC_STORE_GREG_U8_CONST statement.
3988 */
3989 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3990 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3991 { /* likely */ }
3992 else
3993 {
3994 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3995 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3996 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
3997 }
3998
3999 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4000 kIemNativeGstRegUse_ForUpdate);
4001 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4002
4003#ifdef RT_ARCH_AMD64
4004 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4005 if (iGRegEx < 16)
4006 {
4007 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4008 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4009 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4010 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4011 pbCodeBuf[off++] = X86_OP_REX;
4012 pbCodeBuf[off++] = 0x8a;
4013 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4014 }
4015 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
4016 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4017 {
4018 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4019 pbCodeBuf[off++] = 0x8a;
4020 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4021 }
4022 else
4023 {
4024 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4025
4026 /* ror reg64, 8 */
4027 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4028 pbCodeBuf[off++] = 0xc1;
4029 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4030 pbCodeBuf[off++] = 8;
4031
4032 /* mov reg8, reg8(r/m) */
4033 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4034 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4035 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4036 pbCodeBuf[off++] = X86_OP_REX;
4037 pbCodeBuf[off++] = 0x8a;
4038 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4039
4040 /* rol reg64, 8 */
4041 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4042 pbCodeBuf[off++] = 0xc1;
4043 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4044 pbCodeBuf[off++] = 8;
4045 }
4046
4047#elif defined(RT_ARCH_ARM64)
4048 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4049 or
4050 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4051 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4052 if (iGRegEx < 16)
4053 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4054 else
4055 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4056
4057#else
4058# error "Port me!"
4059#endif
4060 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4061
4062 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4063
4064#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4065 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4066#endif
4067 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4068 return off;
4069}
4070
4071
4072
4073#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4074 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4075
4076/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4077DECL_INLINE_THROW(uint32_t)
4078iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4079{
4080 Assert(iGReg < 16);
4081 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4082 kIemNativeGstRegUse_ForUpdate);
4083#ifdef RT_ARCH_AMD64
4084 /* mov reg16, imm16 */
4085 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4086 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4087 if (idxGstTmpReg >= 8)
4088 pbCodeBuf[off++] = X86_OP_REX_B;
4089 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4090 pbCodeBuf[off++] = RT_BYTE1(uValue);
4091 pbCodeBuf[off++] = RT_BYTE2(uValue);
4092
4093#elif defined(RT_ARCH_ARM64)
4094 /* movk xdst, #uValue, lsl #0 */
4095 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4096 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
4097
4098#else
4099# error "Port me!"
4100#endif
4101
4102 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4103
4104#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4105 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4106#endif
4107 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4108 return off;
4109}
4110
4111
4112#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4113 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4114
4115/** Emits code for IEM_MC_STORE_GREG_U16. */
4116DECL_INLINE_THROW(uint32_t)
4117iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4118{
4119 Assert(iGReg < 16);
4120 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4121
4122 /*
4123 * If it's a constant value (unlikely) we treat this as a
4124 * IEM_MC_STORE_GREG_U16_CONST statement.
4125 */
4126 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4127 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4128 { /* likely */ }
4129 else
4130 {
4131 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4132 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4133 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
4134 }
4135
4136 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4137 kIemNativeGstRegUse_ForUpdate);
4138
4139#ifdef RT_ARCH_AMD64
4140 /* mov reg16, reg16 or [mem16] */
4141 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4142 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4143 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
4144 {
4145 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
4146 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
4147 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
4148 pbCodeBuf[off++] = 0x8b;
4149 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
4150 }
4151 else
4152 {
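        /* The value variable currently has no host register, so load the
           16-bit value straight from its stack slot via an RBP-relative
           memory operand instead of forcing it into a register first. */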
4153 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
4154 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
4155 if (idxGstTmpReg >= 8)
4156 pbCodeBuf[off++] = X86_OP_REX_R;
4157 pbCodeBuf[off++] = 0x8b;
4158 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
4159 }
4160
4161#elif defined(RT_ARCH_ARM64)
4162 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
4163 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4164 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4165 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
4166 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4167
4168#else
4169# error "Port me!"
4170#endif
4171
4172 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4173
4174#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4175 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4176#endif
4177 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4178 return off;
4179}
4180
4181
4182#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
4183 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
4184
4185/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
4186DECL_INLINE_THROW(uint32_t)
4187iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
4188{
4189 Assert(iGReg < 16);
4190 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4191 kIemNativeGstRegUse_ForFullWrite);
4192 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4193#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4194 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4195#endif
4196 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4197 return off;
4198}
4199
4200
4201#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
4202 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
4203
4204/** Emits code for IEM_MC_STORE_GREG_U32. */
4205DECL_INLINE_THROW(uint32_t)
4206iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4207{
4208 Assert(iGReg < 16);
4209 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4210
4211 /*
4212 * If it's a constant value (unlikely) we treat this as a
4213 * IEM_MC_STORE_GREG_U32_CONST statement.
4214 */
4215 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4216 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4217 { /* likely */ }
4218 else
4219 {
4220 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4221 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4222 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
4223 }
4224
4225 /*
4226 * For the rest we allocate a guest register for the variable and write
4227 * it to the CPUMCTX structure.
4228 */
4229 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
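    /* Note: acquiring the variable register "for" the guest register turns it
       directly into the shadow copy of the guest GPR, so no separate
       register-to-register move is needed here (presumably it is simply
       marked dirty when delayed writeback is enabled). */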
4230#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4231 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4232#else
4233 RT_NOREF(idxVarReg);
4234#endif
4235#ifdef VBOX_STRICT
4236 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
4237#endif
4238 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4239 return off;
4240}
4241
4242
4243#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
4244 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
4245
4246/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
4247DECL_INLINE_THROW(uint32_t)
4248iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
4249{
4250 Assert(iGReg < 16);
4251 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4252 kIemNativeGstRegUse_ForFullWrite);
4253 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4254#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4255 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4256#endif
4257 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4258 return off;
4259}
4260
4261
4262#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
4263 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
4264
4265#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
4266 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
4267
4268/** Emits code for IEM_MC_STORE_GREG_U64. */
4269DECL_INLINE_THROW(uint32_t)
4270iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4271{
4272 Assert(iGReg < 16);
4273 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4274
4275 /*
4276 * If it's a constant value (unlikely) we treat this as a
4277 * IEM_MC_STORE_GREG_U64_CONST statement.
4278 */
4279 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4280 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4281 { /* likely */ }
4282 else
4283 {
4284 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4285 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4286 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
4287 }
4288
4289 /*
4290 * For the rest we allocate a guest register for the variable and write
4291 * it to the CPUMCTX structure.
4292 */
4293 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
4294#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4295 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4296#else
4297 RT_NOREF(idxVarReg);
4298#endif
4299 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4300 return off;
4301}
4302
4303
4304#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
4305 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
4306
4307/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
4308DECL_INLINE_THROW(uint32_t)
4309iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
4310{
4311 Assert(iGReg < 16);
4312 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4313 kIemNativeGstRegUse_ForUpdate);
4314 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
4315#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4316 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4317#endif
4318 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4319 return off;
4320}
4321
4322
4323#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4324#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
4325 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
4326
4327/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
4328DECL_INLINE_THROW(uint32_t)
4329iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
4330{
4331 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4332 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4333 Assert(iGRegLo < 16 && iGRegHi < 16);
4334
4335 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4336 kIemNativeGstRegUse_ForFullWrite);
4337 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4338 kIemNativeGstRegUse_ForFullWrite);
4339
4340 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4341 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
4342 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
4343 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
4344
4345 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4346 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4347 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4348 return off;
4349}
4350#endif
4351
4352
4353/*********************************************************************************************************************************
4354* General purpose register manipulation (add, sub). *
4355*********************************************************************************************************************************/
4356
4357#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
4358 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
4359
4360/** Emits code for IEM_MC_ADD_GREG_U16. */
4361DECL_INLINE_THROW(uint32_t)
4362iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
4363{
4364 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4365 kIemNativeGstRegUse_ForUpdate);
4366
4367#ifdef RT_ARCH_AMD64
4368 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4369 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4370 if (idxGstTmpReg >= 8)
4371 pbCodeBuf[off++] = X86_OP_REX_B;
4372 if (uAddend == 1)
4373 {
4374 pbCodeBuf[off++] = 0xff; /* inc */
4375 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4376 }
4377 else
4378 {
4379 pbCodeBuf[off++] = 0x81;
4380 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4381 pbCodeBuf[off++] = uAddend;
4382 pbCodeBuf[off++] = 0;
4383 }
4384
4385#else
4386 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4387 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4388
4389 /* add tmp, gstgrp, uAddend */
4390 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
4391
4392 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
4393 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4394
4395 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4396#endif
4397
4398 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4399
4400#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4401 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4402#endif
4403
4404 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4405 return off;
4406}
4407
4408
4409#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
4410 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4411
4412#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
4413 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4414
4415/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
4416DECL_INLINE_THROW(uint32_t)
4417iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
4418{
4419 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4420 kIemNativeGstRegUse_ForUpdate);
4421
4422#ifdef RT_ARCH_AMD64
4423 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4424 if (f64Bit)
4425 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4426 else if (idxGstTmpReg >= 8)
4427 pbCodeBuf[off++] = X86_OP_REX_B;
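    /* Pick the shortest encoding: INC for an addend of 1, ADD r/m,imm8 (0x83,
       sign-extended) when the addend fits in a signed byte, and ADD r/m,imm32
       (0x81) otherwise -- uAddend is an unsigned byte, so 128..255 must take
       the imm32 form to avoid sign-extension. */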
4428 if (uAddend == 1)
4429 {
4430 pbCodeBuf[off++] = 0xff; /* inc */
4431 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4432 }
4433 else if (uAddend < 128)
4434 {
4435 pbCodeBuf[off++] = 0x83; /* add */
4436 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4437 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4438 }
4439 else
4440 {
4441 pbCodeBuf[off++] = 0x81; /* add */
4442 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4443 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4444 pbCodeBuf[off++] = 0;
4445 pbCodeBuf[off++] = 0;
4446 pbCodeBuf[off++] = 0;
4447 }
4448
4449#else
4450 /* add gstgrp, gstgrp, uAddend */
4451 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4452 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
4453
4454#endif
4455
4456 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4457
4458#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4459 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4460#endif
4461
4462 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4463 return off;
4464}
4465
4466
4467
4468#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
4469 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
4470
4471/** Emits code for IEM_MC_SUB_GREG_U16. */
4472DECL_INLINE_THROW(uint32_t)
4473iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
4474{
4475 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4476 kIemNativeGstRegUse_ForUpdate);
4477
4478#ifdef RT_ARCH_AMD64
4479 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4480 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4481 if (idxGstTmpReg >= 8)
4482 pbCodeBuf[off++] = X86_OP_REX_B;
4483 if (uSubtrahend == 1)
4484 {
4485 pbCodeBuf[off++] = 0xff; /* dec */
4486 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4487 }
4488 else
4489 {
4490 pbCodeBuf[off++] = 0x81;
4491 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4492 pbCodeBuf[off++] = uSubtrahend;
4493 pbCodeBuf[off++] = 0;
4494 }
4495
4496#else
4497 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4498 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4499
4500 /* sub tmp, gstgrp, uSubtrahend */
4501 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
4502
4503 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
4504 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4505
4506 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4507#endif
4508
4509 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4510
4511#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4512 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4513#endif
4514
4515 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4516 return off;
4517}
4518
4519
4520#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
4521 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4522
4523#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
4524 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4525
4526/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
4527DECL_INLINE_THROW(uint32_t)
4528iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
4529{
4530 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4531 kIemNativeGstRegUse_ForUpdate);
4532
4533#ifdef RT_ARCH_AMD64
4534 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4535 if (f64Bit)
4536 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4537 else if (idxGstTmpReg >= 8)
4538 pbCodeBuf[off++] = X86_OP_REX_B;
4539 if (uSubtrahend == 1)
4540 {
4541 pbCodeBuf[off++] = 0xff; /* dec */
4542 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4543 }
4544 else if (uSubtrahend < 128)
4545 {
4546 pbCodeBuf[off++] = 0x83; /* sub */
4547 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4548 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4549 }
4550 else
4551 {
4552 pbCodeBuf[off++] = 0x81; /* sub */
4553 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4554 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4555 pbCodeBuf[off++] = 0;
4556 pbCodeBuf[off++] = 0;
4557 pbCodeBuf[off++] = 0;
4558 }
4559
4560#else
4561 /* sub gstgrp, gstgrp, uSubtrahend */
4562 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4563 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
4564
4565#endif
4566
4567 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4568
4569#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4570 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4571#endif
4572
4573 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4574 return off;
4575}
4576
4577
4578#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
4579 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
4580
4581#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
4582 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
4583
4584#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
4585 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
4586
4587#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
4588 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
4589
4590/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
4591DECL_INLINE_THROW(uint32_t)
4592iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
4593{
4594#ifdef VBOX_STRICT
4595 switch (cbMask)
4596 {
4597 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4598 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4599 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4600 case sizeof(uint64_t): break;
4601 default: AssertFailedBreak();
4602 }
4603#endif
4604
4605 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4606 kIemNativeGstRegUse_ForUpdate);
4607
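    /* For 8- and 16-bit masks the unused upper mask bits are set to all ones
       below, so the full-width AND leaves the rest of the register untouched;
       the 32-bit case deliberately uses the 32-bit AND so its implicit
       zero-extension matches the x86 semantics of 32-bit register writes. */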
4608 switch (cbMask)
4609 {
4610 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
4611 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
4612 break;
4613 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
4614 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
4615 break;
4616 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
4617 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
4618 break;
4619 case sizeof(uint64_t):
4620 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
4621 break;
4622 default: AssertFailedBreak();
4623 }
4624
4625 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4626
4627#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4628 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4629#endif
4630
4631 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4632 return off;
4633}
4634
4635
4636#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
4637 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
4638
4639#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
4640 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
4641
4642#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
4643 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
4644
4645#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
4646 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
4647
4648/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
4649DECL_INLINE_THROW(uint32_t)
4650iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
4651{
4652#ifdef VBOX_STRICT
4653 switch (cbMask)
4654 {
4655 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4656 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4657 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4658 case sizeof(uint64_t): break;
4659 default: AssertFailedBreak();
4660 }
4661#endif
4662
4663 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4664 kIemNativeGstRegUse_ForUpdate);
4665
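    /* OR can never clear bits, so the 8-, 16- and 64-bit cases can share the
       full-width OR without widening the mask; only the 32-bit case needs the
       32-bit form to get the architectural clearing of the upper half. */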
4666 switch (cbMask)
4667 {
4668 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
4669 case sizeof(uint16_t):
4670 case sizeof(uint64_t):
4671 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
4672 break;
4673 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
4674 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
4675 break;
4676 default: AssertFailedBreak();
4677 }
4678
4679 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4680
4681#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4682 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4683#endif
4684
4685 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4686 return off;
4687}
4688
4689
4690/*********************************************************************************************************************************
4691* Local/Argument variable manipulation (add, sub, and, or). *
4692*********************************************************************************************************************************/
4693
4694#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
4695 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
4696
4697#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
4698 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
4699
4700#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
4701 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
4702
4703#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
4704 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
4705
4706
4707#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
4708 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
4709
4710#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
4711 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
4712
4713#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
4714 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
4715
4716/** Emits code for AND'ing a local and a constant value. */
4717DECL_INLINE_THROW(uint32_t)
4718iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
4719{
4720#ifdef VBOX_STRICT
4721 switch (cbMask)
4722 {
4723 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4724 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4725 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4726 case sizeof(uint64_t): break;
4727 default: AssertFailedBreak();
4728 }
4729#endif
4730
4731 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4732 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
4733
4734 if (cbMask <= sizeof(uint32_t))
4735 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
4736 else
4737 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
4738
4739 iemNativeVarRegisterRelease(pReNative, idxVar);
4740 return off;
4741}
4742
4743
4744#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
4745 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
4746
4747#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
4748 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
4749
4750#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
4751 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
4752
4753#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
4754 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
4755
4756/** Emits code for OR'ing a local and a constant value. */
4757DECL_INLINE_THROW(uint32_t)
4758iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
4759{
4760#ifdef VBOX_STRICT
4761 switch (cbMask)
4762 {
4763 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4764 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4765 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4766 case sizeof(uint64_t): break;
4767 default: AssertFailedBreak();
4768 }
4769#endif
4770
4771 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4772 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
4773
4774 if (cbMask <= sizeof(uint32_t))
4775 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
4776 else
4777 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
4778
4779 iemNativeVarRegisterRelease(pReNative, idxVar);
4780 return off;
4781}
4782
4783
4784#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
4785 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
4786
4787#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
4788 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
4789
4790#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
4791 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
4792
4793/** Emits code for reversing the byte order in a local value. */
4794DECL_INLINE_THROW(uint32_t)
4795iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
4796{
4797 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4798 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4799
4800 switch (cbLocal)
4801 {
4802 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
4803 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
4804 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
4805 default: AssertFailedBreak();
4806 }
4807
4808 iemNativeVarRegisterRelease(pReNative, idxVar);
4809 return off;
4810}
4811
4812
4813#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
4814 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
4815
4816#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
4817 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
4818
4819#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
4820 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
4821
4822/** Emits code for shifting left a local value. */
4823DECL_INLINE_THROW(uint32_t)
4824iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
4825{
4826#ifdef VBOX_STRICT
4827 switch (cbLocal)
4828 {
4829 case sizeof(uint8_t): Assert(cShift < 8); break;
4830 case sizeof(uint16_t): Assert(cShift < 16); break;
4831 case sizeof(uint32_t): Assert(cShift < 32); break;
4832 case sizeof(uint64_t): Assert(cShift < 64); break;
4833 default: AssertFailedBreak();
4834 }
4835#endif
4836
4837 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4838 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4839
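    /* After a narrower-than-32-bit shift, mask the result back down to the
       local's declared width so bits shifted past its top don't linger in the
       host register. */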
4840 if (cbLocal <= sizeof(uint32_t))
4841 {
4842 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
4843 if (cbLocal < sizeof(uint32_t))
4844 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
4845 cbLocal == sizeof(uint16_t)
4846 ? UINT32_C(0xffff)
4847 : UINT32_C(0xff));
4848 }
4849 else
4850 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
4851
4852 iemNativeVarRegisterRelease(pReNative, idxVar);
4853 return off;
4854}
4855
4856
4857#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
4858 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
4859
4860#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
4861 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
4862
4863#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
4864 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
4865
4866/** Emits code for arithmetically shifting a local value right. */
4867DECL_INLINE_THROW(uint32_t)
4868iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
4869{
4870#ifdef VBOX_STRICT
4871 switch (cbLocal)
4872 {
4873 case sizeof(int8_t): Assert(cShift < 8); break;
4874 case sizeof(int16_t): Assert(cShift < 16); break;
4875 case sizeof(int32_t): Assert(cShift < 32); break;
4876 case sizeof(int64_t): Assert(cShift < 64); break;
4877 default: AssertFailedBreak();
4878 }
4879#endif
4880
4881 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4882 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4883
4884 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
4885 if (cbLocal == sizeof(uint8_t))
4886 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4887 else if (cbLocal == sizeof(uint16_t))
4888 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
4889
4890 if (cbLocal <= sizeof(uint32_t))
4891 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
4892 else
4893 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
4894
4895 iemNativeVarRegisterRelease(pReNative, idxVar);
4896 return off;
4897}
4898
4899
4900#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
4901 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
4902
4903#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
4904 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
4905
4906#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
4907 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
4908
4909/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
4910DECL_INLINE_THROW(uint32_t)
4911iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
4912{
4913 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
4914 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
4915 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4916 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4917
4918 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4919 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
4920
4921 /* Need to sign extend the value. */
4922 if (cbLocal <= sizeof(uint32_t))
4923 {
4924/** @todo ARM64: In case of boredom, the extended add instruction can do the
4925 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
4926 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
4927
4928 switch (cbLocal)
4929 {
4930 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
4931 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
4932 default: AssertFailed();
4933 }
4934
4935 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
4936 iemNativeRegFreeTmp(pReNative, idxRegTmp);
4937 }
4938 else
4939 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
4940
4941 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
4942 iemNativeVarRegisterRelease(pReNative, idxVar);
4943 return off;
4944}
4945
4946
4947
4948/*********************************************************************************************************************************
4949* EFLAGS *
4950*********************************************************************************************************************************/
4951
4952#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
4953# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
4954#else
4955# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
4956 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
4957
4958DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
4959{
4960 if (fEflOutput)
4961 {
4962 PVMCPUCC const pVCpu = pReNative->pVCpu;
4963# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4964 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
4965 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
4966 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
4967# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
4968 if (fEflOutput & (a_fEfl)) \
4969 { \
4970 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
4971 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
4972 else \
4973 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
4974 } else do { } while (0)
4975# else
4976 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
4977 IEMLIVENESSBIT const LivenessClobbered =
4978 {
4979 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4980 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4981 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
4982 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
4983 };
4984 IEMLIVENESSBIT const LivenessDelayable =
4985 {
4986 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4987 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
4988 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4989 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
4990 };
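        /* Rough interpretation: 'clobbered' flags are written here but never
           read again (their calculation looks entirely skippable), while
           'delayable' flags are written and only potentially consumed by an
           exception or call path (their materialisation could be postponed).
           The counters below record how often each case occurs per flag. */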
4991# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
4992 if (fEflOutput & (a_fEfl)) \
4993 { \
4994 if (LivenessClobbered.a_fLivenessMember) \
4995 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
4996 else if (LivenessDelayable.a_fLivenessMember) \
4997 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
4998 else \
4999 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5000 } else do { } while (0)
5001# endif
5002 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5003 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5004 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5005 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5006 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5007 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5008 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5009# undef CHECK_FLAG_AND_UPDATE_STATS
5010 }
5011 RT_NOREF(fEflInput);
5012}
5013#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5014
5015#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5016#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5017 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
5018
5019/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5020DECL_INLINE_THROW(uint32_t)
5021iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
5022 uint32_t fEflInput, uint32_t fEflOutput)
5023{
5024 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5025 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5026 RT_NOREF(fEflInput, fEflOutput);
5027
5028#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5029# ifdef VBOX_STRICT
5030 if ( pReNative->idxCurCall != 0
5031 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
5032 {
5033 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5034 uint32_t const fBoth = fEflInput | fEflOutput;
5035# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
5036 AssertMsg( !(fBoth & (a_fElfConst)) \
5037 || (!(fEflInput & (a_fElfConst)) \
5038 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5039 : !(fEflOutput & (a_fElfConst)) \
5040 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5041 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5042 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5043 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5044 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5045 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5046 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5047 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5048 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5049 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5050# undef ASSERT_ONE_EFL
5051 }
5052# endif
5053#endif
5054
5055 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5056
5057    /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
5058 * the existing shadow copy. */
5059 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
5060 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5061 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5062 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5063 return off;
5064}
5065
5066
5067
5068/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5069 * start using it with custom native code emission (inlining assembly
5070 * instruction helpers). */
5071#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5072#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5073 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5074 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
5075
5076#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5077#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5078 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5079 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
5080
5081/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5082DECL_INLINE_THROW(uint32_t)
5083iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
5084 bool fUpdateSkipping)
5085{
5086 RT_NOREF(fEflOutput);
5087 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
5088 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5089
5090#ifdef VBOX_STRICT
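    /* Sanity check the value being committed: bit 1 (RA1) must be set and the
       reserved-as-zero bits must be clear, otherwise emit a debugger break. */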
5091 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
5092 uint32_t offFixup = off;
5093 off = iemNativeEmitJnzToFixed(pReNative, off, off);
5094 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
5095 iemNativeFixupFixedJump(pReNative, offFixup, off);
5096
5097 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
5098 offFixup = off;
5099 off = iemNativeEmitJzToFixed(pReNative, off, off);
5100 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
5101 iemNativeFixupFixedJump(pReNative, offFixup, off);
5102
5103    /** @todo validate that only bits in the fEflOutput mask changed. */
5104#endif
5105
5106#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5107 if (fUpdateSkipping)
5108 {
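        /* The commit supplies fresh values for the output status flags, so clear their
           bits in the skipped-EFLAGS tracking (all of it if every status flag is output). */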
5109 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5110 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5111 else
5112 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5113 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5114 }
5115#else
5116 RT_NOREF_PV(fUpdateSkipping);
5117#endif
5118
5119 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5120 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
5121 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5122 return off;
5123}
5124
5125
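/** EFLAGS bit operation performed by iemNativeEmitModifyEFlagsBit(). */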
5126typedef enum IEMNATIVEMITEFLOP
5127{
5128 kIemNativeEmitEflOp_Invalid = 0,
5129 kIemNativeEmitEflOp_Set,
5130 kIemNativeEmitEflOp_Clear,
5131 kIemNativeEmitEflOp_Flip
5132} IEMNATIVEMITEFLOP;
5133
5134#define IEM_MC_SET_EFL_BIT(a_fBit) \
5135 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Set);
5136
5137#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
5138 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Clear);
5139
5140#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
5141 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Flip);
5142
5143/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
5144DECL_INLINE_THROW(uint32_t) iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflBit, IEMNATIVEMITEFLOP enmOp)
5145{
5146 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5147 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/);
5148
5149 switch (enmOp)
5150 {
5151 case kIemNativeEmitEflOp_Set:
5152 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5153 break;
5154 case kIemNativeEmitEflOp_Clear:
5155 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~fEflBit);
5156 break;
5157 case kIemNativeEmitEflOp_Flip:
5158 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5159 break;
5160 default:
5161 AssertFailed();
5162 break;
5163 }
5164
5165 /** @todo No delayed writeback for EFLAGS right now. */
5166 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5167
5168 /* Free but don't flush the EFLAGS register. */
5169 iemNativeRegFreeTmp(pReNative, idxEflReg);
5170
5171 return off;
5172}
5173
5174
5175/*********************************************************************************************************************************
5176* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
5177*********************************************************************************************************************************/
5178
5179#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
5180 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
5181
5182#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
5183 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
5184
5185#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
5186 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
5187
5188
5189/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
5190 * IEM_MC_FETCH_SREG_ZX_U64. */
5191DECL_INLINE_THROW(uint32_t)
5192iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
5193{
5194 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5195 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
5196 Assert(iSReg < X86_SREG_COUNT);
5197
5198 /*
5199     * For now, we will not create a shadow copy of a selector. The rationale
5200     * is that, since we do not recompile the popping and loading of segment
5201     * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for
5202     * pushing and moving to registers, there is only a small chance that the
5203     * shadow copy will be accessed again before the register is reloaded. One
5204     * scenario would be nested calls in 16-bit code, but I doubt it's worth
5205     * the extra register pressure atm.
5206     *
5207     * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
5208     * and iemNativeVarRegisterAcquire for a load scenario. We only have the
5209     * store scenario covered at present (r160730).
5210 */
5211 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5212 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
5213 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
5214 iemNativeVarRegisterRelease(pReNative, idxDstVar);
5215 return off;
5216}
5217
5218
5219
5220/*********************************************************************************************************************************
5221* Register references. *
5222*********************************************************************************************************************************/
5223
5224#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
5225 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
5226
5227#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
5228 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
5229
5230/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
5231DECL_INLINE_THROW(uint32_t)
5232iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
5233{
5234 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
5235 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5236 Assert(iGRegEx < 20);
5237
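    /* iGRegEx values 0..15 are the regular GPRs, while 16..19 refer to the high
       byte (AH/CH/DH/BH) of the first four. */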
5238 if (iGRegEx < 16)
5239 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5240 else
5241 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
5242
5243 /* If we've delayed writing back the register value, flush it now. */
5244 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5245
5246 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5247 if (!fConst)
5248 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
5249
5250 return off;
5251}
5252
5253#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
5254 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
5255
5256#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
5257 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
5258
5259#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
5260 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
5261
5262#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
5263 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
5264
5265#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
5266 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
5267
5268#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
5269 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
5270
5271#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
5272 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
5273
5274#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
5275 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
5276
5277#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
5278 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
5279
5280#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
5281 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
5282
5283/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
5284DECL_INLINE_THROW(uint32_t)
5285iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
5286{
5287 Assert(iGReg < 16);
5288 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
5289 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5290
5291 /* If we've delayed writing back the register value, flush it now. */
5292 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
5293
5294 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5295 if (!fConst)
5296 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
5297
5298 return off;
5299}
5300
5301
5302#undef IEM_MC_REF_EFLAGS /* should not be used. */
5303#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
5304 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5305 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
5306
5307/** Handles IEM_MC_REF_EFLAGS. */
5308DECL_INLINE_THROW(uint32_t)
5309iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
5310{
5311 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
5312 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5313
5314#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5315 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5316
5317 /* Updating the skipping according to the outputs is a little early, but
5318 we don't have any other hooks for references atm. */
5319 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5320 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5321 else if (fEflOutput & X86_EFL_STATUS_BITS)
5322 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5323 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5324#else
5325 RT_NOREF(fEflInput, fEflOutput);
5326#endif
5327
5328 /* If we've delayed writing back the register value, flush it now. */
5329 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
5330
5331 /* If there is a shadow copy of guest EFLAGS, flush it now. */
5332 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
5333
5334 return off;
5335}
5336
5337
5338/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
5339 * different code from the threaded recompiler, maybe it would be helpful. For now
5340 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
5341#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
5342
5343
5344#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
5345 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
5346
5347#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
5348 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
5349
5350#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
5351 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
5352
5353#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5354/* Just being paranoid here. */
5355# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
5356AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
5357AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
5358AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
5359AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
5360# endif
5361AssertCompileMemberOffset(X86XMMREG, au64, 0);
5362AssertCompileMemberOffset(X86XMMREG, au32, 0);
5363AssertCompileMemberOffset(X86XMMREG, ar64, 0);
5364AssertCompileMemberOffset(X86XMMREG, ar32, 0);
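/* The offset-zero assertions above are what allow iemNativeEmitRefXregXxx to hand out a
   pointer to the whole 128-bit register for the u32/u64/r32/r64 reference variants below. */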
5365
5366# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
5367 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
5368# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
5369 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
5370# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
5371 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
5372# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
5373 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
5374#endif
5375
5376/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
5377DECL_INLINE_THROW(uint32_t)
5378iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
5379{
5380 Assert(iXReg < 16);
5381 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
5382 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5383
5384 /* If we've delayed writing back the register value, flush it now. */
5385 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
5386
5387#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5388 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5389 if (!fConst)
5390 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
5391#else
5392 RT_NOREF(fConst);
5393#endif
5394
5395 return off;
5396}
5397
5398
5399
5400/*********************************************************************************************************************************
5401* Effective Address Calculation *
5402*********************************************************************************************************************************/
5403#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
5404 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
5405
5406/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
5407 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
5408DECL_INLINE_THROW(uint32_t)
5409iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5410 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
5411{
5412 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5413
5414 /*
5415 * Handle the disp16 form with no registers first.
5416 *
5417 * Convert to an immediate value, as that'll delay the register allocation
5418 * and assignment till the memory access / call / whatever and we can use
5419 * a more appropriate register (or none at all).
5420 */
5421 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
5422 {
5423 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
5424 return off;
5425 }
5426
5427    /* Determine the displacement. */
5428 uint16_t u16EffAddr;
5429 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5430 {
5431 case 0: u16EffAddr = 0; break;
5432 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
5433 case 2: u16EffAddr = u16Disp; break;
5434 default: AssertFailedStmt(u16EffAddr = 0);
5435 }
5436
5437 /* Determine the registers involved. */
5438 uint8_t idxGstRegBase;
5439 uint8_t idxGstRegIndex;
5440 switch (bRm & X86_MODRM_RM_MASK)
5441 {
5442 case 0:
5443 idxGstRegBase = X86_GREG_xBX;
5444 idxGstRegIndex = X86_GREG_xSI;
5445 break;
5446 case 1:
5447 idxGstRegBase = X86_GREG_xBX;
5448 idxGstRegIndex = X86_GREG_xDI;
5449 break;
5450 case 2:
5451 idxGstRegBase = X86_GREG_xBP;
5452 idxGstRegIndex = X86_GREG_xSI;
5453 break;
5454 case 3:
5455 idxGstRegBase = X86_GREG_xBP;
5456 idxGstRegIndex = X86_GREG_xDI;
5457 break;
5458 case 4:
5459 idxGstRegBase = X86_GREG_xSI;
5460 idxGstRegIndex = UINT8_MAX;
5461 break;
5462 case 5:
5463 idxGstRegBase = X86_GREG_xDI;
5464 idxGstRegIndex = UINT8_MAX;
5465 break;
5466 case 6:
5467 idxGstRegBase = X86_GREG_xBP;
5468 idxGstRegIndex = UINT8_MAX;
5469 break;
5470#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
5471 default:
5472#endif
5473 case 7:
5474 idxGstRegBase = X86_GREG_xBX;
5475 idxGstRegIndex = UINT8_MAX;
5476 break;
5477 }
5478
5479 /*
5480 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
5481 */
5482 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5483 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5484 kIemNativeGstRegUse_ReadOnly);
5485 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
5486 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5487 kIemNativeGstRegUse_ReadOnly)
5488 : UINT8_MAX;
5489#ifdef RT_ARCH_AMD64
5490 if (idxRegIndex == UINT8_MAX)
5491 {
5492 if (u16EffAddr == 0)
5493 {
5494            /* movzx ret, base */
5495 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
5496 }
5497 else
5498 {
5499 /* lea ret32, [base64 + disp32] */
5500 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5501 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5502 if (idxRegRet >= 8 || idxRegBase >= 8)
5503 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
5504 pbCodeBuf[off++] = 0x8d;
5505 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5506 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
5507 else
5508 {
5509 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
5510 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5511 }
5512 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5513 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5514 pbCodeBuf[off++] = 0;
5515 pbCodeBuf[off++] = 0;
5516 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5517
5518 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5519 }
5520 }
5521 else
5522 {
5523 /* lea ret32, [index64 + base64 (+ disp32)] */
5524 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5525 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5526 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5527 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5528 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5529 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5530 pbCodeBuf[off++] = 0x8d;
5531 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
5532 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5533 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
5534 if (bMod == X86_MOD_MEM4)
5535 {
5536 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5537 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5538 pbCodeBuf[off++] = 0;
5539 pbCodeBuf[off++] = 0;
5540 }
5541 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5542 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5543 }
5544
5545#elif defined(RT_ARCH_ARM64)
5546 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5547 if (u16EffAddr == 0)
5548 {
5549 if (idxRegIndex == UINT8_MAX)
5550 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
5551 else
5552 {
5553 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
5554 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5555 }
5556 }
5557 else
5558 {
5559 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
5560 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
5561 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
5562 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
5563 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
5564 else
5565 {
5566 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
5567 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
5568 }
5569 if (idxRegIndex != UINT8_MAX)
5570 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
5571 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5572 }
5573
5574#else
5575# error "port me"
5576#endif
5577
5578 if (idxRegIndex != UINT8_MAX)
5579 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5580 iemNativeRegFreeTmp(pReNative, idxRegBase);
5581 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5582 return off;
5583}
5584
5585
5586#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
5587 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
5588
5589/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
5590 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
5591DECL_INLINE_THROW(uint32_t)
5592iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5593 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
5594{
5595 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5596
5597 /*
5598 * Handle the disp32 form with no registers first.
5599 *
5600 * Convert to an immediate value, as that'll delay the register allocation
5601 * and assignment till the memory access / call / whatever and we can use
5602 * a more appropriate register (or none at all).
5603 */
5604 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
5605 {
5606 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
5607 return off;
5608 }
5609
5610    /* Calculate the fixed displacement (more on SIB.B=4 and SIB.B=5 further down). */
5611 uint32_t u32EffAddr = 0;
5612 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5613 {
5614 case 0: break;
5615 case 1: u32EffAddr = (int8_t)u32Disp; break;
5616 case 2: u32EffAddr = u32Disp; break;
5617 default: AssertFailed();
5618 }
5619
5620 /* Get the register (or SIB) value. */
5621 uint8_t idxGstRegBase = UINT8_MAX;
5622 uint8_t idxGstRegIndex = UINT8_MAX;
5623 uint8_t cShiftIndex = 0;
5624 switch (bRm & X86_MODRM_RM_MASK)
5625 {
5626 case 0: idxGstRegBase = X86_GREG_xAX; break;
5627 case 1: idxGstRegBase = X86_GREG_xCX; break;
5628 case 2: idxGstRegBase = X86_GREG_xDX; break;
5629 case 3: idxGstRegBase = X86_GREG_xBX; break;
5630 case 4: /* SIB */
5631 {
5632            /* index with scaling. */
5633 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
5634 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
5635 {
5636 case 0: idxGstRegIndex = X86_GREG_xAX; break;
5637 case 1: idxGstRegIndex = X86_GREG_xCX; break;
5638 case 2: idxGstRegIndex = X86_GREG_xDX; break;
5639 case 3: idxGstRegIndex = X86_GREG_xBX; break;
5640 case 4: cShiftIndex = 0; /*no index*/ break;
5641 case 5: idxGstRegIndex = X86_GREG_xBP; break;
5642 case 6: idxGstRegIndex = X86_GREG_xSI; break;
5643 case 7: idxGstRegIndex = X86_GREG_xDI; break;
5644 }
5645
5646 /* base */
5647 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
5648 {
5649 case 0: idxGstRegBase = X86_GREG_xAX; break;
5650 case 1: idxGstRegBase = X86_GREG_xCX; break;
5651 case 2: idxGstRegBase = X86_GREG_xDX; break;
5652 case 3: idxGstRegBase = X86_GREG_xBX; break;
5653 case 4:
5654 idxGstRegBase = X86_GREG_xSP;
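                    /* pop [esp] hack: bits 8 and up of uSibAndRspOffset hold the fixed ESP offset. */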
5655 u32EffAddr += uSibAndRspOffset >> 8;
5656 break;
5657 case 5:
5658 if ((bRm & X86_MODRM_MOD_MASK) != 0)
5659 idxGstRegBase = X86_GREG_xBP;
5660 else
5661 {
5662 Assert(u32EffAddr == 0);
5663 u32EffAddr = u32Disp;
5664 }
5665 break;
5666 case 6: idxGstRegBase = X86_GREG_xSI; break;
5667 case 7: idxGstRegBase = X86_GREG_xDI; break;
5668 }
5669 break;
5670 }
5671 case 5: idxGstRegBase = X86_GREG_xBP; break;
5672 case 6: idxGstRegBase = X86_GREG_xSI; break;
5673 case 7: idxGstRegBase = X86_GREG_xDI; break;
5674 }
5675
5676 /*
5677 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
5678 * the start of the function.
5679 */
5680 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
5681 {
5682 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
5683 return off;
5684 }
5685
5686 /*
5687 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
5688 */
5689 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5690 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
5691 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5692 kIemNativeGstRegUse_ReadOnly);
5693 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
5694 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5695 kIemNativeGstRegUse_ReadOnly);
5696
5697 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
5698 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
5699 {
5700 idxRegBase = idxRegIndex;
5701 idxRegIndex = UINT8_MAX;
5702 }
5703
5704#ifdef RT_ARCH_AMD64
5705 if (idxRegIndex == UINT8_MAX)
5706 {
5707 if (u32EffAddr == 0)
5708 {
5709 /* mov ret, base */
5710 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5711 }
5712 else
5713 {
5714 /* lea ret32, [base64 + disp32] */
5715 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5716 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5717 if (idxRegRet >= 8 || idxRegBase >= 8)
5718 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
5719 pbCodeBuf[off++] = 0x8d;
5720 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5721 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5722 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
5723 else
5724 {
5725 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5726 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5727 }
5728 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5729 if (bMod == X86_MOD_MEM4)
5730 {
5731 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5732 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5733 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5734 }
5735 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5736 }
5737 }
5738 else
5739 {
5740 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5741 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5742 if (idxRegBase == UINT8_MAX)
5743 {
5744 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
5745 if (idxRegRet >= 8 || idxRegIndex >= 8)
5746 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5747 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5748 pbCodeBuf[off++] = 0x8d;
5749 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
5750 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
5751 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5752 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5753 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5754 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5755 }
5756 else
5757 {
5758 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
5759 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5760 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5761 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5762 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5763 pbCodeBuf[off++] = 0x8d;
5764 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
5765 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5766 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5767 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
5768 if (bMod != X86_MOD_MEM0)
5769 {
5770 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5771 if (bMod == X86_MOD_MEM4)
5772 {
5773 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5774 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5775 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5776 }
5777 }
5778 }
5779 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5780 }
5781
5782#elif defined(RT_ARCH_ARM64)
5783 if (u32EffAddr == 0)
5784 {
5785 if (idxRegIndex == UINT8_MAX)
5786 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5787 else if (idxRegBase == UINT8_MAX)
5788 {
5789 if (cShiftIndex == 0)
5790 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
5791 else
5792 {
5793 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5794 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
5795 }
5796 }
5797 else
5798 {
5799 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5800 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
5801 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
5802 }
5803 }
5804 else
5805 {
5806 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
5807 {
5808 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5809 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
5810 }
5811 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
5812 {
5813 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5814 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
5815 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
5816 }
5817 else
5818 {
5819 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
5820 if (idxRegBase != UINT8_MAX)
5821 {
5822 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5823 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
5824 }
5825 }
5826 if (idxRegIndex != UINT8_MAX)
5827 {
5828 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5829 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
5830 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
5831 }
5832 }
5833
5834#else
5835# error "port me"
5836#endif
5837
5838 if (idxRegIndex != UINT8_MAX)
5839 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5840 if (idxRegBase != UINT8_MAX)
5841 iemNativeRegFreeTmp(pReNative, idxRegBase);
5842 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5843 return off;
5844}
5845
5846
5847#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5848 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5849 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
5850
5851#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5852 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5853 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
5854
5855#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5856 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5857 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
5858
5859/**
5860 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
5861 *
5862 * @returns New off.
5863 * @param pReNative        The native recompile state.
5864 * @param off              The code buffer offset.
5865 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
5866 * bit 4 to REX.X. The two bits are part of the
5867 * REG sub-field, which isn't needed in this
5868 * function.
5869 * @param uSibAndRspOffset Two parts:
5870 * - The first 8 bits make up the SIB byte.
5871 * - The next 8 bits are the fixed RSP/ESP offset
5872 * in case of a pop [xSP].
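 *                          E.g. 0x0824 would encode SIB byte 0x24 (base=RSP, no
 *                          index) together with a fixed RSP/ESP offset of 8.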
5873 * @param u32Disp The displacement byte/word/dword, if any.
5874 * @param cbInstr The size of the fully decoded instruction. Used
5875 * for RIP relative addressing.
5876 * @param idxVarRet The result variable number.
5877 * @param f64Bit Whether to use a 64-bit or 32-bit address size
5878 * when calculating the address.
5879 *
5880 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
5881 */
5882DECL_INLINE_THROW(uint32_t)
5883iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
5884 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
5885{
5886 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5887
5888 /*
5889 * Special case the rip + disp32 form first.
5890 */
5891 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
5892 {
5893#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5894        /* Need to take the current PC offset into account for the displacement; no need to flush here
5895         * as the PC is only accessed read-only and no branching or helper calls are involved. */
5896 u32Disp += pReNative->Core.offPc;
5897#endif
5898
5899 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5900 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
5901 kIemNativeGstRegUse_ReadOnly);
5902#ifdef RT_ARCH_AMD64
5903 if (f64Bit)
5904 {
5905 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
5906 if ((int32_t)offFinalDisp == offFinalDisp)
5907 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
5908 else
5909 {
5910 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
5911 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
5912 }
5913 }
5914 else
5915 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
5916
5917#elif defined(RT_ARCH_ARM64)
5918 if (f64Bit)
5919 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
5920 (int64_t)(int32_t)u32Disp + cbInstr);
5921 else
5922 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
5923 (int32_t)u32Disp + cbInstr);
5924
5925#else
5926# error "Port me!"
5927#endif
5928 iemNativeRegFreeTmp(pReNative, idxRegPc);
5929 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5930 return off;
5931 }
5932
5933    /* Calculate the fixed displacement (more on SIB.B=4 and SIB.B=5 further down). */
5934 int64_t i64EffAddr = 0;
5935 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5936 {
5937 case 0: break;
5938 case 1: i64EffAddr = (int8_t)u32Disp; break;
5939 case 2: i64EffAddr = (int32_t)u32Disp; break;
5940 default: AssertFailed();
5941 }
5942
5943 /* Get the register (or SIB) value. */
5944 uint8_t idxGstRegBase = UINT8_MAX;
5945 uint8_t idxGstRegIndex = UINT8_MAX;
5946 uint8_t cShiftIndex = 0;
5947 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
5948 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
5949 else /* SIB: */
5950 {
5951        /* index with scaling. */
5952 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
5953 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
5954 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
5955 if (idxGstRegIndex == 4)
5956 {
5957 /* no index */
5958 cShiftIndex = 0;
5959 idxGstRegIndex = UINT8_MAX;
5960 }
5961
5962 /* base */
5963 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
5964 if (idxGstRegBase == 4)
5965 {
5966 /* pop [rsp] hack */
5967 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
5968 }
5969 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
5970 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
5971 {
5972 /* mod=0 and base=5 -> disp32, no base reg. */
5973 Assert(i64EffAddr == 0);
5974 i64EffAddr = (int32_t)u32Disp;
5975 idxGstRegBase = UINT8_MAX;
5976 }
5977 }
5978
5979 /*
5980 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
5981 * the start of the function.
5982 */
5983 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
5984 {
5985 if (f64Bit)
5986 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
5987 else
5988 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
5989 return off;
5990 }
5991
5992 /*
5993 * Now emit code that calculates:
5994 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
5995 * or if !f64Bit:
5996 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
5997 */
5998 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5999 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6000 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6001 kIemNativeGstRegUse_ReadOnly);
6002 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6003 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6004 kIemNativeGstRegUse_ReadOnly);
6005
6006 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6007 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6008 {
6009 idxRegBase = idxRegIndex;
6010 idxRegIndex = UINT8_MAX;
6011 }
6012
6013#ifdef RT_ARCH_AMD64
6014 uint8_t bFinalAdj;
6015 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6016 bFinalAdj = 0; /* likely */
6017 else
6018 {
6019 /* pop [rsp] with a problematic disp32 value. Split out the
6020 RSP offset and add it separately afterwards (bFinalAdj). */
6021 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6022 Assert(idxGstRegBase == X86_GREG_xSP);
6023 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6024 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6025 Assert(bFinalAdj != 0);
6026 i64EffAddr -= bFinalAdj;
6027 Assert((int32_t)i64EffAddr == i64EffAddr);
6028 }
6029 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6030//pReNative->pInstrBuf[off++] = 0xcc;
6031
6032 if (idxRegIndex == UINT8_MAX)
6033 {
6034 if (u32EffAddr == 0)
6035 {
6036 /* mov ret, base */
6037 if (f64Bit)
6038 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6039 else
6040 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6041 }
6042 else
6043 {
6044 /* lea ret, [base + disp32] */
6045 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6046 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6047 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6048 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6049 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6050 | (f64Bit ? X86_OP_REX_W : 0);
6051 pbCodeBuf[off++] = 0x8d;
6052 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6053 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6054 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6055 else
6056 {
6057 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6058 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6059 }
6060 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6061 if (bMod == X86_MOD_MEM4)
6062 {
6063 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6064 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6065 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6066 }
6067 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6068 }
6069 }
6070 else
6071 {
6072 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6073 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6074 if (idxRegBase == UINT8_MAX)
6075 {
6076 /* lea ret, [(index64 << cShiftIndex) + disp32] */
6077 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
6078 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6079 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6080 | (f64Bit ? X86_OP_REX_W : 0);
6081 pbCodeBuf[off++] = 0x8d;
6082 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6083 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6084 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6085 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6086 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6087 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6088 }
6089 else
6090 {
6091 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6092 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6093 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6094 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6095 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6096 | (f64Bit ? X86_OP_REX_W : 0);
6097 pbCodeBuf[off++] = 0x8d;
6098 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6099 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6100 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6101 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6102 if (bMod != X86_MOD_MEM0)
6103 {
6104 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6105 if (bMod == X86_MOD_MEM4)
6106 {
6107 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6108 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6109 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6110 }
6111 }
6112 }
6113 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6114 }
6115
6116 if (!bFinalAdj)
6117 { /* likely */ }
6118 else
6119 {
6120 Assert(f64Bit);
6121 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
6122 }
6123
6124#elif defined(RT_ARCH_ARM64)
6125 if (i64EffAddr == 0)
6126 {
6127 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6128 if (idxRegIndex == UINT8_MAX)
6129 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
6130 else if (idxRegBase != UINT8_MAX)
6131 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6132 f64Bit, false /*fSetFlags*/, cShiftIndex);
6133 else
6134 {
6135 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
6136 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
6137 }
6138 }
6139 else
6140 {
6141 if (f64Bit)
6142 { /* likely */ }
6143 else
6144 i64EffAddr = (int32_t)i64EffAddr;
6145
6146 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
6147 {
6148 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6149 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
6150 }
6151 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
6152 {
6153 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6154 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
6155 }
6156 else
6157 {
6158 if (f64Bit)
6159 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
6160 else
6161 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
6162 if (idxRegBase != UINT8_MAX)
6163 {
6164 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6165 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
6166 }
6167 }
6168 if (idxRegIndex != UINT8_MAX)
6169 {
6170 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6171 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6172 f64Bit, false /*fSetFlags*/, cShiftIndex);
6173 }
6174 }
6175
6176#else
6177# error "port me"
6178#endif
6179
6180 if (idxRegIndex != UINT8_MAX)
6181 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6182 if (idxRegBase != UINT8_MAX)
6183 iemNativeRegFreeTmp(pReNative, idxRegBase);
6184 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6185 return off;
6186}
6187
6188
6189/*********************************************************************************************************************************
6190* Memory fetches and stores common *
6191*********************************************************************************************************************************/
6192
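/** Memory fetch/store operation kind for iemNativeEmitMemFetchStoreDataCommon(). */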
6193typedef enum IEMNATIVEMITMEMOP
6194{
6195 kIemNativeEmitMemOp_Store = 0,
6196 kIemNativeEmitMemOp_Fetch,
6197 kIemNativeEmitMemOp_Fetch_Zx_U16,
6198 kIemNativeEmitMemOp_Fetch_Zx_U32,
6199 kIemNativeEmitMemOp_Fetch_Zx_U64,
6200 kIemNativeEmitMemOp_Fetch_Sx_U16,
6201 kIemNativeEmitMemOp_Fetch_Sx_U32,
6202 kIemNativeEmitMemOp_Fetch_Sx_U64
6203} IEMNATIVEMITMEMOP;
6204
6205/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
6206 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
6207 * (with iSegReg = UINT8_MAX). */
6208DECL_INLINE_THROW(uint32_t)
6209iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
6210 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
6211 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
6212{
6213 /*
6214 * Assert sanity.
6215 */
6216 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
6217 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
6218 Assert( enmOp != kIemNativeEmitMemOp_Store
6219 || pVarValue->enmKind == kIemNativeVarKind_Immediate
6220 || pVarValue->enmKind == kIemNativeVarKind_Stack);
6221 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
6222 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
6223 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
6224 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
6225 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6226 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
6227#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6228 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
6229 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
6230#else
6231 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
6232#endif
6233 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6234#ifdef VBOX_STRICT
6235 if (iSegReg == UINT8_MAX)
6236 {
6237 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6238 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6239 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6240 switch (cbMem)
6241 {
6242 case 1:
6243 Assert( pfnFunction
6244 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
6245 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6246 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6247 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6248 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6249 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
6250 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
6251 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
6252 : UINT64_C(0xc000b000a0009000) ));
6253 break;
6254 case 2:
6255 Assert( pfnFunction
6256 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
6257 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6258 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6259 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6260 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
6261 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
6262 : UINT64_C(0xc000b000a0009000) ));
6263 break;
6264 case 4:
6265 Assert( pfnFunction
6266 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
6267 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6268 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6269 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
6270 : UINT64_C(0xc000b000a0009000) ));
6271 break;
6272 case 8:
6273 Assert( pfnFunction
6274 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
6275 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
6276 : UINT64_C(0xc000b000a0009000) ));
6277 break;
6278#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6279 case sizeof(RTUINT128U):
6280 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6281 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
6282 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
6283 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
6284 || ( enmOp == kIemNativeEmitMemOp_Store
6285 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
6286 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
6287 break;
6288 case sizeof(RTUINT256U):
6289 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6290 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
6291 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
6292 || ( enmOp == kIemNativeEmitMemOp_Store
6293 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
6294 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
6295 break;
6296#endif
6297 }
6298 }
6299 else
6300 {
6301 Assert(iSegReg < 6);
6302 switch (cbMem)
6303 {
6304 case 1:
6305 Assert( pfnFunction
6306 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
6307 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
6308 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6309 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6310 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6311 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
6312 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
6313 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
6314 : UINT64_C(0xc000b000a0009000) ));
6315 break;
6316 case 2:
6317 Assert( pfnFunction
6318 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
6319 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
6320 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6321 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6322 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
6323 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
6324 : UINT64_C(0xc000b000a0009000) ));
6325 break;
6326 case 4:
6327 Assert( pfnFunction
6328 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
6329 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
6330 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
6331 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
6332 : UINT64_C(0xc000b000a0009000) ));
6333 break;
6334 case 8:
6335 Assert( pfnFunction
6336 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
6337 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
6338 : UINT64_C(0xc000b000a0009000) ));
6339 break;
6340#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6341 case sizeof(RTUINT128U):
6342 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6343 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
6344 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
6345 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
6346 || ( enmOp == kIemNativeEmitMemOp_Store
6347 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
6348 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
6349 break;
6350 case sizeof(RTUINT256U):
6351 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6352 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
6353 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
6354 || ( enmOp == kIemNativeEmitMemOp_Store
6355 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
6356 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
6357 break;
6358#endif
6359 }
6360 }
6361#endif
6362
6363#ifdef VBOX_STRICT
6364 /*
6365 * Check that the fExec flags we've got make sense.
6366 */
6367 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6368#endif
6369
6370 /*
6371 * To keep things simple we have to commit any pending writes first as we
6372 * may end up making calls.
6373 */
6374 /** @todo we could postpone this till we make the call and reload the
6375 * registers after returning from the call. Not sure if that's sensible or
6376 * not, though. */
6377#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6378 off = iemNativeRegFlushPendingWrites(pReNative, off);
6379#else
6380 /* The program counter is treated differently for now. */
6381 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
6382#endif
6383
6384#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6385 /*
6386 * Move/spill/flush stuff out of call-volatile registers.
6387 * This is the easy way out. We could contain this to the tlb-miss branch
6388 * by saving and restoring active stuff here.
6389 */
6390 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
6391#endif
6392
6393 /*
6394 * Define labels and allocate the result register (trying for the return
6395 * register if we can).
6396 */
6397 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6398#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6399 uint8_t idxRegValueFetch = UINT8_MAX;
6400
6401 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6402 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6403 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
6404 else
6405 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6406 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6407 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6408 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6409#else
6410 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6411 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6412 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6413 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6414#endif
6415 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
6416
6417#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6418 uint8_t idxRegValueStore = UINT8_MAX;
6419
6420 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6421 idxRegValueStore = !TlbState.fSkip
6422 && enmOp == kIemNativeEmitMemOp_Store
6423 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6424 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6425 : UINT8_MAX;
6426 else
6427 idxRegValueStore = !TlbState.fSkip
6428 && enmOp == kIemNativeEmitMemOp_Store
6429 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6430 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6431 : UINT8_MAX;
6432
6433#else
6434 uint8_t const idxRegValueStore = !TlbState.fSkip
6435 && enmOp == kIemNativeEmitMemOp_Store
6436 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6437 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6438 : UINT8_MAX;
6439#endif
6440     uint8_t  const idxRegMemResult     = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
6441 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6442 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6443 : UINT32_MAX;
6444
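    /*
     * Rough sketch of the native code layout emitted below when the TLB lookup isn't skipped
     * (for orientation only; not a literal listing):
     *
     *          jmp     TlbLookup
     *      TlbMiss:
     *          ; save volatile regs / update PC, set up arguments, call the pfnFunction helper,
     *          ; move the result into place, restore regs
     *          jmp     TlbDone
     *      TlbLookup:
     *          ; inline TLB probe emitted by iemNativeEmitTlbLookup (misses branch to TlbMiss),
     *          ; followed by the inline load/store via idxRegMemResult
     *      TlbDone:
     */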
6445 /*
6446 * Jump to the TLB lookup code.
6447 */
6448 if (!TlbState.fSkip)
6449 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6450
6451 /*
6452 * TlbMiss:
6453 *
6454      * Call helper to do the fetching or storing.
6455 * We flush all guest register shadow copies here.
6456 */
6457 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
6458
6459#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6460 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6461#else
6462 RT_NOREF(idxInstr);
6463#endif
6464
6465#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6466 if (pReNative->Core.offPc)
6467 {
6468 /*
6469 * Update the program counter but restore it at the end of the TlbMiss branch.
6470          * This should allow delaying more program counter updates for the TlbLookup and hit paths,
6471          * which are hopefully much more frequent, reducing the number of memory accesses.
6472 */
6473 /* Allocate a temporary PC register. */
6474 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6475
6476 /* Perform the addition and store the result. */
6477 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6478 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6479
6480 /* Free and flush the PC register. */
6481 iemNativeRegFreeTmp(pReNative, idxPcReg);
6482 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
6483 }
6484#endif
6485
6486#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6487 /* Save variables in volatile registers. */
6488 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6489 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
6490 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
6491 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6492#endif
6493
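    /*
     * Helper call argument layout used below: ARG0 = pVCpu and ARG1 = GCPtrMem (with any displacement
     * folded in); for stores and SIMD accesses the value (or its stack reference) goes in ARG2 for the
     * flat forms and in ARG3 for the segmented ones, where ARG2 carries iSegReg instead.
     */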
6494 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
6495 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
6496#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6497 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6498 {
6499 /*
6500 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
6501 *
6502          * @note There was a host register assigned to the variable for the TlbLookup case above which
6503          *       must not be freed here, or the value will not be synced into that register further down
6504          *       the road because the variable would no longer know it has a register assigned.
6505 *
6506 * @note For loads it is not required to sync what is in the assigned register with the stack slot
6507 * as it will be overwritten anyway.
6508 */
6509 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6510 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
6511 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
6512 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6513 }
6514 else
6515#endif
6516 if (enmOp == kIemNativeEmitMemOp_Store)
6517 {
6518 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6519         off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*offAddend*/,
6520#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6521 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6522#else
6523 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
6524 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6525#endif
6526 }
6527
6528 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
6529     off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*offAddend*/,
6530#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6531 fVolGregMask);
6532#else
6533 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
6534#endif
6535
6536 if (iSegReg != UINT8_MAX)
6537 {
6538 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
6539 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
6540 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
6541 }
6542
6543 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6544 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6545
6546 /* Done setting up parameters, make the call. */
6547 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6548
6549 /*
6550 * Put the result in the right register if this is a fetch.
6551 */
6552 if (enmOp != kIemNativeEmitMemOp_Store)
6553 {
6554#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6555 if ( cbMem == sizeof(RTUINT128U)
6556 || cbMem == sizeof(RTUINT256U))
6557 {
6558 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
6559
6560 /* Sync the value on the stack with the host register assigned to the variable. */
6561 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
6562 }
6563 else
6564#endif
6565 {
6566 Assert(idxRegValueFetch == pVarValue->idxReg);
6567 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
6568 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
6569 }
6570 }
6571
6572#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6573 /* Restore variables and guest shadow registers to volatile registers. */
6574 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6575 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6576#endif
6577
6578#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6579 if (pReNative->Core.offPc)
6580 {
6581 /*
6582 * Time to restore the program counter to its original value.
6583 */
6584 /* Allocate a temporary PC register. */
6585 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6586
6587 /* Restore the original value. */
6588 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6589 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6590
6591 /* Free and flush the PC register. */
6592 iemNativeRegFreeTmp(pReNative, idxPcReg);
6593 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
6594 }
6595#endif
6596
6597#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6598 if (!TlbState.fSkip)
6599 {
6600 /* end of TlbMiss - Jump to the done label. */
6601 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6602 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6603
6604 /*
6605 * TlbLookup:
6606 */
6607 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
6608 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
6609 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
6610
6611 /*
6612 * Emit code to do the actual storing / fetching.
6613 */
6614 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
6615# ifdef VBOX_WITH_STATISTICS
6616 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6617 enmOp == kIemNativeEmitMemOp_Store
6618                                                     ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
6619                                                     : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
6620# endif
6621 switch (enmOp)
6622 {
6623 case kIemNativeEmitMemOp_Store:
6624 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
6625 {
6626 switch (cbMem)
6627 {
6628 case 1:
6629 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6630 break;
6631 case 2:
6632 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6633 break;
6634 case 4:
6635 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6636 break;
6637 case 8:
6638 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6639 break;
6640#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6641 case sizeof(RTUINT128U):
6642 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6643 break;
6644 case sizeof(RTUINT256U):
6645 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6646 break;
6647#endif
6648 default:
6649 AssertFailed();
6650 }
6651 }
6652 else
6653 {
6654 switch (cbMem)
6655 {
6656 case 1:
6657 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
6658 idxRegMemResult, TlbState.idxReg1);
6659 break;
6660 case 2:
6661 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
6662 idxRegMemResult, TlbState.idxReg1);
6663 break;
6664 case 4:
6665 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
6666 idxRegMemResult, TlbState.idxReg1);
6667 break;
6668 case 8:
6669 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
6670 idxRegMemResult, TlbState.idxReg1);
6671 break;
6672 default:
6673 AssertFailed();
6674 }
6675 }
6676 break;
6677
6678 case kIemNativeEmitMemOp_Fetch:
6679 case kIemNativeEmitMemOp_Fetch_Zx_U16:
6680 case kIemNativeEmitMemOp_Fetch_Zx_U32:
6681 case kIemNativeEmitMemOp_Fetch_Zx_U64:
6682 switch (cbMem)
6683 {
6684 case 1:
6685 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6686 break;
6687 case 2:
6688 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6689 break;
6690 case 4:
6691 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6692 break;
6693 case 8:
6694 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6695 break;
6696#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6697 case sizeof(RTUINT128U):
6698 /*
6699                          * No need to sync the register back to the stack; this is done by the generic variable handling
6700 * code if there is a register assigned to a variable and the stack must be accessed.
6701 */
6702 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6703 break;
6704 case sizeof(RTUINT256U):
6705                          * No need to sync the register back to the stack; this is done by the generic variable handling
6706 * No need to sync back the register with the stack, this is done by the generic variable handling
6707 * code if there is a register assigned to a variable and the stack must be accessed.
6708 */
6709 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6710 break;
6711#endif
6712 default:
6713 AssertFailed();
6714 }
6715 break;
6716
6717 case kIemNativeEmitMemOp_Fetch_Sx_U16:
6718 Assert(cbMem == 1);
6719 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6720 break;
6721
6722 case kIemNativeEmitMemOp_Fetch_Sx_U32:
6723 Assert(cbMem == 1 || cbMem == 2);
6724 if (cbMem == 1)
6725 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6726 else
6727 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6728 break;
6729
6730 case kIemNativeEmitMemOp_Fetch_Sx_U64:
6731 switch (cbMem)
6732 {
6733 case 1:
6734 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6735 break;
6736 case 2:
6737 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6738 break;
6739 case 4:
6740 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6741 break;
6742 default:
6743 AssertFailed();
6744 }
6745 break;
6746
6747 default:
6748 AssertFailed();
6749 }
6750
6751 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6752
6753 /*
6754 * TlbDone:
6755 */
6756 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6757
6758 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
6759
6760# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6761 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
6762 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6763# endif
6764 }
6765#else
6766 RT_NOREF(fAlignMask, idxLabelTlbMiss);
6767#endif
6768
6769 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
6770 iemNativeVarRegisterRelease(pReNative, idxVarValue);
6771 return off;
6772}
6773
6774
6775
6776/*********************************************************************************************************************************
6777* Memory fetches (IEM_MEM_FETCH_XXX). *
6778*********************************************************************************************************************************/
6779
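/*
 * All of the IEM_MC_FETCH_MEM_XXX variants below map onto iemNativeEmitMemFetchStoreDataCommon,
 * passing the destination variable, the segment register (UINT8_MAX for the flat forms), the guest
 * address variable, the access size, an alignment mask of size - 1 (0 for bytes), the fetch /
 * zero-extend / sign-extend operation, and the TLB-miss helper to call.
 */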
6780/* 8-bit segmented: */
6781#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
6782 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
6783 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
6784 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6785
6786#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6787 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6788 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
6789 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6790
6791#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6792 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6793 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6794 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6795
6796#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6797 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6798 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6799 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6800
6801#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6802 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6803 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
6804 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
6805
6806#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6807 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6808 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6809 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
6810
6811#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6812 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6813 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6814 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
6815
6816/* 16-bit segmented: */
6817#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6818 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6819 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6820 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6821
6822#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
6823 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6824 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6825 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
6826
6827#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6828 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6829 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6830 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6831
6832#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6833 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6834 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6835 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6836
6837#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6838 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6839 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6840 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
6841
6842#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6843 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6844 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6845 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
6846
6847
6848/* 32-bit segmented: */
6849#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6850 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6851 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6852 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6853
6854#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
6855 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6856 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6857 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
6858
6859#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6860 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6861 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6862 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6863
6864#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6865 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6866 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6867 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
6868
6869#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
6870 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
6871 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6872 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
6873
6874#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
6875 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
6876 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6877 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6878
6879#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
6880 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
6881 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
6882 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
6883
6884AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
6885#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
6886 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
6887 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
6888 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6889
6890
6891/* 64-bit segmented: */
6892#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6893 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6894 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
6895 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
6896
6897AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
6898#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
6899 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
6900 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
6901 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
6902
6903
6904/* 8-bit flat: */
6905#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
6906 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
6907 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
6908 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
6909
6910#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
6911 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
6912 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
6913 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
6914
6915#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
6916 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
6917 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6918 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
6919
6920#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
6921 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
6922 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6923 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
6924
6925#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
6926 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
6927 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
6928 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
6929
6930#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
6931 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
6932 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6933 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
6934
6935#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
6936 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
6937 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6938 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
6939
6940
6941/* 16-bit flat: */
6942#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
6943 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
6944 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6945 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
6946
6947#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
6948 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
6949 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6950 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
6951
6952#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
6953 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
6954 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6955 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
6956
6957#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
6958 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
6959 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6960 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
6961
6962#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
6963 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
6964 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6965 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
6966
6967#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
6968 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
6969 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6970 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
6971
6972/* 32-bit flat: */
6973#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
6974 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
6975 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6976 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
6977
6978#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
6979 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
6980 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6981 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
6982
6983#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
6984 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
6985 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6986 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
6987
6988#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
6989 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
6990 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6991 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
6992
6993 #define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
6994 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
6995 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6996 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
6997
6998#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
6999 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7000 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7001 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7002
7003#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7004 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
7005 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7006 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7007
7008#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7009 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
7010 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7011 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7012
7013
7014/* 64-bit flat: */
7015#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7016 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7017 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7018 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7019
7020#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7021 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
7022 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7023 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7024
7025#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7026/* 128-bit segmented: */
7027#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
7028 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7029 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7030 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
7031
7032#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
7033 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7034 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7035 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7036
7037AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7038#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
7039 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, \
7040 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7041 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7042
7043#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7044 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7045 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7046 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7047
7048/* 128-bit flat: */
7049#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
7050 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7051 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7052 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
7053
7054#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
7055 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7056 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7057 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7058
7059#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
7060 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
7061 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7062 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7063
7064#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
7065 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7066 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7067 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7068
7069/* 256-bit segmented: */
7070#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
7071 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7072 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7073 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7074
7075#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7076 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7077 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7078 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7079
7080#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
7081 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7082 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7083 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7084
7085
7086/* 256-bit flat: */
7087#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
7088 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7089 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7090 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7091
7092#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
7093 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7094 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7095 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7096
7097#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
7098 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7099 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7100 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7101#endif
7102
7103
7104/*********************************************************************************************************************************
7105* Memory stores (IEM_MEM_STORE_XXX). *
7106*********************************************************************************************************************************/
7107
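/*
 * Same pattern as the fetch macros above: value variable, segment register (UINT8_MAX for the flat
 * forms), address variable, access size and natural-alignment mask, kIemNativeEmitMemOp_Store, and
 * the matching TLB-miss store helper.
 */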
7108#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
7109 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
7110 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
7111 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7112
7113#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
7114 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
7115 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7116 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7117
7118#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
7119 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
7120 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7121 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7122
7123#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
7124 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
7125 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7126 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7127
7128
7129#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
7130 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
7131 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
7132 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7133
7134#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
7135 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
7136 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7137 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7138
7139#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
7140 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
7141 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7142 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7143
7144#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
7145 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
7146 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7147 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7148
7149
7150#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
7151 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7152 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7153
7154#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
7155 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7156 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7157
7158#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
7159 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7160 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7161
7162#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
7163 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7164 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7165
7166
7167#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
7168 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7169 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7170
7171#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
7172 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7173 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7174
7175#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
7176 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7177 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7178
7179#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
7180 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7181 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7182
7183/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
7184 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
7185DECL_INLINE_THROW(uint32_t)
7186iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
7187 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
7188{
7189 /*
7190 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
7191 * to do the grunt work.
7192 */
7193 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
7194 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
7195 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
7196 pfnFunction, idxInstr);
7197 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
7198 return off;
7199}
7200
7201
7202#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7203# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
7204 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
7205 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7206 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
7207
7208# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
7209 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
7210 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7211 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
7212
7213# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
7214 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
7215 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7216 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
7217
7218# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
7219 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
7220 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7221 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7222
7223
7224# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
7225 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
7226 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7227 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
7228
7229# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
7230 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
7231 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7232 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
7233
7234# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
7235 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
7236 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7237 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
7238
7239# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
7240 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
7241 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7242 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7243#endif
7244
7245
7246
7247/*********************************************************************************************************************************
7248* Stack Accesses. *
7249*********************************************************************************************************************************/
7250/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
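/* Byte 0 is the operand size in bits, byte 1 the flat stack pointer width in bits (0 when SS is used)
   and byte 2 the segment-register-push flag; e.g. RT_MAKE_U32_FROM_U8(16, 32, 0, 0) is a 16-bit push
   on a flat 32-bit stack. See the RT_BYTE1/2/3 decoding in iemNativeEmitStackPush below. */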
7251#define IEM_MC_PUSH_U16(a_u16Value) \
7252 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
7253 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
7254#define IEM_MC_PUSH_U32(a_u32Value) \
7255 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
7256 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
7257#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
7258 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
7259 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
7260#define IEM_MC_PUSH_U64(a_u64Value) \
7261 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
7262 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
7263
7264#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
7265 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
7266 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7267#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
7268 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
7269 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
7270#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
7271 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
7272 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
7273
7274#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
7275 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
7276 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7277#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
7278 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
7279 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
7280
7281
7282/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
7283DECL_INLINE_THROW(uint32_t)
7284iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
7285 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
7286{
7287 /*
7288 * Assert sanity.
7289 */
7290 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7291 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7292#ifdef VBOX_STRICT
7293 if (RT_BYTE2(cBitsVarAndFlat) != 0)
7294 {
7295 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7296 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7297 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7298 Assert( pfnFunction
7299 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7300 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
7301 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
7302 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7303 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
7304 : UINT64_C(0xc000b000a0009000) ));
7305 }
7306 else
7307 Assert( pfnFunction
7308 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
7309 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
7310 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
7311 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
7312 : UINT64_C(0xc000b000a0009000) ));
7313#endif
7314
7315#ifdef VBOX_STRICT
7316 /*
7317 * Check that the fExec flags we've got make sense.
7318 */
7319 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7320#endif
7321
7322 /*
7323 * To keep things simple we have to commit any pending writes first as we
7324 * may end up making calls.
7325 */
7326 /** @todo we could postpone this till we make the call and reload the
7327 * registers after returning from the call. Not sure if that's sensible or
7328 * not, though. */
7329 off = iemNativeRegFlushPendingWrites(pReNative, off);
7330
7331 /*
7332 * First we calculate the new RSP and the effective stack pointer value.
7333 * For 64-bit mode and flat 32-bit these two are the same.
7334 * (Code structure is very similar to that of PUSH)
7335 */
7336 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
7337 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
7338 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
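    /* Intel CPUs reportedly write only the low 16 bits of the stack slot when pushing a segment
       register outside 16-bit mode, which is why the access size below is narrowed to a word in that
       case (the real-mode quirk is handled separately in the TlbLookup store code further down). */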
7339 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
7340 ? cbMem : sizeof(uint16_t);
7341 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
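    /* For the flat (64-bit and flat 32-bit) forms RSP itself serves as the effective stack pointer;
       otherwise a separate temporary is needed, since the effective address is derived from SP or ESP
       depending on the SS descriptor D bit (tested below). */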
7342 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
7343 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
7344 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
7345 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
7346 if (cBitsFlat != 0)
7347 {
7348 Assert(idxRegEffSp == idxRegRsp);
7349 Assert(cBitsFlat == 32 || cBitsFlat == 64);
7350 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
7351 if (cBitsFlat == 64)
7352 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
7353 else
7354 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
7355 }
7356 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
7357 {
7358 Assert(idxRegEffSp != idxRegRsp);
7359 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
7360 kIemNativeGstRegUse_ReadOnly);
7361#ifdef RT_ARCH_AMD64
7362 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7363#else
7364 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7365#endif
7366 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
7367 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
7368 offFixupJumpToUseOtherBitSp = off;
7369 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7370 {
7371 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
7372 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7373 }
7374 else
7375 {
7376 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
7377 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7378 }
7379 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7380 }
7381 /* SpUpdateEnd: */
7382 uint32_t const offLabelSpUpdateEnd = off;
7383
7384 /*
7385 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
7386 * we're skipping lookup).
7387 */
7388 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
7389 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
7390 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7391 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7392 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7393 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7394 : UINT32_MAX;
7395 uint8_t const idxRegValue = !TlbState.fSkip
7396 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7397 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
7398 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
7399 : UINT8_MAX;
7400 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7401
7402
7403 if (!TlbState.fSkip)
7404 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7405 else
7406 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
7407
7408 /*
7409 * Use16BitSp:
7410 */
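    /* This out-of-line block is the fixup target of offFixupJumpToUseOtherBitSp above: it performs the
       stack pointer update with the width the mainline didn't handle and then jumps back to SpUpdateEnd. */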
7411 if (cBitsFlat == 0)
7412 {
7413#ifdef RT_ARCH_AMD64
7414 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7415#else
7416 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7417#endif
7418 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
7419 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7420 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7421 else
7422 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7423 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
7424 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7425 }
7426
7427 /*
7428 * TlbMiss:
7429 *
7430 * Call helper to do the pushing.
7431 */
7432 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7433
7434#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7435 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7436#else
7437 RT_NOREF(idxInstr);
7438#endif
7439
7440 /* Save variables in volatile registers. */
7441 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7442 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
7443 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
7444 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
7445 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7446
7447 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
7448 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
7449 {
7450 /* Swap them using ARG0 as temp register: */
7451 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
7452 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
7453 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
7454 }
7455 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
7456 {
7457 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
7458 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
7459 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7460
7461 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
7462 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
7463 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7464 }
7465 else
7466 {
7467 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
7468 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7469
7470 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
7471 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
7472 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
7473 }
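    /* A note on the swap branch above (illustrative, not from the original source):
       if the value already sits in ARG1 and the effective SP in ARG2, loading either
       argument directly would clobber the other's source, so the value is parked in
       ARG0 first:  ARG0 <- ARG1 (value), ARG1 <- ARG2 (EffSp), ARG2 <- ARG0 (value). */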
7474
7475 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7476 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7477
7478 /* Done setting up parameters, make the call. */
7479 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7480
7481 /* Restore variables and guest shadow registers to volatile registers. */
7482 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7483 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7484
7485#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7486 if (!TlbState.fSkip)
7487 {
7488 /* end of TlbMiss - Jump to the done label. */
7489 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7490 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7491
7492 /*
7493 * TlbLookup:
7494 */
7495 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
7496 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7497
7498 /*
7499 * Emit code to do the actual storing / fetching.
7500 */
7501 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7502# ifdef VBOX_WITH_STATISTICS
7503 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7504 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
7505# endif
7506 if (idxRegValue != UINT8_MAX)
7507 {
7508 switch (cbMemAccess)
7509 {
7510 case 2:
7511 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7512 break;
7513 case 4:
7514 if (!fIsIntelSeg)
7515 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7516 else
7517 {
7518                        /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
7519                           PUSH FS in real mode, so we have to try to emulate that here.
7520 We borrow the now unused idxReg1 from the TLB lookup code here. */
7521 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
7522 kIemNativeGstReg_EFlags);
7523 if (idxRegEfl != UINT8_MAX)
7524 {
7525#ifdef RT_ARCH_AMD64
7526 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
7527 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
7528 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7529#else
7530 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
7531 off, TlbState.idxReg1, idxRegEfl,
7532 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7533#endif
7534 iemNativeRegFreeTmp(pReNative, idxRegEfl);
7535 }
7536 else
7537 {
7538 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
7539 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
7540 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
7541 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7542 }
7543 /* ASSUMES the upper half of idxRegValue is ZERO. */
7544 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
7545 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
7546 }
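                        /* Editor's sketch of the net effect (assuming, per the comment above, that
                           the upper half of idxRegValue is zero): the 32-bit value stored is
                           ((EFLAGS & 0xffff0000) & ~X86_EFL_RAZ_MASK) | idxRegValue. */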
7547 break;
7548 case 8:
7549 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7550 break;
7551 default:
7552 AssertFailed();
7553 }
7554 }
7555 else
7556 {
7557 switch (cbMemAccess)
7558 {
7559 case 2:
7560 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7561 idxRegMemResult, TlbState.idxReg1);
7562 break;
7563 case 4:
7564 Assert(!fIsSegReg);
7565 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7566 idxRegMemResult, TlbState.idxReg1);
7567 break;
7568 case 8:
7569 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
7570 break;
7571 default:
7572 AssertFailed();
7573 }
7574 }
7575
7576 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7577 TlbState.freeRegsAndReleaseVars(pReNative);
7578
7579 /*
7580 * TlbDone:
7581 *
7582 * Commit the new RSP value.
7583 */
7584 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7585 }
7586#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
7587
7588#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7589 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
7590#endif
7591 iemNativeRegFreeTmp(pReNative, idxRegRsp);
7592 if (idxRegEffSp != idxRegRsp)
7593 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
7594
7595    /* The value variable is implicitly flushed. */
7596 if (idxRegValue != UINT8_MAX)
7597 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7598 iemNativeVarFreeLocal(pReNative, idxVarValue);
7599
7600 return off;
7601}
7602
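/*
 * Editor's note - a rough, non-authoritative sketch of the code layout the stack
 * push/pop emitters above and below produce for the non-flat (segmented) case:
 *
 *          test    ss-attrib, X86DESCATTR_D    ; pick 16-bit vs 32-bit SP handling
 *          jcc     UseOtherBitSp               ; fixed up later
 *          <SP update for the current default operand size>
 *      SpUpdateEnd:
 *          jmp     TlbLookup                   ; or TlbMiss when the lookup is skipped
 *      UseOtherBitSp:
 *          <SP update for the other operand size>
 *          jmp     SpUpdateEnd
 *      TlbMiss:
 *          <save volatiles, marshal args, call pfnFunction, restore>
 *          jmp     TlbDone
 *      TlbLookup:
 *          <inline TLB lookup; on a hit, do the actual store/load>
 *      TlbDone:
 *          <commit RSP and the result>
 */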
7603
7604
7605/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
7606#define IEM_MC_POP_GREG_U16(a_iGReg) \
7607 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
7608 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
7609#define IEM_MC_POP_GREG_U32(a_iGReg) \
7610 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
7611 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
7612#define IEM_MC_POP_GREG_U64(a_iGReg) \
7613 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
7614 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
7615
7616#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
7617 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
7618 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
7619#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
7620 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
7621 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
7622
7623#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
7624 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
7625 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
7626#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
7627 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
7628 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
7629
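/* Editor's example (illustrative only): IEM_MC_FLAT64_POP_GREG_U64(X86_GREG_xBP)
   passes cBitsVarAndFlat = RT_MAKE_U32_FROM_U8(64, 64, 0, 0), i.e. byte 0 is the
   operand size in bits and byte 1 the flat stack width in bits (0 for segmented),
   which iemNativeEmitStackPopGReg unpacks again via RT_BYTE1/RT_BYTE2 below. */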
7630
7631DECL_FORCE_INLINE_THROW(uint32_t)
7632iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
7633 uint8_t idxRegTmp)
7634{
7635 /* Use16BitSp: */
7636#ifdef RT_ARCH_AMD64
7637 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
7638 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
7639 RT_NOREF(idxRegTmp);
7640#else
7641 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
7642 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
7643 /* add tmp, regrsp, #cbMem */
7644 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
7645 /* and tmp, tmp, #0xffff */
7646 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
7647 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
7648    /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
7649 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
7650#endif
7651 return off;
7652}
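/* Editor's note: in effect the above computes EffSp = RSP & 0xffff and then
   SP = (SP + cbMem) & 0xffff while leaving RSP bits 63:16 untouched, i.e. the
   post-increment of a pop on a 16-bit stack segment. */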
7653
7654
7655DECL_FORCE_INLINE(uint32_t)
7656iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
7657{
7658 /* Use32BitSp: */
7659 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
7660 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
7661 return off;
7662}
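/* Editor's note: the 32-bit variant simply does EffSp = ESP and ESP += cbMem,
   both as 32-bit operations. */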
7663
7664
7665/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
7666DECL_INLINE_THROW(uint32_t)
7667iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
7668 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
7669{
7670 /*
7671 * Assert sanity.
7672 */
7673 Assert(idxGReg < 16);
7674#ifdef VBOX_STRICT
7675 if (RT_BYTE2(cBitsVarAndFlat) != 0)
7676 {
7677 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7678 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7679 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7680 Assert( pfnFunction
7681 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
7682 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
7683 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
7684 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
7685 : UINT64_C(0xc000b000a0009000) ));
7686 }
7687 else
7688 Assert( pfnFunction
7689 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
7690 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
7691 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
7692 : UINT64_C(0xc000b000a0009000) ));
7693#endif
7694
7695#ifdef VBOX_STRICT
7696 /*
7697 * Check that the fExec flags we've got make sense.
7698 */
7699 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7700#endif
7701
7702 /*
7703 * To keep things simple we have to commit any pending writes first as we
7704 * may end up making calls.
7705 */
7706 off = iemNativeRegFlushPendingWrites(pReNative, off);
7707
7708 /*
7709 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
7710 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
7711 * directly as the effective stack pointer.
7712 * (Code structure is very similar to that of PUSH)
7713 */
7714 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
7715 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
7716 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
7717 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
7718 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
7719 /** @todo can do a better job picking the register here. For cbMem >= 4 this
7720 * will be the resulting register value. */
7721 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
7722
7723 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
7724 if (cBitsFlat != 0)
7725 {
7726 Assert(idxRegEffSp == idxRegRsp);
7727 Assert(cBitsFlat == 32 || cBitsFlat == 64);
7728 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
7729 }
7730 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
7731 {
7732 Assert(idxRegEffSp != idxRegRsp);
7733 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
7734 kIemNativeGstRegUse_ReadOnly);
7735#ifdef RT_ARCH_AMD64
7736 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7737#else
7738 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7739#endif
7740 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
7741 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
7742 offFixupJumpToUseOtherBitSp = off;
7743 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7744 {
7745/** @todo can skip idxRegRsp updating when popping ESP. */
7746 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
7747 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7748 }
7749 else
7750 {
7751 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
7752 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
7753 }
7754 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7755 }
7756 /* SpUpdateEnd: */
7757 uint32_t const offLabelSpUpdateEnd = off;
7758
7759 /*
7760     * Okay, now prepare for the TLB lookup and jump to the lookup code (or to
7761     * TlbMiss if we're skipping the lookup).
7762 */
7763 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
7764 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
7765 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7766 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7767 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7768 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7769 : UINT32_MAX;
7770
7771 if (!TlbState.fSkip)
7772 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7773 else
7774 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
7775
7776 /*
7777 * Use16BitSp:
7778 */
7779 if (cBitsFlat == 0)
7780 {
7781#ifdef RT_ARCH_AMD64
7782 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7783#else
7784 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7785#endif
7786 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
7787 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7788 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
7789 else
7790 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7791 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
7792 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7793 }
7794
7795 /*
7796 * TlbMiss:
7797 *
7798     * Call helper to do the popping.
7799 */
7800 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7801
7802#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7803 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7804#else
7805 RT_NOREF(idxInstr);
7806#endif
7807
7808 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7809 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
7810 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
7811 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7812
7813
7814 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
7815 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
7816 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7817
7818 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7819 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7820
7821 /* Done setting up parameters, make the call. */
7822 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7823
7824 /* Move the return register content to idxRegMemResult. */
7825 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
7826 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
7827
7828 /* Restore variables and guest shadow registers to volatile registers. */
7829 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7830 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7831
7832#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7833 if (!TlbState.fSkip)
7834 {
7835 /* end of TlbMiss - Jump to the done label. */
7836 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7837 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7838
7839 /*
7840 * TlbLookup:
7841 */
7842 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
7843 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7844
7845 /*
7846 * Emit code to load the value (from idxRegMemResult into idxRegMemResult).
7847 */
7848 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7849# ifdef VBOX_WITH_STATISTICS
7850 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7851 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
7852# endif
7853 switch (cbMem)
7854 {
7855 case 2:
7856 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7857 break;
7858 case 4:
7859 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7860 break;
7861 case 8:
7862 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7863 break;
7864 default:
7865 AssertFailed();
7866 }
7867
7868 TlbState.freeRegsAndReleaseVars(pReNative);
7869
7870 /*
7871 * TlbDone:
7872 *
7873     * Set the new RSP value (FLAT accesses need to calculate it first) and
7874 * commit the popped register value.
7875 */
7876 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7877 }
7878#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
7879
7880 if (idxGReg != X86_GREG_xSP)
7881 {
7882 /* Set the register. */
7883 if (cbMem >= sizeof(uint32_t))
7884 {
7885#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
7886 AssertMsg( pReNative->idxCurCall == 0
7887 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
7888 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
7889#endif
7890 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
7891#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7892 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
7893#endif
7894#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7895 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
7896 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
7897#endif
7898 }
7899 else
7900 {
7901 Assert(cbMem == sizeof(uint16_t));
7902 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
7903 kIemNativeGstRegUse_ForUpdate);
7904 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
7905#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7906 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
7907#endif
7908 iemNativeRegFreeTmp(pReNative, idxRegDst);
7909 }
7910
7911 /* Complete RSP calculation for FLAT mode. */
7912 if (idxRegEffSp == idxRegRsp)
7913 {
7914 if (cBitsFlat == 64)
7915 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
7916 else
7917 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
7918 }
7919 }
7920 else
7921 {
7922        /* We're popping RSP, ESP or SP. Only SP needs a bit of extra work, of course. */
7923 if (cbMem == sizeof(uint64_t))
7924 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
7925 else if (cbMem == sizeof(uint32_t))
7926 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
7927 else
7928 {
7929 if (idxRegEffSp == idxRegRsp)
7930 {
7931 if (cBitsFlat == 64)
7932 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
7933 else
7934 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
7935 }
7936 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
7937 }
7938 }
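    /* Editor's summary of the branch above: popping into RSP/ESP replaces the stack
       pointer with the value just read (the pop's own increment is overwritten), while
       popping into SP first completes any pending FLAT-mode increment and then merges
       only the low 16 bits of the read value into RSP. */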
7939
7940#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7941 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
7942#endif
7943
7944 iemNativeRegFreeTmp(pReNative, idxRegRsp);
7945 if (idxRegEffSp != idxRegRsp)
7946 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
7947 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7948
7949 return off;
7950}
7951
7952
7953
7954/*********************************************************************************************************************************
7955* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
7956*********************************************************************************************************************************/
7957
7958#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7959 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7960 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
7961 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
7962
7963#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7964 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7965 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
7966 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
7967
7968#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7969 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7970 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
7971 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
7972
7973#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7974 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7975 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
7976 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
7977
7978
7979#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7980 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7981 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7982 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
7983
7984#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7985 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7986 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7987 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
7988
7989#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7990 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7991 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7992 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
7993
7994#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7995 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7996 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7997 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
7998
7999#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8000 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
8001 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8002 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8003
8004
8005#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8006 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8007 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8008 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
8009
8010#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8011 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8012 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8013 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
8014
8015#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8016 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8017 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8018 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8019
8020#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8021 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8022 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8023 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
8024
8025#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8026 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
8027 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8028 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8029
8030
8031#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8032 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8033 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8034 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
8035
8036#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8037 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8038 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8039 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
8040#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8041 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8042 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8043 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8044
8045#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8046 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8047 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8048 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
8049
8050#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8051 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
8052 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8053 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8054
8055
8056#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8057 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8058 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8059 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
8060
8061#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8062 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8063 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
8064 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
8065
8066
8067#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8068 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8069 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8070 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
8071
8072#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8073 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8074 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8075 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
8076
8077#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8078 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8079 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8080 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
8081
8082#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8083 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8084 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8085 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
8086
8087
8088
8089#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8090 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8091 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
8092 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
8093
8094#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8095 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8096 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
8097 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
8098
8099#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8100 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8101 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
8102 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
8103
8104#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8105 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8106 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
8107 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
8108
8109
8110#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8111 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8112 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8113 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
8114
8115#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8116 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8117 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8118 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
8119
8120#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8121 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8122 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8123 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8124
8125#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8126 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8127 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8128 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
8129
8130#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
8131 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
8132 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8133 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8134
8135
8136#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8137 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8138 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8139 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
8140
8141#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8142 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8143 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8144 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
8145
8146#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8147 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8148 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8149 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8150
8151#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8152 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8153 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8154 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
8155
8156#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
8157 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
8158 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8159 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8160
8161
8162#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8163 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8164 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8165 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
8166
8167#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8168 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8169 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8170 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
8171
8172#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8173 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8174 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8175 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8176
8177#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8178 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8179 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8180 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
8181
8182#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
8183 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
8184 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8185 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8186
8187
8188#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
8189 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8190 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8191 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
8192
8193#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
8194 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8195 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
8196 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
8197
8198
8199#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8200 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8201 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8202 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
8203
8204#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8205 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8206 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8207 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
8208
8209#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8210 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8211 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8212 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
8213
8214#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8215 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8216 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8217 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
8218
8219
8220DECL_INLINE_THROW(uint32_t)
8221iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
8222 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
8223 uintptr_t pfnFunction, uint8_t idxInstr)
8224{
8225 /*
8226 * Assert sanity.
8227 */
8228 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
8229 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
8230 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
8231 && pVarMem->cbVar == sizeof(void *),
8232 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8233
8234 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
8235 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
8236 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
8237 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
8238 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8239
8240 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
8241 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
8242 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
8243 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
8244 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8245
8246 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
8247
8248 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
8249
8250#ifdef VBOX_STRICT
8251# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
8252 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
8253 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
8254 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
8255 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
8256# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
8257 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
8258 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
8259 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
8260
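    /* Editor's worked example of the helper selection above: the IEM_MC_MEM_MAP_*
       macros pass IEM_ACCESS_DATA_RW/R/W/ATOMIC, whose type bits resolve to the
       ...Rw, ...Ro, ...Wo and ...Atomic helpers respectively - the same mapping the
       strict checks below (and the commit/unmap code further down) rely on. */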
8261 if (iSegReg == UINT8_MAX)
8262 {
8263 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8264 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8265 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8266 switch (cbMem)
8267 {
8268 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
8269 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
8270 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
8271 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
8272 case 10:
8273 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
8274 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
8275 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8276 break;
8277 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
8278# if 0
8279 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
8280 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
8281# endif
8282 default: AssertFailed(); break;
8283 }
8284 }
8285 else
8286 {
8287 Assert(iSegReg < 6);
8288 switch (cbMem)
8289 {
8290 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
8291 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
8292 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
8293 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
8294 case 10:
8295 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
8296 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
8297 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8298 break;
8299 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
8300# if 0
8301 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
8302 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
8303# endif
8304 default: AssertFailed(); break;
8305 }
8306 }
8307# undef IEM_MAP_HLP_FN
8308# undef IEM_MAP_HLP_FN_NO_AT
8309#endif
8310
8311#ifdef VBOX_STRICT
8312 /*
8313 * Check that the fExec flags we've got make sense.
8314 */
8315 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8316#endif
8317
8318 /*
8319 * To keep things simple we have to commit any pending writes first as we
8320 * may end up making calls.
8321 */
8322 off = iemNativeRegFlushPendingWrites(pReNative, off);
8323
8324#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8325 /*
8326 * Move/spill/flush stuff out of call-volatile registers.
8327 * This is the easy way out. We could contain this to the tlb-miss branch
8328 * by saving and restoring active stuff here.
8329 */
8330 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
8331 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
8332#endif
8333
8334 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
8335 while the tlb-miss codepath will temporarily put it on the stack.
8336       Set the type to stack here so we don't need to do it twice below. */
8337 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
8338 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
8339 /** @todo use a tmp register from TlbState, since they'll be free after tlb
8340 * lookup is done. */
8341
8342 /*
8343 * Define labels and allocate the result register (trying for the return
8344 * register if we can).
8345 */
8346 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8347 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
8348 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
8349 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
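    /* Editor's note: preferring IEMNATIVE_CALL_RET_GREG for the result means the
       TlbMiss path below usually needs no extra move after the helper call returns
       the mapped address in that register. */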
8350 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
8351 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8352 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8353 : UINT32_MAX;
8354//off=iemNativeEmitBrk(pReNative, off, 0);
8355 /*
8356 * Jump to the TLB lookup code.
8357 */
8358 if (!TlbState.fSkip)
8359 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8360
8361 /*
8362 * TlbMiss:
8363 *
8364 * Call helper to do the fetching.
8365 * We flush all guest register shadow copies here.
8366 */
8367 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
8368
8369#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8370 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8371#else
8372 RT_NOREF(idxInstr);
8373#endif
8374
8375#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8376 /* Save variables in volatile registers. */
8377 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
8378 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8379#endif
8380
8381 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
8382 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
8383#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8384 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
8385#else
8386 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8387#endif
8388
8389 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
8390 if (iSegReg != UINT8_MAX)
8391 {
8392 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
8393 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
8394 }
8395
8396 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
8397 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
8398 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
8399
8400 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8401 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8402
8403 /* Done setting up parameters, make the call. */
8404 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
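    /* Editor's recap of the helper ABI set up above: ARG0 = pVCpu, ARG1 = address of
       the bUnmapInfo stack slot, ARG2 = the guest address, and ARG3 = the segment
       register number for segmented accesses only; the mapped host address comes
       back in IEMNATIVE_CALL_RET_GREG and is moved to idxRegMemResult below. */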
8405
8406 /*
8407 * Put the output in the right registers.
8408 */
8409 Assert(idxRegMemResult == pVarMem->idxReg);
8410 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8411 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8412
8413#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8414 /* Restore variables and guest shadow registers to volatile registers. */
8415 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8416 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8417#endif
8418
8419 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
8420 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
8421
8422#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8423 if (!TlbState.fSkip)
8424 {
8425        /* end of TlbMiss - Jump to the done label. */
8426 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8427 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8428
8429 /*
8430 * TlbLookup:
8431 */
8432 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
8433 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8434# ifdef VBOX_WITH_STATISTICS
8435 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
8436 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
8437# endif
8438
8439 /* [idxVarUnmapInfo] = 0; */
8440 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
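        /* Editor's note: a TLB hit needs no unmap bookkeeping, so the hit path hands
           back bUnmapInfo = 0, which the commit-and-unmap emitter below turns into a
           skipped helper call. */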
8441
8442 /*
8443 * TlbDone:
8444 */
8445 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8446
8447 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
8448
8449# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8450 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
8451 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8452# endif
8453 }
8454#else
8455 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
8456#endif
8457
8458 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
8459 iemNativeVarRegisterRelease(pReNative, idxVarMem);
8460
8461 return off;
8462}
8463
8464
8465#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
8466 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
8467 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
8468
8469#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
8470 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
8471 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
8472
8473#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
8474 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
8475 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
8476
8477#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
8478 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
8479 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
8480
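/* Editor's sketch (identifier names are hypothetical) of how a generated MC block is
   expected to pair the map and commit/unmap statements above:
       IEM_MC_MEM_MAP_U16_RW(pu16Dst, bUnmapInfo, iEffSeg, GCPtrEffDst);
       ... operate on *pu16Dst ...
       IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
   i.e. the bUnmapInfo produced by the map MC is exactly what the commit MC consumes. */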
8481DECL_INLINE_THROW(uint32_t)
8482iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
8483 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
8484{
8485 /*
8486 * Assert sanity.
8487 */
8488 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
8489#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
8490 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
8491#endif
8492 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
8493 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
8494 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
8495#ifdef VBOX_STRICT
8496 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
8497 {
8498 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
8499 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
8500 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
8501 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
8502 case IEM_ACCESS_TYPE_WRITE:
8503 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
8504 case IEM_ACCESS_TYPE_READ:
8505 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
8506 default: AssertFailed();
8507 }
8508#else
8509 RT_NOREF(fAccess);
8510#endif
8511
8512 /*
8513 * To keep things simple we have to commit any pending writes first as we
8514 * may end up making calls (there shouldn't be any at this point, so this
8515 * is just for consistency).
8516 */
8517 /** @todo we could postpone this till we make the call and reload the
8518 * registers after returning from the call. Not sure if that's sensible or
8519 * not, though. */
8520 off = iemNativeRegFlushPendingWrites(pReNative, off);
8521
8522 /*
8523 * Move/spill/flush stuff out of call-volatile registers.
8524 *
8525 * We exclude any register holding the bUnmapInfo variable, as we'll be
8526 * checking it after returning from the call and will free it afterwards.
8527 */
8528 /** @todo save+restore active registers and maybe guest shadows in miss
8529 * scenario. */
8530 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
8531 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
8532
8533 /*
8534 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
8535 * to call the unmap helper function.
8536 *
8537     * The likelihood of it being zero is higher than the TLB hit rate when doing
8538     * the mapping, as a TLB miss for a well aligned and unproblematic memory
8539     * access should also end up with a mapping that won't need special unmapping.
8540 */
8541 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
8542 * should speed up things for the pure interpreter as well when TLBs
8543 * are enabled. */
8544#ifdef RT_ARCH_AMD64
8545 if (pVarUnmapInfo->idxReg == UINT8_MAX)
8546 {
8547 /* test byte [rbp - xxx], 0ffh */
8548 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
8549 pbCodeBuf[off++] = 0xf6;
8550 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
8551 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
8552 pbCodeBuf[off++] = 0xff;
8553 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8554 }
8555 else
8556#endif
8557 {
8558 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
8559 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
8560 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
8561 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
8562 }
8563 uint32_t const offJmpFixup = off;
8564 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
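    /* Editor's sketch of the sequence emitted above/below (AMD64, stack-slot case):
     *      test    byte [rbp - disp], 0ffh     ; bUnmapInfo == 0 => nothing to unmap
     *      jz      .done
     *      <load args, call pfnFunction>
     *  .done:
     */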
8565
8566 /*
8567 * Call the unmap helper function.
8568 */
8569#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
8570 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8571#else
8572 RT_NOREF(idxInstr);
8573#endif
8574
8575 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
8576 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
8577 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8578
8579 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8580 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8581
8582 /* Done setting up parameters, make the call. */
8583 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8584
8585    /* The bUnmapInfo variable is implicitly freed by these MCs. */
8586 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
8587
8588 /*
8589 * Done, just fixup the jump for the non-call case.
8590 */
8591 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
8592
8593 return off;
8594}
8595
8596
8597
8598/*********************************************************************************************************************************
8599* State and Exceptions *
8600*********************************************************************************************************************************/
8601
8602#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8603#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8604
8605#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8606#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8607#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8608
8609#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8610#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8611#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8612
8613
8614DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
8615{
8616 /** @todo this needs a lot more work later. */
8617 RT_NOREF(pReNative, fForChange);
8618 return off;
8619}
8620
8621
8622
8623/*********************************************************************************************************************************
8624* Emitters for FPU related operations. *
8625*********************************************************************************************************************************/
8626
8627#define IEM_MC_FETCH_FCW(a_u16Fcw) \
8628 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
8629
8630/** Emits code for IEM_MC_FETCH_FCW. */
8631DECL_INLINE_THROW(uint32_t)
8632iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
8633{
8634 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8635 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8636
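    /* Standard read-only fetch pattern: give the destination variable a host register,
       allocate a read-only copy of the guest FCW, copy the 16-bit value over, and free
       (but don't flush) the FCW register again since it wasn't modified. */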
8637 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8638
8639 /* Allocate a temporary FCW register. */
8640 /** @todo eliminate extra register */
8641 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
8642 kIemNativeGstRegUse_ReadOnly);
8643
8644 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
8645
8646 /* Free but don't flush the FCW register. */
8647 iemNativeRegFreeTmp(pReNative, idxFcwReg);
8648 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8649
8650 return off;
8651}
8652
8653
8654#define IEM_MC_FETCH_FSW(a_u16Fsw) \
8655 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
8656
8657/** Emits code for IEM_MC_FETCH_FSW. */
8658DECL_INLINE_THROW(uint32_t)
8659iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
8660{
8661 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8662 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8663
8664 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
8665 /* Allocate a temporary FSW register. */
8666 /** @todo eliminate extra register */
8667 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
8668 kIemNativeGstRegUse_ReadOnly);
8669
8670 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
8671
8672 /* Free but don't flush the FSW register. */
8673 iemNativeRegFreeTmp(pReNative, idxFswReg);
8674 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8675
8676 return off;
8677}
8678
8679
8680
8681#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8682
8683
8684/*********************************************************************************************************************************
8685* Emitters for SSE/AVX specific operations. *
8686*********************************************************************************************************************************/
8687
8688#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
8689 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
8690
8691/** Emits code for IEM_MC_COPY_XREG_U128. */
8692DECL_INLINE_THROW(uint32_t)
8693iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
8694{
8695 /* This is a nop if the source and destination register are the same. */
8696 if (iXRegDst != iXRegSrc)
8697 {
8698 /* Allocate destination and source register. */
8699 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
8700 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
8701 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
8702 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8703
8704 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
8705
8706 /* Free but don't flush the source and destination register. */
8707 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8708 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8709 }
8710
8711 return off;
8712}
8713
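/* Usage sketch (illustrative only, not taken from this file): a MOVAPS-style register to
 * register copy in a decoder MC block would contain something along the lines of
 *     IEM_MC_COPY_XREG_U128(IEM_GET_MODRM_REG(pVCpu, bRm), IEM_GET_MODRM_RM(pVCpu, bRm));
 * which the #define above expands into a call to this emitter when recompiling natively. */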
8714
8715#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
8716 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
8717
8718/** Emits code for IEM_MC_FETCH_XREG_U128. */
8719DECL_INLINE_THROW(uint32_t)
8720iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
8721{
8722 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8723 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
8724
8725 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8726 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8727
8728 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
8729
8730 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
8731
8732 /* Free but don't flush the source register. */
8733 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8734 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
8735
8736 return off;
8737}
8738
8739
8740#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
8741 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
8742
8743#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
8744 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
8745
8746/** Emits code for IEM_MC_FETCH_XREG_U64/IEM_MC_FETCH_XREG_R64. */
8747DECL_INLINE_THROW(uint32_t)
8748iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
8749{
8750 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8751 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
8752
8753 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8754 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8755
8756 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8757 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8758
8759 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
8760
8761 /* Free but don't flush the source register. */
8762 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8763 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8764
8765 return off;
8766}
8767
8768
8769#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
8770 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
8771
8772#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
8773 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
8774
8775/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
8776DECL_INLINE_THROW(uint32_t)
8777iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
8778{
8779 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8780 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
8781
8782 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8783 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8784
8785 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8786 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8787
8788 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
8789
8790 /* Free but don't flush the source register. */
8791 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8792 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8793
8794 return off;
8795}
8796
8797
8798#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
8799 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
8800
8801/** Emits code for IEM_MC_FETCH_XREG_U16. */
8802DECL_INLINE_THROW(uint32_t)
8803iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
8804{
8805 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8806 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8807
8808 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8809 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8810
8811 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8812 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8813
8814 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
8815
8816 /* Free but don't flush the source register. */
8817 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8818 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8819
8820 return off;
8821}
8822
8823
8824#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
8825 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
8826
8827/** Emits code for IEM_MC_FETCH_XREG_U8. */
8828DECL_INLINE_THROW(uint32_t)
8829iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
8830{
8831 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8832 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
8833
8834 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8835 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8836
8837 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8838 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8839
8840 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
8841
8842 /* Free but don't flush the source register. */
8843 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8844 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8845
8846 return off;
8847}
8848
8849
8850#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
8851 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
8852
8853AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
8854#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
8855 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
8856
8857
8858/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
8859DECL_INLINE_THROW(uint32_t)
8860iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8861{
8862 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8863 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8864
8865 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8866 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
8867 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
8868
8869 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
8870
8871 /* Free but don't flush the source register. */
8872 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8873 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8874
8875 return off;
8876}
8877
8878
8879#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
8880 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
8881
8882#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
8883 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
8884
8885#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
8886 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
8887
8888#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
8889 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
8890
8891#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
8892 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
8893
8894#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
8895 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
8896
8897/** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8, as well as IEM_MC_STORE_XREG_R32/IEM_MC_STORE_XREG_R64. */
8898DECL_INLINE_THROW(uint32_t)
8899iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
8900 uint8_t cbLocal, uint8_t iElem)
8901{
8902 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8903 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
8904
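    /* A 128-bit XMM register holds 2 qwords, 4 dwords, 8 words or 16 bytes, which is
       exactly what the element index range checks below assert in strict builds. */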
8905#ifdef VBOX_STRICT
8906 switch (cbLocal)
8907 {
8908 case sizeof(uint64_t): Assert(iElem < 2); break;
8909 case sizeof(uint32_t): Assert(iElem < 4); break;
8910 case sizeof(uint16_t): Assert(iElem < 8); break;
8911 case sizeof(uint8_t): Assert(iElem < 16); break;
8912 default: AssertFailed();
8913 }
8914#endif
8915
8916 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8917 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
8918 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
8919
8920 switch (cbLocal)
8921 {
8922 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
8923 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
8924 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
8925 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
8926 default: AssertFailed();
8927 }
8928
8929 /* Free but don't flush the source register. */
8930 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8931 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8932
8933 return off;
8934}
8935
8936
8937#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
8938 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
8939
8940/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
8941DECL_INLINE_THROW(uint32_t)
8942iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
8943{
8944 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8945 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
8946
8947 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8948 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
8949 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
8950
8951 /* Zero the vector register first, then store the 64-bit value to the lower 64-bit. */
8952 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
8953 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
8954
8955 /* Free but don't flush the source register. */
8956 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8957 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8958
8959 return off;
8960}
8961
8962
8963#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
8964 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
8965
8966/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
8967DECL_INLINE_THROW(uint32_t)
8968iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
8969{
8970 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8971 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
8972
8973 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8974 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
8975 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
8976
8977 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
8978 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
8979 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
8980
8981 /* Free but don't flush the source register. */
8982 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8983 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8984
8985 return off;
8986}
8987
8988
8989#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
8990 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
8991
8992/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
8993DECL_INLINE_THROW(uint32_t)
8994iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
8995 uint8_t idxSrcVar, uint8_t iDwSrc)
8996{
8997 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8998 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8999
9000 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9001 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
9002 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9003
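    /* Move the selected dword from the source value into the selected destination element
       via the fixed temporary GPR; the remaining destination elements are left untouched. */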
9004 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
9005 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
9006
9007 /* Free but don't flush the destination register. */
9008 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9009 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9010
9011 return off;
9012}
9013
9014
9015#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9016 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9017
9018/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
9019DECL_INLINE_THROW(uint32_t)
9020iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9021{
9022	    /*
9023	     * The iYRegSrc == iYRegDst case needs separate treatment here: if iYRegDst gets allocated first for the full write,
9024	     * its host register won't be loaded with the actual value from CPUMCTX. Allocating iYRegSrc afterwards would then
9025	     * duplicate the garbage already in that host register, which the guest register value checking in debug builds catches.
9026	     */
9027 if (iYRegDst != iYRegSrc)
9028 {
9029 /* Allocate destination and source register. */
9030 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9031 kIemNativeGstSimdRegLdStSz_256,
9032 kIemNativeGstRegUse_ForFullWrite);
9033 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9034 kIemNativeGstSimdRegLdStSz_Low128,
9035 kIemNativeGstRegUse_ReadOnly);
9036
9037 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9038 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9039
9040 /* Free but don't flush the source and destination register. */
9041 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9042 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9043 }
9044 else
9045 {
9046 /* This effectively only clears the upper 128-bits of the register. */
9047 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9048 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
9049
9050 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9051
9052 /* Free but don't flush the destination register. */
9053 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9054 }
9055
9056 return off;
9057}
9058
9059
9060#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9061 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9062
9063/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
9064DECL_INLINE_THROW(uint32_t)
9065iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9066{
9067	    /*
9068	     * The iYRegSrc == iYRegDst case needs separate treatment here: if iYRegDst gets allocated first for the full write,
9069	     * its host register won't be loaded with the actual value from CPUMCTX. Allocating iYRegSrc afterwards would then
9070	     * duplicate the garbage already in that host register, which the guest register value checking in debug builds catches.
9071	     * For iYRegSrc == iYRegDst the copy would effectively only clear the upper 256 bits of a zmm register we don't support yet, so it is just a nop.
9072	     */
9073 if (iYRegDst != iYRegSrc)
9074 {
9075 /* Allocate destination and source register. */
9076 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9077 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
9078 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9079 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9080
9081 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9082
9083 /* Free but don't flush the source and destination register. */
9084 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9085 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9086 }
9087
9088 return off;
9089}
9090
9091
9092#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
9093 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
9094
9095/** Emits code for IEM_MC_FETCH_YREG_U128. */
9096DECL_INLINE_THROW(uint32_t)
9097iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
9098{
9099 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9100 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9101
9102 Assert(iDQWord <= 1);
9103 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9104 iDQWord == 1
9105 ? kIemNativeGstSimdRegLdStSz_High128
9106 : kIemNativeGstSimdRegLdStSz_Low128,
9107 kIemNativeGstRegUse_ReadOnly);
9108
9109 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9110 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9111
9112 if (iDQWord == 1)
9113 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9114 else
9115 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9116
9117 /* Free but don't flush the source register. */
9118 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9119 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9120
9121 return off;
9122}
9123
9124
9125#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
9126 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
9127
9128/** Emits code for IEM_MC_FETCH_YREG_U64. */
9129DECL_INLINE_THROW(uint32_t)
9130iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
9131{
9132 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9133 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9134
9135 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9136 iQWord >= 2
9137 ? kIemNativeGstSimdRegLdStSz_High128
9138 : kIemNativeGstSimdRegLdStSz_Low128,
9139 kIemNativeGstRegUse_ReadOnly);
9140
9141 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9142 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9143
9144 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9145
9146 /* Free but don't flush the source register. */
9147 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9148 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9149
9150 return off;
9151}
9152
9153
9154#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
9155 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
9156
9157/** Emits code for IEM_MC_FETCH_YREG_U32. */
9158DECL_INLINE_THROW(uint32_t)
9159iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
9160{
9161 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9162 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9163
9164 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9165 iDWord >= 4
9166 ? kIemNativeGstSimdRegLdStSz_High128
9167 : kIemNativeGstSimdRegLdStSz_Low128,
9168 kIemNativeGstRegUse_ReadOnly);
9169
9170 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9171 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9172
9173 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9174
9175 /* Free but don't flush the source register. */
9176 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9177 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9178
9179 return off;
9180}
9181
9182
9183#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
9184 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
9185
9186/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
9187DECL_INLINE_THROW(uint32_t)
9188iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
9189{
9190 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9191 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
9192
9193 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9194
9195 /* Free but don't flush the register. */
9196 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9197
9198 return off;
9199}
9200
9201
9202#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
9203 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
9204
9205/** Emits code for IEM_MC_STORE_YREG_U128. */
9206DECL_INLINE_THROW(uint32_t)
9207iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
9208{
9209 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9210 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9211
9212 Assert(iDQword <= 1);
9213 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9214 iDQword == 0
9215 ? kIemNativeGstSimdRegLdStSz_Low128
9216 : kIemNativeGstSimdRegLdStSz_High128,
9217 kIemNativeGstRegUse_ForFullWrite);
9218
9219 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9220
9221 if (iDQword == 0)
9222 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9223 else
9224 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
9225
9226 /* Free but don't flush the source register. */
9227 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9228 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9229
9230 return off;
9231}
9232
9233
9234#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
9235 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
9236
9237/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
9238DECL_INLINE_THROW(uint32_t)
9239iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9240{
9241 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9242 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9243
9244 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9245 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9246
9247 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9248
9249 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9250 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9251
9252 /* Free but don't flush the source register. */
9253 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9254 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9255
9256 return off;
9257}
9258
9259
9260#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
9261 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
9262
9263/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
9264DECL_INLINE_THROW(uint32_t)
9265iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9266{
9267 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9268 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
9269
9270 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9271 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9272
9273 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9274
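    /* Replicate the byte across the low 128 bits only and then explicitly zero the upper
       half, giving the VLMAX zero-extending behaviour of the 128-bit broadcast forms. */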
9275 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9276 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9277
9278 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9279 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9280
9281 return off;
9282}
9283
9284
9285#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
9286 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
9287
9288/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
9289DECL_INLINE_THROW(uint32_t)
9290iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9291{
9292 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9293 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
9294
9295 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9296 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9297
9298 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9299
9300 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9301 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9302
9303 /* Free but don't flush the source register. */
9304 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9305 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9306
9307 return off;
9308}
9309
9310
9311#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
9312 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
9313
9314/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
9315DECL_INLINE_THROW(uint32_t)
9316iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9317{
9318 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9319 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9320
9321 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9322 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9323
9324 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9325
9326 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9327 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9328
9329 /* Free but don't flush the source register. */
9330 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9331 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9332
9333 return off;
9334}
9335
9336
9337#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
9338 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
9339
9340/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
9341DECL_INLINE_THROW(uint32_t)
9342iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9343{
9344 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9345 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9346
9347 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9348 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9349
9350 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9351
9352 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9353 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9354
9355 /* Free but don't flush the source register. */
9356 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9357 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9358
9359 return off;
9360}
9361
9362
9363#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
9364 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
9365
9366/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
9367DECL_INLINE_THROW(uint32_t)
9368iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9369{
9370 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9371 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
9372
9373 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9374 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9375
9376 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9377
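    /* Broadcasting across the full 256 bits already overwrites the whole register, so no
       separate zeroing of the upper half is needed here. */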
9378 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9379
9380 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9381 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9382
9383 return off;
9384}
9385
9386
9387#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
9388 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
9389
9390/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
9391DECL_INLINE_THROW(uint32_t)
9392iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9393{
9394 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9395 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
9396
9397 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9398 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9399
9400 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9401
9402 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9403
9404 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9405 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9406
9407 return off;
9408}
9409
9410
9411#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
9412 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
9413
9414/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
9415DECL_INLINE_THROW(uint32_t)
9416iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9417{
9418 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9419 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9420
9421 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9422 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9423
9424 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9425
9426 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9427
9428 /* Free but don't flush the source register. */
9429 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9430 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9431
9432 return off;
9433}
9434
9435
9436#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
9437 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
9438
9439/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
9440DECL_INLINE_THROW(uint32_t)
9441iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9442{
9443 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9444 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9445
9446 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9447 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9448
9449 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9450
9451 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9452
9453 /* Free but don't flush the source register. */
9454 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9455 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9456
9457 return off;
9458}
9459
9460
9461#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
9462 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
9463
9464/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
9465DECL_INLINE_THROW(uint32_t)
9466iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9467{
9468 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9469 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9470
9471 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9472 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9473
9474 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9475
9476 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
9477
9478 /* Free but don't flush the source register. */
9479 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9480 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9481
9482 return off;
9483}
9484
9485
9486#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
9487 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
9488
9489/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
9490DECL_INLINE_THROW(uint32_t)
9491iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9492{
9493 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9494 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9495
9496 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9497 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9498
9499 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9500
9501 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
9502 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
9503
9504 /* Free but don't flush the source register. */
9505 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9506 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9507
9508 return off;
9509}
9510
9511
9512#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
9513 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
9514
9515/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
9516DECL_INLINE_THROW(uint32_t)
9517iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9518{
9519 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9520 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9521
9522 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9523 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9524
9525 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9526
9527 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
9528 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
9529
9530 /* Free but don't flush the source register. */
9531 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9532 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9533
9534 return off;
9535}
9536
9537
9538#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
9539 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
9540
9541/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
9542DECL_INLINE_THROW(uint32_t)
9543iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
9544{
9545 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9546 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9547
9548 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9549 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9550 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
9551 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9552 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9553
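    /* Resulting layout: qword 0 = the u64 local value, qword 1 = qword 1 of a_iYRegSrcHx,
       and everything from bit 128 up is zeroed. */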
9554 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
9555 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
9556 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9557
9558 /* Free but don't flush the source and destination registers. */
9559 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
9560 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9561 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9562
9563 return off;
9564}
9565
9566
9567#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
9568 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
9569
9570/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
9571DECL_INLINE_THROW(uint32_t)
9572iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
9573{
9574 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9575 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9576
9577 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9578 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9579 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
9580 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9581 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9582
9583 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
9584 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
9585 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9586
9587 /* Free but don't flush the source and destination registers. */
9588 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
9589 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9590 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9591
9592 return off;
9593}
9594
9595
9596#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
9597 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
9598
9599
9600/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
9601DECL_INLINE_THROW(uint32_t)
9602iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
9603{
9604 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9605 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
9606
9607 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
9608 if (bImm8Mask & RT_BIT(0))
9609 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
9610 if (bImm8Mask & RT_BIT(1))
9611 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
9612 if (bImm8Mask & RT_BIT(2))
9613 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
9614 if (bImm8Mask & RT_BIT(3))
9615 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
9616
9617 /* Free but don't flush the destination register. */
9618 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9619
9620 return off;
9621}
9622
9623
9624#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
9625 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
9626
9627
9628/** Emits code for IEM_MC_FETCH_YREG_U256. */
9629DECL_INLINE_THROW(uint32_t)
9630iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
9631{
9632 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9633 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
9634
9635 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9636 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
9637 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9638
9639 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
9640
9641 /* Free but don't flush the source register. */
9642 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9643 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9644
9645 return off;
9646}
9647
9648
9649#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
9650 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
9651
9652
9653/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX. */
9654DECL_INLINE_THROW(uint32_t)
9655iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
9656{
9657 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9658 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
9659
9660 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9661 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9662	    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9663
9664 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
9665
9666 /* Free but don't flush the source register. */
9667 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9668 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9669
9670 return off;
9671}
9672
9673
9674#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
9675 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
9676
9677
9678/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
9679DECL_INLINE_THROW(uint32_t)
9680iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
9681 uint8_t idxSrcVar, uint8_t iDwSrc)
9682{
9683 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9684 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
9685
9686 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9687 iDwDst < 4
9688 ? kIemNativeGstSimdRegLdStSz_Low128
9689 : kIemNativeGstSimdRegLdStSz_High128,
9690 kIemNativeGstRegUse_ForUpdate);
9691	    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9692 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
9693
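    /* Only the 128-bit half of the destination containing the target dword is loaded for
       update; the dword itself travels from the source value via a temporary GPR. */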
9694 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
9695 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
9696
9697 /* Free but don't flush the source register. */
9698 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9699 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9700 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9701
9702 return off;
9703}
9704
9705
9706#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
9707 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
9708
9709
9710/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
9711DECL_INLINE_THROW(uint32_t)
9712iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
9713 uint8_t idxSrcVar, uint8_t iQwSrc)
9714{
9715 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9716 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
9717
9718 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9719 iQwDst < 2
9720 ? kIemNativeGstSimdRegLdStSz_Low128
9721 : kIemNativeGstSimdRegLdStSz_High128,
9722 kIemNativeGstRegUse_ForUpdate);
9723	    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9724 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
9725
9726 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
9727 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
9728
9729 /* Free but don't flush the source register. */
9730 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9731 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9732 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9733
9734 return off;
9735}
9736
9737
9738#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
9739 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
9740
9741
9742/** Emits code for IEM_MC_STORE_YREG_U64. */
9743DECL_INLINE_THROW(uint32_t)
9744iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
9745{
9746 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9747 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9748
9749 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9750 iQwDst < 2
9751 ? kIemNativeGstSimdRegLdStSz_Low128
9752 : kIemNativeGstSimdRegLdStSz_High128,
9753 kIemNativeGstRegUse_ForUpdate);
9754
9755 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9756
9757 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
9758
9759 /* Free but don't flush the source register. */
9760 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9761 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9762
9763 return off;
9764}
9765
9766
9767#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
9768 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
9769
9770/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
9771DECL_INLINE_THROW(uint32_t)
9772iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
9773{
9774 RT_NOREF(pReNative, iYReg);
9775 /** @todo Needs to be implemented when support for AVX-512 is added. */
9776 return off;
9777}
9778
9779
9780
9781/*********************************************************************************************************************************
9782* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
9783*********************************************************************************************************************************/
9784
9785/**
9786 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
9787 */
9788DECL_INLINE_THROW(uint32_t)
9789iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs)
9790{
9791 /* Grab the MXCSR register, it must not be call volatile or we end up freeing it when setting up the call below. */
9792 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
9793 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
9794 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
9795
9796 /*
9797 * Need to do the FPU preparation.
9798 */
9799 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
9800
9801 /*
9802 * Do all the call setup and cleanup.
9803 */
9804 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS, false /*fFlushPendingWrites*/);
9805
9806 /*
9807 * Load the MXCSR register into the first argument and mask out the current exception flags.
9808 */
9809 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
9810 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
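    /* The helper thus receives the guest MXCSR with the exception flag bits cleared as its
       first argument; whatever it returns becomes the new guest MXCSR value below. */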
9811
9812 /*
9813 * Make the call.
9814 */
9815 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
9816
9817 /*
9818 * The updated MXCSR is in the return register.
9819 */
9820 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegMxCsr, IEMNATIVE_CALL_RET_GREG);
9821
9822#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9823 /* Writeback the MXCSR register value (there is no delayed writeback for such registers at the moment). */
9824 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
9825#endif
9826 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9827
9828 return off;
9829}
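
/*
 * Hedged sketch of the contract set up by the common worker above, inferred
 * from the MXCSR handling in this function rather than taken from the actual
 * helper prototypes: the first (hidden) argument receives the guest MXCSR
 * with X86_MXCSR_XCPT_FLAGS masked out, and the helper hands the updated
 * MXCSR back in the return register, roughly:
 *
 *     uint32_t pfnAImpl(uint32_t fMxCsrIn, ...explicit arguments...);
 *
 * The returned value is then copied back into the guest MXCSR register and,
 * when delayed register writeback is not compiled in, stored directly to
 * cpum.GstCtx.XState.x87.MXCSR.
 */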
9830
9831
9832#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
9833 off = iemNativeEmitCallSseAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
9834
9835/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
9836DECL_INLINE_THROW(uint32_t)
9837iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
9838{
9839 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
9840 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
9841 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
9842}
9843
9844
9845#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
9846 off = iemNativeEmitCallSseAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
9847
9848/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
9849DECL_INLINE_THROW(uint32_t)
9850iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
9851{
9852 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
9853 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
9854 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
9855 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
9856}
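
/*
 * Hedged usage note: the asserts above require each argument variable to sit
 * at its call position shifted by IEM_SSE_AIMPL_HIDDEN_ARGS, because the
 * slot(s) below that are reserved for the hidden MXCSR argument loaded by the
 * common worker.  A hypothetical two-operand MC block would therefore declare
 * its arguments along these lines (names are illustrative only):
 *
 *     IEM_MC_ARG(PX86XMMREG,  pDst, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
 *     IEM_MC_ARG(PCX86XMMREG, pSrc, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
 *     IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_SomeSseOp_u128, pDst, pSrc);
 */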
9857
9858
9859/*********************************************************************************************************************************
9860* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
9861*********************************************************************************************************************************/
9862
9863#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
9864 off = iemNativeEmitCallAvxAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
9865
9866/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
9867DECL_INLINE_THROW(uint32_t)
9868iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
9869{
9870 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
9871 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
9872 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
9873}
9874
9875
9876#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
9877 off = iemNativeEmitCallAvxAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
9878
9879/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
9880DECL_INLINE_THROW(uint32_t)
9881iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
9882{
9883 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
9884 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
9885 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
9886 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
9887}
9888#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
9889
9890
9891/*********************************************************************************************************************************
9892* Include instruction emitters. *
9893*********************************************************************************************************************************/
9894#include "target-x86/IEMAllN8veEmit-x86.h"
9895