VirtualBox

source: vbox/trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h

Last change on this file was 106465, checked in by vboxsync, 2 months ago

VMM/IEM: Added iemNativeEmitLoadGprWithGstReg[Ex]T and iemNativeEmitStoreGprToGstReg[Ex]T as a better way of explicitly loading & storing standard guest registers. bugref:10720

1/* $Id: IEMN8veRecompilerEmit.h 106465 2024-10-18 00:27:52Z vboxsync $ */
2/** @file
3 * IEM - Interpreted Execution Manager - Native Recompiler Inlined Emitters.
4 */
5
6/*
7 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
29#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
30#ifndef RT_WITHOUT_PRAGMA_ONCE
31# pragma once
32#endif
33
34#include "IEMN8veRecompiler.h"
35
36
37/** @defgroup grp_iem_n8ve_re_inline Native Recompiler Inlined Emitters
38 * @ingroup grp_iem_n8ve_re
39 * @{
40 */
41
42/**
43 * Emit a simple marker instruction to more easily tell where something starts
44 * in the disassembly.
45 */
46DECL_INLINE_THROW(uint32_t)
47iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
48{
49#ifdef RT_ARCH_AMD64
50 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
51 if (uInfo == 0)
52 {
53 /* nop */
54 pbCodeBuf[off++] = 0x90;
55 }
56 else
57 {
58 /* nop [disp32] */
59 pbCodeBuf[off++] = 0x0f;
60 pbCodeBuf[off++] = 0x1f;
61 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, 0, 5);
62 pbCodeBuf[off++] = RT_BYTE1(uInfo);
63 pbCodeBuf[off++] = RT_BYTE2(uInfo);
64 pbCodeBuf[off++] = RT_BYTE3(uInfo);
65 pbCodeBuf[off++] = RT_BYTE4(uInfo);
66 }
67#elif defined(RT_ARCH_ARM64)
68 /* nop */
69 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
70 if (uInfo == 0)
71 pu32CodeBuf[off++] = ARMV8_A64_INSTR_NOP;
72 else
73 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(ARMV8_A64_REG_XZR, (uint16_t)uInfo);
74
75 RT_NOREF(uInfo);
76#else
77# error "port me"
78#endif
79 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
80 return off;
81}
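/*
 * Illustrative usage sketch (added for exposition, not part of the original emitters):
 *
 *      off = iemNativeEmitMarker(pReNative, off, 0x7ec0ffee);
 *
 * On AMD64 a non-zero uInfo shows up as a long 'nop' with the info value as its
 * displacement; on ARM64 it becomes 'movz xzr, #imm16', so only the low 16 bits
 * of the info survive in the disassembly.
 */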
82
83
84/**
85 * Emit a breakpoint instruction.
86 */
87DECL_FORCE_INLINE(uint32_t) iemNativeEmitBrkEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uInfo)
88{
89#ifdef RT_ARCH_AMD64
90 pCodeBuf[off++] = 0xcc;
91 RT_NOREF(uInfo); /** @todo use multibyte nop for info? */
92
93#elif defined(RT_ARCH_ARM64)
94 pCodeBuf[off++] = Armv8A64MkInstrBrk(uInfo & UINT32_C(0xffff));
95
96#else
97# error "error"
98#endif
99 return off;
100}
101
102
103/**
104 * Emit a breakpoint instruction.
105 */
106DECL_INLINE_THROW(uint32_t) iemNativeEmitBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
107{
108#ifdef RT_ARCH_AMD64
109 off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
110#elif defined(RT_ARCH_ARM64)
111 off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
112#else
113# error "error"
114#endif
115 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
116 return off;
117}
118
119
120/*********************************************************************************************************************************
121* Loads, Stores and Related Stuff. *
122*********************************************************************************************************************************/
123
124#ifdef RT_ARCH_AMD64
125/**
126 * Common bit of iemNativeEmitLoadGprByGpr and friends.
127 */
128DECL_FORCE_INLINE(uint32_t)
129iemNativeEmitGprByGprDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp)
130{
131 if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP) /* Can use encoding w/o displacement field. */
132 {
133 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, iGprReg & 7, iGprBase & 7);
134 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
135 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
136 }
137 else if (offDisp == (int8_t)offDisp)
138 {
139 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, iGprBase & 7);
140 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
141 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
142 pbCodeBuf[off++] = (uint8_t)offDisp;
143 }
144 else
145 {
146 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, iGprBase & 7);
147 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
148 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
149 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
150 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
151 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
152 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
153 }
154 return off;
155}
156#endif /* RT_ARCH_AMD64 */
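/*
 * Illustrative summary of the encodings chosen above (the REX prefix and opcode
 * are emitted by the caller; this helper only appends ModRM, SIB and displacement):
 *
 *      [rax]          -> mod=00, no displacement byte
 *      [rbp]          -> mod=01 with an 8-bit zero displacement (rbp/r13 cannot use mod=00)
 *      [rsp + 0x10]   -> mod=01 + disp8, with a SIB byte because rsp/r12 is the base
 *      [rbx - 0x2000] -> mod=10 + 32-bit displacement
 */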
157
158/**
159 * Emits setting a GPR to zero.
160 */
161DECL_INLINE_THROW(uint32_t)
162iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
163{
164#ifdef RT_ARCH_AMD64
165 /* xor gpr32, gpr32 */
166 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
167 if (iGpr >= 8)
168 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
169 pbCodeBuf[off++] = 0x33;
170 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
171
172#elif defined(RT_ARCH_ARM64)
173 /* mov gpr, #0x0 */
174 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
175 pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;
176
177#else
178# error "port me"
179#endif
180 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
181 return off;
182}
183
184
185/**
186 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
187 * buffer space.
188 *
189 * Max buffer consumption:
190 * - AMD64: 6 instruction bytes.
191 * - ARM64: 2 instruction words (8 bytes).
192 *
193 * @note The top 32 bits will be cleared.
194 */
195DECL_FORCE_INLINE(uint32_t)
196iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
197{
198#ifdef RT_ARCH_AMD64
199 if (uImm32 == 0)
200 {
201 /* xor gpr, gpr */
202 if (iGpr >= 8)
203 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
204 pCodeBuf[off++] = 0x33;
205 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
206 }
207 else
208 {
209 /* mov gpr, imm32 */
210 if (iGpr >= 8)
211 pCodeBuf[off++] = X86_OP_REX_B;
212 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
213 pCodeBuf[off++] = RT_BYTE1(uImm32);
214 pCodeBuf[off++] = RT_BYTE2(uImm32);
215 pCodeBuf[off++] = RT_BYTE3(uImm32);
216 pCodeBuf[off++] = RT_BYTE4(uImm32);
217 }
218
219#elif defined(RT_ARCH_ARM64)
220 if ((uImm32 >> 16) == 0)
221 /* movz gpr, imm16 */
222 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32, 0, false /*f64Bit*/);
223 else if ((uImm32 & UINT32_C(0xffff)) == 0)
224 /* movz gpr, imm16, lsl #16 */
225 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
226 else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
227 /* movn gpr, imm16, lsl #16 */
228 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16, 1, false /*f64Bit*/);
229 else if ((uImm32 >> 16) == UINT32_C(0xffff))
230 /* movn gpr, imm16 */
231 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32, 0, false /*f64Bit*/);
232 else
233 {
234 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
235 pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
236 }
237
238#else
239# error "port me"
240#endif
241 return off;
242}
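/*
 * Illustrative examples of the ARM64 encoding selection above (register number
 * picked arbitrarily):
 *
 *      uImm32 = 0x00001234 -> movz w0, #0x1234
 *      uImm32 = 0x12340000 -> movz w0, #0x1234, lsl #16
 *      uImm32 = 0xffff1234 -> movn w0, #0xedcb
 *      uImm32 = 0x1234ffff -> movn w0, #0xedcb, lsl #16
 *      uImm32 = 0x12345678 -> movz w0, #0x5678 + movk w0, #0x1234, lsl #16
 *
 * On AMD64 the zero case becomes 'xor r32, r32' and everything else a plain
 * 'mov r32, imm32'.
 */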
243
244
245/**
246 * Compile-time constant variant of iemNativeEmitLoadGpr32ImmEx where the caller ensures sufficient
247 * buffer space.
248 *
249 * Max buffer consumption:
250 * - AMD64: 6 instruction bytes.
251 * - ARM64: 2 instruction words (8 bytes).
252 *
253 * @note The top 32 bits will be cleared.
254 */
255template<uint32_t const a_uImm32>
256DECL_FORCE_INLINE(uint32_t) iemNativeEmitLoadGpr32ImmExT(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr)
257{
258#ifdef RT_ARCH_AMD64
259 if (a_uImm32 == 0)
260 {
261 /* xor gpr, gpr */
262 if (iGpr >= 8)
263 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
264 pCodeBuf[off++] = 0x33;
265 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
266 }
267 else
268 {
269 /* mov gpr, imm32 */
270 if (iGpr >= 8)
271 pCodeBuf[off++] = X86_OP_REX_B;
272 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
273 pCodeBuf[off++] = RT_BYTE1(a_uImm32);
274 pCodeBuf[off++] = RT_BYTE2(a_uImm32);
275 pCodeBuf[off++] = RT_BYTE3(a_uImm32);
276 pCodeBuf[off++] = RT_BYTE4(a_uImm32);
277 }
278
279#elif defined(RT_ARCH_ARM64)
280 if RT_CONSTEXPR_IF((a_uImm32 >> 16) == 0)
281 /* movz gpr, imm16 */
282 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, a_uImm32, 0, false /*f64Bit*/);
283 else if RT_CONSTEXPR_IF((a_uImm32 & UINT32_C(0xffff)) == 0)
284 /* movz gpr, imm16, lsl #16 */
285 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, a_uImm32 >> 16, 1, false /*f64Bit*/);
286 else if RT_CONSTEXPR_IF((a_uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
287 /* movn gpr, imm16, lsl #16 */
288 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~a_uImm32 >> 16, 1, false /*f64Bit*/);
289 else if RT_CONSTEXPR_IF((a_uImm32 >> 16) == UINT32_C(0xffff))
290 /* movn gpr, imm16 */
291 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~a_uImm32, 0, false /*f64Bit*/);
292 else
293 {
294 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, a_uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
295 pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, a_uImm32 >> 16, 1, false /*f64Bit*/);
296 }
297
298#else
299# error "port me"
300#endif
301 return off;
302}
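/*
 * The template variant above emits the same code, but with a_uImm32 being a
 * compile-time constant the encoding selection collapses during compilation.
 * Hypothetical usage sketch:
 *
 *      off = iemNativeEmitLoadGpr32ImmExT<0>(pCodeBuf, off, idxReg); // -> 'xor r32, r32' / 'movz wN, #0'
 */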
303
304
305/**
306 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
307 * buffer space.
308 *
309 * Max buffer consumption:
310 * - AMD64: 10 instruction bytes.
311 * - ARM64: 4 instruction words (16 bytes).
312 */
313DECL_FORCE_INLINE(uint32_t)
314iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
315{
316#ifdef RT_ARCH_AMD64
317 if (uImm64 == 0)
318 {
319 /* xor gpr, gpr */
320 if (iGpr >= 8)
321 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
322 pCodeBuf[off++] = 0x33;
323 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
324 }
325 else if (uImm64 <= UINT32_MAX)
326 {
327 /* mov gpr, imm32 */
328 if (iGpr >= 8)
329 pCodeBuf[off++] = X86_OP_REX_B;
330 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
331 pCodeBuf[off++] = RT_BYTE1(uImm64);
332 pCodeBuf[off++] = RT_BYTE2(uImm64);
333 pCodeBuf[off++] = RT_BYTE3(uImm64);
334 pCodeBuf[off++] = RT_BYTE4(uImm64);
335 }
336 else if (uImm64 == (uint64_t)(int32_t)uImm64)
337 {
338 /* mov gpr, sx(imm32) */
339 if (iGpr < 8)
340 pCodeBuf[off++] = X86_OP_REX_W;
341 else
342 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
343 pCodeBuf[off++] = 0xc7;
344 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGpr & 7);
345 pCodeBuf[off++] = RT_BYTE1(uImm64);
346 pCodeBuf[off++] = RT_BYTE2(uImm64);
347 pCodeBuf[off++] = RT_BYTE3(uImm64);
348 pCodeBuf[off++] = RT_BYTE4(uImm64);
349 }
350 else
351 {
352 /* mov gpr, imm64 */
353 if (iGpr < 8)
354 pCodeBuf[off++] = X86_OP_REX_W;
355 else
356 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
357 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
358 pCodeBuf[off++] = RT_BYTE1(uImm64);
359 pCodeBuf[off++] = RT_BYTE2(uImm64);
360 pCodeBuf[off++] = RT_BYTE3(uImm64);
361 pCodeBuf[off++] = RT_BYTE4(uImm64);
362 pCodeBuf[off++] = RT_BYTE5(uImm64);
363 pCodeBuf[off++] = RT_BYTE6(uImm64);
364 pCodeBuf[off++] = RT_BYTE7(uImm64);
365 pCodeBuf[off++] = RT_BYTE8(uImm64);
366 }
367
368#elif defined(RT_ARCH_ARM64)
369 /*
370 * Quick simplification: Do 32-bit load if top half is zero.
371 */
372 if (uImm64 <= UINT32_MAX)
373 return iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGpr, (uint32_t)uImm64);
374
375 /*
376 * We need to start this sequence with a 'movz gpr, imm16, lsl #x' and
377 * supply remaining bits using 'movk gpr, imm16, lsl #x'.
378 *
379 * The movz instruction is encoded 0xd2800000 + shift + imm16 + gpr,
380 * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
381 * is 0x20000000 (bit 29). So, we keep this bit in a variable and set it
382 * after the first non-zero immediate component so we switch to movk for
383 * the remainder.
384 */
385 unsigned cZeroHalfWords = !( uImm64 & UINT16_MAX)
386 + !((uImm64 >> 16) & UINT16_MAX)
387 + !((uImm64 >> 32) & UINT16_MAX)
388 + !((uImm64 >> 48) & UINT16_MAX);
389 unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
390 : ( (uImm64 & UINT16_MAX) == UINT16_MAX)
391 + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
392 + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
393 + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
394 if (cFfffHalfWords <= cZeroHalfWords)
395 {
396 uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;
397
398 /* movz gpr, imm16 */
399 uint32_t uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
400 if (uImmPart || cZeroHalfWords == 4)
401 {
402 pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
403 fMovBase |= RT_BIT_32(29);
404 }
405 /* mov[z/k] gpr, imm16, lsl #16 */
406 uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
407 if (uImmPart)
408 {
409 pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
410 fMovBase |= RT_BIT_32(29);
411 }
412 /* mov[z/k] gpr, imm16, lsl #32 */
413 uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
414 if (uImmPart)
415 {
416 pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
417 fMovBase |= RT_BIT_32(29);
418 }
419 /* mov[z/k] gpr, imm16, lsl #48 */
420 uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
421 if (uImmPart)
422 pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
423 }
424 else
425 {
426 uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;
427
428 /* find the first half-word that isn't UINT16_MAX. */
429 uint32_t const iHwNotFfff = (uImm64 & UINT16_MAX) != UINT16_MAX ? 0
430 : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
431 : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;
432
433 /* movn gpr, imm16, lsl #iHwNotFfff*16 */
434 uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
435 pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
436 fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
437 /* movk gpr, imm16 */
438 if (iHwNotFfff != 0)
439 {
440 uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
441 if (uImmPart != UINT32_C(0xffff))
442 pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
443 }
444 /* movk gpr, imm16, lsl #16 */
445 if (iHwNotFfff != 1)
446 {
447 uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
448 if (uImmPart != UINT32_C(0xffff))
449 pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
450 }
451 /* movk gpr, imm16, lsl #32 */
452 if (iHwNotFfff != 2)
453 {
454 uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
455 if (uImmPart != UINT32_C(0xffff))
456 pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
457 }
458 /* movk gpr, imm16, lsl #48 */
459 if (iHwNotFfff != 3)
460 {
461 uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
462 if (uImmPart != UINT32_C(0xffff))
463 pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
464 }
465 }
466
467#else
468# error "port me"
469#endif
470 return off;
471}
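/*
 * Illustrative examples of the ARM64 movz/movn/movk selection above (register
 * number picked arbitrarily):
 *
 *      uImm64 = 0x0000000012345678 -> movz w0, #0x5678 + movk w0, #0x1234, lsl #16 (32-bit path)
 *      uImm64 = 0x0001000000000000 -> movz x0, #0x1, lsl #48
 *      uImm64 = 0xffffffffffff1234 -> movn x0, #0xedcb
 *      uImm64 = 0x0000ffff12340000 -> movz x0, #0x1234, lsl #16 + movk x0, #0xffff, lsl #32
 */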
472
473
474/**
475 * Emits loading a constant into a 64-bit GPR
476 */
477DECL_INLINE_THROW(uint32_t)
478iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
479{
480#ifdef RT_ARCH_AMD64
481 off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
482#elif defined(RT_ARCH_ARM64)
483 off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
484#else
485# error "port me"
486#endif
487 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
488 return off;
489}
490
491
492/**
493 * Emits loading a constant into a 32-bit GPR.
494 * @note The top 32 bits will be cleared.
495 */
496DECL_INLINE_THROW(uint32_t)
497iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
498{
499#ifdef RT_ARCH_AMD64
500 off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
501#elif defined(RT_ARCH_ARM64)
502 off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
503#else
504# error "port me"
505#endif
506 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
507 return off;
508}
509
510
511/**
512 * Emits loading a constant into an 8-bit GPR.
513 * @note The AMD64 version does *NOT* clear any bits in the 8..63 range,
514 * only the ARM64 version does that.
515 */
516DECL_INLINE_THROW(uint32_t)
517iemNativeEmitLoadGpr8Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint8_t uImm8)
518{
519#ifdef RT_ARCH_AMD64
520 /* mov gpr, imm8 */
521 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
522 if (iGpr >= 8)
523 pbCodeBuf[off++] = X86_OP_REX_B;
524 else if (iGpr >= 4)
525 pbCodeBuf[off++] = X86_OP_REX;
526 pbCodeBuf[off++] = 0xb0 + (iGpr & 7);
527 pbCodeBuf[off++] = RT_BYTE1(uImm8);
528
529#elif defined(RT_ARCH_ARM64)
530 /* movz gpr, imm16, lsl #0 */
531 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
532 pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | ((uint32_t)uImm8 << 5) | iGpr;
533
534#else
535# error "port me"
536#endif
537 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
538 return off;
539}
540
541
542#ifdef RT_ARCH_AMD64
543/**
544 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
545 */
546DECL_FORCE_INLINE(uint32_t)
547iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
548{
549 if (offVCpu < 128)
550 {
551 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
552 pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
553 }
554 else
555 {
556 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
557 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
558 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
559 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
560 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
561 }
562 return off;
563}
564
565/**
566 * Special variant of iemNativeEmitGprByVCpuDisp for accessing the VM structure.
567 */
568DECL_FORCE_INLINE(uint32_t)
569iemNativeEmitGprByVCpuSignedDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offVCpu)
570{
571 if (offVCpu < 128 && offVCpu >= -128)
572 {
573 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
574 pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
575 }
576 else
577 {
578 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
579 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
580 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
581 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
582 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
583 }
584 return off;
585}
586
587#elif defined(RT_ARCH_ARM64)
588
589/**
590 * Common bit of iemNativeEmitLoadGprFromVCpuU64Ex and friends.
591 *
592 * @note Loads can use @a iGprReg for large offsets, stores require a temporary
593 * register (@a iGprTmp).
594 * @note DON'T try this with prefetch.
595 */
596DECL_FORCE_INLINE_THROW(uint32_t)
597iemNativeEmitGprByVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu,
598 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
599{
600 /*
601 * There are a couple of ldr variants that take an immediate offset, so
602 * try to use those if we can, otherwise we have to use the temporary register
603 * to help with the addressing.
604 */
605 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
606 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
607 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
608 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
609 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
610 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
611 else if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
612 {
613 /* The offset is too large, so we must load it into a register and use
614 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
615 /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
616 if (iGprTmp == UINT8_MAX)
617 iGprTmp = iGprReg;
618 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, offVCpu);
619 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, iGprTmp);
620 }
621 else
622# ifdef IEM_WITH_THROW_CATCH
623 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
624# else
625 AssertReleaseFailedStmt(off = UINT32_MAX);
626# endif
627
628 return off;
629}
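/*
 * Illustrative sketch of the three addressing strategies above for a 64-bit
 * load (offsets made up for the example):
 *
 *      small aligned offVCpu (< 4K * 8)      -> ldr xREG, [xPVMCPU, #offVCpu]
 *      within 4K * 8 of cpum.GstCtx          -> ldr xREG, [xPCPUMCTX, #(offVCpu - offGstCtx)]
 *      anything larger                       -> mov xTMP, #offVCpu
 *                                               ldr xREG, [xPVMCPU, xTMP]
 *
 * For stores the register being stored cannot double as the offset register,
 * which is why the optional @a iGprTmp is needed in the last case.
 */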
630
631/**
632 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
633 */
634DECL_FORCE_INLINE_THROW(uint32_t)
635iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
636 uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
637{
638 /*
639 * There are a couple of ldr variants that take an immediate offset, so
640 * try to use those if we can, otherwise we have to use the temporary register
641 * to help with the addressing.
642 */
643 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
644 {
645 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
646 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
647 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
648 }
649 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
650 {
651 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
652 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
653 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
654 }
655 else
656 {
657 /* The offset is too large, so we must load it into a register and use
658 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
659 /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
660 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
661 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
662 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU,
663 IEMNATIVE_REG_FIXED_TMP0);
664 }
665 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
666 return off;
667}
668
669
670/**
671 * Special variant of iemNativeEmitGprByVCpuLdStEx for accessing the VM
672 * structure.
673 *
674 * @note Loads can use @a iGprReg for large offsets, stores require a temporary
675 * register (@a iGprTmp).
676 * @note DON'T try this with prefetch.
677 */
678DECL_FORCE_INLINE_THROW(uint32_t)
679iemNativeEmitGprBySignedVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offVCpu,
680 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
681{
682 Assert((uint32_t)RT_ABS(offVCpu) < RT_BIT_32(28)); /* we should be way out of range for problematic sign extending issues. */
683 Assert(!((uint32_t)RT_ABS(offVCpu) & (cbData - 1)));
684
685 /*
686 * For negative offsets we need to put the displacement in a register,
687 * as the two variants with signed immediates will either post- or
688 * pre-increment the base address register.
689 */
690 if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
691 {
692 uint8_t const idxIndexReg = !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) ? iGprReg : IEMNATIVE_REG_FIXED_TMP0;
693 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxIndexReg, offVCpu / (int32_t)cbData);
694 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, idxIndexReg,
695 kArmv8A64InstrLdStExtend_Sxtw, cbData > 1 /*fShifted*/);
696 }
697 else
698# ifdef IEM_WITH_THROW_CATCH
699 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
700# else
701 AssertReleaseFailedStmt(off = UINT32_MAX);
702# endif
703
704 return off;
705}
706
707/**
708 * Special variant of iemNativeEmitGprByVCpuLdSt for accessing the VM structure.
709 */
710DECL_FORCE_INLINE_THROW(uint32_t)
711iemNativeEmitGprBySignedVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
712 int32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
713{
714 off = iemNativeEmitGprBySignedVCpuLdStEx(iemNativeInstrBufEnsure(pReNative, off, 2 + 1), off, iGprReg,
715 offVCpu, enmOperation, cbData, IEMNATIVE_REG_FIXED_TMP0);
716 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
717 return off;
718}
719
720#endif /* RT_ARCH_ARM64 */
721
722
723/**
724 * Emits a 64-bit GPR load of a VCpu value.
725 */
726DECL_FORCE_INLINE_THROW(uint32_t)
727iemNativeEmitLoadGprFromVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
728{
729#ifdef RT_ARCH_AMD64
730 /* mov reg64, mem64 */
731 if (iGpr < 8)
732 pCodeBuf[off++] = X86_OP_REX_W;
733 else
734 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
735 pCodeBuf[off++] = 0x8b;
736 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
737
738#elif defined(RT_ARCH_ARM64)
739 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
740
741#else
742# error "port me"
743#endif
744 return off;
745}
746
747
748/**
749 * Emits a 64-bit GPR load of a VCpu value.
750 */
751DECL_INLINE_THROW(uint32_t)
752iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
753{
754#ifdef RT_ARCH_AMD64
755 off = iemNativeEmitLoadGprFromVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
756 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
757
758#elif defined(RT_ARCH_ARM64)
759 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
760
761#else
762# error "port me"
763#endif
764 return off;
765}
766
767/**
768 * Emits a 32-bit GPR load of a VCpu value.
769 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
770 */
771DECL_FORCE_INLINE_THROW(uint32_t)
772iemNativeEmitLoadGprFromVCpuU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
773{
774#ifdef RT_ARCH_AMD64
775 /* mov reg32, mem32 */
776 if (iGpr >= 8)
777 pCodeBuf[off++] = X86_OP_REX_R;
778 pCodeBuf[off++] = 0x8b;
779 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
780
781#elif defined(RT_ARCH_ARM64)
782 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
783
784#else
785# error "port me"
786#endif
787 return off;
788}
789
790
791/**
792 * Emits a 32-bit GPR load of a VCpu value.
793 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
794 */
795DECL_INLINE_THROW(uint32_t)
796iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
797{
798#ifdef RT_ARCH_AMD64
799 off = iemNativeEmitLoadGprFromVCpuU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
800 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
801
802#elif defined(RT_ARCH_ARM64)
803 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
804
805#else
806# error "port me"
807#endif
808 return off;
809}
810
811
812/**
813 * Emits a 16-bit GPR load of a VCpu value.
814 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
815 */
816DECL_FORCE_INLINE_THROW(uint32_t)
817iemNativeEmitLoadGprFromVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
818{
819#ifdef RT_ARCH_AMD64
820 /* movzx reg32, mem16 */
821 if (iGpr >= 8)
822 pCodeBuf[off++] = X86_OP_REX_R;
823 pCodeBuf[off++] = 0x0f;
824 pCodeBuf[off++] = 0xb7;
825 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
826
827#elif defined(RT_ARCH_ARM64)
828 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
829
830#else
831# error "port me"
832#endif
833 return off;
834}
835
836
837/**
838 * Emits a 16-bit GPR load of a VCpu value.
839 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
840 */
841DECL_INLINE_THROW(uint32_t)
842iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
843{
844#ifdef RT_ARCH_AMD64
845 off = iemNativeEmitLoadGprFromVCpuU16Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGpr, offVCpu);
846 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
847
848#elif defined(RT_ARCH_ARM64)
849 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
850
851#else
852# error "port me"
853#endif
854 return off;
855}
856
857
858/**
859 * Emits an 8-bit GPR load of a VCpu value.
860 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
861 */
862DECL_INLINE_THROW(uint32_t)
863iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
864{
865#ifdef RT_ARCH_AMD64
866 /* movzx reg32, mem8 */
867 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
868 if (iGpr >= 8)
869 pbCodeBuf[off++] = X86_OP_REX_R;
870 pbCodeBuf[off++] = 0x0f;
871 pbCodeBuf[off++] = 0xb6;
872 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
873 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
874
875#elif defined(RT_ARCH_ARM64)
876 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
877
878#else
879# error "port me"
880#endif
881 return off;
882}
883
884
885/**
886 * Emits a store of a GPR value to a 64-bit VCpu field.
887 */
888DECL_FORCE_INLINE_THROW(uint32_t)
889iemNativeEmitStoreGprToVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
890 uint8_t iGprTmp = UINT8_MAX)
891{
892#ifdef RT_ARCH_AMD64
893 /* mov mem64, reg64 */
894 if (iGpr < 8)
895 pCodeBuf[off++] = X86_OP_REX_W;
896 else
897 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
898 pCodeBuf[off++] = 0x89;
899 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
900 RT_NOREF(iGprTmp);
901
902#elif defined(RT_ARCH_ARM64)
903 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
904
905#else
906# error "port me"
907#endif
908 return off;
909}
910
911
912/**
913 * Emits a store of a GPR value to a 64-bit VCpu field.
914 */
915DECL_INLINE_THROW(uint32_t)
916iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
917{
918#ifdef RT_ARCH_AMD64
919 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
920#elif defined(RT_ARCH_ARM64)
921 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGpr, offVCpu,
922 IEMNATIVE_REG_FIXED_TMP0);
923#else
924# error "port me"
925#endif
926 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
927 return off;
928}
929
930
931/**
932 * Emits a store of a GPR value to a 32-bit VCpu field.
933 *
934 * @note Limited range on ARM64.
935 */
936DECL_INLINE_THROW(uint32_t)
937iemNativeEmitStoreGprToVCpuU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
938 uint8_t iGprTmp = UINT8_MAX)
939{
940#ifdef RT_ARCH_AMD64
941 /* mov mem32, reg32 */
942 if (iGpr >= 8)
943 pCodeBuf[off++] = X86_OP_REX_R;
944 pCodeBuf[off++] = 0x89;
945 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
946 RT_NOREF(iGprTmp);
947
948#elif defined(RT_ARCH_ARM64)
949 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
950
951#else
952# error "port me"
953#endif
954 return off;
955}
956
957
958/**
959 * Emits a store of a GPR value to a 32-bit VCpu field.
960 */
961DECL_INLINE_THROW(uint32_t)
962iemNativeEmitStoreGprToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
963{
964#ifdef RT_ARCH_AMD64
965 /* mov mem32, reg32 */
966 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
967 if (iGpr >= 8)
968 pbCodeBuf[off++] = X86_OP_REX_R;
969 pbCodeBuf[off++] = 0x89;
970 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
971 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
972
973#elif defined(RT_ARCH_ARM64)
974 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
975
976#else
977# error "port me"
978#endif
979 return off;
980}
981
982
983/**
984 * Emits a store of a GPR value to a 16-bit VCpu field.
985 *
986 * @note Limited range on ARM64.
987 */
988DECL_INLINE_THROW(uint32_t)
989iemNativeEmitStoreGprToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
990 uint8_t iGprTmp = UINT8_MAX)
991{
992#ifdef RT_ARCH_AMD64
993 /* mov mem16, reg16 */
994 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
995 if (iGpr >= 8)
996 pCodeBuf[off++] = X86_OP_REX_R;
997 pCodeBuf[off++] = 0x89;
998 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
999 RT_NOREF(iGprTmp);
1000
1001#elif defined(RT_ARCH_ARM64)
1002 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
1003
1004#else
1005# error "port me"
1006#endif
1007 return off;
1008}
1009
1010
1011/**
1012 * Emits a store of a GPR value to a 16-bit VCpu field.
1013 */
1014DECL_INLINE_THROW(uint32_t)
1015iemNativeEmitStoreGprToVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
1016{
1017#ifdef RT_ARCH_AMD64
1018 /* mov mem16, reg16 */
1019 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1020 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1021 if (iGpr >= 8)
1022 pbCodeBuf[off++] = X86_OP_REX_R;
1023 pbCodeBuf[off++] = 0x89;
1024 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
1025 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1026
1027#elif defined(RT_ARCH_ARM64)
1028 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));
1029
1030#else
1031# error "port me"
1032#endif
1033 return off;
1034}
1035
1036
1037/**
1038 * Emits a store of a GPR value to an 8-bit VCpu field.
1039 */
1040DECL_INLINE_THROW(uint32_t)
1041iemNativeEmitStoreGprToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
1042{
1043#ifdef RT_ARCH_AMD64
1044 /* mov mem8, reg8 */
1045 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1046 if (iGpr >= 8)
1047 pbCodeBuf[off++] = X86_OP_REX_R;
1048 pbCodeBuf[off++] = 0x88;
1049 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
1050 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1051
1052#elif defined(RT_ARCH_ARM64)
1053 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
1054
1055#else
1056# error "port me"
1057#endif
1058 return off;
1059}
1060
1061
1062/**
1063 * Emits a store of an immediate value to a 64-bit VCpu field.
1064 *
1065 * @note Will allocate temporary registers on both ARM64 and AMD64.
1066 */
1067DECL_FORCE_INLINE_THROW(uint32_t)
1068iemNativeEmitStoreImmToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uImm, uint32_t offVCpu)
1069{
1070#ifdef RT_ARCH_AMD64
1071 /* mov mem64, reg64 (immediate loaded into a temporary register) */
1072 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
1073 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxRegImm, offVCpu);
1074 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1075 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
1076
1077#elif defined(RT_ARCH_ARM64)
1078 uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
1079 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t));
1080 if (idxRegImm != ARMV8_A64_REG_XZR)
1081 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
1082
1083#else
1084# error "port me"
1085#endif
1086 return off;
1087}
1088
1089
1090/**
1091 * Emits a store of an immediate value to a 32-bit VCpu field.
1092 *
1093 * @note ARM64: Will allocate temporary registers.
1094 */
1095DECL_FORCE_INLINE_THROW(uint32_t)
1096iemNativeEmitStoreImmToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uImm, uint32_t offVCpu)
1097{
1098#ifdef RT_ARCH_AMD64
1099 /* mov mem32, imm32 */
1100 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1101 pCodeBuf[off++] = 0xc7;
1102 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1103 pCodeBuf[off++] = RT_BYTE1(uImm);
1104 pCodeBuf[off++] = RT_BYTE2(uImm);
1105 pCodeBuf[off++] = RT_BYTE3(uImm);
1106 pCodeBuf[off++] = RT_BYTE4(uImm);
1107 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1108
1109#elif defined(RT_ARCH_ARM64)
1110 uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
1111 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
1112 if (idxRegImm != ARMV8_A64_REG_XZR)
1113 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
1114
1115#else
1116# error "port me"
1117#endif
1118 return off;
1119}
1120
1121
1122
1123/**
1124 * Emits a store of an immediate value to a 16-bit VCpu field.
1125 *
1126 * @note ARM64: @a idxTmp1 is always required! Whether @a idxTmp2 is needed depends
1127 * on whether the offset can be encoded as an immediate or not. The @a offVCpu immediate
1128 * range is 0..8190 bytes from VMCPU and the same from CPUMCPU.
1129 */
1130DECL_FORCE_INLINE_THROW(uint32_t)
1131iemNativeEmitStoreImmToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint32_t offVCpu,
1132 uint8_t idxTmp1 = UINT8_MAX, uint8_t idxTmp2 = UINT8_MAX)
1133{
1134#ifdef RT_ARCH_AMD64
1135 /* mov mem16, imm16 */
1136 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1137 pCodeBuf[off++] = 0xc7;
1138 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1139 pCodeBuf[off++] = RT_BYTE1(uImm);
1140 pCodeBuf[off++] = RT_BYTE2(uImm);
1141 RT_NOREF(idxTmp1, idxTmp2);
1142
1143#elif defined(RT_ARCH_ARM64)
1144 if (idxTmp1 != UINT8_MAX)
1145 {
1146 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp1, uImm);
1147 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, idxTmp1, offVCpu, kArmv8A64InstrLdStType_St_Half,
1148 sizeof(uint16_t), idxTmp2);
1149 }
1150 else
1151# ifdef IEM_WITH_THROW_CATCH
1152 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
1153# else
1154 AssertReleaseFailedStmt(off = UINT32_MAX);
1155# endif
1156
1157#else
1158# error "port me"
1159#endif
1160 return off;
1161}
1162
1163
1164/**
1165 * Emits a store of an immediate value to a 8-bit VCpu field.
1166 */
1167DECL_INLINE_THROW(uint32_t)
1168iemNativeEmitStoreImmToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bImm, uint32_t offVCpu,
1169 uint8_t idxRegTmp = UINT8_MAX)
1170{
1171#ifdef RT_ARCH_AMD64
1172 /* mov mem8, imm8 */
1173 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1174 pbCodeBuf[off++] = 0xc6;
1175 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, offVCpu);
1176 pbCodeBuf[off++] = bImm;
1177 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1178 RT_NOREF(idxRegTmp);
1179
1180#elif defined(RT_ARCH_ARM64)
1181 /* Cannot use IEMNATIVE_REG_FIXED_TMP0 for the immediate as that's used by iemNativeEmitGprByVCpuLdSt. */
1182 if (idxRegTmp != UINT8_MAX)
1183 {
1184 Assert(idxRegTmp != IEMNATIVE_REG_FIXED_TMP0);
1185 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegTmp, bImm);
1186 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegTmp, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
1187 }
1188 else
1189 {
1190 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, bImm);
1191 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
1192 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
1193 }
1194
1195#else
1196# error "port me"
1197#endif
1198 return off;
1199}
1200
1201
1202/**
1203 * Emits a load of the effective address of a VCpu field into a GPR.
1204 */
1205DECL_INLINE_THROW(uint32_t)
1206iemNativeEmitLeaGprByVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t offVCpu)
1207{
1208#ifdef RT_ARCH_AMD64
1209 /* lea gprdst, [rbx + offDisp] */
1210 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1211 if (iGprDst < 8)
1212 pbCodeBuf[off++] = X86_OP_REX_W;
1213 else
1214 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
1215 pbCodeBuf[off++] = 0x8d;
1216 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGprDst, offVCpu);
1217
1218#elif defined(RT_ARCH_ARM64)
1219 if (offVCpu < (unsigned)_4K)
1220 {
1221 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1222 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu);
1223 }
1224 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)_4K)
1225 {
1226 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1227 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX,
1228 offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx));
1229 }
1230 else if (offVCpu <= 0xffffffU)
1231 {
1232 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1233 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu >> 12,
1234 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
1235 if (offVCpu & 0xfffU)
1236 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, offVCpu & 0xfff);
1237 }
1238 else
1239 {
1240 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
1241 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offVCpu);
1242 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1243 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, iGprDst);
1244 }
1245
1246#else
1247# error "port me"
1248#endif
1249 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1250 return off;
1251}
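/*
 * Illustrative sketch of the ARM64 paths above (offsets made up for the example):
 *
 *      offVCpu < 4K                 -> add xDST, xPVMCPU, #offVCpu
 *      within 4K of cpum.GstCtx     -> add xDST, xPCPUMCTX, #(offVCpu - offGstCtx)
 *      offVCpu <= 0xffffff          -> add xDST, xPVMCPU, #(offVCpu >> 12), lsl #12
 *                                      add xDST, xDST, #(offVCpu & 0xfff)
 */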
1252
1253
1254/** This is just a typesafe alternative to RT_UOFFSETOF. */
1255DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromStamCounterPtr(PVMCPU pVCpu, PSTAMCOUNTER pStamCounter)
1256{
1257 uintptr_t const off = (uintptr_t)pStamCounter - (uintptr_t)pVCpu;
1258 Assert(off < sizeof(VMCPU));
1259 return off;
1260}
1261
1262
1263/** This is just a typesafe alternative to RT_UOFFSETOF. */
1264DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromU64Ptr(PVMCPU pVCpu, uint64_t *pu64)
1265{
1266 uintptr_t const off = (uintptr_t)pu64 - (uintptr_t)pVCpu;
1267 Assert(off < sizeof(VMCPU));
1268 return off;
1269}
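/*
 * Illustrative usage sketch (the member name is made up for the example):
 *
 *      off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegValue,
 *                                           iemNativeVCpuOffsetFromU64Ptr(pVCpu, &pVCpu->iem.s.u64SomeField));
 *
 * This keeps the offset derivation type checked, unlike a raw RT_UOFFSETOF.
 */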
1270
1271
1272/**
1273 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
1274 *
1275 * @note The two temp registers are not required for AMD64. ARM64 always
1276 * requires the first, and the 2nd is needed if the offset cannot be
1277 * encoded as an immediate.
1278 */
1279DECL_FORCE_INLINE(uint32_t)
1280iemNativeEmitIncStamCounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1281{
1282#ifdef RT_ARCH_AMD64
1283 /* inc qword [pVCpu + off] */
1284 pCodeBuf[off++] = X86_OP_REX_W;
1285 pCodeBuf[off++] = 0xff;
1286 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1287 RT_NOREF(idxTmp1, idxTmp2);
1288
1289#elif defined(RT_ARCH_ARM64)
1290 /* Determine how we're to access pVCpu first. */
1291 uint32_t const cbData = sizeof(STAMCOUNTER);
1292 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
1293 {
1294 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1295 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
1296 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1297 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1298 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
1299 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1300 }
1301 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
1302 {
1303 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1304 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1305 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1306 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1307 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1308 }
1309 else
1310 {
1311 /* The offset is too large, so we must load it into a register and use
1312 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
1313 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
1314 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1315 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1316 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1317 }
1318
1319#else
1320# error "port me"
1321#endif
1322 return off;
1323}
1324
1325
1326/**
1327 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
1328 *
1329 * @note The two temp registers are not required for AMD64. ARM64 always
1330 * requires the first, and the 2nd is needed if the offset cannot be
1331 * encoded as an immediate.
1332 */
1333DECL_FORCE_INLINE(uint32_t)
1334iemNativeEmitIncStamCounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1335{
1336#ifdef RT_ARCH_AMD64
1337 off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxTmp1, idxTmp2, offVCpu);
1338#elif defined(RT_ARCH_ARM64)
1339 off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
1340#else
1341# error "port me"
1342#endif
1343 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1344 return off;
1345}
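/*
 * Illustrative usage sketch (the statistics member is made up for the example):
 *
 *      off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxTmp1, idxTmp2,
 *                                              iemNativeVCpuOffsetFromStamCounterPtr(pVCpu, &pVCpu->iem.s.StatSomething));
 *
 * On AMD64 this is a single 'inc qword [pVCpu + off]' and the temporary registers
 * go unused; on ARM64 it expands to a load/add/store sequence.
 */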
1346
1347
1348/**
1349 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
1350 *
1351 * @note The two temp registers are not required for AMD64. ARM64 always
1352 * requires the first, and the 2nd is needed if the offset cannot be
1353 * encoded as an immediate.
1354 */
1355DECL_FORCE_INLINE(uint32_t)
1356iemNativeEmitIncU32CounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1357{
1358 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1359#ifdef RT_ARCH_AMD64
1360 /* inc dword [pVCpu + offVCpu] */
1361 pCodeBuf[off++] = 0xff;
1362 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1363 RT_NOREF(idxTmp1, idxTmp2);
1364
1365#elif defined(RT_ARCH_ARM64)
1366 /* Determine how we're to access pVCpu first. */
1367 uint32_t const cbData = sizeof(uint32_t);
1368 if (offVCpu < (unsigned)(_4K * cbData))
1369 {
1370 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1371 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1,
1372 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1373 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1374 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1,
1375 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1376 }
1377 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1378 {
1379 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1380 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1381 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1382 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1383 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1384 }
1385 else
1386 {
1387 /* The offset is too large, so we must load it into a register and use
1388 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
1389 of the instruction if that'll reduce the constant to 16-bits. */
1390 if (offVCpu / cbData < (unsigned)UINT16_MAX)
1391 {
1392 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp2, offVCpu / cbData);
1393 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
1394 idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
1395 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1396 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
1397 idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
1398 }
1399 else
1400 {
1401 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
1402 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1403 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1404 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1405 }
1406 }
1407
1408#else
1409# error "port me"
1410#endif
1411 return off;
1412}
1413
1414
1415/**
1416 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
1417 *
1418 * @note The two temp registers are not required for AMD64. ARM64 always
1419 * requires the first, and the 2nd is needed if the offset cannot be
1420 * encoded as an immediate.
1421 */
1422DECL_FORCE_INLINE(uint32_t)
1423iemNativeEmitIncU32CounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1424{
1425#ifdef RT_ARCH_AMD64
1426 off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, idxTmp1, idxTmp2, offVCpu);
1427#elif defined(RT_ARCH_ARM64)
1428 off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
1429#else
1430# error "port me"
1431#endif
1432 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1433 return off;
1434}
1435
1436
1437/**
1438 * Emits code for OR'ing a bitmask into a 32-bit VMCPU member.
1439 *
1440 * @note May allocate temporary registers (not AMD64).
1441 */
1442DECL_FORCE_INLINE(uint32_t)
1443iemNativeEmitOrImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
1444{
1445 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1446#ifdef RT_ARCH_AMD64
1447 /* or dword [pVCpu + offVCpu], imm8/32 */
1448 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1449 if (fMask < 0x80)
1450 {
1451 pCodeBuf[off++] = 0x83;
1452 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
1453 pCodeBuf[off++] = (uint8_t)fMask;
1454 }
1455 else
1456 {
1457 pCodeBuf[off++] = 0x81;
1458 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
1459 pCodeBuf[off++] = RT_BYTE1(fMask);
1460 pCodeBuf[off++] = RT_BYTE2(fMask);
1461 pCodeBuf[off++] = RT_BYTE3(fMask);
1462 pCodeBuf[off++] = RT_BYTE4(fMask);
1463 }
1464
1465#elif defined(RT_ARCH_ARM64)
1466 /* If the constant is unwieldy we'll need a register to hold it as well. */
1467 uint32_t uImmSizeLen, uImmRotate;
1468 uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
1469 : iemNativeRegAllocTmpImm(pReNative, &off, fMask);
1470
1471 /* We need a temp register for holding the member value we're modifying. */
1472 uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);
1473
1474 /* Determine how we're to access pVCpu first. */
1475 uint32_t const cbData = sizeof(uint32_t);
1476 if (offVCpu < (unsigned)(_4K * cbData))
1477 {
1478 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1479 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1480 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
1481 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1482 if (idxTmpMask == UINT8_MAX)
1483 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1484 else
1485 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1486 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
1487 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1488 }
1489 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1490 {
1491 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1492 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1493 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1494 if (idxTmpMask == UINT8_MAX)
1495 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1496 else
1497 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1498 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1499 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1500 }
1501 else
1502 {
1503 /* The offset is too large, so we must load it into a register and use
1504 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
1505 of the instruction if that'll reduce the constant to 16-bits. */
1506 uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
1507 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1508 bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
1509 if (fShifted)
1510 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
1511 else
1512 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);
1513
1514 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1515 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1516
1517 if (idxTmpMask == UINT8_MAX)
1518 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1519 else
1520 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1521
1522 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1523 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1524 iemNativeRegFreeTmp(pReNative, idxTmpIndex);
1525 }
1526 iemNativeRegFreeTmp(pReNative, idxTmpValue);
1527 if (idxTmpMask != UINT8_MAX)
1528 iemNativeRegFreeTmp(pReNative, idxTmpMask);
1529
1530#else
1531# error "port me"
1532#endif
1533 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1534 return off;
1535}
1536
1537
1538/**
1539 * Emits code for AND'ing a bitmask into a 32-bit VMCPU member.
1540 *
1541 * @note May allocate temporary registers (not AMD64).
1542 */
1543DECL_FORCE_INLINE(uint32_t)
1544iemNativeEmitAndImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
1545{
1546 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1547#ifdef RT_ARCH_AMD64
1548 /* and dword [pVCpu + offVCpu], imm8/32 */
1549 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1550 if (fMask < 0x80)
1551 {
1552 pCodeBuf[off++] = 0x83;
1553 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1554 pCodeBuf[off++] = (uint8_t)fMask;
1555 }
1556 else
1557 {
1558 pCodeBuf[off++] = 0x81;
1559 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1560 pCodeBuf[off++] = RT_BYTE1(fMask);
1561 pCodeBuf[off++] = RT_BYTE2(fMask);
1562 pCodeBuf[off++] = RT_BYTE3(fMask);
1563 pCodeBuf[off++] = RT_BYTE4(fMask);
1564 }
1565
1566#elif defined(RT_ARCH_ARM64)
1567 /* If the constant is unwieldy we'll need a register to hold it as well. */
1568 uint32_t uImmSizeLen, uImmRotate;
1569 uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
1570 : iemNativeRegAllocTmpImm(pReNative, &off, fMask);
1571
1572 /* We need a temp register for holding the member value we're modifying. */
1573 uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);
1574
1575 /* Determine how we're to access pVCpu first. */
1576 uint32_t const cbData = sizeof(uint32_t);
1577 if (offVCpu < (unsigned)(_4K * cbData))
1578 {
1579 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1580 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1581 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
1582 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1583 if (idxTmpMask == UINT8_MAX)
1584 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1585 else
1586 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1587 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
1588 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1589 }
1590 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1591 {
1592 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1593 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1594 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1595 if (idxTmpMask == UINT8_MAX)
1596 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1597 else
1598 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1599 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1600 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1601 }
1602 else
1603 {
1604         /* The offset is too large, so we must load it into a register and use
1605            ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
1606            of the instruction if that reduces the constant to 16 bits. */
1607 uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
1608 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1609 bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
1610 if (fShifted)
1611 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
1612 else
1613 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);
1614
1615 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1616 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1617
1618 if (idxTmpMask == UINT8_MAX)
1619 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1620 else
1621 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1622
1623 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1624 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1625 iemNativeRegFreeTmp(pReNative, idxTmpIndex);
1626 }
1627 iemNativeRegFreeTmp(pReNative, idxTmpValue);
1628 if (idxTmpMask != UINT8_MAX)
1629 iemNativeRegFreeTmp(pReNative, idxTmpMask);
1630
1631#else
1632# error "port me"
1633#endif
1634 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1635 return off;
1636}
1637
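/*
 * Summary of the ARM64 read-modify-write strategy used by the OR and AND
 * emitters above (explanatory note only, no change in behaviour):
 *      1. offVCpu is 4 byte aligned and below 16 KiB: LDR/STR with a scaled
 *         unsigned 12-bit offset relative to IEMNATIVE_REG_FIXED_PVMCPU.
 *      2. Otherwise, if the offset relative to cpum.GstCtx fits that same
 *         encoding: address relative to IEMNATIVE_REG_FIXED_PCPUMCTX instead.
 *      3. Otherwise a temporary index register is set up - a single MOVZ when
 *         offVCpu / 4 fits in 16 bits, a full immediate load when it does not -
 *         and the register-offset form is used, with 'LSL #2' scaling in the
 *         MOVZ case since the index then holds offVCpu / 4.
 */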
1638
1639/**
1640 * Emits a gprdst = gprsrc load.
1641 */
1642DECL_FORCE_INLINE(uint32_t)
1643iemNativeEmitLoadGprFromGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1644{
1645#ifdef RT_ARCH_AMD64
1646 /* mov gprdst, gprsrc */
1647 if ((iGprDst | iGprSrc) >= 8)
1648 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W | X86_OP_REX_B
1649 : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
1650 : X86_OP_REX_W | X86_OP_REX_R;
1651 else
1652 pCodeBuf[off++] = X86_OP_REX_W;
1653 pCodeBuf[off++] = 0x8b;
1654 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1655
1656#elif defined(RT_ARCH_ARM64)
1657 /* mov dst, src; alias for: orr dst, xzr, src */
1658 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_XZR, iGprSrc);
1659
1660#else
1661# error "port me"
1662#endif
1663 return off;
1664}
1665
1666
1667/**
1668 * Emits a gprdst = gprsrc load.
1669 */
1670DECL_INLINE_THROW(uint32_t)
1671iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1672{
1673#ifdef RT_ARCH_AMD64
1674 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1675#elif defined(RT_ARCH_ARM64)
1676 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1677#else
1678# error "port me"
1679#endif
1680 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1681 return off;
1682}
1683
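/*
 * Usage sketch (illustrative only, not called by anything): the Ex flavours
 * write into an instruction buffer the caller has already ensured, which is
 * useful when batching several small emits under a single
 * iemNativeInstrBufEnsure call; the plain flavours do the ensuring (and the
 * final assertion) themselves.  The register numbers below are hypothetical
 * host register indexes.
 */
#if 0
    /* Batched: one ensure call covers both moves (worst case 3 bytes each on AMD64, one instruction each on ARM64). */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
    off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, 8 /*iGprDst*/,  9 /*iGprSrc*/);
    off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, 10 /*iGprDst*/, 9 /*iGprSrc*/);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

    /* Simple: the emitter ensures the buffer itself. */
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, 8 /*iGprDst*/, 9 /*iGprSrc*/);
#endif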
1684
1685/**
1686 * Emits a gprdst = gprsrc[31:0] load.
1687 * @note Bits 63 thru 32 are cleared.
1688 */
1689DECL_FORCE_INLINE(uint32_t)
1690iemNativeEmitLoadGprFromGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1691{
1692#ifdef RT_ARCH_AMD64
1693 /* mov gprdst, gprsrc */
1694 if ((iGprDst | iGprSrc) >= 8)
1695 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1696 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1697 : X86_OP_REX_R;
1698 pCodeBuf[off++] = 0x8b;
1699 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1700
1701#elif defined(RT_ARCH_ARM64)
1702 /* mov dst32, src32; alias for: orr dst32, wzr, src32 */
1703 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_WZR, iGprSrc, false /*f64bit*/);
1704
1705#else
1706# error "port me"
1707#endif
1708 return off;
1709}
1710
1711
1712/**
1713 * Emits a gprdst = gprsrc[31:0] load.
1714 * @note Bits 63 thru 32 are cleared.
1715 */
1716DECL_INLINE_THROW(uint32_t)
1717iemNativeEmitLoadGprFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1718{
1719#ifdef RT_ARCH_AMD64
1720 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1721#elif defined(RT_ARCH_ARM64)
1722 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1723#else
1724# error "port me"
1725#endif
1726 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1727 return off;
1728}
1729
1730
1731/**
1732 * Emits a gprdst = gprsrc[15:0] load.
1733 * @note Bits 63 thru 16 are cleared.
1734 */
1735DECL_INLINE_THROW(uint32_t)
1736iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1737{
1738#ifdef RT_ARCH_AMD64
1739 /* movzx Gv,Ew */
1740 if ((iGprDst | iGprSrc) >= 8)
1741 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1742 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1743 : X86_OP_REX_R;
1744 pCodeBuf[off++] = 0x0f;
1745 pCodeBuf[off++] = 0xb7;
1746 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1747
1748#elif defined(RT_ARCH_ARM64)
1749 /* and gprdst, gprsrc, #0xffff */
1750# if 1
1751 Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
1752 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
1753# else
1754 Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
1755 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
1756# endif
1757
1758#else
1759# error "port me"
1760#endif
1761 return off;
1762}
1763
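/*
 * Note on the bitmask immediates used in the ARM64 paths here and in the
 * 8-bit variant below: with the second (rotate) value zero, the immediate
 * pair decodes to a run of 'first value + 1' consecutive one-bits starting
 * at bit 0, so (0x07, 0) yields 0xff and (0x0f, 0) yields 0xffff - exactly
 * what the Armv8A64ConvertImmRImmS2Mask32 asserts double-check.
 */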
1764
1765/**
1766 * Emits a gprdst = gprsrc[15:0] load.
1767 * @note Bits 63 thru 16 are cleared.
1768 */
1769DECL_INLINE_THROW(uint32_t)
1770iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1771{
1772#ifdef RT_ARCH_AMD64
1773 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1774#elif defined(RT_ARCH_ARM64)
1775 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1776#else
1777# error "port me"
1778#endif
1779 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1780 return off;
1781}
1782
1783
1784/**
1785 * Emits a gprdst = gprsrc[7:0] load.
1786 * @note Bits 63 thru 8 are cleared.
1787 */
1788DECL_FORCE_INLINE(uint32_t)
1789iemNativeEmitLoadGprFromGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1790{
1791#ifdef RT_ARCH_AMD64
1792 /* movzx Gv,Eb */
1793 if (iGprDst >= 8 || iGprSrc >= 8)
1794 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1795 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1796 : X86_OP_REX_R;
1797 else if (iGprSrc >= 4)
1798 pCodeBuf[off++] = X86_OP_REX;
1799 pCodeBuf[off++] = 0x0f;
1800 pCodeBuf[off++] = 0xb6;
1801 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1802
1803#elif defined(RT_ARCH_ARM64)
1804 /* and gprdst, gprsrc, #0xff */
1805 Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
1806 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x07, 0, false /*f64Bit*/);
1807
1808#else
1809# error "port me"
1810#endif
1811 return off;
1812}
1813
1814
1815/**
1816 * Emits a gprdst = gprsrc[7:0] load.
1817 * @note Bits 63 thru 8 are cleared.
1818 */
1819DECL_INLINE_THROW(uint32_t)
1820iemNativeEmitLoadGprFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1821{
1822#ifdef RT_ARCH_AMD64
1823 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1824#elif defined(RT_ARCH_ARM64)
1825 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1826#else
1827# error "port me"
1828#endif
1829 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1830 return off;
1831}
1832
1833
1834/**
1835 * Emits a gprdst = gprsrc[15:8] load (ah, ch, dh, bh).
1836 * @note Bits 63 thru 8 are cleared.
1837 */
1838DECL_INLINE_THROW(uint32_t)
1839iemNativeEmitLoadGprFromGpr8Hi(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1840{
1841#ifdef RT_ARCH_AMD64
1842 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1843
1844 /* movzx Gv,Ew */
1845 if ((iGprDst | iGprSrc) >= 8)
1846 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1847 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1848 : X86_OP_REX_R;
1849 pbCodeBuf[off++] = 0x0f;
1850 pbCodeBuf[off++] = 0xb7;
1851 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1852
1853 /* shr Ev,8 */
1854 if (iGprDst >= 8)
1855 pbCodeBuf[off++] = X86_OP_REX_B;
1856 pbCodeBuf[off++] = 0xc1;
1857 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
1858 pbCodeBuf[off++] = 8;
1859
1860#elif defined(RT_ARCH_ARM64)
1861 /* ubfx gprdst, gprsrc, #8, #8 - gprdst = gprsrc[15:8] */
1862 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1863 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(iGprDst, iGprSrc, 8, 8, false /*f64Bit*/);
1864
1865#else
1866# error "port me"
1867#endif
1868 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1869 return off;
1870}
1871
1872
1873/**
1874 * Sign-extends 32-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1875 */
1876DECL_INLINE_THROW(uint32_t)
1877iemNativeEmitLoadGprSignExtendedFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1878{
1879#ifdef RT_ARCH_AMD64
1880 /* movsxd r64, r/m32 */
1881 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1882 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1883 pbCodeBuf[off++] = 0x63;
1884 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1885
1886#elif defined(RT_ARCH_ARM64)
1887 /* sxtw dst, src */
1888 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1889 pu32CodeBuf[off++] = Armv8A64MkInstrSxtw(iGprDst, iGprSrc);
1890
1891#else
1892# error "port me"
1893#endif
1894 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1895 return off;
1896}
1897
1898
1899/**
1900 * Sign-extends 16-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1901 */
1902DECL_INLINE_THROW(uint32_t)
1903iemNativeEmitLoadGprSignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1904{
1905#ifdef RT_ARCH_AMD64
1906 /* movsx r64, r/m16 */
1907 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1908 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1909 pbCodeBuf[off++] = 0x0f;
1910 pbCodeBuf[off++] = 0xbf;
1911 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1912
1913#elif defined(RT_ARCH_ARM64)
1914 /* sxth dst, src */
1915 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1916 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc);
1917
1918#else
1919# error "port me"
1920#endif
1921 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1922 return off;
1923}
1924
1925
1926/**
1927 * Sign-extends 16-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1928 */
1929DECL_INLINE_THROW(uint32_t)
1930iemNativeEmitLoadGpr32SignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1931{
1932#ifdef RT_ARCH_AMD64
1933 /* movsx r64, r/m16 */
1934 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1935 if (iGprDst >= 8 || iGprSrc >= 8)
1936 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1937 pbCodeBuf[off++] = 0x0f;
1938 pbCodeBuf[off++] = 0xbf;
1939 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1940
1941#elif defined(RT_ARCH_ARM64)
1942 /* sxth dst32, src */
1943 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1944 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc, false /*f64Bit*/);
1945
1946#else
1947# error "port me"
1948#endif
1949 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1950 return off;
1951}
1952
1953
1954/**
1955 * Sign-extends 8-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1956 */
1957DECL_INLINE_THROW(uint32_t)
1958iemNativeEmitLoadGprSignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1959{
1960#ifdef RT_ARCH_AMD64
1961 /* movsx r64, r/m8 */
1962 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1963 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1964 pbCodeBuf[off++] = 0x0f;
1965 pbCodeBuf[off++] = 0xbe;
1966 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1967
1968#elif defined(RT_ARCH_ARM64)
1969 /* sxtb dst, src */
1970 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1971 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc);
1972
1973#else
1974# error "port me"
1975#endif
1976 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1977 return off;
1978}
1979
1980
1981/**
1982 * Sign-extends 8-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1983 * @note Bits 63 thru 32 are cleared.
1984 */
1985DECL_INLINE_THROW(uint32_t)
1986iemNativeEmitLoadGpr32SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1987{
1988#ifdef RT_ARCH_AMD64
1989 /* movsx r32, r/m8 */
1990 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1991 if (iGprDst >= 8 || iGprSrc >= 8)
1992 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1993 else if (iGprSrc >= 4)
1994 pbCodeBuf[off++] = X86_OP_REX;
1995 pbCodeBuf[off++] = 0x0f;
1996 pbCodeBuf[off++] = 0xbe;
1997 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1998
1999#elif defined(RT_ARCH_ARM64)
2000 /* sxtb dst32, src32 */
2001 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2002 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
2003
2004#else
2005# error "port me"
2006#endif
2007 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2008 return off;
2009}
2010
2011
2012/**
2013 * Sign-extends 8-bit value in @a iGprSrc into a 16-bit value in @a iGprDst.
2014 * @note Bits 63 thru 16 are cleared.
2015 */
2016DECL_INLINE_THROW(uint32_t)
2017iemNativeEmitLoadGpr16SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
2018{
2019#ifdef RT_ARCH_AMD64
2020 /* movsx r16, r/m8 */
2021 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2022 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2023 if (iGprDst >= 8 || iGprSrc >= 8)
2024 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
2025 else if (iGprSrc >= 4)
2026 pbCodeBuf[off++] = X86_OP_REX;
2027 pbCodeBuf[off++] = 0x0f;
2028 pbCodeBuf[off++] = 0xbe;
2029 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
2030
2031 /* movzx r32, r/m16 */
2032 if (iGprDst >= 8)
2033 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
2034 pbCodeBuf[off++] = 0x0f;
2035 pbCodeBuf[off++] = 0xb7;
2036 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
2037
2038#elif defined(RT_ARCH_ARM64)
2039 /* sxtb dst32, src32; and dst32, dst32, #0xffff */
2040 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2041 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
2042 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2043 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
2044
2045#else
2046# error "port me"
2047#endif
2048 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2049 return off;
2050}
2051
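/*
 * Worked example for the emitter above (illustrative): with iGprSrc holding
 * 0x80 (-128 as a signed byte), the result in iGprDst is 0x000000000000ff80,
 * i.e. the value is sign-extended into bits 15:8 while bits 63:16 are
 * cleared - on AMD64 by the movzx following the movsx, on ARM64 by the
 * 16-bit AND following the sxtb.
 */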
2052
2053/**
2054 * Emits a gprdst = gprsrc + addend load.
2055 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
2056 */
2057#ifdef RT_ARCH_AMD64
2058DECL_INLINE_THROW(uint32_t)
2059iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2060 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
2061{
2062 Assert(iAddend != 0);
2063
2064 /* lea gprdst, [gprsrc + iAddend] */
2065 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2066 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
2067 pbCodeBuf[off++] = 0x8d;
2068 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
2069 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2070 return off;
2071}
2072
2073#elif defined(RT_ARCH_ARM64)
2074DECL_INLINE_THROW(uint32_t)
2075iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2076 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
2077{
2078 if ((uint32_t)iAddend < 4096)
2079 {
2080 /* add dst, src, uimm12 */
2081 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2082 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend);
2083 }
2084 else if ((uint32_t)-iAddend < 4096)
2085 {
2086 /* sub dst, src, uimm12 */
2087 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2088 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend);
2089 }
2090 else
2091 {
2092 Assert(iGprSrc != iGprDst);
2093 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, iAddend);
2094 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2095 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst);
2096 }
2097 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2098 return off;
2099}
2100#else
2101# error "port me"
2102#endif
2103
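/*
 * Note on the ARM64 variant above: addends that fit as an unsigned 12-bit
 * immediate become a single ADD, small negative addends the corresponding
 * SUB (e.g. iAddend = -8 emits 'sub dst, src, #8'), and anything else loads
 * the addend into iGprDst first and adds the two registers - which is why
 * that path asserts that iGprSrc and iGprDst differ.
 */
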
2104/**
2105 * Emits a gprdst = gprsrc + addend load, accepting iAddend == 0.
2106 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
2107 */
2108#ifdef RT_ARCH_AMD64
2109DECL_INLINE_THROW(uint32_t)
2110iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2111 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
2112#else
2113DECL_INLINE_THROW(uint32_t)
2114iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2115 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
2116#endif
2117{
2118 if (iAddend != 0)
2119 return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
2120 return iemNativeEmitLoadGprFromGpr(pReNative, off, iGprDst, iGprSrc);
2121}
2122
2123
2124/**
2125 * Emits a gprdst = gprsrc32 + addend load.
2126 * @note Bits 63 thru 32 are cleared.
2127 */
2128DECL_INLINE_THROW(uint32_t)
2129iemNativeEmitLoadGprFromGpr32WithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2130 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
2131{
2132 Assert(iAddend != 0);
2133
2134#ifdef RT_ARCH_AMD64
2135 /* a32 o32 lea gprdst, [gprsrc + iAddend] */
2136 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2137 pbCodeBuf[off++] = X86_OP_PRF_SIZE_ADDR;
2138 if ((iGprDst | iGprSrc) >= 8)
2139 pbCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
2140 pbCodeBuf[off++] = 0x8d;
2141 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
2142
2143#elif defined(RT_ARCH_ARM64)
2144 if ((uint32_t)iAddend < 4096)
2145 {
2146 /* add dst, src, uimm12 */
2147 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2148 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend, false /*f64Bit*/);
2149 }
2150 else if ((uint32_t)-iAddend < 4096)
2151 {
2152 /* sub dst, src, uimm12 */
2153 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2154 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend, false /*f64Bit*/);
2155 }
2156 else
2157 {
2158 Assert(iGprSrc != iGprDst);
2159 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, (int64_t)iAddend);
2160 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2161 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst, false /*f64Bit*/);
2162 }
2163
2164#else
2165# error "port me"
2166#endif
2167 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2168 return off;
2169}
2170
2171
2172/**
2173 * Emits a gprdst = gprsrc32 + addend load, accepting iAddend == 0.
2174 */
2175DECL_INLINE_THROW(uint32_t)
2176iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2177 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
2178{
2179 if (iAddend != 0)
2180 return iemNativeEmitLoadGprFromGpr32WithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
2181 return iemNativeEmitLoadGprFromGpr32(pReNative, off, iGprDst, iGprSrc);
2182}
2183
2184
2185/**
2186 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
2187 * destination.
2188 */
2189DECL_FORCE_INLINE(uint32_t)
2190iemNativeEmitGprMergeInGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
2191{
2192#ifdef RT_ARCH_AMD64
2193 /* mov reg16, r/m16 */
2194 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2195 if (idxDst >= 8 || idxSrc >= 8)
2196 pCodeBuf[off++] = (idxDst < 8 ? 0 : X86_OP_REX_R) | (idxSrc < 8 ? 0 : X86_OP_REX_B);
2197 pCodeBuf[off++] = 0x8b;
2198 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxDst & 7, idxSrc & 7);
2199
2200#elif defined(RT_ARCH_ARM64)
2201 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxSrc to idxDst bits 15:0. */
2202 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxDst, idxSrc, 0, 16);
2203
2204#else
2205# error "Port me!"
2206#endif
2207 return off;
2208}
2209
2210
2211/**
2212 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
2213 * destination.
2214 */
2215DECL_INLINE_THROW(uint32_t)
2216iemNativeEmitGprMergeInGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
2217{
2218#ifdef RT_ARCH_AMD64
2219 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, idxDst, idxSrc);
2220#elif defined(RT_ARCH_ARM64)
2221 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, idxDst, idxSrc);
2222#else
2223# error "Port me!"
2224#endif
2225 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2226 return off;
2227}
2228
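/*
 * Worked example for the merge above (illustrative): with idxDst holding
 * 0x1122334455667788 and idxSrc holding 0xaaaabbbbccccdddd, the result in
 * idxDst is 0x112233445566dddd - only bits 15:0 are copied.  On AMD64 this
 * relies on 16-bit operand size moves leaving bits 63:16 of the destination
 * untouched; on ARM64 the BFI instruction inserts src bits 15:0 into dst
 * bits 15:0 directly.
 */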
2229
2230#ifdef RT_ARCH_AMD64
2231/**
2232 * Common bit of iemNativeEmitLoadGprByBp and friends.
2233 */
2234DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp,
2235 PIEMRECOMPILERSTATE pReNativeAssert)
2236{
2237 if (offDisp < 128 && offDisp >= -128)
2238 {
2239 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
2240 pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
2241 }
2242 else
2243 {
2244 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
2245 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2246 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2247 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2248 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2249 }
2250 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNativeAssert, off); RT_NOREF(pReNativeAssert);
2251 return off;
2252}
2253#elif defined(RT_ARCH_ARM64)
2254/**
2255 * Common bit of iemNativeEmitLoadGprByBp and friends.
2256 */
2257DECL_FORCE_INLINE_THROW(uint32_t)
2258iemNativeEmitGprByBpLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2259 int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2260{
2261 if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
2262 {
2263 /* str w/ unsigned imm12 (scaled) */
2264 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2265 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, ARMV8_A64_REG_BP, (uint32_t)offDisp / cbData);
2266 }
2267 else if (offDisp >= -256 && offDisp <= 256)
2268 {
2269 /* stur w/ signed imm9 (unscaled) */
2270 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2271 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(enmOperation, iGprReg, ARMV8_A64_REG_BP, offDisp);
2272 }
2273 else
2274 {
2275 /* Use temporary indexing register. */
2276 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2277 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2278 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, ARMV8_A64_REG_BP,
2279 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2280 }
2281 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2282 return off;
2283}
2284#endif
2285
2286
2287/**
2288 * Emits a 64-bit GPR load instruction with a BP relative source address.
2289 */
2290DECL_INLINE_THROW(uint32_t)
2291iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2292{
2293#ifdef RT_ARCH_AMD64
2294 /* mov gprdst, qword [rbp + offDisp] */
2295 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2296 if (iGprDst < 8)
2297 pbCodeBuf[off++] = X86_OP_REX_W;
2298 else
2299 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2300 pbCodeBuf[off++] = 0x8b;
2301 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2302
2303#elif defined(RT_ARCH_ARM64)
2304 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2305
2306#else
2307# error "port me"
2308#endif
2309}
2310
2311
2312/**
2313 * Emits a 32-bit GPR load instruction with a BP relative source address.
2314 * @note Bits 63 thru 32 of the GPR will be cleared.
2315 */
2316DECL_INLINE_THROW(uint32_t)
2317iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2318{
2319#ifdef RT_ARCH_AMD64
2320 /* mov gprdst, dword [rbp + offDisp] */
2321 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2322 if (iGprDst >= 8)
2323 pbCodeBuf[off++] = X86_OP_REX_R;
2324 pbCodeBuf[off++] = 0x8b;
2325 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2326
2327#elif defined(RT_ARCH_ARM64)
2328 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2329
2330#else
2331# error "port me"
2332#endif
2333}
2334
2335
2336/**
2337 * Emits a 16-bit GPR load instruction with a BP relative source address.
2338 * @note Bits 63 thru 16 of the GPR will be cleared.
2339 */
2340DECL_INLINE_THROW(uint32_t)
2341iemNativeEmitLoadGprByBpU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2342{
2343#ifdef RT_ARCH_AMD64
2344 /* movzx gprdst, word [rbp + offDisp] */
2345 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2346 if (iGprDst >= 8)
2347 pbCodeBuf[off++] = X86_OP_REX_R;
2348 pbCodeBuf[off++] = 0x0f;
2349 pbCodeBuf[off++] = 0xb7;
2350 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2351
2352#elif defined(RT_ARCH_ARM64)
2353    return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
2354
2355#else
2356# error "port me"
2357#endif
2358}
2359
2360
2361/**
2362 * Emits an 8-bit GPR load instruction with a BP relative source address.
2363 * @note Bits 63 thru 8 of the GPR will be cleared.
2364 */
2365DECL_INLINE_THROW(uint32_t)
2366iemNativeEmitLoadGprByBpU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2367{
2368#ifdef RT_ARCH_AMD64
2369 /* movzx gprdst, byte [rbp + offDisp] */
2370 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2371 if (iGprDst >= 8)
2372 pbCodeBuf[off++] = X86_OP_REX_R;
2373 pbCodeBuf[off++] = 0x0f;
2374 pbCodeBuf[off++] = 0xb6;
2375 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2376
2377#elif defined(RT_ARCH_ARM64)
2378    return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
2379
2380#else
2381# error "port me"
2382#endif
2383}
2384
2385
2386/**
2387 * Emits a 128-bit vector register load instruction with a BP relative source address.
2388 */
2389DECL_FORCE_INLINE_THROW(uint32_t)
2390iemNativeEmitLoadVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2391{
2392#ifdef RT_ARCH_AMD64
2393 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2394
2395 /* movdqu reg128, mem128 */
2396 pbCodeBuf[off++] = 0xf3;
2397 if (iVecRegDst >= 8)
2398 pbCodeBuf[off++] = X86_OP_REX_R;
2399 pbCodeBuf[off++] = 0x0f;
2400 pbCodeBuf[off++] = 0x6f;
2401 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2402#elif defined(RT_ARCH_ARM64)
2403 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2404#else
2405# error "port me"
2406#endif
2407}
2408
2409
2410/**
2411 * Emits a 256-bit vector register load instruction with a BP relative source address.
2412 */
2413DECL_FORCE_INLINE_THROW(uint32_t)
2414iemNativeEmitLoadVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2415{
2416#ifdef RT_ARCH_AMD64
2417 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2418
2419 /* vmovdqu reg256, mem256 */
2420 pbCodeBuf[off++] = X86_OP_VEX2;
2421 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegDst >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2422 pbCodeBuf[off++] = 0x6f;
2423 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2424#elif defined(RT_ARCH_ARM64)
2425 /* ASSUMES two consecutive vector registers for the 256-bit value. */
2426 Assert(!(iVecRegDst & 0x1));
2427 off = iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2428 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst + 1, offDisp + sizeof(RTUINT128U), kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2429#else
2430# error "port me"
2431#endif
2432}
2433
2434
2435/**
2436 * Emits a load effective address to a GPR with a BP relative source address.
2437 */
2438DECL_INLINE_THROW(uint32_t)
2439iemNativeEmitLeaGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2440{
2441#ifdef RT_ARCH_AMD64
2442 /* lea gprdst, [rbp + offDisp] */
2443 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2444 if (iGprDst < 8)
2445 pbCodeBuf[off++] = X86_OP_REX_W;
2446 else
2447 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2448 pbCodeBuf[off++] = 0x8d;
2449 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2450
2451#elif defined(RT_ARCH_ARM64)
2452 bool const fSub = offDisp < 0;
2453 uint32_t const offAbsDisp = (uint32_t)RT_ABS(offDisp);
2454 if (offAbsDisp <= 0xffffffU)
2455 {
2456 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2457 if (offAbsDisp <= 0xfffU)
2458 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, ARMV8_A64_REG_BP, offAbsDisp);
2459 else
2460 {
2461 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, ARMV8_A64_REG_BP, offAbsDisp >> 12,
2462 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2463 if (offAbsDisp & 0xfffU)
2464 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, offAbsDisp & 0xfff);
2465 }
2466 }
2467 else
2468 {
2469 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
2470 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offAbsDisp);
2471 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2472 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2473 }
2474
2475#else
2476# error "port me"
2477#endif
2478
2479 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2480 return off;
2481}
2482
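/*
 * Worked example for the ARM64 path above (illustrative): a displacement of
 * 0x12345 is split into 'add dst, bp, #0x12, lsl #12' followed by
 * 'add dst, dst, #0x345'; a displacement of -0x30 becomes a single
 * 'sub dst, bp, #0x30'.  Displacements beyond 24 bits fall back to a full
 * immediate load plus a register-register add/sub.
 */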
2483
2484/**
2485 * Emits a 64-bit GPR store with a BP relative destination address.
2486 *
2487 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2488 */
2489DECL_INLINE_THROW(uint32_t)
2490iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
2491{
2492#ifdef RT_ARCH_AMD64
2493 /* mov qword [rbp + offDisp], gprdst */
2494 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2495 if (iGprSrc < 8)
2496 pbCodeBuf[off++] = X86_OP_REX_W;
2497 else
2498 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2499 pbCodeBuf[off++] = 0x89;
2500 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp, pReNative);
2501
2502#elif defined(RT_ARCH_ARM64)
2503 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2504 {
2505 /* str w/ unsigned imm12 (scaled) */
2506 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2507 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
2508 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2509 }
2510 else if (offDisp >= -256 && offDisp <= 256)
2511 {
2512 /* stur w/ signed imm9 (unscaled) */
2513 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2514 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
2515 }
2516 else if ((uint32_t)-offDisp < (unsigned)_4K)
2517 {
2518 /* Use temporary indexing register w/ sub uimm12. */
2519 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2520 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2521 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2522 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2523 }
2524 else
2525 {
2526 /* Use temporary indexing register. */
2527 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2528 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2529 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
2530 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2531 }
2532 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2533 return off;
2534
2535#else
2536# error "Port me!"
2537#endif
2538}
2539
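/*
 * Usage sketch (illustrative only, not called by anything): spilling a host
 * register to a hypothetical BP-relative stack slot and reloading it into
 * another register.  The slot offset and register numbers are made up; note
 * that both helpers may clobber IEMNATIVE_REG_FIXED_TMP0 on ARM64 when the
 * displacement is awkward.
 */
#if 0
    int32_t const offBpSlot = -0x40;                    /* hypothetical stack slot */
    off = iemNativeEmitStoreGprByBp(pReNative, off, offBpSlot, 8 /*iGprSrc*/);
    /* ... code that reuses host register 8 ... */
    off = iemNativeEmitLoadGprByBp(pReNative, off, 9 /*iGprDst*/, offBpSlot);
#endif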
2540
2541/**
2542 * Emits a 64-bit immediate store with a BP relative destination address.
2543 *
2544 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2545 */
2546DECL_INLINE_THROW(uint32_t)
2547iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
2548{
2549#ifdef RT_ARCH_AMD64
2550 if ((int64_t)uImm64 == (int32_t)uImm64)
2551 {
2552 /* mov qword [rbp + offDisp], imm32 - sign extended */
2553 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
2554 pbCodeBuf[off++] = X86_OP_REX_W;
2555 pbCodeBuf[off++] = 0xc7;
2556 if (offDisp < 128 && offDisp >= -128)
2557 {
2558 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
2559 pbCodeBuf[off++] = (uint8_t)offDisp;
2560 }
2561 else
2562 {
2563 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
2564 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2565 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2566 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2567 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2568 }
2569 pbCodeBuf[off++] = RT_BYTE1(uImm64);
2570 pbCodeBuf[off++] = RT_BYTE2(uImm64);
2571 pbCodeBuf[off++] = RT_BYTE3(uImm64);
2572 pbCodeBuf[off++] = RT_BYTE4(uImm64);
2573 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2574 return off;
2575 }
2576#endif
2577
2578 /* Load tmp0, imm64; Store tmp to bp+disp. */
2579 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
2580 return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
2581}
2582
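/*
 * Note on the AMD64 fast path above: '(int64_t)uImm64 == (int32_t)uImm64'
 * holds exactly when bits 63:31 of the value are all equal, i.e. when the
 * value survives a sign-extending 32-bit store.  UINT64_C(0xffffffff80000000)
 * therefore qualifies for the single 'mov qword [rbp+disp], imm32', while
 * UINT64_C(0x0000000080000000) does not and goes via IEMNATIVE_REG_FIXED_TMP0.
 */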
2583
2584/**
2585 * Emits a 128-bit vector register store with a BP relative destination address.
2586 *
2587 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2588 */
2589DECL_INLINE_THROW(uint32_t)
2590iemNativeEmitStoreVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2591{
2592#ifdef RT_ARCH_AMD64
2593 /* movdqu [rbp + offDisp], vecsrc */
2594    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2595 pbCodeBuf[off++] = 0xf3;
2596 if (iVecRegSrc >= 8)
2597 pbCodeBuf[off++] = X86_OP_REX_R;
2598 pbCodeBuf[off++] = 0x0f;
2599 pbCodeBuf[off++] = 0x7f;
2600 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2601
2602#elif defined(RT_ARCH_ARM64)
2603 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2604 {
2605 /* str w/ unsigned imm12 (scaled) */
2606 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2607 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc,
2608 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2609 }
2610 else if (offDisp >= -256 && offDisp <= 256)
2611 {
2612 /* stur w/ signed imm9 (unscaled) */
2613 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2614 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP, offDisp);
2615 }
2616 else if ((uint32_t)-offDisp < (unsigned)_4K)
2617 {
2618 /* Use temporary indexing register w/ sub uimm12. */
2619 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2620 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2621 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2622 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2623 }
2624 else
2625 {
2626 /* Use temporary indexing register. */
2627 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2628 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2629 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP,
2630 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2631 }
2632 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2633 return off;
2634
2635#else
2636# error "Port me!"
2637#endif
2638}
2639
2640
2641/**
2642 * Emits a 256-bit vector register store with a BP relative destination address.
2643 *
2644 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2645 */
2646DECL_INLINE_THROW(uint32_t)
2647iemNativeEmitStoreVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2648{
2649#ifdef RT_ARCH_AMD64
2650 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2651
2652 /* vmovdqu mem256, reg256 */
2653 pbCodeBuf[off++] = X86_OP_VEX2;
2654 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegSrc >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2655 pbCodeBuf[off++] = 0x7f;
2656 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2657#elif defined(RT_ARCH_ARM64)
2658 Assert(!(iVecRegSrc & 0x1));
2659 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp, iVecRegSrc);
2660 return iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp + sizeof(RTUINT128U), iVecRegSrc + 1);
2661#else
2662# error "Port me!"
2663#endif
2664}
2665
2666#if defined(RT_ARCH_ARM64)
2667
2668/**
2669 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2670 *
2671 * @note Odd and large @a offDisp values requires a temporary, unless it's a
2672 * @note Odd and large @a offDisp values require a temporary, unless it's a
2673 * caller does not heed this.
2674 *
2675 * @note DON'T try this with prefetch.
2676 */
2677DECL_FORCE_INLINE_THROW(uint32_t)
2678iemNativeEmitGprByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp,
2679 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2680{
2681 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2682 {
2683 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2684 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2685 }
2686 else if ( ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2687 && iGprReg != iGprBase)
2688 || iGprTmp != UINT8_MAX)
2689 {
2690 /* The offset is too large, so we must load it into a register and use
2691 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2692        /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2693 if (iGprTmp == UINT8_MAX)
2694 iGprTmp = iGprReg;
2695 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2696 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, iGprTmp);
2697 }
2698 else
2699# ifdef IEM_WITH_THROW_CATCH
2700 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2701# else
2702 AssertReleaseFailedStmt(off = UINT32_MAX);
2703# endif
2704 return off;
2705}
2706
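/*
 * Note on the helper above (illustrative numbers): for a 64-bit access
 * (cbData = 8) the scaled 12-bit immediate form covers displacements
 * 0..0x7ff8 that are 8 byte aligned, so e.g. offDisp = 0x7ff8 needs no help.
 * A displacement of 0x8000, a misaligned one, or any negative one needs the
 * register-offset form: loads can reuse the destination register for that
 * (when it differs from the base), whereas stores must be given @a iGprTmp
 * or the helper asserts / throws.
 */
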
2707/**
2708 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
2709 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2710DECL_FORCE_INLINE_THROW(uint32_t)
2711iemNativeEmitGprByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2712 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2713{
2714 /*
2715     * There are a couple of ldr variants that take an immediate offset, so
2716     * try to use those if we can, otherwise we have to use a temporary register
2717     * to help with the addressing.
2718 */
2719 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2720 {
2721 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2722 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2723 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2724 }
2725 else
2726 {
2727 /* The offset is too large, so we must load it into a register and use
2728 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2729        /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2730 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2731
2732 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2733 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, idxTmpReg);
2734
2735 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2736 }
2737 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2738 return off;
2739}
2740
2741/**
2742 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2743 *
2744 * @note Odd and large @a offDisp values requires a temporary, unless it's a
2745 * @note Odd and large @a offDisp values require a temporary, unless it's a
2746 * caller does not heed this.
2747 *
2748 * @note DON'T try this with prefetch.
2749 */
2750DECL_FORCE_INLINE_THROW(uint32_t)
2751iemNativeEmitVecRegByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iGprBase, int32_t offDisp,
2752 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2753{
2754 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2755 {
2756 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2757 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2758 }
2759 else if ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2760 || iGprTmp != UINT8_MAX)
2761 {
2762 /* The offset is too large, so we must load it into a register and use
2763 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2764        /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2765 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2766 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, iGprTmp);
2767 }
2768 else
2769# ifdef IEM_WITH_THROW_CATCH
2770 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2771# else
2772 AssertReleaseFailedStmt(off = UINT32_MAX);
2773# endif
2774 return off;
2775}
2776
2777
2778/**
2779 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2780 */
2781DECL_FORCE_INLINE_THROW(uint32_t)
2782iemNativeEmitVecRegByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg,
2783 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2784{
2785 /*
2786     * There are a couple of ldr variants that take an immediate offset, so
2787     * try to use those if we can, otherwise we have to use a temporary register
2788     * to help with the addressing.
2789 */
2790 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2791 {
2792 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2793 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2794 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2795 }
2796 else
2797 {
2798 /* The offset is too large, so we must load it into a register and use
2799 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2800        /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2801 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2802
2803 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2804 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, idxTmpReg);
2805
2806 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2807 }
2808 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2809 return off;
2810}
2811#endif /* RT_ARCH_ARM64 */
2812
2813/**
2814 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2815 *
2816 * @note ARM64: Misaligned @a offDisp values and values not in the
2817 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2818 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2819 * does not heed this.
2820 */
2821DECL_FORCE_INLINE_THROW(uint32_t)
2822iemNativeEmitLoadGprByGprU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2823 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2824{
2825#ifdef RT_ARCH_AMD64
2826 /* mov reg64, mem64 */
2827 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2828 pCodeBuf[off++] = 0x8b;
2829 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2830 RT_NOREF(iGprTmp);
2831
2832#elif defined(RT_ARCH_ARM64)
2833 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2834 kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), iGprTmp);
2835
2836#else
2837# error "port me"
2838#endif
2839 return off;
2840}
2841
2842
2843/**
2844 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2845 */
2846DECL_INLINE_THROW(uint32_t)
2847iemNativeEmitLoadGprByGprU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2848{
2849#ifdef RT_ARCH_AMD64
2850 off = iemNativeEmitLoadGprByGprU64Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2851 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2852
2853#elif defined(RT_ARCH_ARM64)
2854 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2855
2856#else
2857# error "port me"
2858#endif
2859 return off;
2860}
2861
2862
2863/**
2864 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2865 *
2866 * @note ARM64: Misaligned @a offDisp values and values not in the
2867 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2868 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2869 * caller does not heed this.
2870 *
2871 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2872 */
2873DECL_FORCE_INLINE_THROW(uint32_t)
2874iemNativeEmitLoadGprByGprU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2875 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2876{
2877#ifdef RT_ARCH_AMD64
2878 /* mov reg32, mem32 */
2879 if (iGprDst >= 8 || iGprBase >= 8)
2880 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2881 pCodeBuf[off++] = 0x8b;
2882 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2883 RT_NOREF(iGprTmp);
2884
2885#elif defined(RT_ARCH_ARM64)
2886 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2887 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t), iGprTmp);
2888
2889#else
2890# error "port me"
2891#endif
2892 return off;
2893}
2894
2895
2896/**
2897 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2898 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2899 */
2900DECL_INLINE_THROW(uint32_t)
2901iemNativeEmitLoadGprByGprU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2902{
2903#ifdef RT_ARCH_AMD64
2904 off = iemNativeEmitLoadGprByGprU32Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2905 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2906
2907#elif defined(RT_ARCH_ARM64)
2908 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2909
2910#else
2911# error "port me"
2912#endif
2913 return off;
2914}
2915
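/*
 * Usage sketch (illustrative only, not called by anything): dereferencing a
 * hypothetical structure through a base pointer held in host register 8,
 * first a 64-bit pointer-sized field at offset 0x10 and then a 32-bit field
 * at offset 0x28 of the structure it points to.  All offsets and register
 * numbers are made up.
 */
#if 0
    off = iemNativeEmitLoadGprByGprU64(pReNative, off, 9 /*iGprDst*/,  8 /*iGprBase*/, 0x10 /*offDisp*/);
    off = iemNativeEmitLoadGprByGprU32(pReNative, off, 10 /*iGprDst*/, 9 /*iGprBase*/, 0x28 /*offDisp*/);
#endif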
2916
2917/**
2918 * Emits a 32-bit GPR load via a GPR base address with a displacement,
2919 * sign-extending the value to 64 bits.
2920 *
2921 * @note ARM64: Misaligned @a offDisp values and values not in the
2922 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2923 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2924 * caller does not heed this.
2925 */
2926DECL_FORCE_INLINE_THROW(uint32_t)
2927iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2928 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2929{
2930#ifdef RT_ARCH_AMD64
2931 /* movsxd reg64, mem32 */
2932 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2933 pCodeBuf[off++] = 0x63;
2934 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2935 RT_NOREF(iGprTmp);
2936
2937#elif defined(RT_ARCH_ARM64)
2938 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2939 kArmv8A64InstrLdStType_Ld_SignWord64, sizeof(uint32_t), iGprTmp);
2940
2941#else
2942# error "port me"
2943#endif
2944 return off;
2945}
2946
2947
2948/**
2949 * Emits a 16-bit GPR load via a GPR base address with a displacement.
2950 *
2951 * @note ARM64: Misaligned @a offDisp values and values not in the
2952 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2953 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2954 * caller does not heed this.
2955 *
2956 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2957 */
2958DECL_FORCE_INLINE_THROW(uint32_t)
2959iemNativeEmitLoadGprByGprU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2960 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2961{
2962#ifdef RT_ARCH_AMD64
2963 /* movzx reg32, mem16 */
2964 if (iGprDst >= 8 || iGprBase >= 8)
2965 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2966 pCodeBuf[off++] = 0x0f;
2967 pCodeBuf[off++] = 0xb7;
2968 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2969 RT_NOREF(iGprTmp);
2970
2971#elif defined(RT_ARCH_ARM64)
2972 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2973 kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t), iGprTmp);
2974
2975#else
2976# error "port me"
2977#endif
2978 return off;
2979}
2980
2981
2982/**
2983 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2984 * sign-extending the value to 64 bits.
2985 *
2986 * @note ARM64: Misaligned @a offDisp values and values not in the
2987 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2988 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2989 * caller does not heed this.
2990 */
2991DECL_FORCE_INLINE_THROW(uint32_t)
2992iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2993 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2994{
2995#ifdef RT_ARCH_AMD64
2996 /* movsx reg64, mem16 */
2997 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2998 pCodeBuf[off++] = 0x0f;
2999 pCodeBuf[off++] = 0xbf;
3000 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3001 RT_NOREF(iGprTmp);
3002
3003#elif defined(RT_ARCH_ARM64)
3004 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3005 kArmv8A64InstrLdStType_Ld_SignHalf64, sizeof(uint16_t), iGprTmp);
3006
3007#else
3008# error "port me"
3009#endif
3010 return off;
3011}
3012
3013
3014/**
3015 * Emits a 16-bit GPR load via a GPR base address with a displacement,
3016 * sign-extending the value to 32 bits.
3017 *
3018 * @note ARM64: Misaligned @a offDisp values and values not in the
3019 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
3020 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
3021 * caller does not heed this.
3022 *
3023 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
3024 */
3025DECL_FORCE_INLINE_THROW(uint32_t)
3026iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3027 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3028{
3029#ifdef RT_ARCH_AMD64
3030 /* movsx reg32, mem16 */
3031 if (iGprDst >= 8 || iGprBase >= 8)
3032 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3033 pCodeBuf[off++] = 0x0f;
3034 pCodeBuf[off++] = 0xbf;
3035 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3036 RT_NOREF(iGprTmp);
3037
3038#elif defined(RT_ARCH_ARM64)
3039 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3040 kArmv8A64InstrLdStType_Ld_SignHalf32, sizeof(uint16_t), iGprTmp);
3041
3042#else
3043# error "port me"
3044#endif
3045 return off;
3046}
3047
3048
3049/**
3050 * Emits an 8-bit GPR load via a GPR base address with a displacement.
3051 *
3052 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3053 * temporary register (@a iGprTmp) if @a iGprDst and @a iGprBase are the
3054 * same. Will assert / throw if caller does not heed this.
3055 *
3056 * @note Bits 63 thru 8 in @a iGprDst will be cleared.
3057 */
3058DECL_FORCE_INLINE_THROW(uint32_t)
3059iemNativeEmitLoadGprByGprU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3060 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3061{
3062#ifdef RT_ARCH_AMD64
3063 /* movzx reg32, mem8 */
3064 if (iGprDst >= 8 || iGprBase >= 8)
3065 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3066 pCodeBuf[off++] = 0x0f;
3067 pCodeBuf[off++] = 0xb6;
3068 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3069 RT_NOREF(iGprTmp);
3070
3071#elif defined(RT_ARCH_ARM64)
3072 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3073 kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t), iGprTmp);
3074
3075#else
3076# error "port me"
3077#endif
3078 return off;
3079}
3080
3081
3082/**
3083 * Emits an 8-bit GPR load via a GPR base address with a displacement,
3084 * sign-extending the value to 64 bits.
3085 *
3086 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3087 * temporary register (@a iGprTmp) if @a iGprDst and @a iGprBase are the
3088 * same. Will assert / throw if caller does not heed this.
3089 */
3090DECL_FORCE_INLINE_THROW(uint32_t)
3091iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3092 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3093{
3094#ifdef RT_ARCH_AMD64
3095 /* movsx reg64, mem8 */
3096 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3097 pCodeBuf[off++] = 0x0f;
3098 pCodeBuf[off++] = 0xbe;
3099 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3100 RT_NOREF(iGprTmp);
3101
3102#elif defined(RT_ARCH_ARM64)
3103 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3104 kArmv8A64InstrLdStType_Ld_SignByte64, sizeof(uint8_t), iGprTmp);
3105
3106#else
3107# error "port me"
3108#endif
3109 return off;
3110}
3111
3112
3113/**
3114 * Emits an 8-bit GPR load via a GPR base address with a displacement,
3115 * sign-extending the value to 32 bits.
3116 *
3117 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3118 * temporary register (@a iGprTmp) if @a iGprDst and @a iGprBase are the
3119 * same. Will assert / throw if caller does not heed this.
3120 *
3121 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
3122 */
3123DECL_FORCE_INLINE_THROW(uint32_t)
3124iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3125 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3126{
3127#ifdef RT_ARCH_AMD64
3128 /* movsx reg32, mem8 */
3129 if (iGprDst >= 8 || iGprBase >= 8)
3130 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3131 pCodeBuf[off++] = 0x0f;
3132 pCodeBuf[off++] = 0xbe;
3133 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3134 RT_NOREF(iGprTmp);
3135
3136#elif defined(RT_ARCH_ARM64)
3137 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3138 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
3139
3140#else
3141# error "port me"
3142#endif
3143 return off;
3144}
3145
3146
3147/**
3148 * Emits an 8-bit GPR load via a GPR base address with a displacement,
3149 * sign-extending the value to 16 bits.
3150 *
3151 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3152 * temporary register (@a iGprTmp) if @a iGprDst and @a iGprBase are the
3153 * same. Will assert / throw if caller does not heed this.
3154 *
3155 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
3156 */
3157DECL_FORCE_INLINE_THROW(uint32_t)
3158iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3159 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3160{
3161#ifdef RT_ARCH_AMD64
3162 /* movsx reg32, mem8 */
3163 if (iGprDst >= 8 || iGprBase >= 8)
3164 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3165 pCodeBuf[off++] = 0x0f;
3166 pCodeBuf[off++] = 0xbe;
3167 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3168# if 1 /** @todo use 'movzx reg32, reg16' instead of 'and reg32, 0ffffh' ? */
3169 /* and reg32, 0xffff */
3170 if (iGprDst >= 8)
3171 pCodeBuf[off++] = X86_OP_REX_B;
3172 pCodeBuf[off++] = 0x81;
3173 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
3174 pCodeBuf[off++] = 0xff;
3175 pCodeBuf[off++] = 0xff;
3176 pCodeBuf[off++] = 0;
3177 pCodeBuf[off++] = 0;
3178# else
3179 /* movzx reg32, reg16 */
3180 if (iGprDst >= 8)
3181 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
3182 pCodeBuf[off++] = 0x0f;
3183 pCodeBuf[off++] = 0xb7;
3184 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
3185# endif
3186 RT_NOREF(iGprTmp);
3187
3188#elif defined(RT_ARCH_ARM64)
3189 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3190 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
3191 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
3192 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*64Bit*/);
3193
3194#else
3195# error "port me"
3196#endif
3197 return off;
3198}
3199
3200
3201/**
3202 * Emits a 128-bit vector register load via a GPR base address with a displacement.
3203 *
3204 * @note ARM64: Misaligned @a offDisp values and values not in the
3205 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3206 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3207 * does not heed this.
3208 */
3209DECL_FORCE_INLINE_THROW(uint32_t)
3210iemNativeEmitLoadVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3211 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3212{
3213#ifdef RT_ARCH_AMD64
3214 /* movdqu reg128, mem128 */
3215 pCodeBuf[off++] = 0xf3;
3216 if (iVecRegDst >= 8 || iGprBase >= 8)
3217 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3218 pCodeBuf[off++] = 0x0f;
3219 pCodeBuf[off++] = 0x6f;
3220 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3221 RT_NOREF(iGprTmp);
3222
3223#elif defined(RT_ARCH_ARM64)
3224 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3225 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3226
3227#else
3228# error "port me"
3229#endif
3230 return off;
3231}
3232
3233
3234/**
3235 * Emits a 128-bit vector register load via a GPR base address with a displacement.
3236 */
3237DECL_INLINE_THROW(uint32_t)
3238iemNativeEmitLoadVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3239{
3240#ifdef RT_ARCH_AMD64
3241 off = iemNativeEmitLoadVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3242 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3243
3244#elif defined(RT_ARCH_ARM64)
3245 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3246
3247#else
3248# error "port me"
3249#endif
3250 return off;
3251}
3252
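/* Usage sketch (illustrative only, made-up register indices): load a 128-bit
 * value from [host GPR 5 + 0x20] into host SIMD register 1 using the
 * convenience wrapper, which takes care of ensuring the instruction buffer:
 *      off = iemNativeEmitLoadVecRegByGprU128(pReNative, off, 1, 5, 0x20);
 */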
3253
3254/**
3255 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3256 *
3257 * @note ARM64: Misaligned @a offDisp values and values not in the
3258 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3259 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3260 * does not heed this.
3261 */
3262DECL_FORCE_INLINE_THROW(uint32_t)
3263iemNativeEmitLoadVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3264 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3265{
3266#ifdef RT_ARCH_AMD64
3267 /* vmovdqu reg256, mem256 */
3268 pCodeBuf[off++] = X86_OP_VEX3;
3269 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3270 | X86_OP_VEX3_BYTE1_X
3271 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3272 | UINT8_C(0x01);
3273 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3274 pCodeBuf[off++] = 0x6f;
3275 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3276 RT_NOREF(iGprTmp);
3277
3278#elif defined(RT_ARCH_ARM64)
3279 Assert(!(iVecRegDst & 0x1));
3280 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3281 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3282 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3283 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3284#else
3285# error "port me"
3286#endif
3287 return off;
3288}
3289
3290
3291/**
3292 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3293 */
3294DECL_INLINE_THROW(uint32_t)
3295iemNativeEmitLoadVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3296{
3297#ifdef RT_ARCH_AMD64
3298 off = iemNativeEmitLoadVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3299 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3300
3301#elif defined(RT_ARCH_ARM64)
3302 Assert(!(iVecRegDst & 0x1));
3303 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3304 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3305 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3306 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3307
3308#else
3309# error "port me"
3310#endif
3311 return off;
3312}
3313
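/* Usage sketch (illustrative only, made-up register indices): a 256-bit load
 * is a single vmovdqu on AMD64 but two consecutive 128-bit loads on ARM64,
 * so the destination register index must be even for the latter:
 *      off = iemNativeEmitLoadVecRegByGprU256(pReNative, off, 2, 5, 0x40);
 */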
3314
3315/**
3316 * Emits a 64-bit GPR store via a GPR base address with a displacement.
3317 *
3318 * @note ARM64: Misaligned @a offDisp values and values not in the
3319 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3320 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3321 * does not heed this.
3322 */
3323DECL_FORCE_INLINE_THROW(uint32_t)
3324iemNativeEmitStoreGpr64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3325 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3326{
3327#ifdef RT_ARCH_AMD64
3328 /* mov mem64, reg64 */
3329 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3330 pCodeBuf[off++] = 0x89;
3331 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3332 RT_NOREF(iGprTmp);
3333
3334#elif defined(RT_ARCH_ARM64)
3335 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3336 kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
3337
3338#else
3339# error "port me"
3340#endif
3341 return off;
3342}
3343
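/* Usage sketch (illustrative only; register indices, displacement and buffer
 * size are made-up values): store host GPR 4 to [host GPR 6 + 0x9000].  The
 * displacement is outside the directly encodable ARM64 range, so a temporary
 * register (here host GPR 7) is passed for that target:
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
 *      off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, 4, 6, 0x9000, 7);
 *      IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 */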
3344
3345/**
3346 * Emits a 32-bit GPR store via a GPR base address with a displacement.
3347 *
3348 * @note ARM64: Misaligned @a offDisp values and values not in the
3349 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3350 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3351 * does not heed this.
3352 */
3353DECL_FORCE_INLINE_THROW(uint32_t)
3354iemNativeEmitStoreGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3355 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3356{
3357#ifdef RT_ARCH_AMD64
3358 /* mov mem32, reg32 */
3359 if (iGprSrc >= 8 || iGprBase >= 8)
3360 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3361 pCodeBuf[off++] = 0x89;
3362 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3363 RT_NOREF(iGprTmp);
3364
3365#elif defined(RT_ARCH_ARM64)
3366 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3367 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3368
3369#else
3370# error "port me"
3371#endif
3372 return off;
3373}
3374
3375
3376/**
3377 * Emits a 16-bit GPR store via a GPR base address with a displacement.
3378 *
3379 * @note ARM64: Misaligned @a offDisp values and values not in the
3380 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3381 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3382 * does not heed this.
3383 */
3384DECL_FORCE_INLINE_THROW(uint32_t)
3385iemNativeEmitStoreGpr16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3386 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3387{
3388#ifdef RT_ARCH_AMD64
3389 /* mov mem16, reg16 */
3390 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3391 if (iGprSrc >= 8 || iGprBase >= 8)
3392 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3393 pCodeBuf[off++] = 0x89;
3394 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3395 RT_NOREF(iGprTmp);
3396
3397#elif defined(RT_ARCH_ARM64)
3398 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3399 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3400
3401#else
3402# error "port me"
3403#endif
3404 return off;
3405}
3406
3407
3408/**
3409 * Emits an 8-bit GPR store via a GPR base address with a displacement.
3410 *
3411 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3412 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3413 * same. Will assert / throw if caller does not heed this.
3414 */
3415DECL_FORCE_INLINE_THROW(uint32_t)
3416iemNativeEmitStoreGpr8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3417 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3418{
3419#ifdef RT_ARCH_AMD64
3420 /* mov mem8, reg8 */
3421 if (iGprSrc >= 8 || iGprBase >= 8)
3422 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3423 else if (iGprSrc >= 4)
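        /* A bare REX prefix is required so register encodings 4 thru 7 select
           SPL/BPL/SIL/DIL rather than AH/CH/DH/BH. */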
3424 pCodeBuf[off++] = X86_OP_REX;
3425 pCodeBuf[off++] = 0x88;
3426 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3427 RT_NOREF(iGprTmp);
3428
3429#elif defined(RT_ARCH_ARM64)
3430 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3431 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3432
3433#else
3434# error "port me"
3435#endif
3436 return off;
3437}
3438
3439
3440/**
3441 * Emits a 64-bit immediate store via a GPR base address with a displacement.
3442 *
3443 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0), on
3444 * AMD64 it depends on the immediate value.
3445 *
3446 * @note ARM64: Misaligned @a offDisp values and values not in the
3447 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3448 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3449 * does not heed this.
3450 */
3451DECL_FORCE_INLINE_THROW(uint32_t)
3452iemNativeEmitStoreImm64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint64_t uImm, uint8_t iGprBase,
3453 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3454{
3455#ifdef RT_ARCH_AMD64
3456 if ((int32_t)uImm == (int64_t)uImm)
3457 {
3458 /* mov mem64, imm32 (sign-extended) */
3459 pCodeBuf[off++] = X86_OP_REX_W | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3460 pCodeBuf[off++] = 0xc7;
3461 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3462 pCodeBuf[off++] = RT_BYTE1(uImm);
3463 pCodeBuf[off++] = RT_BYTE2(uImm);
3464 pCodeBuf[off++] = RT_BYTE3(uImm);
3465 pCodeBuf[off++] = RT_BYTE4(uImm);
3466 }
3467 else if (iGprImmTmp != UINT8_MAX || iGprTmp != UINT8_MAX)
3468 {
3469 /* require temporary register. */
3470 if (iGprImmTmp == UINT8_MAX)
3471 iGprImmTmp = iGprTmp;
3472 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3473 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp);
3474 }
3475 else
3476# ifdef IEM_WITH_THROW_CATCH
3477 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3478# else
3479 AssertReleaseFailedStmt(off = UINT32_MAX);
3480# endif
3481
3482#elif defined(RT_ARCH_ARM64)
3483 if (uImm == 0)
3484 iGprImmTmp = ARMV8_A64_REG_XZR;
3485 else
3486 {
3487 Assert(iGprImmTmp < 31);
3488 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3489 }
3490 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp, iGprTmp);
3491
3492#else
3493# error "port me"
3494#endif
3495 return off;
3496}
3497
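/* Usage sketch (illustrative only; register indices and the immediate are
 * made-up values): store a 64-bit constant that does not fit a sign-extended
 * imm32 to [host GPR 6 + 8], so an immediate temporary register (host GPR 7)
 * is needed on both AMD64 and ARM64:
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
 *      off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, UINT64_C(0x1234567890), 6, 7, 8);
 *      IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 */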
3498
3499/**
3500 * Emits a 32-bit immediate store via a GPR base address with a displacement.
3501 *
3502 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3503 *
3504 * @note ARM64: Misaligned @a offDisp values and values not in the
3505 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3506 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3507 * does not heed this.
3508 */
3509DECL_FORCE_INLINE_THROW(uint32_t)
3510iemNativeEmitStoreImm32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uImm, uint8_t iGprBase,
3511 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3512{
3513#ifdef RT_ARCH_AMD64
3514 /* mov mem32, imm32 */
3515 if (iGprBase >= 8)
3516 pCodeBuf[off++] = X86_OP_REX_B;
3517 pCodeBuf[off++] = 0xc7;
3518 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3519 pCodeBuf[off++] = RT_BYTE1(uImm);
3520 pCodeBuf[off++] = RT_BYTE2(uImm);
3521 pCodeBuf[off++] = RT_BYTE3(uImm);
3522 pCodeBuf[off++] = RT_BYTE4(uImm);
3523 RT_NOREF(iGprImmTmp, iGprTmp);
3524
3525#elif defined(RT_ARCH_ARM64)
3527 if (uImm == 0)
3528 iGprImmTmp = ARMV8_A64_REG_XZR;
3529 else
3530 {
3531 Assert(iGprImmTmp < 31);
3532 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3533 }
3534 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3535 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3536
3537#else
3538# error "port me"
3539#endif
3540 return off;
3541}
3542
3543
3544/**
3545 * Emits a 16-bit immediate store via a GPR base address with a displacement.
3546 *
3547 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3548 *
3549 * @note ARM64: Misaligned @a offDisp values and values not in the
3550 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3551 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3552 * does not heed this.
3553 */
3554DECL_FORCE_INLINE_THROW(uint32_t)
3555iemNativeEmitStoreImm16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint8_t iGprBase,
3556 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3557{
3558#ifdef RT_ARCH_AMD64
3559 /* mov mem16, imm16 */
3560 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3561 if (iGprBase >= 8)
3562 pCodeBuf[off++] = X86_OP_REX_B;
3563 pCodeBuf[off++] = 0xc7;
3564 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3565 pCodeBuf[off++] = RT_BYTE1(uImm);
3566 pCodeBuf[off++] = RT_BYTE2(uImm);
3567 RT_NOREF(iGprImmTmp, iGprTmp);
3568
3569#elif defined(RT_ARCH_ARM64)
3570 if (uImm == 0)
3571 iGprImmTmp = ARMV8_A64_REG_XZR;
3572 else
3573 {
3574 Assert(iGprImmTmp < 31);
3575 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3576 }
3577 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3578 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3579
3580#else
3581# error "port me"
3582#endif
3583 return off;
3584}
3585
3586
3587/**
3588 * Emits an 8-bit immediate store via a GPR base address with a displacement.
3589 *
3590 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3591 *
3592 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3593 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3594 * same. Will assert / throw if caller does not heed this.
3595 */
3596DECL_FORCE_INLINE_THROW(uint32_t)
3597iemNativeEmitStoreImm8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t uImm, uint8_t iGprBase,
3598 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3599{
3600#ifdef RT_ARCH_AMD64
3601 /* mov mem8, imm8 */
3602 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3603 if (iGprBase >= 8)
3604 pCodeBuf[off++] = X86_OP_REX_B;
3605 pCodeBuf[off++] = 0xc6;
3606 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3607 pCodeBuf[off++] = uImm;
3608 RT_NOREF(iGprImmTmp, iGprTmp);
3609
3610#elif defined(RT_ARCH_ARM64)
3611 if (uImm == 0)
3612 iGprImmTmp = ARMV8_A64_REG_XZR;
3613 else
3614 {
3615 Assert(iGprImmTmp < 31);
3616 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3617 }
3618 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3619 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3620
3621#else
3622# error "port me"
3623#endif
3624 return off;
3625}
3626
3627
3628/**
3629 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3630 *
3631 * @note ARM64: Misaligned @a offDisp values and values not in the
3632 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3633 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3634 * does not heed this.
3635 */
3636DECL_FORCE_INLINE_THROW(uint32_t)
3637iemNativeEmitStoreVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3638 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3639{
3640#ifdef RT_ARCH_AMD64
3641 /* movdqu mem128, reg128 */
3642 pCodeBuf[off++] = 0xf3;
3643 if (iVecRegDst >= 8 || iGprBase >= 8)
3644 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3645 pCodeBuf[off++] = 0x0f;
3646 pCodeBuf[off++] = 0x7f;
3647 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3648 RT_NOREF(iGprTmp);
3649
3650#elif defined(RT_ARCH_ARM64)
3651 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3652 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3653
3654#else
3655# error "port me"
3656#endif
3657 return off;
3658}
3659
3660
3661/**
3662 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3663 */
3664DECL_INLINE_THROW(uint32_t)
3665iemNativeEmitStoreVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3666{
3667#ifdef RT_ARCH_AMD64
3668 off = iemNativeEmitStoreVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3669 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3670
3671#elif defined(RT_ARCH_ARM64)
3672 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3673
3674#else
3675# error "port me"
3676#endif
3677 return off;
3678}
3679
3680
3681/**
3682 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3683 *
3684 * @note ARM64: Misaligned @a offDisp values and values not in the
3685 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3686 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3687 * does not heed this.
3688 */
3689DECL_FORCE_INLINE_THROW(uint32_t)
3690iemNativeEmitStoreVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3691 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3692{
3693#ifdef RT_ARCH_AMD64
3694 /* vmovdqu mem256, reg256 */
3695 pCodeBuf[off++] = X86_OP_VEX3;
3696 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3697 | X86_OP_VEX3_BYTE1_X
3698 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3699 | UINT8_C(0x01);
3700 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3701 pCodeBuf[off++] = 0x7f;
3702 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3703 RT_NOREF(iGprTmp);
3704
3705#elif defined(RT_ARCH_ARM64)
3706 Assert(!(iVecRegDst & 0x1));
3707 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3708 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3709 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3710 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3711#else
3712# error "port me"
3713#endif
3714 return off;
3715}
3716
3717
3718/**
3719 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3720 */
3721DECL_INLINE_THROW(uint32_t)
3722iemNativeEmitStoreVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3723{
3724#ifdef RT_ARCH_AMD64
3725 off = iemNativeEmitStoreVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3726 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3727
3728#elif defined(RT_ARCH_ARM64)
3729 Assert(!(iVecRegDst & 0x1));
3730 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3731 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3732 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3733 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3734
3735#else
3736# error "port me"
3737#endif
3738 return off;
3739}
3740
3741
3742
3743/*********************************************************************************************************************************
3744* Subtraction and Additions *
3745*********************************************************************************************************************************/
3746
3747/**
3748 * Emits subtracting a 64-bit GPR from another, storing the result in the first.
3749 * @note The AMD64 version sets flags.
3750 */
3751DECL_INLINE_THROW(uint32_t)
3752iemNativeEmitSubTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3753{
3754#if defined(RT_ARCH_AMD64)
3755 /* sub Gv,Ev */
3756 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3757 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3758 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3759 pbCodeBuf[off++] = 0x2b;
3760 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3761
3762#elif defined(RT_ARCH_ARM64)
3763 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3764 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend);
3765
3766#else
3767# error "Port me"
3768#endif
3769 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3770 return off;
3771}
3772
3773
3774/**
3775 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3776 * @note The AMD64 version sets flags.
3777 */
3778DECL_FORCE_INLINE(uint32_t)
3779iemNativeEmitSubTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3780{
3781#if defined(RT_ARCH_AMD64)
3782 /* sub Gv,Ev */
3783 if (iGprDst >= 8 || iGprSubtrahend >= 8)
3784 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
3785 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3786 pCodeBuf[off++] = 0x2b;
3787 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3788
3789#elif defined(RT_ARCH_ARM64)
3790 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend, false /*f64Bit*/);
3791
3792#else
3793# error "Port me"
3794#endif
3795 return off;
3796}
3797
3798
3799/**
3800 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3801 * @note The AMD64 version sets flags.
3802 */
3803DECL_INLINE_THROW(uint32_t)
3804iemNativeEmitSubTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3805{
3806#if defined(RT_ARCH_AMD64)
3807 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSubtrahend);
3808#elif defined(RT_ARCH_ARM64)
3809 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSubtrahend);
3810#else
3811# error "Port me"
3812#endif
3813 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3814 return off;
3815}
3816
3817
3818/**
3819 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3820 *
3821 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3822 *
3823 * @note Larger constants will require a temporary register. Failing to specify
3824 * one when needed will trigger fatal assertion / throw.
3825 */
3826DECL_FORCE_INLINE_THROW(uint32_t)
3827iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3828 uint8_t iGprTmp = UINT8_MAX)
3829{
3830#ifdef RT_ARCH_AMD64
3831 pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
3832 if (iSubtrahend == 1)
3833 {
3834 /* dec r/m64 */
3835 pCodeBuf[off++] = 0xff;
3836 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3837 }
3838 else if (iSubtrahend == -1)
3839 {
3840 /* inc r/m64 */
3841 pCodeBuf[off++] = 0xff;
3842 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3843 }
3844 else if ((int8_t)iSubtrahend == iSubtrahend)
3845 {
3846 /* sub r/m64, imm8 */
3847 pCodeBuf[off++] = 0x83;
3848 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3849 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3850 }
3851 else if ((int32_t)iSubtrahend == iSubtrahend)
3852 {
3853 /* sub r/m64, imm32 */
3854 pCodeBuf[off++] = 0x81;
3855 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3856 pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
3857 pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
3858 pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
3859 pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
3860 }
3861 else if (iGprTmp != UINT8_MAX)
3862 {
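        /* The off - 1 backs up over the REX prefix emitted at the top of this
           function; the immediate load overwrites it and the sub below emits
           its own prefix. */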
3863 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1, iGprTmp, (uint64_t)iSubtrahend);
3864 /* sub r/m64, r64 */
3865 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
3866 pCodeBuf[off++] = 0x29;
3867 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
3868 }
3869 else
3870# ifdef IEM_WITH_THROW_CATCH
3871 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3872# else
3873 AssertReleaseFailedStmt(off = UINT32_MAX);
3874# endif
3875
3876#elif defined(RT_ARCH_ARM64)
3877 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3878 if (uAbsSubtrahend < 4096)
3879 {
3880 if (iSubtrahend >= 0)
3881 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3882 else
3883 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3884 }
3885 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3886 {
3887 if (iSubtrahend >= 0)
3888 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3889 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3890 else
3891 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3892 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3893 }
3894 else if (iGprTmp != UINT8_MAX)
3895 {
3896 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
3897 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
3898 }
3899 else
3900# ifdef IEM_WITH_THROW_CATCH
3901 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3902# else
3903 AssertReleaseFailedStmt(off = UINT32_MAX);
3904# endif
3905
3906#else
3907# error "Port me"
3908#endif
3909 return off;
3910}
3911
3912
3913/**
3914 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3915 *
3916 * @note Larger constants will require a temporary register. Failing to specify
3917 * one when needed will trigger fatal assertion / throw.
3918 */
3919DECL_INLINE_THROW(uint32_t)
3920iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3921 uint8_t iGprTmp = UINT8_MAX)
3922
3923{
3924#ifdef RT_ARCH_AMD64
3925 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
3926#elif defined(RT_ARCH_ARM64)
3927 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
3928#else
3929# error "Port me"
3930#endif
3931 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3932 return off;
3933}
3934
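/* Usage sketch (illustrative only, made-up values): subtract 1 from host
 * GPR 3 (encoded as a dec on AMD64), then subtract a constant too large for
 * the immediate encodings, which falls back to the temporary register 7:
 *      off = iemNativeEmitSubGprImm(pReNative, off, 3, 1);
 *      off = iemNativeEmitSubGprImm(pReNative, off, 3, INT64_C(0x123456789), 7);
 */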
3935
3936/**
3937 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3938 *
3939 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3940 *
3941 * @note ARM64: Larger constants will require a temporary register. Failing to
3942 * specify one when needed will trigger fatal assertion / throw.
3943 */
3944DECL_FORCE_INLINE_THROW(uint32_t)
3945iemNativeEmitSubGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3946 uint8_t iGprTmp = UINT8_MAX)
3947{
3948#ifdef RT_ARCH_AMD64
3949 if (iGprDst >= 8)
3950 pCodeBuf[off++] = X86_OP_REX_B;
3951 if (iSubtrahend == 1)
3952 {
3953 /* dec r/m32 */
3954 pCodeBuf[off++] = 0xff;
3955 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3956 }
3957 else if (iSubtrahend == -1)
3958 {
3959 /* inc r/m32 */
3960 pCodeBuf[off++] = 0xff;
3961 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3962 }
3963 else if (iSubtrahend < 128 && iSubtrahend >= -128)
3964 {
3965 /* sub r/m32, imm8 */
3966 pCodeBuf[off++] = 0x83;
3967 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3968 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3969 }
3970 else
3971 {
3972 /* sub r/m32, imm32 */
3973 pCodeBuf[off++] = 0x81;
3974 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3975 pCodeBuf[off++] = RT_BYTE1(iSubtrahend);
3976 pCodeBuf[off++] = RT_BYTE2(iSubtrahend);
3977 pCodeBuf[off++] = RT_BYTE3(iSubtrahend);
3978 pCodeBuf[off++] = RT_BYTE4(iSubtrahend);
3979 }
3980 RT_NOREF(iGprTmp);
3981
3982#elif defined(RT_ARCH_ARM64)
3983 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3984 if (uAbsSubtrahend < 4096)
3985 {
3986 if (iSubtrahend >= 0)
3987 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3988 else
3989 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3990 }
3991 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3992 {
3993 if (iSubtrahend >= 0)
3994 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3995 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3996 else
3997 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3998 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3999 }
4000 else if (iGprTmp != UINT8_MAX)
4001 {
4002 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
4003 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
4004 }
4005 else
4006# ifdef IEM_WITH_THROW_CATCH
4007 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4008# else
4009 AssertReleaseFailedStmt(off = UINT32_MAX);
4010# endif
4011
4012#else
4013# error "Port me"
4014#endif
4015 return off;
4016}
4017
4018
4019/**
4020 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
4021 *
4022 * @note ARM64: Larger constants will require a temporary register. Failing to
4023 * specify one when needed will trigger fatal assertion / throw.
4024 */
4025DECL_INLINE_THROW(uint32_t)
4026iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
4027 uint8_t iGprTmp = UINT8_MAX)
4028
4029{
4030#ifdef RT_ARCH_AMD64
4031 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
4032#elif defined(RT_ARCH_ARM64)
4033 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
4034#else
4035# error "Port me"
4036#endif
4037 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4038 return off;
4039}
4040
4041
4042/**
4043 * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
4044 *
4045 * This will optimize using DEC/INC/whatever and ARM64 will not set flags,
4046 * so not suitable as a base for conditional jumps.
4047 *
4048 * @note AMD64: Will only update the lower 16 bits of the register.
4049 * @note ARM64: Will update the entire register.
4050 * @note ARM64: Larger constants will require a temporary register. Failing to
4051 * specify one when needed will trigger fatal assertion / throw.
4052 */
4053DECL_FORCE_INLINE_THROW(uint32_t)
4054iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
4055 uint8_t iGprTmp = UINT8_MAX)
4056{
4057#ifdef RT_ARCH_AMD64
4058 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4059 if (iGprDst >= 8)
4060 pCodeBuf[off++] = X86_OP_REX_B;
4061 if (iSubtrahend == 1)
4062 {
4063 /* dec r/m16 */
4064 pCodeBuf[off++] = 0xff;
4065 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4066 }
4067 else if (iSubtrahend == -1)
4068 {
4069 /* inc r/m16 */
4070 pCodeBuf[off++] = 0xff;
4071 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4072 }
4073 else if ((int8_t)iSubtrahend == iSubtrahend)
4074 {
4075 /* sub r/m16, imm8 */
4076 pCodeBuf[off++] = 0x83;
4077 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
4078 pCodeBuf[off++] = (uint8_t)iSubtrahend;
4079 }
4080 else
4081 {
4082 /* sub r/m16, imm16 */
4083 pCodeBuf[off++] = 0x81;
4084 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
4085 pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
4086 pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
4087 }
4088 RT_NOREF(iGprTmp);
4089
4090#elif defined(RT_ARCH_ARM64)
4091 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
4092 if (uAbsSubtrahend < 4096)
4093 {
4094 if (iSubtrahend >= 0)
4095 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
4096 else
4097 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
4098 }
4099 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
4100 {
4101 if (iSubtrahend >= 0)
4102 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
4103 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4104 else
4105 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
4106 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4107 }
4108 else if (iGprTmp != UINT8_MAX)
4109 {
4110 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
4111 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
4112 }
4113 else
4114# ifdef IEM_WITH_THROW_CATCH
4115 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4116# else
4117 AssertReleaseFailedStmt(off = UINT32_MAX);
4118# endif
4119 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4120
4121#else
4122# error "Port me"
4123#endif
4124 return off;
4125}
4126
4127
4128/**
4129 * Emits adding a 64-bit GPR to another, storing the result in the first.
4130 * @note The AMD64 version sets flags.
4131 */
4132DECL_FORCE_INLINE(uint32_t)
4133iemNativeEmitAddTwoGprsEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4134{
4135#if defined(RT_ARCH_AMD64)
4136 /* add Gv,Ev */
4137 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4138 | (iGprAddend < 8 ? 0 : X86_OP_REX_B);
4139 pCodeBuf[off++] = 0x03;
4140 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
4141
4142#elif defined(RT_ARCH_ARM64)
4143 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend);
4144
4145#else
4146# error "Port me"
4147#endif
4148 return off;
4149}
4150
4151
4152/**
4153 * Emits adding a 64-bit GPR to another, storing the result in the first.
4154 * @note The AMD64 version sets flags.
4155 */
4156DECL_INLINE_THROW(uint32_t)
4157iemNativeEmitAddTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4158{
4159#if defined(RT_ARCH_AMD64)
4160 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
4161#elif defined(RT_ARCH_ARM64)
4162 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
4163#else
4164# error "Port me"
4165#endif
4166 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4167 return off;
4168}
4169
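/* Usage sketch (illustrative only, made-up register indices): add host GPR 5
 * to host GPR 2, leaving the sum in GPR 2; note that the AMD64 variant
 * clobbers EFLAGS whereas the ARM64 variant leaves the flags alone:
 *      off = iemNativeEmitAddTwoGprs(pReNative, off, 2, 5);
 */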
4170
4171/**
4172 * Emits adding a 32-bit GPR to another, storing the result in the first.
4173 * @note The AMD64 version sets flags.
4174 */
4175DECL_FORCE_INLINE(uint32_t)
4176iemNativeEmitAddTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4177{
4178#if defined(RT_ARCH_AMD64)
4179 /* add Gv,Ev */
4180 if (iGprDst >= 8 || iGprAddend >= 8)
4181 pCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0)
4182 | (iGprAddend >= 8 ? X86_OP_REX_B : 0);
4183 pCodeBuf[off++] = 0x03;
4184 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
4185
4186#elif defined(RT_ARCH_ARM64)
4187 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend, false /*f64Bit*/);
4188
4189#else
4190# error "Port me"
4191#endif
4192 return off;
4193}
4194
4195
4196/**
4197 * Emits adding a 32-bit GPR to another, storing the result in the first.
4198 * @note The AMD64 version sets flags.
4199 */
4200DECL_INLINE_THROW(uint32_t)
4201iemNativeEmitAddTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4202{
4203#if defined(RT_ARCH_AMD64)
4204 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
4205#elif defined(RT_ARCH_ARM64)
4206 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
4207#else
4208# error "Port me"
4209#endif
4210 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4211 return off;
4212}
4213
4214
4215/**
4216 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
4217 */
4218DECL_INLINE_THROW(uint32_t)
4219iemNativeEmitAddGprImm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4220{
4221#if defined(RT_ARCH_AMD64)
4222 /* add or inc */
4223 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4224 if (iImm8 != 1)
4225 {
4226 pCodeBuf[off++] = 0x83;
4227 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4228 pCodeBuf[off++] = (uint8_t)iImm8;
4229 }
4230 else
4231 {
4232 pCodeBuf[off++] = 0xff;
4233 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4234 }
4235
4236#elif defined(RT_ARCH_ARM64)
4237 if (iImm8 >= 0)
4238 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint8_t)iImm8);
4239 else
4240 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint8_t)-iImm8);
4241
4242#else
4243# error "Port me"
4244#endif
4245 return off;
4246}
4247
4248
4249/**
4250 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
4251 */
4252DECL_INLINE_THROW(uint32_t)
4253iemNativeEmitAddGprImm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4254{
4255#if defined(RT_ARCH_AMD64)
4256 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4257#elif defined(RT_ARCH_ARM64)
4258 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4259#else
4260# error "Port me"
4261#endif
4262 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4263 return off;
4264}
4265
4266
4267/**
4268 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4269 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4270 */
4271DECL_FORCE_INLINE(uint32_t)
4272iemNativeEmitAddGpr32Imm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4273{
4274#if defined(RT_ARCH_AMD64)
4275 /* add or inc */
4276 if (iGprDst >= 8)
4277 pCodeBuf[off++] = X86_OP_REX_B;
4278 if (iImm8 != 1)
4279 {
4280 pCodeBuf[off++] = 0x83;
4281 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4282 pCodeBuf[off++] = (uint8_t)iImm8;
4283 }
4284 else
4285 {
4286 pCodeBuf[off++] = 0xff;
4287 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4288 }
4289
4290#elif defined(RT_ARCH_ARM64)
4291 if (iImm8 >= 0)
4292 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
4293 else
4294 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
4295
4296#else
4297# error "Port me"
4298#endif
4299 return off;
4300}
4301
4302
4303/**
4304 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4305 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4306 */
4307DECL_INLINE_THROW(uint32_t)
4308iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4309{
4310#if defined(RT_ARCH_AMD64)
4311 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4312#elif defined(RT_ARCH_ARM64)
4313 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4314#else
4315# error "Port me"
4316#endif
4317 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4318 return off;
4319}
4320
4321
4322/**
4323 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4324 *
4325 * @note Will assert / throw if @a iGprTmp is not specified when needed.
4326 */
4327DECL_FORCE_INLINE_THROW(uint32_t)
4328iemNativeEmitAddGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iAddend, uint8_t iGprTmp = UINT8_MAX)
4329{
4330#if defined(RT_ARCH_AMD64)
4331 if ((int8_t)iAddend == iAddend)
4332 return iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4333
4334 if ((int32_t)iAddend == iAddend)
4335 {
4336 /* add grp, imm32 */
4337 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4338 pCodeBuf[off++] = 0x81;
4339 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4340 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4341 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4342 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4343 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4344 }
4345 else if (iGprTmp != UINT8_MAX)
4346 {
4347 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4348
4349 /* add dst, tmpreg */
4350 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4351 | (iGprTmp < 8 ? 0 : X86_OP_REX_B);
4352 pCodeBuf[off++] = 0x03;
4353 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprTmp & 7);
4354 }
4355 else
4356# ifdef IEM_WITH_THROW_CATCH
4357 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4358# else
4359 AssertReleaseFailedStmt(off = UINT32_MAX);
4360# endif
4361
4362#elif defined(RT_ARCH_ARM64)
4363 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4364 if (uAbsAddend <= 0xffffffU)
4365 {
4366 bool const fSub = iAddend < 0;
4367 if (uAbsAddend > 0xfffU)
4368 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, true /*f64Bit*/,
4369 false /*fSetFlags*/, true /*fShift12*/);
4370 if (uAbsAddend & 0xfffU)
4371 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & UINT32_C(0xfff));
4372 }
4373 else if (iGprTmp != UINT8_MAX)
4374 {
4375 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4376 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp);
4377 }
4378 else
4379# ifdef IEM_WITH_THROW_CATCH
4380 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4381# else
4382 AssertReleaseFailedStmt(off = UINT32_MAX);
4383# endif
4384
4385#else
4386# error "Port me"
4387#endif
4388 return off;
4389}
4390
4391
4392/**
4393 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4394 */
4395DECL_INLINE_THROW(uint32_t)
4396iemNativeEmitAddGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iAddend)
4397{
4398#if defined(RT_ARCH_AMD64)
4399 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4400 return iemNativeEmitAddGprImm8(pReNative, off, iGprDst, (int8_t)iAddend);
4401
4402 if (iAddend <= INT32_MAX && iAddend >= INT32_MIN)
4403 {
4404 /* add grp, imm32 */
4405 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4406 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4407 pbCodeBuf[off++] = 0x81;
4408 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4409 pbCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4410 pbCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4411 pbCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4412 pbCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4413 }
4414 else
4415 {
4416 /* Best to use a temporary register to deal with this in the simplest way: */
4417 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4418
4419 /* add dst, tmpreg */
4420 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4421 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4422 | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
4423 pbCodeBuf[off++] = 0x03;
4424 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iTmpReg & 7);
4425
4426 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4427 }
4428
4429#elif defined(RT_ARCH_ARM64)
4430 bool const fSub = iAddend < 0;
4431 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4432 if (uAbsAddend <= 0xffffffU)
4433 {
4434 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4435 if (uAbsAddend > 0xfffU)
4436 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, true /*f64Bit*/,
4437 false /*fSetFlags*/, true /*fShift12*/);
4438 if (uAbsAddend & 0xfffU)
4439 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & UINT32_C(0xfff));
4440 }
4441 else
4442 {
4443 /* Use temporary register for the immediate. */
4444 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uAbsAddend);
4445
4446 /* add gprdst, gprdst, tmpreg */
4447 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4448 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, iGprDst, iTmpReg);
4449
4450 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4451 }
4452
4453#else
4454# error "Port me"
4455#endif
4456 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4457 return off;
4458}
4459
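/* Usage sketch (illustrative only, made-up values): advance host GPR 3 by a
 * large constant; the emitter picks imm8/imm32 forms on AMD64 and add/sub
 * with an optional shift-by-12 on ARM64, falling back to an internally
 * allocated temporary register when the constant cannot be encoded directly:
 *      off = iemNativeEmitAddGprImm(pReNative, off, 3, INT64_C(0x12345678));
 */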
4460
4461/**
4462 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4463 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4464 * @note ARM64: @a iAddend values outside the -0xffffff..0xffffff range require
4465 * a temporary register (@a iGprTmp); negative addends are emitted as
4466 * subtractions. Will assert / throw if caller does not heed this.
4467 */
4468DECL_FORCE_INLINE_THROW(uint32_t)
4469iemNativeEmitAddGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iAddend, uint8_t iGprTmp = UINT8_MAX)
4470{
4471#if defined(RT_ARCH_AMD64)
4472 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4473 return iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4474
4475 /* add grp, imm32 */
4476 if (iGprDst >= 8)
4477 pCodeBuf[off++] = X86_OP_REX_B;
4478 pCodeBuf[off++] = 0x81;
4479 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4480 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4481 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4482 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4483 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4484 RT_NOREF(iGprTmp);
4485
4486#elif defined(RT_ARCH_ARM64)
4487 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4488 if (uAbsAddend <= 0xffffffU)
4489 {
4490 bool const fSub = iAddend < 0;
4491 if (uAbsAddend > 0xfffU)
4492 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4493 false /*fSetFlags*/, true /*fShift12*/);
4494 if (uAbsAddend & 0xfffU)
4495 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4496 }
4497 else if (iGprTmp != UINT8_MAX)
4498 {
4499 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, iAddend);
4500 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
4501 }
4502 else
4503# ifdef IEM_WITH_THROW_CATCH
4504 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4505# else
4506 AssertReleaseFailedStmt(off = UINT32_MAX);
4507# endif
4508
4509#else
4510# error "Port me"
4511#endif
4512 return off;
4513}
4514
4515
4516/**
4517 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4518 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4519 */
4520DECL_INLINE_THROW(uint32_t)
4521iemNativeEmitAddGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4522{
4523#if defined(RT_ARCH_AMD64)
4524 off = iemNativeEmitAddGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iAddend);
4525
4526#elif defined(RT_ARCH_ARM64)
4527 bool const fSub = iAddend < 0;
4528 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4529 if (uAbsAddend <= 0xffffffU)
4530 {
4531 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4532 if (uAbsAddend > 0xfffU)
4533 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4534 false /*fSetFlags*/, true /*fShift12*/);
4535 if (uAbsAddend & 0xfffU)
4536 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4537 }
4538 else
4539 {
4540 /* Use temporary register for the immediate. */
4541 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uAbsAddend);
4542
4543 /* add gprdst, gprdst, tmpreg */
4544 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4545 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, iGprDst, iTmpReg, false /*f64Bit*/);
4546
4547 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4548 }
4549
4550#else
4551# error "Port me"
4552#endif
4553 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4554 return off;
4555}
4556
4557
4558/**
4559 * Emits a 16-bit GPR add with a signed immediate addend.
4560 *
4561 * This will optimize using INC/DEC/whatever and ARM64 will not set flags,
4562 * so not suitable as a base for conditional jumps.
4563 *
4564 * @note AMD64: Will only update the lower 16 bits of the register.
4565 * @note ARM64: Will update the entire register.
4566 * @sa iemNativeEmitSubGpr16ImmEx
4567 */
4568DECL_FORCE_INLINE(uint32_t)
4569iemNativeEmitAddGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iAddend)
4570{
4571#ifdef RT_ARCH_AMD64
4572 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4573 if (iGprDst >= 8)
4574 pCodeBuf[off++] = X86_OP_REX_B;
4575 if (iAddend == 1)
4576 {
4577 /* inc r/m16 */
4578 pCodeBuf[off++] = 0xff;
4579 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4580 }
4581 else if (iAddend == -1)
4582 {
4583 /* dec r/m16 */
4584 pCodeBuf[off++] = 0xff;
4585 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4586 }
4587 else if ((int8_t)iAddend == iAddend)
4588 {
4589 /* add r/m16, imm8 */
4590 pCodeBuf[off++] = 0x83;
4591 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4592 pCodeBuf[off++] = (uint8_t)iAddend;
4593 }
4594 else
4595 {
4596 /* add r/m16, imm16 */
4597 pCodeBuf[off++] = 0x81;
4598 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4599 pCodeBuf[off++] = RT_BYTE1((uint16_t)iAddend);
4600 pCodeBuf[off++] = RT_BYTE2((uint16_t)iAddend);
4601 }
4602
4603#elif defined(RT_ARCH_ARM64)
4604 bool const fSub = iAddend < 0;
4605 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4606 if (uAbsAddend > 0xfffU)
4607 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4608 false /*fSetFlags*/, true /*fShift12*/);
4609 if (uAbsAddend & 0xfffU)
4610 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4611 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4612
4613#else
4614# error "Port me"
4615#endif
4616 return off;
4617}
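/* Minimal wrapper sketch (illustration only): Ex-style emitters write into a buffer the
   caller has already sized with iemNativeInstrBufEnsure.  A hypothetical non-Ex wrapper
   for the 16-bit add could look like this, using the worst-case sizes of the code paths
   above (6 bytes on AMD64, 3 instructions on ARM64). */
#if 0
DECL_INLINE_THROW(uint32_t)
iemNativeEmitAddGpr16Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int16_t iAddend)
{
# ifdef RT_ARCH_AMD64
    off = iemNativeEmitAddGpr16ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iAddend);
# elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitAddGpr16ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iAddend);
# else
#  error "Port me"
# endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
#endif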
4618
4619
4620
4621/**
4622 * Adds two 64-bit GPRs together, storing the result in a third register.
4623 */
4624DECL_FORCE_INLINE(uint32_t)
4625iemNativeEmitGprEqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4626{
4627#ifdef RT_ARCH_AMD64
4628 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4629 {
4630 /** @todo consider LEA */
4631 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend1);
4632 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend2);
4633 }
4634 else
4635 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4636
4637#elif defined(RT_ARCH_ARM64)
4638 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2);
4639
4640#else
4641# error "Port me!"
4642#endif
4643 return off;
4644}
4645
4646
4647
4648/**
4649 * Adds two 32-bit GPRs together, storing the result in a third register.
4650 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4651 */
4652DECL_FORCE_INLINE(uint32_t)
4653iemNativeEmitGpr32EqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4654{
4655#ifdef RT_ARCH_AMD64
4656 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4657 {
4658 /** @todo consider LEA */
4659 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend1);
4660 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend2);
4661 }
4662 else
4663 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4664
4665#elif defined(RT_ARCH_ARM64)
4666 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2, false /*f64Bit*/);
4667
4668#else
4669# error "Port me!"
4670#endif
4671 return off;
4672}
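/* Usage sketch (illustration only): summing two host registers into a third, distinct
   register; on both hosts bits 32 thru 63 of the destination end up zero, matching the
   note above.  The register names are hypothetical, and the buffer size 6 covers the
   worst case (two 3-byte instructions on AMD64, one instruction on ARM64). */
#if 0
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
    off = iemNativeEmitGpr32EqGprPlusGprEx(pCodeBuf, off, idxRegResult, idxRegLeft, idxRegRight);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
#endif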
4673
4674
4675/**
4676 * Adds a 64-bit GPR and a 64-bit signed constant, storing the result in a
4677 * third register.
4678 *
4679 * @note The ARM64 version does not work for non-trivial constants if the
4680 * two registers are the same. Will assert / throw exception.
4681 */
4682DECL_FORCE_INLINE_THROW(uint32_t)
4683iemNativeEmitGprEqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int64_t iImmAddend)
4684{
4685#ifdef RT_ARCH_AMD64
4686 /** @todo consider LEA */
4687 if ((int8_t)iImmAddend == iImmAddend)
4688 {
4689 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend);
4690 off = iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4691 }
4692 else
4693 {
4694 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4695 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4696 }
4697
4698#elif defined(RT_ARCH_ARM64)
4699 bool const fSub = iImmAddend < 0;
4700 uint64_t const uAbsImmAddend = RT_ABS(iImmAddend);
4701 if (uAbsImmAddend <= 0xfffU)
4702 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend);
4703 else if (uAbsImmAddend <= 0xffffffU)
4704 {
4705 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend >> 12,
4706 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4707 if (uAbsImmAddend & 0xfffU)
4708 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsImmAddend & UINT32_C(0xfff));
4709 }
4710 else if (iGprDst != iGprAddend)
4711 {
4712 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, (uint64_t)iImmAddend);
4713 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4714 }
4715 else
4716# ifdef IEM_WITH_THROW_CATCH
4717 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4718# else
4719 AssertReleaseFailedStmt(off = UINT32_MAX);
4720# endif
4721
4722#else
4723# error "Port me!"
4724#endif
4725 return off;
4726}
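/* Caller-side sketch (illustration only) of working around the ARM64 restriction noted
   above: when the destination must equal the addend register and the constant is
   non-trivial, load the constant into a scratch register first and use the
   register-register add instead.  iemNativeRegAllocTmpImm/iemNativeRegFreeTmpImm are the
   helpers already used earlier in this file; the buffer sizes are rough upper bounds. */
#if 0
    if (iGprDst != iGprAddend)
        off = iemNativeEmitGprEqGprPlusImmEx(iemNativeInstrBufEnsure(pReNative, off, 16), off,
                                             iGprDst, iGprAddend, iImmAddend);
    else
    {
        uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iImmAddend);
        off = iemNativeEmitGprEqGprPlusGprEx(iemNativeInstrBufEnsure(pReNative, off, 8), off,
                                             iGprDst, iGprAddend, iTmpReg);
        iemNativeRegFreeTmpImm(pReNative, iTmpReg);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
#endif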
4727
4728
4729/**
4730 * Adds a 32-bit GPR and a 32-bit signed constant, storing the result in a
4731 * third register.
4732 *
4733 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4734 *
4735 * @note The ARM64 version does not work for non-trivial constants if the
4736 * two registers are the same. Will assert / throw exception.
4737 */
4738DECL_FORCE_INLINE_THROW(uint32_t)
4739iemNativeEmitGpr32EqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
4740{
4741#ifdef RT_ARCH_AMD64
4742 /** @todo consider LEA */
4743 if ((int8_t)iImmAddend == iImmAddend)
4744 {
4745 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4746 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4747 }
4748 else
4749 {
4750 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4751 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4752 }
4753
4754#elif defined(RT_ARCH_ARM64)
4755 bool const fSub = iImmAddend < 0;
4756 uint32_t const uAbsImmAddend = RT_ABS(iImmAddend);
4757 if (uAbsImmAddend <= 0xfffU)
4758 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4759 else if (uAbsImmAddend <= 0xffffffU)
4760 {
4761 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend >> 12,
4762 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4763 if (uAbsImmAddend & 0xfffU)
4764 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsImmAddend & 0xfff, false /*f64Bit*/);
4765 }
4766 else if (iGprDst != iGprAddend)
4767 {
4768 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
4769 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4770 }
4771 else
4772# ifdef IEM_WITH_THROW_CATCH
4773 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4774# else
4775 AssertReleaseFailedStmt(off = UINT32_MAX);
4776# endif
4777
4778#else
4779# error "Port me!"
4780#endif
4781 return off;
4782}
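/* Usage sketch (illustration only): a typical use of the 32-bit register-plus-immediate
   form is computing a flat 32-bit effective address (base register plus displacement)
   into a register that differs from the base, which sidesteps the same-register
   restriction noted above.  idxRegEffAddr, idxRegBase and i32Disp are hypothetical
   names; the buffer size 12 is a rough AMD64 worst-case bound. */
#if 0
    off = iemNativeEmitGpr32EqGprPlusImmEx(iemNativeInstrBufEnsure(pReNative, off, 12), off,
                                           idxRegEffAddr, idxRegBase, i32Disp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
#endif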
4783
4784
4785/*********************************************************************************************************************************
4786* Unary Operations *
4787*********************************************************************************************************************************/
4788
4789/**
4790 * Emits code for two's complement negation of a 64-bit GPR.
4791 */
4792DECL_FORCE_INLINE_THROW(uint32_t)
4793iemNativeEmitNegGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4794{
4795#if defined(RT_ARCH_AMD64)
4796 /* neg Ev */
4797 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4798 pCodeBuf[off++] = 0xf7;
4799 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4800
4801#elif defined(RT_ARCH_ARM64)
4802 /* sub dst, xzr, dst */
4803 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst);
4804
4805#else
4806# error "Port me"
4807#endif
4808 return off;
4809}
4810
4811
4812/**
4813 * Emits code for two's complement negation of a 64-bit GPR.
4814 */
4815DECL_INLINE_THROW(uint32_t)
4816iemNativeEmitNegGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4817{
4818#if defined(RT_ARCH_AMD64)
4819 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4820#elif defined(RT_ARCH_ARM64)
4821 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4822#else
4823# error "Port me"
4824#endif
4825 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4826 return off;
4827}
4828
4829
4830/**
4831 * Emits code for two's complement negation of a 32-bit GPR.
4832 * @note Bits 32 thru 63 are set to zero.
4833 */
4834DECL_FORCE_INLINE_THROW(uint32_t)
4835iemNativeEmitNegGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4836{
4837#if defined(RT_ARCH_AMD64)
4838 /* neg Ev */
4839 if (iGprDst >= 8)
4840 pCodeBuf[off++] = X86_OP_REX_B;
4841 pCodeBuf[off++] = 0xf7;
4842 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4843
4844#elif defined(RT_ARCH_ARM64)
4845 /* sub dst, xzr, dst */
4846 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst, false /*f64Bit*/);
4847
4848#else
4849# error "Port me"
4850#endif
4851 return off;
4852}
4853
4854
4855/**
4856 * Emits code for two's complement negation of a 32-bit GPR.
4857 * @note Bits 32 thru 63 are set to zero.
4858 */
4859DECL_INLINE_THROW(uint32_t)
4860iemNativeEmitNegGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4861{
4862#if defined(RT_ARCH_AMD64)
4863 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4864#elif defined(RT_ARCH_ARM64)
4865 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4866#else
4867# error "Port me"
4868#endif
4869 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4870 return off;
4871}
4872
4873
4874
4875/*********************************************************************************************************************************
4876* Bit Operations *
4877*********************************************************************************************************************************/
4878
4879/**
4880 * Emits code for clearing bits 16 thru 63 in the GPR.
4881 */
4882DECL_INLINE_THROW(uint32_t)
4883iemNativeEmitClear16UpGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4884{
4885#if defined(RT_ARCH_AMD64)
4886 /* movzx Gv,Ew */
4887 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4888 if (iGprDst >= 8)
4889 pbCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
4890 pbCodeBuf[off++] = 0x0f;
4891 pbCodeBuf[off++] = 0xb7;
4892 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
4893
4894#elif defined(RT_ARCH_ARM64)
4895 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4896# if 1
4897 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(iGprDst, iGprDst);
4898# else
4899 ///* This produces 0xffff; 0x4f: N=1 imms=001111 (immr=0) => size=64 length=15 */
4900 //pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 0x4f);
4901# endif
4902#else
4903# error "Port me"
4904#endif
4905 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4906 return off;
4907}
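/* Usage sketch (illustration only): on AMD64 the 16-bit add emitter earlier in this file
   leaves bits 16 thru 63 untouched, so a caller needing a zero-extended 16-bit result
   can follow it with this helper (the ARM64 path of that add already masks to 16 bits). */
#if 0
    off = iemNativeEmitAddGpr16ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iAddend);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
# ifdef RT_ARCH_AMD64
    off = iemNativeEmitClear16UpGpr(pReNative, off, iGprDst);
# endif
#endif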
4908
4909
4910/**
4911 * Emits code for AND'ing two 64-bit GPRs.
4912 *
4913 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4914 * and ARM64 hosts.
4915 */
4916DECL_FORCE_INLINE(uint32_t)
4917iemNativeEmitAndGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4918{
4919#if defined(RT_ARCH_AMD64)
4920 /* and Gv, Ev */
4921 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4922 pCodeBuf[off++] = 0x23;
4923 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);