- Timestamp: Dec 16, 2023, 12:00:51 AM (10 months ago)
- Location: trunk
- Files: 4 edited
  - include/iprt/armv8.h (modified) (4 diffs)
  - src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp (modified) (5 diffs)
  - src/VBox/VMM/VMMAll/IEMAllThrdPython.py (modified) (1 diff)
  - src/VBox/VMM/include/IEMN8veRecompilerEmit.h (modified) (2 diffs)
trunk/include/iprt/armv8.h
    (r102510 → r102623; 4 diffs)

    Diff 1 (lines 2283-2325): whitespace only. The constants of the ARMV8A64INSTRLDSTTYPE
    enum (kArmv8A64InstrLdStType_Mask_Size/VR/Opc, the Shift_* values, the St/Ld
    Byte/Half/Word/Dword load/store types, the signed-load variants, Prefetch and the
    Vr vector-register variants) are re-aligned; none of their values change.

    Diff 2 (around new line 3170): new helpers wrapping Armv8A64MkInstrAddSubUImm12:

        /** Alias for subs xzr, reg, #uimm12 (CMP). */
        DECL_FORCE_INLINE(uint32_t) Armv8A64MkInstrCmpUImm12(uint32_t iRegSrc, uint32_t uImm12Comprahend,
                                                             bool f64Bit = true, bool fShift12 = false)
        {
            return Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iRegSrc, uImm12Comprahend,
                                               f64Bit, true /*fSetFlags*/, fShift12);
        }

        /** ADD dst, src, #uimm12 */
        DECL_FORCE_INLINE(uint32_t) Armv8A64MkInstrAddUImm12(uint32_t iRegResult, uint32_t iRegSrc, uint32_t uImm12Addend,
                                                             bool f64Bit = true, bool fSetFlags = false, bool fShift12 = false)
        {
            return Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iRegResult, iRegSrc, uImm12Addend, f64Bit, fSetFlags, fShift12);
        }

        /** SUB dst, src, #uimm12 */
        DECL_FORCE_INLINE(uint32_t) Armv8A64MkInstrSubUImm12(uint32_t iRegResult, uint32_t iRegSrc, uint32_t uImm12Subtrahend,
                                                             bool f64Bit = true, bool fSetFlags = false, bool fShift12 = false)
        {
            return Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iRegResult, iRegSrc, uImm12Subtrahend, f64Bit, fSetFlags, fShift12);
        }

    Diff 3 (around new line 3237): a register-register compare alias:

        /** Alias for subs xzr, reg1, reg2 [, LSL/LSR/ASR/ROR #xx] (CMP). */
        DECL_FORCE_INLINE(uint32_t) Armv8A64MkInstrCmpReg(uint32_t iRegSrc1, uint32_t iRegSrc2, bool f64Bit = true,
                                                          uint32_t cShift = 0,
                                                          ARMV8A64INSTRSHIFT enmShift = kArmv8A64InstrShift_Lsl)
        {
            return Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR, iRegSrc1, iRegSrc2,
                                            f64Bit, true /*fSetFlags*/, cShift, enmShift);
        }

    Diff 4 (new lines 3462-3564): readable NZCV masks plus CCMP/CCMN encoders. The new
    ARMA64_NZCV_F_Nn_Zn_Cn_Vn defines cover all sixteen flag combinations, from
    ARMA64_NZCV_F_N0_Z0_C0_V0 (UINT32_C(0x0)) to ARMA64_NZCV_F_N1_Z1_C1_V1
    (UINT32_C(0xf)), with V in bit 0, C in bit 1, Z in bit 2 and N in bit 3.

        /**
         * A64: Encodes CCMP or CCMN with two register operands.
         *
         * @returns The encoded instruction.
         * @param   iRegSrc1    The 1st register. SP is NOT valid, but ZR is.
         * @param   iRegSrc2    The 2nd register. SP is NOT valid, but ZR is.
         * @param   fNzcv       The N, Z, C & V flag values to load if the condition
         *                      does not match. See ARMA64_NZCV_F_XXX.
         * @param   enmCond     The condition guarding the compare.
         * @param   fCCmp       Set for CCMP (default), clear for CCMN.
         * @param   f64Bit      true for 64-bit GPRs (default), false for 32-bit GPRs.
         */
        DECL_FORCE_INLINE(uint32_t) Armv8A64MkInstrCCmpCmnReg(uint32_t iRegSrc1, uint32_t iRegSrc2, uint32_t fNzcv,
                                                              ARMV8INSTRCOND enmCond, bool fCCmp = true, bool f64Bit = true)
        {
            Assert(iRegSrc1 < 32); Assert(iRegSrc2 < 32); Assert(fNzcv < 16);

            return ((uint32_t)f64Bit << 31)
                 | ((uint32_t)fCCmp  << 30)
                 | UINT32_C(0x3a400000)
                 | (iRegSrc2 << 16)
                 | ((uint32_t)enmCond << 12)
                 | (iRegSrc1 << 5)
                 | fNzcv;
        }

        /**
         * A64: Encodes CCMP or CCMN with register and 5-bit immediate.
         * Same parameters, except iRegSrc is compared against the unsigned 5-bit
         * immediate uImm5; the encoding differs from the register form only in bit 11.
         */
        DECL_FORCE_INLINE(uint32_t) Armv8A64MkInstrCCmpCmnImm(uint32_t iRegSrc, uint32_t uImm5, uint32_t fNzcv,
                                                              ARMV8INSTRCOND enmCond, bool fCCmp = true, bool f64Bit = true)
        {
            Assert(iRegSrc < 32); Assert(uImm5 < 32); Assert(fNzcv < 16);

            return ((uint32_t)f64Bit << 31)
                 | ((uint32_t)fCCmp  << 30)
                 | UINT32_C(0x3a400800)
                 | (uImm5 << 16)
                 | ((uint32_t)enmCond << 12)
                 | (iRegSrc << 5)
                 | fNzcv;
        }

    Thin wrappers Armv8A64MkInstrCCmpReg/Armv8A64MkInstrCCmnReg and
    Armv8A64MkInstrCCmpImm/Armv8A64MkInstrCCmnImm forward to the two encoders above with
    fCCmp set and cleared respectively.
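    For orientation (this example is not part of the changeset): the point of the new
    CMP/CCMP encoders is flag chaining. A first CMP sets NZCV; each following CCMP
    performs its compare only while the previous result was "equal" and otherwise loads
    the supplied NZCV value (here ARMA64_NZCV_F_N0_Z0_C0_V0, i.e. "not equal"), so one
    trailing B.NE catches a mismatch anywhere in the chain. A minimal sketch of emitting
    such a chain with these helpers, assuming a recompiler-style code buffer and
    hypothetical register indices idxVal1a/idxVal2a/idxVal1b/idxVal2b and branch target
    offMismatch:

        /* Illustrative fragment only. */
        pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxVal1a, idxVal2a);              /* subs xzr, xA1, xA2    */
        pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxVal1b, idxVal2b,              /* ccmp xB1, xB2, #0, eq */
                                                    ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
        pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne,                /* b.ne on any mismatch  */
                                                  offMismatch - (int32_t)off);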
trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp
    (r102603 → r102623; 5 diffs)

    Diff 1 (around line 10963): the IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo load now
    passes two extra arguments to iemNativeEmitLoadArgGregFromStackVar:
    0 /*offAddend*/ and true /*fVarAllowInVolatileReg*/.

    Diff 2 (around line 11248): the opcode (re-)checking macro is enabled again;
    BODY_CHECK_OPCODES_DISABLED(a_pTb, a_idxRange, a_offRange, a_cbInstr) becomes
    BODY_CHECK_OPCODES, still expanding to iemNativeEmitBltInCheckOpcodes.

    Diff 3 (around line 11271): Assert(cbLeft > 0); is added after offPage and cbLeft are
    computed from the TB range in iemNativeEmitBltInCheckOpcodes.

    Diff 4 (lines 11468-11727): the RT_ARCH_ARM64 path of iemNativeEmitBltInCheckOpcodes,
    previously a stub that allocated one temporary register and hit AssertReleaseFailed()
    behind a /** @todo continue here */, is replaced by a full implementation that emits
    AArch64 code comparing the guest opcode bytes against the copy recorded in the
    translation block:

    - pbInstrBuf is loaded from VMCPU into idxRegSrc1Ptr, a second temporary
      (idxRegSrc1Val) holds the bytes and words loaded through it, and the instruction
      buffer is grown by up to 64 words.
    - If offPage is odd, one byte is loaded and compared against the opcode byte using
      the new Armv8A64MkInstrCmpUImm12 (the byte fits the 12-bit immediate), leaving a
      pending branch (fPendingJmp).
    - If anything is left, a third temporary (idxRegSrc2Val) is allocated, because CCMP
      only has a 5-bit immediate form and cannot hold opcode bytes. A half-word load plus
      a MOVZ constant aligns src1 to 4 bytes, then a word load plus a 32-bit constant
      (iemNativeEmitLoadGpr32ImmEx) aligns it to 8 bytes; each step uses CMP for the
      first compare and Armv8A64MkInstrCCmpReg(..., ARMA64_NZCV_F_N0_Z0_C0_V0,
      kArmv8InstrCond_Eq) to chain further compares onto the pending one.
    - With 16 or more bytes left, the code switches to a memcmp-style compare: a pointer
      to the recorded opcode bytes is loaded into idxRegSrc2Ptr via
      iemNativeEmitLoadGprImmEx. With 64 or more bytes left, any pending B.NE to the
      obsolete-TB label is flushed, a loop register is allocated, offPage is folded into
      idxRegSrc1Ptr with Armv8A64MkInstrAddUImm12 (marked /** @todo optimize out this
      instruction. */), and a loop compares 32 bytes per iteration: four pairs of dword
      loads with one CMP followed by three chained CCMPs, a B.NE to the obsolete-TB
      label, both pointers advanced by 0x20, and SUBS on the loop counter with B.NE back
      to the loop start. Remaining dwords (up to three after the loop, four without it)
      are compared with dword loads from both pointers and CMP/CCMP. Cost note in the
      code: memcmp-loop=43 vs memcmp-no-loop=30 instructions so far.
    - With fewer than 16 bytes, whole dwords are instead compared against 64-bit
      immediates built with iemNativeEmitLoadGprImmEx (max cost thus far: 21).
    - The tail of up to 7 bytes is handled with a word, a half-word and finally a single
      byte compare, again preferring CCMP whenever a compare is already pending.
    - Finally, if a compare is still pending, a B.NE to the obsolete-TB label is emitted
      (fixup type kIemNativeFixupType_RelImm19At5), the buffer use is checked with
      IEMNATIVE_ASSERT_INSTR_BUF_ENSURE, and the temporaries are freed. The closing
      comment puts the worst cases at memcmp-loop=54, memcmp-no-loop=41 and
      only-src1-ptr=32 instructions.

    Diff 5 (around line 11777): the now unused local
    'uint32_t const cbInstr = (uint32_t)pCallEntry->auParams[0];' is commented out, while
    idxRange and offRange continue to be read from auParams[1] and auParams[2].
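    To make the control flow of the emitted CMP/CCMP/B.NE chain easier to follow, here is
    a small standalone model (not VBox code; the names and structure are illustrative
    only) of the flag behaviour the generated sequence relies on:

        // Standalone illustration, not VBox code: models why forcing NZCV to "not equal"
        // on a failed guarding condition lets a single trailing B.NE detect any mismatch.
        #include <cstdint>
        #include <cstdio>

        struct Nzcv { bool fZ; };                      /* only the Z flag matters here */

        static Nzcv cmp(uint64_t uA, uint64_t uB)      /* like CMP: Z=1 when equal */
        {
            return Nzcv{ uA == uB };
        }

        static Nzcv ccmpEq(Nzcv Prev, uint64_t uA, uint64_t uB, Nzcv Fallback)
        {
            /* like CCMP ..., #fallback, eq: compare only if the previous step left Z=1 */
            return Prev.fZ ? cmp(uA, uB) : Fallback;
        }

        int main()
        {
            Nzcv const NotEqual = { false };                           /* ARMA64_NZCV_F_N0_Z0_C0_V0 */
            Nzcv Flags = cmp(0x11, 0x11);                              /* cmp                        */
            Flags = ccmpEq(Flags, 0x22, 0x99, NotEqual);               /* ccmp (mismatch here)       */
            Flags = ccmpEq(Flags, 0x33, 0x33, NotEqual);               /* ccmp (stays "not equal")   */
            std::printf("b.ne taken: %s\n", !Flags.fZ ? "yes" : "no"); /* prints: b.ne taken: yes    */
            return 0;
        }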
trunk/src/VBox/VMM/VMMAll/IEMAllThrdPython.py
    (r102593 → r102623; 1 diff)

    In the list of built-in functions (around line 1968), the third column of two entries
    is flipped from False to True; the neighbouring 'CheckCsLim' (1, True) and
    'CheckOpcodesConsiderCsLim' (3, False) entries are unchanged:

        ( 'CheckCsLimAndOpcodes',   3, False ),   becomes   ( 'CheckCsLimAndOpcodes',   3, True ),
        ( 'CheckOpcodes',           3, False ),   becomes   ( 'CheckOpcodes',           3, True ),
trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h
    (r102593 → r102623; 2 diffs)

    The constant-loading emitters are reworked so that callers which already hold an
    instruction buffer can emit immediates without another iemNativeInstrBufEnsure call,
    and so that the ARM64 sequences become shorter:

    - The body of iemNativeEmitLoadGprImm64 moves into a new "Ex" variant where the
      caller guarantees sufficient buffer space (at most 10 instruction bytes on AMD64,
      4 instruction words / 16 bytes on ARM64):

          DECLINLINE(uint32_t) iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off,
                                                         uint8_t iGpr, uint64_t uImm64);

      On AMD64 it now handles uImm64 == 0 itself as 'xor gpr, gpr' (the old code deferred
      to iemNativeEmitGprZero), keeps 'mov gpr, imm32' for values up to UINT32_MAX and
      'mov gpr, imm64' otherwise.

      On ARM64 the old unconditional movz/movk ladder is replaced: the function counts how
      many 16-bit half-words of uImm64 are zero and how many are 0xffff (when at least two
      half-words are zero the 0xffff count is skipped). If the 0xffff half-words do not
      outnumber the zero ones it emits movz for the first interesting half-word and movk
      for the rest, skipping zero half-words (a single 'movz gpr, #0' when the value is
      zero). Otherwise it starts with movn on the first half-word that is not 0xffff and
      follows with movk only for the remaining non-0xffff half-words, so sign-extended
      negative values need fewer instructions. This implements the old /** @todo */ about
      an inverted-mask variant, which is dropped; the /** @todo */ about loading into a
      'w' register when imm64 <= UINT32_MAX (as clang 12.x does) is kept.

      iemNativeEmitLoadGprImm64 itself becomes a thin wrapper that calls
      iemNativeInstrBufEnsure (10 bytes on AMD64, 4 words on ARM64), forwards to the Ex
      variant and finishes with IEMNATIVE_ASSERT_INSTR_BUF_ENSURE.

    - A 32-bit counterpart is added (at most 6 instruction bytes on AMD64, 2 instruction
      words on ARM64; the top 32 bits of the GPR are cleared):

          DECLINLINE(uint32_t) iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off,
                                                           uint8_t iGpr, uint32_t uImm32)
          {
          #ifdef RT_ARCH_AMD64
              if (uImm32 == 0)
              {
                  /* xor gpr, gpr */
                  if (iGpr >= 8)
                      pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
                  pCodeBuf[off++] = 0x33;
                  pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
              }
              else
              {
                  /* mov gpr, imm32 */
                  if (iGpr >= 8)
                      pCodeBuf[off++] = X86_OP_REX_B;
                  pCodeBuf[off++] = 0xb8 + (iGpr & 7);
                  pCodeBuf[off++] = RT_BYTE1(uImm32);
                  pCodeBuf[off++] = RT_BYTE2(uImm32);
                  pCodeBuf[off++] = RT_BYTE3(uImm32);
                  pCodeBuf[off++] = RT_BYTE4(uImm32);
              }

          #elif defined(RT_ARCH_ARM64)
              if ((uImm32 >> 16) == 0)
                  /* movz gpr, imm16 */
                  pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32, 0, false /*f64Bit*/);
              else if ((uImm32 & UINT32_C(0xffff)) == 0)
                  /* movz gpr, imm16, lsl #16 */
                  pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
              else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
                  /* movn gpr, imm16, lsl #16 */
                  pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16, 1, false /*f64Bit*/);
              else if ((uImm32 >> 16) == UINT32_C(0xffff))
                  /* movn gpr, imm16 */
                  pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32, 0, false /*f64Bit*/);
              else
              {
                  pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
                  pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16,              1, false /*f64Bit*/);
              }

          #else
          # error "port me"
          #endif
              return off;
          }

      together with an iemNativeEmitLoadGprImm32 wrapper that ensures the buffer space
      (6 bytes on AMD64, 2 words on ARM64) and calls it.
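    As a worked example of the new immediate-selection logic (again illustrative, not from
    the changeset; the helper below only reproduces the instruction-count decision, not
    the actual encoding):

        // Standalone sketch: mirrors the half-word counting r102623 uses to choose
        // between a MOVZ/MOVK and a MOVN/MOVK sequence for a 64-bit immediate.
        #include <cstdint>
        #include <cstdio>

        static unsigned cInstrForImm64(uint64_t uImm64)
        {
            unsigned cZero = 0, cFfff = 0;
            for (unsigned iHw = 0; iHw < 4; iHw++)
            {
                uint16_t const uHw = (uint16_t)(uImm64 >> (iHw * 16));
                cZero += uHw == 0;
                cFfff += uHw == UINT16_MAX;
            }
            if (cZero >= 2)
                cFfff = 0;                      /* r102623 skips the 0xffff count in this case */
            if (cFfff <= cZero)                 /* movz + one movk per non-zero half-word      */
                return uImm64 == 0 ? 1 : 4 - cZero;
            return 4 - cFfff;                   /* movn + one movk per non-0xffff half-word    */
        }

        int main()
        {
            std::printf("%u\n", cInstrForImm64(UINT64_C(0xfffffffffffff234))); /* 1: single movn  */
            std::printf("%u\n", cInstrForImm64(UINT64_C(0x0000000000001234))); /* 1: single movz  */
            std::printf("%u\n", cInstrForImm64(UINT64_C(0x0123456789abcdef))); /* 4: movz + movk  */
            return 0;
        }

    For instance, the old emitter needed four instructions for 0xfffffffffffff234 (movz
    plus three movk of 0xffff half-words), while the new one emits a single movn of the
    inverted low half-word.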