Index: /trunk/Makefile.kmk
===================================================================
--- /trunk/Makefile.kmk	(revision 36767)
+++ /trunk/Makefile.kmk	(revision 36768)
@@ -1384,5 +1384,4 @@
 vslick.h: include/VBox/cdefs.h include/iprt/cdefs.h $(MAKEFILE)
 	$(RM) -f -- $@ $@.tmp $@.tmp2 $@.tmp3
-	$(APPEND) $@.tmp '// autogenerated'
 	@$(APPEND) $@.tmp '#define IN_SLICKEDIT '
 	@$(APPEND) $@.tmp '#define RT_C_DECLS_BEGIN '
@@ -1503,4 +1502,16 @@
 	@$(APPEND) $@.tmp '#define PGM_BTH_DECL(type, name)        type PGM_BTH_NAME(name)'
 
+	@$(APPEND) $@.tmp '#define FNIEMOP_STUB(a_Name)        	   static VBOXSTRICTRC a_Name(PIEMCPU pIemCpu) { return VERR_NOT_IMPLEMENTED; }'
+	@$(APPEND) $@.tmp '#define FNIEMOP_DEF(a_Name)        	   static VBOXSTRICTRC a_Name(PIEMCPU pIemCpu)'
+	@$(APPEND) $@.tmp '#define FNIEMOP_DEF_1(a_Name, a_Type0, a_Name0) static VBOXSTRICTRC a_Name(PIEMCPU pIemCpu, a_Type0 a_Name0)'
+	@$(APPEND) $@.tmp '#define FNIEMOP_DEF_2(a_Name, a_Type0, a_Name0, a_Type1, a_Name1) static VBOXSTRICTRC a_Name(PIEMCPU pIemCpu, a_Type0 a_Name0, a_Type1 a_Name1)'
+	@$(APPEND) $@.tmp '#define IEM_CIMPL_DEF_0(a_Name)         static VBOXSTRICTRC a_Name(PIEMCPU pIemCpu)'
+	@$(APPEND) $@.tmp '#define IEM_CIMPL_DEF_1(a_Name, a_Type0, a_Name0) static VBOXSTRICTRC a_Name(PIEMCPU pIemCpu, a_Type0 a_Name0)'
+	@$(APPEND) $@.tmp '#define IEM_CIMPL_DEF_2(a_Name, a_Type0, a_Name0, a_Type1, a_Name1) static VBOXSTRICTRC a_Name(PIEMCPU pIemCpu, a_Type0 a_Name0, a_Type1 a_Name1)'
+	@$(APPEND) $@.tmp '#define IEM_CIMPL_DEF_3(a_Name, a_Type0, a_Name0, a_Type1, a_Name1, a_Type2, a_Name2)  static VBOXSTRICTRC a_Name(PIEMCPU pIemCpu, a_Type0 a_Name0, a_Type1 a_Name1, a_Type2 a_Name2)'
+	@$(APPEND) $@.tmp '#define IEM_MC_LOCAL(a_Type, a_Name)                       a_Type a_Name'
+	@$(APPEND) $@.tmp '#define IEM_MC_ARG(a_Type, a_Name, a_iArg)                 a_Type a_Name'
+	@$(APPEND) $@.tmp '#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg)  a_Type const a_Name = a_Value'
+
 	$(SED)  -e '/__cdecl/d' \
 		-e '/^ *# *define.*DECL/!d' \
@@ -1532,5 +1543,5 @@
 		| $(SED_EXT) -e 's/\x1F/_/g' -e 's/\x1E/(/g' -e 's/\x1C/ /g' \
 		| $(SED_EXT) -e 's/$$/\n/' --output $@.tmp2
-	$(MV) -f $@.tmp2 $@
+	$(SED) -e '/#define/s/$$/ \/\/ vbox/' --output $@ $@.tmp2
 	$(RM) -f $@.tmp $@.tmp2 $@.tmp3
 ifeq ($(KBUILD_HOST),win)
Index: /trunk/include/VBox/vmm/iem.h
===================================================================
--- /trunk/include/VBox/vmm/iem.h	(revision 36768)
+++ /trunk/include/VBox/vmm/iem.h	(revision 36768)
@@ -0,0 +1,58 @@
+/** @file
+ * IEM - Interpreted Execution Manager.
+ */
+
+/*
+ * Copyright (C) 2011 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+#ifndef ___VBox_vmm_iem_h
+#define ___VBox_vmm_iem_h
+
+#include <VBox/types.h>
+
+
+RT_C_DECLS_BEGIN
+
+/** @defgroup grp_iem       The Interpreted Execution Manager API.
+ * @{
+ */
+
+
+
+VMMDECL(VBOXSTRICTRC) IEMExecOne(PVMCPU pVCpu);
+
+
+
+/** @defgroup grp_iem_r3    The IEM Host Context Ring-3 API.
+ * @ingroup grp_iem
+ * @{
+ */
+VMMR3DECL(int)      IEMR3Init(PVM pVM);
+VMMR3DECL(int)      IEMR3Term(PVM pVM);
+VMMR3DECL(void)     IEMR3Relocate(PVM pVM);
+/** @} */
+
+/** @} */
+
+RT_C_DECLS_END
+
+#endif
+
Index: /trunk/include/VBox/vmm/vm.h
===================================================================
--- /trunk/include/VBox/vmm/vm.h	(revision 36767)
+++ /trunk/include/VBox/vmm/vm.h	(revision 36768)
@@ -150,4 +150,13 @@
     } em;
 
+    /** IEM part. */
+    union
+    {
+#ifdef ___IEMInternal_h
+        struct IEMCPU       s;
+#endif
+        uint8_t             padding[1024];      /* multiple of 64 */
+    } iem;
+
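+    /* Sanity sketch (an assumption, in the spirit of the compile-time checks
+     * found elsewhere in this header): the padding must stay large enough
+     * whether or not IEMInternal.h is in scope, e.g.:
+     *      AssertCompile(RT_SIZEOFMEMB(VMCPU, iem.padding) >= sizeof(struct IEMCPU));
+     */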
     /** TRPM part. */
     union
@@ -205,8 +214,6 @@
     } dbgf;
 
-#if 0
     /** Align the following members on page boundary. */
-    uint8_t                 abAlignment2[32];
-#endif
+    uint8_t                 abAlignment2[3072];
 
     /** PGM part. */
Index: /trunk/src/VBox/VMM/Makefile.kmk
===================================================================
--- /trunk/src/VBox/VMM/Makefile.kmk	(revision 36767)
+++ /trunk/src/VBox/VMM/Makefile.kmk	(revision 36768)
@@ -46,4 +46,9 @@
 endif
 # VMM_COMMON_DEFS += VBOX_WITH_NS_ACCOUNTING_STATS
+
+# Special IEM debug mode which compares the result with REM.
+ifdef IEM_VERIFICATION_MODE
+ VMM_COMMON_DEFS += IEM_VERIFICATION_MODE
+endif
 
 
@@ -100,4 +105,5 @@
 	VMMR3/EMHwaccm.cpp \
 	VMMR3/FTM.cpp \
+	VMMR3/IEMR3.cpp \
 	VMMR3/IOM.cpp \
 	VMMR3/GMM.cpp \
@@ -166,4 +172,7 @@
 	VMMAll/EMAllA.asm \
 	VMMAll/FTMAll.cpp \
+	VMMAll/IEMAll.cpp \
+	VMMAll/IEMAllAImpl.asm \
+	VMMAll/IEMAllAImplC.cpp \
 	VMMAll/TMAll.cpp \
 	VMMAll/TMAllCpu.cpp \
@@ -570,2 +579,7 @@
 PGMInline.o 	PGMInline.obj: 			PGMDbg.o
 
+# Alias the IEM templates to the object in which they are instantiated.
+IEMInternal.o \
+IEMAllInstructions.cpp.o  IEMAllInstructions.cpp.obj \
+IEMAllCImplStrInstr.cpp.o IEMAllCImplStrInstr.cpp.obj: IEMAll.o
+
Index: /trunk/src/VBox/VMM/VMMAll/IEMAll.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMAll/IEMAll.cpp	(revision 36768)
+++ /trunk/src/VBox/VMM/VMMAll/IEMAll.cpp	(revision 36768)
@@ -0,0 +1,6129 @@
+/* $Id$ */
+/** @file
+ * IEM - Interpreted Execution Manager - All Contexts.
+ */
+
+/*
+ * Copyright (C) 2011 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/** @page pg_iem    IEM - Interpreted Execution Manager
+ *
+ * The interpreted exeuction manager (IEM) is for executing short guest code
+ * sequences that are causing too many exits / virtualization traps.  It will
+ * also be used to interpret single instructions, thus replacing the selective
+ * interpreters in EM and IOM.
+ *
+ * Design goals:
+ *      - Relatively small footprint, although we favour speed and correctness
+ *        over size.
+ *      - Reasonably fast.
+ *      - Correctly handle lock prefixed instructions.
+ *      - Complete instruction set - eventually.
+ *      - Refactorable into a recompiler, maybe.
+ *      - Replace EMInterpret*.
+ *
+ * Using the existing disassembler has been considered; however, it is thought
+ * to conflict with the speed goal, as the disassembler chews things a bit too
+ * much and leaves us with a somewhat complicated state to interpret
+ * afterwards.
+ *
+ *
+ * The current code is very much work in progress. You've been warned!
+ *
+ */
+
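+/* A minimal usage sketch (an assumption for illustration, not part of this
+ * commit's wiring): a caller in EM would drive the interpreter roughly like:
+ *
+ *      VBOXSTRICTRC rcStrict = IEMExecOne(pVCpu);
+ *      if (rcStrict == VERR_NOT_IMPLEMENTED)
+ *          rcStrict = someFallbackInterpreter(pVCpu); // hypothetical fallback
+ */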
+/*******************************************************************************
+*   Header Files                                                               *
+*******************************************************************************/
+#define LOG_GROUP   LOG_GROUP_EM /** @todo add log group */
+#include <VBox/vmm/iem.h>
+#include <VBox/vmm/pgm.h>
+#include <VBox/vmm/iom.h>
+#include <VBox/vmm/em.h>
+#include <VBox/vmm/dbgf.h>
+#ifdef IEM_VERIFICATION_MODE
+# include <VBox/vmm/rem.h>
+#endif
+#include "IEMInternal.h"
+#include <VBox/vmm/vm.h>
+#include <VBox/log.h>
+#include <VBox/err.h>
+#include <VBox/param.h>
+#include <VBox/x86.h>
+#include <iprt/assert.h>
+#include <iprt/string.h>
+
+
+/*******************************************************************************
+*   Structures and Typedefs                                                    *
+*******************************************************************************/
+/** @typedef PFNIEMOP
+ * Pointer to an opcode decoder function.
+ */
+
+/** @def FNIEMOP_DEF
+ * Define an opcode decoder function.
+ *
+ * We're using macros for this so that adding and removing parameters as well
+ * as tweaking compiler specific attributes becomes easier.  See FNIEMOP_CALL.
+ *
+ * @param   a_Name      The function name.
+ */
+
+
+#if defined(__GNUC__) && defined(RT_ARCH_X86)
+typedef VBOXSTRICTRC (__attribute__((__fastcall__)) * PFNIEMOP)(PIEMCPU pIemCpu);
+# define FNIEMOP_DEF(a_Name) \
+    static VBOXSTRICTRC __attribute__((__fastcall__, __nothrow__)) a_Name(PIEMCPU pIemCpu)
+# define FNIEMOP_DEF_1(a_Name, a_Type0, a_Name0) \
+    static VBOXSTRICTRC __attribute__((__fastcall__, __nothrow__)) a_Name(PIEMCPU pIemCpu, a_Type0 a_Name0) RT_NO_THROW
+# define FNIEMOP_DEF_2(a_Name, a_Type0, a_Name0, a_Type1, a_Name1) \
+    static VBOXSTRICTRC __attribute__((__fastcall__, __nothrow__)) a_Name(PIEMCPU pIemCpu, a_Type0 a_Name0, a_Type1 a_Name1) RT_NO_THROW
+
+#elif defined(_MSC_VER) && defined(RT_ARCH_X86)
+typedef VBOXSTRICTRC (__fastcall * PFNIEMOP)(PIEMCPU pIemCpu);
+# define FNIEMOP_DEF(a_Name) \
+    static /*__declspec(naked)*/ VBOXSTRICTRC __fastcall a_Name(PIEMCPU pIemCpu) RT_NO_THROW
+# define FNIEMOP_DEF_1(a_Name, a_Type0, a_Name0) \
+    static /*__declspec(naked)*/ VBOXSTRICTRC __fastcall a_Name(PIEMCPU pIemCpu, a_Type0 a_Name0) RT_NO_THROW
+# define FNIEMOP_DEF_2(a_Name, a_Type0, a_Name0, a_Type1, a_Name1) \
+    static /*__declspec(naked)*/ VBOXSTRICTRC __fastcall a_Name(PIEMCPU pIemCpu, a_Type0 a_Name0, a_Type1 a_Name1) RT_NO_THROW
+
+#else
+typedef VBOXSTRICTRC (* PFNIEMOP)(PIEMCPU pIemCpu);
+# define FNIEMOP_DEF(a_Name) \
+    static VBOXSTRICTRC a_Name(PIEMCPU pIemCpu) RT_NO_THROW
+# define FNIEMOP_DEF_1(a_Name, a_Type0, a_Name0) \
+    static VBOXSTRICTRC a_Name(PIEMCPU pIemCpu, a_Type0 a_Name0) RT_NO_THROW
+# define FNIEMOP_DEF_2(a_Name, a_Type0, a_Name0, a_Type1, a_Name1) \
+    static VBOXSTRICTRC a_Name(PIEMCPU pIemCpu, a_Type0 a_Name0, a_Type1 a_Name1) RT_NO_THROW
+
+#endif
+
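+/* Illustrative sketch only (hypothetical opcode name, mirroring the stub
+ * style used later in this file): a decoder for a single-byte, no-operand
+ * instruction would be declared via the macro above:
+ *
+ *      FNIEMOP_DEF(iemOp_nop)
+ *      {
+ *          iemRegAddToRip(pIemCpu, pIemCpu->offOpcode);
+ *          return VINF_SUCCESS;
+ *      }
+ */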
+
+/**
+ * Function table for a binary operator providing implementation based on
+ * operand size.
+ */
+typedef struct IEMOPBINSIZES
+{
+    PFNIEMAIMPLBINU8  pfnNormalU8,    pfnLockedU8;
+    PFNIEMAIMPLBINU16 pfnNormalU16,   pfnLockedU16;
+    PFNIEMAIMPLBINU32 pfnNormalU32,   pfnLockedU32;
+    PFNIEMAIMPLBINU64 pfnNormalU64,   pfnLockedU64;
+} IEMOPBINSIZES;
+/** Pointer to a binary operator function table. */
+typedef IEMOPBINSIZES const *PCIEMOPBINSIZES;
+
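+/* Dispatch sketch (assumption): decoders pick the worker out of such a table
+ * based on the effective operand size and the lock prefix, along the lines
+ * of:
+ *
+ *      PFNIEMAIMPLBINU32 pfn = (pIemCpu->fPrefixes & IEM_OP_PRF_LOCK)
+ *                            ? pImpl->pfnLockedU32 : pImpl->pfnNormalU32;
+ */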
+
+/**
+ * Function table for a unary operator providing implementation based on
+ * operand size.
+ */
+typedef struct IEMOPUNARYSIZES
+{
+    PFNIEMAIMPLUNARYU8  pfnNormalU8,    pfnLockedU8;
+    PFNIEMAIMPLUNARYU16 pfnNormalU16,   pfnLockedU16;
+    PFNIEMAIMPLUNARYU32 pfnNormalU32,   pfnLockedU32;
+    PFNIEMAIMPLUNARYU64 pfnNormalU64,   pfnLockedU64;
+} IEMOPUNARYSIZES;
+/** Pointer to a unary operator function table. */
+typedef IEMOPUNARYSIZES const *PCIEMOPUNARYSIZES;
+
+
+/**
+ * Function table for a shift operator providing implementation based on
+ * operand size.
+ */
+typedef struct IEMOPSHIFTSIZES
+{
+    PFNIEMAIMPLSHIFTU8  pfnNormalU8;
+    PFNIEMAIMPLSHIFTU16 pfnNormalU16;
+    PFNIEMAIMPLSHIFTU32 pfnNormalU32;
+    PFNIEMAIMPLSHIFTU64 pfnNormalU64;
+} IEMOPSHIFTSIZES;
+/** Pointer to a shift operator function table. */
+typedef IEMOPSHIFTSIZES const *PCIEMOPSHIFTSIZES;
+
+
+/**
+ * Function table for a multiplication or division operation.
+ */
+typedef struct IEMOPMULDIVSIZES
+{
+    PFNIEMAIMPLMULDIVU8  pfnU8;
+    PFNIEMAIMPLMULDIVU16 pfnU16;
+    PFNIEMAIMPLMULDIVU32 pfnU32;
+    PFNIEMAIMPLMULDIVU64 pfnU64;
+} IEMOPMULDIVSIZES;
+/** Pointer to a multiplication or division operation function table. */
+typedef IEMOPMULDIVSIZES const *PCIEMOPMULDIVSIZES;
+
+
+/**
+ * Selector descriptor table entry as fetched by iemMemFetchSelDesc.
+ */
+typedef union IEMSELDESC
+{
+    /** The legacy view. */
+    X86DESC     Legacy;
+    /** The long mode view. */
+    X86DESC64   Long;
+} IEMSELDESC;
+/** Pointer to a selector descriptor table entry. */
+typedef IEMSELDESC *PIEMSELDESC;
+
+
+/*******************************************************************************
+*   Defined Constants And Macros                                               *
+*******************************************************************************/
+/** Used to shut up GCC warnings about variables that 'may be used uninitialized'
+ * due to GCC lacking knowledge about the value range of a switch. */
+#define IEM_NOT_REACHED_DEFAULT_CASE_RET() default: AssertFailedReturn(VERR_INTERNAL_ERROR_4)
+
+/**
+ * Call an opcode decoder function.
+ *
+ * We're using macros for this so that adding and removing parameters can be
+ * done as we please.  See FNIEMOP_DEF.
+ */
+#define FNIEMOP_CALL(a_pfn) (a_pfn)(pIemCpu)
+
+/**
+ * Call a common opcode decoder function taking one extra argument.
+ *
+ * We're using macros for this so that adding and removing parameters can be
+ * done as we please.  See FNIEMOP_DEF_1.
+ */
+#define FNIEMOP_CALL_1(a_pfn, a0)           (a_pfn)(pIemCpu, a0)
+
+/**
+ * Call a common opcode decoder function taking two extra arguments.
+ *
+ * We're using macros for this so that adding and removing parameters can be
+ * done as we please.  See FNIEMOP_DEF_2.
+ */
+#define FNIEMOP_CALL_2(a_pfn, a0, a1)       (a_pfn)(pIemCpu, a0, a1)
+
+/**
+ * Check if we're currently executing in real or virtual 8086 mode.
+ *
+ * @returns @c true if it is, @c false if not.
+ * @param   a_pIemCpu       The IEM state of the current CPU.
+ */
+#define IEM_IS_REAL_OR_V86_MODE(a_pIemCpu)  (CPUMIsGuestInRealOrV86ModeEx((a_pIemCpu)->CTX_SUFF(pCtx)))
+
+/**
+ * Check if we're currently executing in long mode.
+ *
+ * @returns @c true if it is, @c false if not.
+ * @param   a_pIemCpu       The IEM state of the current CPU.
+ */
+#define IEM_IS_LONG_MODE(a_pIemCpu)         (CPUMIsGuestInLongModeEx((a_pIemCpu)->CTX_SUFF(pCtx)))
+
+/**
+ * Check if we're currently executing in real mode.
+ *
+ * @returns @c true if it is, @c false if not.
+ * @param   a_pIemCpu       The IEM state of the current CPU.
+ */
+#define IEM_IS_REAL_MODE(a_pIemCpu)         (CPUMIsGuestInRealModeEx((a_pIemCpu)->CTX_SUFF(pCtx)))
+
+/**
+ * Tests if an AMD CPUID feature (extended) is marked present - ECX.
+ */
+#define IEM_IS_AMD_CPUID_FEATURE_PRESENT_ECX(a_fEcx)    iemRegIsAmdCpuIdFeaturePresent(pIemCpu, 0, (a_fEcx))
+
+/**
+ * Check if the address is canonical.
+ */
+#define IEM_IS_CANONICAL(a_u64Addr)         ((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000) < UINT64_C(0x1000000000000))
+
+
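+/* Worked example for the canonical check above: adding 2^47 maps the two
+ * canonical ranges onto [0, 2^48) via 64-bit wraparound.  Thus
+ * 0x00007FFFFFFFFFFF + 0x800000000000 = 0x0000FFFFFFFFFFFF < 2^48 passes,
+ * while 0x0000800000000000 + 0x800000000000 = 0x1000000000000 does not. */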
+/*******************************************************************************
+*   Global Variables                                                           *
+*******************************************************************************/
+extern const PFNIEMOP g_apfnOneByteMap[256]; /* not static since we need to forward declare it. */
+
+
+/** Function table for the ADD instruction. */
+static const IEMOPBINSIZES g_iemAImpl_add =
+{
+    iemAImpl_add_u8,  iemAImpl_add_u8_locked,
+    iemAImpl_add_u16, iemAImpl_add_u16_locked,
+    iemAImpl_add_u32, iemAImpl_add_u32_locked,
+    iemAImpl_add_u64, iemAImpl_add_u64_locked
+};
+
+/** Function table for the ADC instruction. */
+static const IEMOPBINSIZES g_iemAImpl_adc =
+{
+    iemAImpl_adc_u8,  iemAImpl_adc_u8_locked,
+    iemAImpl_adc_u16, iemAImpl_adc_u16_locked,
+    iemAImpl_adc_u32, iemAImpl_adc_u32_locked,
+    iemAImpl_adc_u64, iemAImpl_adc_u64_locked
+};
+
+/** Function table for the SUB instruction. */
+static const IEMOPBINSIZES g_iemAImpl_sub =
+{
+    iemAImpl_sub_u8,  iemAImpl_sub_u8_locked,
+    iemAImpl_sub_u16, iemAImpl_sub_u16_locked,
+    iemAImpl_sub_u32, iemAImpl_sub_u32_locked,
+    iemAImpl_sub_u64, iemAImpl_sub_u64_locked
+};
+
+/** Function table for the SBB instruction. */
+static const IEMOPBINSIZES g_iemAImpl_sbb =
+{
+    iemAImpl_sbb_u8,  iemAImpl_sbb_u8_locked,
+    iemAImpl_sbb_u16, iemAImpl_sbb_u16_locked,
+    iemAImpl_sbb_u32, iemAImpl_sbb_u32_locked,
+    iemAImpl_sbb_u64, iemAImpl_sbb_u64_locked
+};
+
+/** Function table for the OR instruction. */
+static const IEMOPBINSIZES g_iemAImpl_or =
+{
+    iemAImpl_or_u8,  iemAImpl_or_u8_locked,
+    iemAImpl_or_u16, iemAImpl_or_u16_locked,
+    iemAImpl_or_u32, iemAImpl_or_u32_locked,
+    iemAImpl_or_u64, iemAImpl_or_u64_locked
+};
+
+/** Function table for the XOR instruction. */
+static const IEMOPBINSIZES g_iemAImpl_xor =
+{
+    iemAImpl_xor_u8,  iemAImpl_xor_u8_locked,
+    iemAImpl_xor_u16, iemAImpl_xor_u16_locked,
+    iemAImpl_xor_u32, iemAImpl_xor_u32_locked,
+    iemAImpl_xor_u64, iemAImpl_xor_u64_locked
+};
+
+/** Function table for the AND instruction. */
+static const IEMOPBINSIZES g_iemAImpl_and =
+{
+    iemAImpl_and_u8,  iemAImpl_and_u8_locked,
+    iemAImpl_and_u16, iemAImpl_and_u16_locked,
+    iemAImpl_and_u32, iemAImpl_and_u32_locked,
+    iemAImpl_and_u64, iemAImpl_and_u64_locked
+};
+
+/** Function table for the CMP instruction.
+ * @remarks Making operand order ASSUMPTIONS.
+ */
+static const IEMOPBINSIZES g_iemAImpl_cmp =
+{
+    iemAImpl_cmp_u8,  NULL,
+    iemAImpl_cmp_u16, NULL,
+    iemAImpl_cmp_u32, NULL,
+    iemAImpl_cmp_u64, NULL
+};
+
+/** Function table for the TEST instruction.
+ * @remarks Making operand order ASSUMPTIONS.
+ */
+static const IEMOPBINSIZES g_iemAImpl_test =
+{
+    iemAImpl_test_u8,  NULL,
+    iemAImpl_test_u16, NULL,
+    iemAImpl_test_u32, NULL,
+    iemAImpl_test_u64, NULL
+};
+
+/** Group 1 /r lookup table. */
+static const PCIEMOPBINSIZES g_apIemImplGrp1[8] =
+{
+    &g_iemAImpl_add,
+    &g_iemAImpl_or,
+    &g_iemAImpl_adc,
+    &g_iemAImpl_sbb,
+    &g_iemAImpl_and,
+    &g_iemAImpl_sub,
+    &g_iemAImpl_xor,
+    &g_iemAImpl_cmp
+};
+
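+/* Example (assumption, following the x86 group 1 encoding): for the
+ * 0x80..0x83 opcodes the ModR/M reg field selects the operation, so /0 is
+ * ADD and /7 is CMP:
+ *
+ *      PCIEMOPBINSIZES pImpl = g_apIemImplGrp1[(bRm >> 3) & 7];
+ */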
+/** Function table for the INC instruction. */
+static const IEMOPUNARYSIZES g_iemAImpl_inc =
+{
+    iemAImpl_inc_u8,  iemAImpl_inc_u8_locked,
+    iemAImpl_inc_u16, iemAImpl_inc_u16_locked,
+    iemAImpl_inc_u32, iemAImpl_inc_u32_locked,
+    iemAImpl_inc_u64, iemAImpl_inc_u64_locked
+};
+
+/** Function table for the DEC instruction. */
+static const IEMOPUNARYSIZES g_iemAImpl_dec =
+{
+    iemAImpl_dec_u8,  iemAImpl_dec_u8_locked,
+    iemAImpl_dec_u16, iemAImpl_dec_u16_locked,
+    iemAImpl_dec_u32, iemAImpl_dec_u32_locked,
+    iemAImpl_dec_u64, iemAImpl_dec_u64_locked
+};
+
+/** Function table for the NEG instruction. */
+static const IEMOPUNARYSIZES g_iemAImpl_neg =
+{
+    iemAImpl_neg_u8,  iemAImpl_neg_u8_locked,
+    iemAImpl_neg_u16, iemAImpl_neg_u16_locked,
+    iemAImpl_neg_u32, iemAImpl_neg_u32_locked,
+    iemAImpl_neg_u64, iemAImpl_neg_u64_locked
+};
+
+/** Function table for the NOT instruction. */
+static const IEMOPUNARYSIZES g_iemAImpl_not =
+{
+    iemAImpl_not_u8,  iemAImpl_not_u8_locked,
+    iemAImpl_not_u16, iemAImpl_not_u16_locked,
+    iemAImpl_not_u32, iemAImpl_not_u32_locked,
+    iemAImpl_not_u64, iemAImpl_not_u64_locked
+};
+
+
+/** Function table for the ROL instruction. */
+static const IEMOPSHIFTSIZES g_iemAImpl_rol =
+{
+    iemAImpl_rol_u8,
+    iemAImpl_rol_u16,
+    iemAImpl_rol_u32,
+    iemAImpl_rol_u64
+};
+
+/** Function table for the ROR instruction. */
+static const IEMOPSHIFTSIZES g_iemAImpl_ror =
+{
+    iemAImpl_ror_u8,
+    iemAImpl_ror_u16,
+    iemAImpl_ror_u32,
+    iemAImpl_ror_u64
+};
+
+/** Function table for the RCL instruction. */
+static const IEMOPSHIFTSIZES g_iemAImpl_rcl =
+{
+    iemAImpl_rcl_u8,
+    iemAImpl_rcl_u16,
+    iemAImpl_rcl_u32,
+    iemAImpl_rcl_u64
+};
+
+/** Function table for the RCR instruction. */
+static const IEMOPSHIFTSIZES g_iemAImpl_rcr =
+{
+    iemAImpl_rcr_u8,
+    iemAImpl_rcr_u16,
+    iemAImpl_rcr_u32,
+    iemAImpl_rcr_u64
+};
+
+/** Function table for the SHL instruction. */
+static const IEMOPSHIFTSIZES g_iemAImpl_shl =
+{
+    iemAImpl_shl_u8,
+    iemAImpl_shl_u16,
+    iemAImpl_shl_u32,
+    iemAImpl_shl_u64
+};
+
+/** Function table for the SHR instruction. */
+static const IEMOPSHIFTSIZES g_iemAImpl_shr =
+{
+    iemAImpl_shr_u8,
+    iemAImpl_shr_u16,
+    iemAImpl_shr_u32,
+    iemAImpl_shr_u64
+};
+
+/** Function table for the SAR instruction. */
+static const IEMOPSHIFTSIZES g_iemAImpl_sar =
+{
+    iemAImpl_sar_u8,
+    iemAImpl_sar_u16,
+    iemAImpl_sar_u32,
+    iemAImpl_sar_u64
+};
+
+
+/** Function table for the MUL instruction. */
+static const IEMOPMULDIVSIZES g_iemAImpl_mul =
+{
+    iemAImpl_mul_u8,
+    iemAImpl_mul_u16,
+    iemAImpl_mul_u32,
+    iemAImpl_mul_u64
+};
+
+/** Function table for the IMUL instruction working implicitly on rAX. */
+static const IEMOPMULDIVSIZES g_iemAImpl_imul =
+{
+    iemAImpl_imul_u8,
+    iemAImpl_imul_u16,
+    iemAImpl_imul_u32,
+    iemAImpl_imul_u64
+};
+
+/** Function table for the DIV instruction. */
+static const IEMOPMULDIVSIZES g_iemAImpl_div =
+{
+    iemAImpl_div_u8,
+    iemAImpl_div_u16,
+    iemAImpl_div_u32,
+    iemAImpl_div_u64
+};
+
+/** Function table for the IDIV instruction. */
+static const IEMOPMULDIVSIZES g_iemAImpl_idiv =
+{
+    iemAImpl_idiv_u8,
+    iemAImpl_idiv_u16,
+    iemAImpl_idiv_u32,
+    iemAImpl_idiv_u64
+};
+
+
+/*******************************************************************************
+*   Internal Functions                                                         *
+*******************************************************************************/
+static VBOXSTRICTRC     iemRaiseGeneralProtectionFault0(PIEMCPU pIemCpu);
+static VBOXSTRICTRC     iemRaiseSelectorBounds(PIEMCPU pIemCpu, uint32_t iSegReg, uint32_t fAccess);
+static VBOXSTRICTRC     iemRaiseSelectorInvalidAccess(PIEMCPU pIemCpu, uint32_t iSegReg, uint32_t fAccess);
+static VBOXSTRICTRC     iemRaiseSelectorNotPresent(PIEMCPU pIemCpu, uint32_t iSegReg, uint32_t fAccess);
+static VBOXSTRICTRC     iemRaisePageFault(PIEMCPU pIemCpu, RTGCPTR GCPtrWhere, uint32_t fAccess, int rc);
+
+
+/**
+ * Initializes the decoder state.
+ *
+ * @param   pIemCpu             The per CPU IEM state.
+ */
+DECLINLINE(void) iemInitDecode(PIEMCPU pIemCpu)
+{
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+
+    pIemCpu->uCpl               = CPUMGetGuestCPL(IEMCPU_TO_VMCPU(pIemCpu), CPUMCTX2CORE(pCtx));
+    IEMMODE enmMode = CPUMIsGuestIn64BitCodeEx(pCtx)
+                    ? IEMMODE_64BIT
+                    : pCtx->csHid.Attr.n.u1DefBig /** @todo check if this is correct... */
+                    ? IEMMODE_32BIT
+                    : IEMMODE_16BIT;
+    pIemCpu->enmCpuMode         = enmMode;
+    pIemCpu->enmDefAddrMode     = enmMode;  /** @todo check if this is correct... */
+    pIemCpu->enmEffAddrMode     = enmMode;
+    pIemCpu->enmDefOpSize       = enmMode;  /** @todo check if this is correct... */
+    pIemCpu->enmEffOpSize       = enmMode;
+    pIemCpu->fPrefixes          = 0;
+    pIemCpu->uRexReg            = 0;
+    pIemCpu->uRexB              = 0;
+    pIemCpu->uRexIndex          = 0;
+    pIemCpu->iEffSeg            = X86_SREG_DS;
+    pIemCpu->offOpcode          = 0;
+    pIemCpu->cbOpcode           = 0;
+    pIemCpu->cActiveMappings    = 0;
+    pIemCpu->iNextMapping       = 0;
+}
+
+
+/**
+ * Prefetches the opcode bytes when starting to execute an instruction.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM state.
+ */
+static VBOXSTRICTRC iemInitDecoderAndPrefetchOpcodes(PIEMCPU pIemCpu)
+{
+    iemInitDecode(pIemCpu);
+
+    /*
+     * What we're doing here is very similar to iemMemMap/iemMemBounceBufferMap.
+     *
+     * First translate CS:rIP to a physical address.
+     */
+    PCPUMCTX    pCtx = pIemCpu->CTX_SUFF(pCtx);
+    uint32_t    cbToTryRead;
+    RTGCPTR     GCPtrPC;
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+    {
+        cbToTryRead = PAGE_SIZE;
+        GCPtrPC     = pCtx->rip;
+        if (!IEM_IS_CANONICAL(GCPtrPC))
+            return iemRaiseGeneralProtectionFault0(pIemCpu);
+        cbToTryRead = PAGE_SIZE - (GCPtrPC & PAGE_OFFSET_MASK);
+    }
+    else
+    {
+        uint32_t GCPtrPC32 = pCtx->eip;
+        Assert(!(GCPtrPC32 & ~(uint32_t)UINT16_MAX) || pIemCpu->enmCpuMode == IEMMODE_32BIT);
+        if (GCPtrPC32 > pCtx->csHid.u32Limit)
+            return iemRaiseSelectorBounds(pIemCpu, X86_SREG_CS, IEM_ACCESS_INSTRUCTION);
+        cbToTryRead = pCtx->csHid.u32Limit - GCPtrPC32 + 1;
+        GCPtrPC = pCtx->csHid.u64Base + GCPtrPC32;
+    }
+
+    RTGCPHYS    GCPhys;
+    uint64_t    fFlags;
+    int rc = PGMGstGetPage(IEMCPU_TO_VMCPU(pIemCpu), GCPtrPC, &fFlags, &GCPhys);
+    if (RT_FAILURE(rc))
+        return iemRaisePageFault(pIemCpu, GCPtrPC, IEM_ACCESS_INSTRUCTION, rc);
+    if (!(fFlags & X86_PTE_US) && pIemCpu->uCpl == 3) /* user-mode fetch from a supervisor page */
+        return iemRaisePageFault(pIemCpu, GCPtrPC, IEM_ACCESS_INSTRUCTION, VERR_ACCESS_DENIED);
+    if ((fFlags & X86_PTE_PAE_NX) && (pCtx->msrEFER & MSR_K6_EFER_NXE))
+        return iemRaisePageFault(pIemCpu, GCPtrPC, IEM_ACCESS_INSTRUCTION, VERR_ACCESS_DENIED);
+    GCPhys |= GCPtrPC & PAGE_OFFSET_MASK;
+    /** @todo Check reserved bits and such stuff. PGM is better at doing
+     *        that, so do it when implementing the guest virtual address
+     *        TLB... */
+
+    /*
+     * Read the bytes at this address.
+     */
+    uint32_t cbLeftOnPage = PAGE_SIZE - (GCPtrPC & PAGE_OFFSET_MASK);
+    if (cbToTryRead > cbLeftOnPage)
+        cbToTryRead = cbLeftOnPage;
+    if (cbToTryRead > sizeof(pIemCpu->abOpcode))
+        cbToTryRead = sizeof(pIemCpu->abOpcode);
+    if (!pIemCpu->fByPassHandlers)
+        rc = PGMPhysRead(IEMCPU_TO_VM(pIemCpu), GCPhys, pIemCpu->abOpcode, cbToTryRead);
+    else
+        rc = PGMPhysSimpleReadGCPhys(IEMCPU_TO_VM(pIemCpu), pIemCpu->abOpcode, GCPhys, cbToTryRead);
+    if (rc != VINF_SUCCESS)
+        return rc;
+    pIemCpu->cbOpcode = cbToTryRead;
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Tries to fetch at least @a cbMin additional opcode bytes, raising the
+ * appropriate exception if it fails.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM state.
+ * @param   cbMin               The minimum number of bytes to fetch.
+ */
+static VBOXSTRICTRC iemOpcodeFetchMoreBytes(PIEMCPU pIemCpu, size_t cbMin)
+{
+    /*
+     * What we're doing here is very similar to iemMemMap/iemMemBounceBufferMap.
+     *
+     * First translate CS:rIP to a physical address.
+     */
+    PCPUMCTX    pCtx = pIemCpu->CTX_SUFF(pCtx);
+    uint32_t    cbToTryRead;
+    RTGCPTR     GCPtrNext;
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+    {
+        cbToTryRead = PAGE_SIZE;
+        GCPtrNext   = pCtx->rip + pIemCpu->cbOpcode;
+        if (!IEM_IS_CANONICAL(GCPtrNext))
+            return iemRaiseGeneralProtectionFault0(pIemCpu);
+        cbToTryRead = PAGE_SIZE - (GCPtrNext & PAGE_OFFSET_MASK);
+        Assert(cbToTryRead >= cbMin); /* ASSUMPTION based on iemInitDecoderAndPrefetchOpcodes. */
+    }
+    else
+    {
+        uint32_t GCPtrNext32 = pCtx->eip;
+        Assert(!(GCPtrNext32 & ~(uint32_t)UINT16_MAX) || pIemCpu->enmCpuMode == IEMMODE_32BIT);
+        GCPtrNext32 += pIemCpu->cbOpcode;
+        if (GCPtrNext32 > pCtx->csHid.u32Limit)
+            return iemRaiseSelectorBounds(pIemCpu, X86_SREG_CS, IEM_ACCESS_INSTRUCTION);
+        cbToTryRead = pCtx->csHid.u32Limit - GCPtrNext32 + 1;
+        if (cbToTryRead < cbMin)
+            return iemRaiseSelectorBounds(pIemCpu, X86_SREG_CS, IEM_ACCESS_INSTRUCTION);
+        GCPtrNext = pCtx->csHid.u64Base + GCPtrNext32;
+    }
+
+    RTGCPHYS    GCPhys;
+    uint64_t    fFlags;
+    int rc = PGMGstGetPage(IEMCPU_TO_VMCPU(pIemCpu), GCPtrNext, &fFlags, &GCPhys);
+    if (RT_FAILURE(rc))
+        return iemRaisePageFault(pIemCpu, GCPtrNext, IEM_ACCESS_INSTRUCTION, rc);
+    if (!(fFlags & X86_PTE_US) && pIemCpu->uCpl == 3) /* user-mode fetch from a supervisor page */
+        return iemRaisePageFault(pIemCpu, GCPtrNext, IEM_ACCESS_INSTRUCTION, VERR_ACCESS_DENIED);
+    if ((fFlags & X86_PTE_PAE_NX) && (pCtx->msrEFER & MSR_K6_EFER_NXE))
+        return iemRaisePageFault(pIemCpu, GCPtrNext, IEM_ACCESS_INSTRUCTION, VERR_ACCESS_DENIED);
+    GCPhys |= GCPtrNext & PAGE_OFFSET_MASK;
+    /** @todo Check reserved bits and such stuff. PGM is better at doing
+     *        that, so do it when implementing the guest virtual address
+     *        TLB... */
+
+    /*
+     * Read the bytes at this address.
+     */
+    uint32_t cbLeftOnPage = PAGE_SIZE - (GCPtrNext & PAGE_OFFSET_MASK);
+    if (cbToTryRead > cbLeftOnPage)
+        cbToTryRead = cbLeftOnPage;
+    if (cbToTryRead > sizeof(pIemCpu->abOpcode) - pIemCpu->cbOpcode)
+        cbToTryRead = sizeof(pIemCpu->abOpcode) - pIemCpu->cbOpcode;
+    if (!pIemCpu->fByPassHandlers)
+        rc = PGMPhysRead(IEMCPU_TO_VM(pIemCpu), GCPhys, &pIemCpu->abOpcode[pIemCpu->cbOpcode], cbToTryRead);
+    else
+        rc = PGMPhysSimpleReadGCPhys(IEMCPU_TO_VM(pIemCpu), &pIemCpu->abOpcode[pIemCpu->cbOpcode], GCPhys, cbToTryRead);
+    if (rc != VINF_SUCCESS)
+        return rc;
+    pIemCpu->cbOpcode += cbToTryRead;
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Deals with the problematic cases that iemOpcodeGetNextU8 doesn't like.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM state.
+ * @param   pb                  Where to return the opcode byte.
+ */
+static VBOXSTRICTRC iemOpcodeGetNextByteSlow(PIEMCPU pIemCpu, uint8_t *pb)
+{
+    VBOXSTRICTRC rcStrict = iemOpcodeFetchMoreBytes(pIemCpu, 1);
+    if (rcStrict == VINF_SUCCESS)
+    {
+        uint8_t offOpcode = pIemCpu->offOpcode;
+        *pb = pIemCpu->abOpcode[offOpcode];
+        pIemCpu->offOpcode = offOpcode + 1;
+    }
+    else
+        *pb = 0;
+    return rcStrict;
+}
+
+
+/**
+ * Deals with the problematic cases that iemOpcodeGetNextS8SxU16 doesn't like.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM state.
+ * @param   pu16                Where to return the opcode word.
+ */
+static VBOXSTRICTRC iemOpcodeGetNextS8SxU16Slow(PIEMCPU pIemCpu, uint16_t *pu16)
+{
+    uint8_t     u8;
+    VBOXSTRICTRC rcStrict = iemOpcodeGetNextByteSlow(pIemCpu, &u8);
+    if (rcStrict == VINF_SUCCESS)
+        *pu16 = (int8_t)u8;
+    return rcStrict;
+}
+
+
+/**
+ * Deals with the problematic cases that iemOpcodeGetNextU16 doesn't like.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM state.
+ * @param   pu16                Where to return the opcode word.
+ */
+static VBOXSTRICTRC iemOpcodeGetNextU16Slow(PIEMCPU pIemCpu, uint16_t *pu16)
+{
+    VBOXSTRICTRC rcStrict = iemOpcodeFetchMoreBytes(pIemCpu, 2);
+    if (rcStrict == VINF_SUCCESS)
+    {
+        uint8_t offOpcode = pIemCpu->offOpcode;
+        *pu16 = RT_MAKE_U16(pIemCpu->abOpcode[offOpcode], pIemCpu->abOpcode[offOpcode + 1]);
+        pIemCpu->offOpcode = offOpcode + 2;
+    }
+    else
+        *pu16 = 0;
+    return rcStrict;
+}
+
+
+/**
+ * Deals with the problematic cases that iemOpcodeGetNextU32 doesn't like.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM state.
+ * @param   pu32                Where to return the opcode dword.
+ */
+static VBOXSTRICTRC iemOpcodeGetNextU32Slow(PIEMCPU pIemCpu, uint32_t *pu32)
+{
+    VBOXSTRICTRC rcStrict = iemOpcodeFetchMoreBytes(pIemCpu, 4);
+    if (rcStrict == VINF_SUCCESS)
+    {
+        uint8_t offOpcode = pIemCpu->offOpcode;
+        *pu32 = RT_MAKE_U32_FROM_U8(pIemCpu->abOpcode[offOpcode],
+                                    pIemCpu->abOpcode[offOpcode + 1],
+                                    pIemCpu->abOpcode[offOpcode + 2],
+                                    pIemCpu->abOpcode[offOpcode + 3]);
+        pIemCpu->offOpcode = offOpcode + 4;
+    }
+    else
+        *pu32 = 0;
+    return rcStrict;
+}
+
+
+/**
+ * Deals with the problematic cases that iemOpcodeGetNextS32SxU64 doesn't like.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM state.
+ * @param   pu64                Where to return the opcode qword.
+ */
+static VBOXSTRICTRC iemOpcodeGetNextS32SxU64Slow(PIEMCPU pIemCpu, uint64_t *pu64)
+{
+    VBOXSTRICTRC rcStrict = iemOpcodeFetchMoreBytes(pIemCpu, 4);
+    if (rcStrict == VINF_SUCCESS)
+    {
+        uint8_t offOpcode = pIemCpu->offOpcode;
+        *pu64 = (int32_t)RT_MAKE_U32_FROM_U8(pIemCpu->abOpcode[offOpcode],
+                                             pIemCpu->abOpcode[offOpcode + 1],
+                                             pIemCpu->abOpcode[offOpcode + 2],
+                                             pIemCpu->abOpcode[offOpcode + 3]);
+        pIemCpu->offOpcode = offOpcode + 4;
+    }
+    else
+        *pu64 = 0;
+    return rcStrict;
+}
+
+
+/**
+ * Deals with the problematic cases that iemOpcodeGetNextU64 doesn't like.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM state.
+ * @param   pu64                Where to return the opcode qword.
+ */
+static VBOXSTRICTRC iemOpcodeGetNextU64Slow(PIEMCPU pIemCpu, uint64_t *pu64)
+{
+    VBOXSTRICTRC rcStrict = iemOpcodeFetchMoreBytes(pIemCpu, 8);
+    if (rcStrict == VINF_SUCCESS)
+    {
+        uint8_t offOpcode = pIemCpu->offOpcode;
+        *pu64 = RT_MAKE_U64_FROM_U8(pIemCpu->abOpcode[offOpcode],
+                                    pIemCpu->abOpcode[offOpcode + 1],
+                                    pIemCpu->abOpcode[offOpcode + 2],
+                                    pIemCpu->abOpcode[offOpcode + 3],
+                                    pIemCpu->abOpcode[offOpcode + 4],
+                                    pIemCpu->abOpcode[offOpcode + 5],
+                                    pIemCpu->abOpcode[offOpcode + 6],
+                                    pIemCpu->abOpcode[offOpcode + 7]);
+        pIemCpu->offOpcode = offOpcode + 8;
+    }
+    else
+        *pu64 = 0;
+    return rcStrict;
+}
+
+
+/**
+ * Fetches the next opcode byte.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM state.
+ * @param   pu8                 Where to return the opcode byte.
+ */
+DECLINLINE(VBOXSTRICTRC) iemOpcodeGetNextU8(PIEMCPU pIemCpu, uint8_t *pu8)
+{
+    uint8_t const offOpcode = pIemCpu->offOpcode;
+    if (RT_UNLIKELY(offOpcode >= pIemCpu->cbOpcode))
+        return iemOpcodeGetNextByteSlow(pIemCpu, pu8);
+
+    *pu8 = pIemCpu->abOpcode[offOpcode];
+    pIemCpu->offOpcode = offOpcode + 1;
+    return VINF_SUCCESS;
+}
+
+/**
+ * Fetches the next opcode byte, returns automatically on failure.
+ *
+ * @param   pIemCpu             The IEM state.
+ * @param   a_pu8               Where to return the opcode byte.
+ */
+#define IEM_OPCODE_GET_NEXT_BYTE(a_pIemCpu, a_pu8) \
+    do \
+    { \
+        VBOXSTRICTRC rcStrict2 = iemOpcodeGetNextU8((a_pIemCpu), (a_pu8)); \
+        if (rcStrict2 != VINF_SUCCESS) \
+            return rcStrict2; \
+    } while (0)
+
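+/* Typical use inside a decoder (sketch; assumes an FNIEMOP_DEF style body so
+ * that the 'return' inside the macro propagates the status to the caller):
+ *
+ *      uint8_t bRm;
+ *      IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+ */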
+
+/**
+ * Fetches the next signed byte from the opcode stream.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM state.
+ * @param   pi8                 Where to return the signed byte.
+ */
+DECLINLINE(VBOXSTRICTRC) iemOpcodeGetNextS8(PIEMCPU pIemCpu, int8_t *pi8)
+{
+    return iemOpcodeGetNextU8(pIemCpu, (uint8_t *)pi8);
+}
+
+/**
+ * Fetches the next signed byte from the opcode stream, returning automatically
+ * on failure.
+ *
+ * @param   pIemCpu             The IEM state.
+ * @param   pi8                 Where to return the signed byte.
+ */
+#define IEM_OPCODE_GET_NEXT_S8(a_pIemCpu, a_pi8) \
+    do \
+    { \
+        VBOXSTRICTRC rcStrict2 = iemOpcodeGetNextS8((a_pIemCpu), (a_pi8)); \
+        if (rcStrict2 != VINF_SUCCESS) \
+            return rcStrict2; \
+    } while (0)
+
+
+/**
+ * Fetches the next signed byte from the opcode stream, extending it to
+ * unsigned 16-bit.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM state.
+ * @param   pu16                Where to return the unsigned word.
+ */
+DECLINLINE(VBOXSTRICTRC) iemOpcodeGetNextS8SxU16(PIEMCPU pIemCpu, uint16_t *pu16)
+{
+    uint8_t const offOpcode = pIemCpu->offOpcode;
+    if (RT_UNLIKELY(offOpcode >= pIemCpu->cbOpcode))
+        return iemOpcodeGetNextS8SxU16Slow(pIemCpu, pu16);
+
+    *pu16 = (int8_t)pIemCpu->abOpcode[offOpcode];
+    pIemCpu->offOpcode = offOpcode + 1;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Fetches the next signed byte from the opcode stream and sign-extends it to
+ * a word, returning automatically on failure.
+ *
+ * @param   pIemCpu             The IEM state.
+ * @param   pu16                Where to return the word.
+ */
+#define IEM_OPCODE_GET_NEXT_S8_SX_U16(a_pIemCpu, a_pu16) \
+    do \
+    { \
+        VBOXSTRICTRC rcStrict2 = iemOpcodeGetNextS8SxU16((a_pIemCpu), (a_pu16)); \
+        if (rcStrict2 != VINF_SUCCESS) \
+            return rcStrict2; \
+    } while (0)
+
+
+/**
+ * Fetches the next opcode word.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM state.
+ * @param   pu16                Where to return the opcode word.
+ */
+DECLINLINE(VBOXSTRICTRC) iemOpcodeGetNextU16(PIEMCPU pIemCpu, uint16_t *pu16)
+{
+    uint8_t const offOpcode = pIemCpu->offOpcode;
+    if (RT_UNLIKELY(offOpcode + 2 > pIemCpu->cbOpcode))
+        return iemOpcodeGetNextU16Slow(pIemCpu, pu16);
+
+    *pu16 = RT_MAKE_U16(pIemCpu->abOpcode[offOpcode], pIemCpu->abOpcode[offOpcode + 1]);
+    pIemCpu->offOpcode = offOpcode + 2;
+    return VINF_SUCCESS;
+}
+
+/**
+ * Fetches the next opcode word, returns automatically on failure.
+ *
+ * @param   pIemCpu             The IEM state.
+ * @param   a_pu16              Where to return the opcode word.
+ */
+#define IEM_OPCODE_GET_NEXT_U16(a_pIemCpu, a_pu16) \
+    do \
+    { \
+        VBOXSTRICTRC rcStrict2 = iemOpcodeGetNextU16((a_pIemCpu), (a_pu16)); \
+        if (rcStrict2 != VINF_SUCCESS) \
+            return rcStrict2; \
+    } while (0)
+
+
+/**
+ * Fetches the next opcode dword.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM state.
+ * @param   pu32                Where to return the opcode double word.
+ */
+DECLINLINE(VBOXSTRICTRC) iemOpcodeGetNextU32(PIEMCPU pIemCpu, uint32_t *pu32)
+{
+    uint8_t const offOpcode = pIemCpu->offOpcode;
+    if (RT_UNLIKELY(offOpcode + 4 > pIemCpu->cbOpcode))
+        return iemOpcodeGetNextU32Slow(pIemCpu, pu32);
+
+    *pu32 = RT_MAKE_U32_FROM_U8(pIemCpu->abOpcode[offOpcode],
+                                pIemCpu->abOpcode[offOpcode + 1],
+                                pIemCpu->abOpcode[offOpcode + 2],
+                                pIemCpu->abOpcode[offOpcode + 3]);
+    pIemCpu->offOpcode = offOpcode + 4;
+    return VINF_SUCCESS;
+}
+
+/**
+ * Fetches the next opcode dword, returns automatically on failure.
+ *
+ * @param   pIemCpu             The IEM state.
+ * @param   a_pu32              Where to return the opcode dword.
+ */
+#define IEM_OPCODE_GET_NEXT_U32(a_pIemCpu, a_pu32) \
+    do \
+    { \
+        VBOXSTRICTRC rcStrict2 = iemOpcodeGetNextU32((a_pIemCpu), (a_pu32)); \
+        if (rcStrict2 != VINF_SUCCESS) \
+            return rcStrict2; \
+    } while (0)
+
+
+/**
+ * Fetches the next opcode dword, sign extending it into a quad word.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM state.
+ * @param   pu64                Where to return the opcode quad word.
+ */
+DECLINLINE(VBOXSTRICTRC) iemOpcodeGetNextS32SxU64(PIEMCPU pIemCpu, uint64_t *pu64)
+{
+    uint8_t const offOpcode = pIemCpu->offOpcode;
+    if (RT_UNLIKELY(offOpcode + 4 > pIemCpu->cbOpcode))
+        return iemOpcodeGetNextS32SxU64Slow(pIemCpu, pu64);
+
+    int32_t i32 = RT_MAKE_U32_FROM_U8(pIemCpu->abOpcode[offOpcode],
+                                      pIemCpu->abOpcode[offOpcode + 1],
+                                      pIemCpu->abOpcode[offOpcode + 2],
+                                      pIemCpu->abOpcode[offOpcode + 3]);
+    *pu64 = i32;
+    pIemCpu->offOpcode = offOpcode + 4;
+    return VINF_SUCCESS;
+}
+
+/**
+ * Fetches the next opcode double word and sign extends it to a quad word,
+ * returns automatically on failure.
+ *
+ * @param   pIemCpu             The IEM state.
+ * @param   a_pu64              Where to return the opcode quad word.
+ */
+#define IEM_OPCODE_GET_NEXT_S32_SX_U64(a_pIemCpu, a_pu64) \
+    do \
+    { \
+        VBOXSTRICTRC rcStrict2 = iemOpcodeGetNextS32SxU64((a_pIemCpu), (a_pu64)); \
+        if (rcStrict2 != VINF_SUCCESS) \
+            return rcStrict2; \
+    } while (0)
+
+
+/**
+ * Fetches the next opcode qword.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM state.
+ * @param   pu64                Where to return the opcode qword.
+ */
+DECLINLINE(VBOXSTRICTRC) iemOpcodeGetNextU64(PIEMCPU pIemCpu, uint64_t *pu64)
+{
+    uint8_t const offOpcode = pIemCpu->offOpcode;
+    if (RT_UNLIKELY(offOpcode + 8 > pIemCpu->cbOpcode))
+        return iemOpcodeGetNextU64Slow(pIemCpu, pu64);
+
+    *pu64 = RT_MAKE_U64_FROM_U8(pIemCpu->abOpcode[offOpcode],
+                                pIemCpu->abOpcode[offOpcode + 1],
+                                pIemCpu->abOpcode[offOpcode + 2],
+                                pIemCpu->abOpcode[offOpcode + 3],
+                                pIemCpu->abOpcode[offOpcode + 4],
+                                pIemCpu->abOpcode[offOpcode + 5],
+                                pIemCpu->abOpcode[offOpcode + 6],
+                                pIemCpu->abOpcode[offOpcode + 7]);
+    pIemCpu->offOpcode = offOpcode + 8;
+    return VINF_SUCCESS;
+}
+
+/**
+ * Fetches the next opcode qword, returns automatically on failure.
+ *
+ * @param   pIemCpu             The IEM state.
+ * @param   a_pu64              Where to return the opcode qword.
+ */
+#define IEM_OPCODE_GET_NEXT_U64(a_pIemCpu, a_pu64) \
+    do \
+    { \
+        VBOXSTRICTRC rcStrict2 = iemOpcodeGetNextU64((a_pIemCpu), (a_pu64)); \
+        if (rcStrict2 != VINF_SUCCESS) \
+            return rcStrict2; \
+    } while (0)
+
+
+/** @name  Raising Exceptions.
+ *
+ * @{
+ */
+
+static VBOXSTRICTRC iemRaiseDivideError(PIEMCPU pIemCpu)
+{
+    AssertFailed(/** @todo implement this */);
+    return VERR_NOT_IMPLEMENTED;
+}
+
+
+static VBOXSTRICTRC iemRaiseGeneralProtectionFault(PIEMCPU pIemCpu, uint16_t uErr)
+{
+    AssertFailed(/** @todo implement this */);
+    return VERR_NOT_IMPLEMENTED;
+}
+
+
+static VBOXSTRICTRC iemRaiseGeneralProtectionFault0(PIEMCPU pIemCpu)
+{
+    AssertFailed(/** @todo implement this */);
+    return VERR_NOT_IMPLEMENTED;
+}
+
+
+static VBOXSTRICTRC iemRaiseSelectorBounds(PIEMCPU pIemCpu, uint32_t iSegReg, uint32_t fAccess)
+{
+    AssertFailed(/** @todo implement this */);
+    return VERR_NOT_IMPLEMENTED;
+}
+
+
+static VBOXSTRICTRC iemRaiseSelectorInvalidAccess(PIEMCPU pIemCpu, uint32_t iSegReg, uint32_t fAccess)
+{
+    AssertFailed(/** @todo implement this */);
+    return VERR_NOT_IMPLEMENTED;
+}
+
+
+static VBOXSTRICTRC iemRaiseSelectorNotPresentBySegReg(PIEMCPU pIemCpu, uint32_t iSegReg)
+{
+    AssertFailed(/** @todo implement this */);
+    return VERR_NOT_IMPLEMENTED;
+}
+
+
+static VBOXSTRICTRC iemRaiseSelectorNotPresentBySelector(PIEMCPU pIemCpu, uint16_t uSel)
+{
+    AssertFailed(/** @todo implement this */);
+    return VERR_NOT_IMPLEMENTED;
+}
+
+
+static VBOXSTRICTRC iemRaisePageFault(PIEMCPU pIemCpu, RTGCPTR GCPtrWhere, uint32_t fAccess, int rc)
+{
+    AssertFailed(/** @todo implement this */);
+    return VERR_NOT_IMPLEMENTED;
+}
+
+
+/**
+ * Macro for calling iemCImplRaiseInvalidLockPrefix().
+ *
+ * This enables us to add/remove arguments and force different levels of
+ * inlining as we wish.
+ *
+ * @return  Strict VBox status code.
+ */
+#define IEMOP_RAISE_INVALID_LOCK_PREFIX()   IEM_MC_DEFER_TO_CIMPL_0(iemCImplRaiseInvalidLockPrefix)
+IEM_CIMPL_DEF_0(iemCImplRaiseInvalidLockPrefix)
+{
+    AssertFailed();
+    return VERR_NOT_IMPLEMENTED;
+}
+
+
+/**
+ * Macro for calling iemCImplRaiseInvalidOpcode().
+ *
+ * This enables us to add/remove arguments and force different levels of
+ * inlining as we wish.
+ *
+ * @return  Strict VBox status code.
+ */
+#define IEMOP_RAISE_INVALID_OPCODE()        IEM_MC_DEFER_TO_CIMPL_0(iemCImplRaiseInvalidOpcode)
+IEM_CIMPL_DEF_0(iemCImplRaiseInvalidOpcode)
+{
+    AssertFailed();
+    return VERR_NOT_IMPLEMENTED;
+}
+
+
+/** @}  */
+
+
+/*
+ *
+ * Helper routines.
+ * Helper routines.
+ * Helper routines.
+ *
+ */
+
+/**
+ * Recalculates the effective operand size.
+ *
+ * @param   pIemCpu             The IEM state.
+ */
+static void iemRecalEffOpSize(PIEMCPU pIemCpu)
+{
+    switch (pIemCpu->enmCpuMode)
+    {
+        case IEMMODE_16BIT:
+            pIemCpu->enmEffOpSize = pIemCpu->fPrefixes & IEM_OP_PRF_SIZE_OP ? IEMMODE_32BIT : IEMMODE_16BIT;
+            break;
+        case IEMMODE_32BIT:
+            pIemCpu->enmEffOpSize = pIemCpu->fPrefixes & IEM_OP_PRF_SIZE_OP ? IEMMODE_16BIT : IEMMODE_32BIT;
+            break;
+        case IEMMODE_64BIT:
+            switch (pIemCpu->fPrefixes & (IEM_OP_PRF_SIZE_REX_W | IEM_OP_PRF_SIZE_OP))
+            {
+                case 0:
+                    pIemCpu->enmEffOpSize = pIemCpu->enmDefOpSize;
+                    break;
+                case IEM_OP_PRF_SIZE_OP:
+                    pIemCpu->enmEffOpSize = IEMMODE_16BIT;
+                    break;
+                case IEM_OP_PRF_SIZE_REX_W:
+                case IEM_OP_PRF_SIZE_REX_W | IEM_OP_PRF_SIZE_OP:
+                    pIemCpu->enmEffOpSize = IEMMODE_64BIT;
+                    break;
+            }
+            break;
+        default:
+            AssertFailed();
+    }
+}
+
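+/* Examples of the rules above: in 64-bit mode a 0x66 prefix selects a 16-bit
+ * operand, but REX.W takes precedence, so REX.W together with 0x66 still
+ * yields a 64-bit operand (per the last two switch cases). */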
+
+/**
+ * Sets the default operand size to 64-bit and recalculates the effective
+ * operand size.
+ *
+ * @param   pIemCpu             The IEM state.
+ */
+static void iemRecalEffOpSize64Default(PIEMCPU pIemCpu)
+{
+    Assert(pIemCpu->enmCpuMode == IEMMODE_64BIT);
+    pIemCpu->enmDefOpSize = IEMMODE_64BIT;
+    if ((pIemCpu->fPrefixes & (IEM_OP_PRF_SIZE_REX_W | IEM_OP_PRF_SIZE_OP)) != IEM_OP_PRF_SIZE_OP)
+        pIemCpu->enmEffOpSize = IEMMODE_64BIT;
+    else
+        pIemCpu->enmEffOpSize = IEMMODE_16BIT;
+}
+
+
+/*
+ *
+ * Common opcode decoders.
+ * Common opcode decoders.
+ * Common opcode decoders.
+ *
+ */
+
+/** Stubs an opcode. */
+#define FNIEMOP_STUB(a_Name) \
+    FNIEMOP_DEF(a_Name) \
+    { \
+        IEMOP_MNEMONIC(#a_Name); \
+        AssertMsgFailed(("After %d instructions\n", pIemCpu->cInstructions)); \
+        return VERR_NOT_IMPLEMENTED; \
+    } \
+    typedef int ignore_semicolon
+
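+/* Usage sketch (hypothetical opcode name):
+ *
+ *      FNIEMOP_STUB(iemOp_Invalid);
+ *
+ * This expands to a function asserting and returning VERR_NOT_IMPLEMENTED;
+ * the trailing typedef merely swallows the semicolon at the use site. */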
+
+
+/** @name   Register Access.
+ * @{
+ */
+
+/**
+ * Gets a reference (pointer) to the specified hidden segment register.
+ *
+ * @returns Hidden register reference.
+ * @param   pIemCpu             The per CPU data.
+ * @param   iSegReg             The segment register.
+ */
+static PCPUMSELREGHID iemSRegGetHid(PIEMCPU pIemCpu, uint8_t iSegReg)
+{
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+    switch (iSegReg)
+    {
+        case X86_SREG_ES: return &pCtx->esHid;
+        case X86_SREG_CS: return &pCtx->csHid;
+        case X86_SREG_SS: return &pCtx->ssHid;
+        case X86_SREG_DS: return &pCtx->dsHid;
+        case X86_SREG_FS: return &pCtx->fsHid;
+        case X86_SREG_GS: return &pCtx->gsHid;
+    }
+    AssertFailedReturn(NULL);
+}
+
+
+/**
+ * Gets a reference (pointer) to the specified segment register (the selector
+ * value).
+ *
+ * @returns Pointer to the selector variable.
+ * @param   pIemCpu             The per CPU data.
+ * @param   iSegReg             The segment register.
+ */
+static uint16_t *iemSRegRef(PIEMCPU pIemCpu, uint8_t iSegReg)
+{
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+    switch (iSegReg)
+    {
+        case X86_SREG_ES: return &pCtx->es;
+        case X86_SREG_CS: return &pCtx->cs;
+        case X86_SREG_SS: return &pCtx->ss;
+        case X86_SREG_DS: return &pCtx->ds;
+        case X86_SREG_FS: return &pCtx->fs;
+        case X86_SREG_GS: return &pCtx->gs;
+    }
+    AssertFailedReturn(NULL);
+}
+
+
+/**
+ * Fetches the selector value of a segment register.
+ *
+ * @returns The selector value.
+ * @param   pIemCpu             The per CPU data.
+ * @param   iSegReg             The segment register.
+ */
+static uint16_t iemSRegFetchU16(PIEMCPU pIemCpu, uint8_t iSegReg)
+{
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+    switch (iSegReg)
+    {
+        case X86_SREG_ES: return pCtx->es;
+        case X86_SREG_CS: return pCtx->cs;
+        case X86_SREG_SS: return pCtx->ss;
+        case X86_SREG_DS: return pCtx->ds;
+        case X86_SREG_FS: return pCtx->fs;
+        case X86_SREG_GS: return pCtx->gs;
+    }
+    AssertFailedReturn(0xffff);
+}
+
+
+/**
+ * Gets a reference (pointer) to the specified general register.
+ *
+ * @returns Register reference.
+ * @param   pIemCpu             The per CPU data.
+ * @param   iReg                The general register.
+ */
+static void *iemGRegRef(PIEMCPU pIemCpu, uint8_t iReg)
+{
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+    switch (iReg)
+    {
+        case X86_GREG_xAX: return &pCtx->rax;
+        case X86_GREG_xCX: return &pCtx->rcx;
+        case X86_GREG_xDX: return &pCtx->rdx;
+        case X86_GREG_xBX: return &pCtx->rbx;
+        case X86_GREG_xSP: return &pCtx->rsp;
+        case X86_GREG_xBP: return &pCtx->rbp;
+        case X86_GREG_xSI: return &pCtx->rsi;
+        case X86_GREG_xDI: return &pCtx->rdi;
+        case X86_GREG_x8:  return &pCtx->r8;
+        case X86_GREG_x9:  return &pCtx->r9;
+        case X86_GREG_x10: return &pCtx->r10;
+        case X86_GREG_x11: return &pCtx->r11;
+        case X86_GREG_x12: return &pCtx->r12;
+        case X86_GREG_x13: return &pCtx->r13;
+        case X86_GREG_x14: return &pCtx->r14;
+        case X86_GREG_x15: return &pCtx->r15;
+    }
+    AssertFailedReturn(NULL);
+}
+
+
+/**
+ * Gets a reference (pointer) to the specified 8-bit general register.
+ *
+ * Because of AH, CH, DH and BH we cannot use iemGRegRef directly here.
+ *
+ * @returns Register reference.
+ * @param   pIemCpu             The per CPU data.
+ * @param   iReg                The register.
+ */
+static uint8_t *iemGRegRefU8(PIEMCPU pIemCpu, uint8_t iReg)
+{
+    if (pIemCpu->fPrefixes & IEM_OP_PRF_REX)
+        return (uint8_t *)iemGRegRef(pIemCpu, iReg);
+
+    uint8_t *pu8Reg = (uint8_t *)iemGRegRef(pIemCpu, iReg & 3);
+    if (iReg >= 4)
+        pu8Reg++;
+    return pu8Reg;
+}
+
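+/* Worked example: without a REX prefix, register encodings 4-7 address the
+ * high bytes of the first four GPRs, so iReg=4 resolves to AH: (4 & 3)
+ * selects rAX and the pointer is then advanced by one byte. */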
+
+/**
+ * Fetches the value of an 8-bit general register.
+ *
+ * @returns The register value.
+ * @param   pIemCpu             The per CPU data.
+ * @param   iReg                The register.
+ */
+static uint8_t iemGRegFetchU8(PIEMCPU pIemCpu, uint8_t iReg)
+{
+    uint8_t const *pbSrc = iemGRegRefU8(pIemCpu, iReg);
+    return *pbSrc;
+}
+
+
+/**
+ * Fetches the value of a 16-bit general register.
+ *
+ * @returns The register value.
+ * @param   pIemCpu             The per CPU data.
+ * @param   iReg                The register.
+ */
+static uint16_t iemGRegFetchU16(PIEMCPU pIemCpu, uint8_t iReg)
+{
+    return *(uint16_t *)iemGRegRef(pIemCpu, iReg);
+}
+
+
+/**
+ * Fetches the value of a 32-bit general register.
+ *
+ * @returns The register value.
+ * @param   pIemCpu             The per CPU data.
+ * @param   iReg                The register.
+ */
+static uint32_t iemGRegFetchU32(PIEMCPU pIemCpu, uint8_t iReg)
+{
+    return *(uint32_t *)iemGRegRef(pIemCpu, iReg);
+}
+
+
+/**
+ * Fetches the value of a 64-bit general register.
+ *
+ * @returns The register value.
+ * @param   pIemCpu             The per CPU data.
+ * @param   iReg                The register.
+ */
+static uint64_t iemGRegFetchU64(PIEMCPU pIemCpu, uint8_t iReg)
+{
+    return *(uint64_t *)iemGRegRef(pIemCpu, iReg);
+}
+
+
+/**
+ * Adds an 8-bit signed jump offset to RIP/EIP/IP.
+ *
+ * May raise a \#GP(0) if the new RIP is non-canonical or outside the code
+ * segment limit.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The per CPU data.
+ * @param   offNextInstr        The offset of the next instruction.
+ */
+static VBOXSTRICTRC iemRegRipRelativeJumpS8(PIEMCPU pIemCpu, int8_t offNextInstr)
+{
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+        {
+            uint16_t uNewIp = pCtx->ip + offNextInstr + pIemCpu->offOpcode;
+            if (   uNewIp > pCtx->csHid.u32Limit
+                && pIemCpu->enmCpuMode != IEMMODE_64BIT) /* no need to check for non-canonical. */
+                return iemRaiseGeneralProtectionFault0(pIemCpu);
+            pCtx->rip = uNewIp;
+            break;
+        }
+
+        case IEMMODE_32BIT:
+        {
+            Assert(pCtx->rip <= UINT32_MAX);
+            Assert(pIemCpu->enmCpuMode != IEMMODE_64BIT);
+
+            uint32_t uNewEip = pCtx->eip + offNextInstr + pIemCpu->offOpcode;
+            if (uNewEip > pCtx->csHid.u32Limit)
+                return iemRaiseGeneralProtectionFault0(pIemCpu);
+            pCtx->rip = uNewEip;
+            break;
+        }
+
+        case IEMMODE_64BIT:
+        {
+            Assert(pIemCpu->enmCpuMode == IEMMODE_64BIT);
+
+            uint64_t uNewRip = pCtx->rip + offNextInstr + pIemCpu->offOpcode;
+            if (!IEM_IS_CANONICAL(uNewRip))
+                return iemRaiseGeneralProtectionFault0(pIemCpu);
+            pCtx->rip = uNewRip;
+            break;
+        }
+
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+
+    return VINF_SUCCESS;
+}
+
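+/* Note on the arithmetic above (inferred from the decoder state): rip still
+ * points at the start of the instruction while decoding, so the jump target
+ * is start + instruction length (pIemCpu->offOpcode) + the signed offset.
+ * E.g. a two byte 'jmp short' with offset -2 (EB FE) at IP 0x100 gives
+ * 0x100 + 2 - 2 = 0x100, i.e. a jump to itself. */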
+
+/**
+ * Adds a 16-bit signed jump offset to RIP/EIP/IP.
+ *
+ * May raise a \#GP(0) if the new RIP is non-canonical or outside the code
+ * segment limit.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The per CPU data.
+ * @param   offNextInstr        The offset of the next instruction.
+ */
+static VBOXSTRICTRC iemRegRipRelativeJumpS16(PIEMCPU pIemCpu, int16_t offNextInstr)
+{
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+    Assert(pIemCpu->enmEffOpSize == IEMMODE_16BIT);
+
+    uint16_t uNewIp = pCtx->ip + offNextInstr + pIemCpu->offOpcode;
+    if (   uNewIp > pCtx->csHid.u32Limit
+        && pIemCpu->enmCpuMode != IEMMODE_64BIT) /* no need to check for non-canonical. */
+        return iemRaiseGeneralProtectionFault0(pIemCpu);
+    /** @todo Test 16-bit jump in 64-bit mode.  */
+    pCtx->rip = uNewIp;
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Adds a 32-bit signed jump offset to RIP/EIP/IP.
+ *
+ * May raise a \#GP(0) if the new RIP is non-canonical or outside the code
+ * segment limit.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The per CPU data.
+ * @param   offNextInstr        The offset of the next instruction.
+ */
+static VBOXSTRICTRC iemRegRipRelativeJumpS32(PIEMCPU pIemCpu, int32_t offNextInstr)
+{
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+    Assert(pIemCpu->enmEffOpSize != IEMMODE_16BIT);
+
+    if (pIemCpu->enmEffOpSize == IEMMODE_32BIT)
+    {
+        Assert(pCtx->rip <= UINT32_MAX); Assert(pIemCpu->enmCpuMode != IEMMODE_64BIT);
+
+        uint32_t uNewEip = pCtx->eip + offNextInstr + pIemCpu->offOpcode;
+        if (uNewEip > pCtx->csHid.u32Limit)
+            return iemRaiseGeneralProtectionFault0(pIemCpu);
+        pCtx->rip = uNewEip;
+    }
+    else
+    {
+        Assert(pIemCpu->enmCpuMode == IEMMODE_64BIT);
+
+        uint64_t uNewRip = pCtx->rip + offNextInstr + pIemCpu->offOpcode;
+        if (!IEM_IS_CANONICAL(uNewRip))
+            return iemRaiseGeneralProtectionFault0(pIemCpu);
+        pCtx->rip = uNewRip;
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Performs a near jump to the specified address.
+ *
+ * May raise a \#GP(0) if the new RIP is non-canonical or outside the code
+ * segment limit.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The per CPU data.
+ * @param   uNewRip             The new RIP value.
+ */
+static VBOXSTRICTRC iemRegRipJump(PIEMCPU pIemCpu, uint64_t uNewRip)
+{
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+        {
+            Assert(uNewRip <= UINT16_MAX);
+            if (   uNewRip > pCtx->csHid.u32Limit
+                && pIemCpu->enmCpuMode != IEMMODE_64BIT) /* no need to check for non-canonical. */
+                return iemRaiseGeneralProtectionFault0(pIemCpu);
+            /** @todo Test 16-bit jump in 64-bit mode.  */
+            pCtx->rip = uNewRip;
+            break;
+        }
+
+        case IEMMODE_32BIT:
+        {
+            Assert(uNewRip <= UINT32_MAX);
+            Assert(pCtx->rip <= UINT32_MAX);
+            Assert(pIemCpu->enmCpuMode != IEMMODE_64BIT);
+
+            if (uNewRip > pCtx->csHid.u32Limit)
+                return iemRaiseGeneralProtectionFault0(pIemCpu);
+            pCtx->rip = uNewRip;
+            break;
+        }
+
+        case IEMMODE_64BIT:
+        {
+            Assert(pIemCpu->enmCpuMode == IEMMODE_64BIT);
+
+            if (!IEM_IS_CANONICAL(uNewRip))
+                return iemRaiseGeneralProtectionFault0(pIemCpu);
+            pCtx->rip = uNewRip;
+            break;
+        }
+
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Gets the address of the top of the stack.
+ *
+ * @returns The effective stack pointer (guest virtual address).
+ * @param   pCtx                The CPU context which SP/ESP/RSP should be
+ *                              read from.
+ */
+DECLINLINE(RTGCPTR) iemRegGetEffRsp(PCCPUMCTX pCtx)
+{
+    if (pCtx->ssHid.Attr.n.u1Long)
+        return pCtx->rsp;
+    if (pCtx->ssHid.Attr.n.u1DefBig)
+        return pCtx->esp;
+    return pCtx->sp;
+}
+
+
+/**
+ * Updates the RIP/EIP/IP to point to the next instruction.
+ *
+ * @param   pIemCpu             The per CPU data.
+ * @param   cbInstr             The number of bytes to add.
+ */
+static void iemRegAddToRip(PIEMCPU pIemCpu, uint8_t cbInstr)
+{
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+    switch (pIemCpu->enmCpuMode)
+    {
+        case IEMMODE_16BIT:
+            Assert(pCtx->rip <= UINT16_MAX);
+            pCtx->eip += cbInstr;
+            pCtx->eip &= UINT32_C(0xffff);
+            break;
+
+        case IEMMODE_32BIT:
+            pCtx->eip += cbInstr;
+            Assert(pCtx->rip <= UINT32_MAX);
+            break;
+
+        case IEMMODE_64BIT:
+            pCtx->rip += cbInstr;
+            break;
+        default: AssertFailed();
+    }
+}
+
+
+/**
+ * Updates the RIP/EIP/IP to point to the next instruction.
+ *
+ * @param   pIemCpu             The per CPU data.
+ */
+static void iemRegUpdateRip(PIEMCPU pIemCpu)
+{
+    iemRegAddToRip(pIemCpu, pIemCpu->offOpcode);
+}
+
+
+/**
+ * Adds to the stack pointer.
+ *
+ * @param   pCtx                The CPU context which SP/ESP/RSP should be
+ *                              updated.
+ * @param   cbToAdd             The number of bytes to add.
+ */
+DECLINLINE(void) iemRegAddToRsp(PCPUMCTX pCtx, uint8_t cbToAdd)
+{
+    if (pCtx->ssHid.Attr.n.u1Long)
+        pCtx->rsp += cbToAdd;
+    else if (pCtx->ssHid.Attr.n.u1DefBig)
+        pCtx->esp += cbToAdd;
+    else
+        pCtx->sp  += cbToAdd;
+}
+
+
+/**
+ * Subtracts from the stack pointer.
+ *
+ * @param   pCtx                The CPU context which SP/ESP/RSP should be
+ *                              updated.
+ * @param   cbToSub             The number of bytes to subtract.
+ */
+DECLINLINE(void) iemRegSubFromRsp(PCPUMCTX pCtx, uint8_t cbToSub)
+{
+    if (pCtx->ssHid.Attr.n.u1Long)
+        pCtx->rsp -= cbToSub;
+    else if (pCtx->ssHid.Attr.n.u1DefBig)
+        pCtx->esp -= cbToSub;
+    else
+        pCtx->sp  -= cbToSub;
+}
+
+
+/**
+ * Adds to the temporary stack pointer.
+ *
+ * @param   pTmpRsp             The temporary SP/ESP/RSP to update.
+ * @param   cbToAdd             The number of bytes to add.
+ * @param   pCtx                Where to get the current stack mode.
+ */
+DECLINLINE(void) iemRegAddToRspEx(PRTUINT64U pTmpRsp, uint8_t cbToAdd, PCCPUMCTX pCtx)
+{
+    if (pCtx->ssHid.Attr.n.u1Long)
+        pTmpRsp->u           += cbToAdd;
+    else if (pCtx->ssHid.Attr.n.u1DefBig)
+        pTmpRsp->DWords.dw0  += cbToAdd;
+    else
+        pTmpRsp->Words.w0    += cbToAdd;
+}
+
+
+/**
+ * Subtracts from the temporary stack pointer.
+ *
+ * @param   pTmpRsp             The temporary SP/ESP/RSP to update.
+ * @param   cbToSub             The number of bytes to subtract.
+ * @param   pCtx                Where to get the current stack mode.
+ */
+DECLINLINE(void) iemRegSubFromRspEx(PRTUINT64U pTmpRsp, uint8_t cbToSub, PCCPUMCTX pCtx)
+{
+    if (pCtx->ssHid.Attr.n.u1Long)
+        pTmpRsp->u          -= cbToSub;
+    else if (pCtx->ssHid.Attr.n.u1DefBig)
+        pTmpRsp->DWords.dw0 -= cbToSub;
+    else
+        pTmpRsp->Words.w0   -= cbToSub;
+}
+
+
+/**
+ * Calculates the effective stack address for a push of the specified size as
+ * well as the new RSP value (upper bits may be masked).
+ *
+ * @returns Effective stack address for the push.
+ * @param   pCtx                Where to get the current stack mode.
+ * @param   cbItem              The size of the stack item to push.
+ * @param   puNewRsp            Where to return the new RSP value.
+ */
+DECLINLINE(RTGCPTR) iemRegGetRspForPush(PCCPUMCTX pCtx, uint8_t cbItem, uint64_t *puNewRsp)
+{
+    RTUINT64U   uTmpRsp;
+    RTGCPTR     GCPtrTop;
+    uTmpRsp.u = pCtx->rsp;
+
+    if (pCtx->ssHid.Attr.n.u1Long)
+        GCPtrTop = uTmpRsp.u            -= cbItem;
+    else if (pCtx->ssHid.Attr.n.u1DefBig)
+        GCPtrTop = uTmpRsp.DWords.dw0   -= cbItem;
+    else
+        GCPtrTop = uTmpRsp.Words.w0     -= cbItem;
+    *puNewRsp = uTmpRsp.u;
+    return GCPtrTop;
+}
+
+
+/**
+ * Gets the current stack pointer and calculates the value after a pop of the
+ * specified size.
+ *
+ * @returns Current stack pointer.
+ * @param   pCtx                Where to get the current stack mode.
+ * @param   cbItem              The size of the stack item to pop.
+ * @param   puNewRsp            Where to return the new RSP value.
+ */
+DECLINLINE(RTGCPTR) iemRegGetRspForPop(PCCPUMCTX pCtx, uint8_t cbItem, uint64_t *puNewRsp)
+{
+    RTUINT64U   uTmpRsp;
+    RTGCPTR     GCPtrTop;
+    uTmpRsp.u = pCtx->rsp;
+
+    if (pCtx->ssHid.Attr.n.u1Long)
+    {
+        GCPtrTop = uTmpRsp.u;
+        uTmpRsp.u += cbItem;
+    }
+    else if (pCtx->ssHid.Attr.n.u1DefBig)
+    {
+        GCPtrTop = uTmpRsp.DWords.dw0;
+        uTmpRsp.DWords.dw0 += cbItem;
+    }
+    else
+    {
+        GCPtrTop = uTmpRsp.Words.w0;
+        uTmpRsp.Words.w0 += cbItem;
+    }
+    *puNewRsp = uTmpRsp.u;
+    return GCPtrTop;
+}
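+
+
+/* Illustrative sketch only (not part of the change): these helpers split a
+ * stack operation into "compute" and "commit" so that a faulting memory
+ * access leaves RSP untouched.  A push therefore looks roughly like this
+ * (the push helpers further down follow exactly this shape):
+ *
+ *      uint64_t uNewRsp;
+ *      RTGCPTR  GCPtrTop = iemRegGetRspForPush(pCtx, 2, &uNewRsp);
+ *      // ... map GCPtrTop and write the 16-bit value ...
+ *      pCtx->rsp = uNewRsp;    // commit only if the write succeeded
+ */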
+
+
+/**
+ * Calculates the effective stack address for a push of the specified size as
+ * well as the new temporary RSP value (upper bits may be masked).
+ *
+ * @returns Effective stack address for the push.
+ * @param   pTmpRsp             The temporary stack pointer.  This is updated.
+ * @param   cbItem              The size of the stack item to push.
+ * @param   pCtx                Where to get the current stack mode.
+ */
+DECLINLINE(RTGCPTR) iemRegGetRspForPushEx(PRTUINT64U pTmpRsp, uint8_t cbItem, PCCPUMCTX pCtx)
+{
+    RTGCPTR GCPtrTop;
+
+    if (pCtx->ssHid.Attr.n.u1Long)
+        GCPtrTop = pTmpRsp->u          -= cbItem;
+    else if (pCtx->ssHid.Attr.n.u1DefBig)
+        GCPtrTop = pTmpRsp->DWords.dw0 -= cbItem;
+    else
+        GCPtrTop = pTmpRsp->Words.w0   -= cbItem;
+    return GCPtrTop;
+}
+
+
+/**
+ * Gets the effective stack address for a pop of the specified size and
+ * calculates and updates the temporary RSP.
+ *
+ * @returns Current stack pointer.
+ * @param   pTmpRsp             The temporary stack pointer.  This is updated.
+ * @param   cbItem              The size of the stack item to pop.
+ * @param   pCtx                Where to get the current stack mode.
+ */
+DECLINLINE(RTGCPTR) iemRegGetRspForPopEx(PRTUINT64U pTmpRsp, uint8_t cbItem, PCCPUMCTX pCtx)
+{
+    RTGCPTR GCPtrTop;
+    if (pCtx->ssHid.Attr.n.u1Long)
+    {
+        GCPtrTop = pTmpRsp->u;
+        pTmpRsp->u          += cbItem;
+    }
+    else if (pCtx->ssHid.Attr.n.u1DefBig)
+    {
+        GCPtrTop = pTmpRsp->DWords.dw0;
+        pTmpRsp->DWords.dw0 += cbItem;
+    }
+    else
+    {
+        GCPtrTop = pTmpRsp->Words.w0;
+        pTmpRsp->Words.w0   += cbItem;
+    }
+    return GCPtrTop;
+}
+
+
+/**
+ * Checks if an AMD CPUID feature bit is set.
+ *
+ * @returns true / false.
+ *
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   fEdx                The EDX bit(s) to test, or 0 when testing ECX.
+ * @param   fEcx                The ECX bit(s) to test, or 0 when testing EDX.
+ * @remarks Used via IEM_IS_AMD_CPUID_FEATURE_PRESENT_ECX.
+ */
+static bool iemRegIsAmdCpuIdFeaturePresent(PIEMCPU pIemCpu, uint32_t fEdx, uint32_t fEcx)
+{
+    uint32_t uEax, uEbx, uEcx, uEdx;
+    CPUMGetGuestCpuId(IEMCPU_TO_VMCPU(pIemCpu), 0x80000001, &uEax, &uEbx, &uEcx, &uEdx);
+    return (fEcx && (uEcx & fEcx))
+        || (fEdx && (uEdx & fEdx));
+}
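+
+/* Illustrative sketch only: the @remarks above names
+ * IEM_IS_AMD_CPUID_FEATURE_PRESENT_ECX as the intended wrapper, so a caller
+ * would test an ECX feature bit along these lines (the feature constant is
+ * just an example, not something introduced here):
+ *
+ *      if (IEM_IS_AMD_CPUID_FEATURE_PRESENT_ECX(X86_CPUID_AMD_FEATURE_ECX_CR8L))
+ *          // the feature is advertised to the guest
+ */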
+
+/** @}  */
+
+
+/** @name   Memory access.
+ *
+ * @{
+ */
+
+
+/**
+ * Checks if the given segment can be written to, raising the appropriate
+ * exception if not.
+ *
+ * @returns VBox strict status code.
+ *
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   pHid                Pointer to the hidden register.
+ * @param   iSegReg             The register number.
+ */
+static VBOXSTRICTRC iemMemSegCheckWriteAccessEx(PIEMCPU pIemCpu, PCCPUMSELREGHID pHid, uint8_t iSegReg)
+{
+    if (!pHid->Attr.n.u1Present)
+        return iemRaiseSelectorNotPresentBySegReg(pIemCpu, iSegReg);
+
+    if (   (   (pHid->Attr.n.u4Type & X86_SEL_TYPE_CODE)
+            || !(pHid->Attr.n.u4Type & X86_SEL_TYPE_WRITE) )
+        &&  pIemCpu->enmCpuMode != IEMMODE_64BIT )
+        return iemRaiseSelectorInvalidAccess(pIemCpu, iSegReg, IEM_ACCESS_DATA_W);
+
+    /** @todo DPL/RPL/CPL? */
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Checks if the given segment can be read from, raising the appropriate
+ * exception if not.
+ *
+ * @returns VBox strict status code.
+ *
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   pHid                Pointer to the hidden register.
+ * @param   iSegReg             The register number.
+ */
+static VBOXSTRICTRC iemMemSegCheckReadAccessEx(PIEMCPU pIemCpu, PCCPUMSELREGHID pHid, uint8_t iSegReg)
+{
+    if (!pHid->Attr.n.u1Present)
+        return iemRaiseSelectorNotPresentBySegReg(pIemCpu, iSegReg);
+
+    if (   (pHid->Attr.n.u4Type & (X86_SEL_TYPE_CODE | X86_SEL_TYPE_READ)) == X86_SEL_TYPE_CODE
+        &&  pIemCpu->enmCpuMode != IEMMODE_64BIT )
+        return iemRaiseSelectorInvalidAccess(pIemCpu, iSegReg, IEM_ACCESS_DATA_R);
+
+    /** @todo DPL/RPL/CPL? */
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Applies the segment limit, base and attributes.
+ *
+ * This may raise a \#GP or \#SS.
+ *
+ * @returns VBox strict status code.
+ *
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   fAccess             The kind of access which is being performed.
+ * @param   iSegReg             The index of the segment register to apply.
+ *                              This is UINT8_MAX if none (for IDT, GDT, LDT,
+ *                              TSS, ++).
+ * @param   cbMem               The number of bytes being accessed.
+ * @param   pGCPtrMem           Pointer to the guest memory address to apply
+ *                              segmentation to.  Input and output parameter.
+ */
+static VBOXSTRICTRC iemMemApplySegment(PIEMCPU pIemCpu, uint32_t fAccess, uint8_t iSegReg,
+                                       size_t cbMem, PRTGCPTR pGCPtrMem)
+{
+    if (iSegReg == UINT8_MAX)
+        return VINF_SUCCESS;
+
+    PCPUMSELREGHID pSel = iemSRegGetHid(pIemCpu, iSegReg);
+    switch (pIemCpu->enmCpuMode)
+    {
+        case IEMMODE_16BIT:
+        case IEMMODE_32BIT:
+        {
+            RTGCPTR32 GCPtrFirst32 = *pGCPtrMem;
+            RTGCPTR32 GCPtrLast32  = GCPtrFirst32 + cbMem - 1;
+
+            Assert(pSel->Attr.n.u1Present);
+            Assert(pSel->Attr.n.u1DescType);
+            if (!(pSel->Attr.n.u4Type & X86_SEL_TYPE_CODE))
+            {
+                if (   (fAccess & IEM_ACCESS_TYPE_WRITE)
+                    && !(pSel->Attr.n.u4Type & X86_SEL_TYPE_WRITE) )
+                    return iemRaiseSelectorInvalidAccess(pIemCpu, iSegReg, fAccess);
+
+                if (!IEM_IS_REAL_OR_V86_MODE(pIemCpu))
+                {
+                    /** @todo CPL check. */
+                }
+
+                /*
+                 * There are two kinds of data selectors, normal and expand down.
+                 */
+                if (!(pSel->Attr.n.u4Type & X86_SEL_TYPE_DOWN))
+                {
+                    if (   GCPtrFirst32 > pSel->u32Limit
+                        || GCPtrLast32  > pSel->u32Limit) /* yes, in real mode too (since 80286). */
+                        return iemRaiseSelectorBounds(pIemCpu, iSegReg, fAccess);
+
+                    *pGCPtrMem = GCPtrFirst32 += (uint32_t)pSel->u64Base;
+                }
+                else
+                {
+                    /** @todo implement expand down segments. */
+                    AssertFailed();
+                    return VERR_NOT_IMPLEMENTED;
+                }
+            }
+            else
+            {
+                /*
+                 * A code selector can usually be used to read through; writing
+                 * is only permitted in real and V8086 mode.
+                 */
+                if (   (   (fAccess & IEM_ACCESS_TYPE_WRITE)
+                        || (   (fAccess & IEM_ACCESS_TYPE_READ)
+                           && !(pSel->Attr.n.u4Type & X86_SEL_TYPE_READ)) )
+                    && !IEM_IS_REAL_OR_V86_MODE(pIemCpu) )
+                    return iemRaiseSelectorInvalidAccess(pIemCpu, iSegReg, fAccess);
+
+                if (   GCPtrFirst32 > pSel->u32Limit
+                    || GCPtrLast32  > pSel->u32Limit) /* yes, in real mode too (since 80286). */
+                    return iemRaiseSelectorBounds(pIemCpu, iSegReg, fAccess);
+
+                if (!IEM_IS_REAL_OR_V86_MODE(pIemCpu))
+                {
+                    /** @todo CPL check. */
+                }
+
+                *pGCPtrMem  = GCPtrFirst32 += (uint32_t)pSel->u64Base;
+            }
+            return VINF_SUCCESS;
+        }
+
+        case IEMMODE_64BIT:
+            if (iSegReg == X86_SREG_GS || iSegReg == X86_SREG_FS)
+                *pGCPtrMem += pSel->u64Base;
+            return VINF_SUCCESS;
+
+        default:
+            AssertFailedReturn(VERR_INTERNAL_ERROR_5);
+    }
+}
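+
+
+/* Usage sketch (hypothetical, not compiled): callers pass the unsegmented
+ * offset in *pGCPtrMem and, on success, get the linear address back in the
+ * same variable.  E.g. for a two byte read through ES:
+ *
+ *      RTGCPTR GCPtrMem = pCtx->di;
+ *      VBOXSTRICTRC rcStrict = iemMemApplySegment(pIemCpu, IEM_ACCESS_DATA_R,
+ *                                                 X86_SREG_ES, 2, &GCPtrMem);
+ *      if (rcStrict == VINF_SUCCESS)
+ *          // GCPtrMem now holds the linear address
+ */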
+
+
+/**
+ * Translates a virtual address to a physical address and checks if we can
+ * access the page as specified.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   GCPtrMem            The virtual address.
+ * @param   fAccess             The intended access.
+ * @param   pGCPhysMem          Where to return the physical address.
+ */
+static VBOXSTRICTRC iemMemPageTranslateAndCheckAccess(PIEMCPU pIemCpu, RTGCPTR GCPtrMem, uint32_t fAccess,
+                                                      PRTGCPHYS pGCPhysMem)
+{
+    /** @todo Need a different PGM interface here.  We're currently using
+     *        generic / REM interfaces.  This won't cut it for R0 & RC. */
+    RTGCPHYS    GCPhys;
+    uint64_t    fFlags;
+    int rc = PGMGstGetPage(IEMCPU_TO_VMCPU(pIemCpu), GCPtrMem, &fFlags, &GCPhys);
+    if (RT_FAILURE(rc))
+    {
+        /** @todo Check unassigned memory in unpaged mode. */
+        *pGCPhysMem = NIL_RTGCPHYS;
+        return iemRaisePageFault(pIemCpu, GCPtrMem, fAccess, rc);
+    }
+
+    if (    (fFlags & (X86_PTE_RW | X86_PTE_US | X86_PTE_PAE_NX)) != (X86_PTE_RW | X86_PTE_US)
+        &&  (   (   (fAccess & IEM_ACCESS_TYPE_WRITE) /* Write to read only memory? */
+                 && !(fFlags & X86_PTE_RW)
+                 && (   pIemCpu->uCpl != 0
+                     || (pIemCpu->CTX_SUFF(pCtx)->cr0 & X86_CR0_WP)) )
+             || (   !(fFlags & X86_PTE_US)            /* Kernel memory */
+                 &&  pIemCpu->uCpl == 3)
+             || (   (fAccess & IEM_ACCESS_TYPE_EXEC)  /* Executing non-executable memory? */
+                 && (fFlags & X86_PTE_PAE_NX)
+                 && (pIemCpu->CTX_SUFF(pCtx)->msrEFER & MSR_K6_EFER_NXE) )
+            )
+       )
+    {
+        *pGCPhysMem = NIL_RTGCPHYS;
+        return iemRaisePageFault(pIemCpu, GCPtrMem, fAccess, VERR_ACCESS_DENIED);
+    }
+
+    GCPhys |= GCPtrMem & PAGE_OFFSET_MASK;
+    *pGCPhysMem = GCPhys;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Maps a physical page.
+ *
+ * @returns VBox status code (see PGMR3PhysTlbGCPhys2Ptr).
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   GCPhysMem           The physical address.
+ * @param   fAccess             The intended access.
+ * @param   ppvMem              Where to return the mapping address.
+ */
+static int iemMemPageMap(PIEMCPU pIemCpu, RTGCPHYS GCPhysMem, uint32_t fAccess, void **ppvMem)
+{
+#ifdef IEM_VERIFICATION_MODE
+    /* Force the alternative path so we can ignore writes. */
+    if (fAccess & IEM_ACCESS_TYPE_WRITE)
+        return VERR_PGM_PHYS_TLB_CATCH_ALL;
+#endif
+
+    /*
+     * If we can map the page without trouble, we can do block processing
+     * until the end of the current page.
+     */
+    /** @todo need some better API. */
+    return PGMR3PhysTlbGCPhys2Ptr(IEMCPU_TO_VM(pIemCpu),
+                                  GCPhysMem,
+                                  RT_BOOL(fAccess & IEM_ACCESS_TYPE_WRITE),
+                                  ppvMem);
+}
+
+
+/**
+ * Looks up a memory mapping entry.
+ *
+ * @returns The mapping index (positive) or VERR_NOT_FOUND (negative).
+ * @param   pIemCpu         The IEM per CPU data.
+ * @param   pvMem           The memory address.
+ * @param   fAccess         The access type to look for.
+ */
+DECLINLINE(int) iemMapLookup(PIEMCPU pIemCpu, void *pvMem, uint32_t fAccess)
+{
+    fAccess &= IEM_ACCESS_WHAT_MASK | IEM_ACCESS_TYPE_MASK;
+    if (   pIemCpu->aMemMappings[0].pv == pvMem
+        && (pIemCpu->aMemMappings[0].fAccess & (IEM_ACCESS_WHAT_MASK | IEM_ACCESS_TYPE_MASK)) == fAccess)
+        return 0;
+    if (   pIemCpu->aMemMappings[1].pv == pvMem
+        && (pIemCpu->aMemMappings[1].fAccess & (IEM_ACCESS_WHAT_MASK | IEM_ACCESS_TYPE_MASK)) == fAccess)
+        return 1;
+    if (   pIemCpu->aMemMappings[2].pv == pvMem
+        && (pIemCpu->aMemMappings[2].fAccess & (IEM_ACCESS_WHAT_MASK | IEM_ACCESS_TYPE_MASK)) == fAccess)
+        return 2;
+    return VERR_NOT_FOUND;
+}
+
+
+/**
+ * Finds a free memmap entry when using iNextMapping doesn't work.
+ *
+ * @returns Memory mapping index, 1024 on failure.
+ * @param   pIemCpu             The IEM per CPU data.
+ */
+static unsigned iemMemMapFindFree(PIEMCPU pIemCpu)
+{
+    /*
+     * The easy case.
+     */
+    if (pIemCpu->cActiveMappings == 0)
+    {
+        pIemCpu->iNextMapping = 1;
+        return 0;
+    }
+
+    /* There should be enough mappings for all instructions. */
+    AssertReturn(pIemCpu->cActiveMappings < RT_ELEMENTS(pIemCpu->aMemMappings), 1024);
+
+    AssertFailed(); /** @todo implement me. */
+    return 1024;
+}
+
+
+/**
+ * Commits a bounce buffer that needs writing back and unmaps it.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu         The IEM per CPU data.
+ * @param   iMemMap         The index of the buffer to commit.
+ */
+static VBOXSTRICTRC iemMemBounceBufferCommitAndUnmap(PIEMCPU pIemCpu, unsigned iMemMap)
+{
+    Assert(pIemCpu->aMemMappings[iMemMap].fAccess & IEM_ACCESS_BOUNCE_BUFFERED);
+    Assert(pIemCpu->aMemMappings[iMemMap].fAccess & IEM_ACCESS_TYPE_WRITE);
+
+    /*
+     * Do the writing.
+     */
+    int rc;
+#ifndef IEM_VERIFICATION_MODE /* No memory changes in verification mode. */
+    if (!pIemCpu->aMemBbMappings[iMemMap].fUnassigned)
+    {
+        uint16_t const  cbFirst  = pIemCpu->aMemBbMappings[iMemMap].cbFirst;
+        uint16_t const  cbSecond = pIemCpu->aMemBbMappings[iMemMap].cbSecond;
+        uint8_t const  *pbBuf    = &pIemCpu->aBounceBuffers[iMemMap].ab[0];
+        if (!pIemCpu->fByPassHandlers)
+        {
+            rc = PGMPhysWrite(IEMCPU_TO_VM(pIemCpu),
+                              pIemCpu->aMemBbMappings[iMemMap].GCPhysFirst,
+                              pbBuf,
+                              cbFirst);
+            if (cbSecond && rc == VINF_SUCCESS)
+                rc = PGMPhysWrite(IEMCPU_TO_VM(pIemCpu),
+                                  pIemCpu->aMemBbMappings[iMemMap].GCPhysSecond,
+                                  pbBuf + cbFirst,
+                                  cbSecond);
+        }
+        else
+        {
+            rc = PGMPhysSimpleWriteGCPhys(IEMCPU_TO_VM(pIemCpu),
+                                          pIemCpu->aMemBbMappings[iMemMap].GCPhysFirst,
+                                          pbBuf,
+                                          cbFirst);
+            if (cbSecond && rc == VINF_SUCCESS)
+                rc = PGMPhysSimpleWriteGCPhys(IEMCPU_TO_VM(pIemCpu),
+                                              pIemCpu->aMemBbMappings[iMemMap].GCPhysSecond,
+                                              pbBuf + cbFirst,
+                                              cbSecond);
+        }
+    }
+    else
+#endif
+        rc = VINF_SUCCESS;
+
+    /*
+     * Free the mapping entry.
+     */
+    pIemCpu->aMemMappings[iMemMap].fAccess = IEM_ACCESS_INVALID;
+    Assert(pIemCpu->cActiveMappings != 0);
+    pIemCpu->cActiveMappings--;
+    return rc;
+}
+
+
+/**
+ * iemMemMap worker that deals with a request crossing pages.
+ */
+static VBOXSTRICTRC iemMemBounceBufferMapCrossPage(PIEMCPU pIemCpu, int iMemMap, void **ppvMem,
+                                                   size_t cbMem, RTGCPTR GCPtrFirst, uint32_t fAccess)
+{
+    /*
+     * Do the address translations.
+     */
+    RTGCPHYS GCPhysFirst;
+    VBOXSTRICTRC rcStrict = iemMemPageTranslateAndCheckAccess(pIemCpu, GCPtrFirst, fAccess, &GCPhysFirst);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    RTGCPHYS GCPhysSecond;
+    rcStrict = iemMemPageTranslateAndCheckAccess(pIemCpu, GCPtrFirst + (cbMem - 1), fAccess, &GCPhysSecond);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+    GCPhysSecond &= ~(RTGCPHYS)PAGE_OFFSET_MASK;
+
+    /*
+     * Read in the current memory content if it's a read or execute access.
+     */
+    uint8_t        *pbBuf        = &pIemCpu->aBounceBuffers[iMemMap].ab[0];
+    uint32_t const  cbFirstPage  = PAGE_SIZE - (GCPhysFirst & PAGE_OFFSET_MASK);
+    uint32_t const  cbSecondPage = cbMem - cbFirstPage;
+
+    if (fAccess & (IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_EXEC))
+    {
+        int rc;
+        if (!pIemCpu->fByPassHandlers)
+        {
+            rc = PGMPhysRead(IEMCPU_TO_VM(pIemCpu), GCPhysFirst, pbBuf, cbFirstPage);
+            if (rc != VINF_SUCCESS)
+                return rc;
+            rc = PGMPhysRead(IEMCPU_TO_VM(pIemCpu), GCPhysSecond, pbBuf + cbFirstPage, cbSecondPage);
+            if (rc != VINF_SUCCESS)
+                return rc;
+        }
+        else
+        {
+            rc = PGMPhysSimpleReadGCPhys(IEMCPU_TO_VM(pIemCpu), pbBuf, GCPhysFirst, cbFirstPage);
+            if (rc != VINF_SUCCESS)
+                return rc;
+            rc = PGMPhysSimpleReadGCPhys(IEMCPU_TO_VM(pIemCpu), pbBuf + cbFirstPage, GCPhysSecond, cbSecondPage);
+            if (rc != VINF_SUCCESS)
+                return rc;
+        }
+    }
+#ifdef VBOX_STRICT
+    else
+        memset(pbBuf, 0xcc, cbMem);
+    if (cbMem < sizeof(pIemCpu->aBounceBuffers[iMemMap].ab))
+        memset(pbBuf + cbMem, 0xaa, sizeof(pIemCpu->aBounceBuffers[iMemMap].ab) - cbMem);
+#endif
+
+    /*
+     * Commit the bounce buffer entry.
+     */
+    pIemCpu->aMemBbMappings[iMemMap].GCPhysFirst    = GCPhysFirst;
+    pIemCpu->aMemBbMappings[iMemMap].GCPhysSecond   = GCPhysSecond;
+    pIemCpu->aMemBbMappings[iMemMap].cbFirst        = (uint16_t)cbFirstPage;
+    pIemCpu->aMemBbMappings[iMemMap].cbSecond       = (uint16_t)cbSecondPage;
+    pIemCpu->aMemBbMappings[iMemMap].fUnassigned    = false;
+    pIemCpu->aMemMappings[iMemMap].pv               = pbBuf;
+    pIemCpu->aMemMappings[iMemMap].fAccess          = fAccess | IEM_ACCESS_BOUNCE_BUFFERED;
+    pIemCpu->cActiveMappings++;
+
+    *ppvMem = pbBuf;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * iemMemMap worker that deals with iemMemPageMap failures.
+ */
+static VBOXSTRICTRC iemMemBounceBufferMapPhys(PIEMCPU pIemCpu, unsigned iMemMap, void **ppvMem, size_t cbMem,
+                                              RTGCPHYS GCPhysFirst, uint32_t fAccess, VBOXSTRICTRC rcMap)
+{
+    /*
+     * Filter out conditions we can handle and the ones which shouldn't happen.
+     */
+    if (   rcMap != VINF_PGM_PHYS_TLB_CATCH_WRITE
+        && rcMap != VERR_PGM_PHYS_TLB_CATCH_ALL
+        && rcMap != VERR_PGM_PHYS_TLB_UNASSIGNED)
+    {
+        AssertReturn(RT_FAILURE_NP(rcMap), VERR_INTERNAL_ERROR_3);
+        return rcMap;
+    }
+    pIemCpu->cPotentialExits++;
+
+    /*
+     * Read in the current memory content if it's a read or execute access.
+     */
+    uint8_t *pbBuf = &pIemCpu->aBounceBuffers[iMemMap].ab[0];
+    if (fAccess & (IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_EXEC))
+    {
+        if (rcMap == VERR_PGM_PHYS_TLB_UNASSIGNED)
+            memset(pbBuf, 0xff, cbMem);
+        else
+        {
+            int rc;
+            if (!pIemCpu->fByPassHandlers)
+                rc = PGMPhysRead(IEMCPU_TO_VM(pIemCpu), GCPhysFirst, pbBuf, cbMem);
+            else
+                rc = PGMPhysSimpleReadGCPhys(IEMCPU_TO_VM(pIemCpu), pbBuf, GCPhysFirst, cbMem);
+            if (rc != VINF_SUCCESS)
+                return rc;
+        }
+    }
+#ifdef VBOX_STRICT
+    else
+        memset(pbBuf, 0xcc, cbMem);
+    if (cbMem < sizeof(pIemCpu->aBounceBuffers[iMemMap].ab))
+        memset(pbBuf + cbMem, 0xaa, sizeof(pIemCpu->aBounceBuffers[iMemMap].ab) - cbMem);
+#endif
+
+    /*
+     * Commit the bounce buffer entry.
+     */
+    pIemCpu->aMemBbMappings[iMemMap].GCPhysFirst    = GCPhysFirst;
+    pIemCpu->aMemBbMappings[iMemMap].GCPhysSecond   = NIL_RTGCPHYS;
+    pIemCpu->aMemBbMappings[iMemMap].cbFirst        = (uint16_t)cbMem;
+    pIemCpu->aMemBbMappings[iMemMap].cbSecond       = 0;
+    pIemCpu->aMemBbMappings[iMemMap].fUnassigned    = rcMap == VERR_PGM_PHYS_TLB_UNASSIGNED;
+    pIemCpu->aMemMappings[iMemMap].pv               = pbBuf;
+    pIemCpu->aMemMappings[iMemMap].fAccess          = fAccess | IEM_ACCESS_BOUNCE_BUFFERED;
+    pIemCpu->cActiveMappings++;
+
+    *ppvMem = pbBuf;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Maps the specified guest memory for the given kind of access.
+ *
+ * This may be using bounce buffering of the memory if it's crossing a page
+ * boundary or if there is an access handler installed for any of it.  Because
+ * of lock prefix guarantees, we're in for some extra clutter when this
+ * happens.
+ *
+ * This may raise a \#GP, \#SS, \#PF or \#AC.
+ *
+ * @returns VBox strict status code.
+ *
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   ppvMem              Where to return the pointer to the mapped
+ *                              memory.
+ * @param   cbMem               The number of bytes to map.  This is usually 1,
+ *                              2, 4, 6, 8, 12 or 16.  When used by string
+ *                              operations it can be up to a page.
+ * @param   iSegReg             The index of the segment register to use for
+ *                              this access.  The base and limits are checked.
+ *                              Use UINT8_MAX to indicate that no segmentation
+ *                              is required (for IDT, GDT and LDT accesses).
+ * @param   GCPtrMem            The address of the guest memory.
+ * @param   fAccess             How the memory is being accessed.  The
+ *                              IEM_ACCESS_TYPE_XXX bit is used to figure out
+ *                              how to map the memory, while the
+ *                              IEM_ACCESS_WHAT_XXX bit is used when raising
+ *                              exceptions.
+ */
+static VBOXSTRICTRC iemMemMap(PIEMCPU pIemCpu, void **ppvMem, size_t cbMem, uint8_t iSegReg, RTGCPTR GCPtrMem, uint32_t fAccess)
+{
+    /*
+     * Check the input and figure out which mapping entry to use.
+     */
+    Assert(cbMem <= 16);
+    Assert(!(fAccess & ~(IEM_ACCESS_TYPE_MASK | IEM_ACCESS_WHAT_MASK)));
+
+    unsigned iMemMap = pIemCpu->iNextMapping;
+    if (iMemMap >= RT_ELEMENTS(pIemCpu->aMemMappings))
+    {
+        iMemMap = iemMemMapFindFree(pIemCpu);
+        AssertReturn(iMemMap < RT_ELEMENTS(pIemCpu->aMemMappings), VERR_INTERNAL_ERROR_3);
+    }
+
+    /*
+     * Map the memory, checking that we can actually access it.  If something
+     * slightly complicated happens, fall back on bounce buffering.
+     */
+    VBOXSTRICTRC rcStrict = iemMemApplySegment(pIemCpu, fAccess, iSegReg, cbMem, &GCPtrMem);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    if ((GCPtrMem & PAGE_OFFSET_MASK) + cbMem > PAGE_SIZE) /* Crossing a page boundary? */
+        return iemMemBounceBufferMapCrossPage(pIemCpu, iMemMap, ppvMem, cbMem, GCPtrMem, fAccess);
+
+    RTGCPHYS GCPhysFirst;
+    rcStrict = iemMemPageTranslateAndCheckAccess(pIemCpu, GCPtrMem, fAccess, &GCPhysFirst);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    void *pvMem;
+    rcStrict = iemMemPageMap(pIemCpu, GCPhysFirst, fAccess, &pvMem);
+    if (rcStrict != VINF_SUCCESS)
+        return iemMemBounceBufferMapPhys(pIemCpu, iMemMap, ppvMem, cbMem, GCPhysFirst, fAccess, rcStrict);
+
+    /*
+     * Fill in the mapping table entry.
+     */
+    pIemCpu->aMemMappings[iMemMap].pv      = pvMem;
+    pIemCpu->aMemMappings[iMemMap].fAccess = fAccess;
+    pIemCpu->iNextMapping = iMemMap + 1;
+    pIemCpu->cActiveMappings++;
+
+    *ppvMem = pvMem;
+    return VINF_SUCCESS;
+}
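+
+
+/* The mapping discipline in brief (sketch, condensing what the data fetch
+ * helpers below do): every successful iemMemMap must be paired with an
+ * iemMemCommitAndUnmap call using the same pointer and access flags, e.g.
+ * for a dword read:
+ *
+ *      uint32_t const *pu32Src;
+ *      VBOXSTRICTRC rcStrict = iemMemMap(pIemCpu, (void **)&pu32Src, sizeof(*pu32Src),
+ *                                        iSegReg, GCPtrMem, IEM_ACCESS_DATA_R);
+ *      if (rcStrict == VINF_SUCCESS)
+ *      {
+ *          uint32_t const u32Value = *pu32Src;
+ *          rcStrict = iemMemCommitAndUnmap(pIemCpu, (void *)pu32Src, IEM_ACCESS_DATA_R);
+ *      }
+ */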
+
+
+/**
+ * Commits the guest memory if bounce buffered and unmaps it.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   pvMem               The mapping.
+ * @param   fAccess             The kind of access.
+ */
+static VBOXSTRICTRC iemMemCommitAndUnmap(PIEMCPU pIemCpu, void *pvMem, uint32_t fAccess)
+{
+    int iMemMap = iemMapLookup(pIemCpu, pvMem, fAccess);
+    AssertReturn(iMemMap >= 0, iMemMap);
+
+    /*
+     * If it's bounce buffered, we need to write back the buffer.
+     */
+    if (   (pIemCpu->aMemMappings[iMemMap].fAccess & (IEM_ACCESS_BOUNCE_BUFFERED | IEM_ACCESS_TYPE_WRITE))
+        == (IEM_ACCESS_BOUNCE_BUFFERED | IEM_ACCESS_TYPE_WRITE))
+        return iemMemBounceBufferCommitAndUnmap(pIemCpu, iMemMap);
+
+    /* Free the entry. */
+    pIemCpu->aMemMappings[iMemMap].fAccess = IEM_ACCESS_INVALID;
+    Assert(pIemCpu->cActiveMappings != 0);
+    pIemCpu->cActiveMappings--;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Fetches a data byte.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   pu8Dst              Where to return the byte.
+ * @param   iSegReg             The index of the segment register to use for
+ *                              this access.  The base and limits are checked.
+ * @param   GCPtrMem            The address of the guest memory.
+ */
+static VBOXSTRICTRC iemMemFetchDataU8(PIEMCPU pIemCpu, uint8_t *pu8Dst, uint8_t iSegReg, RTGCPTR GCPtrMem)
+{
+    /* The lazy approach for now... */
+    uint8_t const *pu8Src;
+    VBOXSTRICTRC rc = iemMemMap(pIemCpu, (void **)&pu8Src, sizeof(*pu8Src), iSegReg, GCPtrMem, IEM_ACCESS_DATA_R);
+    if (rc == VINF_SUCCESS)
+    {
+        *pu8Dst = *pu8Src;
+        rc = iemMemCommitAndUnmap(pIemCpu, (void *)pu8Src, IEM_ACCESS_DATA_R);
+    }
+    return rc;
+}
+
+
+/**
+ * Fetches a data word.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   pu16Dst             Where to return the word.
+ * @param   iSegReg             The index of the segment register to use for
+ *                              this access.  The base and limits are checked.
+ * @param   GCPtrMem            The address of the guest memory.
+ */
+static VBOXSTRICTRC iemMemFetchDataU16(PIEMCPU pIemCpu, uint16_t *pu16Dst, uint8_t iSegReg, RTGCPTR GCPtrMem)
+{
+    /* The lazy approach for now... */
+    uint16_t const *pu16Src;
+    VBOXSTRICTRC rc = iemMemMap(pIemCpu, (void **)&pu16Src, sizeof(*pu16Src), iSegReg, GCPtrMem, IEM_ACCESS_DATA_R);
+    if (rc == VINF_SUCCESS)
+    {
+        *pu16Dst = *pu16Src;
+        rc = iemMemCommitAndUnmap(pIemCpu, (void *)pu16Src, IEM_ACCESS_DATA_R);
+    }
+    return rc;
+}
+
+
+/**
+ * Fetches a data dword.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   pu32Dst             Where to return the dword.
+ * @param   iSegReg             The index of the segment register to use for
+ *                              this access.  The base and limits are checked.
+ * @param   GCPtrMem            The address of the guest memory.
+ */
+static VBOXSTRICTRC iemMemFetchDataU32(PIEMCPU pIemCpu, uint32_t *pu32Dst, uint8_t iSegReg, RTGCPTR GCPtrMem)
+{
+    /* The lazy approach for now... */
+    uint32_t const *pu32Src;
+    VBOXSTRICTRC rc = iemMemMap(pIemCpu, (void **)&pu32Src, sizeof(*pu32Src), iSegReg, GCPtrMem, IEM_ACCESS_DATA_R);
+    if (rc == VINF_SUCCESS)
+    {
+        *pu32Dst = *pu32Src;
+        rc = iemMemCommitAndUnmap(pIemCpu, (void *)pu32Src, IEM_ACCESS_DATA_R);
+    }
+    return rc;
+}
+
+
+/**
+ * Fetches a data dword and sign extends it to a qword.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   pu64Dst             Where to return the sign extended value.
+ * @param   iSegReg             The index of the segment register to use for
+ *                              this access.  The base and limits are checked.
+ * @param   GCPtrMem            The address of the guest memory.
+ */
+static VBOXSTRICTRC iemMemFetchDataS32SxU64(PIEMCPU pIemCpu, uint64_t *pu64Dst, uint8_t iSegReg, RTGCPTR GCPtrMem)
+{
+    /* The lazy approach for now... */
+    int32_t const *pi32Src;
+    VBOXSTRICTRC rc = iemMemMap(pIemCpu, (void **)&pi32Src, sizeof(*pi32Src), iSegReg, GCPtrMem, IEM_ACCESS_DATA_R);
+    if (rc == VINF_SUCCESS)
+    {
+        *pu64Dst = *pi32Src;
+        rc = iemMemCommitAndUnmap(pIemCpu, (void *)pi32Src, IEM_ACCESS_DATA_R);
+    }
+    return rc;
+}
+
+
+/**
+ * Fetches a data qword.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   pu64Dst             Where to return the qword.
+ * @param   iSegReg             The index of the segment register to use for
+ *                              this access.  The base and limits are checked.
+ * @param   GCPtrMem            The address of the guest memory.
+ */
+static VBOXSTRICTRC iemMemFetchDataU64(PIEMCPU pIemCpu, uint64_t *pu64Dst, uint8_t iSegReg, RTGCPTR GCPtrMem)
+{
+    /* The lazy approach for now... */
+    uint64_t const *pu64Src;
+    VBOXSTRICTRC rc = iemMemMap(pIemCpu, (void **)&pu64Src, sizeof(*pu64Src), iSegReg, GCPtrMem, IEM_ACCESS_DATA_R);
+    if (rc == VINF_SUCCESS)
+    {
+        *pu64Dst = *pu64Src;
+        rc = iemMemCommitAndUnmap(pIemCpu, (void *)pu64Src, IEM_ACCESS_DATA_R);
+    }
+    return rc;
+}
+
+
+/**
+ * Fetches a descriptor register (lgdt, lidt).
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   pcbLimit            Where to return the limit.
+ * @param   pGCPtrBase          Where to return the base.
+ * @param   iSegReg             The index of the segment register to use for
+ *                              this access.  The base and limits are checked.
+ * @param   GCPtrMem            The address of the guest memory.
+ * @param   enmOpSize           The effective operand size.
+ */
+static VBOXSTRICTRC iemMemFetchDataXdtr(PIEMCPU pIemCpu, uint16_t *pcbLimit, PRTGCPTR pGCPtrBase,
+                                        uint8_t iSegReg, RTGCPTR GCPtrMem, IEMMODE enmOpSize)
+{
+    uint8_t const *pu8Src;
+    VBOXSTRICTRC rcStrict = iemMemMap(pIemCpu,
+                                      (void **)&pu8Src,
+                                      enmOpSize == IEMMODE_64BIT
+                                      ? 2 + 8
+                                      : enmOpSize == IEMMODE_32BIT
+                                      ? 2 + 4
+                                      : 2 + 3,
+                                      iSegReg,
+                                      GCPtrMem,
+                                      IEM_ACCESS_DATA_R);
+    if (rcStrict == VINF_SUCCESS)
+    {
+        *pcbLimit = RT_MAKE_U16(pu8Src[0], pu8Src[1]);
+        switch (enmOpSize)
+        {
+            case IEMMODE_16BIT:
+                *pGCPtrBase = RT_MAKE_U32_FROM_U8(pu8Src[2], pu8Src[3], pu8Src[4], 0);
+                break;
+            case IEMMODE_32BIT:
+                *pGCPtrBase = RT_MAKE_U32_FROM_U8(pu8Src[2], pu8Src[3], pu8Src[4], pu8Src[5]);
+                break;
+            case IEMMODE_64BIT:
+                *pGCPtrBase = RT_MAKE_U64_FROM_U8(pu8Src[2], pu8Src[3], pu8Src[4], pu8Src[5],
+                                                  pu8Src[6], pu8Src[7], pu8Src[8], pu8Src[9]);
+                break;
+
+                IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+        rcStrict = iemMemCommitAndUnmap(pIemCpu, (void *)pu8Src, IEM_ACCESS_DATA_R);
+    }
+    return rcStrict;
+}
+
+
+/**
+ * Stores a data byte.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   iSegReg             The index of the segment register to use for
+ *                              this access.  The base and limits are checked.
+ * @param   GCPtrMem            The address of the guest memory.
+ * @param   u8Value             The value to store.
+ */
+static VBOXSTRICTRC iemMemStoreDataU8(PIEMCPU pIemCpu, uint8_t iSegReg, RTGCPTR GCPtrMem, uint8_t u8Value)
+{
+    /* The lazy approach for now... */
+    uint8_t *pu8Dst;
+    VBOXSTRICTRC rc = iemMemMap(pIemCpu, (void **)&pu8Dst, sizeof(*pu8Dst), iSegReg, GCPtrMem, IEM_ACCESS_DATA_W);
+    if (rc == VINF_SUCCESS)
+    {
+        *pu8Dst = u8Value;
+        rc = iemMemCommitAndUnmap(pIemCpu, pu8Dst, IEM_ACCESS_DATA_W);
+    }
+    return rc;
+}
+
+
+/**
+ * Stores a data word.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   iSegReg             The index of the segment register to use for
+ *                              this access.  The base and limits are checked.
+ * @param   GCPtrMem            The address of the guest memory.
+ * @param   u16Value            The value to store.
+ */
+static VBOXSTRICTRC iemMemStoreDataU16(PIEMCPU pIemCpu, uint8_t iSegReg, RTGCPTR GCPtrMem, uint16_t u16Value)
+{
+    /* The lazy approach for now... */
+    uint16_t *pu16Dst;
+    VBOXSTRICTRC rc = iemMemMap(pIemCpu, (void **)&pu16Dst, sizeof(*pu16Dst), iSegReg, GCPtrMem, IEM_ACCESS_DATA_W);
+    if (rc == VINF_SUCCESS)
+    {
+        *pu16Dst = u16Value;
+        rc = iemMemCommitAndUnmap(pIemCpu, pu16Dst, IEM_ACCESS_DATA_W);
+    }
+    return rc;
+}
+
+
+/**
+ * Stores a data dword.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   iSegReg             The index of the segment register to use for
+ *                              this access.  The base and limits are checked.
+ * @param   GCPtrMem            The address of the guest memory.
+ * @param   u32Value            The value to store.
+ */
+static VBOXSTRICTRC iemMemStoreDataU32(PIEMCPU pIemCpu, uint8_t iSegReg, RTGCPTR GCPtrMem, uint32_t u32Value)
+{
+    /* The lazy approach for now... */
+    uint32_t *pu32Dst;
+    VBOXSTRICTRC rc = iemMemMap(pIemCpu, (void **)&pu32Dst, sizeof(*pu32Dst), iSegReg, GCPtrMem, IEM_ACCESS_DATA_W);
+    if (rc == VINF_SUCCESS)
+    {
+        *pu32Dst = u32Value;
+        rc = iemMemCommitAndUnmap(pIemCpu, pu32Dst, IEM_ACCESS_DATA_W);
+    }
+    return rc;
+}
+
+
+/**
+ * Stores a data qword.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   iSegReg             The index of the segment register to use for
+ *                              this access.  The base and limits are checked.
+ * @param   GCPtrMem            The address of the guest memory.
+ * @param   u64Value            The value to store.
+ */
+static VBOXSTRICTRC iemMemStoreDataU64(PIEMCPU pIemCpu, uint8_t iSegReg, RTGCPTR GCPtrMem, uint64_t u64Value)
+{
+    /* The lazy approach for now... */
+    uint64_t *pu64Dst;
+    VBOXSTRICTRC rc = iemMemMap(pIemCpu, (void **)&pu64Dst, sizeof(*pu64Dst), iSegReg, GCPtrMem, IEM_ACCESS_DATA_W);
+    if (rc == VINF_SUCCESS)
+    {
+        *pu64Dst = u64Value;
+        rc = iemMemCommitAndUnmap(pIemCpu, pu64Dst, IEM_ACCESS_DATA_W);
+    }
+    return rc;
+}
+
+
+/**
+ * Pushes a word onto the stack.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   u16Value            The value to push.
+ */
+static VBOXSTRICTRC iemMemStackPushU16(PIEMCPU pIemCpu, uint16_t u16Value)
+{
+    /* Decrement the stack pointer. */
+    uint64_t    uNewRsp;
+    PCPUMCTX    pCtx     = pIemCpu->CTX_SUFF(pCtx);
+    RTGCPTR     GCPtrTop = iemRegGetRspForPush(pCtx, 2, &uNewRsp);
+
+    /* Write the word the lazy way. */
+    uint16_t *pu16Dst;
+    VBOXSTRICTRC rc = iemMemMap(pIemCpu, (void **)&pu16Dst, sizeof(*pu16Dst), X86_SREG_SS, GCPtrTop, IEM_ACCESS_STACK_W);
+    if (rc == VINF_SUCCESS)
+    {
+        *pu16Dst = u16Value;
+        rc = iemMemCommitAndUnmap(pIemCpu, pu16Dst, IEM_ACCESS_STACK_W);
+    }
+
+    /* Commit the new RSP value unless an access handler made trouble. */
+    if (rc == VINF_SUCCESS)
+        pCtx->rsp = uNewRsp;
+
+    return rc;
+}
+
+
+/**
+ * Pushes a dword onto the stack.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   u32Value            The value to push.
+ */
+static VBOXSTRICTRC iemMemStackPushU32(PIEMCPU pIemCpu, uint32_t u32Value)
+{
+    /* Decrement the stack pointer. */
+    uint64_t    uNewRsp;
+    PCPUMCTX    pCtx     = pIemCpu->CTX_SUFF(pCtx);
+    RTGCPTR     GCPtrTop = iemRegGetRspForPush(pCtx, 4, &uNewRsp);
+
+    /* Write the dword the lazy way. */
+    uint32_t *pu32Dst;
+    VBOXSTRICTRC rc = iemMemMap(pIemCpu, (void **)&pu32Dst, sizeof(*pu32Dst), X86_SREG_SS, GCPtrTop, IEM_ACCESS_STACK_W);
+    if (rc == VINF_SUCCESS)
+    {
+        *pu32Dst = u32Value;
+        rc = iemMemCommitAndUnmap(pIemCpu, pu32Dst, IEM_ACCESS_STACK_W);
+    }
+
+    /* Commit the new RSP value unless an access handler made trouble. */
+    if (rc == VINF_SUCCESS)
+        pCtx->rsp = uNewRsp;
+
+    return rc;
+}
+
+
+/**
+ * Pushes a qword onto the stack.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   u64Value            The value to push.
+ */
+static VBOXSTRICTRC iemMemStackPushU64(PIEMCPU pIemCpu, uint64_t u64Value)
+{
+    /* Decrement the stack pointer. */
+    uint64_t    uNewRsp;
+    PCPUMCTX    pCtx     = pIemCpu->CTX_SUFF(pCtx);
+    RTGCPTR     GCPtrTop = iemRegGetRspForPush(pCtx, 8, &uNewRsp);
+
+    /* Write the qword the lazy way. */
+    uint64_t *pu64Dst;
+    VBOXSTRICTRC rc = iemMemMap(pIemCpu, (void **)&pu64Dst, sizeof(*pu64Dst), X86_SREG_SS, GCPtrTop, IEM_ACCESS_STACK_W);
+    if (rc == VINF_SUCCESS)
+    {
+        *pu64Dst = u64Value;
+        rc = iemMemCommitAndUnmap(pIemCpu, pu64Dst, IEM_ACCESS_STACK_W);
+    }
+
+    /* Commit the new RSP value unless an access handler made trouble. */
+    if (rc == VINF_SUCCESS)
+        pCtx->rsp = uNewRsp;
+
+    return rc;
+}
+
+
+/**
+ * Pops a word from the stack.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   pu16Value           Where to store the popped value.
+ */
+static VBOXSTRICTRC iemMemStackPopU16(PIEMCPU pIemCpu, uint16_t *pu16Value)
+{
+    /* Increment the stack pointer. */
+    uint64_t    uNewRsp;
+    PCPUMCTX    pCtx     = pIemCpu->CTX_SUFF(pCtx);
+    RTGCPTR     GCPtrTop = iemRegGetRspForPop(pCtx, 2, &uNewRsp);
+
+    /* Load the word the lazy way. */
+    uint16_t const *pu16Src;
+    VBOXSTRICTRC rc = iemMemMap(pIemCpu, (void **)&pu16Src, sizeof(*pu16Src), X86_SREG_SS, GCPtrTop, IEM_ACCESS_STACK_R);
+    if (rc == VINF_SUCCESS)
+    {
+        *pu16Value = *pu16Src;
+        rc = iemMemCommitAndUnmap(pIemCpu, (void *)pu16Src, IEM_ACCESS_STACK_R);
+
+        /* Commit the new RSP value. */
+        if (rc == VINF_SUCCESS)
+            pCtx->rsp = uNewRsp;
+    }
+
+    return rc;
+}
+
+
+/**
+ * Pops a dword from the stack.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   pu32Value           Where to store the popped value.
+ */
+static VBOXSTRICTRC iemMemStackPopU32(PIEMCPU pIemCpu, uint32_t *pu32Value)
+{
+    /* Increment the stack pointer. */
+    uint64_t    uNewRsp;
+    PCPUMCTX    pCtx     = pIemCpu->CTX_SUFF(pCtx);
+    RTGCPTR     GCPtrTop = iemRegGetRspForPop(pCtx, 4, &uNewRsp);
+
+    /* Load the dword the lazy way. */
+    uint32_t const *pu32Src;
+    VBOXSTRICTRC rc = iemMemMap(pIemCpu, (void **)&pu32Src, sizeof(*pu32Src), X86_SREG_SS, GCPtrTop, IEM_ACCESS_STACK_R);
+    if (rc == VINF_SUCCESS)
+    {
+        *pu32Value = *pu32Src;
+        rc = iemMemCommitAndUnmap(pIemCpu, (void *)pu32Src, IEM_ACCESS_STACK_R);
+
+        /* Commit the new RSP value. */
+        if (rc == VINF_SUCCESS)
+            pCtx->rsp = uNewRsp;
+    }
+
+    return rc;
+}
+
+
+/**
+ * Pops a qword from the stack.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   pu64Value           Where to store the popped value.
+ */
+static VBOXSTRICTRC iemMemStackPopU64(PIEMCPU pIemCpu, uint64_t *pu64Value)
+{
+    /* Increment the stack pointer. */
+    uint64_t    uNewRsp;
+    PCPUMCTX    pCtx     = pIemCpu->CTX_SUFF(pCtx);
+    RTGCPTR     GCPtrTop = iemRegGetRspForPop(pCtx, 8, &uNewRsp);
+
+    /* Load the qword the lazy way. */
+    uint64_t const *pu64Src;
+    VBOXSTRICTRC rc = iemMemMap(pIemCpu, (void **)&pu64Src, sizeof(*pu64Src), X86_SREG_SS, GCPtrTop, IEM_ACCESS_STACK_R);
+    if (rc == VINF_SUCCESS)
+    {
+        *pu64Value = *pu64Src;
+        rc = iemMemCommitAndUnmap(pIemCpu, (void *)pu64Src, IEM_ACCESS_STACK_R);
+
+        /* Commit the new RSP value. */
+        if (rc == VINF_SUCCESS)
+            pCtx->rsp = uNewRsp;
+    }
+
+    return rc;
+}
+
+
+/**
+ * Pushes a word onto the stack, using a temporary stack pointer.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   u16Value            The value to push.
+ * @param   pTmpRsp             Pointer to the temporary stack pointer.
+ */
+static VBOXSTRICTRC iemMemStackPushU16Ex(PIEMCPU pIemCpu, uint16_t u16Value, PRTUINT64U pTmpRsp)
+{
+    /* Decrement the stack pointer. */
+    PCPUMCTX    pCtx = pIemCpu->CTX_SUFF(pCtx);
+    RTUINT64U   NewRsp = *pTmpRsp;
+    RTGCPTR     GCPtrTop = iemRegGetRspForPushEx(&NewRsp, 2, pCtx);
+
+    /* Write the word the lazy way. */
+    uint16_t *pu16Dst;
+    VBOXSTRICTRC rc = iemMemMap(pIemCpu, (void **)&pu16Dst, sizeof(*pu16Dst), X86_SREG_SS, GCPtrTop, IEM_ACCESS_STACK_W);
+    if (rc == VINF_SUCCESS)
+    {
+        *pu16Dst = u16Value;
+        rc = iemMemCommitAndUnmap(pIemCpu, pu16Dst, IEM_ACCESS_STACK_W);
+    }
+
+    /* Commit the new RSP value unless an access handler made trouble. */
+    if (rc == VINF_SUCCESS)
+        *pTmpRsp = NewRsp;
+
+    return rc;
+}
+
+
+/**
+ * Pushes a dword onto the stack, using a temporary stack pointer.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   u32Value            The value to push.
+ * @param   pTmpRsp             Pointer to the temporary stack pointer.
+ */
+static VBOXSTRICTRC iemMemStackPushU32Ex(PIEMCPU pIemCpu, uint32_t u32Value, PRTUINT64U pTmpRsp)
+{
+    /* Decrement the stack pointer. */
+    PCPUMCTX    pCtx = pIemCpu->CTX_SUFF(pCtx);
+    RTUINT64U   NewRsp = *pTmpRsp;
+    RTGCPTR     GCPtrTop = iemRegGetRspForPushEx(&NewRsp, 4, pCtx);
+
+    /* Write the dword the lazy way. */
+    uint32_t *pu32Dst;
+    VBOXSTRICTRC rc = iemMemMap(pIemCpu, (void **)&pu32Dst, sizeof(*pu32Dst), X86_SREG_SS, GCPtrTop, IEM_ACCESS_STACK_W);
+    if (rc == VINF_SUCCESS)
+    {
+        *pu32Dst = u32Value;
+        rc = iemMemCommitAndUnmap(pIemCpu, pu32Dst, IEM_ACCESS_STACK_W);
+    }
+
+    /* Commit the new RSP value unless an access handler made trouble. */
+    if (rc == VINF_SUCCESS)
+        *pTmpRsp = NewRsp;
+
+    return rc;
+}
+
+
+/**
+ * Pushes a qword onto the stack, using a temporary stack pointer.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   u64Value            The value to push.
+ * @param   pTmpRsp             Pointer to the temporary stack pointer.
+ */
+static VBOXSTRICTRC iemMemStackPushU64Ex(PIEMCPU pIemCpu, uint64_t u64Value, PRTUINT64U pTmpRsp)
+{
+    /* Decrement the stack pointer. */
+    PCPUMCTX    pCtx = pIemCpu->CTX_SUFF(pCtx);
+    RTUINT64U   NewRsp = *pTmpRsp;
+    RTGCPTR     GCPtrTop = iemRegGetRspForPushEx(&NewRsp, 8, pCtx);
+
+    /* Write the qword the lazy way. */
+    uint64_t *pu64Dst;
+    VBOXSTRICTRC rc = iemMemMap(pIemCpu, (void **)&pu64Dst, sizeof(*pu64Dst), X86_SREG_SS, GCPtrTop, IEM_ACCESS_STACK_W);
+    if (rc == VINF_SUCCESS)
+    {
+        *pu64Dst = u64Value;
+        rc = iemMemCommitAndUnmap(pIemCpu, pu64Dst, IEM_ACCESS_STACK_W);
+    }
+
+    /* Commit the new RSP value unless an access handler made trouble. */
+    if (rc == VINF_SUCCESS)
+        *pTmpRsp = NewRsp;
+
+    return rc;
+}
+
+
+/**
+ * Pops a word from the stack, using a temporary stack pointer.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   pu16Value           Where to store the popped value.
+ * @param   pTmpRsp             Pointer to the temporary stack pointer.
+ */
+static VBOXSTRICTRC iemMemStackPopU16Ex(PIEMCPU pIemCpu, uint16_t *pu16Value, PRTUINT64U pTmpRsp)
+{
+    /* Increment the stack pointer. */
+    PCPUMCTX    pCtx = pIemCpu->CTX_SUFF(pCtx);
+    RTUINT64U   NewRsp = *pTmpRsp;
+    RTGCPTR     GCPtrTop = iemRegGetRspForPopEx(&NewRsp, 2, pCtx);
+
+    /* Load the word the lazy way. */
+    uint16_t const *pu16Src;
+    VBOXSTRICTRC rc = iemMemMap(pIemCpu, (void **)&pu16Src, sizeof(*pu16Src), X86_SREG_SS, GCPtrTop, IEM_ACCESS_STACK_R);
+    if (rc == VINF_SUCCESS)
+    {
+        *pu16Value = *pu16Src;
+        rc = iemMemCommitAndUnmap(pIemCpu, (void *)pu16Src, IEM_ACCESS_STACK_R);
+
+        /* Commit the new RSP value. */
+        if (rc == VINF_SUCCESS)
+            *pTmpRsp = NewRsp;
+    }
+
+    return rc;
+}
+
+
+/**
+ * Pops a dword from the stack, using a temporary stack pointer.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   pu32Value           Where to store the popped value.
+ * @param   pTmpRsp             Pointer to the temporary stack pointer.
+ */
+static VBOXSTRICTRC iemMemStackPopU32Ex(PIEMCPU pIemCpu, uint32_t *pu32Value, PRTUINT64U pTmpRsp)
+{
+    /* Increment the stack pointer. */
+    PCPUMCTX    pCtx = pIemCpu->CTX_SUFF(pCtx);
+    RTUINT64U   NewRsp = *pTmpRsp;
+    RTGCPTR     GCPtrTop = iemRegGetRspForPopEx(&NewRsp, 4, pCtx);
+
+    /* Load the dword the lazy way. */
+    uint32_t const *pu32Src;
+    VBOXSTRICTRC rc = iemMemMap(pIemCpu, (void **)&pu32Src, sizeof(*pu32Src), X86_SREG_SS, GCPtrTop, IEM_ACCESS_STACK_R);
+    if (rc == VINF_SUCCESS)
+    {
+        *pu32Value = *pu32Src;
+        rc = iemMemCommitAndUnmap(pIemCpu, (void *)pu32Src, IEM_ACCESS_STACK_R);
+
+        /* Commit the new RSP value. */
+        if (rc == VINF_SUCCESS)
+            *pTmpRsp = NewRsp;
+    }
+
+    return rc;
+}
+
+
+/**
+ * Pops a qword from the stack, using a temporary stack pointer.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   pu64Value           Where to store the popped value.
+ * @param   pTmpRsp             Pointer to the temporary stack pointer.
+ */
+static VBOXSTRICTRC iemMemStackPopU64Ex(PIEMCPU pIemCpu, uint64_t *pu64Value, PRTUINT64U pTmpRsp)
+{
+    /* Increment the stack pointer. */
+    PCPUMCTX    pCtx = pIemCpu->CTX_SUFF(pCtx);
+    RTUINT64U   NewRsp = *pTmpRsp;
+    RTGCPTR     GCPtrTop = iemRegGetRspForPopEx(&NewRsp, 8, pCtx);
+
+    /* Load the qword the lazy way. */
+    uint64_t const *pu64Src;
+    VBOXSTRICTRC rcStrict = iemMemMap(pIemCpu, (void **)&pu64Src, sizeof(*pu64Src), X86_SREG_SS, GCPtrTop, IEM_ACCESS_STACK_R);
+    if (rcStrict == VINF_SUCCESS)
+    {
+        *pu64Value = *pu64Src;
+        rcStrict = iemMemCommitAndUnmap(pIemCpu, (void *)pu64Src, IEM_ACCESS_STACK_R);
+
+        /* Commit the new RSP value. */
+        if (rcStrict == VINF_SUCCESS)
+            *pTmpRsp = NewRsp;
+    }
+
+    return rcStrict;
+}
+
+
+/**
+ * Begins a special stack push (used by interrupts, exceptions and such).
+ *
+ * This will raise \#SS or \#PF if appropriate.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   cbMem               The number of bytes to push onto the stack.
+ * @param   ppvMem              Where to return the pointer to the stack memory.
+ *                              As with the other memory functions this could be
+ *                              direct access or bounce buffered access, so
+ *                              don't commit any register changes until the
+ *                              commit call succeeds.
+ * @param   puNewRsp            Where to return the new RSP value.  This must be
+ *                              passed unchanged to
+ *                              iemMemStackPushCommitSpecial().
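+ *
+ * @par Example
+ * A minimal usage sketch (illustrative only; the 6-byte frame and its
+ * contents are made up and error paths are trimmed):
+ * @code
+ *  uint16_t *pu16Frame;
+ *  uint64_t  uNewRsp;
+ *  VBOXSTRICTRC rcStrict = iemMemStackPushBeginSpecial(pIemCpu, 6, (void **)&pu16Frame, &uNewRsp);
+ *  if (rcStrict != VINF_SUCCESS)
+ *      return rcStrict;
+ *  pu16Frame[0] = uSomeValue; // hypothetical value; fill the whole frame before committing
+ *  rcStrict = iemMemStackPushCommitSpecial(pIemCpu, pu16Frame, uNewRsp);
+ * @endcode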
+ */
+static VBOXSTRICTRC iemMemStackPushBeginSpecial(PIEMCPU pIemCpu, size_t cbMem, void **ppvMem, uint64_t *puNewRsp)
+{
+    PCPUMCTX    pCtx     = pIemCpu->CTX_SUFF(pCtx);
+    RTGCPTR     GCPtrTop = iemRegGetRspForPush(pCtx, cbMem, puNewRsp);
+    return iemMemMap(pIemCpu, ppvMem, cbMem, X86_SREG_SS, GCPtrTop, IEM_ACCESS_STACK_W);
+}
+
+
+/**
+ * Commits a special stack push (started by iemMemStackPushBeginSpecial).
+ *
+ * This will update the rSP.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   pvMem               The pointer returned by
+ *                              iemMemStackPushBeginSpecial().
+ * @param   uNewRsp             The new RSP value returned by
+ *                              iemMemStackPushBeginSpecial().
+ */
+static VBOXSTRICTRC iemMemStackPushCommitSpecial(PIEMCPU pIemCpu, void *pvMem, uint64_t uNewRsp)
+{
+    VBOXSTRICTRC rcStrict = iemMemCommitAndUnmap(pIemCpu, pvMem, IEM_ACCESS_STACK_W);
+    if (rcStrict == VINF_SUCCESS)
+        pIemCpu->CTX_SUFF(pCtx)->rsp = uNewRsp;
+    return rcStrict;
+}
+
+
+/**
+ * Begin a special stack pop (used by iret, retf and such).
+ *
+ * This will raise #SS or #PF if appropriate.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   cbMem               The number of bytes to pop off the stack.
+ * @param   ppvMem              Where to return the pointer to the stack memory.
+ * @param   puNewRsp            Where to return the new RSP value.  This must be
+ *                              passed unchanged to
+ *                              iemMemStackPopCommitSpecial().
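+ *
+ * @par Example
+ * A minimal usage sketch (illustrative only; a 4-byte frame is assumed and
+ * error paths are trimmed):
+ * @code
+ *  uint16_t const *pu16Frame;
+ *  uint64_t        uNewRsp;
+ *  VBOXSTRICTRC rcStrict = iemMemStackPopBeginSpecial(pIemCpu, 4, (void const **)&pu16Frame, &uNewRsp);
+ *  if (rcStrict != VINF_SUCCESS)
+ *      return rcStrict;
+ *  // ... validate the frame contents before committing ...
+ *  rcStrict = iemMemStackPopCommitSpecial(pIemCpu, pu16Frame, uNewRsp);
+ * @endcode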
+ */
+static VBOXSTRICTRC iemMemStackPopBeginSpecial(PIEMCPU pIemCpu, size_t cbMem, void const **ppvMem, uint64_t *puNewRsp)
+{
+    PCPUMCTX    pCtx     = pIemCpu->CTX_SUFF(pCtx);
+    RTGCPTR     GCPtrTop = iemRegGetRspForPop(pCtx, cbMem, puNewRsp);
+    return iemMemMap(pIemCpu, (void **)ppvMem, cbMem, X86_SREG_SS, GCPtrTop, IEM_ACCESS_STACK_R);
+}
+
+
+/**
+ * Commits a special stack pop (started by iemMemStackPopBeginSpecial).
+ *
+ * This will update the rSP.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   pvMem               The pointer returned by
+ *                              iemMemStackPopBeginSpecial().
+ * @param   uNewRsp             The new RSP value returned by
+ *                              iemMemStackPopBeginSpecial().
+ */
+static VBOXSTRICTRC iemMemStackPopCommitSpecial(PIEMCPU pIemCpu, void const *pvMem, uint64_t uNewRsp)
+{
+    VBOXSTRICTRC rcStrict = iemMemCommitAndUnmap(pIemCpu, (void *)pvMem, IEM_ACCESS_STACK_R);
+    if (rcStrict == VINF_SUCCESS)
+        pIemCpu->CTX_SUFF(pCtx)->rsp = uNewRsp;
+    return rcStrict;
+}
+
+
+/**
+ * Fetches a descriptor table entry.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   pDesc               Where to return the descriptor table entry.
+ * @param   uSel                The selector which table entry to fetch.
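+ *
+ * @par Example
+ * Typical use, mirroring the far jump implementation below (sketch only):
+ * @code
+ *  IEMSELDESC Desc;
+ *  VBOXSTRICTRC rcStrict = iemMemFetchSelDesc(pIemCpu, &Desc, uSel);
+ *  if (rcStrict != VINF_SUCCESS)
+ *      return rcStrict;
+ *  if (!Desc.Legacy.Gen.u1Present)
+ *      return iemRaiseSelectorNotPresentBySelector(pIemCpu, uSel);
+ * @endcode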
+ */
+static VBOXSTRICTRC iemMemFetchSelDesc(PIEMCPU pIemCpu, PIEMSELDESC pDesc, uint16_t uSel)
+{
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+
+    /** @todo did the 286 require all 8 bytes to be accessible? */
+    /*
+     * Get the selector table base and check bounds.
+     */
+    RTGCPTR GCPtrBase;
+    if (uSel & X86_SEL_LDT)
+    {
+        if (   !pCtx->ldtrHid.Attr.n.u1Present
+            || (uSel | 0x7) > pCtx->ldtrHid.u32Limit )
+        {
+            Log(("iemMemFetchSelDesc: LDT selector %#x is out of bounds (%3x) or ldtr is NP (%#x)\n",
+                 uSel, pCtx->ldtrHid.u32Limit, pCtx->ldtr));
+            /** @todo is this the right exception? */
+            return iemRaiseGeneralProtectionFault(pIemCpu, uSel & (X86_SEL_MASK | X86_SEL_LDT));
+        }
+
+        Assert(pCtx->ldtrHid.Attr.n.u1Present);
+        GCPtrBase = pCtx->ldtrHid.u64Base;
+    }
+    else
+    {
+        if ((uSel | 0x7) > pCtx->gdtr.cbGdt)
+        {
+            Log(("iemMemFetchSelDesc: GDT selector %#x is out of bounds (%3x)\n", uSel, pCtx->gdtr.cbGdt));
+            /** @todo is this the right exception? */
+            return iemRaiseGeneralProtectionFault(pIemCpu, uSel & (X86_SEL_MASK | X86_SEL_LDT));
+        }
+        GCPtrBase = pCtx->gdtr.pGdt;
+    }
+
+    /*
+     * Read the legacy descriptor and maybe the long mode extensions if
+     * required.
+     */
+    VBOXSTRICTRC rcStrict = iemMemFetchDataU64(pIemCpu, &pDesc->Legacy.u, UINT8_MAX, GCPtrBase + (uSel & X86_SEL_MASK));
+    if (rcStrict == VINF_SUCCESS)
+    {
+        if (   !IEM_IS_LONG_MODE(pIemCpu)
+            || pDesc->Legacy.Gen.u1DescType)
+            pDesc->Long.au64[1] = 0;
+        else if ((uint32_t)(uSel & X86_SEL_MASK) + 15 < (uSel & X86_SEL_LDT ? pCtx->ldtrHid.u32Limit : pCtx->gdtr.cbGdt))
+            rcStrict = iemMemFetchDataU64(pIemCpu, &pDesc->Long.au64[1], UINT8_MAX, GCPtrBase + (uSel & X86_SEL_MASK) + 8);
+        else
+        {
+            Log(("iemMemFetchSelDesc: system selector %#x is out of bounds\n", uSel));
+            /** @todo is this the right exception? */
+            return iemRaiseGeneralProtectionFault(pIemCpu, uSel & (X86_SEL_MASK | X86_SEL_LDT));
+        }
+    }
+    return rcStrict;
+}
+
+
+/**
+ * Marks the selector descriptor as accessed (only non-system descriptors).
+ *
+ * This function ASSUMES that iemMemFetchSelDesc has been called previously and
+ * will therefore skip the limit checks.
+ *
+ * @returns Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   uSel                The selector.
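+ *
+ * @remarks The accessed bit is set with an atomic bit operation since the
+ *          guest or another VCPU may be accessing the descriptor
+ *          concurrently.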
+ */
+static VBOXSTRICTRC iemMemMarkSelDescAccessed(PIEMCPU pIemCpu, uint16_t uSel)
+{
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+
+    /*
+     * Get the selector table base and check bounds.
+     */
+    RTGCPTR GCPtr = uSel & X86_SEL_LDT
+                  ? pCtx->ldtrHid.u64Base
+                  : pCtx->gdtr.pGdt;
+    GCPtr += uSel & X86_SEL_MASK;
+    GCPtr += 2 + 2; /* Skip the 16-bit limit and base low fields; the mapped dword covers base 23:16, the type byte and the flags/limit-high byte. */
+    uint32_t volatile *pu32; /** @todo Does the CPU do a 32-bit or 8-bit access here? */
+    VBOXSTRICTRC rcStrict = iemMemMap(pIemCpu, (void **)&pu32, 4, UINT8_MAX, GCPtr, IEM_ACCESS_DATA_RW);
+    if (rcStrict == VINF_SUCCESS)
+    {
+        ASMAtomicBitSet(pu32, 8); /* X86_SEL_TYPE_ACCESSED is 1, but the type byte is the second byte of this dword (bits 0..7 hold base 23:16), hence bit 8. */
+
+        rcStrict = iemMemCommitAndUnmap(pIemCpu, (void *)pu32, IEM_ACCESS_DATA_RW);
+    }
+
+    return rcStrict;
+}
+
+/** @} */
+
+
+/** @name Misc Helpers
+ * @{
+ */
+
+/**
+ * Checks if we are allowed to access the given I/O port, raising the
+ * appropriate exceptions if we aren't (or if the I/O bitmap is not
+ * accessible).
+ *
+ * @returns Strict VBox status code.
+ *
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   pCtx                The register context.
+ * @param   u16Port             The port number.
+ * @param   cbOperand           The operand size.
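+ *
+ * @remarks Access is granted outright when not in protected mode, or when
+ *          CPL <= IOPL outside V8086 mode; otherwise the TSS I/O permission
+ *          bitmap would have to be consulted, which is still a todo below.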
+ */
+DECLINLINE(VBOXSTRICTRC) iemHlpCheckPortIOPermission(PIEMCPU pIemCpu, PCCPUMCTX pCtx, uint16_t u16Port, uint8_t cbOperand)
+{
+    if (   (pCtx->cr0 & X86_CR0_PE)
+        && (    pIemCpu->uCpl > pCtx->eflags.Bits.u2IOPL
+            ||  pCtx->eflags.Bits.u1VM) )
+    {
+        /** @todo I/O port permission bitmap check */
+        AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+    }
+    return VINF_SUCCESS;
+}
+
+/** @} */
+
+
+/** @name C Implementations
+ * @{
+ */
+
+/**
+ * Implements a 16-bit popa.
+ */
+IEM_CIMPL_DEF_0(iemCImpl_popa_16)
+{
+    PCPUMCTX        pCtx        = pIemCpu->CTX_SUFF(pCtx);
+    RTGCPTR         GCPtrStart  = iemRegGetEffRsp(pCtx);
+    RTGCPTR         GCPtrLast   = GCPtrStart + 15;
+    VBOXSTRICTRC    rcStrict;
+
+    /*
+     * The docs are a bit hard to comprehend here, but it looks like we wrap
+     * around in real mode as long as none of the individual "popa" crosses the
+     * end of the stack segment.  In protected mode we check the whole access
+     * in one go.  For efficiency, only do the word-by-word thing if we're in
+     * danger of wrapping around.
+     */
+    /** @todo do popa boundary / wrap-around checks.  */
+    if (RT_UNLIKELY(   IEM_IS_REAL_OR_V86_MODE(pIemCpu)
+                    && (pCtx->csHid.u32Limit < GCPtrLast)) ) /* ASSUMES 64-bit RTGCPTR */
+    {
+        /* word-by-word */
+        RTUINT64U TmpRsp;
+        TmpRsp.u = pCtx->rsp;
+        rcStrict = iemMemStackPopU16Ex(pIemCpu, &pCtx->di, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPopU16Ex(pIemCpu, &pCtx->si, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPopU16Ex(pIemCpu, &pCtx->bp, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+        {
+            iemRegAddToRspEx(&TmpRsp, 2, pCtx); /* sp */
+            rcStrict = iemMemStackPopU16Ex(pIemCpu, &pCtx->bx, &TmpRsp);
+        }
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPopU16Ex(pIemCpu, &pCtx->dx, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPopU16Ex(pIemCpu, &pCtx->cx, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPopU16Ex(pIemCpu, &pCtx->ax, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+        {
+            pCtx->rsp = TmpRsp.u;
+            iemRegAddToRip(pIemCpu, cbInstr);
+        }
+    }
+    else
+    {
+        uint16_t const *pa16Mem = NULL;
+        rcStrict = iemMemMap(pIemCpu, (void **)&pa16Mem, 16, X86_SREG_SS, GCPtrStart, IEM_ACCESS_STACK_R);
+        if (rcStrict == VINF_SUCCESS)
+        {
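+            /* popa reads the registers back in pusha order: AX was stored at the highest address and DI at the lowest, hence the 7 - iReg indexing. */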
+            pCtx->di = pa16Mem[7 - X86_GREG_xDI];
+            pCtx->si = pa16Mem[7 - X86_GREG_xSI];
+            pCtx->bp = pa16Mem[7 - X86_GREG_xBP];
+            /* skip sp */
+            pCtx->bx = pa16Mem[7 - X86_GREG_xBX];
+            pCtx->dx = pa16Mem[7 - X86_GREG_xDX];
+            pCtx->cx = pa16Mem[7 - X86_GREG_xCX];
+            pCtx->ax = pa16Mem[7 - X86_GREG_xAX];
+            rcStrict = iemMemCommitAndUnmap(pIemCpu, (void *)pa16Mem, IEM_ACCESS_STACK_R);
+            if (rcStrict == VINF_SUCCESS)
+            {
+                iemRegAddToRsp(pCtx, 16);
+                iemRegAddToRip(pIemCpu, cbInstr);
+            }
+        }
+    }
+    return rcStrict;
+}
+
+
+/**
+ * Implements a 32-bit popa.
+ */
+IEM_CIMPL_DEF_0(iemCImpl_popa_32)
+{
+    PCPUMCTX        pCtx        = pIemCpu->CTX_SUFF(pCtx);
+    RTGCPTR         GCPtrStart  = iemRegGetEffRsp(pCtx);
+    RTGCPTR         GCPtrLast   = GCPtrStart + 31;
+    VBOXSTRICTRC    rcStrict;
+
+    /*
+     * The docs are a bit hard to comprehend here, but it looks like we wrap
+     * around in real mode as long as none of the individual "popa" crosses the
+     * end of the stack segment.  In protected mode we check the whole access
+     * in one go.  For efficiency, only do the word-by-word thing if we're in
+     * danger of wrapping around.
+     */
+    /** @todo do popa boundary / wrap-around checks.  */
+    if (RT_UNLIKELY(   IEM_IS_REAL_OR_V86_MODE(pIemCpu)
+                    && (pCtx->csHid.u32Limit < GCPtrLast)) ) /* ASSUMES 64-bit RTGCPTR */
+    {
+        /* dword-by-dword */
+        RTUINT64U TmpRsp;
+        TmpRsp.u = pCtx->rsp;
+        rcStrict = iemMemStackPopU32Ex(pIemCpu, &pCtx->edi, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPopU32Ex(pIemCpu, &pCtx->esi, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPopU32Ex(pIemCpu, &pCtx->ebp, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+        {
+            iemRegAddToRspEx(&TmpRsp, 4, pCtx); /* skip esp */
+            rcStrict = iemMemStackPopU32Ex(pIemCpu, &pCtx->ebx, &TmpRsp);
+        }
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPopU32Ex(pIemCpu, &pCtx->edx, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPopU32Ex(pIemCpu, &pCtx->ecx, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPopU32Ex(pIemCpu, &pCtx->eax, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+        {
+#if 1  /** @todo what actually happens with the high bits when we're in 16-bit mode? */
+            pCtx->rdi &= UINT32_MAX;
+            pCtx->rsi &= UINT32_MAX;
+            pCtx->rbp &= UINT32_MAX;
+            pCtx->rbx &= UINT32_MAX;
+            pCtx->rdx &= UINT32_MAX;
+            pCtx->rcx &= UINT32_MAX;
+            pCtx->rax &= UINT32_MAX;
+#endif
+            pCtx->rsp = TmpRsp.u;
+            iemRegAddToRip(pIemCpu, cbInstr);
+        }
+    }
+    else
+    {
+        uint32_t const *pa32Mem;
+        rcStrict = iemMemMap(pIemCpu, (void **)&pa32Mem, 32, X86_SREG_SS, GCPtrStart, IEM_ACCESS_STACK_R);
+        if (rcStrict == VINF_SUCCESS)
+        {
+            pCtx->rdi = pa32Mem[7 - X86_GREG_xDI];
+            pCtx->rsi = pa32Mem[7 - X86_GREG_xSI];
+            pCtx->rbp = pa32Mem[7 - X86_GREG_xBP];
+            /* skip esp */
+            pCtx->rbx = pa32Mem[7 - X86_GREG_xBX];
+            pCtx->rdx = pa32Mem[7 - X86_GREG_xDX];
+            pCtx->rcx = pa32Mem[7 - X86_GREG_xCX];
+            pCtx->rax = pa32Mem[7 - X86_GREG_xAX];
+            rcStrict = iemMemCommitAndUnmap(pIemCpu, (void *)pa32Mem, IEM_ACCESS_STACK_R);
+            if (rcStrict == VINF_SUCCESS)
+            {
+                iemRegAddToRsp(pCtx, 32);
+                iemRegAddToRip(pIemCpu, cbInstr);
+            }
+        }
+    }
+    return rcStrict;
+}
+
+
+/**
+ * Implements a 16-bit pusha.
+ */
+IEM_CIMPL_DEF_0(iemCImpl_pusha_16)
+{
+    PCPUMCTX        pCtx        = pIemCpu->CTX_SUFF(pCtx);
+    RTGCPTR         GCPtrTop    = iemRegGetEffRsp(pCtx);
+    RTGCPTR         GCPtrBottom = GCPtrTop - 15;
+    VBOXSTRICTRC    rcStrict;
+
+    /*
+     * The docs are a bit hard to comprehend here, but it looks like we wrap
+ * around in real mode as long as none of the individual "pusha" crosses the
+     * end of the stack segment.  In protected mode we check the whole access
+     * in one go.  For efficiency, only do the word-by-word thing if we're in
+     * danger of wrapping around.
+     */
+    /** @todo do pusha boundary / wrap-around checks.  */
+    if (RT_UNLIKELY(   GCPtrBottom > GCPtrTop
+                    && IEM_IS_REAL_OR_V86_MODE(pIemCpu) ) )
+    {
+        /* word-by-word */
+        RTUINT64U TmpRsp;
+        TmpRsp.u = pCtx->rsp;
+        rcStrict = iemMemStackPushU16Ex(pIemCpu, pCtx->ax, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPushU16Ex(pIemCpu, pCtx->cx, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPushU16Ex(pIemCpu, pCtx->dx, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPushU16Ex(pIemCpu, pCtx->bx, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPushU16Ex(pIemCpu, pCtx->sp, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPushU16Ex(pIemCpu, pCtx->bp, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPushU16Ex(pIemCpu, pCtx->si, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPushU16Ex(pIemCpu, pCtx->di, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+        {
+            pCtx->rsp = TmpRsp.u;
+            iemRegAddToRip(pIemCpu, cbInstr);
+        }
+    }
+    else
+    {
+        uint16_t *pa16Mem = NULL;
+        rcStrict = iemMemMap(pIemCpu, (void **)&pa16Mem, 16, X86_SREG_SS, GCPtrBottom, IEM_ACCESS_STACK_W);
+        if (rcStrict == VINF_SUCCESS)
+        {
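+            /* AX goes at the highest address and DI at the lowest, hence the 7 - iReg indexing. */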
+            pa16Mem[7 - X86_GREG_xDI] = pCtx->di;
+            pa16Mem[7 - X86_GREG_xSI] = pCtx->si;
+            pa16Mem[7 - X86_GREG_xBP] = pCtx->bp;
+            pa16Mem[7 - X86_GREG_xSP] = pCtx->sp;
+            pa16Mem[7 - X86_GREG_xBX] = pCtx->bx;
+            pa16Mem[7 - X86_GREG_xDX] = pCtx->dx;
+            pa16Mem[7 - X86_GREG_xCX] = pCtx->cx;
+            pa16Mem[7 - X86_GREG_xAX] = pCtx->ax;
+            rcStrict = iemMemCommitAndUnmap(pIemCpu, (void *)pa16Mem, IEM_ACCESS_STACK_W);
+            if (rcStrict == VINF_SUCCESS)
+            {
+                iemRegSubFromRsp(pCtx, 16);
+                iemRegAddToRip(pIemCpu, cbInstr);
+            }
+        }
+    }
+    return rcStrict;
+}
+
+
+/**
+ * Implements a 32-bit pusha.
+ */
+IEM_CIMPL_DEF_0(iemCImpl_pusha_32)
+{
+    PCPUMCTX        pCtx        = pIemCpu->CTX_SUFF(pCtx);
+    RTGCPTR         GCPtrTop    = iemRegGetEffRsp(pCtx);
+    RTGCPTR         GCPtrBottom = GCPtrTop - 31;
+    VBOXSTRICTRC    rcStrict;
+
+    /*
+     * The docs are a bit hard to comprehend here, but it looks like we wrap
+     * around in real mode as long as none of the individual "pusha" crosses the
+     * end of the stack segment.  In protected mode we check the whole access
+     * in one go.  For efficiency, only do the word-by-word thing if we're in
+     * danger of wrapping around.
+     */
+    /** @todo do pusha boundary / wrap-around checks.  */
+    if (RT_UNLIKELY(   GCPtrBottom > GCPtrTop
+                    && IEM_IS_REAL_OR_V86_MODE(pIemCpu) ) )
+    {
+        /* dword-by-dword */
+        RTUINT64U TmpRsp;
+        TmpRsp.u = pCtx->rsp;
+        rcStrict = iemMemStackPushU32Ex(pIemCpu, pCtx->eax, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPushU32Ex(pIemCpu, pCtx->ecx, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPushU32Ex(pIemCpu, pCtx->edx, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPushU32Ex(pIemCpu, pCtx->ebx, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPushU32Ex(pIemCpu, pCtx->esp, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPushU32Ex(pIemCpu, pCtx->ebp, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPushU32Ex(pIemCpu, pCtx->esi, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+            rcStrict = iemMemStackPushU32Ex(pIemCpu, pCtx->edi, &TmpRsp);
+        if (rcStrict == VINF_SUCCESS)
+        {
+            pCtx->rsp = TmpRsp.u;
+            iemRegAddToRip(pIemCpu, cbInstr);
+        }
+    }
+    else
+    {
+        uint32_t *pa32Mem;
+        rcStrict = iemMemMap(pIemCpu, (void **)&pa32Mem, 32, X86_SREG_SS, GCPtrBottom, IEM_ACCESS_STACK_W);
+        if (rcStrict == VINF_SUCCESS)
+        {
+            pa32Mem[7 - X86_GREG_xDI] = pCtx->edi;
+            pa32Mem[7 - X86_GREG_xSI] = pCtx->esi;
+            pa32Mem[7 - X86_GREG_xBP] = pCtx->ebp;
+            pa32Mem[7 - X86_GREG_xSP] = pCtx->esp;
+            pa32Mem[7 - X86_GREG_xBX] = pCtx->ebx;
+            pa32Mem[7 - X86_GREG_xDX] = pCtx->edx;
+            pa32Mem[7 - X86_GREG_xCX] = pCtx->ecx;
+            pa32Mem[7 - X86_GREG_xAX] = pCtx->eax;
+            rcStrict = iemMemCommitAndUnmap(pIemCpu, pa32Mem, IEM_ACCESS_STACK_W);
+            if (rcStrict == VINF_SUCCESS)
+            {
+                iemRegSubFromRsp(pCtx, 32);
+                iemRegAddToRip(pIemCpu, cbInstr);
+            }
+        }
+    }
+    return rcStrict;
+}
+
+
+/**
+ * Implements pushf.
+ *
+ * @param   enmEffOpSize    The effective operand size.
+ */
+IEM_CIMPL_DEF_1(iemCImpl_pushf, IEMMODE, enmEffOpSize)
+{
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+
+    /*
+     * If we're in V8086 mode some care is required (which is why we're
+     * doing this in a C implementation).
+     */
+    uint32_t fEfl = pCtx->eflags.u;
+    if (   (fEfl & X86_EFL_VM)
+        && X86_EFL_GET_IOPL(fEfl) != 3 )
+    {
+        Assert(pCtx->cr0 & X86_CR0_PE);
+        if (   enmEffOpSize != IEMMODE_16BIT
+            || !(pCtx->cr4 & X86_CR4_VME))
+            return iemRaiseGeneralProtectionFault0(pIemCpu);
+        fEfl &= ~X86_EFL_IF;          /* (RF and VM are out of range) */
+        fEfl |= (fEfl & X86_EFL_VIF) >> (19 - 9);
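+        /* (X86_EFL_VIF is bit 19 and X86_EFL_IF is bit 9; the shift moves VIF down into the IF position of the pushed 16-bit image.) */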
+        VBOXSTRICTRC rcStrict = iemMemStackPushU16(pIemCpu, (uint16_t)fEfl);
+        if (rcStrict != VINF_SUCCESS)
+            return rcStrict;
+        iemRegAddToRip(pIemCpu, cbInstr);
+        return VINF_SUCCESS;
+    }
+
+    /*
+     * Ok, clear RF and VM and push the flags.
+     */
+    fEfl &= ~(X86_EFL_RF | X86_EFL_VM);
+
+    VBOXSTRICTRC rcStrict;
+    switch (enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+            rcStrict = iemMemStackPushU16(pIemCpu, (uint16_t)fEfl);
+            break;
+        case IEMMODE_32BIT:
+            rcStrict = iemMemStackPushU32(pIemCpu, fEfl);
+            break;
+        case IEMMODE_64BIT:
+            rcStrict = iemMemStackPushU64(pIemCpu, fEfl);
+            break;
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    iemRegAddToRip(pIemCpu, cbInstr);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Implements popf.
+ *
+ * @param   enmEffOpSize    The effective operand size.
+ */
+IEM_CIMPL_DEF_1(iemCImpl_popf, IEMMODE, enmEffOpSize)
+{
+    PCPUMCTX        pCtx    = pIemCpu->CTX_SUFF(pCtx);
+    uint32_t const  fEflOld = pCtx->eflags.u;
+    VBOXSTRICTRC    rcStrict;
+    uint32_t        fEflNew;
+
+    /*
+     * V8086 is special as usual.
+     */
+    if (fEflOld & X86_EFL_VM)
+    {
+        /*
+         * Almost anything goes if IOPL is 3.
+         */
+        if (X86_EFL_GET_IOPL(fEflOld) == 3)
+        {
+            switch (enmEffOpSize)
+            {
+                case IEMMODE_16BIT:
+                {
+                    uint16_t u16Value;
+                    rcStrict = iemMemStackPopU16(pIemCpu, &u16Value);
+                    if (rcStrict != VINF_SUCCESS)
+                        return rcStrict;
+                    fEflNew = u16Value | (fEflOld & UINT32_C(0xffff0000));
+                    break;
+                }
+                case IEMMODE_32BIT:
+                    rcStrict = iemMemStackPopU32(pIemCpu, &fEflNew);
+                    if (rcStrict != VINF_SUCCESS)
+                        return rcStrict;
+                    break;
+                IEM_NOT_REACHED_DEFAULT_CASE_RET();
+            }
+
+            fEflNew &=   X86_EFL_POPF_BITS & ~(X86_EFL_IOPL);
+            fEflNew |= ~(X86_EFL_POPF_BITS & ~(X86_EFL_IOPL)) & fEflOld;
+        }
+        /*
+         * Interrupt flag virtualization with CR4.VME=1.
+         */
+        else if (   enmEffOpSize == IEMMODE_16BIT
+                 && (pCtx->cr4 & X86_CR4_VME) )
+        {
+            uint16_t    u16Value;
+            RTUINT64U   TmpRsp;
+            TmpRsp.u = pCtx->rsp;
+            rcStrict = iemMemStackPopU16Ex(pIemCpu, &u16Value, &TmpRsp);
+            if (rcStrict != VINF_SUCCESS)
+                return rcStrict;
+
+            /** @todo Is the popf VME #GP(0) delivered after updating RSP+RIP
+             *        or before? */
+            if (    (   (u16Value & X86_EFL_IF)
+                     && (fEflOld  & X86_EFL_VIP))
+                ||  (u16Value & X86_EFL_TF) )
+                return iemRaiseGeneralProtectionFault0(pIemCpu);
+
+            fEflNew = u16Value | (fEflOld & UINT32_C(0xffff0000) & ~X86_EFL_VIF);
+            fEflNew |= (fEflNew & X86_EFL_IF) << (19 - 9);
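+            /* (The reverse of the pushf trick above: the popped IF is copied up into the VIF position, bit 9 -> bit 19.) */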
+            fEflNew &=   X86_EFL_POPF_BITS & ~(X86_EFL_IOPL | X86_EFL_IF);
+            fEflNew |= ~(X86_EFL_POPF_BITS & ~(X86_EFL_IOPL | X86_EFL_IF)) & fEflOld;
+
+            pCtx->rsp = TmpRsp.u;
+        }
+        else
+            return iemRaiseGeneralProtectionFault0(pIemCpu);
+    }
+    /*
+     * Not in V8086 mode.
+     */
+    else
+    {
+        /* Pop the flags. */
+        switch (enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+            {
+                uint16_t u16Value;
+                rcStrict = iemMemStackPopU16(pIemCpu, &u16Value);
+                if (rcStrict != VINF_SUCCESS)
+                    return rcStrict;
+                fEflNew = u16Value | (fEflOld & UINT32_C(0xffff0000));
+                break;
+            }
+            case IEMMODE_32BIT:
+            case IEMMODE_64BIT:
+                rcStrict = iemMemStackPopU32(pIemCpu, &fEflNew);
+                if (rcStrict != VINF_SUCCESS)
+                    return rcStrict;
+                break;
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+
+        /* Merge them with the current flags. */
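+        /* CPL 0 may change everything in X86_EFL_POPF_BITS; CPL <= IOPL may change everything except IOPL; otherwise neither IF nor IOPL changes. */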
+        if (   (fEflNew & (X86_EFL_IOPL | X86_EFL_IF)) == (fEflOld & (X86_EFL_IOPL | X86_EFL_IF))
+            || pIemCpu->uCpl == 0)
+        {
+            fEflNew &=  X86_EFL_POPF_BITS;
+            fEflNew |= ~X86_EFL_POPF_BITS & fEflOld;
+        }
+        else if (pIemCpu->uCpl <= X86_EFL_GET_IOPL(fEflOld))
+        {
+            fEflNew &=   X86_EFL_POPF_BITS & ~(X86_EFL_IOPL);
+            fEflNew |= ~(X86_EFL_POPF_BITS & ~(X86_EFL_IOPL)) & fEflOld;
+        }
+        else
+        {
+            fEflNew &=   X86_EFL_POPF_BITS & ~(X86_EFL_IOPL | X86_EFL_IF);
+            fEflNew |= ~(X86_EFL_POPF_BITS & ~(X86_EFL_IOPL | X86_EFL_IF)) & fEflOld;
+        }
+    }
+
+    /*
+     * Commit the flags.
+     */
+    pCtx->eflags.u = fEflNew;
+    iemRegAddToRip(pIemCpu, cbInstr);
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Implements a 16-bit relative call.
+ *
+ * @param   offDisp      The displacement offset.
+ */
+IEM_CIMPL_DEF_1(iemCImpl_call_rel_16, int16_t, offDisp)
+{
+    PCPUMCTX pCtx  = pIemCpu->CTX_SUFF(pCtx);
+    uint16_t OldPC = pCtx->ip + cbInstr;
+    uint16_t NewPC = OldPC + offDisp;
+    if (NewPC > pCtx->csHid.u32Limit)
+        return iemRaiseGeneralProtectionFault0(pIemCpu);
+
+    VBOXSTRICTRC rcStrict = iemMemStackPushU16(pIemCpu, OldPC);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    pCtx->rip = NewPC;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Implements a 32-bit relative call.
+ *
+ * @param   offDisp      The displacement offset.
+ */
+IEM_CIMPL_DEF_1(iemCImpl_call_rel_32, int32_t, offDisp)
+{
+    PCPUMCTX pCtx  = pIemCpu->CTX_SUFF(pCtx);
+    uint32_t OldPC = pCtx->eip + cbInstr;
+    uint32_t NewPC = OldPC + offDisp;
+    if (NewPC > pCtx->csHid.u32Limit)
+        return iemRaiseGeneralProtectionFault0(pIemCpu);
+
+    VBOXSTRICTRC rcStrict = iemMemStackPushU32(pIemCpu, OldPC);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    pCtx->rip = NewPC;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Implements a 64-bit relative call.
+ *
+ * @param   offDisp      The displacement offset.
+ */
+IEM_CIMPL_DEF_1(iemCImpl_call_rel_64, int64_t, offDisp)
+{
+    PCPUMCTX pCtx  = pIemCpu->CTX_SUFF(pCtx);
+    uint64_t OldPC = pCtx->rip + cbInstr;
+
+    VBOXSTRICTRC rcStrict = iemMemStackPushU64(pIemCpu, OldPC);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    pCtx->rip = OldPC + offDisp;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Implements far jumps.
+ *
+ * @param   uSel        The selector.
+ * @param   offSeg      The segment offset.
+ */
+IEM_CIMPL_DEF_2(iemCImpl_FarJmp, uint16_t, uSel, uint32_t, offSeg)
+{
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+
+    /*
+     * Real mode and V8086 mode are easy.  The only snag seems to be that
+     * CS.limit doesn't change and the limit check is done against the current
+     * limit.
+     */
+    if (   pIemCpu->enmCpuMode == IEMMODE_16BIT
+        && IEM_IS_REAL_OR_V86_MODE(pIemCpu))
+    {
+        if (offSeg > pCtx->csHid.u32Limit)
+            return iemRaiseGeneralProtectionFault0(pIemCpu);
+
+        if (pIemCpu->enmEffOpSize == IEMMODE_16BIT) /** @todo WRONG, must pass this. */
+            pCtx->rip       = offSeg;
+        else
+            pCtx->rip       = offSeg & UINT16_MAX;
+        pCtx->cs            = uSel;
+        pCtx->csHid.u64Base = (uint32_t)uSel << 4;
+        /** @todo REM reset the accessed bit (see on jmp far16 after disabling
+         *        PE.  Check with VT-x and AMD-V. */
+#ifdef IEM_VERIFICATION_MODE
+        pCtx->csHid.Attr.u  &= ~X86_SEL_TYPE_ACCESSED;
+#endif
+        return VINF_SUCCESS;
+    }
+
+    /*
+     * Protected mode. Need to parse the specified descriptor...
+     */
+    if (!(uSel & (X86_SEL_MASK | X86_SEL_LDT)))
+    {
+        Log(("jmpf %04x:%08x -> invalid selector, #GP(0)\n", uSel, offSeg));
+        return iemRaiseGeneralProtectionFault0(pIemCpu);
+    }
+
+    /* Fetch the descriptor. */
+    IEMSELDESC Desc;
+    VBOXSTRICTRC rcStrict = iemMemFetchSelDesc(pIemCpu, &Desc, uSel);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    /* Is it there? */
+    if (!Desc.Legacy.Gen.u1Present)
+    {
+        Log(("jmpf %04x:%08x -> segment not present\n", uSel, offSeg));
+        return iemRaiseSelectorNotPresentBySelector(pIemCpu, uSel);
+    }
+
+    /*
+     * Deal with it according to its type.
+     */
+    if (Desc.Legacy.Gen.u1DescType)
+    {
+        /* Only code segments. */
+        if (!(Desc.Legacy.Gen.u4Type & X86_SEL_TYPE_CODE))
+        {
+            Log(("jmpf %04x:%08x -> not a code selector (u4Type=%#x).\n", uSel, offSeg, Desc.Legacy.Gen.u4Type));
+            return iemRaiseGeneralProtectionFault(pIemCpu, uSel & (X86_SEL_MASK | X86_SEL_LDT));
+        }
+
+        /* L vs D. */
+        if (   Desc.Legacy.Gen.u1Long
+            && Desc.Legacy.Gen.u1DefBig
+            && IEM_IS_LONG_MODE(pIemCpu))
+        {
+            Log(("jmpf %04x:%08x -> both L and D are set.\n", uSel, offSeg));
+            return iemRaiseGeneralProtectionFault(pIemCpu, uSel & (X86_SEL_MASK | X86_SEL_LDT));
+        }
+
+        /* DPL/RPL/CPL check, where conforming segments make a difference. */
+        if (Desc.Legacy.Gen.u4Type & X86_SEL_TYPE_CONF)
+        {
+            if (Desc.Legacy.Gen.u2Dpl > pIemCpu->uCpl)
+            {
+                Log(("jmpf %04x:%08x -> DPL violation (conforming); DPL=%d CPL=%u\n",
+                     uSel, offSeg, Desc.Legacy.Gen.u2Dpl, pIemCpu->uCpl));
+                return iemRaiseGeneralProtectionFault(pIemCpu, uSel & (X86_SEL_MASK | X86_SEL_LDT));
+            }
+        }
+        else
+        {
+            if (Desc.Legacy.Gen.u2Dpl != pIemCpu->uCpl)
+            {
+                Log(("jmpf %04x:%08x -> CPL != DPL; DPL=%d CPL=%u\n", uSel, offSeg, Desc.Legacy.Gen.u2Dpl, pIemCpu->uCpl));
+                return iemRaiseGeneralProtectionFault(pIemCpu, uSel & (X86_SEL_MASK | X86_SEL_LDT));
+            }
+            if ((uSel & X86_SEL_RPL) > pIemCpu->uCpl)
+            {
+                Log(("jmpf %04x:%08x -> RPL > DPL; RPL=%d CPL=%u\n", uSel, offSeg, (uSel & X86_SEL_RPL), pIemCpu->uCpl));
+                return iemRaiseGeneralProtectionFault(pIemCpu, uSel & (X86_SEL_MASK | X86_SEL_LDT));
+            }
+        }
+
+        /* Limit check. (Should alternatively check for non-canonical addresses
+           here, but that is ruled out by offSeg being 32-bit, right?) */
+        uint64_t u64Base;
+        uint32_t cbLimit = X86DESC_LIMIT(Desc.Legacy);
+        if (Desc.Legacy.Gen.u1Granularity)
+            cbLimit = (cbLimit << PAGE_SHIFT) | PAGE_OFFSET_MASK;
+        if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+            u64Base = 0;
+        else
+        {
+            if (offSeg > cbLimit)
+            {
+                Log(("jmpf %04x:%08x -> out of bounds (%#x)\n", uSel, offSeg, cbLimit));
+                return iemRaiseGeneralProtectionFault(pIemCpu, uSel & (X86_SEL_MASK | X86_SEL_LDT));
+            }
+            u64Base = X86DESC_BASE(Desc.Legacy);
+        }
+
+        /*
+         * Ok, everything checked out fine.  Now set the accessed bit before
+         * committing the result into CS, CSHID and RIP.
+         */
+        if (!(Desc.Legacy.Gen.u4Type & X86_SEL_TYPE_ACCESSED))
+        {
+            rcStrict = iemMemMarkSelDescAccessed(pIemCpu, uSel);
+            if (rcStrict != VINF_SUCCESS)
+                return rcStrict;
+            Desc.Legacy.Gen.u4Type |= X86_SEL_TYPE_ACCESSED;
+        }
+
+        /* commit */
+        pCtx->rip = offSeg;
+        pCtx->cs  = uSel & (X86_SEL_MASK | X86_SEL_LDT);
+        pCtx->cs |= pIemCpu->uCpl; /** @todo is this right for conforming segs? or in general? */
+        pCtx->csHid.Attr.u   = (Desc.Legacy.u >> (16+16+8)) & UINT32_C(0xf0ff);
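+        /* (The attribute bits live at descriptor bits 40..55; the 0xf0ff mask drops the limit 19:16 nibble sitting in between.) */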
+        pCtx->csHid.u32Limit = cbLimit;
+        pCtx->csHid.u64Base  = u64Base;
+        /** @todo check if the hidden bits are loaded correctly for 64-bit
+         *        mode.  */
+        return VINF_SUCCESS;
+    }
+
+    /*
+     * System selector.
+     */
+    if (IEM_IS_LONG_MODE(pIemCpu))
+        switch (Desc.Legacy.Gen.u4Type)
+        {
+            case AMD64_SEL_TYPE_SYS_LDT:
+            case AMD64_SEL_TYPE_SYS_TSS_AVAIL:
+            case AMD64_SEL_TYPE_SYS_TSS_BUSY:
+            case AMD64_SEL_TYPE_SYS_CALL_GATE:
+            case AMD64_SEL_TYPE_SYS_INT_GATE:
+            case AMD64_SEL_TYPE_SYS_TRAP_GATE:
+                /* Call various functions to do the work. */
+                AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+            default:
+                Log(("jmpf %04x:%08x -> wrong sys selector (64-bit): %d\n", uSel, offSeg, Desc.Legacy.Gen.u4Type));
+                return iemRaiseGeneralProtectionFault(pIemCpu, uSel & (X86_SEL_MASK | X86_SEL_LDT));
+        }
+    switch (Desc.Legacy.Gen.u4Type)
+    {
+        case X86_SEL_TYPE_SYS_286_TSS_AVAIL:
+        case X86_SEL_TYPE_SYS_LDT:
+        case X86_SEL_TYPE_SYS_286_CALL_GATE:
+        case X86_SEL_TYPE_SYS_TASK_GATE:
+        case X86_SEL_TYPE_SYS_286_INT_GATE:
+        case X86_SEL_TYPE_SYS_286_TRAP_GATE:
+        case X86_SEL_TYPE_SYS_386_TSS_AVAIL:
+        case X86_SEL_TYPE_SYS_386_CALL_GATE:
+        case X86_SEL_TYPE_SYS_386_INT_GATE:
+        case X86_SEL_TYPE_SYS_386_TRAP_GATE:
+            /* Call various functions to do the work. */
+            AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+
+        case X86_SEL_TYPE_SYS_286_TSS_BUSY:
+        case X86_SEL_TYPE_SYS_386_TSS_BUSY:
+            /* Call various functions to do the work. */
+            AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+
+        default:
+            Log(("jmpf %04x:%08x -> wrong sys selector (32-bit): %d\n", uSel, offSeg, Desc.Legacy.Gen.u4Type));
+            return iemRaiseGeneralProtectionFault(pIemCpu, uSel & (X86_SEL_MASK | X86_SEL_LDT));
+    }
+}
+
+
+/**
+ * Implements far calls.
+ *
+ * @param   uSel        The selector.
+ * @param   offSeg      The segment offset.
+ * @param   enmOpSize   The operand size (in case we need it).
+ */
+IEM_CIMPL_DEF_3(iemCImpl_callf, uint16_t, uSel, uint64_t, offSeg, IEMMODE, enmOpSize)
+{
+    PCPUMCTX        pCtx = pIemCpu->CTX_SUFF(pCtx);
+    VBOXSTRICTRC    rcStrict;
+    uint64_t        uNewRsp;
+    void           *pvRet;
+
+    /*
+     * Real mode and V8086 mode are easy.  The only snag seems to be that
+     * CS.limit doesn't change and the limit check is done against the current
+     * limit.
+     */
+    if (   pIemCpu->enmCpuMode == IEMMODE_16BIT
+        && IEM_IS_REAL_OR_V86_MODE(pIemCpu))
+    {
+        Assert(enmOpSize == IEMMODE_16BIT || enmOpSize == IEMMODE_32BIT);
+
+        /* Check stack first - may #SS(0). */
+        rcStrict = iemMemStackPushBeginSpecial(pIemCpu, enmOpSize == IEMMODE_32BIT ? 8 : 4,
+                                               &pvRet, &uNewRsp);
+        if (rcStrict != VINF_SUCCESS)
+            return rcStrict;
+
+        /* Check the target address range. */
+        if (offSeg > UINT32_MAX)
+            return iemRaiseGeneralProtectionFault0(pIemCpu);
+
+        /* Everything is fine, push the return address. */
+        if (enmOpSize == IEMMODE_16BIT)
+        {
+            ((uint16_t *)pvRet)[0] = pCtx->ip;
+            ((uint16_t *)pvRet)[1] = pCtx->cs;
+        }
+        else
+        {
+            ((uint32_t *)pvRet)[0] = pCtx->eip;
+            ((uint16_t *)pvRet)[2] = pCtx->cs; /* CS goes into the low word of the second dword; the high word is left untouched. */
+        }
+        rcStrict = iemMemStackPushCommitSpecial(pIemCpu, pvRet, uNewRsp);
+        if (rcStrict != VINF_SUCCESS)
+            return rcStrict;
+
+        /* Branch. */
+        pCtx->rip           = offSeg;
+        pCtx->cs            = uSel;
+        pCtx->csHid.u64Base = (uint32_t)uSel << 4;
+        /** @todo Does REM reset the accessed bit here too? (See on jmp far16
+         *        after disabling PE.) Check with VT-x and AMD-V. */
+#ifdef IEM_VERIFICATION_MODE
+        pCtx->csHid.Attr.u  &= ~X86_SEL_TYPE_ACCESSED;
+#endif
+        return VINF_SUCCESS;
+    }
+
+    AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+}
+
+
+/**
+ * Implements retf.
+ *
+ * @param   enmEffOpSize    The effective operand size.
+ * @param   cbPop           The amount of arguments to pop from the stack
+ *                          (bytes).
+ */
+IEM_CIMPL_DEF_2(iemCImpl_retf, IEMMODE, enmEffOpSize, uint16_t, cbPop)
+{
+    PCPUMCTX        pCtx = pIemCpu->CTX_SUFF(pCtx);
+    VBOXSTRICTRC    rcStrict;
+    uint64_t        uNewRsp;
+
+    /*
+     * Real mode and V8086 mode are easy.
+     */
+    if (   pIemCpu->enmCpuMode == IEMMODE_16BIT
+        && IEM_IS_REAL_OR_V86_MODE(pIemCpu))
+    {
+        Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
+        uint16_t const *pu16Frame;
+        rcStrict = iemMemStackPopBeginSpecial(pIemCpu, enmEffOpSize == IEMMODE_32BIT ? 8 : 4,
+                                              (void const **)&pu16Frame, &uNewRsp);
+        if (rcStrict != VINF_SUCCESS)
+            return rcStrict;
+        uint32_t uNewEip;
+        uint16_t uNewCs;
+        if (enmEffOpSize == IEMMODE_32BIT)
+        {
+            uNewCs  = pu16Frame[2];
+            uNewEip = RT_MAKE_U32(pu16Frame[0], pu16Frame[1]);
+        }
+        else
+        {
+            uNewCs  = pu16Frame[1];
+            uNewEip = pu16Frame[0];
+        }
+        /** @todo check how this is supposed to work if sp=0xfffe. */
+
+        /* Check the limit of the new EIP. */
+        /** @todo Intel pseudo code only does the limit check for 16-bit
+         *        operands, AMD doesn't make any distinction. What is right? */
+        if (uNewEip > pCtx->csHid.u32Limit)
+            return iemRaiseSelectorBounds(pIemCpu, X86_SREG_CS, IEM_ACCESS_INSTRUCTION);
+
+        /* commit the operation. */
+        rcStrict = iemMemStackPopCommitSpecial(pIemCpu, pu16Frame, uNewRsp);
+        if (rcStrict != VINF_SUCCESS)
+            return rcStrict;
+        pCtx->rip           = uNewEip;
+        pCtx->cs            = uNewCs;
+        pCtx->csHid.u64Base = (uint32_t)uNewCs << 4;
+        /** @todo do we load attribs and limit as well? */
+        if (cbPop)
+            iemRegAddToRsp(pCtx, cbPop);
+        return VINF_SUCCESS;
+    }
+
+    AssertFailed();
+    return VERR_NOT_IMPLEMENTED;
+}
+
+
+/**
+ * Implements int3 and int XX.
+ *
+ * @param   u8Int       The interrupt vector number.
+ * @param   fIsBpInstr  Is it the breakpoint instruction.
+ */
+IEM_CIMPL_DEF_2(iemCImpl_int, uint8_t, u8Int, bool, fIsBpInstr)
+{
+    /** @todo we should call TRPM to do this job.  */
+    VBOXSTRICTRC    rcStrict;
+    PCPUMCTX        pCtx = pIemCpu->CTX_SUFF(pCtx);
+
+    /*
+     * Real mode is easy.
+     */
+    if (   pIemCpu->enmCpuMode == IEMMODE_16BIT
+        && IEM_IS_REAL_MODE(pIemCpu))
+    {
+        /* read the IDT entry. */
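+        /* Each real mode IVT entry is four bytes: a 16-bit offset followed by a 16-bit segment, i.e. an RTFAR16. */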
+        if (pCtx->idtr.cbIdt < UINT32_C(4) * u8Int + 3)
+            return iemRaiseGeneralProtectionFault(pIemCpu, X86_TRAP_ERR_IDT | ((uint16_t)u8Int << X86_TRAP_ERR_SEL_SHIFT));
+        RTFAR16 Idte;
+        rcStrict = iemMemFetchDataU32(pIemCpu, (uint32_t *)&Idte, UINT8_MAX, pCtx->idtr.pIdt + UINT32_C(4) * u8Int);
+        if (RT_UNLIKELY(rcStrict != VINF_SUCCESS))
+            return rcStrict;
+
+        /* push the stack frame. */
+        uint16_t *pu16Frame;
+        uint64_t  uNewRsp;
+        rcStrict = iemMemStackPushBeginSpecial(pIemCpu, 6, (void **)&pu16Frame, &uNewRsp);
+        if (rcStrict != VINF_SUCCESS)
+            return rcStrict;
+
+        pu16Frame[2] = (uint16_t)pCtx->eflags.u;
+        pu16Frame[1] = (uint16_t)pCtx->cs;
+        pu16Frame[0] = (uint16_t)pCtx->ip;
+        rcStrict = iemMemStackPushCommitSpecial(pIemCpu, pu16Frame, uNewRsp);
+        if (RT_UNLIKELY(rcStrict != VINF_SUCCESS))
+            return rcStrict;
+
+        /* load the vector address into cs:ip. */
+        pCtx->cs            = Idte.sel;
+        pCtx->csHid.u64Base = (uint32_t)Idte.sel << 4;
+        /** @todo do we load attribs and limit as well? Should we check against limit like far jump? */
+        pCtx->rip           = Idte.off;
+        return VINF_SUCCESS;
+    }
+
+    AssertFailed();
+    return VERR_NOT_IMPLEMENTED;
+}
+
+
+/**
+ * Implements iret.
+ *
+ * @param   enmEffOpSize    The effective operand size.
+ */
+IEM_CIMPL_DEF_1(iemCImpl_iret, IEMMODE, enmEffOpSize)
+{
+    PCPUMCTX        pCtx = pIemCpu->CTX_SUFF(pCtx);
+    VBOXSTRICTRC    rcStrict;
+    uint64_t        uNewRsp;
+
+    /*
+     * Real mode is easy, V8086 mode is relatively similar.
+     */
+    if (   pIemCpu->enmCpuMode == IEMMODE_16BIT
+        && IEM_IS_REAL_OR_V86_MODE(pIemCpu))
+    {
+        /* iret throws an exception if VME isn't enabled.  */
+        if (   pCtx->eflags.Bits.u1VM
+            && !(pCtx->cr4 & X86_CR4_VME))
+            return iemRaiseGeneralProtectionFault0(pIemCpu);
+
+        /* Do the stack bits, but don't commit RSP before everything checks
+           out right. */
+        union
+        {
+            uint32_t const *pu32;
+            uint16_t const *pu16;
+            void const     *pv;
+        } uFrame;
+        Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
+        uint16_t uNewCs;
+        uint32_t uNewEip;
+        uint32_t uNewFlags;
+        if (enmEffOpSize == IEMMODE_32BIT)
+        {
+            rcStrict = iemMemStackPopBeginSpecial(pIemCpu, 12, &uFrame.pv, &uNewRsp);
+            if (rcStrict != VINF_SUCCESS)
+                return rcStrict;
+            uNewEip    = uFrame.pu32[0];
+            uNewCs     = (uint16_t)uFrame.pu32[1];
+            uNewFlags  = uFrame.pu32[2];
+            uNewFlags &= X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF
+                       | X86_EFL_TF | X86_EFL_IF | X86_EFL_DF | X86_EFL_OF | X86_EFL_IOPL | X86_EFL_NT
+                       | X86_EFL_RF /*| X86_EFL_VM*/ | X86_EFL_AC /*|X86_EFL_VIF*/ /*|X86_EFL_VIP*/
+                       | X86_EFL_ID;
+            uNewFlags |= pCtx->eflags.u & (X86_EFL_VM | X86_EFL_VIF | X86_EFL_VIP);
+        }
+        else
+        {
+            rcStrict = iemMemStackPopBeginSpecial(pIemCpu, 6, &uFrame.pv, &uNewRsp);
+            if (rcStrict != VINF_SUCCESS)
+                return rcStrict;
+            uNewEip    = uFrame.pu16[0];
+            uNewCs     = uFrame.pu16[1];
+            uNewFlags  = uFrame.pu16[2];
+            uNewFlags &= X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF
+                       | X86_EFL_TF | X86_EFL_IF | X86_EFL_DF | X86_EFL_OF | X86_EFL_IOPL | X86_EFL_NT;
+            uNewFlags |= pCtx->eflags.u & UINT32_C(0xffff0000);
+            /** @todo The intel pseudo code does not indicate what happens to
+             *        reserved flags. We just ignore them. */
+        }
+        /** @todo Check how this is supposed to work if sp=0xfffe. */
+
+        /* Check the limit of the new EIP. */
+        /** @todo Only the AMD pseudo code checks the limit here, what's
+         *        right? */
+        if (uNewEip > pCtx->csHid.u32Limit)
+            return iemRaiseSelectorBounds(pIemCpu, X86_SREG_CS, IEM_ACCESS_INSTRUCTION);
+
+        /* V8086 checks and flag adjustments */
+        if (pCtx->eflags.Bits.u1VM)
+        {
+            if (pCtx->eflags.Bits.u2IOPL == 3)
+            {
+                /* Preserve IOPL and clear RF. */
+                uNewFlags &=                 ~(X86_EFL_IOPL | X86_EFL_RF);
+                uNewFlags |= pCtx->eflags.u & (X86_EFL_IOPL);
+            }
+            else if (   enmEffOpSize == IEMMODE_16BIT
+                     && (   !(uNewFlags & X86_EFL_IF)
+                         || !pCtx->eflags.Bits.u1VIP )
+                     && !(uNewFlags & X86_EFL_TF)   )
+            {
+                /* Move IF to VIF, clear RF and preserve IF and IOPL.*/
+                uNewFlags &= ~X86_EFL_VIF;
+                uNewFlags |= (uNewFlags & X86_EFL_IF) << (19 - 9);
+                uNewFlags &=                 ~(X86_EFL_IF | X86_EFL_IOPL | X86_EFL_RF);
+                uNewFlags |= pCtx->eflags.u & (X86_EFL_IF | X86_EFL_IOPL);
+            }
+            else
+                return iemRaiseGeneralProtectionFault0(pIemCpu);
+        }
+
+        /* commit the operation. */
+        rcStrict = iemMemStackPopCommitSpecial(pIemCpu, uFrame.pv, uNewRsp);
+        if (rcStrict != VINF_SUCCESS)
+            return rcStrict;
+        pCtx->rip           = uNewEip;
+        pCtx->cs            = uNewCs;
+        pCtx->csHid.u64Base = (uint32_t)uNewCs << 4;
+        /** @todo do we load attribs and limit as well? */
+        pCtx->eflags.u      = uNewFlags;
+
+        return VINF_SUCCESS;
+    }
+
+    AssertFailed();
+    return VERR_NOT_IMPLEMENTED;
+}
+
+
+/**
+ * Implements 'mov SReg, r/m'.
+ *
+ * @param   iSegReg     The segment register number (valid).
+ * @param   uSel        The new selector value.
+ */
+IEM_CIMPL_DEF_2(iemCImpl_LoadSReg, uint8_t, iSegReg, uint16_t, uSel)
+{
+    PCPUMCTX        pCtx = pIemCpu->CTX_SUFF(pCtx);
+    uint16_t       *pSel = iemSRegRef(pIemCpu, iSegReg);
+    PCPUMSELREGHID  pHid = iemSRegGetHid(pIemCpu, iSegReg);
+
+    Assert(iSegReg <= X86_SREG_GS && iSegReg != X86_SREG_CS);
+
+    /*
+     * Real mode and V8086 mode are easy.
+     */
+    if (   pIemCpu->enmCpuMode == IEMMODE_16BIT
+        && IEM_IS_REAL_OR_V86_MODE(pIemCpu))
+    {
+        *pSel           = uSel;
+        pHid->u64Base   = (uint32_t)uSel << 4;
+        /** @todo Does the CPU actually load limits and attributes in the
+         *        real/V8086 mode segment load case?  It doesn't for CS in far
+         *        jumps...  Affects unreal mode.  */
+        pHid->u32Limit          = 0xffff;
+        pHid->Attr.u = 0;
+        pHid->Attr.n.u1Present  = 1;
+        pHid->Attr.n.u1DescType = 1;
+        pHid->Attr.n.u4Type     = iSegReg != X86_SREG_CS
+                                ? X86_SEL_TYPE_RW
+                                : X86_SEL_TYPE_READ | X86_SEL_TYPE_CODE;
+
+        iemRegAddToRip(pIemCpu, cbInstr);
+        if (iSegReg == X86_SREG_SS)
+            EMSetInhibitInterruptsPC(IEMCPU_TO_VMCPU(pIemCpu), pCtx->rip);
+        return VINF_SUCCESS;
+    }
+
+    /*
+     * Protected mode.
+     *
+     * Check if it's a null segment selector value first, that's OK for DS, ES,
+     * FS and GS.  If not null, then we have to load and parse the descriptor.
+     */
+    if (!(uSel & (X86_SEL_MASK | X86_SEL_LDT)))
+    {
+        if (iSegReg == X86_SREG_SS)
+        {
+            if (   pIemCpu->enmCpuMode != IEMMODE_64BIT
+                || pIemCpu->uCpl != 0
+                || uSel != 0) /** @todo We cannot 'mov ss, 3' in 64-bit kernel mode, can we?  */
+            {
+                Log(("load sreg -> invalid stack selector, #GP(0)\n", uSel));
+                return iemRaiseGeneralProtectionFault0(pIemCpu);
+            }
+
+            /* In 64-bit kernel mode, the stack can be 0 because of the way
+               interrupts are dispatched when in kernel ctx. Just load the
+               selector value into the register and leave the hidden bits
+               as is. */
+            *pSel = uSel;
+            iemRegAddToRip(pIemCpu, cbInstr);
+            EMSetInhibitInterruptsPC(IEMCPU_TO_VMCPU(pIemCpu), pCtx->rip);
+            return VINF_SUCCESS;
+        }
+
+        *pSel = uSel;   /* Not RPL, remember :-) */
+        if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+        {
+            /** @todo figure out what this actually does, it works. Needs
+             *        testcase! */
+            pHid->Attr.u           = 0;
+            pHid->Attr.n.u1Present = 1;
+            pHid->Attr.n.u1Long    = 1;
+            pHid->Attr.n.u4Type    = X86_SEL_TYPE_RW;
+            pHid->Attr.n.u2Dpl     = 3;
+            pHid->u32Limit         = 0;
+            pHid->u64Base          = 0;
+        }
+        else
+        {
+            pHid->Attr.u   = 0;
+            pHid->u32Limit = 0;
+            pHid->u64Base  = 0;
+        }
+        iemRegAddToRip(pIemCpu, cbInstr);
+        return VINF_SUCCESS;
+    }
+
+    /* Fetch the descriptor. */
+    IEMSELDESC Desc;
+    VBOXSTRICTRC rcStrict = iemMemFetchSelDesc(pIemCpu, &Desc, uSel);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    /* Check GPs first. */
+    if (!Desc.Legacy.Gen.u1DescType)
+    {
+        Log(("load sreg %d - system selector (%#x) -> #GP\n", iSegReg, uSel, Desc.Legacy.Gen.u4Type));
+        return iemRaiseGeneralProtectionFault(pIemCpu, uSel & (X86_SEL_MASK | X86_SEL_LDT));
+    }
+    if (iSegReg == X86_SREG_SS) /* SS gets different treatment */
+    {
+        if (   (Desc.Legacy.Gen.u4Type & X86_SEL_TYPE_CODE)
+            || !(Desc.Legacy.Gen.u4Type & X86_SEL_TYPE_WRITE) )
+        {
+            Log(("load sreg SS, %#x - code or read only (%#x) -> #GP\n", uSel, Desc.Legacy.Gen.u4Type));
+            return iemRaiseGeneralProtectionFault(pIemCpu, uSel & (X86_SEL_MASK | X86_SEL_LDT));
+        }
+        if ((uSel & X86_SEL_RPL) != pIemCpu->uCpl)
+        {
+            Log(("load sreg SS, %#x - RPL and CPL (%d) differs -> #GP\n", uSel, pIemCpu->uCpl));
+            return iemRaiseGeneralProtectionFault(pIemCpu, uSel & (X86_SEL_MASK | X86_SEL_LDT));
+        }
+        if (Desc.Legacy.Gen.u2Dpl != pIemCpu->uCpl)
+        {
+            Log(("load sreg SS, %#x - DPL (%d) and CPL (%d) differs -> #GP\n", uSel, Desc.Legacy.Gen.u2Dpl, pIemCpu->uCpl));
+            return iemRaiseGeneralProtectionFault(pIemCpu, uSel & (X86_SEL_MASK | X86_SEL_LDT));
+        }
+    }
+    else
+    {
+        if ((Desc.Legacy.Gen.u4Type & (X86_SEL_TYPE_CODE | X86_SEL_TYPE_READ)) == X86_SEL_TYPE_CODE)
+        {
+            Log(("load sreg%u, %#x - execute only segment -> #GP\n", iSegReg, uSel));
+            return iemRaiseGeneralProtectionFault(pIemCpu, uSel & (X86_SEL_MASK | X86_SEL_LDT));
+        }
+        if (   (Desc.Legacy.Gen.u4Type & (X86_SEL_TYPE_CODE | X86_SEL_TYPE_CONF))
+            != (X86_SEL_TYPE_CODE | X86_SEL_TYPE_CONF))
+        {
+#if 0 /* this is what intel says. */
+            if (   (uSel & X86_SEL_RPL) > Desc.Legacy.Gen.u2Dpl
+                && pIemCpu->uCpl        > Desc.Legacy.Gen.u2Dpl)
+            {
+                Log(("load sreg%u, %#x - both RPL (%d) and CPL (%d) are greater than DPL (%d) -> #GP\n",
+                     iSegReg, uSel, (uSel & X86_SEL_RPL), pIemCpu->uCpl, Desc.Legacy.Gen.u2Dpl));
+                return iemRaiseGeneralProtectionFault(pIemCpu, uSel & (X86_SEL_MASK | X86_SEL_LDT));
+            }
+#else /* this is what makes more sense. */
+            if ((uSel & X86_SEL_RPL) > Desc.Legacy.Gen.u2Dpl)
+            {
+                Log(("load sreg%u, %#x - RPL (%d) is greater than DPL (%d) -> #GP\n",
+                     iSegReg, uSel, (uSel & X86_SEL_RPL), Desc.Legacy.Gen.u2Dpl));
+                return iemRaiseGeneralProtectionFault(pIemCpu, uSel & (X86_SEL_MASK | X86_SEL_LDT));
+            }
+            if (pIemCpu->uCpl > Desc.Legacy.Gen.u2Dpl)
+            {
+                Log(("load sreg%u, %#x - CPL (%d) is greater than DPL (%d) -> #GP\n",
+                     iSegReg, uSel, pIemCpu->uCpl, Desc.Legacy.Gen.u2Dpl));
+                return iemRaiseGeneralProtectionFault(pIemCpu, uSel & (X86_SEL_MASK | X86_SEL_LDT));
+            }
+#endif
+        }
+    }
+
+    /* Is it there? */
+    if (!Desc.Legacy.Gen.u1Present)
+    {
+        Log(("load sreg%d,%#x - segment not present -> #NP\n", iSegReg, uSel));
+        return iemRaiseSelectorNotPresentBySelector(pIemCpu, uSel);
+    }
+
+    /* Get the base and limit. */
+    uint64_t u64Base;
+    uint32_t cbLimit = X86DESC_LIMIT(Desc.Legacy);
+    if (Desc.Legacy.Gen.u1Granularity)
+        cbLimit = (cbLimit << PAGE_SHIFT) | PAGE_OFFSET_MASK;
+
+    if (   pIemCpu->enmCpuMode == IEMMODE_64BIT
+        && iSegReg < X86_SREG_FS)
+        u64Base = 0;
+    else
+        u64Base = X86DESC_BASE(Desc.Legacy);
+
+    /*
+     * Ok, everything checked out fine.  Now set the accessed bit before
+     * committing the result into the registers.
+     */
+    if (!(Desc.Legacy.Gen.u4Type & X86_SEL_TYPE_ACCESSED))
+    {
+        rcStrict = iemMemMarkSelDescAccessed(pIemCpu, uSel);
+        if (rcStrict != VINF_SUCCESS)
+            return rcStrict;
+        Desc.Legacy.Gen.u4Type |= X86_SEL_TYPE_ACCESSED;
+    }
+
+    /* commit */
+    *pSel = uSel;
+    pHid->Attr.u   = (Desc.Legacy.u >> (16+16+8)) & UINT32_C(0xf0ff); /** @todo do we have a define for 0xf0ff? */
+    pHid->u32Limit = cbLimit;
+    pHid->u64Base  = u64Base;
+
+    /** @todo check if the hidden bits are loaded correctly for 64-bit
+     *        mode.  */
+
+    iemRegAddToRip(pIemCpu, cbInstr);
+    if (iSegReg == X86_SREG_SS)
+        EMSetInhibitInterruptsPC(IEMCPU_TO_VMCPU(pIemCpu), pCtx->rip);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Implements 'pop SReg'.
+ *
+ * @param   iSegReg         The segment register number (valid).
+ * @param   enmEffOpSize    The effective operand size (valid).
+ */
+IEM_CIMPL_DEF_2(iemOpCImpl_pop_Sreg, uint8_t, iSegReg, IEMMODE, enmEffOpSize)
+{
+    PCPUMCTX        pCtx = pIemCpu->CTX_SUFF(pCtx);
+    VBOXSTRICTRC    rcStrict;
+
+    /*
+     * Read the selector off the stack and join paths with mov ss, reg.
+     */
+    RTUINT64U TmpRsp;
+    TmpRsp.u = pCtx->rsp;
+    switch (enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+        {
+            uint16_t uSel;
+            rcStrict = iemMemStackPopU16Ex(pIemCpu, &uSel, &TmpRsp);
+            if (rcStrict == VINF_SUCCESS)
+                rcStrict = IEM_CIMPL_CALL_2(iemCImpl_LoadSReg, iSegReg, uSel);
+            break;
+        }
+
+        case IEMMODE_32BIT:
+        {
+            uint32_t u32Value;
+            rcStrict = iemMemStackPopU32Ex(pIemCpu, &u32Value, &TmpRsp);
+            if (rcStrict == VINF_SUCCESS)
+                rcStrict = IEM_CIMPL_CALL_2(iemCImpl_LoadSReg, iSegReg, (uint16_t)u32Value);
+            break;
+        }
+
+        case IEMMODE_64BIT:
+        {
+            uint64_t u64Value;
+            rcStrict = iemMemStackPopU64Ex(pIemCpu, &u64Value, &TmpRsp);
+            if (rcStrict == VINF_SUCCESS)
+                rcStrict = IEM_CIMPL_CALL_2(iemCImpl_LoadSReg, iSegReg, (uint16_t)u64Value);
+            break;
+        }
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+
+    /*
+     * Commit the stack on success.
+     */
+    if (rcStrict == VINF_SUCCESS)
+        pCtx->rsp = TmpRsp.u;
+    return rcStrict;
+}
+
+
+/**
+ * Implements lgdt.
+ *
+ * @param   iEffSeg         The segment of the new gdtr contents.
+ * @param   GCPtrEffSrc     The address of the new gdtr contents.
+ * @param   enmEffOpSize    The effective operand size.
+ */
+IEM_CIMPL_DEF_3(iemCImpl_lgdt, uint8_t, iEffSeg, RTGCPTR, GCPtrEffSrc, IEMMODE, enmEffOpSize)
+{
+    if (pIemCpu->uCpl != 0)
+        return iemRaiseGeneralProtectionFault0(pIemCpu);
+    Assert(!pIemCpu->CTX_SUFF(pCtx)->eflags.Bits.u1VM);
+
+    /*
+     * Fetch the limit and base address.
+     */
+    uint16_t cbLimit;
+    RTGCPTR  GCPtrBase;
+    VBOXSTRICTRC rcStrict = iemMemFetchDataXdtr(pIemCpu, &cbLimit, &GCPtrBase, iEffSeg, GCPtrEffSrc, enmEffOpSize);
+    if (rcStrict == VINF_SUCCESS)
+    {
+#ifndef IEM_VERIFICATION_MODE
+        rcStrict = CPUMSetGuestGDTR(IEMCPU_TO_VMCPU(pIemCpu), GCPtrBase, cbLimit);
+#else
+        PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+        pCtx->gdtr.cbGdt = cbLimit;
+        pCtx->gdtr.pGdt  = GCPtrBase;
+#endif
+        if (rcStrict == VINF_SUCCESS)
+            iemRegAddToRip(pIemCpu, cbInstr);
+    }
+    return rcStrict;
+}
+
+
+/**
+ * Implements lidt.
+ *
+ * @param   iEffSeg         The segment of the new idtr contents.
+ * @param   GCPtrEffSrc     The address of the new idtr contents.
+ * @param   enmEffOpSize    The effective operand size.
+ */
+IEM_CIMPL_DEF_3(iemCImpl_lidt, uint8_t, iEffSeg, RTGCPTR, GCPtrEffSrc, IEMMODE, enmEffOpSize)
+{
+    if (pIemCpu->uCpl != 0)
+        return iemRaiseGeneralProtectionFault0(pIemCpu);
+    Assert(!pIemCpu->CTX_SUFF(pCtx)->eflags.Bits.u1VM);
+
+    /*
+     * Fetch the limit and base address.
+     */
+    uint16_t cbLimit;
+    RTGCPTR  GCPtrBase;
+    VBOXSTRICTRC rcStrict = iemMemFetchDataXdtr(pIemCpu, &cbLimit, &GCPtrBase, iEffSeg, GCPtrEffSrc, enmEffOpSize);
+    if (rcStrict == VINF_SUCCESS)
+    {
+#ifndef IEM_VERIFICATION_MODE
+        rcStrict = CPUMSetGuestIDTR(IEMCPU_TO_VMCPU(pIemCpu), GCPtrBase, cbLimit);
+#else
+        PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+        pCtx->idtr.cbIdt = cbLimit;
+        pCtx->idtr.pIdt  = GCPtrBase;
+#endif
+        if (rcStrict == VINF_SUCCESS)
+            iemRegAddToRip(pIemCpu, cbInstr);
+    }
+    return rcStrict;
+}
+
+
+/**
+ * Implements mov GReg,CRx.
+ *
+ * @param   iGReg           The general register to store the CRx value in.
+ * @param   iCrReg          The CRx register to read (valid).
+ */
+IEM_CIMPL_DEF_2(iemCImpl_mov_Rd_Cd, uint8_t, iGReg, uint8_t, iCrReg)
+{
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+    if (pIemCpu->uCpl != 0)
+        return iemRaiseGeneralProtectionFault0(pIemCpu);
+    Assert(!pCtx->eflags.Bits.u1VM);
+
+    /* read it */
+    uint64_t crX;
+    switch (iCrReg)
+    {
+        case 0: crX = pCtx->cr0; break;
+        case 2: crX = pCtx->cr2; break;
+        case 3: crX = pCtx->cr3; break;
+        case 4: crX = pCtx->cr4; break;
+        case 8:
+#ifndef IEM_VERIFICATION_MODE
+            AssertFailedReturn(VERR_NOT_IMPLEMENTED); /** @todo implement CR8 reading and writing. */
+#else
+            crX = 0xff;
+#endif
+            break;
+        IEM_NOT_REACHED_DEFAULT_CASE_RET(); /* the caller checks for valid values */
+    }
+
+    /* store it */
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+        *(uint64_t *)iemGRegRef(pIemCpu, iGReg) = crX;
+    else
+        *(uint64_t *)iemGRegRef(pIemCpu, iGReg) = (uint32_t)crX;
+
+    iemRegAddToRip(pIemCpu, cbInstr);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Implements mov CRx,GReg.
+ *
+ * @param   iCrReg          The CRx register to write (valid).
+ * @param   iGReg           The general register to load the CRx value from.
+ */
+IEM_CIMPL_DEF_2(iemCImpl_mov_Cd_Rd, uint8_t, iCrReg, uint8_t, iGReg)
+{
+    PCPUMCTX        pCtx  = pIemCpu->CTX_SUFF(pCtx);
+    PVMCPU          pVCpu = IEMCPU_TO_VMCPU(pIemCpu);
+    VBOXSTRICTRC    rcStrict;
+    int             rc;
+
+    if (pIemCpu->uCpl != 0)
+        return iemRaiseGeneralProtectionFault0(pIemCpu);
+    Assert(!pCtx->eflags.Bits.u1VM);
+
+    /*
+     * Read the new value from the source register.
+     */
+    uint64_t NewCrX;
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+        NewCrX = iemGRegFetchU64(pIemCpu, iGReg);
+    else
+        NewCrX = iemGRegFetchU32(pIemCpu, iGReg);
+
+    /*
+     * Try store it.
+     * Unfortunately, CPUM only does a tiny bit of the work.
+     */
+    switch (iCrReg)
+    {
+        case 0:
+        {
+            /*
+             * Perform checks.
+             */
+            uint64_t const OldCrX = pCtx->cr0;
+            NewCrX |= X86_CR0_ET; /* hardcoded */
+
+            /* Check for reserved bits. */
+            uint32_t const fValid = X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS
+                                  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM
+                                  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG;
+            if (NewCrX & ~(uint64_t)fValid)
+            {
+                Log(("Trying to set reserved CR0 bits: NewCR0=%#llx InvalidBits=%#llx\n", NewCrX, NewCrX & ~(uint64_t)fValid));
+                return iemRaiseGeneralProtectionFault0(pIemCpu);
+            }
+
+            /* Check for invalid combinations. */
+            if (    (NewCrX & X86_CR0_PG)
+                && !(NewCrX & X86_CR0_PE) )
+            {
+                Log(("Trying to set CR0.PG without CR0.PE\n"));
+                return iemRaiseGeneralProtectionFault0(pIemCpu);
+            }
+
+            if (   !(NewCrX & X86_CR0_CD)
+                && (NewCrX & X86_CR0_NW) )
+            {
+                Log(("Trying to clear CR0.CD while leaving CR0.NW set\n"));
+                return iemRaiseGeneralProtectionFault0(pIemCpu);
+            }
+
+            /* Long mode consistency checks. */
+            if (    (NewCrX & X86_CR0_PG)
+                && !(OldCrX & X86_CR0_PG)
+                &&  (pCtx->msrEFER & MSR_K6_EFER_LME) )
+            {
+                if (!(pCtx->cr4 & X86_CR4_PAE))
+                {
+                    Log(("Trying to enabled long mode paging without CR4.PAE set\n"));
+                    return iemRaiseGeneralProtectionFault0(pIemCpu);
+                }
+                if (pCtx->csHid.Attr.n.u1Long)
+                {
+                    Log(("Trying to enabled long mode paging with a long CS descriptor loaded.\n"));
+                    return iemRaiseGeneralProtectionFault0(pIemCpu);
+                }
+            }
+
+            /** @todo check reserved PDPTR bits as AMD states. */
+
+            /*
+             * Change CR0.
+             */
+#ifndef IEM_VERIFICATION_MODE
+            rc = CPUMSetGuestCR0(pVCpu, NewCrX);
+            AssertRCSuccessReturn(rc, RT_FAILURE_NP(rc) ? rc : VERR_INTERNAL_ERROR_3);
+#else
+            pCtx->cr0 = NewCrX;
+#endif
+            Assert(pCtx->cr0 == NewCrX);
+
+            /*
+             * Change EFER.LMA if entering or leaving long mode.
+             */
+            if (   (NewCrX & X86_CR0_PG) != (OldCrX & X86_CR0_PG)
+                && (pCtx->msrEFER & MSR_K6_EFER_LME) )
+            {
+                uint64_t NewEFER = pCtx->msrEFER;
+                if (NewCrX & X86_CR0_PG)
+                    NewEFER |= MSR_K6_EFER_LME;
+                else
+                    NewEFER &= ~MSR_K6_EFER_LME;
+
+#ifndef IEM_VERIFICATION_MODE
+                CPUMSetGuestEFER(pVCpu, NewEFER);
+#else
+                pCtx->msrEFER = NewEFER;
+#endif
+                Assert(pCtx->msrEFER == NewEFER);
+            }
+
+#ifndef IEM_VERIFICATION_MODE
+            /*
+             * Inform PGM.
+             */
+            if (    (NewCrX & (X86_CR0_PG | X86_CR0_WP | X86_CR0_PE))
+                !=  (OldCrX & (X86_CR0_PG | X86_CR0_WP | X86_CR0_PE)) )
+            {
+                rc = PGMFlushTLB(pVCpu, pCtx->cr3, true /* global */);
+                AssertRCReturn(rc, rc);
+                /* ignore informational status codes */
+            }
+            rcStrict = PGMChangeMode(pVCpu, pCtx->cr0, pCtx->cr4, pCtx->msrEFER);
+            /** @todo Status code management.  */
+#else
+            rcStrict = VINF_SUCCESS;
+#endif
+            break;
+        }
+
+        /*
+         * CR2 can be changed without any restrictions.
+         */
+        case 2:
+            pCtx->cr2 = NewCrX;
+            break;
+
+        /*
+         * CR3 is relatively simple, although AMD and Intel give different
+         * accounts of how setting reserved bits is handled.  We take Intel's
+         * word for the lower bits and AMD's for the high bits (63:52).
+         */
+        /** @todo Testcase: Setting reserved bits in CR3, especially before
+         *        enabling paging. */
+        case 3:
+        {
+            /* check / mask the value. */
+            if (NewCrX & UINT64_C(0xfff0000000000000))
+            {
+                Log(("Trying to load CR3 with invalid high bits set: %#llx\n", NewCrX));
+                return iemRaiseGeneralProtectionFault0(pIemCpu);
+            }
+
+            uint64_t fValid;
+            if (   (pCtx->cr4 & X86_CR4_PAE)
+                && (pCtx->msrEFER & MSR_K6_EFER_LME))
+                fValid = UINT64_C(0x000ffffffffff014);
+            else if (pCtx->cr4 & X86_CR4_PAE)
+                fValid = UINT64_C(0xfffffff4);
+            else
+                fValid = UINT64_C(0xfffff014);
+            if (NewCrX & ~fValid)
+            {
+                Log(("Automatically clearing reserved bits in CR3 load: NewCR3=%#llx ClearedBits=%#llx\n",
+                     NewCrX, NewCrX & ~fValid));
+                NewCrX &= fValid;
+            }
+
+            /** @todo If we're in PAE mode we should check the PDPTRs for
+             *        invalid bits. */
+
+            /* Make the change. */
+#ifndef IEM_VERIFICATION_MODE
+            rc = CPUMSetGuestCR3(pVCpu, NewCrX);
+            AssertRCSuccessReturn(rc, rc);
+#else
+            pCtx->cr3 = NewCrX;
+#endif
+
+#ifndef IEM_VERIFICATION_MODE
+            /* Inform PGM. */
+            if (pCtx->cr0 & X86_CR0_PG)
+            {
+                rc = PGMFlushTLB(pVCpu, pCtx->cr3, !(pCtx->cr3 & X86_CR4_PGE));
+                AssertRCReturn(rc, rc);
+                /* ignore informational status codes */
+                /** @todo status code management */
+            }
+#endif
+            rcStrict = VINF_SUCCESS;
+            break;
+        }
+
+        /*
+         * CR4 is a bit more tedious as there are bits which cannot be cleared
+         * under some circumstances and such.
+         */
+        case 4:
+        {
+            uint64_t const OldCrX = pCtx->cr0;
+
+            /* reserved bits */
+            uint32_t fValid = X86_CR4_VME | X86_CR4_PVI
+                            | X86_CR4_TSD | X86_CR4_DE
+                            | X86_CR4_PSE | X86_CR4_PAE
+                            | X86_CR4_MCE | X86_CR4_PGE
+                            | X86_CR4_PCE | X86_CR4_OSFSXR
+                            | X86_CR4_OSXMMEEXCPT;
+            //if (xxx)
+            //    fValid |= X86_CR4_VMXE;
+            //if (xxx)
+            //    fValid |= X86_CR4_OSXSAVE;
+            if (NewCrX & ~(uint64_t)fValid)
+            {
+                Log(("Trying to set reserved CR4 bits: NewCR4=%#llx InvalidBits=%#llx\n", NewCrX, NewCrX & ~(uint64_t)fValid));
+                return iemRaiseGeneralProtectionFault0(pIemCpu);
+            }
+
+            /* long mode checks. */
+            if (   (OldCrX & X86_CR4_PAE)
+                && !(NewCrX & X86_CR4_PAE)
+                && (pCtx->msrEFER & MSR_K6_EFER_LMA) )
+            {
+                Log(("Trying to set clear CR4.PAE while long mode is active\n"));
+                return iemRaiseGeneralProtectionFault0(pIemCpu);
+            }
+
+
+            /*
+             * Change it.
+             */
+#ifndef IEM_VERIFICATION_MODE
+            rc = CPUMSetGuestCR4(pVCpu, NewCrX);
+            AssertRCSuccessReturn(rc, rc);
+#else
+            pCtx->cr4 = NewCrX;
+#endif
+            Assert(pCtx->cr4 == NewCrX);
+
+            /*
+             * Notify SELM and PGM.
+             */
+#ifndef IEM_VERIFICATION_MODE
+            /* SELM - VME may change things wrt to the TSS shadowing. */
+            if ((NewCrX ^ OldCrX) & X86_CR4_VME)
+                VMCPU_FF_SET(pVCpu, VMCPU_FF_SELM_SYNC_TSS);
+
+            /* PGM - flushing and mode. */
+            if (    (NewCrX & (X86_CR0_PG | X86_CR0_WP | X86_CR0_PE))
+                !=  (OldCrX & (X86_CR0_PG | X86_CR0_WP | X86_CR0_PE)) )
+            {
+                rc = PGMFlushTLB(pVCpu, pCtx->cr3, true /* global */);
+                AssertRCReturn(rc, rc);
+                /* ignore informational status codes */
+            }
+            rcStrict = PGMChangeMode(pVCpu, pCtx->cr0, pCtx->cr4, pCtx->msrEFER);
+            /** @todo Status code management.  */
+#else
+            rcStrict = VINF_SUCCESS;
+#endif
+            break;
+        }
+
+        /*
+         * CR8 maps to the APIC TPR.
+         */
+        case 8:
+#ifndef IEM_VERIFICATION_MODE
+            AssertFailedReturn(VERR_NOT_IMPLEMENTED); /** @todo implement CR8 reading and writing. */
+#else
+            rcStrict = VINF_SUCCESS;
+#endif
+            break;
+
+        IEM_NOT_REACHED_DEFAULT_CASE_RET(); /* the caller checks for valid values */
+    }
+
+    /*
+     * Advance the RIP on success.
+     */
+    /** @todo Status code management.  */
+    if (rcStrict == VINF_SUCCESS)
+        iemRegAddToRip(pIemCpu, cbInstr);
+    return rcStrict;
+}
+
+
+/**
+ * Implements 'IN eAX, port'.
+ *
+ * @param   u16Port     The source port.
+ * @param   cbReg       The register size.
+ */
+IEM_CIMPL_DEF_2(iemCImpl_in, uint16_t, u16Port, uint8_t, cbReg)
+{
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+
+    /*
+     * CPL check
+     */
+    VBOXSTRICTRC rcStrict = iemHlpCheckPortIOPermission(pIemCpu, pCtx, u16Port, cbReg);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    /*
+     * Perform the I/O.
+     */
+    uint32_t u32Value;
+#ifndef IEM_VERIFICATION_MODE
+    rcStrict = IOMIOPortRead(IEMCPU_TO_VM(pIemCpu), u16Port, &u32Value, cbReg);
+#else
+    u32Value = 0xffffffff;
+    rcStrict = VINF_SUCCESS;
+    pIemCpu->cIOReads++;
+#endif
+    if (IOM_SUCCESS(rcStrict))
+    {
+        switch (cbReg)
+        {
+            case 1: pCtx->al  = (uint8_t)u32Value;  break;
+            case 2: pCtx->ax  = (uint16_t)u32Value; break;
+            case 4: pCtx->rax = u32Value;           break;
+            default: AssertFailedReturn(VERR_INTERNAL_ERROR_3);
+        }
+        iemRegAddToRip(pIemCpu, cbInstr);
+        pIemCpu->cPotentialExits++;
+    }
+    /** @todo massage rcStrict. */
+    return rcStrict;
+}
+
+
+/**
+ * Implements 'IN eAX, DX'.
+ *
+ * @param   cbReg       The register size.
+ */
+IEM_CIMPL_DEF_1(iemCImpl_in_eAX_DX, uint8_t, cbReg)
+{
+    return IEM_CIMPL_CALL_2(iemCImpl_in, pIemCpu->CTX_SUFF(pCtx)->dx, cbReg);
+}
+
+
+/**
+ * Implements 'OUT port, eAX'.
+ *
+ * @param   u16Port     The destination port.
+ * @param   cbReg       The register size.
+ */
+IEM_CIMPL_DEF_2(iemCImpl_out, uint16_t, u16Port, uint8_t, cbReg)
+{
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+
+    /*
+     * CPL check
+     */
+    if (   (pCtx->cr0 & X86_CR0_PE)
+        && (    pIemCpu->uCpl > pCtx->eflags.Bits.u2IOPL
+            ||  pCtx->eflags.Bits.u1VM) )
+    {
+        /** @todo I/O port permission bitmap check */
+        AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+    }
+
+    /*
+     * Perform the I/O.
+     */
+    uint32_t u32Value;
+    switch (cbReg)
+    {
+        case 1: u32Value = pCtx->al;  break;
+        case 2: u32Value = pCtx->ax;  break;
+        case 4: u32Value = pCtx->eax; break;
+        default: AssertFailedReturn(VERR_INTERNAL_ERROR_3);
+    }
+#ifndef IEM_VERIFICATION_MODE
+    VBOXSTRICTRC rc = IOMIOPortWrite(IEMCPU_TO_VM(pIemCpu), u16Port, u32Value, cbReg);
+#else
+    VBOXSTRICTRC rc = VINF_SUCCESS;
+    pIemCpu->cIOWrites++;
+#endif
+    if (IOM_SUCCESS(rc))
+    {
+        iemRegAddToRip(pIemCpu, cbInstr);
+        pIemCpu->cPotentialExits++;
+        /** @todo massage rc. */
+    }
+    return rc;
+}
+
+
+/**
+ * Implements 'OUT DX, eAX'.
+ *
+ * @param   cbReg       The register size.
+ */
+IEM_CIMPL_DEF_1(iemCImpl_out_DX_eAX, uint8_t, cbReg)
+{
+    return IEM_CIMPL_CALL_2(iemCImpl_out, pIemCpu->CTX_SUFF(pCtx)->dx, cbReg);
+}
+
+
+/**
+ * Implements 'CLI'.
+ */
+IEM_CIMPL_DEF_0(iemCImpl_cli)
+{
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+
+    if (pCtx->cr0 & X86_CR0_PE)
+    {
+        uint8_t const uIopl = pCtx->eflags.Bits.u2IOPL;
+        if (!pCtx->eflags.Bits.u1VM)
+        {
+            if (pIemCpu->uCpl <= uIopl)
+                pCtx->eflags.Bits.u1IF = 0;
+            else if (   pIemCpu->uCpl == 3
+                     && (pCtx->cr4 & X86_CR4_PVI) )
+                pCtx->eflags.Bits.u1VIF = 0;
+            else
+                return iemRaiseGeneralProtectionFault0(pIemCpu);
+        }
+        /* V8086 */
+        else if (uIopl == 3)
+            pCtx->eflags.Bits.u1IF = 0;
+        else if (   uIopl < 3
+                 && (pCtx->cr4 & X86_CR4_VME) )
+            pCtx->eflags.Bits.u1VIF = 0;
+        else
+            return iemRaiseGeneralProtectionFault0(pIemCpu);
+    }
+    /* real mode */
+    else
+        pCtx->eflags.Bits.u1IF = 0;
+    iemRegAddToRip(pIemCpu, cbInstr);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Implements 'STI'.
+ */
+IEM_CIMPL_DEF_0(iemCImpl_sti)
+{
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+
+    if (pCtx->cr0 & X86_CR0_PE)
+    {
+        uint8_t const uIopl = pCtx->eflags.Bits.u2IOPL;
+        if (!pCtx->eflags.Bits.u1VM)
+        {
+            if (pIemCpu->uCpl <= uIopl)
+                pCtx->eflags.Bits.u1IF = 1;
+            else if (   pIemCpu->uCpl == 3
+                     && (pCtx->cr4 & X86_CR4_PVI)
+                     && !pCtx->eflags.Bits.u1VIP )
+                pCtx->eflags.Bits.u1VIF = 1;
+            else
+                return iemRaiseGeneralProtectionFault0(pIemCpu);
+        }
+        /* V8086 */
+        else if (uIopl == 3)
+            pCtx->eflags.Bits.u1IF = 1;
+        else if (   uIopl < 3
+                 && (pCtx->cr4 & X86_CR4_VME)
+                 && !pCtx->eflags.Bits.u1VIP )
+            pCtx->eflags.Bits.u1VIF = 1;
+        else
+            return iemRaiseGeneralProtectionFault0(pIemCpu);
+    }
+    /* real mode */
+    else
+        pCtx->eflags.Bits.u1IF = 1;
+
+    iemRegAddToRip(pIemCpu, cbInstr);
+    EMSetInhibitInterruptsPC(IEMCPU_TO_VMCPU(pIemCpu), pCtx->rip);
+    return VINF_SUCCESS;
+}
+
+
+/*
+ * Instantiate the various string operation combinations.
+ */
+#define OP_SIZE     8
+#define ADDR_SIZE   16
+#include "IEMAllCImplStrInstr.cpp.h"
+#define OP_SIZE     8
+#define ADDR_SIZE   32
+#include "IEMAllCImplStrInstr.cpp.h"
+#define OP_SIZE     8
+#define ADDR_SIZE   64
+#include "IEMAllCImplStrInstr.cpp.h"
+
+#define OP_SIZE     16
+#define ADDR_SIZE   16
+#include "IEMAllCImplStrInstr.cpp.h"
+#define OP_SIZE     16
+#define ADDR_SIZE   32
+#include "IEMAllCImplStrInstr.cpp.h"
+#define OP_SIZE     16
+#define ADDR_SIZE   64
+#include "IEMAllCImplStrInstr.cpp.h"
+
+#define OP_SIZE     32
+#define ADDR_SIZE   16
+#include "IEMAllCImplStrInstr.cpp.h"
+#define OP_SIZE     32
+#define ADDR_SIZE   32
+#include "IEMAllCImplStrInstr.cpp.h"
+#define OP_SIZE     32
+#define ADDR_SIZE   64
+#include "IEMAllCImplStrInstr.cpp.h"
+
+#define OP_SIZE     64
+#define ADDR_SIZE   32
+#include "IEMAllCImplStrInstr.cpp.h"
+#define OP_SIZE     64
+#define ADDR_SIZE   64
+#include "IEMAllCImplStrInstr.cpp.h"
+
+
+/** @} */
+
+
+/** @name   "Microcode" macros.
+ *
+ * The idea is that we should be able to use the same code to interpret
+ * instructions as well as to recompile them.  Thus this obfuscation.
+ *
+ * @{
+ */
+#define IEM_MC_BEGIN(cArgs, cLocals)                    {
+#define IEM_MC_END()                                    }
+#define IEM_MC_PAUSE()                                  do {} while (0)
+#define IEM_MC_CONTINUE()                               do {} while (0)
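+
+/* For orientation, a register-form instruction body built from these macros
+ * might read as sketched below (illustration only; the iGRegDst/iGRegSrc
+ * values and the iemAImpl_add_u16 worker are assumptions, and the decoder
+ * around it is omitted): */
+#if 0
+    IEM_MC_BEGIN(3, 0);
+    IEM_MC_ARG(uint16_t *, pu16Dst,  0);
+    IEM_MC_ARG(uint16_t,   u16Src,   1);
+    IEM_MC_ARG(uint32_t *, pEFlags,  2);
+    IEM_MC_REF_GREG_U16(pu16Dst, iGRegDst);
+    IEM_MC_FETCH_GREG_U16(u16Src, iGRegSrc);
+    IEM_MC_REF_EFLAGS(pEFlags);
+    IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_add_u16, pu16Dst, u16Src, pEFlags);
+    IEM_MC_ADVANCE_RIP();
+    IEM_MC_END();
+#endif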
+
+/** Internal macro. */
+#define IEM_MC_RETURN_ON_FAILURE(a_Expr) \
+    do \
+    { \
+        VBOXSTRICTRC rcStrict2 = a_Expr; \
+        if (rcStrict2 != VINF_SUCCESS) \
+            return rcStrict2; \
+    } while (0)
+
+#define IEM_MC_ADVANCE_RIP()                            iemRegUpdateRip(pIemCpu)
+#define IEM_MC_REL_JMP_S8(a_i8)                         IEM_MC_RETURN_ON_FAILURE(iemRegRipRelativeJumpS8(pIemCpu, a_i8))
+#define IEM_MC_REL_JMP_S16(a_i16)                       IEM_MC_RETURN_ON_FAILURE(iemRegRipRelativeJumpS16(pIemCpu, a_i16))
+#define IEM_MC_REL_JMP_S32(a_i32)                       IEM_MC_RETURN_ON_FAILURE(iemRegRipRelativeJumpS32(pIemCpu, a_i32))
+#define IEM_MC_SET_RIP_U16(a_u16NewIP)                  IEM_MC_RETURN_ON_FAILURE(iemRegRipJump((pIemCpu), (a_u16NewIP)))
+#define IEM_MC_SET_RIP_U32(a_u32NewIP)                  IEM_MC_RETURN_ON_FAILURE(iemRegRipJump((pIemCpu), (a_u32NewIP)))
+#define IEM_MC_SET_RIP_U64(a_u64NewIP)                  IEM_MC_RETURN_ON_FAILURE(iemRegRipJump((pIemCpu), (a_u64NewIP)))
+
+#define IEM_MC_RAISE_DIVIDE_ERROR()                     return iemRaiseDivideError(pIemCpu)
+
+#define IEM_MC_LOCAL(a_Type, a_Name)                    a_Type a_Name
+#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value)     a_Type const a_Name = (a_Value)
+#define IEM_MC_REF_LOCAL(a_pRefArg, a_Local)            (a_pRefArg) = &(a_Local)
+#define IEM_MC_ARG(a_Type, a_Name, a_iArg)              a_Type a_Name
+#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg)   a_Type const a_Name = (a_Value)
+#define IEM_MC_ARG_LOCAL_EFLAGS(a_pName, a_Name, a_iArg) \
+    uint32_t a_Name; \
+    uint32_t *a_pName = &a_Name
+#define IEM_MC_COMMIT_EFLAGS(a_EFlags)                  (pIemCpu)->CTX_SUFF(pCtx)->eflags.u = (a_EFlags)
+
+#define IEM_MC_ASSIGN(a_VarOrArg, a_CVariableOrConst)   (a_VarOrArg) = (a_CVariableOrConst)
+
+#define IEM_MC_FETCH_GREG_U8(a_u8Dst, a_iGReg)          (a_u8Dst)  = iemGRegFetchU8(pIemCpu, (a_iGReg))
+#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg)        (a_u16Dst) = iemGRegFetchU16(pIemCpu, (a_iGReg))
+#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg)        (a_u32Dst) = iemGRegFetchU32(pIemCpu, (a_iGReg))
+#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg)        (a_u64Dst) = iemGRegFetchU64(pIemCpu, (a_iGReg))
+#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg)        (a_u16Dst) = iemSRegFetchU16(pIemCpu, (a_iSReg))
+#define IEM_MC_FETCH_SREG_U32_ZX(a_u32Dst, a_iSReg)     (a_u32Dst) = iemSRegFetchU16(pIemCpu, (a_iSReg))
+#define IEM_MC_FETCH_SREG_U64_ZX(a_u64Dst, a_iSReg)     (a_u64Dst) = iemSRegFetchU16(pIemCpu, (a_iSReg))
+#define IEM_MC_FETCH_EFLAGS(a_EFlags)                   (a_EFlags) = (pIemCpu)->CTX_SUFF(pCtx)->eflags.u
+
+#define IEM_MC_STORE_GREG_U8(a_iGReg, a_u8Value)        *iemGRegRefU8(pIemCpu, (a_iGReg)) = (a_u8Value)
+#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value)      *(uint16_t *)iemGRegRef(pIemCpu, (a_iGReg)) = (a_u16Value)
+#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value)      *(uint64_t *)iemGRegRef(pIemCpu, (a_iGReg)) = (uint32_t)(a_u32Value) /* clear high bits. */
+#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value)      *(uint64_t *)iemGRegRef(pIemCpu, (a_iGReg)) = (a_u64Value)
+
+#define IEM_MC_REF_GREG_U8(a_pu8Dst, a_iGReg)           (a_pu8Dst) = iemGRegRefU8(pIemCpu, (a_iGReg))
+#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg)         (a_pu16Dst) = (uint16_t *)iemGRegRef(pIemCpu, (a_iGReg))
+/** @todo User of IEM_MC_REF_GREG_U32 needs to clear the high bits on
+ *        commit. */
+#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg)         (a_pu32Dst) = (uint32_t *)iemGRegRef(pIemCpu, (a_iGReg))
+#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg)         (a_pu64Dst) = (uint64_t *)iemGRegRef(pIemCpu, (a_iGReg))
+#define IEM_MC_REF_EFLAGS(a_pEFlags)                    (a_pEFlags) = &(pIemCpu)->CTX_SUFF(pCtx)->eflags.u
+
+#define IEM_MC_ADD_GREG_U8(a_iGReg,  a_u8Value)         *(uint8_t *)iemGRegRef(pIemCpu, (a_iGReg)) += (a_u8Value)
+#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u16Value)        *(uint16_t *)iemGRegRef(pIemCpu, (a_iGReg)) += (a_u16Value)
+#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u32Value) \
+    do { \
+        uint32_t *pu32Reg = (uint32_t *)iemGRegRef(pIemCpu, (a_iGReg)); \
+        *pu32Reg += (a_u32Value); \
+        pu32Reg[1] = 0; /* implicitly clear the high dword. */ \
+    } while (0)
+#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u64Value)        *(uint64_t *)iemGRegRef(pIemCpu, (a_iGReg)) += (a_u64Value)
+
+#define IEM_MC_SUB_GREG_U8(a_iGReg,  a_u8Value)         *(uint8_t *)iemGRegRef(pIemCpu, (a_iGReg)) -= (a_u8Value)
+#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u16Value)        *(uint16_t *)iemGRegRef(pIemCpu, (a_iGReg)) -= (a_u16Value)
+#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u32Value) \
+    do { \
+        uint32_t *pu32Reg = (uint32_t *)iemGRegRef(pIemCpu, (a_iGReg)); \
+        *pu32Reg -= (a_u32Value); \
+        pu32Reg[1] = 0; /* implicitly clear the high dword. */ \
+    } while (0)
+#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u64Value)        *(uint64_t *)iemGRegRef(pIemCpu, (a_iGReg)) -= (a_u64Value)
+
+#define IEM_MC_SET_EFL_BIT(a_fBit)                      do { (pIemCpu)->CTX_SUFF(pCtx)->eflags.u |= (a_fBit); } while (0)
+#define IEM_MC_CLEAR_EFL_BIT(a_fBit)                    do { (pIemCpu)->CTX_SUFF(pCtx)->eflags.u &= ~(a_fBit); } while (0)
+
+
+
+#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemFetchDataU8(pIemCpu, &(a_u8Dst), (a_iSeg), (a_GCPtrMem)))
+#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemFetchDataU16(pIemCpu, &(a_u16Dst), (a_iSeg), (a_GCPtrMem)))
+#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemFetchDataU32(pIemCpu, &(a_u32Dst), (a_iSeg), (a_GCPtrMem)))
+#define IEM_MC_FETCH_MEM_S32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemFetchDataS32SxU64(pIemCpu, &(a_u64Dst), (a_iSeg), (a_GCPtrMem)))
+#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemFetchDataU64(pIemCpu, &(a_u64Dst), (a_iSeg), (a_GCPtrMem)))
+
+#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemStoreDataU8(pIemCpu, (a_iSeg), (a_GCPtrMem), (a_u8Value)))
+#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemStoreDataU16(pIemCpu, (a_iSeg), (a_GCPtrMem), (a_u16Value)))
+#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemStoreDataU32(pIemCpu, (a_iSeg), (a_GCPtrMem), (a_u32Value)))
+#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemStoreDataU64(pIemCpu, (a_iSeg), (a_GCPtrMem), (a_u64Value)))
+
+#define IEM_MC_PUSH_U16(a_u16Value) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemStackPushU16(pIemCpu, (a_u16Value)))
+#define IEM_MC_PUSH_U32(a_u32Value) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemStackPushU32(pIemCpu, (a_u32Value)))
+#define IEM_MC_PUSH_U64(a_u64Value) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemStackPushU64(pIemCpu, (a_u64Value)))
+
+#define IEM_MC_POP_U16(a_pu16Value) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemStackPopU16(pIemCpu, (a_pu16Value)))
+#define IEM_MC_POP_U32(a_pu32Value) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemStackPopU32(pIemCpu, (a_pu32Value)))
+#define IEM_MC_POP_U64(a_pu64Value) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemStackPopU64(pIemCpu, (a_pu64Value)))
+
+/** Maps guest memory for direct or bounce buffered access.
+ * The purpose is to pass it to an operand implementation, thus the a_iArg.
+ * @remarks     May return.
+ */
+#define IEM_MC_MEM_MAP(a_pMem, a_fAccess, a_iSeg, a_GCPtrMem, a_iArg) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemMap(pIemCpu, (void **)&(a_pMem), sizeof(*(a_pMem)), (a_iSeg), (a_GCPtrMem), (a_fAccess)))
+
+/** Maps guest memory for direct or bounce buffered access.
+ * The purpose is to pass it to an operand implementation, thus the a_iArg.
+ * @remarks     May return.
+ */
+#define IEM_MC_MEM_MAP_EX(a_pvMem, a_fAccess, a_cbMem, a_iSeg, a_GCPtrMem, a_iArg) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemMap(pIemCpu, (void **)&(a_pvMem), (a_cbMem), (a_iSeg), (a_GCPtrMem), (a_fAccess)))
+
+/** Commits the memory and unmaps the guest memory.
+ * @remarks     May return.
+ */
+#define IEM_MC_MEM_COMMIT_AND_UNMAP(a_pvMem, a_fAccess) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemCommitAndUnmap(pIemCpu, (a_pvMem), (a_fAccess)))
+
+/** Calculate efficient address from R/M. */
+#define IEM_MC_CALC_RM_EFF_ADDR(a_GCPtrEff, bRm) \
+    IEM_MC_RETURN_ON_FAILURE(iemOpHlpCalcRmEffAddr(pIemCpu, (bRm), &(a_GCPtrEff)))
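+
+/* The map/commit pair brackets in-place guest memory updates; a
+ * read-modify-write sequence might be sketched like this (illustration only;
+ * bRm is assumed to come from the decoder and IEM_ACCESS_DATA_RW is the
+ * assumed access-mode constant): */
+#if 0
+    IEM_MC_BEGIN(0, 2);
+    IEM_MC_LOCAL(RTGCPTR,    GCPtrEff);
+    IEM_MC_LOCAL(uint16_t *, pu16Mem);
+    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEff, bRm);
+    IEM_MC_MEM_MAP(pu16Mem, IEM_ACCESS_DATA_RW, pIemCpu->iEffSeg, GCPtrEff, 0);
+    *pu16Mem += 1; /* direct or bounce buffered guest memory */
+    IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Mem, IEM_ACCESS_DATA_RW);
+    IEM_MC_ADVANCE_RIP();
+    IEM_MC_END();
+#endif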
+
+#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1)           (a_pfn)((a0), (a1))
+#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2)       (a_pfn)((a0), (a1), (a2))
+#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3)  (a_rc) = (a_pfn)((a0), (a1), (a2), (a3))
+
+/**
+ * Defers the rest of the instruction emulation to a C implementation routine
+ * and returns, only taking the standard parameters.
+ *
+ * @param   a_pfnCImpl      The pointer to the C routine.
+ * @sa      IEM_DECL_IMPL_C_TYPE_0 and IEM_CIMPL_DEF_0.
+ */
+#define IEM_MC_CALL_CIMPL_0(a_pfnCImpl)                 return (a_pfnCImpl)(pIemCpu, pIemCpu->offOpcode)
+
+/**
+ * Defers the rest of instruction emulation to a C implementation routine and
+ * returns, taking one argument in addition to the standard ones.
+ *
+ * @param   a_pfnCImpl      The pointer to the C routine.
+ * @param   a0              The argument.
+ */
+#define IEM_MC_CALL_CIMPL_1(a_pfnCImpl, a0)             return (a_pfnCImpl)(pIemCpu, pIemCpu->offOpcode, a0)
+
+/**
+ * Defers the rest of the instruction emulation to a C implementation routine
+ * and returns, taking two arguments in addition to the standard ones.
+ *
+ * @param   a_pfnCImpl      The pointer to the C routine.
+ * @param   a0              The first extra argument.
+ * @param   a1              The second extra argument.
+ */
+#define IEM_MC_CALL_CIMPL_2(a_pfnCImpl, a0, a1)         return (a_pfnCImpl)(pIemCpu, pIemCpu->offOpcode, a0, a1)
+
+/**
+ * Defers the rest of the instruction emulation to a C implementation routine
+ * and returns, taking three arguments in addition to the standard ones.
+ *
+ * @param   a_pfnCImpl      The pointer to the C routine.
+ * @param   a0              The first extra argument.
+ * @param   a1              The second extra argument.
+ * @param   a2              The third extra argument.
+ */
+#define IEM_MC_CALL_CIMPL_3(a_pfnCImpl, a0, a1, a2)     return (a_pfnCImpl)(pIemCpu, pIemCpu->offOpcode, a0, a1, a2)
+
+/**
+ * Defers the entire instruction emulation to a C implementation routine and
+ * returns, only taking the standard parameters.
+ *
+ * This shall be used without any IEM_MC_BEGIN or IEM_MC_END macro surrounding it.
+ *
+ * @param   a_pfnCImpl      The pointer to the C routine.
+ * @sa      IEM_DECL_IMPL_C_TYPE_0 and IEM_CIMPL_DEF_0.
+ */
+#define IEM_MC_DEFER_TO_CIMPL_0(a_pfnCImpl)             (a_pfnCImpl)(pIemCpu, pIemCpu->offOpcode)
+
+/**
+ * Defers the entire instruction emulation to a C implementation routine and
+ * returns, taking one argument in addition to the standard ones.
+ *
+ * This shall be used without any IEM_MC_BEGIN or IEM_MC_END macro surrounding it.
+ *
+ * @param   a_pfnCImpl      The pointer to the C routine.
+ * @param   a0              The argument.
+ */
+#define IEM_MC_DEFER_TO_CIMPL_1(a_pfnCImpl, a0)         (a_pfnCImpl)(pIemCpu, pIemCpu->offOpcode, a0)
+
+/**
+ * Defers the entire instruction emulation to a C implementation routine and
+ * returns, taking two arguments in addition to the standard ones.
+ *
+ * This shall be used without any IEM_MC_BEGIN or IEM_MC_END macro surrounding it.
+ *
+ * @param   a_pfnCImpl      The pointer to the C routine.
+ * @param   a0              The first extra argument.
+ * @param   a1              The second extra argument.
+ */
+#define IEM_MC_DEFER_TO_CIMPL_2(a_pfnCImpl, a0, a1)     (a_pfnCImpl)(pIemCpu, pIemCpu->offOpcode, a0, a1)
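+
+/* A decoder that needs no microcode at all can hand the whole job over; e.g.
+ * the 'IN eAX, DX' decoder could be little more than the following
+ * (illustration only; the opcode function name is an assumption): */
+#if 0
+FNIEMOP_DEF(iemOp_in_eAX_DX)
+{
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_in_eAX_DX,
+                                   pIemCpu->enmEffOpSize == IEMMODE_16BIT ? 2 : 4);
+}
+#endif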
+
+#define IEM_MC_IF_EFL_BIT_SET(a_fBit)                   if (pIemCpu->CTX_SUFF(pCtx)->eflags.u & (a_fBit)) {
+#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits)             if (pIemCpu->CTX_SUFF(pCtx)->eflags.u & (a_fBits)) {
+#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2)         \
+    if (   !!(pIemCpu->CTX_SUFF(pCtx)->eflags.u & (a_fBit1)) \
+        != !!(pIemCpu->CTX_SUFF(pCtx)->eflags.u & (a_fBit2)) ) {
+#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
+    if (   (pIemCpu->CTX_SUFF(pCtx)->eflags.u & (a_fBit)) \
+        || !!(pIemCpu->CTX_SUFF(pCtx)->eflags.u & (a_fBit1)) \
+        != !!(pIemCpu->CTX_SUFF(pCtx)->eflags.u & (a_fBit2)) ) {
+#define IEM_MC_IF_CX_IS_NZ()                            if (pIemCpu->CTX_SUFF(pCtx)->cx != 0) {
+#define IEM_MC_IF_ECX_IS_NZ()                           if (pIemCpu->CTX_SUFF(pCtx)->ecx != 0) {
+#define IEM_MC_IF_RCX_IS_NZ()                           if (pIemCpu->CTX_SUFF(pCtx)->rcx != 0) {
+#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
+        if (   pIemCpu->CTX_SUFF(pCtx)->cx != 0 \
+            && (pIemCpu->CTX_SUFF(pCtx)->eflags.u & a_fBit)) {
+#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
+        if (   pIemCpu->CTX_SUFF(pCtx)->ecx != 0 \
+            && (pIemCpu->CTX_SUFF(pCtx)->eflags.u & a_fBit)) {
+#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
+        if (   pIemCpu->CTX_SUFF(pCtx)->rcx != 0 \
+            && (pIemCpu->CTX_SUFF(pCtx)->eflags.u & a_fBit)) {
+#define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
+        if (   pIemCpu->CTX_SUFF(pCtx)->cx != 0 \
+            && !(pIemCpu->CTX_SUFF(pCtx)->eflags.u & a_fBit)) {
+#define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
+        if (   pIemCpu->CTX_SUFF(pCtx)->ecx != 0 \
+            && !(pIemCpu->CTX_SUFF(pCtx)->eflags.u & a_fBit)) {
+#define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
+        if (   pIemCpu->CTX_SUFF(pCtx)->rcx != 0 \
+            && !(pIemCpu->CTX_SUFF(pCtx)->eflags.u & a_fBit)) {
+#define IEM_MC_IF_LOCAL_IS_Z(a_Local)                   if ((a_Local) == 0) {
+#define IEM_MC_ELSE()                                   } else {
+#define IEM_MC_ENDIF()                                  } do {} while (0)
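+
+/* The IF/ELSE/ENDIF macros supply their own braces, so a conditional branch
+ * body reads like straight-line code (illustration only; i8Imm is assumed to
+ * have been fetched by the decoder): */
+#if 0
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF)
+        IEM_MC_REL_JMP_S8(i8Imm);
+    IEM_MC_ELSE()
+        IEM_MC_ADVANCE_RIP();
+    IEM_MC_ENDIF();
+    IEM_MC_END();
+#endif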
+
+/** @}  */
+
+
+/** @name   Opcode Debug Helpers.
+ * @{
+ */
+#ifdef DEBUG
+# define IEMOP_MNEMONIC(a_szMnemonic) \
+    Log2(("decode - %04x:%08RGv %s\n", pIemCpu->CTX_SUFF(pCtx)->cs, pIemCpu->CTX_SUFF(pCtx)->rip, a_szMnemonic))
+# define IEMOP_MNEMONIC2(a_szMnemonic, a_szOps) \
+    Log2(("decode - %04x:%08RGv %s %s\n", pIemCpu->CTX_SUFF(pCtx)->cs, pIemCpu->CTX_SUFF(pCtx)->rip, a_szMnemonic, a_szOps))
+#else
+# define IEMOP_MNEMONIC(a_szMnemonic) do { } while (0)
+# define IEMOP_MNEMONIC2(a_szMnemonic, a_szOps) do { } while (0)
+#endif
+
+/** @} */
+
+
+/** @name   Opcode Helpers.
+ * @{
+ */
+
+/** The instruction allows no lock prefixing (in this encoding), throw #UD if
+ * lock prefixed. */
+#define IEMOP_HLP_NO_LOCK_PREFIX() \
+    do \
+    { \
+        if (pIemCpu->fPrefixes & IEM_OP_PRF_LOCK) \
+            return IEMOP_RAISE_INVALID_LOCK_PREFIX(); \
+    } while (0)
+
+/** The instruction is not available in 64-bit mode, throw #UD if we're in
+ * 64-bit mode. */
+#define IEMOP_HLP_NO_64BIT() \
+    do \
+    { \
+        if (pIemCpu->fPrefixes & IEM_OP_PRF_LOCK) \
+            return IEMOP_RAISE_INVALID_OPCODE(); \
+    } while (0)
+
+/** The instruction defaults to 64-bit operand size if 64-bit mode. */
+#define IEMOP_HLP_DEFAULT_64BIT_OP_SIZE() \
+    do \
+    { \
+        if (pIemCpu->enmCpuMode == IEMMODE_64BIT) \
+            iemRecalEffOpSize64Default(pIemCpu); \
+    } while (0)
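+
+/* Taken together, the helpers above typically open an opcode decoder; a
+ * 'push rBP' style decoder might start out like this (illustration only;
+ * names are assumptions and only the 16-bit operand size is shown): */
+#if 0
+FNIEMOP_DEF(iemOp_push_eBP)
+{
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE(); /* push defaults to 64-bit operands in long mode */
+    if (pIemCpu->enmEffOpSize == IEMMODE_16BIT)
+    {
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(uint16_t, u16Value);
+        IEM_MC_FETCH_GREG_U16(u16Value, X86_GREG_xBP);
+        IEM_MC_PUSH_U16(u16Value);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    /* ... the 32-bit and 64-bit operand sizes would be handled likewise ... */
+    return VINF_SUCCESS;
+}
+#endif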
+
+
+
+/**
+ * Calculates the effective address of a ModR/M memory operand.
+ *
+ * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
+ *
+ * @return  Strict VBox status code.
+ * @param   pIemCpu             The IEM per CPU data.
+ * @param   bRm                 The ModRM byte.
+ * @param   pGCPtrEff           Where to return the effective address.
+ */
+static VBOXSTRICTRC iemOpHlpCalcRmEffAddr(PIEMCPU pIemCpu, uint8_t bRm, PRTGCPTR pGCPtrEff)
+{
+    LogFlow(("iemOpHlpCalcRmEffAddr: bRm=%#x\n", bRm));
+    PCCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+#define SET_SS_DEF() \
+    do \
+    { \
+        if (!(pIemCpu->fPrefixes & IEM_OP_PRF_SEG_MASK)) \
+            pIemCpu->iEffSeg = X86_SREG_SS; \
+    } while (0)
+
+/** @todo Check the effective address size crap! */
+    switch (pIemCpu->enmEffAddrMode)
+    {
+        case IEMMODE_16BIT:
+        {
+            uint16_t u16EffAddr;
+
+            /* Handle the disp16 form with no registers first. */
+            if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
+                IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16EffAddr);
+            else
+            {
+                /* Get the displacement. */
+                switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
+                {
+                    case 0:  u16EffAddr = 0;                                       break;
+                    case 1:  IEM_OPCODE_GET_NEXT_S8_SX_U16(pIemCpu, &u16EffAddr);  break;
+                    case 2:  IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16EffAddr);        break;
+                    default: AssertFailedReturn(VERR_INTERNAL_ERROR_2); /* (caller checked for these) */
+                }
+
+                /* Add the base and index registers to the disp. */
+                switch (bRm & X86_MODRM_RM_MASK)
+                {
+                    case 0: u16EffAddr += pCtx->bx + pCtx->si; break;
+                    case 1: u16EffAddr += pCtx->bx + pCtx->di; break;
+                    case 2: u16EffAddr += pCtx->bp + pCtx->si; SET_SS_DEF(); break;
+                    case 3: u16EffAddr += pCtx->bp + pCtx->di; SET_SS_DEF(); break;
+                    case 4: u16EffAddr += pCtx->si;            break;
+                    case 5: u16EffAddr += pCtx->di;            break;
+                    case 6: u16EffAddr += pCtx->bp;            SET_SS_DEF(); break;
+                    case 7: u16EffAddr += pCtx->bx;            break;
+                }
+            }
+
+            *pGCPtrEff = u16EffAddr;
+            LogFlow(("iemOpHlpCalcRmEffAddr: EffAddr=%#06RGv\n", *pGCPtrEff));
+            return VINF_SUCCESS;
+        }
+
+        case IEMMODE_32BIT:
+        {
+            uint32_t u32EffAddr;
+
+            /* Handle the disp32 form with no registers first. */
+            if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
+                IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32EffAddr);
+            else
+            {
+                /* Get the register (or SIB) value. */
+                switch ((bRm & X86_MODRM_RM_MASK))
+                {
+                    case 0: u32EffAddr = pCtx->eax; break;
+                    case 1: u32EffAddr = pCtx->ecx; break;
+                    case 2: u32EffAddr = pCtx->edx; break;
+                    case 3: u32EffAddr = pCtx->ebx; break;
+                    case 4: /* SIB */
+                    {
+                        uint8_t bSib; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bSib);
+
+                        /* Get the index and scale it. */
+                        switch ((bSib & X86_SIB_INDEX_SHIFT) >> X86_SIB_INDEX_SMASK)
+                        {
+                            case 0: u32EffAddr = pCtx->eax; break;
+                            case 1: u32EffAddr = pCtx->ecx; break;
+                            case 2: u32EffAddr = pCtx->edx; break;
+                            case 3: u32EffAddr = pCtx->ebx; break;
+                            case 4: u32EffAddr = 0; /* none */ break;
+                            case 5: u32EffAddr = pCtx->ebp; break;
+                            case 6: u32EffAddr = pCtx->esi; break;
+                            case 7: u32EffAddr = pCtx->edi; break;
+                        }
+                        u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
+
+                        /* add base */
+                        switch (bSib & X86_SIB_BASE_MASK)
+                        {
+                            case 0: u32EffAddr += pCtx->eax; break;
+                            case 1: u32EffAddr += pCtx->ecx; break;
+                            case 2: u32EffAddr += pCtx->edx; break;
+                            case 3: u32EffAddr += pCtx->ebx; break;
+                            case 4: u32EffAddr += pCtx->esp; SET_SS_DEF(); break;
+                            case 5:
+                                if ((bRm & X86_MODRM_MOD_MASK) != 0)
+                                {
+                                    u32EffAddr += pCtx->ebp;
+                                    SET_SS_DEF();
+                                }
+                                else
+                                {
+                                    uint32_t u32Disp;
+                                    IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Disp);
+                                    u32EffAddr += u32Disp;
+                                }
+                                break;
+                            case 6: u32EffAddr += pCtx->esi; break;
+                            case 7: u32EffAddr += pCtx->edi; break;
+                        }
+                        break;
+                    }
+                    case 5: u32EffAddr = pCtx->ebp; SET_SS_DEF(); break;
+                    case 6: u32EffAddr = pCtx->esi; break;
+                    case 7: u32EffAddr = pCtx->edi; break;
+                }
+
+                /* Get and add the displacement. */
+                switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
+                {
+                    case 0:
+                        break;
+                    case 1:
+                    {
+                        int8_t i8Disp;
+                        IEM_OPCODE_GET_NEXT_S8(pIemCpu, &i8Disp);
+                        u32EffAddr += i8Disp;
+                        break;
+                    }
+                    case 2:
+                    {
+                        uint32_t u32Disp;
+                        IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Disp);
+                        u32EffAddr += u32Disp;
+                        break;
+                    }
+                    default:
+                        AssertFailedReturn(VERR_INTERNAL_ERROR_2); /* (caller checked for these) */
+                }
+
+            }
+            if (pIemCpu->enmEffAddrMode == IEMMODE_32BIT)
+                *pGCPtrEff = u32EffAddr;
+            else
+            {
+                Assert(pIemCpu->enmEffAddrMode == IEMMODE_16BIT);
+                *pGCPtrEff = u32EffAddr & UINT16_MAX;
+            }
+            LogFlow(("iemOpHlpCalcRmEffAddr: EffAddr=%#010RGv\n", *pGCPtrEff));
+            return VINF_SUCCESS;
+        }
+
+        case IEMMODE_64BIT:
+        {
+            uint64_t u64EffAddr;
+
+            /* Handle the rip+disp32 form with no registers first. */
+            if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
+            {
+                IEM_OPCODE_GET_NEXT_S32_SX_U64(pIemCpu, &u64EffAddr);
+                u64EffAddr += pCtx->rip + pIemCpu->offOpcode;
+            }
+            else
+            {
+                /* Get the register (or SIB) value. */
+                switch ((bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB)
+                {
+                    case  0: u64EffAddr = pCtx->rax; break;
+                    case  1: u64EffAddr = pCtx->rcx; break;
+                    case  2: u64EffAddr = pCtx->rdx; break;
+                    case  3: u64EffAddr = pCtx->rbx; break;
+                    case  5: u64EffAddr = pCtx->rbp; SET_SS_DEF(); break;
+                    case  6: u64EffAddr = pCtx->rsi; break;
+                    case  7: u64EffAddr = pCtx->rdi; break;
+                    case  8: u64EffAddr = pCtx->r8;  break;
+                    case  9: u64EffAddr = pCtx->r9;  break;
+                    case 10: u64EffAddr = pCtx->r10; break;
+                    case 11: u64EffAddr = pCtx->r11; break;
+                    case 13: u64EffAddr = pCtx->r13; break;
+                    case 14: u64EffAddr = pCtx->r14; break;
+                    case 15: u64EffAddr = pCtx->r15; break;
+                    /* SIB */
+                    case 4:
+                    case 12:
+                    {
+                        uint8_t bSib; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bSib);
+
+                        /* Get the index and scale it. */
+                        switch (((bSib & X86_SIB_INDEX_SHIFT) >> X86_SIB_INDEX_SMASK) | pIemCpu->uRexIndex)
+                        {
+                            case  0: u64EffAddr = pCtx->rax; break;
+                            case  1: u64EffAddr = pCtx->rcx; break;
+                            case  2: u64EffAddr = pCtx->rdx; break;
+                            case  3: u64EffAddr = pCtx->rbx; break;
+                            case  4: u64EffAddr = 0; /* none */ break;
+                            case  5: u64EffAddr = pCtx->rbp; break;
+                            case  6: u64EffAddr = pCtx->rsi; break;
+                            case  7: u64EffAddr = pCtx->rdi; break;
+                            case  8: u64EffAddr = pCtx->r8;  break;
+                            case  9: u64EffAddr = pCtx->r9;  break;
+                            case 10: u64EffAddr = pCtx->r10; break;
+                            case 11: u64EffAddr = pCtx->r11; break;
+                            case 12: u64EffAddr = pCtx->r12; break;
+                            case 13: u64EffAddr = pCtx->r13; break;
+                            case 14: u64EffAddr = pCtx->r14; break;
+                            case 15: u64EffAddr = pCtx->r15; break;
+                            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+                        }
+                        u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
+
+                        /* add base */
+                        switch ((bSib & X86_SIB_BASE_MASK) | pIemCpu->uRexB)
+                        {
+                            case  0: u64EffAddr += pCtx->rax; break;
+                            case  1: u64EffAddr += pCtx->rcx; break;
+                            case  2: u64EffAddr += pCtx->rdx; break;
+                            case  3: u64EffAddr += pCtx->rbx; break;
+                            case  4: u64EffAddr += pCtx->rsp; SET_SS_DEF(); break;
+                            case  6: u64EffAddr += pCtx->rsi; break;
+                            case  7: u64EffAddr += pCtx->rdi; break;
+                            case  8: u64EffAddr += pCtx->r8;  break;
+                            case  9: u64EffAddr += pCtx->r9;  break;
+                            case 10: u64EffAddr += pCtx->r10; break;
+                            case 11: u64EffAddr += pCtx->r11; break;
+                            case 14: u64EffAddr += pCtx->r14; break;
+                            case 15: u64EffAddr += pCtx->r15; break;
+                            /* complicated encodings */
+                            case 5:
+                            case 13:
+                                if ((bRm & X86_MODRM_MOD_MASK) != 0)
+                                {
+                                    if (!pIemCpu->uRexB)
+                                    {
+                                        u64EffAddr += pCtx->rbp;
+                                        SET_SS_DEF();
+                                    }
+                                    else
+                                        u64EffAddr += pCtx->r13;
+                                }
+                                else
+                                {
+                                    uint32_t u32Disp;
+                                    IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Disp);
+                                    u64EffAddr += (int32_t)u32Disp;
+                                }
+                                break;
+                        }
+                        break;
+                    }
+                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
+                }
+
+                /* Get and add the displacement. */
+                switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
+                {
+                    case 0:
+                        break;
+                    case 1:
+                    {
+                        int8_t i8Disp;
+                        IEM_OPCODE_GET_NEXT_S8(pIemCpu, &i8Disp);
+                        u64EffAddr += i8Disp;
+                        break;
+                    }
+                    case 2:
+                    {
+                        uint32_t u32Disp;
+                        IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Disp);
+                        u64EffAddr += (int32_t)u32Disp;
+                        break;
+                    }
+                    IEM_NOT_REACHED_DEFAULT_CASE_RET(); /* (caller checked for these) */
+                }
+
+            }
+            if (pIemCpu->enmEffAddrMode == IEMMODE_64BIT)
+                *pGCPtrEff = u64EffAddr;
+            else
+                *pGCPtrEff = u64EffAddr & UINT16_MAX;
+            LogFlow(("iemOpHlpCalcRmEffAddr: EffAddr=%#010RGv\n", *pGCPtrEff));
+            return VINF_SUCCESS;
+        }
+    }
+
+    AssertFailedReturn(VERR_INTERNAL_ERROR_3);
+}
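+
+/* Worked example (illustration only): in 32-bit mode, bRm=0x44 gives mod=1,
+ * rm=4, so a SIB byte and a disp8 follow.  With bSib=0x5C (scale field 01 =
+ * x2, index=ebx, base=esp) the result is esp + ebx*2 + disp8, and SS becomes
+ * the default segment because the base is esp. */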
+
+/** @}  */
+
+
+
+/*
+ * Include the instructions
+ */
+#include "IEMAllInstructions.cpp.h"
+
+
+
+
+#if defined(IEM_VERIFICATION_MODE) && defined(IN_RING3)
+
+/**
+ * Sets up execution verification mode.
+ */
+static void iemExecVerificationModeSetup(PIEMCPU pIemCpu)
+{
+    static CPUMCTX  s_DebugCtx; /* Ugly! */
+
+    PCPUMCTX pOrgCtx = pIemCpu->CTX_SUFF(pCtx);
+    s_DebugCtx = *pOrgCtx;
+    pIemCpu->CTX_SUFF(pCtx) = &s_DebugCtx;
+    pIemCpu->cIOReads    = 0;
+    pIemCpu->cIOWrites   = 0;
+    pIemCpu->fMulDivHack = false;
+    pIemCpu->fShlHack    = false;
+
+}
+
+/**
+ * Performs the post-execution verification checks.
+ */
+static void iemExecVerificationModeCheck(PIEMCPU pIemCpu)
+{
+    PCPUMCTX    pOrgCtx   = CPUMQueryGuestCtxPtr(IEMCPU_TO_VMCPU(pIemCpu));
+    PCPUMCTX    pDebugCtx = pIemCpu->CTX_SUFF(pCtx);
+    Assert(pOrgCtx != pDebugCtx);
+    pIemCpu->CTX_SUFF(pCtx) = pOrgCtx;
+
+    int rc = REMR3EmulateInstruction(IEMCPU_TO_VM(pIemCpu), IEMCPU_TO_VMCPU(pIemCpu));
+    AssertRC(rc);
+
+    if (memcmp(pOrgCtx, pDebugCtx, sizeof(*pDebugCtx)))
+    {
+        Log(("REM and IEM ends up with different registers!\n"));
+        unsigned cDiffs = 0;
+
+# define CHECK_FIELD(a_Field) \
+        do \
+        { \
+            if (pOrgCtx->a_Field != pDebugCtx->a_Field) \
+            { \
+                switch (sizeof(pOrgCtx->a_Field)) \
+                { \
+                    case 1: RTAssertMsg2Weak("  %8s differs - iem=%02x - rem=%02x\n", #a_Field, pDebugCtx->a_Field, pOrgCtx->a_Field); break; \
+                    case 2: RTAssertMsg2Weak("  %8s differs - iem=%04x - rem=%04x\n", #a_Field, pDebugCtx->a_Field, pOrgCtx->a_Field); break; \
+                    case 4: RTAssertMsg2Weak("  %8s differs - iem=%08x - rem=%08x\n", #a_Field, pDebugCtx->a_Field, pOrgCtx->a_Field); break; \
+                    case 8: RTAssertMsg2Weak("  %8s differs - iem=%016llx - rem=%016llx\n", #a_Field, pDebugCtx->a_Field, pOrgCtx->a_Field); break; \
+                    default: RTAssertMsg2Weak("  %8s differs\n", #a_Field); break; \
+                } \
+                cDiffs++; \
+            } \
+        } while (0)
+
+# define CHECK_BIT_FIELD(a_Field) \
+        do \
+        { \
+            if (pOrgCtx->a_Field != pDebugCtx->a_Field) \
+            { \
+                RTAssertMsg2Weak("  %8s differs - iem=%02x - rem=%02x\n", #a_Field, pDebugCtx->a_Field, pOrgCtx->a_Field); \
+                cDiffs++; \
+            } \
+        } while (0)
+
+        if (memcmp(&pOrgCtx->fpu, &pDebugCtx->fpu, sizeof(pDebugCtx->fpu)))
+        {
+            if (pIemCpu->cInstructions != 1)
+            {
+                RTAssertMsg2Weak("  the FPU state differs\n");
+                cDiffs++;
+            }
+            else
+                RTAssertMsg2Weak("  the FPU state differs - happends the first time...\n");
+        }
+        CHECK_FIELD(rip);
+        uint32_t fFlagsMask = UINT32_MAX;
+        if (pIemCpu->fMulDivHack)
+            fFlagsMask &= ~(X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF);
+        if (pIemCpu->fShlHack)
+            fFlagsMask &= ~(X86_EFL_OF);
+        if ((pOrgCtx->rflags.u & fFlagsMask) != (pDebugCtx->rflags.u & fFlagsMask))
+        {
+            RTAssertMsg2Weak("  rflags differs - iem=%08llx rem=%08llx\n", pDebugCtx->rflags.u, pOrgCtx->rflags.u);
+            CHECK_BIT_FIELD(rflags.Bits.u1CF);
+            /*CHECK_BIT_FIELD(rflags.Bits.u1Reserved0); */ /** @todo why does REM set this? */
+            CHECK_BIT_FIELD(rflags.Bits.u1PF);
+            CHECK_BIT_FIELD(rflags.Bits.u1Reserved1);
+            CHECK_BIT_FIELD(rflags.Bits.u1AF);
+            CHECK_BIT_FIELD(rflags.Bits.u1Reserved2);
+            CHECK_BIT_FIELD(rflags.Bits.u1ZF);
+            CHECK_BIT_FIELD(rflags.Bits.u1SF);
+            CHECK_BIT_FIELD(rflags.Bits.u1TF);
+            CHECK_BIT_FIELD(rflags.Bits.u1IF);
+            CHECK_BIT_FIELD(rflags.Bits.u1DF);
+            CHECK_BIT_FIELD(rflags.Bits.u1OF);
+            CHECK_BIT_FIELD(rflags.Bits.u2IOPL);
+            CHECK_BIT_FIELD(rflags.Bits.u1NT);
+            CHECK_BIT_FIELD(rflags.Bits.u1Reserved3);
+            CHECK_BIT_FIELD(rflags.Bits.u1RF);
+            CHECK_BIT_FIELD(rflags.Bits.u1VM);
+            CHECK_BIT_FIELD(rflags.Bits.u1AC);
+            CHECK_BIT_FIELD(rflags.Bits.u1VIF);
+            CHECK_BIT_FIELD(rflags.Bits.u1VIP);
+            CHECK_BIT_FIELD(rflags.Bits.u1ID);
+        }
+
+        if (pIemCpu->cIOReads != 1)
+            CHECK_FIELD(rax);
+        CHECK_FIELD(rcx);
+        CHECK_FIELD(rdx);
+        CHECK_FIELD(rbx);
+        CHECK_FIELD(rsp);
+        CHECK_FIELD(rbp);
+        CHECK_FIELD(rsi);
+        CHECK_FIELD(rdi);
+        CHECK_FIELD(r8);
+        CHECK_FIELD(r9);
+        CHECK_FIELD(r10);
+        CHECK_FIELD(r11);
+        CHECK_FIELD(r12);
+        CHECK_FIELD(r13);
+        CHECK_FIELD(r14);
+        CHECK_FIELD(r15);
+        CHECK_FIELD(cs);
+        CHECK_FIELD(csHid.u64Base);
+        CHECK_FIELD(csHid.u32Limit);
+        CHECK_FIELD(csHid.Attr.u);
+        CHECK_FIELD(ss);
+        CHECK_FIELD(ssHid.u64Base);
+        CHECK_FIELD(ssHid.u32Limit);
+        CHECK_FIELD(ssHid.Attr.u);
+        CHECK_FIELD(ds);
+        CHECK_FIELD(dsHid.u64Base);
+        CHECK_FIELD(dsHid.u32Limit);
+        CHECK_FIELD(dsHid.Attr.u);
+        CHECK_FIELD(es);
+        CHECK_FIELD(esHid.u64Base);
+        CHECK_FIELD(esHid.u32Limit);
+        CHECK_FIELD(esHid.Attr.u);
+        CHECK_FIELD(fs);
+        CHECK_FIELD(fsHid.u64Base);
+        CHECK_FIELD(fsHid.u32Limit);
+        CHECK_FIELD(fsHid.Attr.u);
+        CHECK_FIELD(gs);
+        CHECK_FIELD(gsHid.u64Base);
+        CHECK_FIELD(gsHid.u32Limit);
+        CHECK_FIELD(gsHid.Attr.u);
+        CHECK_FIELD(cr0);
+        CHECK_FIELD(cr2);
+        CHECK_FIELD(cr3);
+        CHECK_FIELD(cr4);
+        CHECK_FIELD(dr[0]);
+        CHECK_FIELD(dr[1]);
+        CHECK_FIELD(dr[2]);
+        CHECK_FIELD(dr[3]);
+        CHECK_FIELD(dr[6]);
+        CHECK_FIELD(dr[7]);
+        CHECK_FIELD(gdtr.cbGdt);
+        CHECK_FIELD(gdtr.pGdt);
+        CHECK_FIELD(idtr.cbIdt);
+        CHECK_FIELD(idtr.pIdt);
+        CHECK_FIELD(ldtr);
+        CHECK_FIELD(ldtrHid.u64Base);
+        CHECK_FIELD(ldtrHid.u32Limit);
+        CHECK_FIELD(ldtrHid.Attr.u);
+        CHECK_FIELD(tr);
+        CHECK_FIELD(trHid.u64Base);
+        CHECK_FIELD(trHid.u32Limit);
+        CHECK_FIELD(trHid.Attr.u);
+        CHECK_FIELD(SysEnter.cs);
+        CHECK_FIELD(SysEnter.eip);
+        CHECK_FIELD(SysEnter.esp);
+        CHECK_FIELD(msrEFER);
+        CHECK_FIELD(msrSTAR);
+        CHECK_FIELD(msrPAT);
+        CHECK_FIELD(msrLSTAR);
+        CHECK_FIELD(msrCSTAR);
+        CHECK_FIELD(msrSFMASK);
+        CHECK_FIELD(msrKERNELGSBASE);
+
+        if (cDiffs != 0)
+            AssertFailed();
+# undef CHECK_FIELD
+    }
+    pIemCpu->CTX_SUFF(pCtx) = pOrgCtx;
+}
+
+#endif /* IEM_VERIFICATION_MODE && IN_RING3 */
+
+
+/**
+ * Execute one instruction.
+ *
+ * @return  Strict VBox status code.
+ * @param   pVCpu       The current virtual CPU.
+ */
+VMMDECL(VBOXSTRICTRC) IEMExecOne(PVMCPU pVCpu)
+{
+    PIEMCPU  pIemCpu = &pVCpu->iem.s;
+#ifdef DEBUG
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+    char     szInstr[256];
+    uint32_t cbInstr = 0;
+    DBGFR3DisasInstrEx(pVCpu->pVMR3, pVCpu->idCpu, 0, 0,
+                       DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
+                       szInstr, sizeof(szInstr), &cbInstr);
+
+    Log2(("**** "
+          " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
+          " eip=%08x esp=%08x ebp=%08x iopl=%d\n"
+          " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
+          " %s\n"
+          ,
+          pCtx->eax, pCtx->ebx, pCtx->ecx, pCtx->edx, pCtx->esi, pCtx->edi,
+          pCtx->eip, pCtx->esp, pCtx->ebp, pCtx->eflags.Bits.u2IOPL,
+          (RTSEL)pCtx->cs, (RTSEL)pCtx->ss, (RTSEL)pCtx->ds, (RTSEL)pCtx->es,
+          (RTSEL)pCtx->fs, (RTSEL)pCtx->gs, pCtx->eflags.u,
+          szInstr));
+#endif
+#if defined(IEM_VERIFICATION_MODE) && defined(IN_RING3)
+    iemExecVerificationModeSetup(pIemCpu);
+#endif
+
+    /*
+     * Do the decoding and emulation.
+     */
+    VBOXSTRICTRC rcStrict = iemInitDecoderAndPrefetchOpcodes(pIemCpu);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+    rcStrict = FNIEMOP_CALL(g_apfnOneByteMap[b]);
+    if (rcStrict == VINF_SUCCESS)
+        pIemCpu->cInstructions++;
+
+    /* Execute the next instruction as well if a cli, pop ss or
+       mov ss, Gr has just completed successfully. */
+    if (   rcStrict == VINF_SUCCESS
+        && VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)
+        && EMGetInhibitInterruptsPC(pVCpu) == pIemCpu->CTX_SUFF(pCtx)->rip )
+    {
+        rcStrict = iemInitDecoderAndPrefetchOpcodes(pIemCpu);
+        if (rcStrict == VINF_SUCCESS)
+        {
+            IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+            rcStrict = FNIEMOP_CALL(g_apfnOneByteMap[b]);
+            if (rcStrict == VINF_SUCCESS)
+                pIemCpu->cInstructions++;
+        }
+    }
+
+    /*
+     * Assert some sanity.
+     */
+#ifdef DEBUG
+    AssertMsg(pIemCpu->offOpcode == cbInstr || rcStrict != VINF_SUCCESS, ("%u %u\n", pIemCpu->offOpcode, cbInstr));
+#endif
+#if defined(IEM_VERIFICATION_MODE) && defined(IN_RING3)
+    iemExecVerificationModeCheck(pIemCpu);
+#endif
+    return rcStrict;
+}
+
Index: /trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm
===================================================================
--- /trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm	(revision 36768)
+++ /trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm	(revision 36768)
@@ -0,0 +1,840 @@
+; $Id$
+;; @file
+; IEM - Instruction Implementation in Assembly.
+;
+
+; Copyright (C) 2011 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;   Header Files                                                               ;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%include "VBox/asmdefs.mac"
+%include "VBox/err.mac"
+%include "VBox/x86.mac"
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;   Defined Constants And Macros                                               ;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;
+; We employ some macro assembly here to hide the calling convention differences.
+;
+%ifdef RT_ARCH_AMD64
+ %macro PROLOGUE_1_ARGS 0
+ %endmacro
+ %macro EPILOGUE_1_ARGS 0
+ %endmacro
+ %macro PROLOGUE_2_ARGS 0
+ %endmacro
+ %macro EPILOGUE_2_ARGS 0
+ %endmacro
+ %macro PROLOGUE_3_ARGS 0
+ %endmacro
+ %macro EPILOGUE_3_ARGS 0
+ %endmacro
+ %macro PROLOGUE_4_ARGS 0
+ %endmacro
+ %macro EPILOGUE_4_ARGS 0
+ %endmacro
+
+ %ifdef ASM_CALL64_GCC
+  %define A0        rdi
+  %define A0_32     edi
+  %define A0_16     di
+  %define A0_8      dil
+
+  %define A1        rsi
+  %define A1_32     esi
+  %define A1_16     si
+  %define A1_8      sil
+
+  %define A2        rdx
+  %define A2_32     edx
+  %define A2_16     dx
+  %define A2_8      dl
+
+  %define A3        rcx
+  %define A3_32     ecx
+  %define A3_16     cx
+ %endif
+
+ %ifdef ASM_CALL64_MSC
+  %define A0        rcx
+  %define A0_32     ecx
+  %define A0_16     cx
+  %define A0_8      cl
+
+  %define A1        rdx
+  %define A1_32     edx
+  %define A1_16     dx
+  %define A1_8      dl
+
+  %define A2        r8
+  %define A2_32     r8d
+  %define A2_16     r8w
+  %define A2_8      r8b
+
+  %define A3        r9
+  %define A3_32     r9d
+  %define A3_16     r9w
+ %endif
+
+ %define T0         rax
+ %define T0_32      eax
+ %define T0_16      ax
+ %define T0_8       al
+
+ %define T1         r11
+ %define T1_32      r11d
+ %define T1_16      r11w
+ %define T1_8       r11b
+
+%else
+ ; x86
+ %macro PROLOGUE_1_ARGS 0
+        push    edi
+ %endmacro
+ %macro EPILOGUE_1_ARGS 0
+        pop     edi
+ %endmacro
+
+ %macro PROLOGUE_2_ARGS 0
+        push    edi
+ %endmacro
+ %macro EPILOGUE_2_ARGS 0
+        pop     edi
+ %endmacro
+
+ %macro PROLOGUE_3_ARGS 0
+        push    ebx
+        mov     ebx, [esp + 4 + 4]
+        push    edi
+ %endmacro
+ %macro EPILOGUE_3_ARGS 0
+        pop     edi
+        pop     ebx
+ %endmacro
+
+ %macro PROLOGUE_4_ARGS 0
+        push    ebx
+        push    edi
+        push    esi
+        mov     ebx, [esp + 12 + 4 + 0]
+        mov     esi, [esp + 12 + 4 + 4]
+ %endmacro
+ %macro EPILOGUE_4_ARGS 0
+        pop     esi
+        pop     edi
+        pop     ebx
+ %endmacro
+
+ %define A0         ecx
+ %define A0_32      ecx
+ %define A0_16       cx
+ %define A0_8        cl
+
+ %define A1         edx
+ %define A1_32      edx
+ %define A1_16      dx
+ %define A1_8       dl
+
+ %define A2         ebx
+ %define A2_32      ebx
+ %define A2_16      bx
+ %define A2_8       bl
+
+ %define A3         esi
+ %define A3_32      esi
+ %define A3_16      si
+
+ %define T0         eax
+ %define T0_32      eax
+ %define T0_16      ax
+ %define T0_8       al
+
+ %define T1         edi
+ %define T1_32      edi
+ %define T1_16      di
+%endif
+
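+;
+; Illustration (comment only, not assembled): with the mappings above a
+; three-argument helper such as iemAImpl_add_u32 receives the destination
+; pointer in A0, the source value in A1 and the eflags pointer in A2.  That
+; is rcx/rdx/r8 on AMD64 with the Microsoft convention, rdi/rsi/rdx on
+; AMD64 with the GCC convention, and on x86 the fastcall-style ecx/edx plus
+; a stack argument that PROLOGUE_3_ARGS loads into ebx.
+;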
+
+;;
+; Load the relevant flags from [%1] if there are undefined flags (%3).
+;
+; @remarks      Clobbers T0, stack. Changes EFLAGS.
+; @param        1       The parameter (A0..A3) pointing to the eflags.
+; @param        2       The set of modified flags.
+; @param        3       The set of undefined flags.
+;
+%macro IEM_MAYBE_LOAD_FLAGS 3
+ ;%if (%3) != 0
+        pushf                           ; store current flags
+        mov     T0_32, [%1]             ; load the guest flags
+        and     dword [xSP], ~(%2 | %3) ; mask out the modified and undefined flags
+        and     T0_32, (%2 | %3)        ; select the modified and undefined flags.
+        or      [xSP], T0               ; merge guest flags with host flags.
+        popf                            ; load the mixed flags.
+ ;%endif
+%endmacro
+
+;;
+; Update the eflags, saving the modified (and maybe the undefined) flags.
+;
+; @remarks  Clobbers T0, T1, stack.
+; @param        1       The register pointing to the EFLAGS.
+; @param        2       The mask of modified flags to save.
+; @param        3       The mask of undefined flags to (maybe) save.
+;
+%macro IEM_SAVE_FLAGS 3
+ %if (%2 | %3) != 0
+        pushf
+        pop     T1
+        mov     T0_32, [%1]             ; flags
+        and     T0_32, ~(%2 | %3)       ; clear the modified & undefined flags.
+  %ifndef IEM_VERIFICATION_MODE
+        and     T1_32, (%2 | %3)        ; select the modified and undefined flags.
+  %else
+        and     T1_32, (%2)             ; select the modified flags, leave the
+                                        ; undefined cleared. This matches REM better.
+  %endif
+        or      T0_32, T1_32            ; combine the flags.
+        mov     [%1], T0_32             ; save the flags.
+ %endif
+%endmacro
+
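+;
+; Illustrative expansion (a sketch, not actual assembler output): outside
+; IEM_VERIFICATION_MODE, IEM_SAVE_FLAGS A2, X86_EFL_CF, 0 emits roughly:
+;
+;       pushf
+;       pop     T1
+;       mov     T0_32, [A2]             ; the guest eflags
+;       and     T0_32, ~X86_EFL_CF      ; drop the stale CF
+;       and     T1_32, X86_EFL_CF       ; keep only the freshly computed CF
+;       or      T0_32, T1_32            ; combine
+;       mov     [A2], T0_32             ; store back
+;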
+
+;;
+; Macro for implementing a binary operator.
+;
+; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
+; variants, except on 32-bit systems where the 64-bit accesses require hand
+; coding.
+;
+; All the functions take a pointer to the destination memory operand in A0,
+; the source register operand in A1 and a pointer to eflags in A2.
+;
+; @param        1       The instruction mnemonic.
+; @param        2       Non-zero if there should be a locked version.
+; @param        3       If non-zero, load the affected flags prior to
+;                       execution (for dealing with undefined flags).
+; @param        4       The affected flags.
+;
+%macro IEMIMPL_BIN_OP 4
+BEGINPROC iemAImpl_ %+ %1 %+ _u8
+        PROLOGUE_3_ARGS
+        IEM_MAYBE_LOAD_FLAGS           A2, %3, %4
+        %1      byte [A0], A1_8
+        IEM_SAVE_FLAGS                 A2, %3, %4
+        EPILOGUE_3_ARGS
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u8
+
+BEGINPROC iemAImpl_ %+ %1 %+ _u16
+        PROLOGUE_3_ARGS
+        IEM_MAYBE_LOAD_FLAGS           A2, %3, %4
+        %1      word [A0], A1_16
+        IEM_SAVE_FLAGS                 A2, %3, %4
+        EPILOGUE_3_ARGS
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u16
+
+BEGINPROC iemAImpl_ %+ %1 %+ _u32
+        PROLOGUE_3_ARGS
+        IEM_MAYBE_LOAD_FLAGS           A2, %3, %4
+        %1      dword [A0], A1_32
+        IEM_SAVE_FLAGS                 A2, %3, %4
+        EPILOGUE_3_ARGS
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u32
+
+ %ifdef RT_ARCH_AMD64
+BEGINPROC iemAImpl_ %+ %1 %+ _u64
+        PROLOGUE_3_ARGS
+        IEM_MAYBE_LOAD_FLAGS           A2, %3, %4
+        %1      qword [A0], A1
+        IEM_SAVE_FLAGS                 A2, %3, %4
+        EPILOGUE_3_ARGS
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u64
+ %else ; stub it for now - later, replace with hand coded stuff.
+BEGINPROC iemAImpl_ %+ %1 %+ _u64
+        int3
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u64
+  %endif ; !RT_ARCH_AMD64
+
+ %if %2 != 0 ; locked versions requested?
+
+BEGINPROC iemAImpl_ %+ %1 %+ _u8_locked
+        PROLOGUE_3_ARGS
+        IEM_MAYBE_LOAD_FLAGS           A2, %3, %4
+        lock %1 byte [A0], A1_8
+        IEM_SAVE_FLAGS                 A2, %3, %4
+        EPILOGUE_3_ARGS
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u8_locked
+
+BEGINPROC iemAImpl_ %+ %1 %+ _u16_locked
+        PROLOGUE_3_ARGS
+        IEM_MAYBE_LOAD_FLAGS           A2, %3, %4
+        lock %1 word [A0], A1_16
+        IEM_SAVE_FLAGS                 A2, %3, %4
+        EPILOGUE_3_ARGS
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
+
+BEGINPROC iemAImpl_ %+ %1 %+ _u32_locked
+        PROLOGUE_3_ARGS
+        IEM_MAYBE_LOAD_FLAGS           A2, %3, %4
+        lock %1 dword [A0], A1_32
+        IEM_SAVE_FLAGS                 A2, %3, %4
+        EPILOGUE_3_ARGS
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
+
+  %ifdef RT_ARCH_AMD64
+BEGINPROC iemAImpl_ %+ %1 %+ _u64_locked
+        PROLOGUE_3_ARGS
+        IEM_MAYBE_LOAD_FLAGS           A2, %3, %4
+        lock %1 qword [A0], A1
+        IEM_SAVE_FLAGS                 A2, %3, %4
+        EPILOGUE_3_ARGS
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
+  %else ; stub it for now - later, replace with hand coded stuff.
+BEGINPROC iemAImpl_ %+ %1 %+ _u64_locked
+        int3
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
+  %endif ; !RT_ARCH_AMD64
+ %endif ; locked
+%endmacro
+
+;            instr, lock, modified flags, undefined flags.
+IEMIMPL_BIN_OP add,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
+IEMIMPL_BIN_OP adc,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
+IEMIMPL_BIN_OP sub,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
+IEMIMPL_BIN_OP sbb,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
+IEMIMPL_BIN_OP or,   1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
+IEMIMPL_BIN_OP xor,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
+IEMIMPL_BIN_OP and,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
+IEMIMPL_BIN_OP cmp,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
+IEMIMPL_BIN_OP test, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
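+
+;
+; Each IEMIMPL_BIN_OP line above expands into a family of functions, e.g.
+; iemAImpl_add_u8 .. iemAImpl_add_u64 plus the _locked variants for 'add';
+; 'cmp' and 'test' pass 0 as the second parameter and get no locked forms.
+; A C caller would use them roughly like this (a sketch only, assuming
+; matching C prototypes; the variable names are illustrative):
+;
+;       uint32_t fEFlags = pCtx->eflags.u;
+;       iemAImpl_add_u32(pu32Dst, u32Src, &fEFlags);
+;       pCtx->eflags.u = fEFlags;
+;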
+
+
+;
+; IMUL is a similar but still different case (no lock, no mem dst).
+; The rDX:rAX variant of imul is handled together with mul further down.
+;
+BEGINPROC iemAImpl_imul_two_u16
+        PROLOGUE_3_ARGS
+        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
+        imul    A1_16, word [A0]
+        mov     [A0], A1_16
+        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
+        EPILOGUE_3_ARGS
+        ret
+ENDPROC iemAImpl_imul_two_u16
+
+BEGINPROC iemAImpl_imul_two_u32
+        PROLOGUE_3_ARGS
+        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
+        imul    A1_32, dword [A0]
+        mov     [A0], A1_32
+        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
+        EPILOGUE_3_ARGS
+        ret
+ENDPROC iemAImpl_imul_two_u32
+
+BEGINPROC iemAImpl_imul_two_u64
+        PROLOGUE_3_ARGS
+%ifdef RT_ARCH_AMD64
+        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
+        imul    A1, qword [A0]
+        mov     [A0], A1
+        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
+%else
+        int3 ;; @todo implement me
+%endif
+        EPILOGUE_3_ARGS
+        ret
+ENDPROC iemAImpl_imul_two_u64
+
+
+;;
+; Macro for implementing a unary operator.
+;
+; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
+; variants, except on 32-bit systems where the 64-bit accesses require hand
+; coding.
+;
+; All the functions take a pointer to the destination memory operand in A0
+; and a pointer to eflags in A1.
+;
+; @param        1       The instruction mnemonic.
+; @param        2       The modified flags.
+; @param        3       The undefined flags.
+;
+%macro IEMIMPL_UNARY_OP 3
+BEGINPROC iemAImpl_ %+ %1 %+ _u8
+        PROLOGUE_2_ARGS
+        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
+        %1      byte [A0]
+        IEM_SAVE_FLAGS       A1, %2, %3
+        EPILOGUE_2_ARGS
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u8
+
+BEGINPROC iemAImpl_ %+ %1 %+ _u8_locked
+        PROLOGUE_2_ARGS
+        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
+        lock %1 byte [A0]
+        IEM_SAVE_FLAGS       A1, %2, %3
+        EPILOGUE_2_ARGS
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u8_locked
+
+BEGINPROC iemAImpl_ %+ %1 %+ _u16
+        PROLOGUE_2_ARGS
+        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
+        %1      word [A0]
+        IEM_SAVE_FLAGS       A1, %2, %3
+        EPILOGUE_2_ARGS
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u16
+
+BEGINPROC iemAImpl_ %+ %1 %+ _u16_locked
+        PROLOGUE_2_ARGS
+        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
+        lock %1 word [A0]
+        IEM_SAVE_FLAGS       A1, %2, %3
+        EPILOGUE_2_ARGS
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
+
+BEGINPROC iemAImpl_ %+ %1 %+ _u32
+        PROLOGUE_2_ARGS
+        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
+        %1      dword [A0]
+        IEM_SAVE_FLAGS       A1, %2, %3
+        EPILOGUE_2_ARGS
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u32
+
+BEGINPROC iemAImpl_ %+ %1 %+ _u32_locked
+        PROLOGUE_2_ARGS
+        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
+        lock %1 dword [A0]
+        IEM_SAVE_FLAGS       A1, %2, %3
+        EPILOGUE_2_ARGS
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
+
+ %ifdef RT_ARCH_AMD64
+BEGINPROC iemAImpl_ %+ %1 %+ _u64
+        PROLOGUE_2_ARGS
+        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
+        %1      qword [A0]
+        IEM_SAVE_FLAGS       A1, %2, %3
+        EPILOGUE_2_ARGS
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u64
+
+BEGINPROC iemAImpl_ %+ %1 %+ _u64_locked
+        PROLOGUE_2_ARGS
+        IEM_MAYBE_LOAD_FLAGS A1, %2, %3
+        lock %1 qword [A0]
+        IEM_SAVE_FLAGS       A1, %2, %3
+        EPILOGUE_2_ARGS
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
+ %else
+        ; stub them for now.
+BEGINPROC iemAImpl_ %+ %1 %+ _u64
+        int3
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u64
+BEGINPROC iemAImpl_ %+ %1 %+ _u64_locked
+        int3
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
+ %endif
+
+%endmacro
+
+IEMIMPL_UNARY_OP inc, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
+IEMIMPL_UNARY_OP dec, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
+IEMIMPL_UNARY_OP neg, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
+IEMIMPL_UNARY_OP not, 0, 0
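+
+;
+; The instantiations above generate iemAImpl_inc_u8 through
+; iemAImpl_not_u64_locked.  Note that 'not' passes 0 for both flag masks:
+; NOT leaves EFLAGS untouched, so IEM_SAVE_FLAGS expands to nothing for it.
+;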
+
+
+
+;;
+; Macro for implementing a shift operation.
+;
+; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
+; 32-bit systems where the 64-bit accesses require hand coding.
+;
+; All the functions take a pointer to the destination memory operand in A0,
+; the shift count in A1 and a pointer to eflags in A2.
+;
+; @param        1       The instruction mnemonic.
+; @param        2       The modified flags.
+; @param        3       The undefined flags.
+;
+; Makes ASSUMPTIONS about A0, A1 and A2 assignments.
+;
+%macro IEMIMPL_SHIFT_OP 3
+BEGINPROC iemAImpl_ %+ %1 %+ _u8
+        PROLOGUE_3_ARGS
+        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
+ %ifdef ASM_CALL64_GCC
+        mov     cl, A1_8
+        %1      byte [A0], cl
+ %else
+        xchg    A1, A0
+        %1      byte [A1], cl
+ %endif
+        IEM_SAVE_FLAGS       A2, %2, %3
+        EPILOGUE_3_ARGS
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u8
+
+BEGINPROC iemAImpl_ %+ %1 %+ _u16
+        PROLOGUE_3_ARGS
+        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
+ %ifdef ASM_CALL64_GCC
+        mov     cl, A1_8
+        %1      word [A0], cl
+ %else
+        xchg    A1, A0
+        %1      word [A1], cl
+ %endif
+        IEM_SAVE_FLAGS       A2, %2, %3
+        EPILOGUE_3_ARGS
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u16
+
+BEGINPROC iemAImpl_ %+ %1 %+ _u32
+        PROLOGUE_3_ARGS
+        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
+ %ifdef ASM_CALL64_GCC
+        mov     cl, A1_8
+        %1      dword [A0], cl
+ %else
+        xchg    A1, A0
+        %1      dword [A1], cl
+ %endif
+        IEM_SAVE_FLAGS       A2, %2, %3
+        EPILOGUE_3_ARGS
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u32
+
+ %ifdef RT_ARCH_AMD64
+BEGINPROC iemAImpl_ %+ %1 %+ _u64
+        PROLOGUE_3_ARGS
+        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
+ %ifdef ASM_CALL64_GCC
+        mov     cl, A1_8
+        %1      qword [A0], cl
+ %else
+        xchg    A1, A0
+        %1      qword [A1], cl
+ %endif
+        IEM_SAVE_FLAGS       A2, %2, %3
+        EPILOGUE_3_ARGS
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u64
+ %else ; stub it for now - later, replace with hand coded stuff.
+BEGINPROC iemAImpl_ %+ %1 %+ _u64
+        int3
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u64
+  %endif ; !RT_ARCH_AMD64
+
+%endmacro
+
+IEMIMPL_SHIFT_OP rol, (X86_EFL_OF | X86_EFL_CF), 0
+IEMIMPL_SHIFT_OP ror, (X86_EFL_OF | X86_EFL_CF), 0
+IEMIMPL_SHIFT_OP rcl, (X86_EFL_OF | X86_EFL_CF), 0
+IEMIMPL_SHIFT_OP rcr, (X86_EFL_OF | X86_EFL_CF), 0
+IEMIMPL_SHIFT_OP shl, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
+IEMIMPL_SHIFT_OP shr, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
+IEMIMPL_SHIFT_OP sar, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
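+
+;
+; The shift count must end up in CL before the host instruction runs: with
+; the GCC AMD64 convention it is simply copied from A1_8, while on the
+; other conventions A0 and A1 are exchanged first because A0 lives in
+; rCX/ecx there.  E.g. iemAImpl_shl_u32 thus performs 'shl dword [mem], cl'
+; with the count taken from the second argument.
+;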
+
+
+;;
+; Macro for implementing multiplication operations.
+;
+; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
+; 32-bit systems where the 64-bit accesses require hand coding.
+;
+; The 8-bit function only operates on AX, so it takes no DX pointer.  The other
+; functions take a pointer to rAX in A0, rDX in A1, the operand in A2 and a
+; pointer to eflags in A3.
+;
+; The functions all return 0 so the same caller code can be used for div/idiv
+; as well as for the mul/imul implementation.
+;
+; @param        1       The instruction mnemonic.
+; @param        2       The modified flags.
+; @param        3       The undefined flags.
+;
+; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
+;
+%macro IEMIMPL_MUL_OP 3
+BEGINPROC iemAImpl_ %+ %1 %+ _u8
+        PROLOGUE_3_ARGS
+        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
+        mov     al, [A0]
+        %1      A1_8
+        mov     [A0], ax
+        IEM_SAVE_FLAGS       A2, %2, %3
+        EPILOGUE_3_ARGS
+        xor     eax, eax
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u8
+
+BEGINPROC iemAImpl_ %+ %1 %+ _u16
+        PROLOGUE_4_ARGS
+        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
+        mov     ax, [A0]
+ %ifdef ASM_CALL64_GCC
+        %1      A2_16
+        mov     [A0], ax
+        mov     [A1], dx
+ %else
+        mov     T1, A1
+        %1      A2_16
+        mov     [A0], ax
+        mov     [T1], dx
+ %endif
+        IEM_SAVE_FLAGS       A3, %2, %3
+        EPILOGUE_4_ARGS
+        xor     eax, eax
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u16
+
+BEGINPROC iemAImpl_ %+ %1 %+ _u32
+        PROLOGUE_4_ARGS
+        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
+        mov     eax, [A0]
+ %ifdef ASM_CALL64_GCC
+        %1      A2_32
+        mov     [A0], eax
+        mov     [A1], edx
+ %else
+        mov     T1, A1
+        %1      A2_32
+        mov     [A0], eax
+        mov     [T1], edx
+ %endif
+        IEM_SAVE_FLAGS       A3, %2, %3
+        EPILOGUE_4_ARGS
+        xor     eax, eax
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u32
+
+ %ifdef RT_ARCH_AMD64
+BEGINPROC iemAImpl_ %+ %1 %+ _u64
+        PROLOGUE_4_ARGS
+        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
+        mov     rax, [A0]
+ %ifdef ASM_CALL64_GCC
+        %1      A2
+        mov     [A0], rax
+        mov     [A1], rdx
+ %else
+        mov     T1, A1
+        %1      A2
+        mov     [A0], rax
+        mov     [T1], rdx
+ %endif
+        IEM_SAVE_FLAGS       A3, %2, %3
+        EPILOGUE_4_ARGS
+        xor     eax, eax
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u64
+ %else ; stub it for now - later, replace with hand coded stuff.
+BEGINPROC iemAImpl_ %+ %1 %+ _u64
+        int3
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u64
+  %endif ; !RT_ARCH_AMD64
+
+%endmacro
+
+IEMIMPL_MUL_OP mul,  (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
+IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
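+
+;
+; E.g. iemAImpl_mul_u32 loads eax from [A0], executes 'mul r/m32' on the
+; operand passed in A2, and writes the edx:eax product back through the A0
+; and A1 pointers.  The unconditional 'xor eax, eax' (return 0) keeps the
+; signature compatible with the div/idiv workers below, which return -1 on
+; divide errors.
+;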
+
+
+;;
+; Macro for implementing division operations.
+;
+; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
+; 32-bit systems where the 64-bit accesses require hand coding.
+;
+; The 8-bit function only operates on AX, so it takes no DX pointer.  The other
+; functions take a pointer to rAX in A0, rDX in A1, the operand in A2 and a
+; pointer to eflags in A3.
+;
+; The functions all return 0 on success and -1 if a divide error should be
+; raised by the caller.
+;
+; @param        1       The instruction mnemonic.
+; @param        2       The modified flags.
+; @param        3       The undefined flags.
+;
+; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
+;
+%macro IEMIMPL_DIV_OP 3
+BEGINPROC iemAImpl_ %+ %1 %+ _u8
+        PROLOGUE_3_ARGS
+
+        test    A1_8, A1_8
+        jz      .div_zero
+        ;; @todo test for overflow
+
+        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
+        mov     ax, [A0]
+        %1      A1_8
+        mov     [A0], ax
+        IEM_SAVE_FLAGS       A2, %2, %3
+        xor     eax, eax
+
+.return:
+        EPILOGUE_3_ARGS
+        ret
+.div_zero:
+        mov     eax, -1
+        jmp     .return
+ENDPROC iemAImpl_ %+ %1 %+ _u8
+
+BEGINPROC iemAImpl_ %+ %1 %+ _u16
+        PROLOGUE_4_ARGS
+
+        test    A1_16, A1_16
+        jz      .div_zero
+        ;; @todo test for overflow
+
+        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
+ %ifdef ASM_CALL64_GCC
+        mov     T1, A2
+        mov     ax, [A0]
+        mov     dx, [A1]
+        %1      T1_16
+        mov     [A0], ax
+        mov     [A1], dx
+ %else
+        mov     T1, A1
+        mov     ax, [A0]
+        mov     dx, [T1]
+        %1      A2_16
+        mov     [A0], ax
+        mov     [T1], dx
+ %endif
+        IEM_SAVE_FLAGS       A3, %2, %3
+        xor     eax, eax
+
+.return:
+        EPILOGUE_4_ARGS
+        ret
+.div_zero:
+        mov     eax, -1
+        jmp     .return
+ENDPROC iemAImpl_ %+ %1 %+ _u16
+
+BEGINPROC iemAImpl_ %+ %1 %+ _u32
+        PROLOGUE_4_ARGS
+
+        test    A1_32, A1_32
+        jz      .div_zero
+        ;; @todo test for overflow
+
+        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
+ %ifdef ASM_CALL64_GCC
+        mov     T1, A2
+        mov     eax, [A0]
+        mov     edx, [A1]
+        %1      T1_32
+        mov     [A0], eax
+        mov     [A1], edx
+ %else
+        mov     T1, A1
+        mov     eax, [A0]
+        mov     edx, [T1]
+        %1      A2_32
+        mov     [A0], eax
+        mov     [T1], edx
+ %endif
+        IEM_SAVE_FLAGS       A3, %2, %3
+        xor     eax, eax
+
+.return:
+        EPILOGUE_4_ARGS
+        ret
+.div_zero:
+        mov     eax, -1
+        jmp     .return
+ENDPROC iemAImpl_ %+ %1 %+ _u32
+
+ %ifdef RT_ARCH_AMD64
+BEGINPROC iemAImpl_ %+ %1 %+ _u64
+        PROLOGUE_4_ARGS
+
+        test    A1, A1
+        jz      .div_zero
+        ;; @todo test for overflow
+
+        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
+ %ifdef ASM_CALL64_GCC
+        mov     T1, A2
+        mov     rax, [A0]
+        mov     rdx, [A1]
+        %1      T1
+        mov     [A0], rax
+        mov     [A1], rdx
+ %else
+        mov     T1, A1
+        mov     rax, [A0]
+        mov     rdx, [T1]
+        %1      A2
+        mov     [A0], rax
+        mov     [T1], rdx
+ %endif
+        IEM_SAVE_FLAGS       A3, %2, %3
+        xor     eax, eax
+
+.return:
+        EPILOGUE_4_ARGS
+        ret
+.div_zero:
+        mov     eax, -1
+        jmp     .return
+ENDPROC iemAImpl_ %+ %1 %+ _u64
+ %else ; stub it for now - later, replace with hand coded stuff.
+BEGINPROC iemAImpl_ %+ %1 %+ _u64
+        int3
+        ret
+ENDPROC iemAImpl_ %+ %1 %+ _u64
+  %endif ; !RT_ARCH_AMD64
+
+%endmacro
+
+IEMIMPL_DIV_OP div,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
+IEMIMPL_DIV_OP idiv, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
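+
+;
+; The div/idiv workers return 0 on success and -1 when the caller must
+; raise #DE.  A C caller would check that along these lines (a sketch
+; only; the names are illustrative, not the decoder's actual code):
+;
+;       if (pfnAImpl(puAX, puDX, uDivisor, pfEFlags) != 0)
+;           return iemRaiseDivideError(pIemCpu);
+;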
+
Index: /trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp	(revision 36768)
+++ /trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp	(revision 36768)
@@ -0,0 +1,48 @@
+/* $Id$ */
+/** @file
+ * IEM - Instruction Implementation in Assembly, portable C variant.
+ */
+
+/*
+ * Copyright (C) 2011 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+/*******************************************************************************
+*   Header Files                                                               *
+*******************************************************************************/
+#include "IEMInternal.h"
+#include <VBox/vmm/vm.h>
+#include <VBox/x86.h>
+
+#if 0
+
+
+IEM_DECL_IMPL_DEF(void, iemImpl_add_u8,(uint8_t  *pu8Dst,  uint8_t  u8Src,  uint32_t *pEFlags))
+{
+    /* incorrect sketch (testing fastcall + gcc) */
+    uint8_t u8Dst = *pu8Dst;
+    uint8_t u8Res = u8Dst + u8Src;
+    *pu8Dst = u8Res;
+
+    if (u8Res)
+        *pEFlags &= ~X86_EFL_ZF;
+    else
+        *pEFlags |= X86_EFL_ZF;
+}
+
+IEM_DECL_IMPL_DEF(void, iemImpl_add_u8_locked,(uint8_t  *pu8Dst,  uint8_t  u8Src,  uint32_t *pEFlags))
+{
+    iemImpl_add_u8(pu8Dst, u8Src, pEFlags);
+}
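+
+
+/*
+ * A fuller portable sketch of the same helper (also disabled; an assumption
+ * about what an eventual C fallback would have to compute, not the final
+ * implementation).  It derives all six arithmetic flags from the 8-bit add;
+ * the X86_EFL_* masks come from <VBox/x86.h>.
+ */
+IEM_DECL_IMPL_DEF(void, iemImpl_add_u8_flags_sketch,(uint8_t *pu8Dst, uint8_t u8Src, uint32_t *pEFlags))
+{
+    uint8_t const  u8Dst  = *pu8Dst;
+    uint16_t const u16Res = (uint16_t)u8Dst + u8Src;
+    uint8_t const  u8Res  = (uint8_t)u16Res;
+    *pu8Dst = u8Res;
+
+    uint32_t fEfl = *pEFlags & ~(uint32_t)(  X86_EFL_CF | X86_EFL_PF | X86_EFL_AF
+                                           | X86_EFL_ZF | X86_EFL_SF | X86_EFL_OF);
+    fEfl |= (u16Res >> 8) & X86_EFL_CF;                     /* carry out of bit 7 */
+    fEfl |= (u8Dst ^ u8Src ^ u8Res) & X86_EFL_AF;           /* carry out of bit 3 */
+    if (!u8Res)
+        fEfl |= X86_EFL_ZF;
+    fEfl |= u8Res & X86_EFL_SF;                             /* sign is bit 7 */
+    uint8_t u8Par = u8Res;                                  /* PF = even parity of the result byte */
+    u8Par ^= u8Par >> 4;
+    u8Par ^= u8Par >> 2;
+    u8Par ^= u8Par >> 1;
+    if (!(u8Par & 1))
+        fEfl |= X86_EFL_PF;
+    fEfl |= (uint32_t)((u8Dst ^ u8Res) & (u8Src ^ u8Res) & 0x80) << 4; /* signed overflow -> bit 11 */
+    *pEFlags = fEfl;
+}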
+
+
+#endif
+
Index: /trunk/src/VBox/VMM/VMMAll/IEMAllCImplStrInstr.cpp.h
===================================================================
--- /trunk/src/VBox/VMM/VMMAll/IEMAllCImplStrInstr.cpp.h	(revision 36768)
+++ /trunk/src/VBox/VMM/VMMAll/IEMAllCImplStrInstr.cpp.h	(revision 36768)
@@ -0,0 +1,686 @@
+/* $Id$ */
+/** @file
+ * IEM - String Instruction Implementation Code Template.
+ */
+
+/*
+ * Copyright (C) 2011 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*******************************************************************************
+*   Defined Constants And Macros                                               *
+*******************************************************************************/
+#if OP_SIZE == 8
+# define OP_rAX     al
+#elif OP_SIZE == 16
+# define OP_rAX     ax
+#elif OP_SIZE == 32
+# define OP_rAX     eax
+#elif OP_SIZE == 64
+# define OP_rAX     rax
+#else
+# error "Bad OP_SIZE."
+#endif
+#define OP_TYPE                     RT_CONCAT3(uint,OP_SIZE,_t)
+
+#if ADDR_SIZE == 16
+# define ADDR_rDI   di
+# define ADDR_rSI   si
+# define ADDR_rCX   cx
+# define ADDR2_TYPE uint32_t
+#elif ADDR_SIZE == 32
+# define ADDR_rDI   edi
+# define ADDR_rSI   esi
+# define ADDR_rCX   ecx
+# define ADDR2_TYPE uint32_t
+#elif ADDR_SIZE == 64
+# define ADDR_rDI   rdi
+# define ADDR_rSI   rsi
+# define ADDR_rCX   rcx
+# define ADDR2_TYPE uint64_t
+#else
+# error "Bad ADDR_SIZE."
+#endif
+#define ADDR_TYPE                   RT_CONCAT3(uint,ADDR_SIZE,_t)
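+
+/*
+ * Illustration: instantiating this template with OP_SIZE=32 and
+ * ADDR_SIZE=16, for example, defines functions like
+ * iemCImpl_rep_movs_op32_addr16, operating on uint32_t elements with
+ * cx as the counter register.
+ */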
+
+
+
+/**
+ * Implements 'REP MOVS'.
+ */
+IEM_CIMPL_DEF_1(RT_CONCAT4(iemCImpl_rep_movs_op,OP_SIZE,_addr,ADDR_SIZE), uint8_t, iEffSeg)
+{
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+
+    /*
+     * Setup.
+     */
+    ADDR_TYPE       uCounterReg = pCtx->ADDR_rCX;
+    if (uCounterReg == 0)
+        return VINF_SUCCESS;
+
+    PCCPUMSELREGHID pSrcHid = iemSRegGetHid(pIemCpu, iEffSeg);
+    VBOXSTRICTRC rcStrict = iemMemSegCheckReadAccessEx(pIemCpu, pSrcHid, iEffSeg);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    rcStrict = iemMemSegCheckWriteAccessEx(pIemCpu, &pCtx->esHid, X86_SREG_ES);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    int8_t const    cbIncr      = pCtx->eflags.Bits.u1DF ? -(OP_SIZE / 8) : (OP_SIZE / 8);
+    ADDR_TYPE       uSrcAddrReg = pCtx->ADDR_rSI;
+    ADDR_TYPE       uDstAddrReg = pCtx->ADDR_rDI;
+
+    /*
+     * The loop.
+     */
+    do
+    {
+        /*
+         * Do segmentation and virtual page stuff.
+         */
+#if ADDR_SIZE != 64
+        ADDR2_TYPE  uVirtSrcAddr = (uint32_t)pSrcHid->u64Base    + uSrcAddrReg;
+        ADDR2_TYPE  uVirtDstAddr = (uint32_t)pCtx->esHid.u64Base + uDstAddrReg;
+#else
+        uint64_t    uVirtSrcAddr = uSrcAddrReg;
+        uint64_t    uVirtDstAddr = uDstAddrReg;
+#endif
+        uint32_t    cLeftSrcPage = (PAGE_SIZE - (uVirtSrcAddr & PAGE_OFFSET_MASK)) / (OP_SIZE / 8);
+        if (cLeftSrcPage > uCounterReg)
+            cLeftSrcPage = uCounterReg;
+        uint32_t    cLeftDstPage = (PAGE_SIZE - (uVirtDstAddr & PAGE_OFFSET_MASK)) / (OP_SIZE / 8);
+        uint32_t    cLeftPage = RT_MIN(cLeftSrcPage, cLeftDstPage);
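+        /* E.g. with 4 KiB pages, OP_SIZE=32 and a source address 0xff8
+           bytes into its page: cLeftSrcPage = (4096 - 0xff8) / 4 = 2
+           elements before the source crosses into the next page. */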
+
+        if (   cLeftPage > 0 /* can be null if unaligned, do one fallback round. */
+            && cbIncr > 0    /** @todo Implement reverse direction string ops. */
+#if ADDR_SIZE != 64
+            && uSrcAddrReg < pSrcHid->u32Limit
+            && uSrcAddrReg + (cLeftPage * (OP_SIZE / 8)) <= pSrcHid->u32Limit
+            && uDstAddrReg < pCtx->esHid.u32Limit
+            && uDstAddrReg + (cLeftPage * (OP_SIZE / 8)) <= pCtx->esHid.u32Limit
+#endif
+           )
+        {
+            RTGCPHYS GCPhysSrcMem;
+            rcStrict = iemMemPageTranslateAndCheckAccess(pIemCpu, uVirtSrcAddr, IEM_ACCESS_DATA_R, &GCPhysSrcMem);
+            if (rcStrict != VINF_SUCCESS)
+                break;
+
+            RTGCPHYS GCPhysDstMem;
+            rcStrict = iemMemPageTranslateAndCheckAccess(pIemCpu, uVirtDstAddr, IEM_ACCESS_DATA_W, &GCPhysDstMem);
+            if (rcStrict != VINF_SUCCESS)
+                break;
+
+            /*
+             * If we can map the page without trouble, do a block processing
+             * until the end of the current page.
+             */
+            OP_TYPE *puDstMem;
+            rcStrict = iemMemPageMap(pIemCpu, GCPhysDstMem, IEM_ACCESS_DATA_W, (void **)&puDstMem);
+            if (rcStrict == VINF_SUCCESS)
+            {
+                OP_TYPE const *puSrcMem;
+                rcStrict = iemMemPageMap(pIemCpu, GCPhysSrcMem, IEM_ACCESS_DATA_R, (void **)&puSrcMem);
+                if (rcStrict == VINF_SUCCESS)
+                {
+                    /* Perform the operation. */
+                    memcpy(puDstMem, puSrcMem, cLeftPage * (OP_SIZE / 8));
+
+                    /* Update the registers. */
+                    uSrcAddrReg += cLeftPage * cbIncr;
+                    uDstAddrReg += cLeftPage * cbIncr;
+                    uCounterReg -= cLeftPage;
+                    continue;
+                }
+            }
+        }
+
+        /*
+         * Fallback - slow processing till the end of the current page.
+         * In the cross-page boundary case we will end up here with cLeftPage
+         * as 0; we then execute one round of the loop.
+         */
+        do
+        {
+            OP_TYPE uValue;
+            rcStrict = RT_CONCAT(iemMemFetchDataU,OP_SIZE)(pIemCpu, &uValue, iEffSeg, uSrcAddrReg);
+            if (rcStrict != VINF_SUCCESS)
+                break;
+            rcStrict = RT_CONCAT(iemMemStoreDataU,OP_SIZE)(pIemCpu, X86_SREG_ES, uDstAddrReg, uValue);
+            if (rcStrict != VINF_SUCCESS)
+                break;
+
+            uSrcAddrReg += cbIncr;
+            uDstAddrReg += cbIncr;
+            uCounterReg--;
+            cLeftPage--;
+        } while ((int32_t)cLeftPage > 0);
+        if (rcStrict != VINF_SUCCESS)
+            break;
+    } while (uCounterReg != 0);
+
+    /*
+     * Update the registers.
+     */
+    pCtx->ADDR_rCX = uCounterReg;
+    pCtx->ADDR_rDI = uDstAddrReg;
+    pCtx->ADDR_rSI = uSrcAddrReg;
+    if (rcStrict == VINF_SUCCESS)
+        iemRegAddToRip(pIemCpu, cbInstr);
+
+    return rcStrict;
+}
+
+
+/**
+ * Implements 'REP STOS'.
+ */
+IEM_CIMPL_DEF_0(RT_CONCAT4(iemCImpl_stos_,OP_rAX,_m,ADDR_SIZE))
+{
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+
+    /*
+     * Setup.
+     */
+    ADDR_TYPE       uCounterReg = pCtx->ADDR_rCX;
+    if (uCounterReg == 0)
+        return VINF_SUCCESS;
+
+    VBOXSTRICTRC rcStrict = iemMemSegCheckWriteAccessEx(pIemCpu, &pCtx->esHid, X86_SREG_ES);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    int8_t const    cbIncr      = pCtx->eflags.Bits.u1DF ? -(OP_SIZE / 8) : (OP_SIZE / 8);
+    OP_TYPE const   uValue      = pCtx->OP_rAX;
+    ADDR_TYPE       uAddrReg    = pCtx->ADDR_rDI;
+
+    /*
+     * The loop.
+     */
+    do
+    {
+        /*
+         * Do segmentation and virtual page stuff.
+         */
+#if ADDR_SIZE != 64
+        ADDR2_TYPE  uVirtAddr = (uint32_t)pCtx->esHid.u64Base + uAddrReg;
+#else
+        uint64_t    uVirtAddr = uAddrReg;
+#endif
+        uint32_t    cLeftPage = (PAGE_SIZE - (uVirtAddr & PAGE_OFFSET_MASK)) / (OP_SIZE / 8);
+        if (cLeftPage > uCounterReg)
+            cLeftPage = uCounterReg;
+        if (   cLeftPage > 0 /* can be null if unaligned, do one fallback round. */
+            && cbIncr > 0    /** @todo Implement reverse direction string ops. */
+#if ADDR_SIZE != 64
+            && uAddrReg < pCtx->esHid.u32Limit
+            && uAddrReg + (cLeftPage * (OP_SIZE / 8)) <= pCtx->esHid.u32Limit
+#endif
+           )
+        {
+            RTGCPHYS GCPhysMem;
+            rcStrict = iemMemPageTranslateAndCheckAccess(pIemCpu, uVirtAddr, IEM_ACCESS_DATA_W, &GCPhysMem);
+            if (rcStrict != VINF_SUCCESS)
+                break;
+
+            /*
+             * If we can map the page without trouble, do a block processing
+             * until the end of the current page.
+             */
+            OP_TYPE *puMem;
+            rcStrict = iemMemPageMap(pIemCpu, GCPhysMem, IEM_ACCESS_DATA_W, (void **)&puMem);
+            if (rcStrict == VINF_SUCCESS)
+            {
+                /* Update the regs first so we can loop on cLeftPage. */
+                uCounterReg -= cLeftPage;
+                uAddrReg    += cLeftPage * cbIncr;
+
+                /* Do the memsetting. */
+#if OP_SIZE == 8
+                memset(puMem, uValue, cLeftPage);
+/*#elif OP_SIZE == 32
+                ASMMemFill32(puMem, cLeftPage * (OP_SIZE / 8), uValue);*/
+#else
+                while (cLeftPage-- > 0)
+                    *puMem++ = uValue;
+#endif
+
+                /* If unaligned, we drop thru and do the page crossing access
+                   below. Otherwise, do the next page. */
+                if (!(uVirtAddr & (OP_SIZE - 1)))
+                    continue;
+                if (uCounterReg == 0)
+                    break;
+                cLeftPage = 0;
+            }
+        }
+
+        /*
+         * Fallback - slow processing till the end of the current page.
+         * In the cross-page boundary case we will end up here with cLeftPage
+         * as 0; we then execute one round of the loop.
+         */
+        do
+        {
+            rcStrict = RT_CONCAT(iemMemStoreDataU,OP_SIZE)(pIemCpu, X86_SREG_ES, uAddrReg, uValue);
+            if (rcStrict != VINF_SUCCESS)
+                break;
+            uAddrReg += cbIncr;
+            uCounterReg--;
+            cLeftPage--;
+        } while ((int32_t)cLeftPage > 0);
+        if (rcStrict != VINF_SUCCESS)
+            break;
+    } while (uCounterReg != 0);
+
+    /*
+     * Update the registers.
+     */
+    pCtx->ADDR_rCX = uCounterReg;
+    pCtx->ADDR_rDI = uAddrReg;
+    if (rcStrict == VINF_SUCCESS)
+        iemRegAddToRip(pIemCpu, cbInstr);
+
+    return rcStrict;
+}
+
+
+#if OP_SIZE != 64
+
+/**
+ * Implements 'INS' (no rep)
+ */
+IEM_CIMPL_DEF_0(RT_CONCAT4(iemCImpl_ins_op,OP_SIZE,_addr,ADDR_SIZE))
+{
+    PVM             pVM  = IEMCPU_TO_VM(pIemCpu);
+    PCPUMCTX        pCtx = pIemCpu->CTX_SUFF(pCtx);
+    VBOXSTRICTRC    rcStrict;
+
+    /*
+     * ASSUMES the #GP for I/O permission is taken first, then any #GP for
+     * segmentation and finally any #PF due to virtual address translation.
+     * ASSUMES nothing is read from the I/O port before traps are taken.
+     */
+    rcStrict = iemHlpCheckPortIOPermission(pIemCpu, pCtx, pCtx->dx, OP_SIZE / 8);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    OP_TYPE        *puMem;
+    rcStrict = iemMemMap(pIemCpu, (void **)&puMem, OP_SIZE / 8, X86_SREG_ES, pCtx->ADDR_rDI, IEM_ACCESS_DATA_W);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    uint32_t        u32Value;
+    rcStrict = IOMIOPortRead(pVM, pCtx->dx, &u32Value, OP_SIZE / 8);
+    if (IOM_SUCCESS(rcStrict))
+    {
+        VBOXSTRICTRC rcStrict2 = iemMemCommitAndUnmap(pIemCpu, puMem, IEM_ACCESS_DATA_W);
+        if (RT_LIKELY(rcStrict2 == VINF_SUCCESS))
+        {
+            if (!pCtx->eflags.Bits.u1DF)
+                pCtx->ADDR_rDI += OP_SIZE / 8;
+            else
+                pCtx->ADDR_rDI -= OP_SIZE / 8;
+            iemRegAddToRip(pIemCpu, cbInstr);
+        }
+        /* iemMemMap already checked the permissions, so this can only be real
+           errors or access handlers meddling. The access handler case would
+           cause misbehavior if the instruction were re-interpreted or the
+           like, so we fail with an internal error here instead. */
+        else
+            AssertLogRelFailedReturn(VERR_INTERNAL_ERROR_3);
+    }
+    return rcStrict;
+}
+
+/**
+ * Implements 'REP INS'.
+ */
+IEM_CIMPL_DEF_0(RT_CONCAT4(iemCImpl_rep_ins_op,OP_SIZE,_addr,ADDR_SIZE))
+{
+    PVM         pVM  = IEMCPU_TO_VM(pIemCpu);
+    PCPUMCTX    pCtx = pIemCpu->CTX_SUFF(pCtx);
+
+    /*
+     * Setup.
+     */
+    uint16_t const  u16Port    = pCtx->dx;
+    VBOXSTRICTRC rcStrict = iemHlpCheckPortIOPermission(pIemCpu, pCtx, u16Port, OP_SIZE / 8);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    ADDR_TYPE       uCounterReg = pCtx->ADDR_rCX;
+    if (uCounterReg == 0)
+        return VINF_SUCCESS;
+
+    rcStrict = iemMemSegCheckWriteAccessEx(pIemCpu, &pCtx->esHid, X86_SREG_ES);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    int8_t const    cbIncr      = pCtx->eflags.Bits.u1DF ? -(OP_SIZE / 8) : (OP_SIZE / 8);
+    ADDR_TYPE       uAddrReg    = pCtx->ADDR_rDI;
+
+    /*
+     * The loop.
+     */
+    do
+    {
+        /*
+         * Do segmentation and virtual page stuff.
+         */
+#if ADDR_SIZE != 64
+        ADDR2_TYPE  uVirtAddr = (uint32_t)pCtx->esHid.u64Base + uAddrReg;
+#else
+        uint64_t    uVirtAddr = uAddrReg;
+#endif
+        uint32_t    cLeftPage = (PAGE_SIZE - (uVirtAddr & PAGE_OFFSET_MASK)) / (OP_SIZE / 8);
+        if (cLeftPage > uCounterReg)
+            cLeftPage = uCounterReg;
+        if (   cLeftPage > 0 /* can be null if unaligned, do one fallback round. */
+            && cbIncr > 0    /** @todo Implement reverse direction string ops. */
+#if ADDR_SIZE != 64
+            && uAddrReg < pCtx->esHid.u32Limit
+            && uAddrReg + (cLeftPage * (OP_SIZE / 8)) <= pCtx->esHid.u32Limit
+#endif
+           )
+        {
+            RTGCPHYS GCPhysMem;
+            rcStrict = iemMemPageTranslateAndCheckAccess(pIemCpu, uVirtAddr, IEM_ACCESS_DATA_W, &GCPhysMem);
+            if (rcStrict != VINF_SUCCESS)
+                break;
+
+            /*
+             * If we can map the page without trouble, we would've liked to use
+             * a string I/O method to do the work, but the current IOM
+             * interface doesn't match our current approach. So, do a regular
+             * loop instead.
+             */
+            /** @todo Change the I/O manager interface to make use of
+             *        mapped buffers instead of leaving those bits to the
+             *        device implementation? */
+            OP_TYPE *puMem;
+            rcStrict = iemMemPageMap(pIemCpu, GCPhysMem, IEM_ACCESS_DATA_W, (void **)&puMem);
+            if (rcStrict == VINF_SUCCESS)
+            {
+                while (cLeftPage-- > 0)
+                {
+                    uint32_t u32Value;
+                    rcStrict = IOMIOPortRead(pVM, u16Port, &u32Value, OP_SIZE / 8);
+                    if (!IOM_SUCCESS(rcStrict))
+                        break;
+                    *puMem++     = (OP_TYPE)u32Value;
+                    uAddrReg    += cbIncr;
+                    uCounterReg -= 1;
+
+                    if (rcStrict != VINF_SUCCESS)
+                    {
+                        /** @todo massage rc */
+                        break;
+                    }
+                }
+                if (rcStrict != VINF_SUCCESS)
+                    break;
+
+                /* If unaligned, we drop thru and do the page crossing access
+                   below. Otherwise, do the next page. */
+                if (!(uVirtAddr & (OP_SIZE - 1)))
+                    continue;
+                if (uCounterReg == 0)
+                    break;
+                cLeftPage = 0;
+            }
+        }
+
+        /*
+         * Fallback - slow processing till the end of the current page.
+         * In the cross-page boundary case we will end up here with cLeftPage
+         * as 0; we then execute one round of the loop.
+         *
+         * Note! We ASSUME the CPU will raise #PF or #GP before accessing the
+         *       I/O port, otherwise it wouldn't really be restartable.
+         */
+        /** @todo investigate what the CPU actually does with \#PF/\#GP
+         *        during INS. */
+        do
+        {
+            OP_TYPE *puMem;
+            rcStrict = iemMemMap(pIemCpu, (void **)&puMem, OP_SIZE / 8, X86_SREG_ES, uAddrReg, IEM_ACCESS_DATA_W);
+            if (rcStrict != VINF_SUCCESS)
+                break;
+
+            uint32_t u32Value;
+            rcStrict = IOMIOPortRead(pVM, u16Port, &u32Value, OP_SIZE / 8);
+            if (!IOM_SUCCESS(rcStrict))
+                break;
+
+            VBOXSTRICTRC rcStrict2 = iemMemCommitAndUnmap(pIemCpu, puMem, IEM_ACCESS_DATA_W);
+            AssertLogRelBreakStmt(rcStrict2 == VINF_SUCCESS, rcStrict = VERR_INTERNAL_ERROR_3); /* See non-rep version. */
+
+            uAddrReg += cbIncr;
+            uCounterReg--;
+            cLeftPage--;
+            if (rcStrict != VINF_SUCCESS)
+            {
+                /** @todo massage IOM status codes! */
+                break;
+            }
+        } while ((int32_t)cLeftPage > 0);
+        if (rcStrict != VINF_SUCCESS)
+            break;
+    } while (uCounterReg != 0);
+
+    /*
+     * Update the registers.
+     */
+    pCtx->ADDR_rCX = uCounterReg;
+    pCtx->ADDR_rDI = uAddrReg;
+    if (rcStrict == VINF_SUCCESS)
+        iemRegAddToRip(pIemCpu, cbInstr);
+
+    return rcStrict;
+}
+
+
+/**
+ * Implements 'OUTS' (no rep)
+ */
+IEM_CIMPL_DEF_0(RT_CONCAT4(iemCImpl_outs_op,OP_SIZE,_addr,ADDR_SIZE))
+{
+    PVM             pVM  = IEMCPU_TO_VM(pIemCpu);
+    PCPUMCTX        pCtx = pIemCpu->CTX_SUFF(pCtx);
+    VBOXSTRICTRC    rcStrict;
+
+    /*
+     * ASSUMES the #GP for I/O permission is taken first, then any #GP for
+     * segmentation and finally any #PF due to virtual address translation.
+     * ASSUMES nothing is read from the I/O port before traps are taken.
+     */
+    rcStrict = iemHlpCheckPortIOPermission(pIemCpu, pCtx, pCtx->dx, OP_SIZE / 8);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    OP_TYPE uValue;
+    rcStrict = RT_CONCAT(iemMemFetchDataU,OP_SIZE)(pIemCpu, &uValue, X86_SREG_ES, pCtx->ADDR_rDI);
+    if (rcStrict == VINF_SUCCESS)
+    {
+        rcStrict = IOMIOPortWrite(pVM, pCtx->dx, uValue, OP_SIZE / 8);
+        if (IOM_SUCCESS(rcStrict))
+        {
+            if (!pCtx->eflags.Bits.u1DF)
+                pCtx->ADDR_rDI += OP_SIZE / 8;
+            else
+                pCtx->ADDR_rDI -= OP_SIZE / 8;
+            iemRegAddToRip(pIemCpu, cbInstr);
+            /** @todo massage IOM status codes. */
+        }
+    }
+    return rcStrict;
+}
+
+/**
+ * Implements 'REP OUTS'.
+ */
+IEM_CIMPL_DEF_0(RT_CONCAT4(iemCImpl_rep_outs_op,OP_SIZE,_addr,ADDR_SIZE))
+{
+    PVM         pVM  = IEMCPU_TO_VM(pIemCpu);
+    PCPUMCTX    pCtx = pIemCpu->CTX_SUFF(pCtx);
+
+    /*
+     * Setup.
+     */
+    uint16_t const  u16Port    = pCtx->dx;
+    VBOXSTRICTRC rcStrict = iemHlpCheckPortIOPermission(pIemCpu, pCtx, u16Port, OP_SIZE / 8);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    ADDR_TYPE       uCounterReg = pCtx->ADDR_rCX;
+    if (uCounterReg == 0)
+        return VINF_SUCCESS;
+
+    rcStrict = iemMemSegCheckReadAccessEx(pIemCpu, &pCtx->esHid, X86_SREG_ES);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    int8_t const    cbIncr      = pCtx->eflags.Bits.u1DF ? -(OP_SIZE / 8) : (OP_SIZE / 8);
+    ADDR_TYPE       uAddrReg    = pCtx->ADDR_rDI;
+
+    /*
+     * The loop.
+     */
+    do
+    {
+        /*
+         * Do segmentation and virtual page stuff.
+         */
+#if ADDR_SIZE != 64
+        ADDR2_TYPE  uVirtAddr = (uint32_t)pCtx->esHid.u64Base + uAddrReg;
+#else
+        uint64_t    uVirtAddr = uAddrReg;
+#endif
+        uint32_t    cLeftPage = (PAGE_SIZE - (uVirtAddr & PAGE_OFFSET_MASK)) / (OP_SIZE / 8);
+        if (cLeftPage > uCounterReg)
+            cLeftPage = uCounterReg;
+        if (   cLeftPage > 0 /* can be null if unaligned, do one fallback round. */
+            && cbIncr > 0    /** @todo Implement reverse direction string ops. */
+#if ADDR_SIZE != 64
+            && uAddrReg < pCtx->esHid.u32Limit
+            && uAddrReg + (cLeftPage * (OP_SIZE / 8)) <= pCtx->esHid.u32Limit
+#endif
+           )
+        {
+            RTGCPHYS GCPhysMem;
+            rcStrict = iemMemPageTranslateAndCheckAccess(pIemCpu, uVirtAddr, IEM_ACCESS_DATA_R, &GCPhysMem);
+            if (rcStrict != VINF_SUCCESS)
+                break;
+
+            /*
+             * If we can map the page without trouble, we would've liked to use
+             * a string I/O method to do the work, but the current IOM
+             * interface doesn't match our current approach. So, do a regular
+             * loop instead.
+             */
+            /** @todo Change the I/O manager interface to make use of
+             *        mapped buffers instead of leaving those bits to the
+             *        device implementation? */
+            OP_TYPE const *puMem;
+            rcStrict = iemMemPageMap(pIemCpu, GCPhysMem, IEM_ACCESS_DATA_R, (void **)&puMem);
+            if (rcStrict == VINF_SUCCESS)
+            {
+                while (cLeftPage-- > 0)
+                {
+                    uint32_t u32Value = *puMem++;
+                    rcStrict = IOMIOPortWrite(pVM, u16Port, u32Value, OP_SIZE / 8);
+                    if (!IOM_SUCCESS(rcStrict))
+                        break;
+                    uAddrReg    += cbIncr;
+                    uCounterReg -= 1;
+
+                    if (rcStrict != VINF_SUCCESS)
+                    {
+                        /** @todo massage IOM rc */
+                        break;
+                    }
+                }
+                if (rcStrict != VINF_SUCCESS)
+                    break;
+
+                /* If unaligned, we drop thru and do the page crossing access
+                   below. Otherwise, do the next page. */
+                if (!(uVirtAddr & (OP_SIZE - 1)))
+                    continue;
+                if (uCounterReg == 0)
+                    break;
+                cLeftPage = 0;
+            }
+        }
+
+        /*
+         * Fallback - slow processing till the end of the current page.
+         * In the cross-page boundary case we will end up here with cLeftPage
+         * as 0; we then execute one round of the loop.
+         *
+         * Note! We ASSUME the CPU will raise #PF or #GP before accessing the
+         *       I/O port, otherwise it wouldn't really be restartable.
+         */
+        /** @todo investigate what the CPU actually does with \#PF/\#GP
+         *        during OUTS. */
+        do
+        {
+            OP_TYPE uValue;
+            rcStrict = RT_CONCAT(iemMemFetchDataU,OP_SIZE)(pIemCpu, &uValue, X86_SREG_ES, uAddrReg);
+            if (rcStrict != VINF_SUCCESS)
+                break;
+
+            rcStrict = IOMIOPortWrite(pVM, u16Port, uValue, OP_SIZE / 8);
+            if (!IOM_SUCCESS(rcStrict))
+                break;
+
+            uAddrReg += cbIncr;
+            uCounterReg--;
+            cLeftPage--;
+            if (rcStrict != VINF_SUCCESS)
+            {
+                /** @todo massage IOM status codes! */
+                break;
+            }
+        } while ((int32_t)cLeftPage > 0);
+        if (rcStrict != VINF_SUCCESS)
+            break;
+    } while (uCounterReg != 0);
+
+    /*
+     * Update the registers.
+     */
+    pCtx->ADDR_rCX = uCounterReg;
+    pCtx->ADDR_rDI = uAddrReg;
+    if (rcStrict == VINF_SUCCESS)
+        iemRegAddToRip(pIemCpu, cbInstr);
+
+    return rcStrict;
+}
+
+#endif /* OP_SIZE != 64 */
+
+
+#undef OP_rAX
+#undef OP_TYPE
+#undef OP_SIZE
+#undef ADDR_SIZE
+#undef ADDR_rDI
+#undef ADDR_rSI
+#undef ADDR_rCX
+#undef ADDR_TYPE
+#undef ADDR2_TYPE
+
Index: /trunk/src/VBox/VMM/VMMAll/IEMAllInstructions.cpp.h
===================================================================
--- /trunk/src/VBox/VMM/VMMAll/IEMAllInstructions.cpp.h	(revision 36768)
+++ /trunk/src/VBox/VMM/VMMAll/IEMAllInstructions.cpp.h	(revision 36768)
@@ -0,0 +1,8073 @@
+/* $Id$ */
+/** @file
+ * IEM - Instruction Decoding and Emulation.
+ */
+
+/*
+ * Copyright (C) 2011 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/**
+ * Common worker for instructions like ADD, AND, OR, ++ with a byte
+ * memory/register as the destination.
+ *
+ * @param   pImpl       Pointer to the instruction implementation (assembly).
+ */
+FNIEMOP_DEF_1(iemOpHlpBinaryOperator_rm_r8, PCIEMOPBINSIZES, pImpl)
+{
+    uint8_t bRm;
+    IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+
+    /*
+     * If rm is denoting a register, no more instruction bytes.
+     */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(3, 0);
+        IEM_MC_ARG(uint8_t *,  pu8Dst,  0);
+        IEM_MC_ARG(uint8_t,    u8Src,   1);
+        IEM_MC_ARG(uint32_t *, pEFlags, 2);
+
+        IEM_MC_FETCH_GREG_U8(u8Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+        IEM_MC_REF_GREG_U8(pu8Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+        IEM_MC_REF_EFLAGS(pEFlags);
+        IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, u8Src, pEFlags);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * We're accessing memory.
+         * Note! We're putting the eflags on the stack here so we can commit them
+         *       after the memory has been committed.
+         */
+        uint32_t const fAccess = pImpl->pfnLockedU8 ? IEM_ACCESS_DATA_RW : IEM_ACCESS_DATA_R; /* CMP,TEST */
+        IEM_MC_BEGIN(3, 2);
+        IEM_MC_ARG(uint8_t *,  pu8Dst,           0);
+        IEM_MC_ARG(uint8_t,    u8Src,            1);
+        IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+        IEM_MC_MEM_MAP(pu8Dst, fAccess, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+        IEM_MC_FETCH_GREG_U8(u8Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+        IEM_MC_FETCH_EFLAGS(EFlags);
+        if (!(pIemCpu->fPrefixes & IEM_OP_PRF_LOCK))
+            IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, u8Src, pEFlags);
+        else
+            IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU8, pu8Dst, u8Src, pEFlags);
+
+        IEM_MC_MEM_COMMIT_AND_UNMAP(pu8Dst, fAccess);
+        IEM_MC_COMMIT_EFLAGS(EFlags);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
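+
+/*
+ * Illustrative sketch (assumed wiring, not part of the change above): an
+ * opcode handler is expected to delegate to this worker by passing the
+ * implementation table of its operation.  The handler name and the
+ * g_iemAImpl_add table below are assumptions, e.g. for ADD Eb,Gb (0x00):
+ *
+ *     FNIEMOP_DEF(iemOp_add_Eb_Gb)
+ *     {
+ *         IEMOP_MNEMONIC("add Eb,Gb");
+ *         return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_add);
+ *     }
+ */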
+
+
+/**
+ * Common worker for word/dword/qword instructions like ADD, AND, OR, ++ with
+ * memory/register as the destination.
+ *
+ * @param   pImpl       Pointer to the instruction implementation (assembly).
+ */
+FNIEMOP_DEF_1(iemOpHlpBinaryOperator_rm_rv, PCIEMOPBINSIZES, pImpl)
+{
+    uint8_t bRm;
+    IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+
+    /*
+     * If rm denotes a register, there are no more instruction bytes.
+     */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint16_t *, pu16Dst, 0);
+                IEM_MC_ARG(uint16_t,   u16Src,  1);
+                IEM_MC_ARG(uint32_t *, pEFlags, 2);
+
+                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint32_t *, pu32Dst, 0);
+                IEM_MC_ARG(uint32_t,   u32Src,  1);
+                IEM_MC_ARG(uint32_t *, pEFlags, 2);
+
+                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint64_t *, pu64Dst, 0);
+                IEM_MC_ARG(uint64_t,   u64Src,  1);
+                IEM_MC_ARG(uint32_t *, pEFlags, 2);
+
+                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+        }
+    }
+    else
+    {
+        /*
+         * We're accessing memory.
+         * Note! We're putting the eflags on the stack here so we can commit them
+         *       after the memory has been committed.
+         */
+        uint32_t const fAccess = pImpl->pfnLockedU8 ? IEM_ACCESS_DATA_RW : IEM_ACCESS_DATA_R /* CMP,TEST */;
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint16_t *, pu16Dst,          0);
+                IEM_MC_ARG(uint16_t,   u16Src,           1);
+                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_MEM_MAP(pu16Dst, fAccess, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_GREG_U16(u16Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                if (!(pIemCpu->fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU16, pu16Dst, u16Src, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, fAccess);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint32_t *, pu32Dst,          0);
+                IEM_MC_ARG(uint32_t,   u32Src,           1);
+                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_MEM_MAP(pu32Dst, fAccess, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_GREG_U32(u32Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                if (!(pIemCpu->fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU32, pu32Dst, u32Src, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, fAccess);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint64_t *, pu64Dst,          0);
+                IEM_MC_ARG(uint64_t,   u64Src,           1);
+                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_MEM_MAP(pu64Dst, fAccess, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_GREG_U64(u64Src, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                if (!(pIemCpu->fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU64, pu64Dst, u64Src, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, fAccess);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+        }
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Common worker for byte instructions like ADD, AND, OR, ++ with a register as
+ * the destination.
+ *
+ * @param   pImpl       Pointer to the instruction implementation (assembly).
+ */
+FNIEMOP_DEF_1(iemOpHlpBinaryOperator_r8_rm, PCIEMOPBINSIZES, pImpl)
+{
+    uint8_t bRm;
+    IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+
+    /*
+     * If rm denotes a register, there are no more instruction bytes.
+     */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEM_MC_BEGIN(3, 0);
+        IEM_MC_ARG(uint8_t *,  pu8Dst,  0);
+        IEM_MC_ARG(uint8_t,    u8Src,   1);
+        IEM_MC_ARG(uint32_t *, pEFlags, 2);
+
+        IEM_MC_FETCH_GREG_U8(u8Src, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+        IEM_MC_REF_GREG_U8(pu8Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+        IEM_MC_REF_EFLAGS(pEFlags);
+        IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, u8Src, pEFlags);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * We're accessing memory.
+         */
+        IEM_MC_BEGIN(3, 1);
+        IEM_MC_ARG(uint8_t *,  pu8Dst,  0);
+        IEM_MC_ARG(uint8_t,    u8Src,   1);
+        IEM_MC_ARG(uint32_t *, pEFlags, 2);
+        IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+        IEM_MC_FETCH_MEM_U8(u8Src, pIemCpu->iEffSeg, GCPtrEffDst);
+        IEM_MC_REF_GREG_U8(pu8Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+        IEM_MC_REF_EFLAGS(pEFlags);
+        IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, u8Src, pEFlags);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Common worker for word/dword/qword instructions like ADD, AND, OR, ++ with a
+ * register as the destination.
+ *
+ * @param   pImpl       Pointer to the instruction implementation (assembly).
+ */
+FNIEMOP_DEF_1(iemOpHlpBinaryOperator_rv_rm, PCIEMOPBINSIZES, pImpl)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+
+    /*
+     * If rm denotes a register, there are no more instruction bytes.
+     */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint16_t *, pu16Dst, 0);
+                IEM_MC_ARG(uint16_t,   u16Src,  1);
+                IEM_MC_ARG(uint32_t *, pEFlags, 2);
+
+                IEM_MC_FETCH_GREG_U16(u16Src, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_GREG_U16(pu16Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint32_t *, pu32Dst, 0);
+                IEM_MC_ARG(uint32_t,   u32Src,  1);
+                IEM_MC_ARG(uint32_t *, pEFlags, 2);
+
+                IEM_MC_FETCH_GREG_U32(u32Src, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_GREG_U32(pu32Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint64_t *, pu64Dst, 0);
+                IEM_MC_ARG(uint64_t,   u64Src,  1);
+                IEM_MC_ARG(uint32_t *, pEFlags, 2);
+
+                IEM_MC_FETCH_GREG_U64(u64Src, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_GREG_U64(pu64Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+        }
+    }
+    else
+    {
+        /*
+         * We're accessing memory.
+         */
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(3, 1);
+                IEM_MC_ARG(uint16_t *, pu16Dst, 0);
+                IEM_MC_ARG(uint16_t,   u16Src,  1);
+                IEM_MC_ARG(uint32_t *, pEFlags, 2);
+                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_FETCH_MEM_U16(u16Src, pIemCpu->iEffSeg, GCPtrEffDst);
+                IEM_MC_REF_GREG_U16(pu16Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(3, 1);
+                IEM_MC_ARG(uint32_t *, pu32Dst, 0);
+                IEM_MC_ARG(uint32_t,   u32Src,  1);
+                IEM_MC_ARG(uint32_t *, pEFlags, 2);
+                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_FETCH_MEM_U32(u32Src, pIemCpu->iEffSeg, GCPtrEffDst);
+                IEM_MC_REF_GREG_U32(pu32Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(3, 1);
+                IEM_MC_ARG(uint64_t *, pu64Dst, 0);
+                IEM_MC_ARG(uint64_t,   u64Src,  1);
+                IEM_MC_ARG(uint32_t *, pEFlags, 2);
+                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_FETCH_MEM_U64(u64Src, pIemCpu->iEffSeg, GCPtrEffDst);
+                IEM_MC_REF_GREG_U64(pu64Dst, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+        }
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Common worker for instructions like ADD, AND, OR, ++ working on AL with
+ * a byte immediate.
+ *
+ * @param   pImpl       Pointer to the instruction implementation (assembly).
+ */
+FNIEMOP_DEF_1(iemOpHlpBinaryOperator_AL_Ib, PCIEMOPBINSIZES, pImpl)
+{
+    uint8_t u8Imm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &u8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+
+    IEM_MC_BEGIN(3, 0);
+    IEM_MC_ARG(uint8_t *,       pu8Dst,             0);
+    IEM_MC_ARG_CONST(uint8_t,   u8Src,/*=*/ u8Imm,  1);
+    IEM_MC_ARG(uint32_t *,      pEFlags,            2);
+
+    IEM_MC_REF_GREG_U8(pu8Dst, X86_GREG_xAX);
+    IEM_MC_REF_EFLAGS(pEFlags);
+    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, u8Src, pEFlags);
+
+    IEM_MC_ADVANCE_RIP();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Common worker for instructions like ADD, AND, OR, ++ working on
+ * AX/EAX/RAX with a word/dword immediate.
+ *
+ * @param   pImpl       Pointer to the instruction implementation (assembly).
+ */
+FNIEMOP_DEF_1(iemOpHlpBinaryOperator_rAX_Iz, PCIEMOPBINSIZES, pImpl)
+{
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+        {
+            uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+            IEMOP_HLP_NO_LOCK_PREFIX();
+
+            IEM_MC_BEGIN(3, 0);
+            IEM_MC_ARG(uint16_t *,      pu16Dst,                0);
+            IEM_MC_ARG_CONST(uint16_t,  u16Src,/*=*/ u16Imm,    1);
+            IEM_MC_ARG(uint32_t *,      pEFlags,                2);
+
+            IEM_MC_REF_GREG_U16(pu16Dst, X86_GREG_xAX);
+            IEM_MC_REF_EFLAGS(pEFlags);
+            IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
+
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+        }
+
+        case IEMMODE_32BIT:
+        {
+            uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+            IEMOP_HLP_NO_LOCK_PREFIX();
+
+            IEM_MC_BEGIN(3, 0);
+            IEM_MC_ARG(uint32_t *,      pu32Dst,                0);
+            IEM_MC_ARG_CONST(uint32_t,  u32Src,/*=*/ u32Imm,    1);
+            IEM_MC_ARG(uint32_t *,      pEFlags,                2);
+
+            IEM_MC_REF_GREG_U32(pu32Dst, X86_GREG_xAX);
+            IEM_MC_REF_EFLAGS(pEFlags);
+            IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
+
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+        }
+
+        case IEMMODE_64BIT:
+        {
+            uint64_t u64Imm; IEM_OPCODE_GET_NEXT_S32_SX_U64(pIemCpu, &u64Imm);
+            IEMOP_HLP_NO_LOCK_PREFIX();
+
+            IEM_MC_BEGIN(3, 0);
+            IEM_MC_ARG(uint64_t *,      pu64Dst,                0);
+            IEM_MC_ARG_CONST(uint64_t,  u64Src,/*=*/ u64Imm,    1);
+            IEM_MC_ARG(uint32_t *,      pEFlags,                2);
+
+            IEM_MC_REF_GREG_U64(pu64Dst, X86_GREG_xAX);
+            IEM_MC_REF_EFLAGS(pEFlags);
+            IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
+
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+        }
+
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+}
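+
+/*
+ * Illustrative sketch (assumed wiring): the immediate forms would hook up
+ * the same way; the handler name and table are again assumptions, e.g. for
+ * ADD rAX,Iz (0x05):
+ *
+ *     FNIEMOP_DEF(iemOp_add_eAX_Iz)
+ *     {
+ *         IEMOP_MNEMONIC("add rAX,Iz");
+ *         return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_add);
+ *     }
+ */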
+
+
+/** Opcodes 0xf1, 0xd6. */
+FNIEMOP_DEF(iemOp_Invalid)
+{
+    IEMOP_MNEMONIC("Invalid");
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+
+
+/** @name ..... opcodes.
+ *
+ * @{
+ */
+
+/** @}  */
+
+
+/** @name Two byte opcodes (first byte 0x0f).
+ *
+ * @{
+ */
+
+/** Opcode 0x0f 0x00 /0. */
+FNIEMOP_DEF_1(iemOp_Grp6_sldt, uint8_t, bRm)
+{
+    AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+}
+
+
+/** Opcode 0x0f 0x00 /1. */
+FNIEMOP_DEF_1(iemOp_Grp6_str, uint8_t, bRm)
+{
+    AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+}
+
+
+/** Opcode 0x0f 0x00 /2. */
+FNIEMOP_DEF_1(iemOp_Grp6_lldt, uint8_t, bRm)
+{
+    AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+}
+
+
+/** Opcode 0x0f 0x00 /3. */
+FNIEMOP_DEF_1(iemOp_Grp6_ltr, uint8_t, bRm)
+{
+    AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+}
+
+
+/** Opcode 0x0f 0x00 /4. */
+FNIEMOP_DEF_1(iemOp_Grp6_verr, uint8_t, bRm)
+{
+    AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+}
+
+
+/** Opcode 0x0f 0x00 /5. */
+FNIEMOP_DEF_1(iemOp_Grp6_verw, uint8_t, bRm)
+{
+    AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+}
+
+
+/** Opcode 0x0f 0x00. */
+FNIEMOP_DEF(iemOp_Grp6)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
+    {
+        case 0: return FNIEMOP_CALL_1(iemOp_Grp6_sldt, bRm);
+        case 1: return FNIEMOP_CALL_1(iemOp_Grp6_str,  bRm);
+        case 2: return FNIEMOP_CALL_1(iemOp_Grp6_lldt, bRm);
+        case 3: return FNIEMOP_CALL_1(iemOp_Grp6_ltr,  bRm);
+        case 4: return FNIEMOP_CALL_1(iemOp_Grp6_verr, bRm);
+        case 5: return FNIEMOP_CALL_1(iemOp_Grp6_verw, bRm);
+        case 6: return IEMOP_RAISE_INVALID_OPCODE();
+        case 7: return IEMOP_RAISE_INVALID_OPCODE();
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+}
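+
+/*
+ * Worked example (illustrative): for a hypothetical ModR/M byte of 0xC8
+ * (binary 11 001 000), mod is 3, the reg field is
+ * (0xC8 >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK = 1 and rm is 0, so
+ * the dispatcher above would select iemOp_Grp6_str with a register operand.
+ */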
+
+
+/** Opcode 0x0f 0x01 /0. */
+FNIEMOP_DEF_1(iemOp_Grp7_sgdt, uint8_t, bRm)
+{
+    AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+}
+
+
+/** Opcode 0x0f 0x01 /0. */
+FNIEMOP_DEF(iemOp_Grp7_vmcall)
+{
+    AssertFailed();
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+
+/** Opcode 0x0f 0x01 /0. */
+FNIEMOP_DEF(iemOp_Grp7_vmlaunch)
+{
+    AssertFailed();
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+
+/** Opcode 0x0f 0x01 /0. */
+FNIEMOP_DEF(iemOp_Grp7_vmresume)
+{
+    AssertFailed();
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+
+/** Opcode 0x0f 0x01 /0. */
+FNIEMOP_DEF(iemOp_Grp7_vmxoff)
+{
+    AssertFailed();
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+
+/** Opcode 0x0f 0x01 /1. */
+FNIEMOP_DEF_1(iemOp_Grp7_sidt, uint8_t, bRm)
+{
+    AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+}
+
+
+/** Opcode 0x0f 0x01 /1. */
+FNIEMOP_DEF(iemOp_Grp7_monitor)
+{
+    AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+}
+
+
+/** Opcode 0x0f 0x01 /1. */
+FNIEMOP_DEF(iemOp_Grp7_mwait)
+{
+    AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+}
+
+
+/** Opcode 0x0f 0x01 /2. */
+FNIEMOP_DEF_1(iemOp_Grp7_lgdt, uint8_t, bRm)
+{
+    IEMOP_HLP_NO_LOCK_PREFIX();
+
+    IEMMODE enmEffOpSize = pIemCpu->enmCpuMode == IEMMODE_64BIT
+                         ? IEMMODE_64BIT
+                         : pIemCpu->enmEffOpSize;
+    IEM_MC_BEGIN(3, 1);
+    IEM_MC_ARG_CONST(uint8_t,   iEffSeg, /*=*/pIemCpu->iEffSeg,     0);
+    IEM_MC_ARG(RTGCPTR,         GCPtrEffSrc,                        1);
+    IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSizeArg,/*=*/enmEffOpSize,  2);
+    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm);
+    IEM_MC_CALL_CIMPL_3(iemCImpl_lgdt, iEffSeg, GCPtrEffSrc, enmEffOpSizeArg);
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x01 /2. */
+FNIEMOP_DEF(iemOp_Grp7_xgetbv)
+{
+    AssertFailed();
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+
+/** Opcode 0x0f 0x01 /2. */
+FNIEMOP_DEF(iemOp_Grp7_xsetbv)
+{
+    AssertFailed();
+    return IEMOP_RAISE_INVALID_OPCODE();
+}
+
+
+/** Opcode 0x0f 0x01 /3. */
+FNIEMOP_DEF_1(iemOp_Grp7_lidt, uint8_t, bRm)
+{
+    IEMOP_HLP_NO_LOCK_PREFIX();
+
+    IEMMODE enmEffOpSize = pIemCpu->enmCpuMode == IEMMODE_64BIT
+                         ? IEMMODE_64BIT
+                         : pIemCpu->enmEffOpSize;
+    IEM_MC_BEGIN(3, 1);
+    IEM_MC_ARG_CONST(uint8_t,   iEffSeg, /*=*/pIemCpu->iEffSeg,     0);
+    IEM_MC_ARG(RTGCPTR,         GCPtrEffSrc,                        1);
+    IEM_MC_ARG_CONST(IEMMODE,   enmEffOpSizeArg,/*=*/enmEffOpSize,  2);
+    IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm);
+    IEM_MC_CALL_CIMPL_3(iemCImpl_lidt, iEffSeg, GCPtrEffSrc, enmEffOpSizeArg);
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x01 /4. */
+FNIEMOP_DEF_1(iemOp_Grp7_smsw, uint8_t, bRm)
+{
+    AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+}
+
+
+/** Opcode 0x0f 0x01 /6. */
+FNIEMOP_DEF_1(iemOp_Grp7_lmsw, uint8_t, bRm)
+{
+    AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+}
+
+
+/** Opcode 0x0f 0x01 /7. */
+FNIEMOP_DEF_1(iemOp_Grp7_invlpg, uint8_t, bRm)
+{
+    AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+}
+
+
+/** Opcode 0x0f 0x01 /7. */
+FNIEMOP_DEF(iemOp_Grp7_swapgs)
+{
+    AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+}
+
+
+/** Opcode 0x0f 0x01 /7. */
+FNIEMOP_DEF(iemOp_Grp7_rdtscp)
+{
+    AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+}
+
+
+/** Opcode 0x0f 0x01. */
+FNIEMOP_DEF(iemOp_Grp7)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
+    {
+        case 0:
+            if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
+                return FNIEMOP_CALL_1(iemOp_Grp7_sgdt, bRm);
+            switch (bRm & X86_MODRM_RM_MASK)
+            {
+                case 1: return FNIEMOP_CALL(iemOp_Grp7_vmcall);
+                case 2: return FNIEMOP_CALL(iemOp_Grp7_vmlaunch);
+                case 3: return FNIEMOP_CALL(iemOp_Grp7_vmresume);
+                case 4: return FNIEMOP_CALL(iemOp_Grp7_vmxoff);
+            }
+            return IEMOP_RAISE_INVALID_OPCODE();
+
+        case 1:
+            if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
+                return FNIEMOP_CALL_1(iemOp_Grp7_sidt, bRm);
+            switch (bRm & X86_MODRM_RM_MASK)
+            {
+                case 0: return FNIEMOP_CALL(iemOp_Grp7_monitor);
+                case 1: return FNIEMOP_CALL(iemOp_Grp7_mwait);
+            }
+            return IEMOP_RAISE_INVALID_OPCODE();
+
+        case 2:
+            if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
+                return FNIEMOP_CALL_1(iemOp_Grp7_lgdt, bRm);
+            switch (bRm & X86_MODRM_RM_MASK)
+            {
+                case 0: return FNIEMOP_CALL(iemOp_Grp7_xgetbv);
+                case 1: return FNIEMOP_CALL(iemOp_Grp7_xsetbv);
+            }
+            return IEMOP_RAISE_INVALID_OPCODE();
+
+        case 3:
+            if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
+                return FNIEMOP_CALL_1(iemOp_Grp7_lidt, bRm);
+            return IEMOP_RAISE_INVALID_OPCODE();
+
+        case 4:
+            return FNIEMOP_CALL_1(iemOp_Grp7_smsw, bRm);
+
+        case 5:
+            return IEMOP_RAISE_INVALID_OPCODE();
+
+        case 6:
+            return FNIEMOP_CALL_1(iemOp_Grp7_lmsw, bRm);
+
+        case 7:
+            if ((bRm & X86_MODRM_MOD_MASK) != (3 << X86_MODRM_MOD_SHIFT))
+                return FNIEMOP_CALL_1(iemOp_Grp7_invlpg, bRm);
+            switch (bRm & X86_MODRM_RM_MASK)
+            {
+                case 0: return FNIEMOP_CALL(iemOp_Grp7_swapgs);
+                case 1: return FNIEMOP_CALL(iemOp_Grp7_rdtscp);
+            }
+            return IEMOP_RAISE_INVALID_OPCODE();
+
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+}
+
+
+/** Opcode 0x0f 0x02. */
+FNIEMOP_STUB(iemOp_lar_Gv_Ew);
+/** Opcode 0x0f 0x03. */
+FNIEMOP_STUB(iemOp_lsl_Gv_Ew);
+/** Opcode 0x0f 0x05. */
+FNIEMOP_STUB(iemOp_syscall);
+/** Opcode 0x0f 0x06. */
+FNIEMOP_STUB(iemOp_clts);
+/** Opcode 0x0f 0x07. */
+FNIEMOP_STUB(iemOp_sysret);
+/** Opcode 0x0f 0x08. */
+FNIEMOP_STUB(iemOp_invd);
+/** Opcode 0x0f 0x09. */
+FNIEMOP_STUB(iemOp_wbinvd);
+/** Opcode 0x0f 0x0b. */
+FNIEMOP_STUB(iemOp_ud2);
+/** Opcode 0x0f 0x0d. */
+FNIEMOP_STUB(iemOp_nop_Ev_prefetch);
+/** Opcode 0x0f 0x0e. */
+FNIEMOP_STUB(iemOp_femms);
+/** Opcode 0x0f 0x0f. */
+FNIEMOP_STUB(iemOp_3Dnow);
+/** Opcode 0x0f 0x10. */
+FNIEMOP_STUB(iemOp_movups_Vps_Wps__movupd_Vpd_Wpd__movss_Vss_Wss__movsd_Vsd_Wsd);
+/** Opcode 0x0f 0x11. */
+FNIEMOP_STUB(iemOp_movups_Wps_Vps__movupd_Wpd_Vpd__movss_Wss_Vss__movsd_Vsd_Wsd);
+/** Opcode 0x0f 0x12. */
+FNIEMOP_STUB(iemOp_movlps_Vq_Mq__movhlps_Vq_Uq__movlpd_Vq_Mq__movsldup_Vq_Wq__movddup_Vq_Wq);
+/** Opcode 0x0f 0x13. */
+FNIEMOP_STUB(iemOp_movlps_Mq_Vq__movlpd_Mq_Vq);
+/** Opcode 0x0f 0x14. */
+FNIEMOP_STUB(iemOp_unpckhlps_Vps_Wq__unpcklpd_Vpd_Wq);
+/** Opcode 0x0f 0x15. */
+FNIEMOP_STUB(iemOp_unpckhps_Vps_Wq__unpckhpd_Vpd_Wq);
+/** Opcode 0x0f 0x16. */
+FNIEMOP_STUB(iemOp_movhps_Vq_Mq__movlhps_Vq_Uq__movhpd_Vq_Mq__movshdup_Vq_Wq);
+/** Opcode 0x0f 0x17. */
+FNIEMOP_STUB(iemOp_movhps_Mq_Vq__movhpd_Mq_Vq);
+/** Opcode 0x0f 0x18. */
+FNIEMOP_STUB(iemOp_prefetch_Grp16);
+
+
+/** Opcode 0x0f 0x20. */
+FNIEMOP_DEF(iemOp_mov_Rd_Cd)
+{
+    /* mod is ignored, as are operand size overrides. */
+    IEMOP_MNEMONIC("mov Rd,Cd");
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+        pIemCpu->enmEffOpSize = pIemCpu->enmDefOpSize = IEMMODE_64BIT;
+    else
+        pIemCpu->enmEffOpSize = pIemCpu->enmDefOpSize = IEMMODE_32BIT;
+
+    /** @todo Verify that the invalid lock sequence exception (\#UD) is raised
+     *        before the privilege level violation (\#GP). */
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    uint8_t iCrReg = ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg;
+    if (pIemCpu->fPrefixes & IEM_OP_PRF_LOCK)
+    {
+        /* The lock prefix can be used to encode CR8 accesses on some CPUs. */
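+        /* E.g. on such CPUs "lock mov eax, cr0" (F0 0F 20 C0) is decoded as
+           a read of CR8, the task priority register, instead of raising #UD. */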
+        if (!IEM_IS_AMD_CPUID_FEATURE_PRESENT_ECX(X86_CPUID_AMD_FEATURE_ECX_CR8L))
+            return IEMOP_RAISE_INVALID_LOCK_PREFIX();
+        iCrReg |= 8;
+    }
+    switch (iCrReg)
+    {
+        case 0: case 2: case 3: case 4: case 8:
+            break;
+        default:
+            return IEMOP_RAISE_INVALID_OPCODE();
+    }
+
+    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_mov_Rd_Cd, (X86_MODRM_RM_MASK & bRm) | pIemCpu->uRexB, iCrReg);
+}
+
+
+/** Opcode 0x0f 0x21. */
+FNIEMOP_STUB(iemOp_mov_Rd_Dd);
+
+
+/** Opcode 0x0f 0x22. */
+FNIEMOP_DEF(iemOp_mov_Cd_Rd)
+{
+    /* mod is ignored, as are operand size overrides. */
+    IEMOP_MNEMONIC("mov Cd,Rd");
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+        pIemCpu->enmEffOpSize = pIemCpu->enmDefOpSize = IEMMODE_64BIT;
+    else
+        pIemCpu->enmEffOpSize = pIemCpu->enmDefOpSize = IEMMODE_32BIT;
+
+    /** @todo Verify that the invalid lock sequence exception (\#UD) is raised
+     *        before the privilege level violation (\#GP). */
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    uint8_t iCrReg = ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg;
+    if (pIemCpu->fPrefixes & IEM_OP_PRF_LOCK)
+    {
+        /* The lock prefix can be used to encode CR8 accesses on some CPUs. */
+        if (!IEM_IS_AMD_CPUID_FEATURE_PRESENT_ECX(X86_CPUID_AMD_FEATURE_ECX_CR8L))
+            return IEMOP_RAISE_INVALID_LOCK_PREFIX();
+        iCrReg |= 8;
+    }
+    switch (iCrReg)
+    {
+        case 0: case 2: case 3: case 4: case 8:
+            break;
+        default:
+            return IEMOP_RAISE_INVALID_OPCODE();
+    }
+
+    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_mov_Cd_Rd, iCrReg, (X86_MODRM_RM_MASK & bRm) | pIemCpu->uRexB);
+}
+
+
+/** Opcode 0x0f 0x23. */
+FNIEMOP_STUB(iemOp_mov_Dd_Rd);
+/** Opcode 0x0f 0x24. */
+FNIEMOP_STUB(iemOp_mov_Rd_Td);
+/** Opcode 0x0f 0x26. */
+FNIEMOP_STUB(iemOp_mov_Td_Rd);
+/** Opcode 0x0f 0x28. */
+FNIEMOP_STUB(iemOp_movaps_Vps_Wps__movapd_Vpd_Wpd);
+/** Opcode 0x0f 0x29. */
+FNIEMOP_STUB(iemOp_movaps_Wps_Vps__movapd_Wpd_Vpd);
+/** Opcode 0x0f 0x2a. */
+FNIEMOP_STUB(iemOp_cvtpi2ps_Vps_Qpi__cvtpi2pd_Vpd_Qpi__cvtsi2ss_Vss_Ey__cvtsi2sd_Vsd_Ey);
+/** Opcode 0x0f 0x2b. */
+FNIEMOP_STUB(iemOp_movntps_Mps_Vps__movntpd_Mpd_Vpd);
+/** Opcode 0x0f 0x2c. */
+FNIEMOP_STUB(iemOp_cvttps2pi_Ppi_Wps__cvttpd2pi_Ppi_Wpd__cvttss2si_Gy_Wss__cvttsd2si_Yu_Wsd);
+/** Opcode 0x0f 0x2d. */
+FNIEMOP_STUB(iemOp_cvtps2pi_Ppi_Wps__cvtpd2pi_QpiWpd__cvtss2si_Gy_Wss__cvtsd2si_Gy_Wsd);
+/** Opcode 0x0f 0x2e. */
+FNIEMOP_STUB(iemOp_ucomiss_Vss_Wss__ucomisd_Vsd_Wsd);
+/** Opcode 0x0f 0x2f. */
+FNIEMOP_STUB(iemOp_comiss_Vss_Wss__comisd_Vsd_Wsd);
+/** Opcode 0x0f 0x30. */
+FNIEMOP_STUB(iemOp_wrmsr);
+/** Opcode 0x0f 0x31. */
+FNIEMOP_STUB(iemOp_rdtsc);
+/** Opcode 0x0f 0x32. */
+FNIEMOP_STUB(iemOp_rdmsr);
+/** Opcode 0x0f 0x33. */
+FNIEMOP_STUB(iemOp_rdpmc);
+/** Opcode 0x0f 0x34. */
+FNIEMOP_STUB(iemOp_sysenter);
+/** Opcode 0x0f 0x35. */
+FNIEMOP_STUB(iemOp_sysexit);
+/** Opcode 0x0f 0x37. */
+FNIEMOP_STUB(iemOp_getsec);
+/** Opcode 0x0f 0x38. */
+FNIEMOP_STUB(iemOp_3byte_Esc_A4);
+/** Opcode 0x0f 0x3a. */
+FNIEMOP_STUB(iemOp_3byte_Esc_A5);
+/** Opcode 0x0f 0x3c (?). */
+FNIEMOP_STUB(iemOp_movnti_Gv_Ev);
+/** Opcode 0x0f 0x40. */
+FNIEMOP_STUB(iemOp_cmovo_Gv_Ev);
+/** Opcode 0x0f 0x41. */
+FNIEMOP_STUB(iemOp_cmovno_Gv_Ev);
+/** Opcode 0x0f 0x42. */
+FNIEMOP_STUB(iemOp_cmovc_Gv_Ev);
+/** Opcode 0x0f 0x43. */
+FNIEMOP_STUB(iemOp_cmovnc_Gv_Ev);
+/** Opcode 0x0f 0x44. */
+FNIEMOP_STUB(iemOp_cmove_Gv_Ev);
+/** Opcode 0x0f 0x45. */
+FNIEMOP_STUB(iemOp_cmovne_Gv_Ev);
+/** Opcode 0x0f 0x46. */
+FNIEMOP_STUB(iemOp_cmovbe_Gv_Ev);
+/** Opcode 0x0f 0x47. */
+FNIEMOP_STUB(iemOp_cmovnbe_Gv_Ev);
+/** Opcode 0x0f 0x48. */
+FNIEMOP_STUB(iemOp_cmovs_Gv_Ev);
+/** Opcode 0x0f 0x49. */
+FNIEMOP_STUB(iemOp_cmovns_Gv_Ev);
+/** Opcode 0x0f 0x4a. */
+FNIEMOP_STUB(iemOp_cmovp_Gv_Ev);
+/** Opcode 0x0f 0x4b. */
+FNIEMOP_STUB(iemOp_cmovnp_Gv_Ev);
+/** Opcode 0x0f 0x4c. */
+FNIEMOP_STUB(iemOp_cmovl_Gv_Ev);
+/** Opcode 0x0f 0x4d. */
+FNIEMOP_STUB(iemOp_cmovnl_Gv_Ev);
+/** Opcode 0x0f 0x4e. */
+FNIEMOP_STUB(iemOp_cmovle_Gv_Ev);
+/** Opcode 0x0f 0x4f. */
+FNIEMOP_STUB(iemOp_cmovnle_Gv_Ev);
+/** Opcode 0x0f 0x50. */
+FNIEMOP_STUB(iemOp_movmskps_Gy_Ups__movmskpd_Gy_Upd);
+/** Opcode 0x0f 0x51. */
+FNIEMOP_STUB(iemOp_sqrtps_Wps_Vps__sqrtpd_Wpd_Vpd__sqrtss_Vss_Wss__sqrtsd_Vsd_Wsd);
+/** Opcode 0x0f 0x52. */
+FNIEMOP_STUB(iemOp_rsqrtps_Wps_Vps__rsqrtss_Vss_Wss);
+/** Opcode 0x0f 0x53. */
+FNIEMOP_STUB(iemOp_rcpps_Wps_Vps__rcpss_Vs_Wss);
+/** Opcode 0x0f 0x54. */
+FNIEMOP_STUB(iemOp_andps_Vps_Wps__andpd_Wpd_Vpd);
+/** Opcode 0x0f 0x55. */
+FNIEMOP_STUB(iemOp_andnps_Vps_Wps__andnpd_Wpd_Vpd);
+/** Opcode 0x0f 0x56. */
+FNIEMOP_STUB(iemOp_orps_Wpd_Vpd__orpd_Wpd_Vpd);
+/** Opcode 0x0f 0x57. */
+FNIEMOP_STUB(iemOp_xorps_Vps_Wps__xorpd_Wpd_Vpd);
+/** Opcode 0x0f 0x58. */
+FNIEMOP_STUB(iemOp_addps_Vps_Wps__addpd_Vpd_Wpd__addss_Vss_Wss__addsd_Vsd_Wsd);
+/** Opcode 0x0f 0x59. */
+FNIEMOP_STUB(iemOp_mulps_Vps_Wps__mulpd_Vpd_Wpd__mulss_Vss__Wss__mulsd_Vsd_Wsd);
+/** Opcode 0x0f 0x5a. */
+FNIEMOP_STUB(iemOp_cvtps2pd_Vpd_Wps__cvtpd2ps_Vps_Wpd__cvtss2sd_Vsd_Wss__cvtsd2ss_Vss_Wsd);
+/** Opcode 0x0f 0x5b. */
+FNIEMOP_STUB(iemOp_cvtdq2ps_Vps_Wdq__cvtps2dq_Vdq_Wps__cvtps2dq_Vdq_Wps);
+/** Opcode 0x0f 0x5c. */
+FNIEMOP_STUB(iemOp_subps_Vps_Wps__subpd_Vps_Wdp__subss_Vss_Wss__subsd_Vsd_Wsd);
+/** Opcode 0x0f 0x5d. */
+FNIEMOP_STUB(iemOp_minps_Vps_Wps__minpd_Vpd_Wpd__minss_Vss_Wss__minsd_Vsd_Wsd);
+/** Opcode 0x0f 0x5e. */
+FNIEMOP_STUB(iemOp_divps_Vps_Wps__divpd_Vpd_Wpd__divss_Vss_Wss__divsd_Vsd_Wsd);
+/** Opcode 0x0f 0x5f. */
+FNIEMOP_STUB(iemOp_maxps_Vps_Wps__maxpd_Vpd_Wpd__maxss_Vss_Wss__maxsd_Vsd_Wsd);
+/** Opcode 0x0f 0x60. */
+FNIEMOP_STUB(iemOp_punpcklbw_Pq_Qd__punpcklbw_Vdq_Wdq);
+/** Opcode 0x0f 0x61. */
+FNIEMOP_STUB(iemOp_punpcklwd_Pq_Qd__punpcklwd_Vdq_Wdq);
+/** Opcode 0x0f 0x62. */
+FNIEMOP_STUB(iemOp_punpckldq_Pq_Qd__punpckldq_Vdq_Wdq);
+/** Opcode 0x0f 0x63. */
+FNIEMOP_STUB(iemOp_packsswb_Pq_Qq__packsswb_Vdq_Wdq);
+/** Opcode 0x0f 0x64. */
+FNIEMOP_STUB(iemOp_pcmpgtb_Pq_Qq__pcmpgtb_Vdq_Wdq);
+/** Opcode 0x0f 0x65. */
+FNIEMOP_STUB(iemOp_pcmpgtw_Pq_Qq__pcmpgtw_Vdq_Wdq);
+/** Opcode 0x0f 0x66. */
+FNIEMOP_STUB(iemOp_pcmpgtd_Pq_Qq__pcmpgtd_Vdq_Wdq);
+/** Opcode 0x0f 0x67. */
+FNIEMOP_STUB(iemOp_packuswb_Pq_Qq__packuswb_Vdq_Wdq);
+/** Opcode 0x0f 0x68. */
+FNIEMOP_STUB(iemOp_punpckhbw_Pq_Qq__punpckhbw_Vdq_Wdq);
+/** Opcode 0x0f 0x69. */
+FNIEMOP_STUB(iemOp_punpckhwd_Pq_Qd__punpckhwd_Vdq_Wdq);
+/** Opcode 0x0f 0x6a. */
+FNIEMOP_STUB(iemOp_punpckhdq_Pq_Qd__punpckhdq_Vdq_Wdq);
+/** Opcode 0x0f 0x6b. */
+FNIEMOP_STUB(iemOp_packssdw_Pq_Qd__packssdq_Vdq_Wdq);
+/** Opcode 0x0f 0x6c. */
+FNIEMOP_STUB(iemOp_punpcklqdq_Vdq_Wdq);
+/** Opcode 0x0f 0x6d. */
+FNIEMOP_STUB(iemOp_punpckhqdq_Vdq_Wdq);
+/** Opcode 0x0f 0x6e. */
+FNIEMOP_STUB(iemOp_movd_q_Pd_Ey__movd_q_Vy_Ey);
+/** Opcode 0x0f 0x6f. */
+FNIEMOP_STUB(iemOp_movq_Pq_Qq__movdqa_Vdq_Wdq__movdqu_Vdq_Wdq);
+/** Opcode 0x0f 0x70. */
+FNIEMOP_STUB(iemOp_pshufw_Pq_Qq_Ib__pshufd_Vdq_Wdq_Ib__pshufhw_Vdq_Wdq_Ib__pshuflq_Vdq_Wdq_Ib);
+/** Opcode 0x0f 0x71. */
+FNIEMOP_STUB(iemOp_Grp12);
+/** Opcode 0x0f 0x72. */
+FNIEMOP_STUB(iemOp_Grp13);
+/** Opcode 0x0f 0x73. */
+FNIEMOP_STUB(iemOp_Grp14);
+/** Opcode 0x0f 0x74. */
+FNIEMOP_STUB(iemOp_pcmpeqb_Pq_Qq__pcmpeqb_Vdq_Wdq);
+/** Opcode 0x0f 0x75. */
+FNIEMOP_STUB(iemOp_pcmpeqw_Pq_Qq__pcmpeqw_Vdq_Wdq);
+/** Opcode 0x0f 0x76. */
+FNIEMOP_STUB(iemOp_pcmped_Pq_Qq__pcmpeqd_Vdq_Wdq);
+/** Opcode 0x0f 0x77. */
+FNIEMOP_STUB(iemOp_emms);
+/** Opcode 0x0f 0x78. */
+FNIEMOP_STUB(iemOp_vmread);
+/** Opcode 0x0f 0x79. */
+FNIEMOP_STUB(iemOp_vmwrite);
+/** Opcode 0x0f 0x7c. */
+FNIEMOP_STUB(iemOp_haddpd_Vdp_Wpd__haddps_Vps_Wps);
+/** Opcode 0x0f 0x7d. */
+FNIEMOP_STUB(iemOp_hsubpd_Vpd_Wpd__hsubps_Vps_Wps);
+/** Opcode 0x0f 0x7e. */
+FNIEMOP_STUB(iemOp_movd_q_Ey_Pd__movd_q_Ey_Vy__movq_Vq_Wq);
+/** Opcode 0x0f 0x7f. */
+FNIEMOP_STUB(iemOp_movq_Qq_Pq__movq_movdqa_Wdq_Vdq__movdqu_Wdq_Vdq);
+
+
+/** Opcode 0x0f 0x80. */
+FNIEMOP_DEF(iemOp_jo_Jv)
+{
+    IEMOP_MNEMONIC("jo  Jv");
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pIemCpu->enmEffOpSize == IEMMODE_16BIT)
+    {
+        uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
+            IEM_MC_REL_JMP_S16((int16_t)u16Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
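+        /* Note: in 64-bit mode this branch covers the 64-bit operand size as
+           well; Jv is then a 32-bit displacement sign-extended to RIP. */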
+        uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
+            IEM_MC_REL_JMP_S32((int32_t)u32Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x81. */
+FNIEMOP_DEF(iemOp_jno_Jv)
+{
+    IEMOP_MNEMONIC("jno Jv");
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pIemCpu->enmEffOpSize == IEMMODE_16BIT)
+    {
+        uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S16((int16_t)u16Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S32((int32_t)u32Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x82. */
+FNIEMOP_DEF(iemOp_jc_Jv)
+{
+    IEMOP_MNEMONIC("jc/jb/jnae Jv");
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pIemCpu->enmEffOpSize == IEMMODE_16BIT)
+    {
+        uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
+            IEM_MC_REL_JMP_S16((int16_t)u16Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
+            IEM_MC_REL_JMP_S32((int32_t)u32Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x83. */
+FNIEMOP_DEF(iemOp_jnc_Jv)
+{
+    IEMOP_MNEMONIC("jnc/jnb/jae Jv");
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pIemCpu->enmEffOpSize == IEMMODE_16BIT)
+    {
+        uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S16((int16_t)u16Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S32((int32_t)u32Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x84. */
+FNIEMOP_DEF(iemOp_je_Jv)
+{
+    IEMOP_MNEMONIC("je/jz Jv");
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pIemCpu->enmEffOpSize == IEMMODE_16BIT)
+    {
+        uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
+            IEM_MC_REL_JMP_S16((int16_t)u16Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
+            IEM_MC_REL_JMP_S32((int32_t)u32Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x85. */
+FNIEMOP_DEF(iemOp_jne_Jv)
+{
+    IEMOP_MNEMONIC("jne/jnz Jv");
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pIemCpu->enmEffOpSize == IEMMODE_16BIT)
+    {
+        uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S16((int16_t)u16Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S32((int32_t)u32Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x86. */
+FNIEMOP_DEF(iemOp_jbe_Jv)
+{
+    IEMOP_MNEMONIC("jbe/jna Jv");
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pIemCpu->enmEffOpSize == IEMMODE_16BIT)
+    {
+        uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
+            IEM_MC_REL_JMP_S16((int16_t)u16Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
+            IEM_MC_REL_JMP_S32((int32_t)u32Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x87. */
+FNIEMOP_DEF(iemOp_jnbe_Jv)
+{
+    IEMOP_MNEMONIC("jnbe/ja Jv");
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pIemCpu->enmEffOpSize == IEMMODE_16BIT)
+    {
+        uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S16((int16_t)u16Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S32((int32_t)u32Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x88. */
+FNIEMOP_DEF(iemOp_js_Jv)
+{
+    IEMOP_MNEMONIC("js  Jv");
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pIemCpu->enmEffOpSize == IEMMODE_16BIT)
+    {
+        uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
+            IEM_MC_REL_JMP_S16((int16_t)u16Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
+            IEM_MC_REL_JMP_S32((int32_t)u32Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x89. */
+FNIEMOP_DEF(iemOp_jns_Jv)
+{
+    IEMOP_MNEMONIC("jns Jv");
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pIemCpu->enmEffOpSize == IEMMODE_16BIT)
+    {
+        uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S16((int16_t)u16Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S32((int32_t)u32Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x8a. */
+FNIEMOP_DEF(iemOp_jp_Jv)
+{
+    IEMOP_MNEMONIC("jp  Jv");
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pIemCpu->enmEffOpSize == IEMMODE_16BIT)
+    {
+        uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
+            IEM_MC_REL_JMP_S16((int16_t)u16Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
+            IEM_MC_REL_JMP_S32((int32_t)u32Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x8b. */
+FNIEMOP_DEF(iemOp_jnp_Jv)
+{
+    IEMOP_MNEMONIC("jo  Jv");
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pIemCpu->enmEffOpSize == IEMMODE_16BIT)
+    {
+        uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S16((int16_t)u16Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S32((int32_t)u32Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x8c. */
+FNIEMOP_DEF(iemOp_jl_Jv)
+{
+    IEMOP_MNEMONIC("jl/jnge Jv");
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pIemCpu->enmEffOpSize == IEMMODE_16BIT)
+    {
+        uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_REL_JMP_S16((int16_t)u16Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_REL_JMP_S32((int32_t)u32Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x8d. */
+FNIEMOP_DEF(iemOp_jnl_Jv)
+{
+    IEMOP_MNEMONIC("jnl/jge Jv");
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pIemCpu->enmEffOpSize == IEMMODE_16BIT)
+    {
+        uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S16((int16_t)u16Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S32((int32_t)u32Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x8e. */
+FNIEMOP_DEF(iemOp_jle_Jv)
+{
+    IEMOP_MNEMONIC("jle/jng Jv");
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pIemCpu->enmEffOpSize == IEMMODE_16BIT)
+    {
+        uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_REL_JMP_S16((int16_t)u16Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_REL_JMP_S32((int32_t)u32Imm);
+        } IEM_MC_ELSE() {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x8f. */
+FNIEMOP_DEF(iemOp_jnle_Jv)
+{
+    IEMOP_MNEMONIC("jnle/jg Jv");
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    if (pIemCpu->enmEffOpSize == IEMMODE_16BIT)
+    {
+        uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S16((int16_t)u16Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    else
+    {
+        uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
+            IEM_MC_ADVANCE_RIP();
+        } IEM_MC_ELSE() {
+            IEM_MC_REL_JMP_S32((int32_t)u32Imm);
+        } IEM_MC_ENDIF();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x0f 0x90. */
+FNIEMOP_STUB(iemOp_seto_Jv);
+/** Opcode 0x0f 0x91. */
+FNIEMOP_STUB(iemOp_setno_Jv);
+/** Opcode 0x0f 0x92. */
+FNIEMOP_STUB(iemOp_setc_Jv);
+/** Opcode 0x0f 0x93. */
+FNIEMOP_STUB(iemOp_setnc_Jv);
+/** Opcode 0x0f 0x94. */
+FNIEMOP_STUB(iemOp_sete_Jv);
+/** Opcode 0x0f 0x95. */
+FNIEMOP_STUB(iemOp_setne_Jv);
+/** Opcode 0x0f 0x96. */
+FNIEMOP_STUB(iemOp_setbe_Jv);
+/** Opcode 0x0f 0x97. */
+FNIEMOP_STUB(iemOp_setnbe_Jv);
+/** Opcode 0x0f 0x98. */
+FNIEMOP_STUB(iemOp_sets_Jv);
+/** Opcode 0x0f 0x99. */
+FNIEMOP_STUB(iemOp_setns_Jv);
+/** Opcode 0x0f 0x9a. */
+FNIEMOP_STUB(iemOp_setp_Jv);
+/** Opcode 0x0f 0x9b. */
+FNIEMOP_STUB(iemOp_setnp_Jv);
+/** Opcode 0x0f 0x9c. */
+FNIEMOP_STUB(iemOp_setl_Jv);
+/** Opcode 0x0f 0x9d. */
+FNIEMOP_STUB(iemOp_setnl_Jv);
+/** Opcode 0x0f 0x9e. */
+FNIEMOP_STUB(iemOp_setle_Jv);
+/** Opcode 0x0f 0x9f. */
+FNIEMOP_STUB(iemOp_setnle_Jv);
+/** Opcode 0x0f 0xa0. */
+FNIEMOP_STUB(iemOp_push_fs);
+/** Opcode 0x0f 0xa1. */
+FNIEMOP_STUB(iemOp_pop_fs);
+/** Opcode 0x0f 0xa2. */
+FNIEMOP_STUB(iemOp_cpuid);
+/** Opcode 0x0f 0xa3. */
+FNIEMOP_STUB(iemOp_bt_Ev_Gv);
+/** Opcode 0x0f 0xa4. */
+FNIEMOP_STUB(iemOp_shld_Ev_Gv_Ib);
+/** Opcode 0x0f 0xa5. */
+FNIEMOP_STUB(iemOp_shld_Ev_Gv_CL);
+/** Opcode 0x0f 0xa8. */
+FNIEMOP_STUB(iemOp_push_gs);
+/** Opcode 0x0f 0xa9. */
+FNIEMOP_STUB(iemOp_pop_gs);
+/** Opcode 0x0f 0xaa. */
+FNIEMOP_STUB(iemOp_rsm);
+/** Opcode 0x0f 0xab. */
+FNIEMOP_STUB(iemOp_bts_Ev_Gv);
+/** Opcode 0x0f 0xac. */
+FNIEMOP_STUB(iemOp_shrd_Ev_Gv_Ib);
+/** Opcode 0x0f 0xad. */
+FNIEMOP_STUB(iemOp_shrd_Ev_Gv_CL);
+/** Opcode 0x0f 0xae. */
+FNIEMOP_STUB(iemOp_Grp15);
+/** Opcode 0x0f 0xaf. */
+FNIEMOP_STUB(iemOp_imul_Gv_Ev);
+/** Opcode 0x0f 0xb0. */
+FNIEMOP_STUB(iemOp_cmpxchg_Eb_Gb);
+/** Opcode 0x0f 0xb1. */
+FNIEMOP_STUB(iemOp_cmpxchg_Ev_Gv);
+/** Opcode 0x0f 0xb2. */
+FNIEMOP_STUB(iemOp_lss_Gv_Mp);
+/** Opcode 0x0f 0xb3. */
+FNIEMOP_STUB(iemOp_btr_Ev_Gv);
+/** Opcode 0x0f 0xb4. */
+FNIEMOP_STUB(iemOp_lfs_Gv_Mp);
+/** Opcode 0x0f 0xb5. */
+FNIEMOP_STUB(iemOp_lgs_Gv_Mp);
+/** Opcode 0x0f 0xb6. */
+FNIEMOP_STUB(iemOp_movzx_Gv_Eb);
+/** Opcode 0x0f 0xb7. */
+FNIEMOP_STUB(iemOp_movzx_Gv_Ew);
+/** Opcode 0x0f 0xb8. */
+FNIEMOP_STUB(iemOp_popcnt_Gv_Ev_jmpe);
+/** Opcode 0x0f 0xb9. */
+FNIEMOP_STUB(iemOp_Grp10);
+/** Opcode 0x0f 0xba. */
+FNIEMOP_STUB(iemOp_Grp11);
+/** Opcode 0x0f 0xbb. */
+FNIEMOP_STUB(iemOp_btc_Ev_Gv);
+/** Opcode 0x0f 0xbc. */
+FNIEMOP_STUB(iemOp_bsf_Gv_Ev);
+/** Opcode 0x0f 0xbd. */
+FNIEMOP_STUB(iemOp_bsr_Gv_Ev);
+/** Opcode 0x0f 0xbe. */
+FNIEMOP_STUB(iemOp_movsx_Gv_Eb);
+/** Opcode 0x0f 0xbf. */
+FNIEMOP_STUB(iemOp_movsx_Gv_Ew);
+/** Opcode 0x0f 0xc0. */
+FNIEMOP_STUB(iemOp_xadd_Eb_Gb);
+/** Opcode 0x0f 0xc1. */
+FNIEMOP_STUB(iemOp_xadd_Ev_Gv);
+/** Opcode 0x0f 0xc2. */
+FNIEMOP_STUB(iemOp_cmpps_Vps_Wps_Ib__cmppd_Vpd_Wpd_Ib__cmpss_Vss_Wss_Ib__cmpsd_Vsd_Wsd_Ib);
+/** Opcode 0x0f 0xc3. */
+FNIEMOP_STUB(iemOp_movnti_My_Gy);
+/** Opcode 0x0f 0xc4. */
+FNIEMOP_STUB(iemOp_pinsrw_Pq_Ry_Mw_Ib__pinsrw_Vdq_Ry_Mw_Ib);
+/** Opcode 0x0f 0xc5. */
+FNIEMOP_STUB(iemOp_pextrw_Gd_Nq_Ib__pextrw_Gd_Udq_Ib);
+/** Opcode 0x0f 0xc6. */
+FNIEMOP_STUB(iemOp_shufps_Vps_Wps_Ib__shufdp_Vpd_Wpd_Ib);
+/** Opcode 0x0f 0xc7. */
+FNIEMOP_STUB(iemOp_Grp9);
+/** Opcode 0x0f 0xc8. */
+FNIEMOP_STUB(iemOp_bswap_rAX_r8);
+/** Opcode 0x0f 0xc9. */
+FNIEMOP_STUB(iemOp_bswap_rCX_r9);
+/** Opcode 0x0f 0xca. */
+FNIEMOP_STUB(iemOp_bswap_rDX_r10);
+/** Opcode 0x0f 0xcb. */
+FNIEMOP_STUB(iemOp_bswap_rBX_r11);
+/** Opcode 0x0f 0xcc. */
+FNIEMOP_STUB(iemOp_bswap_rSP_r12);
+/** Opcode 0x0f 0xcd. */
+FNIEMOP_STUB(iemOp_bswap_rBP_r13);
+/** Opcode 0x0f 0xce. */
+FNIEMOP_STUB(iemOp_bswap_rSI_r14);
+/** Opcode 0x0f 0xcf. */
+FNIEMOP_STUB(iemOp_bswap_rDI_r15);
+/** Opcode 0x0f 0xd0. */
+FNIEMOP_STUB(iemOp_addsubpd_Vpd_Wpd__addsubps_Vps_Wps);
+/** Opcode 0x0f 0xd1. */
+FNIEMOP_STUB(iemOp_psrlw_Pq_Qq__psrlw_Vdq_Wdq);
+/** Opcode 0x0f 0xd2. */
+FNIEMOP_STUB(iemOp_psrld_Pq_Qq__psrld_Vdq_Wdq);
+/** Opcode 0x0f 0xd3. */
+FNIEMOP_STUB(iemOp_psrlq_Pq_Qq__psrlq_Vdq_Wdq);
+/** Opcode 0x0f 0xd4. */
+FNIEMOP_STUB(iemOp_paddq_Pq_Qq__paddq_Vdq_Wdq);
+/** Opcode 0x0f 0xd5. */
+FNIEMOP_STUB(iemOp_pmullw_Pq_Qq__pmullw_Vdq_Wdq);
+/** Opcode 0x0f 0xd6. */
+FNIEMOP_STUB(iemOp_movq_Wq_Vq__movq2dq_Vdq_Nq__movdq2q_Pq_Uq);
+/** Opcode 0x0f 0xd7. */
+FNIEMOP_STUB(iemOp_pmovmskb_Gd_Nq__pmovmskb_Gd_Udq);
+/** Opcode 0x0f 0xd8. */
+FNIEMOP_STUB(iemOp_psubusb_Pq_Qq__psubusb_Vdq_Wdq);
+/** Opcode 0x0f 0xd9. */
+FNIEMOP_STUB(iemOp_psubusw_Pq_Qq__psubusw_Vdq_Wdq);
+/** Opcode 0x0f 0xda. */
+FNIEMOP_STUB(iemOp_pminub_Pq_Qq__pminub_Vdq_Wdq);
+/** Opcode 0x0f 0xdb. */
+FNIEMOP_STUB(iemOp_pand_Pq_Qq__pand_Vdq_Wdq);
+/** Opcode 0x0f 0xdc. */
+FNIEMOP_STUB(iemOp_paddusb_Pq_Qq__paddusb_Vdq_Wdq);
+/** Opcode 0x0f 0xdd. */
+FNIEMOP_STUB(iemOp_paddusw_Pq_Qq__paddusw_Vdq_Wdq);
+/** Opcode 0x0f 0xde. */
+FNIEMOP_STUB(iemOp_pmaxub_Pq_Qq__pmaxub_Vdq_Wdq);
+/** Opcode 0x0f 0xdf. */
+FNIEMOP_STUB(iemOp_pandn_Pq_Qq__pandn_Vdq_Wdq);
+/** Opcode 0x0f 0xe0. */
+FNIEMOP_STUB(iemOp_pavgb_Pq_Qq__pavgb_Vdq_Wdq);
+/** Opcode 0x0f 0xe1. */
+FNIEMOP_STUB(iemOp_psraw_Pq_Qq__psraw_Vdq_Wdq);
+/** Opcode 0x0f 0xe2. */
+FNIEMOP_STUB(iemOp_psrad_Pq_Qq__psrad_Vdq_Wdq);
+/** Opcode 0x0f 0xe3. */
+FNIEMOP_STUB(iemOp_pavgw_Pq_Qq__pavgw_Vdq_Wdq);
+/** Opcode 0x0f 0xe4. */
+FNIEMOP_STUB(iemOp_pmulhuw_Pq_Qq__pmulhuw_Vdq_Wdq);
+/** Opcode 0x0f 0xe5. */
+FNIEMOP_STUB(iemOp_pmulhw_Pq_Qq__pmulhw_Vdq_Wdq);
+/** Opcode 0x0f 0xe6. */
+FNIEMOP_STUB(iemOp_cvttpd2dq_Vdq_Wpd__cvtdq2pd_Vdq_Wpd__cvtpd2dq_Vdq_Wpd);
+/** Opcode 0x0f 0xe7. */
+FNIEMOP_STUB(iemOp_movntq_Mq_Pq__movntdq_Mdq_Vdq);
+/** Opcode 0x0f 0xe8. */
+FNIEMOP_STUB(iemOp_psubsb_Pq_Qq__psubsb_Vdq_Wdq);
+/** Opcode 0x0f 0xe9. */
+FNIEMOP_STUB(iemOp_psubsw_Pq_Qq__psubsw_Vdq_Wdq);
+/** Opcode 0x0f 0xea. */
+FNIEMOP_STUB(iemOp_pminsw_Pq_Qq__pminsw_Vdq_Wdq);
+/** Opcode 0x0f 0xeb. */
+FNIEMOP_STUB(iemOp_por_Pq_Qq__por_Vdq_Wdq);
+/** Opcode 0x0f 0xec. */
+FNIEMOP_STUB(iemOp_paddsb_Pq_Qq__paddsb_Vdq_Wdq);
+/** Opcode 0x0f 0xed. */
+FNIEMOP_STUB(iemOp_paddsw_Pq_Qq__paddsw_Vdq_Wdq);
+/** Opcode 0x0f 0xee. */
+FNIEMOP_STUB(iemOp_pmaxsw_Pq_Qq__pmaxsw_Vdq_Wdq);
+/** Opcode 0x0f 0xef. */
+FNIEMOP_STUB(iemOp_pxor_Pq_Qq__pxor_Vdq_Wdq);
+/** Opcode 0x0f 0xf0. */
+FNIEMOP_STUB(iemOp_lddqu_Vdq_Mdq);
+/** Opcode 0x0f 0xf1. */
+FNIEMOP_STUB(iemOp_psllw_Pq_Qq__psllw_Vdq_Wdq);
+/** Opcode 0x0f 0xf2. */
+FNIEMOP_STUB(iemOp_pslld_Pq_Qq__pslld_Vdq_Wdq);
+/** Opcode 0x0f 0xf3. */
+FNIEMOP_STUB(iemOp_psllq_Pq_Qq__psllq_Vdq_Wdq);
+/** Opcode 0x0f 0xf4. */
+FNIEMOP_STUB(iemOp_pmuludq_Pq_Qq__pmuludq_Vdq_Wdq);
+/** Opcode 0x0f 0xf5. */
+FNIEMOP_STUB(iemOp_pmaddwd_Pq_Qq__pmaddwd_Vdq_Wdq);
+/** Opcode 0x0f 0xf6. */
+FNIEMOP_STUB(iemOp_psadbw_Pq_Qq__psadbw_Vdq_Wdq);
+/** Opcode 0x0f 0xf7. */
+FNIEMOP_STUB(iemOp_maskmovq_Pq_Nq__maskmovdqu_Vdq_Udq);
+/** Opcode 0x0f 0xf8. */
+FNIEMOP_STUB(iemOp_psubb_Pq_Qq__psubb_Vdq_Wdq);
+/** Opcode 0x0f 0xf9. */
+FNIEMOP_STUB(iemOp_psubw_Pq_Qq__psubw_Vdq_Wdq);
+/** Opcode 0x0f 0xfa. */
+FNIEMOP_STUB(iemOp_psubd_Pq_Qq__psubd_Vdq_Wdq);
+/** Opcode 0x0f 0xfb. */
+FNIEMOP_STUB(iemOp_psubq_Pq_Qq__psubq_Vdq_Wdq);
+/** Opcode 0x0f 0xfc. */
+FNIEMOP_STUB(iemOp_paddb_Pq_Qq__paddb_Vdq_Wdq);
+/** Opcode 0x0f 0xfd. */
+FNIEMOP_STUB(iemOp_paddw_Pq_Qq__paddw_Vdq_Wdq);
+/** Opcode 0x0f 0xfe. */
+FNIEMOP_STUB(iemOp_paddd_Pq_Qq__paddd_Vdq_Wdq);
+
+
+const PFNIEMOP g_apfnTwoByteMap[256] =
+{
+    /* 0x00 */  iemOp_Grp6,             iemOp_Grp7,             iemOp_lar_Gv_Ew,        iemOp_lsl_Gv_Ew,
+    /* 0x04 */  iemOp_Invalid,          iemOp_syscall,          iemOp_clts,             iemOp_sysret,
+    /* 0x08 */  iemOp_invd,             iemOp_wbinvd,           iemOp_Invalid,          iemOp_ud2,
+    /* 0x0c */  iemOp_Invalid,          iemOp_nop_Ev_prefetch,  iemOp_femms,            iemOp_3Dnow,
+    /* 0x10 */  iemOp_movups_Vps_Wps__movupd_Vpd_Wpd__movss_Vss_Wss__movsd_Vsd_Wsd,
+    /* 0x11 */  iemOp_movups_Wps_Vps__movupd_Wpd_Vpd__movss_Wss_Vss__movsd_Vsd_Wsd,
+    /* 0x12 */  iemOp_movlps_Vq_Mq__movhlps_Vq_Uq__movlpd_Vq_Mq__movsldup_Vq_Wq__movddup_Vq_Wq,
+    /* 0x13 */  iemOp_movlps_Mq_Vq__movlpd_Mq_Vq,
+    /* 0x14 */  iemOp_unpckhlps_Vps_Wq__unpcklpd_Vpd_Wq,
+    /* 0x15 */  iemOp_unpckhps_Vps_Wq__unpckhpd_Vpd_Wq,
+    /* 0x16 */  iemOp_movhps_Vq_Mq__movlhps_Vq_Uq__movhpd_Vq_Mq__movshdup_Vq_Wq,
+    /* 0x17 */  iemOp_movhps_Mq_Vq__movhpd_Mq_Vq,
+    /* 0x18 */  iemOp_prefetch_Grp16,   iemOp_Invalid,          iemOp_Invalid,          iemOp_Invalid,
+    /* 0x1c */  iemOp_Invalid,          iemOp_Invalid,          iemOp_Invalid,          iemOp_Invalid,
+    /* 0x20 */  iemOp_mov_Rd_Cd,        iemOp_mov_Rd_Dd,        iemOp_mov_Cd_Rd,        iemOp_mov_Dd_Rd,
+    /* 0x24 */  iemOp_mov_Rd_Td,        iemOp_Invalid,          iemOp_mov_Td_Rd,        iemOp_Invalid,
+    /* 0x28 */  iemOp_movaps_Vps_Wps__movapd_Vpd_Wpd,
+    /* 0x29 */  iemOp_movaps_Wps_Vps__movapd_Wpd_Vpd,
+    /* 0x2a */  iemOp_cvtpi2ps_Vps_Qpi__cvtpi2pd_Vpd_Qpi__cvtsi2ss_Vss_Ey__cvtsi2sd_Vsd_Ey,
+    /* 0x2b */  iemOp_movntps_Mps_Vps__movntpd_Mpd_Vpd,
+    /* 0x2c */  iemOp_cvttps2pi_Ppi_Wps__cvttpd2pi_Ppi_Wpd__cvttss2si_Gy_Wss__cvttsd2si_Yu_Wsd,
+    /* 0x2d */  iemOp_cvtps2pi_Ppi_Wps__cvtpd2pi_QpiWpd__cvtss2si_Gy_Wss__cvtsd2si_Gy_Wsd,
+    /* 0x2e */  iemOp_ucomiss_Vss_Wss__ucomisd_Vsd_Wsd,
+    /* 0x2f */  iemOp_comiss_Vss_Wss__comisd_Vsd_Wsd,
+    /* 0x30 */  iemOp_wrmsr,            iemOp_rdtsc,            iemOp_rdmsr,            iemOp_rdpmc,
+    /* 0x34 */  iemOp_sysenter,         iemOp_sysexit,          iemOp_Invalid,          iemOp_getsec,
+    /* 0x38 */  iemOp_3byte_Esc_A4,     iemOp_Invalid,          iemOp_3byte_Esc_A5,     iemOp_Invalid,
+    /* 0x3c */  iemOp_movnti_Gv_Ev/*?*/,iemOp_Invalid,          iemOp_Invalid,          iemOp_Invalid,
+    /* 0x40 */  iemOp_cmovo_Gv_Ev,      iemOp_cmovno_Gv_Ev,     iemOp_cmovc_Gv_Ev,      iemOp_cmovnc_Gv_Ev,
+    /* 0x44 */  iemOp_cmove_Gv_Ev,      iemOp_cmovne_Gv_Ev,     iemOp_cmovbe_Gv_Ev,     iemOp_cmovnbe_Gv_Ev,
+    /* 0x48 */  iemOp_cmovs_Gv_Ev,      iemOp_cmovns_Gv_Ev,     iemOp_cmovp_Gv_Ev,      iemOp_cmovnp_Gv_Ev,
+    /* 0x4c */  iemOp_cmovl_Gv_Ev,      iemOp_cmovnl_Gv_Ev,     iemOp_cmovle_Gv_Ev,     iemOp_cmovnle_Gv_Ev,
+    /* 0x50 */  iemOp_movmskps_Gy_Ups__movmskpd_Gy_Upd,
+    /* 0x51 */  iemOp_sqrtps_Wps_Vps__sqrtpd_Wpd_Vpd__sqrtss_Vss_Wss__sqrtsd_Vsd_Wsd,
+    /* 0x52 */  iemOp_rsqrtps_Wps_Vps__rsqrtss_Vss_Wss,
+    /* 0x53 */  iemOp_rcpps_Wps_Vps__rcpss_Vs_Wss,
+    /* 0x54 */  iemOp_andps_Vps_Wps__andpd_Wpd_Vpd,
+    /* 0x55 */  iemOp_andnps_Vps_Wps__andnpd_Wpd_Vpd,
+    /* 0x56 */  iemOp_orps_Wpd_Vpd__orpd_Wpd_Vpd,
+    /* 0x57 */  iemOp_xorps_Vps_Wps__xorpd_Wpd_Vpd,
+    /* 0x58 */  iemOp_addps_Vps_Wps__addpd_Vpd_Wpd__addss_Vss_Wss__addsd_Vsd_Wsd,
+    /* 0x59 */  iemOp_mulps_Vps_Wps__mulpd_Vpd_Wpd__mulss_Vss__Wss__mulsd_Vsd_Wsd,
+    /* 0x5a */  iemOp_cvtps2pd_Vpd_Wps__cvtpd2ps_Vps_Wpd__cvtss2sd_Vsd_Wss__cvtsd2ss_Vss_Wsd,
+    /* 0x5b */  iemOp_cvtdq2ps_Vps_Wdq__cvtps2dq_Vdq_Wps__cvtps2dq_Vdq_Wps,
+    /* 0x5c */  iemOp_subps_Vps_Wps__subpd_Vps_Wdp__subss_Vss_Wss__subsd_Vsd_Wsd,
+    /* 0x5d */  iemOp_minps_Vps_Wps__minpd_Vpd_Wpd__minss_Vss_Wss__minsd_Vsd_Wsd,
+    /* 0x5e */  iemOp_divps_Vps_Wps__divpd_Vpd_Wpd__divss_Vss_Wss__divsd_Vsd_Wsd,
+    /* 0x5f */  iemOp_maxps_Vps_Wps__maxpd_Vpd_Wpd__maxss_Vss_Wss__maxsd_Vsd_Wsd,
+    /* 0x60 */  iemOp_punpcklbw_Pq_Qd__punpcklbw_Vdq_Wdq,
+    /* 0x61 */  iemOp_punpcklwd_Pq_Qd__punpcklwd_Vdq_Wdq,
+    /* 0x62 */  iemOp_punpckldq_Pq_Qd__punpckldq_Vdq_Wdq,
+    /* 0x63 */  iemOp_packsswb_Pq_Qq__packsswb_Vdq_Wdq,
+    /* 0x64 */  iemOp_pcmpgtb_Pq_Qq__pcmpgtb_Vdq_Wdq,
+    /* 0x65 */  iemOp_pcmpgtw_Pq_Qq__pcmpgtw_Vdq_Wdq,
+    /* 0x66 */  iemOp_pcmpgtd_Pq_Qq__pcmpgtd_Vdq_Wdq,
+    /* 0x67 */  iemOp_packuswb_Pq_Qq__packuswb_Vdq_Wdq,
+    /* 0x68 */  iemOp_punpckhbw_Pq_Qq__punpckhbw_Vdq_Wdq,
+    /* 0x69 */  iemOp_punpckhwd_Pq_Qd__punpckhwd_Vdq_Wdq,
+    /* 0x6a */  iemOp_punpckhdq_Pq_Qd__punpckhdq_Vdq_Wdq,
+    /* 0x6b */  iemOp_packssdw_Pq_Qd__packssdq_Vdq_Wdq,
+    /* 0x6c */  iemOp_punpcklqdq_Vdq_Wdq,
+    /* 0x6d */  iemOp_punpckhqdq_Vdq_Wdq,
+    /* 0x6e */  iemOp_movd_q_Pd_Ey__movd_q_Vy_Ey,
+    /* 0x6f */  iemOp_movq_Pq_Qq__movdqa_Vdq_Wdq__movdqu_Vdq_Wdq,
+    /* 0x70 */  iemOp_pshufw_Pq_Qq_Ib__pshufd_Vdq_Wdq_Ib__pshufhw_Vdq_Wdq_Ib__pshuflq_Vdq_Wdq_Ib,
+    /* 0x71 */  iemOp_Grp12,
+    /* 0x72 */  iemOp_Grp13,
+    /* 0x73 */  iemOp_Grp14,
+    /* 0x74 */  iemOp_pcmpeqb_Pq_Qq__pcmpeqb_Vdq_Wdq,
+    /* 0x75 */  iemOp_pcmpeqw_Pq_Qq__pcmpeqw_Vdq_Wdq,
+    /* 0x76 */  iemOp_pcmped_Pq_Qq__pcmpeqd_Vdq_Wdq,
+    /* 0x77 */  iemOp_emms,
+    /* 0x78 */  iemOp_vmread,           iemOp_vmwrite,          iemOp_Invalid,          iemOp_Invalid,
+    /* 0x7c */  iemOp_haddpd_Vdp_Wpd__haddps_Vps_Wps,
+    /* 0x7d */  iemOp_hsubpd_Vpd_Wpd__hsubps_Vps_Wps,
+    /* 0x7e */  iemOp_movd_q_Ey_Pd__movd_q_Ey_Vy__movq_Vq_Wq,
+    /* 0x7f */  iemOp_movq_Qq_Pq__movq_movdqa_Wdq_Vdq__movdqu_Wdq_Vdq,
+    /* 0x80 */  iemOp_jo_Jv,            iemOp_jno_Jv,           iemOp_jc_Jv,            iemOp_jnc_Jv,
+    /* 0x84 */  iemOp_je_Jv,            iemOp_jne_Jv,           iemOp_jbe_Jv,           iemOp_jnbe_Jv,
+    /* 0x88 */  iemOp_js_Jv,            iemOp_jns_Jv,           iemOp_jp_Jv,            iemOp_jnp_Jv,
+    /* 0x8c */  iemOp_jl_Jv,            iemOp_jnl_Jv,           iemOp_jle_Jv,           iemOp_jnle_Jv,
+    /* 0x90 */  iemOp_seto_Jv,          iemOp_setno_Jv,         iemOp_setc_Jv,          iemOp_setnc_Jv,
+    /* 0x94 */  iemOp_sete_Jv,          iemOp_setne_Jv,         iemOp_setbe_Jv,         iemOp_setnbe_Jv,
+    /* 0x98 */  iemOp_sets_Jv,          iemOp_setns_Jv,         iemOp_setp_Jv,          iemOp_setnp_Jv,
+    /* 0x9c */  iemOp_setl_Jv,          iemOp_setnl_Jv,         iemOp_setle_Jv,         iemOp_setnle_Jv,
+    /* 0xa0 */  iemOp_push_fs,          iemOp_pop_fs,           iemOp_cpuid,            iemOp_bt_Ev_Gv,
+    /* 0xa4 */  iemOp_shld_Ev_Gv_Ib,    iemOp_shld_Ev_Gv_CL,    iemOp_Invalid,          iemOp_Invalid,
+    /* 0xa8 */  iemOp_push_gs,          iemOp_pop_gs,           iemOp_rsm,              iemOp_bts_Ev_Gv,
+    /* 0xac */  iemOp_shrd_Ev_Gv_Ib,    iemOp_shrd_Ev_Gv_CL,    iemOp_Grp15,            iemOp_imul_Gv_Ev,
+    /* 0xb0 */  iemOp_cmpxchg_Eb_Gb,    iemOp_cmpxchg_Ev_Gv,    iemOp_lss_Gv_Mp,        iemOp_btr_Ev_Gv,
+    /* 0xb4 */  iemOp_lfs_Gv_Mp,        iemOp_lgs_Gv_Mp,        iemOp_movzx_Gv_Eb,      iemOp_movzx_Gv_Ew,
+    /* 0xb8 */  iemOp_popcnt_Gv_Ev_jmpe,iemOp_Grp10,            iemOp_Grp11,            iemOp_btc_Ev_Gv,
+    /* 0xbc */  iemOp_bsf_Gv_Ev,        iemOp_bsr_Gv_Ev,        iemOp_movsx_Gv_Eb,      iemOp_movsx_Gv_Ew,
+    /* 0xc0 */  iemOp_xadd_Eb_Gb,
+    /* 0xc1 */  iemOp_xadd_Ev_Gv,
+    /* 0xc2 */  iemOp_cmpps_Vps_Wps_Ib__cmppd_Vpd_Wpd_Ib__cmpss_Vss_Wss_Ib__cmpsd_Vsd_Wsd_Ib,
+    /* 0xc3 */  iemOp_movnti_My_Gy,
+    /* 0xc4 */  iemOp_pinsrw_Pq_Ry_Mw_Ib__pinsrw_Vdq_Ry_Mw_Ib,
+    /* 0xc5 */  iemOp_pextrw_Gd_Nq_Ib__pextrw_Gd_Udq_Ib,
+    /* 0xc6 */  iemOp_shufps_Vps_Wps_Ib__shufpd_Vpd_Wpd_Ib,
+    /* 0xc7 */  iemOp_Grp9,
+    /* 0xc8 */  iemOp_bswap_rAX_r8,     iemOp_bswap_rCX_r9,     iemOp_bswap_rDX_r10,    iemOp_bswap_rBX_r11,
+    /* 0xcc */  iemOp_bswap_rSP_r12,    iemOp_bswap_rBP_r13,    iemOp_bswap_rSI_r14,    iemOp_bswap_rDI_r15,
+    /* 0xd0 */  iemOp_addsubpd_Vpd_Wpd__addsubps_Vps_Wps,
+    /* 0xd1 */  iemOp_psrlw_Pq_Qq__psrlw_Vdq_Wdq,
+    /* 0xd2 */  iemOp_psrld_Pq_Qq__psrld_Vdq_Wdq,
+    /* 0xd3 */  iemOp_psrlq_Pq_Qq__psrlq_Vdq_Wdq,
+    /* 0xd4 */  iemOp_paddq_Pq_Qq__paddq_Vdq_Wdq,
+    /* 0xd5 */  iemOp_pmullw_Pq_Qq__pmullw_Vdq_Wdq,
+    /* 0xd6 */  iemOp_movq_Wq_Vq__movq2dq_Vdq_Nq__movdq2q_Pq_Uq,
+    /* 0xd7 */  iemOp_pmovmskb_Gd_Nq__pmovmskb_Gd_Udq,
+    /* 0xd8 */  iemOp_psubusb_Pq_Qq__psubusb_Vdq_Wdq,
+    /* 0xd9 */  iemOp_psubusw_Pq_Qq__psubusw_Vdq_Wdq,
+    /* 0xda */  iemOp_pminub_Pq_Qq__pminub_Vdq_Wdq,
+    /* 0xdb */  iemOp_pand_Pq_Qq__pand_Vdq_Wdq,
+    /* 0xdc */  iemOp_paddusb_Pq_Qq__paddusb_Vdq_Wdq,
+    /* 0xdd */  iemOp_paddusw_Pq_Qq__paddusw_Vdq_Wdq,
+    /* 0xde */  iemOp_pmaxub_Pq_Qq__pmaxub_Vdq_Wdq,
+    /* 0xdf */  iemOp_pandn_Pq_Qq__pandn_Vdq_Wdq,
+    /* 0xe0 */  iemOp_pavgb_Pq_Qq__pavgb_Vdq_Wdq,
+    /* 0xe1 */  iemOp_psraw_Pq_Qq__psraw_Vdq_Wdq,
+    /* 0xe2 */  iemOp_psrad_Pq_Qq__psrad_Vdq_Wdq,
+    /* 0xe3 */  iemOp_pavgw_Pq_Qq__pavgw_Vdq_Wdq,
+    /* 0xe4 */  iemOp_pmulhuw_Pq_Qq__pmulhuw_Vdq_Wdq,
+    /* 0xe5 */  iemOp_pmulhw_Pq_Qq__pmulhw_Vdq_Wdq,
+    /* 0xe6 */  iemOp_cvttpd2dq_Vdq_Wpd__cvtdq2pd_Vdq_Wpd__cvtpd2dq_Vdq_Wpd,
+    /* 0xe7 */  iemOp_movntq_Mq_Pq__movntdq_Mdq_Vdq,
+    /* 0xe8 */  iemOp_psubsb_Pq_Qq__psubsb_Vdq_Wdq,
+    /* 0xe9 */  iemOp_psubsw_Pq_Qq__psubsw_Vdq_Wdq,
+    /* 0xea */  iemOp_pminsw_Pq_Qq__pminsw_Vdq_Wdq,
+    /* 0xeb */  iemOp_por_Pq_Qq__por_Vdq_Wdq,
+    /* 0xec */  iemOp_paddsb_Pq_Qq__paddsb_Vdq_Wdq,
+    /* 0xed */  iemOp_paddsw_Pq_Qq__paddsw_Vdq_Wdq,
+    /* 0xee */  iemOp_pmaxsw_Pq_Qq__pmaxsw_Vdq_Wdq,
+    /* 0xef */  iemOp_pxor_Pq_Qq__pxor_Vdq_Wdq,
+    /* 0xf0 */  iemOp_lddqu_Vdq_Mdq,
+    /* 0xf1 */  iemOp_psllw_Pq_Qq__psllw_Vdq_Wdq,
+    /* 0xf2 */  iemOp_pslld_Pq_Qq__pslld_Vdq_Wdq,
+    /* 0xf3 */  iemOp_psllq_Pq_Qq__psllq_Vdq_Wdq,
+    /* 0xf4 */  iemOp_pmuludq_Pq_Qq__pmuludq_Vdq_Wdq,
+    /* 0xf5 */  iemOp_pmaddwd_Pq_Qq__pmaddwd_Vdq_Wdq,
+    /* 0xf6 */  iemOp_psadbw_Pq_Qq__psadbw_Vdq_Wdq,
+    /* 0xf7 */  iemOp_maskmovq_Pq_Nq__maskmovdqu_Vdq_Udq,
+    /* 0xf8 */  iemOp_psubb_Pq_Qq__psubb_Vdq_Wdq,
+    /* 0xf9 */  iemOp_psubw_Pq_Qq__psubw_Vdq_Wdq,
+    /* 0xfa */  iemOp_psubd_Pq_Qq__psubd_Vdq_Wdq,
+    /* 0xfb */  iemOp_psubq_Pq_Qq__psubq_Vdq_Wdq,
+    /* 0xfc */  iemOp_paddb_Pq_Qq__paddb_Vdq_Wdq,
+    /* 0xfd */  iemOp_paddw_Pq_Qq__paddw_Vdq_Wdq,
+    /* 0xfe */  iemOp_paddd_Pq_Qq__paddd_Vdq_Wdq,
+    /* 0xff */  iemOp_Invalid
+};
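+
+/* The map covers all 256 second-opcode bytes, with iemOp_Invalid filling the
+ * undefined slots, so the dispatcher can index it with the raw byte and needs
+ * no bounds or NULL checks. */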
+
+/** @}  */
+
+
+/** @name One byte opcodes.
+ *
+ * @{
+ */
+
+/** Opcode 0x00. */
+FNIEMOP_DEF(iemOp_add_Eb_Gb)
+{
+    IEMOP_MNEMONIC("add Eb,Gb");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_add);
+}
+
+
+/** Opcode 0x01. */
+FNIEMOP_DEF(iemOp_add_Ev_Gv)
+{
+    IEMOP_MNEMONIC("add Gv,Ev");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_add);
+}
+
+
+/** Opcode 0x02. */
+FNIEMOP_DEF(iemOp_add_Gb_Eb)
+{
+    IEMOP_MNEMONIC("add Gb,Eb");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_add);
+}
+
+
+/** Opcode 0x03. */
+FNIEMOP_DEF(iemOp_add_Gv_Ev)
+{
+    IEMOP_MNEMONIC("add Gv,Ev");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_add);
+}
+
+
+/** Opcode 0x04. */
+FNIEMOP_DEF(iemOp_add_Al_Ib)
+{
+    IEMOP_MNEMONIC("add al,Ib");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_add);
+}
+
+
+/** Opcode 0x05. */
+FNIEMOP_DEF(iemOp_add_eAX_Iz)
+{
+    IEMOP_MNEMONIC("add rAX,Iz");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_add);
+}
+
+
+/**
+ * Common 'push segment-register' helper.
+ */
+FNIEMOP_DEF_1(iemOpCommonPushSReg, uint8_t, iReg)
+{
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    if (iReg < X86_SREG_FS)
+        IEMOP_HLP_NO_64BIT();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL(uint16_t, u16Value);
+            IEM_MC_FETCH_SREG_U16(u16Value, iReg);
+            IEM_MC_PUSH_U16(u16Value);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            break;
+
+        case IEMMODE_32BIT:
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL(uint32_t, u32Value);
+            IEM_MC_FETCH_SREG_U32_ZX(u32Value, iReg);
+            IEM_MC_PUSH_U32(u32Value);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            break;
+
+        case IEMMODE_64BIT:
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL(uint64_t, u64Value);
+            IEM_MC_FETCH_SREG_U64_ZX(u64Value, iReg);
+            IEM_MC_PUSH_U64(u64Value);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            break;
+    }
+
+    return VINF_SUCCESS;
+}
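+
+/* For illustration only: assuming the IEM_MC_* statements expand to plain
+ * fetch/push/advance operations (a sketch, not the actual expansions; all
+ * three helpers named here are hypothetical), the 16-bit case above
+ * corresponds to something like:
+ *
+ *      uint16_t u16Value = iemFetchSRegU16(pIemCpu, iReg);
+ *      iemStackPushU16(pIemCpu, u16Value);
+ *      iemAdvanceRip(pIemCpu);
+ */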
+
+
+/** Opcode 0x06. */
+FNIEMOP_DEF(iemOp_push_ES)
+{
+    IEMOP_MNEMONIC("push es");
+    return FNIEMOP_CALL_1(iemOpCommonPushSReg, X86_SREG_ES);
+}
+
+
+/** Opcode 0x07. */
+FNIEMOP_DEF(iemOp_pop_ES)
+{
+    IEMOP_MNEMONIC("pop es");
+    IEMOP_HLP_NO_64BIT();
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_2(iemOpCImpl_pop_Sreg, X86_SREG_ES, pIemCpu->enmEffOpSize);
+}
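+
+/* Popping a segment register involves descriptor-table access and a number
+ * of exception checks, so it is deferred to a C implementation via
+ * IEM_MC_DEFER_TO_CIMPL_2 instead of being spelled out in IEM_MC_* form. */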
+
+
+/** Opcode 0x08. */
+FNIEMOP_DEF(iemOp_or_Eb_Gb)
+{
+    IEMOP_MNEMONIC("or  Eb,Gb");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_or);
+}
+
+
+/** Opcode 0x09. */
+FNIEMOP_DEF(iemOp_or_Ev_Gv)
+{
+    IEMOP_MNEMONIC("or  Ev,Gv ");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_or);
+}
+
+
+/** Opcode 0x0a. */
+FNIEMOP_DEF(iemOp_or_Gb_Eb)
+{
+    IEMOP_MNEMONIC("or  Gb,Eb");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_or);
+}
+
+
+/** Opcode 0x0b. */
+FNIEMOP_DEF(iemOp_or_Gv_Ev)
+{
+    IEMOP_MNEMONIC("or  Gv,Ev");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_or);
+}
+
+
+/** Opcode 0x0c. */
+FNIEMOP_DEF(iemOp_or_Al_Ib)
+{
+    IEMOP_MNEMONIC("or  al,Ib");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_or);
+}
+
+
+/** Opcode 0x0d. */
+FNIEMOP_DEF(iemOp_or_eAX_Iz)
+{
+    IEMOP_MNEMONIC("or  rAX,Iz");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_or);
+}
+
+
+/** Opcode 0x0e. */
+FNIEMOP_DEF(iemOp_push_CS)
+{
+    IEMOP_MNEMONIC("push cs");
+    return FNIEMOP_CALL_1(iemOpCommonPushSReg, X86_SREG_CS);
+}
+
+
+/** Opcode 0x0f. */
+FNIEMOP_DEF(iemOp_2byteEscape)
+{
+    uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+    return FNIEMOP_CALL(g_apfnTwoByteMap[b]);
+}
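+
+/* Decoding example: for the byte sequence 0f a2 (cpuid), the one-byte
+ * dispatcher lands here, the next byte 0xa2 is fetched, and control passes
+ * to g_apfnTwoByteMap[0xa2], i.e. iemOp_cpuid. */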
+
+/** Opcode 0x10. */
+FNIEMOP_DEF(iemOp_adc_Eb_Gb)
+{
+    IEMOP_MNEMONIC("adc Eb,Gb");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_adc);
+}
+
+
+/** Opcode 0x11. */
+FNIEMOP_DEF(iemOp_adc_Ev_Gv)
+{
+    IEMOP_MNEMONIC("adc Ev,Gv");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_adc);
+}
+
+
+/** Opcode 0x12. */
+FNIEMOP_DEF(iemOp_adc_Gb_Eb)
+{
+    IEMOP_MNEMONIC("adc Gb,Eb");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_adc);
+}
+
+
+/** Opcode 0x13. */
+FNIEMOP_DEF(iemOp_adc_Gv_Ev)
+{
+    IEMOP_MNEMONIC("adc Gv,Ev");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_adc);
+}
+
+
+/** Opcode 0x14. */
+FNIEMOP_DEF(iemOp_adc_Al_Ib)
+{
+    IEMOP_MNEMONIC("adc al,Ib");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_adc);
+}
+
+
+/** Opcode 0x15. */
+FNIEMOP_DEF(iemOp_adc_eAX_Iz)
+{
+    IEMOP_MNEMONIC("adc rAX,Iz");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_adc);
+}
+
+
+/** Opcode 0x16. */
+FNIEMOP_DEF(iemOp_push_SS)
+{
+    IEMOP_MNEMONIC("push ss");
+    return FNIEMOP_CALL_1(iemOpCommonPushSReg, X86_SREG_SS);
+}
+
+
+/** Opcode 0x17. */
+FNIEMOP_DEF(iemOp_pop_SS)
+{
+    IEMOP_MNEMONIC("pop ss"); /** @todo implies instruction fusing? */
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_NO_64BIT();
+    return IEM_MC_DEFER_TO_CIMPL_2(iemOpCImpl_pop_Sreg, X86_SREG_SS, pIemCpu->enmEffOpSize);
+}
+
+
+/** Opcode 0x18. */
+FNIEMOP_DEF(iemOp_sbb_Eb_Gb)
+{
+    IEMOP_MNEMONIC("sbb Eb,Gb");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_sbb);
+}
+
+
+/** Opcode 0x19. */
+FNIEMOP_DEF(iemOp_sbb_Ev_Gv)
+{
+    IEMOP_MNEMONIC("sbb Ev,Gv");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_sbb);
+}
+
+
+/** Opcode 0x1a. */
+FNIEMOP_DEF(iemOp_sbb_Gb_Eb)
+{
+    IEMOP_MNEMONIC("sbb Gb,Eb");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_sbb);
+}
+
+
+/** Opcode 0x1b. */
+FNIEMOP_DEF(iemOp_sbb_Gv_Ev)
+{
+    IEMOP_MNEMONIC("sbb Gv,Ev");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_sbb);
+}
+
+
+/** Opcode 0x1c. */
+FNIEMOP_DEF(iemOp_sbb_Al_Ib)
+{
+    IEMOP_MNEMONIC("sbb al,Ib");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_sbb);
+}
+
+
+/** Opcode 0x1d. */
+FNIEMOP_DEF(iemOp_sbb_eAX_Iz)
+{
+    IEMOP_MNEMONIC("sbb rAX,Iz");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_sbb);
+}
+
+
+/** Opcode 0x1e. */
+FNIEMOP_DEF(iemOp_push_DS)
+{
+    IEMOP_MNEMONIC("push ds");
+    return FNIEMOP_CALL_1(iemOpCommonPushSReg, X86_SREG_DS);
+}
+
+
+/** Opcode 0x1f. */
+FNIEMOP_DEF(iemOp_pop_DS)
+{
+    IEMOP_MNEMONIC("pop ds");
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_NO_64BIT();
+    return IEM_MC_DEFER_TO_CIMPL_2(iemOpCImpl_pop_Sreg, X86_SREG_DS, pIemCpu->enmEffOpSize);
+}
+
+
+/** Opcode 0x20. */
+FNIEMOP_DEF(iemOp_and_Eb_Gb)
+{
+    IEMOP_MNEMONIC("and Eb,Gb");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_and);
+}
+
+
+/** Opcode 0x21. */
+FNIEMOP_DEF(iemOp_and_Ev_Gv)
+{
+    IEMOP_MNEMONIC("and Ev,Gv");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_and);
+}
+
+
+/** Opcode 0x22. */
+FNIEMOP_DEF(iemOp_and_Gb_Eb)
+{
+    IEMOP_MNEMONIC("and Gb,Eb");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_and);
+}
+
+
+/** Opcode 0x23. */
+FNIEMOP_DEF(iemOp_and_Gv_Ev)
+{
+    IEMOP_MNEMONIC("and Gv,Ev");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_and);
+}
+
+
+/** Opcode 0x24. */
+FNIEMOP_DEF(iemOp_and_Al_Ib)
+{
+    IEMOP_MNEMONIC("and al,Ib");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_and);
+}
+
+
+/** Opcode 0x25. */
+FNIEMOP_DEF(iemOp_and_eAX_Iz)
+{
+    IEMOP_MNEMONIC("and rAX,Iz");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_and);
+}
+
+
+/** Opcode 0x26. */
+FNIEMOP_DEF(iemOp_seg_ES)
+{
+    pIemCpu->fPrefixes |= IEM_OP_PRF_SEG_ES;
+    pIemCpu->iEffSeg    = X86_SREG_ES;
+
+    uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+    return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+}
+
+
+/** Opcode 0x27. */
+FNIEMOP_STUB(iemOp_daa);
+
+
+/** Opcode 0x28. */
+FNIEMOP_DEF(iemOp_sub_Eb_Gb)
+{
+    IEMOP_MNEMONIC("sub Eb,Gb");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_sub);
+}
+
+
+/** Opcode 0x29. */
+FNIEMOP_DEF(iemOp_sub_Ev_Gv)
+{
+    IEMOP_MNEMONIC("sub Ev,Gv");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_sub);
+}
+
+
+/** Opcode 0x2a. */
+FNIEMOP_DEF(iemOp_sub_Gb_Eb)
+{
+    IEMOP_MNEMONIC("sub Gb,Eb");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_sub);
+}
+
+
+/** Opcode 0x2b. */
+FNIEMOP_DEF(iemOp_sub_Gv_Ev)
+{
+    IEMOP_MNEMONIC("sub Gv,Ev");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_sub);
+}
+
+
+/** Opcode 0x2c. */
+FNIEMOP_DEF(iemOp_sub_Al_Ib)
+{
+    IEMOP_MNEMONIC("sub al,Ib");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_sub);
+}
+
+
+/** Opcode 0x2d. */
+FNIEMOP_DEF(iemOp_sub_eAX_Iz)
+{
+    IEMOP_MNEMONIC("sub rAX,Iz");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_sub);
+}
+
+
+/** Opcode 0x2e. */
+FNIEMOP_DEF(iemOp_seg_CS)
+{
+    pIemCpu->fPrefixes |= IEM_OP_PRF_SEG_CS;
+    pIemCpu->iEffSeg    = X86_SREG_CS;
+
+    uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+    return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+}
+
+
+/** Opcode 0x2f. */
+FNIEMOP_STUB(iemOp_das);
+
+
+/** Opcode 0x30. */
+FNIEMOP_DEF(iemOp_xor_Eb_Gb)
+{
+    IEMOP_MNEMONIC("xor Eb,Gb");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_xor);
+}
+
+
+/** Opcode 0x31. */
+FNIEMOP_DEF(iemOp_xor_Ev_Gv)
+{
+    IEMOP_MNEMONIC("xor Ev,Gv");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_xor);
+}
+
+
+/** Opcode 0x32. */
+FNIEMOP_DEF(iemOp_xor_Gb_Eb)
+{
+    IEMOP_MNEMONIC("xor Gb,Eb");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_xor);
+}
+
+
+/** Opcode 0x33. */
+FNIEMOP_DEF(iemOp_xor_Gv_Ev)
+{
+    IEMOP_MNEMONIC("xor Gv,Ev");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_xor);
+}
+
+
+/** Opcode 0x34. */
+FNIEMOP_DEF(iemOp_xor_Al_Ib)
+{
+    IEMOP_MNEMONIC("xor al,Ib");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_xor);
+}
+
+
+/** Opcode 0x35. */
+FNIEMOP_DEF(iemOp_xor_eAX_Iz)
+{
+    IEMOP_MNEMONIC("xor rAX,Iz");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_xor);
+}
+
+
+/** Opcode 0x36. */
+FNIEMOP_DEF(iemOp_seg_SS)
+{
+    pIemCpu->fPrefixes |= IEM_OP_PRF_SEG_SS;
+    pIemCpu->iEffSeg    = X86_SREG_SS;
+
+    uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+    return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+}
+
+
+/** Opcode 0x37. */
+FNIEMOP_STUB(iemOp_aaa);
+
+
+/** Opcode 0x38. */
+FNIEMOP_DEF(iemOp_cmp_Eb_Gb)
+{
+    IEMOP_MNEMONIC("cmp Eb,Gb");
+    IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo do we have to decode the whole instruction first?  */
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_cmp);
+}
+
+
+/** Opcode 0x39. */
+FNIEMOP_DEF(iemOp_cmp_Ev_Gv)
+{
+    IEMOP_MNEMONIC("cmp Ev,Gv");
+    IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo do we have to decode the whole instruction first?  */
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_cmp);
+}
+
+
+/** Opcode 0x3a. */
+FNIEMOP_DEF(iemOp_cmp_Gb_Eb)
+{
+    IEMOP_MNEMONIC("cmp Gb,Eb");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_r8_rm, &g_iemAImpl_cmp);
+}
+
+
+/** Opcode 0x3b. */
+FNIEMOP_DEF(iemOp_cmp_Gv_Ev)
+{
+    IEMOP_MNEMONIC("cmp Gv,Ev");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rv_rm, &g_iemAImpl_cmp);
+}
+
+
+/** Opcode 0x3c. */
+FNIEMOP_DEF(iemOp_cmp_Al_Ib)
+{
+    IEMOP_MNEMONIC("cmp al,Ib");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_cmp);
+}
+
+
+/** Opcode 0x3d. */
+FNIEMOP_DEF(iemOp_cmp_eAX_Iz)
+{
+    IEMOP_MNEMONIC("cmp rAX,Iz");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_cmp);
+}
+
+
+/** Opcode 0x3e. */
+FNIEMOP_DEF(iemOp_seg_DS)
+{
+    pIemCpu->fPrefixes |= IEM_OP_PRF_SEG_DS;
+    pIemCpu->iEffSeg    = X86_SREG_DS;
+
+    uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+    return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+}
+
+
+/** Opcode 0x3f. */
+FNIEMOP_STUB(iemOp_aas);
+
+/**
+ * Common 'inc/dec/not/neg register' helper.
+ */
+FNIEMOP_DEF_2(iemOpCommonUnaryGReg, PCIEMOPUNARYSIZES, pImpl, uint8_t, iReg)
+{
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+            IEM_MC_BEGIN(2, 0);
+            IEM_MC_ARG(uint16_t *,  pu16Dst, 0);
+            IEM_MC_ARG(uint32_t *,  pEFlags, 1);
+            IEM_MC_REF_GREG_U16(pu16Dst, iReg);
+            IEM_MC_REF_EFLAGS(pEFlags);
+            IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnNormalU16, pu16Dst, pEFlags);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_32BIT:
+            IEM_MC_BEGIN(2, 0);
+            IEM_MC_ARG(uint32_t *,  pu32Dst, 0);
+            IEM_MC_ARG(uint32_t *,  pEFlags, 1);
+            IEM_MC_REF_GREG_U32(pu32Dst, iReg);
+            IEM_MC_REF_EFLAGS(pEFlags);
+            IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnNormalU32, pu32Dst, pEFlags);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_64BIT:
+            IEM_MC_BEGIN(2, 0);
+            IEM_MC_ARG(uint64_t *,  pu64Dst, 0);
+            IEM_MC_ARG(uint32_t *,  pEFlags, 1);
+            IEM_MC_REF_GREG_U64(pu64Dst, iReg);
+            IEM_MC_REF_EFLAGS(pEFlags);
+            IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnNormalU64, pu64Dst, pEFlags);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+    }
+    return VINF_SUCCESS;
+}
+
+
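+/*
+ * Opcodes 0x40 through 0x4f double as REX prefixes in 64-bit mode.  The low
+ * nibble of the opcode byte maps directly onto the REX bits (0100WRXB):
+ * bit 0 = REX.B, bit 1 = REX.X, bit 2 = REX.R and bit 3 = REX.W.  Each
+ * handler below therefore just records the corresponding prefix bits
+ * (recalculating the effective operand size when REX.W is set) and
+ * re-dispatches on the next opcode byte.
+ */
+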
+/** Opcode 0x40. */
+FNIEMOP_DEF(iemOp_inc_eAX)
+{
+    /*
+     * This is a REX prefix in 64-bit mode.
+     */
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+    {
+        pIemCpu->fPrefixes |= IEM_OP_PRF_REX;
+
+        uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+    }
+
+    IEMOP_MNEMONIC("inc eAX");
+    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xAX);
+}
+
+
+/** Opcode 0x41. */
+FNIEMOP_DEF(iemOp_inc_eCX)
+{
+    /*
+     * This is a REX prefix in 64-bit mode.
+     */
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+    {
+        pIemCpu->fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_B;
+        pIemCpu->uRexB     = 1 << 3;
+
+        uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+    }
+
+    IEMOP_MNEMONIC("inc eCX");
+    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xCX);
+}
+
+
+/** Opcode 0x42. */
+FNIEMOP_DEF(iemOp_inc_eDX)
+{
+    /*
+     * This is a REX prefix in 64-bit mode.
+     */
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+    {
+        pIemCpu->fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_X;
+        pIemCpu->uRexIndex = 1 << 3;
+
+        uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+    }
+
+    IEMOP_MNEMONIC("inc eDX");
+    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xDX);
+}
+
+
+/** Opcode 0x43. */
+FNIEMOP_DEF(iemOp_inc_eBX)
+{
+    /*
+     * This is a REX prefix in 64-bit mode.
+     */
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+    {
+        pIemCpu->fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_B | IEM_OP_PRF_REX_X;
+        pIemCpu->uRexB     = 1 << 3;
+        pIemCpu->uRexIndex = 1 << 3;
+
+        uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+    }
+
+    IEMOP_MNEMONIC("inc eBX");
+    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xBX);
+}
+
+
+/** Opcode 0x44. */
+FNIEMOP_DEF(iemOp_inc_eSP)
+{
+    /*
+     * This is a REX prefix in 64-bit mode.
+     */
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+    {
+        pIemCpu->fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_R;
+        pIemCpu->uRexReg   = 1 << 3;
+
+        uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+    }
+
+    IEMOP_MNEMONIC("inc eSP");
+    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xSP);
+}
+
+
+/** Opcode 0x45. */
+FNIEMOP_DEF(iemOp_inc_eBP)
+{
+    /*
+     * This is a REX prefix in 64-bit mode.
+     */
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+    {
+        pIemCpu->fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_R | IEM_OP_PRF_REX_B;
+        pIemCpu->uRexReg   = 1 << 3;
+        pIemCpu->uRexB     = 1 << 3;
+
+        uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+    }
+
+    IEMOP_MNEMONIC("inc eBP");
+    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xBP);
+}
+
+
+/** Opcode 0x46. */
+FNIEMOP_DEF(iemOp_inc_eSI)
+{
+    /*
+     * This is a REX prefix in 64-bit mode.
+     */
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+    {
+        pIemCpu->fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_R | IEM_OP_PRF_REX_X;
+        pIemCpu->uRexReg   = 1 << 3;
+        pIemCpu->uRexIndex = 1 << 3;
+
+        uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+    }
+
+    IEMOP_MNEMONIC("inc eSI");
+    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xSI);
+}
+
+
+/** Opcode 0x47. */
+FNIEMOP_DEF(iemOp_inc_eDI)
+{
+    /*
+     * This is a REX prefix in 64-bit mode.
+     */
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+    {
+        pIemCpu->fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_R | IEM_OP_PRF_REX_B | IEM_OP_PRF_REX_X;
+        pIemCpu->uRexReg   = 1 << 3;
+        pIemCpu->uRexB     = 1 << 3;
+        pIemCpu->uRexIndex = 1 << 3;
+
+        uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+    }
+
+    IEMOP_MNEMONIC("inc eDI");
+    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_inc, X86_GREG_xDI);
+}
+
+
+/** Opcode 0x48. */
+FNIEMOP_DEF(iemOp_dec_eAX)
+{
+    /*
+     * This is a REX prefix in 64-bit mode.
+     */
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+    {
+        pIemCpu->fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_SIZE_REX_W;
+        iemRecalEffOpSize(pIemCpu);
+
+        uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+    }
+
+    IEMOP_MNEMONIC("dec eAX");
+    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xAX);
+}
+
+
+/** Opcode 0x49. */
+FNIEMOP_DEF(iemOp_dec_eCX)
+{
+    /*
+     * This is a REX prefix in 64-bit mode.
+     */
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+    {
+        pIemCpu->fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_B | IEM_OP_PRF_SIZE_REX_W;
+        pIemCpu->uRexB     = 1 << 3;
+        iemRecalEffOpSize(pIemCpu);
+
+        uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+    }
+
+    IEMOP_MNEMONIC("dec eCX");
+    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xCX);
+}
+
+
+/** Opcode 0x4a. */
+FNIEMOP_DEF(iemOp_dec_eDX)
+{
+    /*
+     * This is a REX prefix in 64-bit mode.
+     */
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+    {
+        pIemCpu->fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_X | IEM_OP_PRF_SIZE_REX_W;
+        pIemCpu->uRexIndex = 1 << 3;
+        iemRecalEffOpSize(pIemCpu);
+
+        uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+    }
+
+    IEMOP_MNEMONIC("dec eDX");
+    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xDX);
+}
+
+
+/** Opcode 0x4b. */
+FNIEMOP_DEF(iemOp_dec_eBX)
+{
+    /*
+     * This is a REX prefix in 64-bit mode.
+     */
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+    {
+        pIemCpu->fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_B | IEM_OP_PRF_REX_X | IEM_OP_PRF_SIZE_REX_W;
+        pIemCpu->uRexB     = 1 << 3;
+        pIemCpu->uRexIndex = 1 << 3;
+        iemRecalEffOpSize(pIemCpu);
+
+        uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+    }
+
+    IEMOP_MNEMONIC("dec eBX");
+    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xBX);
+}
+
+
+/** Opcode 0x4c. */
+FNIEMOP_DEF(iemOp_dec_eSP)
+{
+    /*
+     * This is a REX prefix in 64-bit mode.
+     */
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+    {
+        pIemCpu->fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_R | IEM_OP_PRF_SIZE_REX_W;
+        pIemCpu->uRexReg   = 1 << 3;
+        iemRecalEffOpSize(pIemCpu);
+
+        uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+    }
+
+    IEMOP_MNEMONIC("dec eSP");
+    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xSP);
+}
+
+
+/** Opcode 0x4d. */
+FNIEMOP_DEF(iemOp_dec_eBP)
+{
+    /*
+     * This is a REX prefix in 64-bit mode.
+     */
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+    {
+        pIemCpu->fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_R | IEM_OP_PRF_REX_B | IEM_OP_PRF_SIZE_REX_W;
+        pIemCpu->uRexReg   = 1 << 3;
+        pIemCpu->uRexB     = 1 << 3;
+        iemRecalEffOpSize(pIemCpu);
+
+        uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+    }
+
+    IEMOP_MNEMONIC("dec eBP");
+    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xBP);
+}
+
+
+/** Opcode 0x4e. */
+FNIEMOP_DEF(iemOp_dec_eSI)
+{
+    /*
+     * This is a REX prefix in 64-bit mode.
+     */
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+    {
+        pIemCpu->fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_R | IEM_OP_PRF_REX_X | IEM_OP_PRF_SIZE_REX_W;
+        pIemCpu->uRexReg   = 1 << 3;
+        pIemCpu->uRexIndex = 1 << 3;
+        iemRecalEffOpSize(pIemCpu);
+
+        uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+    }
+
+    IEMOP_MNEMONIC("dec eSI");
+    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xSI);
+}
+
+
+/** Opcode 0x4f. */
+FNIEMOP_DEF(iemOp_dec_eDI)
+{
+    /*
+     * This is a REX prefix in 64-bit mode.
+     */
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+    {
+        pIemCpu->fPrefixes |= IEM_OP_PRF_REX | IEM_OP_PRF_REX_R | IEM_OP_PRF_REX_B | IEM_OP_PRF_REX_X | IEM_OP_PRF_SIZE_REX_W;
+        pIemCpu->uRexReg   = 1 << 3;
+        pIemCpu->uRexB     = 1 << 3;
+        pIemCpu->uRexIndex = 1 << 3;
+        iemRecalEffOpSize(pIemCpu);
+
+        uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+        return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+    }
+
+    IEMOP_MNEMONIC("dec eDI");
+    return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, &g_iemAImpl_dec, X86_GREG_xDI);
+}
+
+
+/**
+ * Common 'push register' helper.
+ */
+FNIEMOP_DEF_1(iemOpCommonPushGReg, uint8_t, iReg)
+{
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+    {
+        iReg |= pIemCpu->uRexB;
+        pIemCpu->enmDefOpSize = IEMMODE_64BIT;
+        pIemCpu->enmEffOpSize = !(pIemCpu->fPrefixes & IEM_OP_PRF_SIZE_OP) ? IEMMODE_64BIT : IEMMODE_16BIT;
+    }
+
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL(uint16_t, u16Value);
+            IEM_MC_FETCH_GREG_U16(u16Value, iReg);
+            IEM_MC_PUSH_U16(u16Value);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            break;
+
+        case IEMMODE_32BIT:
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL(uint32_t, u32Value);
+            IEM_MC_FETCH_GREG_U32(u32Value, iReg);
+            IEM_MC_PUSH_U32(u32Value);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            break;
+
+        case IEMMODE_64BIT:
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL(uint64_t, u64Value);
+            IEM_MC_FETCH_GREG_U64(u64Value, iReg);
+            IEM_MC_PUSH_U64(u64Value);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            break;
+    }
+
+    return VINF_SUCCESS;
+}
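+
+/* Note that in 64-bit mode a push cannot be 32-bit: the default operand
+ * size is 64-bit and the operand-size prefix (66h) selects 16-bit, which is
+ * exactly what the enmEffOpSize adjustment above implements. */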
+
+
+/** Opcode 0x50. */
+FNIEMOP_DEF(iemOp_push_eAX)
+{
+    IEMOP_MNEMONIC("push rAX");
+    return FNIEMOP_CALL_1(iemOpCommonPushGReg, X86_GREG_xAX);
+}
+
+
+/** Opcode 0x51. */
+FNIEMOP_DEF(iemOp_push_eCX)
+{
+    IEMOP_MNEMONIC("push rCX");
+    return FNIEMOP_CALL_1(iemOpCommonPushGReg, X86_GREG_xCX);
+}
+
+
+/** Opcode 0x52. */
+FNIEMOP_DEF(iemOp_push_eDX)
+{
+    IEMOP_MNEMONIC("push rDX");
+    return FNIEMOP_CALL_1(iemOpCommonPushGReg, X86_GREG_xDX);
+}
+
+
+/** Opcode 0x53. */
+FNIEMOP_DEF(iemOp_push_eBX)
+{
+    IEMOP_MNEMONIC("push rBX");
+    return FNIEMOP_CALL_1(iemOpCommonPushGReg, X86_GREG_xBX);
+}
+
+
+/** Opcode 0x54. */
+FNIEMOP_DEF(iemOp_push_eSP)
+{
+    IEMOP_MNEMONIC("push rSP");
+    return FNIEMOP_CALL_1(iemOpCommonPushGReg, X86_GREG_xSP);
+}
+
+
+/** Opcode 0x55. */
+FNIEMOP_DEF(iemOp_push_eBP)
+{
+    IEMOP_MNEMONIC("push rBP");
+    return FNIEMOP_CALL_1(iemOpCommonPushGReg, X86_GREG_xBP);
+}
+
+
+/** Opcode 0x56. */
+FNIEMOP_DEF(iemOp_push_eSI)
+{
+    IEMOP_MNEMONIC("push rSI");
+    return FNIEMOP_CALL_1(iemOpCommonPushGReg, X86_GREG_xSI);
+}
+
+
+/** Opcode 0x57. */
+FNIEMOP_DEF(iemOp_push_eDI)
+{
+    IEMOP_MNEMONIC("push rDI");
+    return FNIEMOP_CALL_1(iemOpCommonPushGReg, X86_GREG_xDI);
+}
+
+
+/**
+ * Common 'pop register' helper.
+ */
+FNIEMOP_DEF_1(iemOpCommonPopGReg, uint8_t, iReg)
+{
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    if (pIemCpu->enmCpuMode == IEMMODE_64BIT)
+    {
+        iReg |= pIemCpu->uRexB;
+        pIemCpu->enmDefOpSize = IEMMODE_64BIT;
+        pIemCpu->enmEffOpSize = !(pIemCpu->fPrefixes & IEM_OP_PRF_SIZE_OP) ? IEMMODE_64BIT : IEMMODE_16BIT;
+    }
+
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL(uint16_t *, pu16Dst);
+            IEM_MC_REF_GREG_U16(pu16Dst, iReg);
+            IEM_MC_POP_U16(pu16Dst);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            break;
+
+        case IEMMODE_32BIT:
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL(uint32_t *, pu32Dst);
+            IEM_MC_REF_GREG_U32(pu32Dst, iReg);
+            IEM_MC_POP_U32(pu32Dst);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            break;
+
+        case IEMMODE_64BIT:
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL(uint64_t *, pu64Dst);
+            IEM_MC_REF_GREG_U64(pu64Dst, iReg);
+            IEM_MC_POP_U64(pu64Dst);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            break;
+    }
+
+    return VINF_SUCCESS;
+}
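+
+/* The iReg |= pIemCpu->uRexB adjustment above is what extends the 3-bit
+ * register field to r8-r15 when a REX.B prefix was seen; the push helper
+ * further up uses the same pattern. */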
+
+
+/** Opcode 0x58. */
+FNIEMOP_DEF(iemOp_pop_eAX)
+{
+    IEMOP_MNEMONIC("pop rAX");
+    return FNIEMOP_CALL_1(iemOpCommonPopGReg, X86_GREG_xAX);
+}
+
+
+/** Opcode 0x59. */
+FNIEMOP_DEF(iemOp_pop_eCX)
+{
+    IEMOP_MNEMONIC("pop rCX");
+    return FNIEMOP_CALL_1(iemOpCommonPopGReg, X86_GREG_xCX);
+}
+
+
+/** Opcode 0x5a. */
+FNIEMOP_DEF(iemOp_pop_eDX)
+{
+    IEMOP_MNEMONIC("pop rDX");
+    return FNIEMOP_CALL_1(iemOpCommonPopGReg, X86_GREG_xDX);
+}
+
+
+/** Opcode 0x5b. */
+FNIEMOP_DEF(iemOp_pop_eBX)
+{
+    IEMOP_MNEMONIC("pop rBX");
+    return FNIEMOP_CALL_1(iemOpCommonPopGReg, X86_GREG_xBX);
+}
+
+
+/** Opcode 0x5c. */
+FNIEMOP_DEF(iemOp_pop_eSP)
+{
+    IEMOP_MNEMONIC("pop rSP");
+    return FNIEMOP_CALL_1(iemOpCommonPopGReg, X86_GREG_xSP);
+}
+
+
+/** Opcode 0x5d. */
+FNIEMOP_DEF(iemOp_pop_eBP)
+{
+    IEMOP_MNEMONIC("pop rBP");
+    return FNIEMOP_CALL_1(iemOpCommonPopGReg, X86_GREG_xBP);
+}
+
+
+/** Opcode 0x5e. */
+FNIEMOP_DEF(iemOp_pop_eSI)
+{
+    IEMOP_MNEMONIC("pop rSI");
+    return FNIEMOP_CALL_1(iemOpCommonPopGReg, X86_GREG_xSI);
+}
+
+
+/** Opcode 0x5f. */
+FNIEMOP_DEF(iemOp_pop_eDI)
+{
+    IEMOP_MNEMONIC("pop rDI");
+    return FNIEMOP_CALL_1(iemOpCommonPopGReg, X86_GREG_xDI);
+}
+
+
+/** Opcode 0x60. */
+FNIEMOP_DEF(iemOp_pusha)
+{
+    IEMOP_MNEMONIC("pusha");
+    IEMOP_HLP_NO_64BIT();
+    if (pIemCpu->enmEffOpSize == IEMMODE_16BIT)
+        return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_pusha_16);
+    Assert(pIemCpu->enmEffOpSize == IEMMODE_32BIT);
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_pusha_32);
+}
+
+
+/** Opcode 0x61. */
+FNIEMOP_DEF(iemOp_popa)
+{
+    IEMOP_MNEMONIC("popa");
+    IEMOP_HLP_NO_64BIT();
+    if (pIemCpu->enmEffOpSize == IEMMODE_16BIT)
+        return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_popa_16);
+    Assert(pIemCpu->enmEffOpSize == IEMMODE_32BIT);
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_popa_32);
+}
+
+
+/** Opcode 0x62. */
+FNIEMOP_STUB(iemOp_bound_Gv_Ma);
+/** Opcode 0x63. */
+FNIEMOP_STUB(iemOp_arpl_Ew_Gw);
+
+
+/** Opcode 0x64. */
+FNIEMOP_DEF(iemOp_seg_FS)
+{
+    pIemCpu->fPrefixes |= IEM_OP_PRF_SEG_FS;
+    pIemCpu->iEffSeg    = X86_SREG_FS;
+
+    uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+    return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+}
+
+
+/** Opcode 0x65. */
+FNIEMOP_DEF(iemOp_seg_GS)
+{
+    pIemCpu->fPrefixes |= IEM_OP_PRF_SEG_GS;
+    pIemCpu->iEffSeg    = X86_SREG_GS;
+
+    uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+    return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+}
+
+
+/** Opcode 0x66. */
+FNIEMOP_DEF(iemOp_op_size)
+{
+    pIemCpu->fPrefixes |= IEM_OP_PRF_SIZE_OP;
+    iemRecalEffOpSize(pIemCpu);
+
+    uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+    return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+}
+
+
+/** Opcode 0x67. */
+FNIEMOP_DEF(iemOp_addr_size)
+{
+    pIemCpu->fPrefixes |= IEM_OP_PRF_SIZE_ADDR;
+    switch (pIemCpu->enmDefAddrMode)
+    {
+        case IEMMODE_16BIT: pIemCpu->enmEffAddrMode = IEMMODE_32BIT; break;
+        case IEMMODE_32BIT: pIemCpu->enmEffAddrMode = IEMMODE_16BIT; break;
+        case IEMMODE_64BIT: pIemCpu->enmEffAddrMode = IEMMODE_32BIT; break;
+        default: AssertFailed();
+    }
+
+    uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+    return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+}
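+
+/* The address-size prefix toggles between 16-bit and 32-bit addressing in
+ * legacy modes, while in 64-bit mode it selects 32-bit addressing (16-bit
+ * addressing does not exist in long mode), as the switch above encodes. */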
+
+
+/** Opcode 0x68. */
+FNIEMOP_DEF(iemOp_push_Iz)
+{
+    IEMOP_MNEMONIC("push Iz");
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+        {
+            uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+            IEMOP_HLP_NO_LOCK_PREFIX();
+            IEM_MC_BEGIN(0,0);
+            IEM_MC_PUSH_U16(u16Imm);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+        }
+
+        case IEMMODE_32BIT:
+        {
+            uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+            IEMOP_HLP_NO_LOCK_PREFIX();
+            IEM_MC_BEGIN(0,0);
+            IEM_MC_PUSH_U32(u32Imm);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+        }
+
+        case IEMMODE_64BIT:
+        {
+            uint64_t u64Imm; IEM_OPCODE_GET_NEXT_S32_SX_U64(pIemCpu, &u64Imm);
+            IEMOP_HLP_NO_LOCK_PREFIX();
+            IEM_MC_BEGIN(0,0);
+            IEM_MC_PUSH_U64(u64Imm);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+        }
+
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+}
+
+
+/** Opcode 0x69. */
+FNIEMOP_DEF(iemOp_imul_Gv_Ev_Iz)
+{
+    IEMOP_MNEMONIC("imul Gv,Ev,Iz"); /* Gv = Ev * Iz; */
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+        {
+            uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+            IEMOP_HLP_NO_LOCK_PREFIX();
+            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+            {
+                /* register operand */
+                IEM_MC_BEGIN(3, 1);
+                IEM_MC_ARG(uint16_t *,      pu16Dst,            0);
+                IEM_MC_ARG_CONST(uint16_t,  u16Src,/*=*/ u16Imm,1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,            2);
+                IEM_MC_LOCAL(uint16_t,      u16Tmp);
+
+                IEM_MC_FETCH_GREG_U16(u16Tmp, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_LOCAL(pu16Dst, u16Tmp);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u16, pu16Dst, u16Src, pEFlags);
+                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u16Tmp);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+            }
+            else
+            {
+                /* memory operand */
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint16_t *,      pu16Dst,            0);
+                IEM_MC_ARG_CONST(uint16_t,  u16Src,/*=*/ u16Imm,1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,            2);
+                IEM_MC_LOCAL(uint16_t,      u16Tmp);
+                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_FETCH_MEM_U16(u16Tmp, pIemCpu->iEffSeg, GCPtrEffDst);
+                IEM_MC_REF_LOCAL(pu16Dst, u16Tmp);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u16, pu16Dst, u16Src, pEFlags);
+                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u16Tmp);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+            }
+            return VINF_SUCCESS;
+        }
+
+        case IEMMODE_32BIT:
+        {
+            uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+            IEMOP_HLP_NO_LOCK_PREFIX();
+            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+            {
+                /* register operand */
+                IEM_MC_BEGIN(3, 1);
+                IEM_MC_ARG(uint32_t *,      pu32Dst,            0);
+                IEM_MC_ARG_CONST(uint32_t,  u32Src,/*=*/ u32Imm,1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,            2);
+                IEM_MC_LOCAL(uint32_t,      u32Tmp);
+
+                IEM_MC_FETCH_GREG_U32(u32Tmp, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_LOCAL(pu32Dst, u32Tmp);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u32, pu32Dst, u32Src, pEFlags);
+                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u32Tmp);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+            }
+            else
+            {
+                /* memory operand */
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint32_t *,      pu32Dst,            0);
+                IEM_MC_ARG_CONST(uint32_t,  u32Src,/*=*/ u32Imm,1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,            2);
+                IEM_MC_LOCAL(uint32_t,      u32Tmp);
+                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_FETCH_MEM_U32(u32Tmp, pIemCpu->iEffSeg, GCPtrEffDst);
+                IEM_MC_REF_LOCAL(pu32Dst, u32Tmp);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u32, pu32Dst, u32Src, pEFlags);
+                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u32Tmp);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+            }
+            return VINF_SUCCESS;
+        }
+
+        case IEMMODE_64BIT:
+        {
+            uint64_t u64Imm; IEM_OPCODE_GET_NEXT_S32_SX_U64(pIemCpu, &u64Imm);
+            IEMOP_HLP_NO_LOCK_PREFIX();
+            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+            {
+                /* register operand */
+                IEM_MC_BEGIN(3, 1);
+                IEM_MC_ARG(uint64_t *,      pu64Dst,            0);
+                IEM_MC_ARG_CONST(uint64_t,  u64Src,/*=*/ u64Imm,1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,            2);
+                IEM_MC_LOCAL(uint64_t,      u64Tmp);
+
+                IEM_MC_FETCH_GREG_U64(u64Tmp, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_LOCAL(pu64Dst, u64Tmp);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u64, pu64Dst, u64Src, pEFlags);
+                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u64Tmp);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+            }
+            else
+            {
+                /* memory operand */
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint64_t *,      pu64Dst,            0);
+                IEM_MC_ARG_CONST(uint64_t,  u64Src,/*=*/ u64Imm,1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,            2);
+                IEM_MC_LOCAL(uint64_t,      u64Tmp);
+                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_FETCH_MEM_U64(u64Tmp, pIemCpu->iEffSeg, GCPtrEffDst);
+                IEM_MC_REF_LOCAL(pu64Dst, u64Tmp);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u64, pu64Dst, u64Src, pEFlags);
+                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u64Tmp);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+            }
+            return VINF_SUCCESS;
+        }
+    }
+    AssertFailedReturn(VERR_INTERNAL_ERROR_3);
+}
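+
+/* ModR/M note: the test (bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT)
+ * used above checks for mod == 11b, i.e. a register operand; any other mod
+ * value denotes a memory operand whose effective address is computed by
+ * IEM_MC_CALC_RM_EFF_ADDR. */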
+
+
+/** Opcode 0x6a. */
+FNIEMOP_DEF(iemOp_push_Ib)
+{
+    IEMOP_MNEMONIC("push Ib");
+    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(pIemCpu, &i8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    IEM_MC_BEGIN(0,0);
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+            IEM_MC_PUSH_U16(i8Imm);
+            break;
+        case IEMMODE_32BIT:
+            IEM_MC_PUSH_U32(i8Imm);
+            break;
+        case IEMMODE_64BIT:
+            IEM_MC_PUSH_U64(i8Imm);
+            break;
+    }
+    IEM_MC_ADVANCE_RIP();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
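+
+/* The immediate is fetched as int8_t so that the implicit conversions in
+ * IEM_MC_PUSH_U16/U32/U64 sign-extend it to the effective operand size,
+ * matching the architectural behaviour of push Ib. */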
+
+
+/** Opcode 0x6b. */
+FNIEMOP_DEF(iemOp_imul_Gv_Ev_Ib)
+{
+    IEMOP_MNEMONIC("imul Gv,Ev,Ib"); /* Gv = Ev * Iz; */
+    uint8_t bRm;   IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    uint8_t u8Imm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &u8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+            {
+                /* register operand */
+                IEM_MC_BEGIN(3, 1);
+                IEM_MC_ARG(uint16_t *,      pu16Dst,                    0);
+                IEM_MC_ARG_CONST(uint16_t,  u16Src,/*=*/ (int8_t)u8Imm, 1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
+                IEM_MC_LOCAL(uint16_t,      u16Tmp);
+
+                IEM_MC_FETCH_GREG_U16(u16Tmp, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_LOCAL(pu16Dst, u16Tmp);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u16, pu16Dst, u16Src, pEFlags);
+                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u16Tmp);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+            }
+            else
+            {
+                /* memory operand */
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint16_t *,      pu16Dst,                    0);
+                IEM_MC_ARG_CONST(uint16_t,  u16Src,/*=*/ (int8_t)u8Imm, 1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
+                IEM_MC_LOCAL(uint16_t,      u16Tmp);
+                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_FETCH_MEM_U16(u16Tmp, pIemCpu->iEffSeg, GCPtrEffDst);
+                IEM_MC_REF_LOCAL(pu16Dst, u16Tmp);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u16, pu16Dst, u16Src, pEFlags);
+                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u16Tmp);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+            }
+            return VINF_SUCCESS;
+
+        case IEMMODE_32BIT:
+            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+            {
+                /* register operand */
+                IEM_MC_BEGIN(3, 1);
+                IEM_MC_ARG(uint32_t *,      pu32Dst,                    0);
+                IEM_MC_ARG_CONST(uint32_t,  u32Src,/*=*/ (int8_t)u8Imm, 1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
+                IEM_MC_LOCAL(uint32_t,      u32Tmp);
+
+                IEM_MC_FETCH_GREG_U32(u32Tmp, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_LOCAL(pu32Dst, u32Tmp);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u32, pu32Dst, u32Src, pEFlags);
+                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u32Tmp);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+            }
+            else
+            {
+                /* memory operand */
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint32_t *,      pu32Dst,                    0);
+                IEM_MC_ARG_CONST(uint32_t,  u32Src,/*=*/ (int8_t)u8Imm, 1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
+                IEM_MC_LOCAL(uint32_t,      u32Tmp);
+                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_FETCH_MEM_U32(u32Tmp, pIemCpu->iEffSeg, GCPtrEffDst);
+                IEM_MC_REF_LOCAL(pu32Dst, u32Tmp);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u32, pu32Dst, u32Src, pEFlags);
+                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u32Tmp);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+            }
+            return VINF_SUCCESS;
+
+        case IEMMODE_64BIT:
+            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+            {
+                /* register operand */
+                IEM_MC_BEGIN(3, 1);
+                IEM_MC_ARG(uint64_t *,      pu64Dst,                    0);
+                IEM_MC_ARG_CONST(uint64_t,  u64Src,/*=*/ (int8_t)u8Imm, 1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
+                IEM_MC_LOCAL(uint64_t,      u64Tmp);
+
+                IEM_MC_FETCH_GREG_U64(u64Tmp, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_LOCAL(pu64Dst, u64Tmp);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u64, pu64Dst, u64Src, pEFlags);
+                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u64Tmp);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+            }
+            else
+            {
+                /* memory operand */
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint64_t *,      pu64Dst,                    0);
+                IEM_MC_ARG_CONST(uint64_t,  u64Src,/*=*/ (int8_t)u8Imm, 1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
+                IEM_MC_LOCAL(uint64_t,      u64Tmp);
+                IEM_MC_LOCAL(RTGCPTR,  GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_FETCH_MEM_U64(u64Tmp, pIemCpu->iEffSeg, GCPtrEffDst);
+                IEM_MC_REF_LOCAL(pu64Dst, u64Tmp);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_imul_two_u64, pu64Dst, u64Src, pEFlags);
+                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u64Tmp);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+            }
+            return VINF_SUCCESS;
+    }
+    AssertFailedReturn(VERR_INTERNAL_ERROR_3);
+}
+
+
+/** Opcode 0x6c. */
+FNIEMOP_DEF(iemOp_insb_Yb_DX)
+{
+    IEMOP_HLP_NO_LOCK_PREFIX();
+
+    /*
+     * Use the C implementation if a repeat prefix is encountered.
+     */
+    if (pIemCpu->fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
+    {
+        IEMOP_MNEMONIC("rep ins Yb,DX");
+        switch (pIemCpu->enmEffAddrMode)
+        {
+            case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_ins_op8_addr16);
+            case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_ins_op8_addr32);
+            case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_ins_op8_addr64);
+        }
+    }
+    else
+    {
+        IEMOP_MNEMONIC("ins Yb,DX");
+        switch (pIemCpu->enmEffAddrMode)
+        {
+            case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_ins_op8_addr16);
+            case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_ins_op8_addr32);
+            case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_ins_op8_addr64);
+        }
+    }
+    AssertFailedReturn(VERR_INTERNAL_ERROR_3);
+}
+
+
+/** Opcode 0x6d. */
+FNIEMOP_DEF(iemOp_inswd_Yv_DX)
+{
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    if (pIemCpu->fPrefixes & (IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
+    {
+        IEMOP_MNEMONIC("rep ins Yv,DX");
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                switch (pIemCpu->enmEffAddrMode)
+                {
+                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_ins_op16_addr16);
+                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_ins_op16_addr32);
+                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_ins_op16_addr64);
+                }
+                break;
+            case IEMMODE_64BIT:
+            case IEMMODE_32BIT:
+                switch (pIemCpu->enmEffAddrMode)
+                {
+                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_ins_op32_addr16);
+                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_ins_op32_addr32);
+                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_ins_op32_addr64);
+                }
+                break;
+        }
+    }
+    else
+    {
+        IEMOP_MNEMONIC("ins Yv,DX");
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                switch (pIemCpu->enmEffAddrMode)
+                {
+                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_ins_op16_addr16);
+                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_ins_op16_addr32);
+                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_ins_op16_addr64);
+                }
+                break;
+            case IEMMODE_64BIT:
+            case IEMMODE_32BIT:
+                switch (pIemCpu->enmEffAddrMode)
+                {
+                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_ins_op32_addr16);
+                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_ins_op32_addr32);
+                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_ins_op32_addr64);
+                }
+                break;
+        }
+    }
+    AssertFailedReturn(VERR_INTERNAL_ERROR_3);
+}
+
+
+/** Opcode 0x6e. */
+FNIEMOP_DEF(iemOp_outsb_Yb_DX)
+{
+    IEMOP_HLP_NO_LOCK_PREFIX();
+
+    /*
+     * Use the C implementation if a repeat prefix is encountered.
+     */
+    if (pIemCpu->fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
+    {
+        IEMOP_MNEMONIC("rep out DX,Yb");
+        switch (pIemCpu->enmEffAddrMode)
+        {
+            case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_outs_op8_addr16);
+            case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_outs_op8_addr32);
+            case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_outs_op8_addr64);
+        }
+    }
+    else
+    {
+        IEMOP_MNEMONIC("out DX,Yb");
+        switch (pIemCpu->enmEffAddrMode)
+        {
+            case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_outs_op8_addr16);
+            case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_outs_op8_addr32);
+            case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_outs_op8_addr64);
+        }
+    }
+    AssertFailedReturn(VERR_INTERNAL_ERROR_3);
+}
+
+
+/** Opcode 0x6f. */
+FNIEMOP_DEF(iemOp_outswd_Yv_DX)
+{
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    if (pIemCpu->fPrefixes & (IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
+    {
+        IEMOP_MNEMONIC("rep outs DX,Yv");
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                switch (pIemCpu->enmEffAddrMode)
+                {
+                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_outs_op16_addr16);
+                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_outs_op16_addr32);
+                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_outs_op16_addr64);
+                }
+                break;
+            case IEMMODE_64BIT:
+            case IEMMODE_32BIT:
+                switch (pIemCpu->enmEffAddrMode)
+                {
+                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_outs_op32_addr16);
+                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_outs_op32_addr32);
+                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_outs_op32_addr64);
+                }
+                break;
+        }
+    }
+    else
+    {
+        IEMOP_MNEMONIC("outs DX,Yv");
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                switch (pIemCpu->enmEffAddrMode)
+                {
+                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_outs_op16_addr16);
+                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_outs_op16_addr32);
+                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_outs_op16_addr64);
+                }
+                break;
+            case IEMMODE_64BIT:
+            case IEMMODE_32BIT:
+                switch (pIemCpu->enmEffAddrMode)
+                {
+                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_outs_op32_addr16);
+                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_outs_op32_addr32);
+                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_outs_op32_addr64);
+                }
+                break;
+        }
+    }
+    AssertFailedReturn(VERR_INTERNAL_ERROR_3);
+}
+
+
+/** Opcode 0x70. */
+FNIEMOP_DEF(iemOp_jo_Jb)
+{
+    IEMOP_MNEMONIC("jo  Jb");
+    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(pIemCpu, &i8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
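+    /* All the Jcc handlers below follow this pattern: when the condition
+     * holds, the sign-extended 8-bit displacement is added to RIP, otherwise
+     * RIP just advances past the instruction. */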
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
+        IEM_MC_REL_JMP_S8(i8Imm);
+    } IEM_MC_ELSE() {
+        IEM_MC_ADVANCE_RIP();
+    } IEM_MC_ENDIF();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x71. */
+FNIEMOP_DEF(iemOp_jno_Jb)
+{
+    IEMOP_MNEMONIC("jno Jb");
+    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(pIemCpu, &i8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_IF_EFL_BIT_SET(X86_EFL_OF) {
+        IEM_MC_ADVANCE_RIP();
+    } IEM_MC_ELSE() {
+        IEM_MC_REL_JMP_S8(i8Imm);
+    } IEM_MC_ENDIF();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+/** Opcode 0x72. */
+FNIEMOP_DEF(iemOp_jc_Jb)
+{
+    IEMOP_MNEMONIC("jc/jnae Jb");
+    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(pIemCpu, &i8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
+        IEM_MC_REL_JMP_S8(i8Imm);
+    } IEM_MC_ELSE() {
+        IEM_MC_ADVANCE_RIP();
+    } IEM_MC_ENDIF();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x73. */
+FNIEMOP_DEF(iemOp_jnc_Jb)
+{
+    IEMOP_MNEMONIC("jnc/jnb Jb");
+    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(pIemCpu, &i8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
+        IEM_MC_ADVANCE_RIP();
+    } IEM_MC_ELSE() {
+        IEM_MC_REL_JMP_S8(i8Imm);
+    } IEM_MC_ENDIF();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x74. */
+FNIEMOP_DEF(iemOp_je_Jb)
+{
+    IEMOP_MNEMONIC("je/jz   Jb");
+    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(pIemCpu, &i8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
+        IEM_MC_REL_JMP_S8(i8Imm);
+    } IEM_MC_ELSE() {
+        IEM_MC_ADVANCE_RIP();
+    } IEM_MC_ENDIF();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x75. */
+FNIEMOP_DEF(iemOp_jne_Jb)
+{
+    IEMOP_MNEMONIC("jne/jnz Jb");
+    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(pIemCpu, &i8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
+        IEM_MC_ADVANCE_RIP();
+    } IEM_MC_ELSE() {
+        IEM_MC_REL_JMP_S8(i8Imm);
+    } IEM_MC_ENDIF();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x76. */
+FNIEMOP_DEF(iemOp_jbe_Jb)
+{
+    IEMOP_MNEMONIC("jbe/jna Jb");
+    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(pIemCpu, &i8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
+        IEM_MC_REL_JMP_S8(i8Imm);
+    } IEM_MC_ELSE() {
+        IEM_MC_ADVANCE_RIP();
+    } IEM_MC_ENDIF();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x77. */
+FNIEMOP_DEF(iemOp_jnbe_Jb)
+{
+    IEMOP_MNEMONIC("jnbe/ja Jb");
+    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(pIemCpu, &i8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_IF_EFL_ANY_BITS_SET(X86_EFL_CF | X86_EFL_ZF) {
+        IEM_MC_ADVANCE_RIP();
+    } IEM_MC_ELSE() {
+        IEM_MC_REL_JMP_S8(i8Imm);
+    } IEM_MC_ENDIF();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x78. */
+FNIEMOP_DEF(iemOp_js_Jb)
+{
+    IEMOP_MNEMONIC("js  Jb");
+    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(pIemCpu, &i8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
+        IEM_MC_REL_JMP_S8(i8Imm);
+    } IEM_MC_ELSE() {
+        IEM_MC_ADVANCE_RIP();
+    } IEM_MC_ENDIF();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x79. */
+FNIEMOP_DEF(iemOp_jns_Jb)
+{
+    IEMOP_MNEMONIC("jns Jb");
+    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(pIemCpu, &i8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_IF_EFL_BIT_SET(X86_EFL_SF) {
+        IEM_MC_ADVANCE_RIP();
+    } IEM_MC_ELSE() {
+        IEM_MC_REL_JMP_S8(i8Imm);
+    } IEM_MC_ENDIF();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x7a. */
+FNIEMOP_DEF(iemOp_jp_Jb)
+{
+    IEMOP_MNEMONIC("jp  Jb");
+    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(pIemCpu, &i8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
+        IEM_MC_REL_JMP_S8(i8Imm);
+    } IEM_MC_ELSE() {
+        IEM_MC_ADVANCE_RIP();
+    } IEM_MC_ENDIF();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x7b. */
+FNIEMOP_DEF(iemOp_jnp_Jb)
+{
+    IEMOP_MNEMONIC("jnp Jb");
+    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(pIemCpu, &i8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_IF_EFL_BIT_SET(X86_EFL_PF) {
+        IEM_MC_ADVANCE_RIP();
+    } IEM_MC_ELSE() {
+        IEM_MC_REL_JMP_S8(i8Imm);
+    } IEM_MC_ENDIF();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x7c. */
+FNIEMOP_DEF(iemOp_jl_Jb)
+{
+    IEMOP_MNEMONIC("jl/jnge Jb");
+    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(pIemCpu, &i8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
+        IEM_MC_REL_JMP_S8(i8Imm);
+    } IEM_MC_ELSE() {
+        IEM_MC_ADVANCE_RIP();
+    } IEM_MC_ENDIF();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x7d. */
+FNIEMOP_DEF(iemOp_jnl_Jb)
+{
+    IEMOP_MNEMONIC("jnl/jge Jb");
+    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(pIemCpu, &i8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) {
+        IEM_MC_ADVANCE_RIP();
+    } IEM_MC_ELSE() {
+        IEM_MC_REL_JMP_S8(i8Imm);
+    } IEM_MC_ENDIF();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x7e. */
+FNIEMOP_DEF(iemOp_jle_Jb)
+{
+    IEMOP_MNEMONIC("jle/jng Jb");
+    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(pIemCpu, &i8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
+        IEM_MC_REL_JMP_S8(i8Imm);
+    } IEM_MC_ELSE() {
+        IEM_MC_ADVANCE_RIP();
+    } IEM_MC_ENDIF();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x7f. */
+FNIEMOP_DEF(iemOp_jnle_Jb)
+{
+    IEMOP_MNEMONIC("jnle/jg Jb");
+    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(pIemCpu, &i8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(X86_EFL_ZF, X86_EFL_SF, X86_EFL_OF) {
+        IEM_MC_ADVANCE_RIP();
+    } IEM_MC_ELSE() {
+        IEM_MC_REL_JMP_S8(i8Imm);
+    } IEM_MC_ENDIF();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x80. */
+FNIEMOP_DEF(iemOp_Grp1_Eb_Ib_80)
+{
+    uint8_t bRm;   IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
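+    /* The mnemonic table is eight 4-byte entries packed into one string
+     * ("add\0", "or\0\0", "adc\0", ...); the ModR/M reg field (0..7) times
+     * four indexes the right entry, and the same field selects the worker
+     * function table entry below. */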
+    IEMOP_MNEMONIC2("add\0or\0\0adc\0sbb\0and\0sub\0xor\0cmp" + ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)*4, "Eb,Ib");
+    PCIEMOPBINSIZES pImpl = g_apIemImplGrp1[(bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK];
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register target */
+        uint8_t u8Imm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &u8Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(3, 0);
+        IEM_MC_ARG(uint8_t *,       pu8Dst,                 0);
+        IEM_MC_ARG_CONST(uint8_t,   u8Src, /*=*/ u8Imm,     1);
+        IEM_MC_ARG(uint32_t *,      pEFlags,                2);
+
+        IEM_MC_REF_GREG_U8(pu8Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+        IEM_MC_REF_EFLAGS(pEFlags);
+        IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, u8Src, pEFlags);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory target */
+        uint32_t fAccess;
+        if (pImpl->pfnLockedU8)
+            fAccess = IEM_ACCESS_DATA_RW;
+        else
+        {   /* CMP */
+            IEMOP_HLP_NO_LOCK_PREFIX();
+            fAccess = IEM_ACCESS_DATA_R;
+        }
+        IEM_MC_BEGIN(3, 2);
+        IEM_MC_ARG(uint8_t *,       pu8Dst,                 0);
+        IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,        2);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
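+        /* The displacement bytes of the ModR/M encoding precede the
+         * immediate, so the effective address must be calculated before the
+         * immediate byte can be fetched. */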
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+        uint8_t u8Imm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &u8Imm);
+        IEM_MC_ARG_CONST(uint8_t,   u8Src, /*=*/ u8Imm,     1);
+
+        IEM_MC_MEM_MAP(pu8Dst, fAccess, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+        IEM_MC_FETCH_EFLAGS(EFlags);
+        if (!(pIemCpu->fPrefixes & IEM_OP_PRF_LOCK))
+            IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, u8Src, pEFlags);
+        else
+            IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU8, pu8Dst, u8Src, pEFlags);
+
+        IEM_MC_MEM_COMMIT_AND_UNMAP(pu8Dst, fAccess);
+        IEM_MC_COMMIT_EFLAGS(EFlags);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x81. */
+FNIEMOP_DEF(iemOp_Grp1_Ev_Iz)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    IEMOP_MNEMONIC2("add\0or\0\0adc\0sbb\0and\0sub\0xor\0cmp" + ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)*4, "Ev,Iz");
+    PCIEMOPBINSIZES pImpl = g_apIemImplGrp1[(bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK];
+
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+        {
+            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+            {
+                /* register target */
+                uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+                IEMOP_HLP_NO_LOCK_PREFIX();
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint16_t *,      pu16Dst,                0);
+                IEM_MC_ARG_CONST(uint16_t,  u16Src, /*=*/ u16Imm,   1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                2);
+
+                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+            }
+            else
+            {
+                /* memory target */
+                uint32_t fAccess;
+                if (pImpl->pfnLockedU16)
+                    fAccess = IEM_ACCESS_DATA_RW;
+                else
+                {   /* CMP */
+                    IEMOP_HLP_NO_LOCK_PREFIX();
+                    fAccess = IEM_ACCESS_DATA_R;
+                }
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint16_t *,      pu16Dst,                0);
+                IEM_MC_ARG(uint16_t,        u16Src,                 1);
+                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,        2);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+                IEM_MC_ASSIGN(u16Src, u16Imm);
+                IEM_MC_MEM_MAP(pu16Dst, fAccess, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                if (!(pIemCpu->fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU16, pu16Dst, u16Src, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, fAccess);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+            }
+            break;
+        }
+
+        case IEMMODE_32BIT:
+        {
+            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+            {
+                /* register target */
+                uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+                IEMOP_HLP_NO_LOCK_PREFIX();
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint32_t *,      pu32Dst,                0);
+                IEM_MC_ARG_CONST(uint32_t,  u32Src, /*=*/ u32Imm,   1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                2);
+
+                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+            }
+            else
+            {
+                /* memory target */
+                uint32_t fAccess;
+                if (pImpl->pfnLockedU32)
+                    fAccess = IEM_ACCESS_DATA_RW;
+                else
+                {   /* CMP */
+                    IEMOP_HLP_NO_LOCK_PREFIX();
+                    fAccess = IEM_ACCESS_DATA_R;
+                }
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint32_t *,      pu32Dst,                0);
+                IEM_MC_ARG(uint32_t,        u32Src,                 1);
+                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,        2);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+                IEM_MC_ASSIGN(u32Src, u32Imm);
+                IEM_MC_MEM_MAP(pu32Dst, fAccess, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                if (!(pIemCpu->fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU32, pu32Dst, u32Src, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, fAccess);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+            }
+            break;
+        }
+
+        case IEMMODE_64BIT:
+        {
+            if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+            {
+                /* register target */
+                uint64_t u64Imm; IEM_OPCODE_GET_NEXT_S32_SX_U64(pIemCpu, &u64Imm);
+                IEMOP_HLP_NO_LOCK_PREFIX();
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint64_t *,      pu64Dst,                0);
+                IEM_MC_ARG_CONST(uint64_t,  u64Src, /*=*/ u64Imm,   1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                2);
+
+                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+            }
+            else
+            {
+                /* memory target */
+                uint32_t fAccess;
+                if (pImpl->pfnLockedU64)
+                    fAccess = IEM_ACCESS_DATA_RW;
+                else
+                {   /* CMP */
+                    IEMOP_HLP_NO_LOCK_PREFIX();
+                    fAccess = IEM_ACCESS_DATA_R;
+                }
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint64_t *,      pu64Dst,                0);
+                IEM_MC_ARG(uint64_t,        u64Src,                 1);
+                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,        2);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                uint64_t u64Imm; IEM_OPCODE_GET_NEXT_S32_SX_U64(pIemCpu, &u64Imm);
+                IEM_MC_ASSIGN(u64Src, u64Imm);
+                IEM_MC_MEM_MAP(pu64Dst, fAccess, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                if (!(pIemCpu->fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU64, pu64Dst, u64Src, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, fAccess);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+            }
+            break;
+        }
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x82. */
+FNIEMOP_DEF(iemOp_Grp1_Eb_Ib_82)
+{
+    IEMOP_HLP_NO_64BIT(); /** @todo do we need to decode the whole instruction or is this ok? */
+    return FNIEMOP_CALL(iemOp_Grp1_Eb_Ib_80);
+}
+
+
+/** Opcode 0x83. */
+FNIEMOP_DEF(iemOp_Grp1_Ev_Ib)
+{
+    uint8_t bRm;   IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    IEMOP_MNEMONIC2("add\0or\0\0adc\0sbb\0and\0sub\0xor\0cmp" + ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)*4, "Ev,Ib");
+    PCIEMOPBINSIZES pImpl = g_apIemImplGrp1[(bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK];
+
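+    /* Unlike opcode 0x81 this form takes a byte immediate which is
+     * sign-extended to the effective operand size, hence the (int8_t) casts
+     * below. */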
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /*
+         * Register target
+         */
+        IEMOP_HLP_NO_LOCK_PREFIX();
+        uint8_t u8Imm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &u8Imm);
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+            {
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint16_t *,      pu16Dst,                    0);
+                IEM_MC_ARG_CONST(uint16_t,  u16Src, /*=*/ (int8_t)u8Imm,1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
+
+                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+            }
+
+            case IEMMODE_32BIT:
+            {
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint32_t *,      pu32Dst,                    0);
+                IEM_MC_ARG_CONST(uint32_t,  u32Src, /*=*/ (int8_t)u8Imm,1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
+
+                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+            }
+
+            case IEMMODE_64BIT:
+            {
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint64_t *,      pu64Dst,                    0);
+                IEM_MC_ARG_CONST(uint64_t,  u64Src, /*=*/ (int8_t)u8Imm,1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                    2);
+
+                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
+
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+            }
+        }
+    }
+    else
+    {
+        /*
+         * Memory target.
+         */
+        uint32_t fAccess;
+        if (pImpl->pfnLockedU16)
+            fAccess = IEM_ACCESS_DATA_RW;
+        else
+        {   /* CMP */
+            IEMOP_HLP_NO_LOCK_PREFIX();
+            fAccess = IEM_ACCESS_DATA_R;
+        }
+
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+            {
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint16_t *,      pu16Dst,                    0);
+                IEM_MC_ARG(uint16_t,        u16Src,                     1);
+                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,            2);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                uint8_t u8Imm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &u8Imm);
+                IEM_MC_ASSIGN(u16Src, (int8_t)u8Imm);
+                IEM_MC_MEM_MAP(pu16Dst, fAccess, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                if (!(pIemCpu->fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, u16Src, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU16, pu16Dst, u16Src, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, fAccess);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+            }
+
+            case IEMMODE_32BIT:
+            {
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint32_t *,      pu32Dst,                    0);
+                IEM_MC_ARG(uint32_t,        u32Src,                     1);
+                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,            2);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                uint8_t u8Imm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &u8Imm);
+                IEM_MC_ASSIGN(u32Src, (int8_t)u8Imm);
+                IEM_MC_MEM_MAP(pu32Dst, fAccess, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                if (!(pIemCpu->fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, u32Src, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU32, pu32Dst, u32Src, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, fAccess);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+            }
+
+            case IEMMODE_64BIT:
+            {
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint64_t *,      pu64Dst,                    0);
+                IEM_MC_ARG(uint64_t,        u64Src,                     1);
+                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,            2);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                uint8_t u8Imm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &u8Imm);
+                IEM_MC_ASSIGN(u64Src, (int8_t)u8Imm);
+                IEM_MC_MEM_MAP(pu64Dst, fAccess, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                if (!(pIemCpu->fPrefixes & IEM_OP_PRF_LOCK))
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, u64Src, pEFlags);
+                else
+                    IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnLockedU64, pu64Dst, u64Src, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, fAccess);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+            }
+        }
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x84. */
+FNIEMOP_DEF(iemOp_test_Eb_Gb)
+{
+    IEMOP_MNEMONIC("test Eb,Gb");
+    IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo do we have to decode the whole instruction first?  */
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_r8, &g_iemAImpl_test);
+}
+
+
+/** Opcode 0x85. */
+FNIEMOP_DEF(iemOp_test_Ev_Gv)
+{
+    IEMOP_MNEMONIC("test Ev,Gv");
+    IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo do we have to decode the whole instruction first?  */
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rm_rv, &g_iemAImpl_test);
+}
+
+
+/** Opcode 0x86. */
+FNIEMOP_STUB(iemOp_xchg_Eb_Gb);
+/** Opcode 0x87. */
+FNIEMOP_STUB(iemOp_xchg_Ev_Gv);
+
+
+/** Opcode 0x88. */
+FNIEMOP_DEF(iemOp_mov_Eb_Gb)
+{
+    IEMOP_MNEMONIC("mov Eb,Gb");
+
+    uint8_t bRm;
+    IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+
+    /*
+     * If rm denotes a register, no more instruction bytes follow.
+     */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(uint8_t, u8Value);
+        IEM_MC_FETCH_GREG_U8(u8Value, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+        IEM_MC_STORE_GREG_U8((bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB, u8Value);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * We're writing a register to memory.
+         */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(uint8_t, u8Value);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+        IEM_MC_FETCH_GREG_U8(u8Value, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+        IEM_MC_STORE_MEM_U8(pIemCpu->iEffSeg, GCPtrEffDst, u8Value);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x89. */
+FNIEMOP_DEF(iemOp_mov_Ev_Gv)
+{
+    IEMOP_MNEMONIC("mov Ev,Gv");
+
+    uint8_t bRm;
+    IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+
+    /*
+     * If rm denotes a register, no more instruction bytes follow.
+     */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint16_t, u16Value);
+                IEM_MC_FETCH_GREG_U16(u16Value, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+                IEM_MC_STORE_GREG_U16((bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB, u16Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint32_t, u32Value);
+                IEM_MC_FETCH_GREG_U32(u32Value, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+                IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB, u32Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint64_t, u64Value);
+                IEM_MC_FETCH_GREG_U64(u64Value, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+                IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB, u64Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+        }
+    }
+    else
+    {
+        /*
+         * We're writing a register to memory.
+         */
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(0, 2);
+                IEM_MC_LOCAL(uint16_t, u16Value);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_FETCH_GREG_U16(u16Value, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+                IEM_MC_STORE_MEM_U16(pIemCpu->iEffSeg, GCPtrEffDst, u16Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(0, 2);
+                IEM_MC_LOCAL(uint32_t, u32Value);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_FETCH_GREG_U32(u32Value, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+                IEM_MC_STORE_MEM_U32(pIemCpu->iEffSeg, GCPtrEffDst, u32Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(0, 2);
+                IEM_MC_LOCAL(uint64_t, u64Value);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_FETCH_GREG_U64(u64Value, ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg);
+                IEM_MC_STORE_MEM_U64(pIemCpu->iEffSeg, GCPtrEffDst, u64Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+        }
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x8a. */
+FNIEMOP_DEF(iemOp_mov_Gb_Eb)
+{
+    IEMOP_MNEMONIC("mov Gb,Eb");
+
+    uint8_t bRm;
+    IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+
+    /*
+     * If rm denotes a register, no more instruction bytes follow.
+     */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(uint8_t, u8Value);
+        IEM_MC_FETCH_GREG_U8(u8Value, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+        IEM_MC_STORE_GREG_U8(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u8Value);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * We're loading a register from memory.
+         */
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(uint8_t, u8Value);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+        IEM_MC_FETCH_MEM_U8(u8Value, pIemCpu->iEffSeg, GCPtrEffDst);
+        IEM_MC_STORE_GREG_U8(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u8Value);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x8b. */
+FNIEMOP_DEF(iemOp_mov_Gv_Ev)
+{
+    IEMOP_MNEMONIC("mov Gv,Ev");
+
+    uint8_t bRm;
+    IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+
+    /*
+     * If rm denotes a register, no more instruction bytes follow.
+     */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint16_t, u16Value);
+                IEM_MC_FETCH_GREG_U16(u16Value, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u16Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint32_t, u32Value);
+                IEM_MC_FETCH_GREG_U32(u32Value, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u32Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint64_t, u64Value);
+                IEM_MC_FETCH_GREG_U64(u64Value, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u64Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+        }
+    }
+    else
+    {
+        /*
+         * We're loading a register from memory.
+         */
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(0, 2);
+                IEM_MC_LOCAL(uint16_t, u16Value);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+Log(("GCPtrEffDst=%RGv\n", GCPtrEffDst));
+                IEM_MC_FETCH_MEM_U16(u16Value, pIemCpu->iEffSeg, GCPtrEffDst);
+Log(("u16Value=%#x\n", u16Value));
+                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u16Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(0, 2);
+                IEM_MC_LOCAL(uint32_t, u32Value);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_FETCH_MEM_U32(u32Value, pIemCpu->iEffSeg, GCPtrEffDst);
+                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u32Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(0, 2);
+                IEM_MC_LOCAL(uint64_t, u64Value);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_FETCH_MEM_U64(u64Value, pIemCpu->iEffSeg, GCPtrEffDst);
+                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u64Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+        }
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x8c. */
+FNIEMOP_DEF(iemOp_mov_Ev_Sw)
+{
+    IEMOP_MNEMONIC("mov Ev,Sw");
+
+    uint8_t bRm;
+    IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+
+    /*
+     * Check that the source segment register exists. The REX.R prefix is ignored.
+     */
+    uint8_t const iSegReg = ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
+    if (iSegReg > X86_SREG_GS)
+        return IEMOP_RAISE_INVALID_OPCODE(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+
+    /*
+     * If rm denotes a register, no more instruction bytes follow.
+     * In that case, the operand size is respected and the upper bits are
+     * cleared (starting with some Pentium models).
+     */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint16_t, u16Value);
+                IEM_MC_FETCH_SREG_U16(u16Value, iSegReg);
+                IEM_MC_STORE_GREG_U16((bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB, u16Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint32_t, u32Value);
+                IEM_MC_FETCH_SREG_U32_ZX(u32Value, iSegReg);
+                IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB, u32Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint64_t, u64Value);
+                IEM_MC_FETCH_SREG_U64_ZX(u64Value, iSegReg);
+                IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB, u64Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                break;
+        }
+    }
+    else
+    {
+        /*
+         * We're saving the register to memory.  The access is word sized
+         * regardless of operand size prefixes.
+         */
+#if 0 /* not necessary */
+        pIemCpu->enmEffOpSize = pIemCpu->enmDefOpSize = IEMMODE_16BIT;
+#endif
+        IEM_MC_BEGIN(0, 2);
+        IEM_MC_LOCAL(uint16_t,  u16Value);
+        IEM_MC_LOCAL(RTGCPTR,   GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+        IEM_MC_FETCH_SREG_U16(u16Value, iSegReg);
+        IEM_MC_STORE_MEM_U16(pIemCpu->iEffSeg, GCPtrEffDst, u16Value);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x8d. */
+FNIEMOP_DEF(iemOp_lea_Gv_M)
+{
+    IEMOP_MNEMONIC("lea Gv,M");
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+        return IEMOP_RAISE_INVALID_OPCODE(); /* no register form */
+
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
+            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm);
+            IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, GCPtrEffSrc);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_32BIT:
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
+            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm);
+            IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, GCPtrEffSrc);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_64BIT:
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
+            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm);
+            IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, GCPtrEffSrc);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+    }
+    AssertFailedReturn(VERR_INTERNAL_ERROR_5);
+}
+
+
+/** Opcode 0x8e. */
+FNIEMOP_DEF(iemOp_mov_Sw_Ev)
+{
+    IEMOP_MNEMONIC("mov Sw,Ev");
+
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+
+    /*
+     * The practical operand size is 16-bit.
+     */
+#if 0 /* not necessary */
+    pIemCpu->enmEffOpSize = pIemCpu->enmDefOpSize = IEMMODE_16BIT;
+#endif
+
+    /*
+     * Check that the destination register exists and can be used with this
+     * instruction.  The REX.R prefix is ignored.
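+     * Loading CS with a plain mov is invalid (it raises #UD); only far
+     * control transfers can change CS.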
+     */
+    uint8_t const iSegReg = ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK);
+    if (   iSegReg == X86_SREG_CS
+        || iSegReg > X86_SREG_GS)
+        return IEMOP_RAISE_INVALID_OPCODE(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+
+    /*
+     * If rm denotes a register, no more instruction bytes follow.
+     */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        IEM_MC_BEGIN(2, 0);
+        IEM_MC_ARG_CONST(uint8_t, iSRegArg, iSegReg, 0);
+        IEM_MC_ARG(uint16_t,      u16Value,          1);
+        IEM_MC_FETCH_GREG_U16(u16Value, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+        IEM_MC_CALL_CIMPL_2(iemCImpl_LoadSReg, iSRegArg, u16Value);
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * We're loading the register from memory.  The access is word sized
+         * regardless of operand size prefixes.
+         */
+        IEM_MC_BEGIN(2, 1);
+        IEM_MC_ARG_CONST(uint8_t, iSRegArg, iSegReg, 0);
+        IEM_MC_ARG(uint16_t,      u16Value,          1);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+        IEM_MC_FETCH_MEM_U16(u16Value, pIemCpu->iEffSeg, GCPtrEffDst);
+        IEM_MC_CALL_CIMPL_2(iemCImpl_LoadSReg, iSRegArg, u16Value);
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x8f. */
+FNIEMOP_STUB(iemOp_pop_Ev);
+
+
+/**
+ * Common 'xchg reg,rAX' helper.
+ */
+FNIEMOP_DEF_1(iemOpCommonXchgGRegRax, uint8_t, iReg)
+{
+    IEMOP_HLP_NO_LOCK_PREFIX();
+
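+    /* Fold in REX.B so the 0x91..0x97 encodings (and 0x90 with REX.B) can
+     * address r8..r15 in 64-bit mode. */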
+    iReg |= pIemCpu->uRexB;
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+            IEM_MC_BEGIN(0, 2);
+            IEM_MC_LOCAL(uint16_t, u16Tmp1);
+            IEM_MC_LOCAL(uint16_t, u16Tmp2);
+            IEM_MC_FETCH_GREG_U16(u16Tmp1, iReg);
+            IEM_MC_FETCH_GREG_U16(u16Tmp2, X86_GREG_xAX);
+            IEM_MC_STORE_GREG_U16(X86_GREG_xAX, u16Tmp1);
+            IEM_MC_STORE_GREG_U16(iReg,         u16Tmp2);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_32BIT:
+            IEM_MC_BEGIN(0, 2);
+            IEM_MC_LOCAL(uint32_t, u32Tmp1);
+            IEM_MC_LOCAL(uint32_t, u32Tmp2);
+            IEM_MC_FETCH_GREG_U32(u32Tmp1, iReg);
+            IEM_MC_FETCH_GREG_U32(u32Tmp2, X86_GREG_xAX);
+            IEM_MC_STORE_GREG_U32(X86_GREG_xAX, u32Tmp1);
+            IEM_MC_STORE_GREG_U32(iReg,         u32Tmp2);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_64BIT:
+            IEM_MC_BEGIN(0, 2);
+            IEM_MC_LOCAL(uint64_t, u64Tmp1);
+            IEM_MC_LOCAL(uint64_t, u64Tmp2);
+            IEM_MC_FETCH_GREG_U64(u64Tmp1, iReg);
+            IEM_MC_FETCH_GREG_U64(u64Tmp2, X86_GREG_xAX);
+            IEM_MC_STORE_GREG_U64(X86_GREG_xAX, u64Tmp1);
+            IEM_MC_STORE_GREG_U64(iReg,         u64Tmp2);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+}
+
+
+/** Opcode 0x90. */
+FNIEMOP_DEF(iemOp_nop)
+{
+    /* R8/R8D and RAX/EAX can be exchanged. */
+    if (pIemCpu->fPrefixes & IEM_OP_PRF_REX_B)
+    {
+        IEMOP_MNEMONIC("xchg r8,rAX");
+        return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xAX);
+    }
+
+    if (pIemCpu->fPrefixes & IEM_OP_PRF_LOCK)
+        IEMOP_MNEMONIC("pause");
+    else
+        IEMOP_MNEMONIC("nop");
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_ADVANCE_RIP();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0x91. */
+FNIEMOP_DEF(iemOp_xchg_eCX_eAX)
+{
+    IEMOP_MNEMONIC("xchg rCX,rAX");
+    return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xCX);
+}
+
+
+/** Opcode 0x92. */
+FNIEMOP_DEF(iemOp_xchg_eDX_eAX)
+{
+    IEMOP_MNEMONIC("xchg rDX,rAX");
+    return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xDX);
+}
+
+
+/** Opcode 0x93. */
+FNIEMOP_DEF(iemOp_xchg_eBX_eAX)
+{
+    IEMOP_MNEMONIC("xchg rBX,rAX");
+    return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xBX);
+}
+
+
+/** Opcode 0x94. */
+FNIEMOP_DEF(iemOp_xchg_eSP_eAX)
+{
+    IEMOP_MNEMONIC("xchg rSX,rAX");
+    return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xSP);
+}
+
+
+/** Opcode 0x95. */
+FNIEMOP_DEF(iemOp_xchg_eBP_eAX)
+{
+    IEMOP_MNEMONIC("xchg rBP,rAX");
+    return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xBP);
+}
+
+
+/** Opcode 0x96. */
+FNIEMOP_DEF(iemOp_xchg_eSI_eAX)
+{
+    IEMOP_MNEMONIC("xchg rSI,rAX");
+    return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xSI);
+}
+
+
+/** Opcode 0x97. */
+FNIEMOP_DEF(iemOp_xchg_eDI_eAX)
+{
+    IEMOP_MNEMONIC("xchg rDI,rAX");
+    return FNIEMOP_CALL_1(iemOpCommonXchgGRegRax, X86_GREG_xDI);
+}
+
+
+/** Opcode 0x98. */
+FNIEMOP_STUB(iemOp_cbw);
+/** Opcode 0x99. */
+FNIEMOP_STUB(iemOp_cwd);
+/** Opcode 0x9a. */
+FNIEMOP_STUB(iemOp_call_Ap);
+/** Opcode 0x9b. */
+FNIEMOP_STUB(iemOp_wait);
+
+
+/** Opcode 0x9c. */
+FNIEMOP_DEF(iemOp_pushf_Fv)
+{
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_pushf, pIemCpu->enmEffOpSize);
+}
+
+
+/** Opcode 0x9d. */
+FNIEMOP_DEF(iemOp_popf_Fv)
+{
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_popf, pIemCpu->enmEffOpSize);
+}
+
+
+/** Opcode 0x9e. */
+FNIEMOP_STUB(iemOp_sahf);
+/** Opcode 0x9f. */
+FNIEMOP_STUB(iemOp_lahf);
+
+/**
+ * Macro used by iemOp_mov_Al_Ob, iemOp_mov_rAX_Ov, iemOp_mov_Ob_AL and
+ * iemOp_mov_Ov_rAX to fetch the moffsXX bytes of the opcode and fend off lock
+ * prefixes.  Will return on failure.
+ * @param   a_GCPtrMemOff   The variable to store the offset in.
+ */
+#define IEMOP_FETCH_MOFFS_XX(a_GCPtrMemOff) \
+    do \
+    { \
+        switch (pIemCpu->enmEffAddrMode) \
+        { \
+            case IEMMODE_16BIT: \
+            { \
+                uint16_t u16Off; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Off); \
+                (a_GCPtrMemOff) = u16Off; \
+                break; \
+            } \
+            case IEMMODE_32BIT: \
+            { \
+                uint32_t u32Off; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Off); \
+                (a_GCPtrMemOff) = u32Off; \
+                break; \
+            } \
+            case IEMMODE_64BIT: \
+                IEM_OPCODE_GET_NEXT_U64(pIemCpu, &(a_GCPtrMemOff)); \
+                break; \
+            IEM_NOT_REACHED_DEFAULT_CASE_RET(); \
+        } \
+        IEMOP_HLP_NO_LOCK_PREFIX(); \
+    } while (0)
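+
+/* Illustration (assumed encodings): the moffs width follows the effective
+   address size, so in 32-bit code
+       a0 44 33 22 11                  ; mov al, [0x11223344]  (4 offset bytes)
+   while in 64-bit mode a full 8-byte offset is fetched:
+       48 a1 88 77 66 55 44 33 22 11   ; mov rax, [0x1122334455667788]        */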
+
+/** Opcode 0xa0. */
+FNIEMOP_DEF(iemOp_mov_Al_Ob)
+{
+    /*
+     * Get the offset and fend off lock prefixes.
+     */
+    RTGCPTR GCPtrMemOff;
+    IEMOP_FETCH_MOFFS_XX(GCPtrMemOff);
+
+    /*
+     * Fetch AL.
+     */
+    IEM_MC_BEGIN(0,1);
+    IEM_MC_LOCAL(uint8_t, u8Tmp);
+    IEM_MC_FETCH_MEM_U8(u8Tmp, pIemCpu->iEffSeg, GCPtrMemOff);
+    IEM_MC_STORE_GREG_U8(X86_GREG_xAX, u8Tmp);
+    IEM_MC_ADVANCE_RIP();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0xa1. */
+FNIEMOP_DEF(iemOp_mov_rAX_Ov)
+{
+    /*
+     * Get the offset and fend off lock prefixes.
+     */
+    RTGCPTR GCPtrMemOff;
+    IEMOP_FETCH_MOFFS_XX(GCPtrMemOff);
+
+    /*
+     * Fetch rAX.
+     */
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+            IEM_MC_BEGIN(0,1);
+            IEM_MC_LOCAL(uint16_t, u16Tmp);
+            IEM_MC_FETCH_MEM_U16(u16Tmp, pIemCpu->iEffSeg, GCPtrMemOff);
+            IEM_MC_STORE_GREG_U16(X86_GREG_xAX, u16Tmp);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_32BIT:
+            IEM_MC_BEGIN(0,1);
+            IEM_MC_LOCAL(uint32_t, u32Tmp);
+            IEM_MC_FETCH_MEM_U32(u32Tmp, pIemCpu->iEffSeg, GCPtrMemOff);
+            IEM_MC_STORE_GREG_U32(X86_GREG_xAX, u32Tmp);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_64BIT:
+            IEM_MC_BEGIN(0,1);
+            IEM_MC_LOCAL(uint64_t, u64Tmp);
+            IEM_MC_FETCH_MEM_U64(u64Tmp, pIemCpu->iEffSeg, GCPtrMemOff);
+            IEM_MC_STORE_GREG_U64(X86_GREG_xAX, u64Tmp);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+}
+
+
+/** Opcode 0xa2. */
+FNIEMOP_DEF(iemOp_mov_Ob_AL)
+{
+    /*
+     * Get the offset and fend off lock prefixes.
+     */
+    RTGCPTR GCPtrMemOff;
+    IEMOP_FETCH_MOFFS_XX(GCPtrMemOff);
+
+    /*
+     * Store AL.
+     */
+    IEM_MC_BEGIN(0,1);
+    IEM_MC_LOCAL(uint8_t, u8Tmp);
+    IEM_MC_FETCH_GREG_U8(u8Tmp, X86_GREG_xAX);
+    IEM_MC_STORE_MEM_U8(pIemCpu->iEffSeg, GCPtrMemOff, u8Tmp);
+    IEM_MC_ADVANCE_RIP();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0xa3. */
+FNIEMOP_DEF(iemOp_mov_Ov_rAX)
+{
+    /*
+     * Get the offset and fend off lock prefixes.
+     */
+    RTGCPTR GCPtrMemOff;
+    IEMOP_FETCH_MOFFS_XX(GCPtrMemOff);
+
+    /*
+     * Store rAX.
+     */
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+            IEM_MC_BEGIN(0,1);
+            IEM_MC_LOCAL(uint16_t, u16Tmp);
+            IEM_MC_FETCH_GREG_U16(u16Tmp, X86_GREG_xAX);
+            IEM_MC_STORE_MEM_U16(pIemCpu->iEffSeg, GCPtrMemOff, u16Tmp);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_32BIT:
+            IEM_MC_BEGIN(0,1);
+            IEM_MC_LOCAL(uint32_t, u32Tmp);
+            IEM_MC_FETCH_GREG_U32(u32Tmp, X86_GREG_xAX);
+            IEM_MC_STORE_MEM_U32(pIemCpu->iEffSeg, GCPtrMemOff, u32Tmp);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_64BIT:
+            IEM_MC_BEGIN(0,1);
+            IEM_MC_LOCAL(uint64_t, u64Tmp);
+            IEM_MC_FETCH_GREG_U64(u64Tmp, X86_GREG_xAX);
+            IEM_MC_STORE_MEM_U64(pIemCpu->iEffSeg, GCPtrMemOff, u64Tmp);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+}
+
+/** Macro used by iemOp_movsb_Xb_Yb and iemOp_movswd_Xv_Yv */
+#define IEM_MOVS_CASE(ValBits, AddrBits) \
+        IEM_MC_BEGIN(0, 2); \
+        IEM_MC_LOCAL(uint##ValBits##_t, uValue); \
+        IEM_MC_LOCAL(uint##AddrBits##_t, uAddr); \
+        IEM_MC_FETCH_GREG_U##AddrBits(uAddr, X86_GREG_xSI); \
+        IEM_MC_FETCH_MEM_U##ValBits(uValue, pIemCpu->iEffSeg, uAddr); \
+        IEM_MC_FETCH_GREG_U##AddrBits(uAddr, X86_GREG_xDI); \
+        IEM_MC_STORE_MEM_U##ValBits(X86_SREG_ES, uAddr, uValue); \
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_DF) { \
+            IEM_MC_SUB_GREG_U##AddrBits(X86_GREG_xDI, ValBits / 8); \
+            IEM_MC_SUB_GREG_U##AddrBits(X86_GREG_xSI, ValBits / 8); \
+        } IEM_MC_ELSE() { \
+            IEM_MC_ADD_GREG_U##AddrBits(X86_GREG_xDI, ValBits / 8); \
+            IEM_MC_ADD_GREG_U##AddrBits(X86_GREG_xSI, ValBits / 8); \
+        } IEM_MC_ENDIF(); \
+        IEM_MC_ADVANCE_RIP(); \
+        IEM_MC_END(); \
+
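+/* In plain x86 terms the macro body above amounts to this sketch (the source
+   segment honours iEffSeg, the destination is always ES):
+       uValue = read(seg:xSI);  write(ES:xDI, uValue);
+       if (EFLAGS.DF) { xSI -= ValBits/8; xDI -= ValBits/8; }
+       else           { xSI += ValBits/8; xDI += ValBits/8; }                 */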
+
+/** Opcode 0xa4. */
+FNIEMOP_DEF(iemOp_movsb_Xb_Yb)
+{
+    IEMOP_HLP_NO_LOCK_PREFIX();
+
+    /*
+     * Use the C implementation if a repeat prefix is encountered.
+     */
+    if (pIemCpu->fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
+    {
+        IEMOP_MNEMONIC("rep movsb Xb,Yb");
+        switch (pIemCpu->enmEffAddrMode)
+        {
+            case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_movs_op8_addr16, pIemCpu->iEffSeg);
+            case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_movs_op8_addr32, pIemCpu->iEffSeg);
+            case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_movs_op8_addr64, pIemCpu->iEffSeg);
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    IEMOP_MNEMONIC("movsb Xb,Yb");
+
+    /*
+     * Sharing case implementation with movs[wdq] below.
+     */
+    switch (pIemCpu->enmEffAddrMode)
+    {
+        case IEMMODE_16BIT: IEM_MOVS_CASE(8, 16); break;
+        case IEMMODE_32BIT: IEM_MOVS_CASE(8, 32); break;
+        case IEMMODE_64BIT: IEM_MOVS_CASE(8, 64); break;
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0xa5. */
+FNIEMOP_DEF(iemOp_movswd_Xv_Yv)
+{
+    IEMOP_HLP_NO_LOCK_PREFIX();
+
+    /*
+     * Use the C implementation if a repeat prefix is encountered.
+     */
+    if (pIemCpu->fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
+    {
+        IEMOP_MNEMONIC("rep movs Xv,Yv");
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                switch (pIemCpu->enmEffAddrMode)
+                {
+                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_movs_op16_addr16, pIemCpu->iEffSeg);
+                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_movs_op16_addr32, pIemCpu->iEffSeg);
+                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_movs_op16_addr64, pIemCpu->iEffSeg);
+                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
+                }
+                break;
+            case IEMMODE_32BIT:
+                switch (pIemCpu->enmEffAddrMode)
+                {
+                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_movs_op32_addr16, pIemCpu->iEffSeg);
+                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_movs_op32_addr32, pIemCpu->iEffSeg);
+                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_movs_op32_addr64, pIemCpu->iEffSeg);
+                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
+                }
+            case IEMMODE_64BIT:
+                switch (pIemCpu->enmEffAddrMode)
+                {
+                    case IEMMODE_16BIT: AssertFailedReturn(VERR_INTERNAL_ERROR_3);
+                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_movs_op64_addr32, pIemCpu->iEffSeg);
+                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_rep_movs_op64_addr64, pIemCpu->iEffSeg);
+                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
+                }
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    IEMOP_MNEMONIC("movs Xv,Yv");
+
+    /*
+     * Annoying double switch here.
+     * Using an ugly macro to implement the cases, sharing it with movsb.
+     */
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+            switch (pIemCpu->enmEffAddrMode)
+            {
+                case IEMMODE_16BIT: IEM_MOVS_CASE(16, 16); break;
+                case IEMMODE_32BIT: IEM_MOVS_CASE(16, 32); break;
+                case IEMMODE_64BIT: IEM_MOVS_CASE(16, 64); break;
+                IEM_NOT_REACHED_DEFAULT_CASE_RET();
+            }
+            break;
+
+        case IEMMODE_32BIT:
+            switch (pIemCpu->enmEffAddrMode)
+            {
+                case IEMMODE_16BIT: IEM_MOVS_CASE(32, 16); break;
+                case IEMMODE_32BIT: IEM_MOVS_CASE(32, 32); break;
+                case IEMMODE_64BIT: IEM_MOVS_CASE(32, 64); break;
+                IEM_NOT_REACHED_DEFAULT_CASE_RET();
+            }
+            break;
+
+        case IEMMODE_64BIT:
+            switch (pIemCpu->enmEffAddrMode)
+            {
+                case IEMMODE_16BIT: AssertFailedReturn(VERR_INTERNAL_ERROR_4); /* cannot be encoded */ break;
+                case IEMMODE_32BIT: IEM_MOVS_CASE(64, 32); break;
+                case IEMMODE_64BIT: IEM_MOVS_CASE(64, 64); break;
+                IEM_NOT_REACHED_DEFAULT_CASE_RET();
+            }
+            break;
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+    return VINF_SUCCESS;
+}
+
+#undef IEM_MOVS_CASE
+
+/** Opcode 0xa6. */
+FNIEMOP_STUB(iemOp_cmpsb_Xb_Yb);
+/** Opcode 0xa7. */
+FNIEMOP_STUB(iemOp_cmpswd_Xv_Yv);
+
+
+/** Opcode 0xa8. */
+FNIEMOP_DEF(iemOp_test_AL_Ib)
+{
+    IEMOP_MNEMONIC("test al,Ib");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_AL_Ib, &g_iemAImpl_test);
+}
+
+
+/** Opcode 0xa9. */
+FNIEMOP_DEF(iemOp_test_eAX_Iz)
+{
+    IEMOP_MNEMONIC("test rAX,Iz");
+    return FNIEMOP_CALL_1(iemOpHlpBinaryOperator_rAX_Iz, &g_iemAImpl_test);
+}
+
+
+/** Macro used by iemOp_stosb_Yb_AL and iemOp_stoswd_Yv_eAX */
+#define IEM_STOS_CASE(ValBits, AddrBits) \
+        IEM_MC_BEGIN(0, 2); \
+        IEM_MC_LOCAL(uint##ValBits##_t, uValue); \
+        IEM_MC_LOCAL(uint##AddrBits##_t, uAddr); \
+        IEM_MC_FETCH_GREG_U##ValBits(uValue, X86_GREG_xAX); \
+        IEM_MC_FETCH_GREG_U##AddrBits(uAddr,  X86_GREG_xDI); \
+        IEM_MC_STORE_MEM_U##ValBits(X86_SREG_ES, uAddr, uValue); \
+        IEM_MC_IF_EFL_BIT_SET(X86_EFL_DF) { \
+            IEM_MC_SUB_GREG_U##AddrBits(X86_GREG_xDI, ValBits / 8); \
+        } IEM_MC_ELSE() { \
+            IEM_MC_ADD_GREG_U##AddrBits(X86_GREG_xDI, ValBits / 8); \
+        } IEM_MC_ENDIF(); \
+        IEM_MC_ADVANCE_RIP(); \
+        IEM_MC_END(); \
+
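+/* Plain-terms sketch of the macro above:
+       write(ES:xDI, rAX value of ValBits width);
+       xDI += EFLAGS.DF ? -ValBits/8 : +ValBits/8;
+   Note that stos always targets ES:xDI; no segment override applies. */
+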
+/** Opcode 0xaa. */
+FNIEMOP_DEF(iemOp_stosb_Yb_AL)
+{
+    IEMOP_HLP_NO_LOCK_PREFIX();
+
+    /*
+     * Use the C implementation if a repeat prefix is encountered.
+     */
+    if (pIemCpu->fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
+    {
+        IEMOP_MNEMONIC("rep stos Yb,al");
+        switch (pIemCpu->enmEffAddrMode)
+        {
+            case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stos_al_m16);
+            case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stos_al_m32);
+            case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stos_al_m64);
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    IEMOP_MNEMONIC("stos Yb,al");
+
+    /*
+     * Sharing case implementation with stos[wdq] below.
+     */
+    switch (pIemCpu->enmEffAddrMode)
+    {
+        case IEMMODE_16BIT: IEM_STOS_CASE(8, 16); break;
+        case IEMMODE_32BIT: IEM_STOS_CASE(8, 32); break;
+        case IEMMODE_64BIT: IEM_STOS_CASE(8, 64); break;
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0xab. */
+FNIEMOP_DEF(iemOp_stoswd_Yv_eAX)
+{
+    IEMOP_HLP_NO_LOCK_PREFIX();
+
+    /*
+     * Use the C implementation if a repeat prefix is encountered.
+     */
+    if (pIemCpu->fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
+    {
+        IEMOP_MNEMONIC("rep stos Yv,rAX");
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                switch (pIemCpu->enmEffAddrMode)
+                {
+                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stos_ax_m16);
+                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stos_ax_m32);
+                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stos_ax_m64);
+                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
+                }
+                break;
+            case IEMMODE_32BIT:
+                switch (pIemCpu->enmEffAddrMode)
+                {
+                    case IEMMODE_16BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stos_eax_m16);
+                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stos_eax_m32);
+                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stos_eax_m64);
+                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
+                }
+            case IEMMODE_64BIT:
+                switch (pIemCpu->enmEffAddrMode)
+                {
+                    case IEMMODE_16BIT: AssertFailedReturn(VERR_INTERNAL_ERROR_3);
+                    case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stos_rax_m32);
+                    case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_stos_rax_m64);
+                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
+                }
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    IEMOP_MNEMONIC("stos Yv,rAX");
+
+    /*
+     * Annoying double switch here.
+     * Using an ugly macro to implement the cases, sharing it with stosb.
+     */
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+            switch (pIemCpu->enmEffAddrMode)
+            {
+                case IEMMODE_16BIT: IEM_STOS_CASE(16, 16); break;
+                case IEMMODE_32BIT: IEM_STOS_CASE(16, 32); break;
+                case IEMMODE_64BIT: IEM_STOS_CASE(16, 64); break;
+                IEM_NOT_REACHED_DEFAULT_CASE_RET();
+            }
+            break;
+
+        case IEMMODE_32BIT:
+            switch (pIemCpu->enmEffAddrMode)
+            {
+                case IEMMODE_16BIT: IEM_STOS_CASE(32, 16); break;
+                case IEMMODE_32BIT: IEM_STOS_CASE(32, 32); break;
+                case IEMMODE_64BIT: IEM_STOS_CASE(32, 64); break;
+                IEM_NOT_REACHED_DEFAULT_CASE_RET();
+            }
+            break;
+
+        case IEMMODE_64BIT:
+            switch (pIemCpu->enmEffAddrMode)
+            {
+                case IEMMODE_16BIT: AssertFailedReturn(VERR_INTERNAL_ERROR_4); /* cannot be encoded */ break;
+                case IEMMODE_32BIT: IEM_STOS_CASE(64, 32); break;
+                case IEMMODE_64BIT: IEM_STOS_CASE(64, 64); break;
+                IEM_NOT_REACHED_DEFAULT_CASE_RET();
+            }
+            break;
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+    return VINF_SUCCESS;
+}
+
+#undef IEM_STOS_CASE
+
+/** Opcode 0xac. */
+FNIEMOP_STUB(iemOp_lodsb_AL_Xb);
+/** Opcode 0xad. */
+FNIEMOP_STUB(iemOp_lodswd_eAX_Xv);
+/** Opcode 0xae. */
+FNIEMOP_STUB(iemOp_scasb_AL_Xb);
+/** Opcode 0xaf. */
+FNIEMOP_STUB(iemOp_scaswd_eAX_Xv);
+
+/**
+ * Common 'mov r8, imm8' helper.
+ */
+FNIEMOP_DEF_1(iemOpCommonMov_r8_Ib, uint8_t, iReg)
+{
+    uint8_t u8Imm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &u8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+
+    IEM_MC_BEGIN(0, 1);
+    IEM_MC_LOCAL_CONST(uint8_t, u8Value,/*=*/ u8Imm);
+    IEM_MC_STORE_GREG_U8(iReg, u8Value);
+    IEM_MC_ADVANCE_RIP();
+    IEM_MC_END();
+
+    return VINF_SUCCESS;
+}
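+
+/* Usage illustration (assumed encoding): the B0+rb forms carry the register
+   in the low opcode bits, e.g.
+       b3 07    ; mov bl, 7
+   decodes through iemOpCommonMov_r8_Ib with iReg = X86_GREG_xBX. */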
+
+
+/** Opcode 0xb0. */
+FNIEMOP_DEF(iemOp_mov_AL_Ib)
+{
+    IEMOP_MNEMONIC("mov AL,Ib");
+    return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xAX);
+}
+
+
+/** Opcode 0xb1. */
+FNIEMOP_DEF(iemOp_CL_Ib)
+{
+    IEMOP_MNEMONIC("mov CL,Ib");
+    return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xCX);
+}
+
+
+/** Opcode 0xb2. */
+FNIEMOP_DEF(iemOp_DL_Ib)
+{
+    IEMOP_MNEMONIC("mov DL,Ib");
+    return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xDX);
+}
+
+
+/** Opcode 0xb3. */
+FNIEMOP_DEF(iemOp_BL_Ib)
+{
+    IEMOP_MNEMONIC("mov BL,Ib");
+    return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xBX);
+}
+
+
+/** Opcode 0xb4. */
+FNIEMOP_DEF(iemOp_mov_AH_Ib)
+{
+    IEMOP_MNEMONIC("mov AH,Ib");
+    return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xSP);
+}
+
+
+/** Opcode 0xb5. */
+FNIEMOP_DEF(iemOp_CH_Ib)
+{
+    IEMOP_MNEMONIC("mov CH,Ib");
+    return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xBP);
+}
+
+
+/** Opcode 0xb6. */
+FNIEMOP_DEF(iemOp_DH_Ib)
+{
+    IEMOP_MNEMONIC("mov DH,Ib");
+    return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xSI);
+}
+
+
+/** Opcode 0xb7. */
+FNIEMOP_DEF(iemOp_BH_Ib)
+{
+    IEMOP_MNEMONIC("mov BH,Ib");
+    return FNIEMOP_CALL_1(iemOpCommonMov_r8_Ib, X86_GREG_xDI);
+}
+
+
+/**
+ * Common 'mov regX,immX' helper.
+ */
+FNIEMOP_DEF_1(iemOpCommonMov_Rv_Iv, uint8_t, iReg)
+{
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+        {
+            uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+            IEMOP_HLP_NO_LOCK_PREFIX();
+
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL_CONST(uint16_t, u16Value,/*=*/ u16Imm);
+            IEM_MC_STORE_GREG_U16(iReg, u16Value);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            break;
+        }
+
+        case IEMMODE_32BIT:
+        {
+            uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+            IEMOP_HLP_NO_LOCK_PREFIX();
+
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL_CONST(uint32_t, u32Value,/*=*/ u32Imm);
+            IEM_MC_STORE_GREG_U32(iReg, u32Value);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            break;
+        }
+        case IEMMODE_64BIT:
+        {
+            uint64_t u64Imm; IEM_OPCODE_GET_NEXT_U64(pIemCpu, &u64Imm);
+            IEMOP_HLP_NO_LOCK_PREFIX();
+
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL_CONST(uint64_t, u64Value,/*=*/ u64Imm);
+            IEM_MC_STORE_GREG_U64(iReg, u64Value);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            break;
+        }
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+
+    return VINF_SUCCESS;
+}
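+
+/* Usage illustration (assumed encodings): B8+rv takes an operand-size
+   immediate, e.g.
+       b8 78 56 34 12          ; mov eax, 0x12345678
+       48 b8 + 8 imm bytes     ; mov rax, imm64 (the only 64-bit immediate mov) */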
+
+
+/** Opcode 0xb8. */
+FNIEMOP_DEF(iemOp_eAX_Iv)
+{
+    IEMOP_MNEMONIC("mov rAX,IV");
+    return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xAX);
+}
+
+
+/** Opcode 0xb9. */
+FNIEMOP_DEF(iemOp_eCX_Iv)
+{
+    IEMOP_MNEMONIC("mov rCX,IV");
+    return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xCX);
+}
+
+
+/** Opcode 0xba. */
+FNIEMOP_DEF(iemOp_eDX_Iv)
+{
+    IEMOP_MNEMONIC("mov rDX,IV");
+    return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xDX);
+}
+
+
+/** Opcode 0xbb. */
+FNIEMOP_DEF(iemOp_eBX_Iv)
+{
+    IEMOP_MNEMONIC("mov rBX,IV");
+    return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xBX);
+}
+
+
+/** Opcode 0xbc. */
+FNIEMOP_DEF(iemOp_eSP_Iv)
+{
+    IEMOP_MNEMONIC("mov rSP,IV");
+    return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xSP);
+}
+
+
+/** Opcode 0xbd. */
+FNIEMOP_DEF(iemOp_eBP_Iv)
+{
+    IEMOP_MNEMONIC("mov rBP,IV");
+    return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xBP);
+}
+
+
+/** Opcode 0xbe. */
+FNIEMOP_DEF(iemOp_eSI_Iv)
+{
+    IEMOP_MNEMONIC("mov rSI,IV");
+    return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xSI);
+}
+
+
+/** Opcode 0xbf. */
+FNIEMOP_DEF(iemOp_eDI_Iv)
+{
+    IEMOP_MNEMONIC("mov rDI,IV");
+    return FNIEMOP_CALL_1(iemOpCommonMov_Rv_Iv, X86_GREG_xDI);
+}
+
+
+/** Opcode 0xc0. */
+FNIEMOP_DEF(iemOp_Grp2_Eb_Ib)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    PCIEMOPSHIFTSIZES pImpl;
+    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
+    {
+        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC("rol Eb,Ib"); break;
+        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC("ror Eb,Ib"); break;
+        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC("rcl Eb,Ib"); break;
+        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC("rcr Eb,Ib"); break;
+        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC("shl Eb,Ib"); break;
+        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC("shr Eb,Ib"); break;
+        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC("sar Eb,Ib"); break;
+        case 6: return IEMOP_RAISE_INVALID_LOCK_PREFIX();
+    }
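+
+    /* The ModRM reg field selected the operation above; e.g. (assumed
+       encoding) c0 e3 04 is 'shl bl, 4': reg=4 picks g_iemAImpl_shl, while
+       mod=3 with rm=3 names BL and the imm8 count follows. */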
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register */
+        uint8_t cShift; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &cShift);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(3, 0);
+        IEM_MC_ARG(uint8_t *,       pu8Dst,            0);
+        IEM_MC_ARG_CONST(uint8_t,   cShiftArg, cShift, 1);
+        IEM_MC_ARG(uint32_t *,      pEFlags,           2);
+        IEM_MC_REF_GREG_U8(pu8Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+        IEM_MC_REF_EFLAGS(pEFlags);
+        IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, cShiftArg, pEFlags);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory */
+        IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+        IEM_MC_BEGIN(3, 2);
+        IEM_MC_ARG(uint8_t *,   pu8Dst,    0);
+        IEM_MC_ARG(uint8_t,     cShiftArg,  1);
+        IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+        uint8_t cShift; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &cShift);
+        IEM_MC_ASSIGN(cShiftArg, cShift);
+        IEM_MC_MEM_MAP(pu8Dst, IEM_ACCESS_DATA_RW, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+        IEM_MC_FETCH_EFLAGS(EFlags);
+        IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, cShiftArg, pEFlags);
+
+        IEM_MC_MEM_COMMIT_AND_UNMAP(pu8Dst, IEM_ACCESS_DATA_RW);
+        IEM_MC_COMMIT_EFLAGS(EFlags);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0xc1. */
+FNIEMOP_DEF(iemOp_Grp2_Ev_Ib)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    PCIEMOPSHIFTSIZES pImpl;
+    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
+    {
+        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC("rol Ev,Ib"); break;
+        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC("ror Ev,Ib"); break;
+        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC("rcl Ev,Ib"); break;
+        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC("rcr Ev,Ib"); break;
+        case 4:
+            pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC("shl Ev,Ib");
+#ifdef IEM_VERIFICATION_MODE
+            pIemCpu->fShlHack = true;
+#endif
+            break;
+        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC("shr Ev,Ib"); break;
+        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC("sar Ev,Ib"); break;
+        case 6: return IEMOP_RAISE_INVALID_LOCK_PREFIX();
+    }
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register */
+        uint8_t cShift; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &cShift);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint16_t *,      pu16Dst,           0);
+                IEM_MC_ARG_CONST(uint8_t,   cShiftArg, cShift, 1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,           2);
+                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, cShiftArg, pEFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint32_t *,      pu32Dst,           0);
+                IEM_MC_ARG_CONST(uint8_t,   cShiftArg, cShift, 1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,           2);
+                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, cShiftArg, pEFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint64_t *,      pu64Dst,           0);
+                IEM_MC_ARG_CONST(uint8_t,   cShiftArg, cShift, 1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,           2);
+                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, cShiftArg, pEFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    else
+    {
+        /* memory */
+        IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint16_t *,  pu16Dst,    0);
+                IEM_MC_ARG(uint8_t,     cShiftArg,  1);
+                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                uint8_t cShift; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &cShift);
+                IEM_MC_ASSIGN(cShiftArg, cShift);
+                IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_RW, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, cShiftArg, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_RW);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint32_t *,  pu32Dst,    0);
+                IEM_MC_ARG(uint8_t,     cShiftArg,  1);
+                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                uint8_t cShift; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &cShift);
+                IEM_MC_ASSIGN(cShiftArg, cShift);
+                IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_RW, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, cShiftArg, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_RW);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint64_t *,  pu64Dst,    0);
+                IEM_MC_ARG(uint8_t,     cShiftArg,  1);
+                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                uint8_t cShift; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &cShift);
+                IEM_MC_ASSIGN(cShiftArg, cShift);
+                IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_RW, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, cShiftArg, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_RW);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+}
+
+
+/** Opcode 0xc2. */
+FNIEMOP_STUB(iemOp_retn_Iw);
+
+
+/** Opcode 0xc3. */
+FNIEMOP_DEF(iemOp_retn)
+{
+    IEMOP_MNEMONIC("retn");
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL(uint16_t, u16NewIP);
+            IEM_MC_POP_U16(&u16NewIP);
+            /** @todo This should raise GP(0) if u16NewIP > csHid.u32Limit.
+             *        The Intel manual does not indicate that this is the
+             *        case for 32-bit or 64-bit (canonical check). Needs to
+             *        be tested. */
+            IEM_MC_SET_RIP_U16(u16NewIP);
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_32BIT:
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL(uint32_t, u32NewIP);
+            IEM_MC_POP_U32(&u32NewIP);
+            IEM_MC_SET_RIP_U32(u32NewIP);
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_64BIT:
+            IEM_MC_BEGIN(0, 1);
+            IEM_MC_LOCAL(uint64_t, u64NewIP);
+            IEM_MC_POP_U64(&u64NewIP);
+            IEM_MC_SET_RIP_U64(u64NewIP);
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        default:
+            AssertFailedReturn(VERR_INTERNAL_ERROR_2);
+    }
+}
+
+
+/** Opcode 0xc4. */
+FNIEMOP_STUB(iemOp_les_Gv_Mp);
+/** Opcode 0xc5. */
+FNIEMOP_STUB(iemOp_lds_Gv_Mp);
+
+
+/** Opcode 0xc6. */
+FNIEMOP_DEF(iemOp_Grp11_Eb_Ib)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+    if ((bRm & X86_MODRM_REG_MASK) != (0 << X86_MODRM_REG_SHIFT)) /* only mov Eb,Ib in this group. */
+        return IEMOP_RAISE_INVALID_LOCK_PREFIX();
+    IEMOP_MNEMONIC("mov Eb,Ib");
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register access */
+        uint8_t u8Imm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &u8Imm);
+        IEM_MC_BEGIN(0, 0);
+        IEM_MC_STORE_GREG_U8((bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB, u8Imm);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory access. */
+        IEM_MC_BEGIN(0, 1);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+        uint8_t u8Imm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &u8Imm);
+        IEM_MC_STORE_MEM_U8(pIemCpu->iEffSeg, GCPtrEffDst, u8Imm);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
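+
+/* Encoding illustration (assumed bytes): C6 /0 stores an imm8, e.g. in 32-bit
+   code
+       c6 45 fc 2a    ; mov byte [ebp-4], 42
+   where mod=1/rm=5 gives [ebp+disp8] and the immediate follows the disp8. */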
+
+
+/** Opcode 0xc7. */
+FNIEMOP_DEF(iemOp_Grp11_Ev_Iz)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+    if ((bRm & X86_MODRM_REG_MASK) != (0 << X86_MODRM_REG_SHIFT)) /* only mov Ev,Iz in this group. */
+        return IEMOP_RAISE_INVALID_LOCK_PREFIX();
+    IEMOP_MNEMONIC("mov Ev,Iz");
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register access */
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(0, 0);
+                uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+                IEM_MC_STORE_GREG_U16((bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB, u16Imm);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(0, 0);
+                uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+                IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB, u32Imm);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(0, 0);
+                uint64_t u64Imm; IEM_OPCODE_GET_NEXT_U64(pIemCpu, &u64Imm);
+                IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB, u64Imm);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    else
+    {
+        /* memory access. */
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+                IEM_MC_STORE_MEM_U16(pIemCpu->iEffSeg, GCPtrEffDst, u16Imm);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+                IEM_MC_STORE_MEM_U32(pIemCpu->iEffSeg, GCPtrEffDst, u32Imm);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                uint64_t u64Imm; IEM_OPCODE_GET_NEXT_U64(pIemCpu, &u64Imm);
+                IEM_MC_STORE_MEM_U64(pIemCpu->iEffSeg, GCPtrEffDst, u64Imm);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+}
+
+
+/** Opcode 0xc8. */
+FNIEMOP_STUB(iemOp_enter_Iw_Ib);
+/** Opcode 0xc9. */
+FNIEMOP_STUB(iemOp_leave);
+
+
+/** Opcode 0xca. */
+FNIEMOP_DEF(iemOp_retf_Iw)
+{
+    IEMOP_MNEMONIC("retf Iw");
+    uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_retf, pIemCpu->enmEffOpSize, u16Imm);
+}
+
+
+/** Opcode 0xcb. */
+FNIEMOP_DEF(iemOp_retf)
+{
+    IEMOP_MNEMONIC("retf");
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_retf, pIemCpu->enmEffOpSize, 0);
+}
+
+
+/** Opcode 0xcc. */
+FNIEMOP_DEF(iemOp_int_3)
+{
+    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_int, X86_XCPT_BP, true /*fIsBpInstr*/);
+}
+
+
+/** Opcode 0xcd. */
+FNIEMOP_DEF(iemOp_int_Ib)
+{
+    uint8_t u8Int; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &u8Int);
+    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_int, u8Int, false /*fIsBpInstr*/);
+}
+
+
+/** Opcode 0xce. */
+FNIEMOP_DEF(iemOp_into)
+{
+    IEM_MC_BEGIN(2, 0);
+    IEM_MC_ARG_CONST(uint8_t,   u8Int,      /*=*/ X86_XCPT_OF, 0);
+    IEM_MC_ARG_CONST(bool,      fIsBpInstr, /*=*/ false, 1);
+    IEM_MC_CALL_CIMPL_2(iemCImpl_int, u8Int, fIsBpInstr);
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0xcf. */
+FNIEMOP_DEF(iemOp_iret)
+{
+    IEMOP_MNEMONIC("iret");
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_iret, pIemCpu->enmEffOpSize);
+}
+
+
+/** Opcode 0xd0. */
+FNIEMOP_DEF(iemOp_Grp2_Eb_1)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    PCIEMOPSHIFTSIZES pImpl;
+    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
+    {
+        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC("rol Eb,1"); break;
+        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC("ror Eb,1"); break;
+        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC("rcl Eb,1"); break;
+        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC("rcr Eb,1"); break;
+        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC("shl Eb,1"); break;
+        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC("shr Eb,1"); break;
+        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC("sar Eb,1"); break;
+        case 6: return IEMOP_RAISE_INVALID_LOCK_PREFIX();
+    }
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register */
+        IEMOP_HLP_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(3, 0);
+        IEM_MC_ARG(uint8_t *,       pu8Dst,             0);
+        IEM_MC_ARG_CONST(uint8_t,   cShiftArg,/*=*/1,   1);
+        IEM_MC_ARG(uint32_t *,      pEFlags,            2);
+        IEM_MC_REF_GREG_U8(pu8Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+        IEM_MC_REF_EFLAGS(pEFlags);
+        IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, cShiftArg, pEFlags);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory */
+        IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+        IEM_MC_BEGIN(3, 2);
+        IEM_MC_ARG(uint8_t *,       pu8Dst,             0);
+        IEM_MC_ARG_CONST(uint8_t,   cShiftArg,/*=*/1,   1);
+        IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags,        2);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+        IEM_MC_MEM_MAP(pu8Dst, IEM_ACCESS_DATA_RW, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+        IEM_MC_FETCH_EFLAGS(EFlags);
+        IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, cShiftArg, pEFlags);
+
+        IEM_MC_MEM_COMMIT_AND_UNMAP(pu8Dst, IEM_ACCESS_DATA_RW);
+        IEM_MC_COMMIT_EFLAGS(EFlags);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+
+/** Opcode 0xd1. */
+FNIEMOP_DEF(iemOp_Grp2_Ev_1)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    PCIEMOPSHIFTSIZES pImpl;
+    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
+    {
+        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC("rol Ev,1"); break;
+        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC("ror Ev,1"); break;
+        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC("rcl Ev,1"); break;
+        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC("rcr Ev,1"); break;
+        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC("shl Ev,1"); break;
+        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC("shr Ev,1"); break;
+        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC("sar Ev,1"); break;
+        case 6: return IEMOP_RAISE_INVALID_LOCK_PREFIX();
+    }
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register */
+        IEMOP_HLP_NO_LOCK_PREFIX();
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint16_t *,      pu16Dst,           0);
+                IEM_MC_ARG_CONST(uint8_t,   cShiftArg,/*=1*/1, 1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,           2);
+                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, cShiftArg, pEFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint32_t *,      pu32Dst,           0);
+                IEM_MC_ARG_CONST(uint8_t,   cShiftArg,/*=1*/1, 1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,           2);
+                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, cShiftArg, pEFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint64_t *,      pu64Dst,           0);
+                IEM_MC_ARG_CONST(uint8_t,   cShiftArg,/*=1*/1, 1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,           2);
+                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, cShiftArg, pEFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    else
+    {
+        /* memory */
+        IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint16_t *,      pu16Dst,            0);
+                IEM_MC_ARG_CONST(uint8_t,   cShiftArg,/*=1*/1,  1);
+                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags,        2);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_RW, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, cShiftArg, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_RW);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint32_t *,      pu32Dst,            0);
+                IEM_MC_ARG_CONST(uint8_t,   cShiftArg,/*=1*/1,  1);
+                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags,        2);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_RW, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, cShiftArg, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_RW);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint64_t *,      pu64Dst,            0);
+                IEM_MC_ARG_CONST(uint8_t,   cShiftArg,/*=1*/1,  1);
+                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags,        2);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_RW, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, cShiftArg, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_RW);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+}
+
+
+/** Opcode 0xd2. */
+FNIEMOP_DEF(iemOp_Grp2_Eb_CL)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    PCIEMOPSHIFTSIZES pImpl;
+    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
+    {
+        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC("rol Eb,CL"); break;
+        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC("ror Eb,CL"); break;
+        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC("rcl Eb,CL"); break;
+        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC("rcr Eb,CL"); break;
+        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC("shl Eb,CL"); break;
+        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC("shr Eb,CL"); break;
+        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC("sar Eb,CL"); break;
+        case 6: return IEMOP_RAISE_INVALID_LOCK_PREFIX();
+    }
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register */
+        IEMOP_HLP_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(3, 0);
+        IEM_MC_ARG(uint8_t *,   pu8Dst,     0);
+        IEM_MC_ARG(uint8_t,     cShiftArg,  1);
+        IEM_MC_ARG(uint32_t *,  pEFlags,    2);
+        IEM_MC_REF_GREG_U8(pu8Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+        IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
+        IEM_MC_REF_EFLAGS(pEFlags);
+        IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, cShiftArg, pEFlags);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory */
+        IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+        IEM_MC_BEGIN(3, 2);
+        IEM_MC_ARG(uint8_t *,   pu8Dst,          0);
+        IEM_MC_ARG(uint8_t,     cShiftArg,       1);
+        IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+        IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
+        IEM_MC_MEM_MAP(pu8Dst, IEM_ACCESS_DATA_RW, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+        IEM_MC_FETCH_EFLAGS(EFlags);
+        IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU8, pu8Dst, cShiftArg, pEFlags);
+
+        IEM_MC_MEM_COMMIT_AND_UNMAP(pu8Dst, IEM_ACCESS_DATA_RW);
+        IEM_MC_COMMIT_EFLAGS(EFlags);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0xd3. */
+FNIEMOP_DEF(iemOp_Grp2_Ev_CL)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    PCIEMOPSHIFTSIZES pImpl;
+    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
+    {
+        case 0: pImpl = &g_iemAImpl_rol; IEMOP_MNEMONIC("rol Ev,CL"); break;
+        case 1: pImpl = &g_iemAImpl_ror; IEMOP_MNEMONIC("ror Ev,CL"); break;
+        case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC("rcl Ev,CL"); break;
+        case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC("rcr Ev,CL"); break;
+        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC("shl Ev,CL"); break;
+        case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC("shr Ev,CL"); break;
+        case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC("sar Ev,CL"); break;
+        case 6: return IEMOP_RAISE_INVALID_LOCK_PREFIX();
+    }
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register */
+        IEMOP_HLP_NO_LOCK_PREFIX();
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint16_t *,      pu16Dst,    0);
+                IEM_MC_ARG(uint8_t,         cShiftArg,  1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,    2);
+                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, cShiftArg, pEFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint32_t *,      pu32Dst,    0);
+                IEM_MC_ARG(uint8_t,         cShiftArg,  1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,    2);
+                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, cShiftArg, pEFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint64_t *,      pu64Dst,    0);
+                IEM_MC_ARG(uint8_t,         cShiftArg,  1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,    2);
+                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, cShiftArg, pEFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    else
+    {
+        /* memory */
+        IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint16_t *,  pu16Dst,    0);
+                IEM_MC_ARG(uint8_t,     cShiftArg,  1);
+                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
+                IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_RW, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU16, pu16Dst, cShiftArg, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_RW);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint32_t *,  pu32Dst,    0);
+                IEM_MC_ARG(uint8_t,     cShiftArg,  1);
+                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
+                IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_RW, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU32, pu32Dst, cShiftArg, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_RW);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint64_t *,  pu64Dst,    0);
+                IEM_MC_ARG(uint8_t,     cShiftArg,  1);
+                IEM_MC_ARG_LOCAL_EFLAGS(pEFlags, EFlags, 2);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_FETCH_GREG_U8(cShiftArg, X86_GREG_xCX);
+                IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_RW, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(pImpl->pfnNormalU64, pu64Dst, cShiftArg, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_RW);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+}
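+
+/* Encoding illustration (assumed bytes): D3 /4 shifts by CL, e.g.
+       d3 e0    ; shl eax, cl
+   (mod=3, reg=4 -> shl, rm=0 -> eAX); masking the count down to 5 bits (6 in
+   64-bit mode) is presumably left to the pfnNormalUxx workers. */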
+
+/** Opcode 0xd4. */
+FNIEMOP_STUB(iemOp_aam_Ib);
+/** Opcode 0xd5. */
+FNIEMOP_STUB(iemOp_aad_Ib);
+/** Opcode 0xd7. */
+FNIEMOP_STUB(iemOp_xlat);
+/** Opcode 0xd8. */
+FNIEMOP_STUB(iemOp_EscF0);
+/** Opcode 0xd9. */
+FNIEMOP_STUB(iemOp_EscF1);
+/** Opcode 0xda. */
+FNIEMOP_STUB(iemOp_EscF2);
+/** Opcode 0xdb. */
+FNIEMOP_STUB(iemOp_EscF3);
+/** Opcode 0xdc. */
+FNIEMOP_STUB(iemOp_EscF4);
+/** Opcode 0xdd. */
+FNIEMOP_STUB(iemOp_EscF5);
+/** Opcode 0xde. */
+FNIEMOP_STUB(iemOp_EscF6);
+/** Opcode 0xdf. */
+FNIEMOP_STUB(iemOp_EscF7);
+
+
+/** Opcode 0xe0. */
+FNIEMOP_DEF(iemOp_loopne_Jb)
+{
+    IEMOP_MNEMONIC("loopne Jb");
+    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(pIemCpu, &i8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    switch (pIemCpu->enmEffAddrMode)
+    {
+        case IEMMODE_16BIT:
+            IEM_MC_BEGIN(0,0);
+            IEM_MC_SUB_GREG_U16(X86_GREG_xCX, 1);
+            IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(X86_EFL_ZF) {
+                IEM_MC_REL_JMP_S8(i8Imm);
+            } IEM_MC_ELSE() {
+                IEM_MC_ADVANCE_RIP();
+            } IEM_MC_ENDIF();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_32BIT:
+            IEM_MC_BEGIN(0,0);
+            IEM_MC_SUB_GREG_U32(X86_GREG_xCX, 1);
+            IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(X86_EFL_ZF) {
+                IEM_MC_REL_JMP_S8(i8Imm);
+            } IEM_MC_ELSE() {
+                IEM_MC_ADVANCE_RIP();
+            } IEM_MC_ENDIF();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_64BIT:
+            IEM_MC_BEGIN(0,0);
+            IEM_MC_SUB_GREG_U64(X86_GREG_xCX, 1);
+            IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(X86_EFL_ZF) {
+                IEM_MC_REL_JMP_S8(i8Imm);
+            } IEM_MC_ELSE() {
+                IEM_MC_ADVANCE_RIP();
+            } IEM_MC_ENDIF();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+}
+
+
+/** Opcode 0xe1. */
+FNIEMOP_DEF(iemOp_loope_Jb)
+{
+    IEMOP_MNEMONIC("loope Jb");
+    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(pIemCpu, &i8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    switch (pIemCpu->enmEffAddrMode)
+    {
+        case IEMMODE_16BIT:
+            IEM_MC_BEGIN(0,0);
+            IEM_MC_SUB_GREG_U16(X86_GREG_xCX, 1);
+            IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(X86_EFL_ZF) {
+                IEM_MC_REL_JMP_S8(i8Imm);
+            } IEM_MC_ELSE() {
+                IEM_MC_ADVANCE_RIP();
+            } IEM_MC_ENDIF();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_32BIT:
+            IEM_MC_BEGIN(0,0);
+            IEM_MC_SUB_GREG_U32(X86_GREG_xCX, 1);
+            IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(X86_EFL_ZF) {
+                IEM_MC_REL_JMP_S8(i8Imm);
+            } IEM_MC_ELSE() {
+                IEM_MC_ADVANCE_RIP();
+            } IEM_MC_ENDIF();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_64BIT:
+            IEM_MC_BEGIN(0,0);
+            IEM_MC_SUB_GREG_U64(X86_GREG_xCX, 1);
+            IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(X86_EFL_ZF) {
+                IEM_MC_REL_JMP_S8(i8Imm);
+            } IEM_MC_ELSE() {
+                IEM_MC_ADVANCE_RIP();
+            } IEM_MC_ENDIF();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+}
+
+
+/** Opcode 0xe2. */
+FNIEMOP_DEF(iemOp_loop_Jb)
+{
+    IEMOP_MNEMONIC("loop Jb");
+    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(pIemCpu, &i8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    /** @todo Check out the #GP case if EIP < CS.Base or EIP > CS.Limit when
+     * using the 32-bit operand size override.  How can that be restarted?  See
+     * weird pseudo code in intel manual. */
+    switch (pIemCpu->enmEffAddrMode)
+    {
+        case IEMMODE_16BIT:
+            IEM_MC_BEGIN(0,0);
+            IEM_MC_SUB_GREG_U16(X86_GREG_xCX, 1);
+            IEM_MC_IF_CX_IS_NZ() {
+                IEM_MC_REL_JMP_S8(i8Imm);
+            } IEM_MC_ELSE() {
+                IEM_MC_ADVANCE_RIP();
+            } IEM_MC_ENDIF();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_32BIT:
+            IEM_MC_BEGIN(0,0);
+            IEM_MC_SUB_GREG_U32(X86_GREG_xCX, 1);
+            IEM_MC_IF_ECX_IS_NZ() {
+                IEM_MC_REL_JMP_S8(i8Imm);
+            } IEM_MC_ELSE() {
+                IEM_MC_ADVANCE_RIP();
+            } IEM_MC_ENDIF();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_64BIT:
+            IEM_MC_BEGIN(0,0);
+            IEM_MC_SUB_GREG_U64(X86_GREG_xCX, 1);
+            IEM_MC_IF_RCX_IS_NZ() {
+                IEM_MC_REL_JMP_S8(i8Imm);
+            } IEM_MC_ELSE() {
+                IEM_MC_ADVANCE_RIP();
+            } IEM_MC_ENDIF();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+}
+
+
+/** Opcode 0xe3. */
+FNIEMOP_DEF(iemOp_jecxz_Jb)
+{
+    IEMOP_MNEMONIC("jecxz Jb");
+    int8_t i8Imm; IEM_OPCODE_GET_NEXT_S8(pIemCpu, &i8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    switch (pIemCpu->enmEffAddrMode)
+    {
+        case IEMMODE_16BIT:
+            IEM_MC_BEGIN(0,0);
+            IEM_MC_IF_CX_IS_NZ() {
+                IEM_MC_ADVANCE_RIP();
+            } IEM_MC_ELSE() {
+                IEM_MC_REL_JMP_S8(i8Imm);
+            } IEM_MC_ENDIF();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_32BIT:
+            IEM_MC_BEGIN(0,0);
+            IEM_MC_IF_ECX_IS_NZ() {
+                IEM_MC_ADVANCE_RIP();
+            } IEM_MC_ELSE() {
+                IEM_MC_REL_JMP_S8(i8Imm);
+            } IEM_MC_ENDIF();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_64BIT:
+            IEM_MC_BEGIN(0,0);
+            IEM_MC_IF_RCX_IS_NZ() {
+                IEM_MC_ADVANCE_RIP();
+            } IEM_MC_ELSE() {
+                IEM_MC_REL_JMP_S8(i8Imm);
+            } IEM_MC_ENDIF();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+}
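+
+
+/*
+ * Purely illustrative sketch (not part of this change) of what the 0xe0..0xe3
+ * branch conditions above compute, ignoring address-size selection.  The
+ * helper name is invented for the example; the real work is done by the
+ * IEM_MC_* blocks.  Note that jcxz only tests CX while the loop forms
+ * decrement it first, and jcxz branches when the count is zero.
+ */
+static bool iemExampleLoopBranchTaken(uint16_t *pu16CX, uint32_t fEFlags, uint8_t bOpcode)
+{
+    if (bOpcode != 0xe3)
+        *pu16CX -= 1;
+    switch (bOpcode)
+    {
+        case 0xe0: return *pu16CX != 0 && !(fEFlags & X86_EFL_ZF);  /* loopne */
+        case 0xe1: return *pu16CX != 0 &&  (fEFlags & X86_EFL_ZF);  /* loope  */
+        case 0xe2: return *pu16CX != 0;                             /* loop   */
+        case 0xe3: return *pu16CX == 0;                             /* jcxz   */
+        default:   return false;
+    }
+}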
+
+
+/** Opcode 0xe4 */
+FNIEMOP_DEF(iemOp_in_AL_Ib)
+{
+    IEMOP_MNEMONIC("in eAX,Ib");
+    uint8_t u8Imm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &u8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_in, u8Imm, 1);
+}
+
+
+/** Opcode 0xe5 */
+FNIEMOP_DEF(iemOp_in_eAX_Ib)
+{
+    IEMOP_MNEMONIC("in eAX,Ib");
+    uint8_t u8Imm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &u8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_in, u8Imm, pIemCpu->enmEffOpSize == IEMMODE_16BIT ? 2 : 4);
+}
+
+
+/** Opcode 0xe6 */
+FNIEMOP_DEF(iemOp_out_Ib_AL)
+{
+    IEMOP_MNEMONIC("out Ib,AL");
+    uint8_t u8Imm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &u8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_out, u8Imm, 1);
+}
+
+
+/** Opcode 0xe7 */
+FNIEMOP_DEF(iemOp_out_Ib_eAX)
+{
+    IEMOP_MNEMONIC("out Ib,eAX");
+    uint8_t u8Imm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &u8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_out, u8Imm, pIemCpu->enmEffOpSize == IEMMODE_16BIT ? 2 : 4);
+}
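+
+
+/*
+ * Sketch only: all four immediate-port forms above funnel into iemCImpl_in /
+ * iemCImpl_out, and the second argument is just the access width in bytes.
+ * The helper below merely restates the ?: expressions for clarity and is not
+ * part of the sources.
+ */
+static uint8_t iemExampleInOutcbValue(IEMMODE enmEffOpSize, bool fByteForm)
+{
+    if (fByteForm)                                  /* 0xe4/0xe6: the AL forms are always 1 byte. */
+        return 1;
+    return enmEffOpSize == IEMMODE_16BIT ? 2 : 4;   /* 0xe5/0xe7: AX vs EAX. */
+}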
+
+
+/** Opcode 0xe8. */
+FNIEMOP_DEF(iemOp_call_Jv)
+{
+    IEMOP_MNEMONIC("call Jv");
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+        {
+            uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+            return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_call_rel_16, (int16_t)u16Imm);
+        }
+
+        case IEMMODE_32BIT:
+        {
+            uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+            return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_call_rel_32, (int32_t)u32Imm);
+        }
+
+        case IEMMODE_64BIT:
+        {
+            uint64_t u64Imm; IEM_OPCODE_GET_NEXT_S32_SX_U64(pIemCpu, &u64Imm);
+            return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_call_rel_64, u64Imm);
+        }
+        default:
+            AssertFailedReturn(VERR_INTERNAL_ERROR_3);
+    }
+}
+
+
+/** Opcode 0xe9. */
+FNIEMOP_DEF(iemOp_jmp_Jv)
+{
+    IEMOP_MNEMONIC("jmp Jv");
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+        {
+            uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+            IEM_MC_BEGIN(0, 0);
+            IEM_MC_REL_JMP_S16((int16_t)u16Imm);
+            IEM_MC_END();
+            return VINF_SUCCESS;
+        }
+
+        case IEMMODE_64BIT:
+        case IEMMODE_32BIT:
+        {
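+            /* Even with a 64-bit operand size the near-jmp immediate is only 32 bits
+               wide; it is sign-extended when applied to RIP. */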
+            uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+            IEM_MC_BEGIN(0, 0);
+            IEM_MC_REL_JMP_S32((int32_t)u32Imm);
+            IEM_MC_END();
+            return VINF_SUCCESS;
+        }
+
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+}
+
+
+/** Opcode 0xea. */
+FNIEMOP_DEF(iemOp_jmp_Ap)
+{
+    IEMOP_MNEMONIC("jmp Ap");
+    IEMOP_HLP_NO_64BIT();
+
+    /* Decode the far pointer address and pass it on to the far call C implementation. */
+    uint32_t offSeg;
+    if (pIemCpu->enmEffOpSize != IEMMODE_16BIT)
+        IEM_OPCODE_GET_NEXT_U32(pIemCpu, &offSeg);
+    else
+    {
+        uint16_t offSeg16; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &offSeg16);
+        offSeg = offSeg16;
+    }
+    uint16_t uSel;  IEM_OPCODE_GET_NEXT_U16(pIemCpu, &uSel);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_FarJmp, uSel, offSeg);
+}
+
+
+/** Opcode 0xeb. */
+FNIEMOP_DEF(iemOp_jmp_Jb)
+{
+    IEMOP_MNEMONIC("jmp Jb");
+    uint8_t u8Imm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &u8Imm);
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_REL_JMP_S8((int8_t)u8Imm);
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0xec */
+FNIEMOP_DEF(iemOp_in_AL_DX)
+{
+    IEMOP_MNEMONIC("in  AL,DX");
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_in_eAX_DX, 1);
+}
+
+
+/** Opcode 0xed */
+FNIEMOP_DEF(iemOp_in_eAX_DX)
+{
+    IEMOP_MNEMONIC("in  eAX,DX");
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_in_eAX_DX, pIemCpu->enmEffOpSize == IEMMODE_16BIT ? 2 : 4);
+}
+
+
+/** Opcode 0xee */
+FNIEMOP_DEF(iemOp_out_DX_AL)
+{
+    IEMOP_MNEMONIC("out DX,AL");
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_out_DX_eAX, 1);
+}
+
+
+/** Opcode 0xef */
+FNIEMOP_DEF(iemOp_out_DX_eAX)
+{
+    IEMOP_MNEMONIC("out DX,eAX");
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_1(iemCImpl_out_DX_eAX, pIemCpu->enmEffOpSize == IEMMODE_16BIT ? 2 : 4);
+}
+
+
+/** Opcode 0xf0. */
+FNIEMOP_DEF(iemOp_lock)
+{
+    pIemCpu->fPrefixes |= IEM_OP_PRF_LOCK;
+
+    uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+    return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+}
+
+
+/** Opcode 0xf2. */
+FNIEMOP_DEF(iemOp_repne)
+{
+    /* This overrides any previous REPE prefix. */
+    pIemCpu->fPrefixes &= ~IEM_OP_PRF_REPZ;
+    pIemCpu->fPrefixes |= IEM_OP_PRF_REPNZ;
+
+    uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+    return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+}
+
+
+/** Opcode 0xf3. */
+FNIEMOP_DEF(iemOp_repe)
+{
+    /* This overrides any previous REPNE prefix. */
+    pIemCpu->fPrefixes &= ~IEM_OP_PRF_REPNZ;
+    pIemCpu->fPrefixes |= IEM_OP_PRF_REPZ;
+
+    uint8_t b; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+    return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+}
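+
+
+/*
+ * Illustrative sketch of the prefix bookkeeping above; the helper name is
+ * invented for the example.  REPE and REPNE displace one another, and the
+ * decoder simply recurses on the next opcode byte via g_apfnOneByteMap, so
+ * "f3 f2 a4" ends up with only IEM_OP_PRF_REPNZ set when movsb is dispatched.
+ */
+static uint32_t iemExampleFoldPrefix(uint32_t fPrefixes, uint8_t bPrefix)
+{
+    switch (bPrefix)
+    {
+        case 0xf0: return fPrefixes | IEM_OP_PRF_LOCK;
+        case 0xf2: return (fPrefixes & ~IEM_OP_PRF_REPZ)  | IEM_OP_PRF_REPNZ;
+        case 0xf3: return (fPrefixes & ~IEM_OP_PRF_REPNZ) | IEM_OP_PRF_REPZ;
+        default:   return fPrefixes;
+    }
+}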
+
+
+/** Opcode 0xf4. */
+FNIEMOP_STUB(iemOp_hlt);
+/** Opcode 0xf5. */
+FNIEMOP_STUB(iemOp_cmc);
+
+
+/**
+ * Common implementation of 'inc/dec/not/neg Eb'.
+ *
+ * @param   bRm             The RM byte.
+ * @param   pImpl           The instruction implementation.
+ */
+FNIEMOP_DEF_2(iemOpCommonUnaryEb, uint8_t, bRm, PCIEMOPUNARYSIZES, pImpl)
+{
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register access */
+        IEM_MC_BEGIN(2, 0);
+        IEM_MC_ARG(uint8_t *,   pu8Dst, 0);
+        IEM_MC_ARG(uint32_t *,  pEFlags, 1);
+        IEM_MC_REF_GREG_U8(pu8Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+        IEM_MC_REF_EFLAGS(pEFlags);
+        IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnNormalU8, pu8Dst, pEFlags);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory access. */
+        IEM_MC_BEGIN(2, 2);
+        IEM_MC_ARG(uint8_t *,       pu8Dst,          0);
+        IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags, 1);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+        IEM_MC_MEM_MAP(pu8Dst, IEM_ACCESS_DATA_RW, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+        IEM_MC_FETCH_EFLAGS(EFlags);
+        if (!(pIemCpu->fPrefixes & IEM_OP_PRF_LOCK))
+            IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnNormalU8, pu8Dst, pEFlags);
+        else
+            IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnLockedU8, pu8Dst, pEFlags);
+
+        IEM_MC_MEM_COMMIT_AND_UNMAP(pu8Dst, IEM_ACCESS_DATA_RW);
+        IEM_MC_COMMIT_EFLAGS(EFlags);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Common implementation of 'inc/dec/not/neg Ev'.
+ *
+ * @param   bRm             The RM byte.
+ * @param   pImpl           The instruction implementation.
+ */
+FNIEMOP_DEF_2(iemOpCommonUnaryEv, uint8_t, bRm, PCIEMOPUNARYSIZES, pImpl)
+{
+    /* Registers are handled by a common worker. */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+        return FNIEMOP_CALL_2(iemOpCommonUnaryGReg, pImpl, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+
+    /* Memory we do here. */
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+            IEM_MC_BEGIN(2, 2);
+            IEM_MC_ARG(uint16_t *,      pu16Dst,         0);
+            IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags, 1);
+            IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+            IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_RW, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+            IEM_MC_FETCH_EFLAGS(EFlags);
+            if (!(pIemCpu->fPrefixes & IEM_OP_PRF_LOCK))
+                IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnNormalU16, pu16Dst, pEFlags);
+            else
+                IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnLockedU16, pu16Dst, pEFlags);
+
+            IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_RW);
+            IEM_MC_COMMIT_EFLAGS(EFlags);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_32BIT:
+            IEM_MC_BEGIN(2, 2);
+            IEM_MC_ARG(uint32_t *,      pu32Dst,         0);
+            IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags, 1);
+            IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+            IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_RW, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+            IEM_MC_FETCH_EFLAGS(EFlags);
+            if (!(pIemCpu->fPrefixes & IEM_OP_PRF_LOCK))
+                IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnNormalU32, pu32Dst, pEFlags);
+            else
+                IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnLockedU32, pu32Dst, pEFlags);
+
+            IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_RW);
+            IEM_MC_COMMIT_EFLAGS(EFlags);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_64BIT:
+            IEM_MC_BEGIN(2, 2);
+            IEM_MC_ARG(uint64_t *,      pu64Dst,         0);
+            IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags, 1);
+            IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+            IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_RW, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+            IEM_MC_FETCH_EFLAGS(EFlags);
+            if (!(pIemCpu->fPrefixes & IEM_OP_PRF_LOCK))
+                IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnNormalU64, pu64Dst, pEFlags);
+            else
+                IEM_MC_CALL_VOID_AIMPL_2(pImpl->pfnLockedU64, pu64Dst, pEFlags);
+
+            IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_RW);
+            IEM_MC_COMMIT_EFLAGS(EFlags);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+}
+
+
+/** Opcode 0xf6 /0. */
+FNIEMOP_DEF_1(iemOp_grp3_test_Eb, uint8_t, bRm)
+{
+    IEMOP_MNEMONIC("test Eb,Ib");
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register access */
+        uint8_t u8Imm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &u8Imm);
+        IEMOP_HLP_NO_LOCK_PREFIX();
+
+        IEM_MC_BEGIN(3, 0);
+        IEM_MC_ARG(uint8_t *,       pu8Dst,             0);
+        IEM_MC_ARG_CONST(uint8_t,   u8Src,/*=*/u8Imm,   1);
+        IEM_MC_ARG(uint32_t *,      pEFlags,            2);
+        IEM_MC_REF_GREG_U8(pu8Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+        IEM_MC_REF_EFLAGS(pEFlags);
+        IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_test_u8, pu8Dst, u8Src, pEFlags);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory access. */
+        IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+
+        IEM_MC_BEGIN(3, 2);
+        IEM_MC_ARG(uint8_t *,       pu8Dst,             0);
+        IEM_MC_ARG(uint8_t,         u8Src,              1);
+        IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,    2);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+        uint8_t u8Imm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &u8Imm);
+        IEM_MC_ASSIGN(u8Src, u8Imm);
+        IEM_MC_MEM_MAP(pu8Dst, IEM_ACCESS_DATA_R, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+        IEM_MC_FETCH_EFLAGS(EFlags);
+        IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_test_u8, pu8Dst, u8Src, pEFlags);
+
+        IEM_MC_MEM_COMMIT_AND_UNMAP(pu8Dst, IEM_ACCESS_DATA_R);
+        IEM_MC_COMMIT_EFLAGS(EFlags);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
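+
+
+/*
+ * Illustration only -- the real iemAImpl_test_u8 is an assembly worker.  TEST
+ * is an AND that updates SF/ZF/PF and clears CF/OF without writing the result
+ * back, which is why the memory variant above maps the operand read-only.
+ * (AF is left as-is here; architecturally it is undefined after TEST.)
+ */
+static void iemExampleTestU8(uint8_t const *pu8Dst, uint8_t u8Src, uint32_t *pfEFlags)
+{
+    uint8_t const  uResult  = *pu8Dst & u8Src;  /* the result itself is discarded */
+    uint32_t       fEfl     = *pfEFlags & ~(uint32_t)(X86_EFL_CF | X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF | X86_EFL_OF);
+    unsigned       cSetBits = 0;
+    uint8_t        uTmp;
+    for (uTmp = uResult; uTmp; uTmp >>= 1)
+        cSetBits += uTmp & 1;
+    if (!(cSetBits & 1))
+        fEfl |= X86_EFL_PF;                     /* PF: even number of set bits in the low byte */
+    if (!uResult)
+        fEfl |= X86_EFL_ZF;
+    if (uResult & 0x80)
+        fEfl |= X86_EFL_SF;
+    *pfEFlags = fEfl;
+}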
+
+
+/** Opcode 0xf7 /0. */
+FNIEMOP_DEF_1(iemOp_grp3_test_Ev, uint8_t, bRm)
+{
+    IEMOP_MNEMONIC("test Ev,Iv");
+    IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register access */
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+            {
+                uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint16_t *,      pu16Dst,                0);
+                IEM_MC_ARG_CONST(uint16_t,  u16Src,/*=*/u16Imm,     1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                2);
+                IEM_MC_REF_GREG_U16(pu16Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_test_u16, pu16Dst, u16Src, pEFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+            }
+
+            case IEMMODE_32BIT:
+            {
+                uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint32_t *,      pu32Dst,                0);
+                IEM_MC_ARG_CONST(uint32_t,  u32Src,/*=*/u32Imm,     1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                2);
+                IEM_MC_REF_GREG_U32(pu32Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_test_u32, pu32Dst, u32Src, pEFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+            }
+
+            case IEMMODE_64BIT:
+            {
+                uint64_t u64Imm; IEM_OPCODE_GET_NEXT_S32_SX_U64(pIemCpu, &u64Imm);
+                IEM_MC_BEGIN(3, 0);
+                IEM_MC_ARG(uint64_t *,      pu64Dst,                0);
+                IEM_MC_ARG_CONST(uint64_t,  u64Src,/*=*/u64Imm,     1);
+                IEM_MC_ARG(uint32_t *,      pEFlags,                2);
+                IEM_MC_REF_GREG_U64(pu64Dst, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_test_u64, pu64Dst, u64Src, pEFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+            }
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    else
+    {
+        /* memory access. */
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+            {
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint16_t *,      pu16Dst,            0);
+                IEM_MC_ARG(uint16_t,        u16Src,             1);
+                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,    2);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
+                IEM_MC_ASSIGN(u16Src, u16Imm);
+                IEM_MC_MEM_MAP(pu16Dst, IEM_ACCESS_DATA_R, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_test_u16, pu16Dst, u16Src, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu16Dst, IEM_ACCESS_DATA_R);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+            }
+
+            case IEMMODE_32BIT:
+            {
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint32_t *,      pu32Dst,            0);
+                IEM_MC_ARG(uint32_t,        u32Src,             1);
+                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,    2);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                uint32_t u32Imm; IEM_OPCODE_GET_NEXT_U32(pIemCpu, &u32Imm);
+                IEM_MC_ASSIGN(u32Src, u32Imm);
+                IEM_MC_MEM_MAP(pu32Dst, IEM_ACCESS_DATA_R, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_test_u32, pu32Dst, u32Src, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu32Dst, IEM_ACCESS_DATA_R);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+            }
+
+            case IEMMODE_64BIT:
+            {
+                IEM_MC_BEGIN(3, 2);
+                IEM_MC_ARG(uint64_t *,      pu64Dst,            0);
+                IEM_MC_ARG(uint64_t,        u64Src,             1);
+                IEM_MC_ARG_LOCAL_EFLAGS(    pEFlags, EFlags,    2);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                uint64_t u64Imm; IEM_OPCODE_GET_NEXT_S32_SX_U64(pIemCpu, &u64Imm);
+                IEM_MC_ASSIGN(u64Src, u64Imm);
+                IEM_MC_MEM_MAP(pu64Dst, IEM_ACCESS_DATA_R, pIemCpu->iEffSeg, GCPtrEffDst, 0 /*arg*/);
+                IEM_MC_FETCH_EFLAGS(EFlags);
+                IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_test_u64, pu64Dst, u64Src, pEFlags);
+
+                IEM_MC_MEM_COMMIT_AND_UNMAP(pu64Dst, IEM_ACCESS_DATA_R);
+                IEM_MC_COMMIT_EFLAGS(EFlags);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+            }
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+}
+
+
+/** Opcode 0xf6 /4, /5, /6 and /7. */
+FNIEMOP_DEF_2(iemOpCommonGrp3MulDivEb, uint8_t, bRm, PFNIEMAIMPLMULDIVU8, pfnU8)
+{
+    IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+#ifdef IEM_VERIFICATION_MODE
+    pIemCpu->fMulDivHack = true;
+#endif
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register access */
+        IEMOP_HLP_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(3, 0);
+        IEM_MC_ARG(uint16_t *,      pu16AX,     0);
+        IEM_MC_ARG(uint8_t,         u8Value,    1);
+        IEM_MC_ARG(uint32_t *,      pEFlags,    2);
+        IEM_MC_FETCH_GREG_U8(u8Value, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+        IEM_MC_REF_GREG_U16(pu16AX, X86_GREG_xAX);
+        IEM_MC_REF_EFLAGS(pEFlags);
+        IEM_MC_CALL_VOID_AIMPL_3(pfnU8, pu16AX, u8Value, pEFlags);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /* memory access. */
+        IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+
+        IEM_MC_BEGIN(3, 1);
+        IEM_MC_ARG(uint16_t *,      pu16AX,     0);
+        IEM_MC_ARG(uint8_t,         u8Value,    1);
+        IEM_MC_ARG(uint32_t *,      pEFlags,    2);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+        IEM_MC_FETCH_MEM_U8(u8Value, pIemCpu->iEffSeg, GCPtrEffDst);
+        IEM_MC_REF_GREG_U16(pu16AX, X86_GREG_xAX);
+        IEM_MC_REF_EFLAGS(pEFlags);
+        IEM_MC_CALL_VOID_AIMPL_3(pfnU8, pu16AX, u8Value, pEFlags);
+
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
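+
+
+/*
+ * Illustration only: the 8-bit multiply/divide family reads AL (or AL:AH) and
+ * writes the whole of AX, which is why the worker above references pu16AX
+ * rather than an 8-bit register.  Flag handling is omitted from this sketch
+ * and the helper name is invented for the example.
+ */
+static void iemExampleMulU8(uint16_t *pu16AX, uint8_t u8Value)
+{
+    *pu16AX = (uint16_t)((uint16_t)(uint8_t)*pu16AX * u8Value);    /* AX = AL * r/m8 */
+}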
+
+
+/** Opcode 0xf7 /4, /5, /6 and /7. */
+FNIEMOP_DEF_2(iemOpCommonGrp3MulDivEv, uint8_t, bRm, PCIEMOPMULDIVSIZES, pImpl)
+{
+    IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+#ifdef IEM_VERIFICATION_MODE
+    pIemCpu->fMulDivHack = true;
+#endif
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* register access */
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+            {
+                IEMOP_HLP_NO_LOCK_PREFIX();
+                IEM_MC_BEGIN(4, 1);
+                IEM_MC_ARG(uint16_t *,      pu16AX,     0);
+                IEM_MC_ARG(uint16_t *,      pu16DX,     1);
+                IEM_MC_ARG(uint16_t,        u16Value,   2);
+                IEM_MC_ARG(uint32_t *,      pEFlags,    3);
+                IEM_MC_LOCAL(int32_t,       rc);
+
+                IEM_MC_FETCH_GREG_U16(u16Value, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_GREG_U16(pu16AX, X86_GREG_xAX);
+                IEM_MC_REF_GREG_U16(pu16DX, X86_GREG_xDX);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_AIMPL_4(rc, pImpl->pfnU16, pu16AX, pu16DX, u16Value, pEFlags);
+                IEM_MC_IF_LOCAL_IS_Z(rc) {
+                    IEM_MC_ADVANCE_RIP();
+                } IEM_MC_ELSE() {
+                    IEM_MC_RAISE_DIVIDE_ERROR();
+                } IEM_MC_ENDIF();
+
+                IEM_MC_END();
+                return VINF_SUCCESS;
+            }
+
+            case IEMMODE_32BIT:
+            {
+                IEMOP_HLP_NO_LOCK_PREFIX();
+                IEM_MC_BEGIN(4, 1);
+                IEM_MC_ARG(uint32_t *,      pu32AX,     0);
+                IEM_MC_ARG(uint32_t *,      pu32DX,     1);
+                IEM_MC_ARG(uint32_t,        u32Value,   2);
+                IEM_MC_ARG(uint32_t *,      pEFlags,    3);
+                IEM_MC_LOCAL(int32_t,       rc);
+
+                IEM_MC_FETCH_GREG_U32(u32Value, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_GREG_U32(pu32AX, X86_GREG_xAX);
+                IEM_MC_REF_GREG_U32(pu32DX, X86_GREG_xDX);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_AIMPL_4(rc, pImpl->pfnU32, pu32AX, pu32DX, u32Value, pEFlags);
+                IEM_MC_IF_LOCAL_IS_Z(rc) {
+                    IEM_MC_ADVANCE_RIP();
+                } IEM_MC_ELSE() {
+                    IEM_MC_RAISE_DIVIDE_ERROR();
+                } IEM_MC_ENDIF();
+
+                IEM_MC_END();
+                return VINF_SUCCESS;
+            }
+
+            case IEMMODE_64BIT:
+            {
+                IEMOP_HLP_NO_LOCK_PREFIX();
+                IEM_MC_BEGIN(4, 1);
+                IEM_MC_ARG(uint64_t *,      pu64AX,     0);
+                IEM_MC_ARG(uint64_t *,      pu64DX,     1);
+                IEM_MC_ARG(uint64_t,        u64Value,   2);
+                IEM_MC_ARG(uint32_t *,      pEFlags,    3);
+                IEM_MC_LOCAL(int32_t,       rc);
+
+                IEM_MC_FETCH_GREG_U64(u64Value, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_REF_GREG_U64(pu64AX, X86_GREG_xAX);
+                IEM_MC_REF_GREG_U64(pu64DX, X86_GREG_xDX);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_AIMPL_4(rc, pImpl->pfnU64, pu64AX, pu64DX, u64Value, pEFlags);
+                IEM_MC_IF_LOCAL_IS_Z(rc) {
+                    IEM_MC_ADVANCE_RIP();
+                } IEM_MC_ELSE() {
+                    IEM_MC_RAISE_DIVIDE_ERROR();
+                } IEM_MC_ENDIF();
+
+                IEM_MC_END();
+                return VINF_SUCCESS;
+            }
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    else
+    {
+        /* memory access. */
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+            {
+                IEMOP_HLP_NO_LOCK_PREFIX();
+                IEM_MC_BEGIN(4, 2);
+                IEM_MC_ARG(uint16_t *,      pu16AX,     0);
+                IEM_MC_ARG(uint16_t *,      pu16DX,     1);
+                IEM_MC_ARG(uint16_t,        u16Value,   2);
+                IEM_MC_ARG(uint32_t *,      pEFlags,    3);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+                IEM_MC_LOCAL(int32_t,       rc);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_FETCH_MEM_U16(u16Value, pIemCpu->iEffSeg, GCPtrEffDst);
+                IEM_MC_REF_GREG_U16(pu16AX, X86_GREG_xAX);
+                IEM_MC_REF_GREG_U16(pu16DX, X86_GREG_xDX);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_AIMPL_4(rc, pImpl->pfnU16, pu16AX, pu16DX, u16Value, pEFlags);
+                IEM_MC_IF_LOCAL_IS_Z(rc) {
+                    IEM_MC_ADVANCE_RIP();
+                } IEM_MC_ELSE() {
+                    IEM_MC_RAISE_DIVIDE_ERROR();
+                } IEM_MC_ENDIF();
+
+                IEM_MC_END();
+                return VINF_SUCCESS;
+            }
+
+            case IEMMODE_32BIT:
+            {
+                IEMOP_HLP_NO_LOCK_PREFIX();
+                IEM_MC_BEGIN(4, 2);
+                IEM_MC_ARG(uint32_t *,      pu32AX,     0);
+                IEM_MC_ARG(uint32_t *,      pu32DX,     1);
+                IEM_MC_ARG(uint32_t,        u32Value,   2);
+                IEM_MC_ARG(uint32_t *,      pEFlags,    3);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+                IEM_MC_LOCAL(int32_t,       rc);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_FETCH_MEM_U32(u32Value, pIemCpu->iEffSeg, GCPtrEffDst);
+                IEM_MC_REF_GREG_U32(pu32AX, X86_GREG_xAX);
+                IEM_MC_REF_GREG_U32(pu32DX, X86_GREG_xDX);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_AIMPL_4(rc, pImpl->pfnU32, pu32AX, pu32DX, u32Value, pEFlags);
+                IEM_MC_IF_LOCAL_IS_Z(rc) {
+                    IEM_MC_ADVANCE_RIP();
+                } IEM_MC_ELSE() {
+                    IEM_MC_RAISE_DIVIDE_ERROR();
+                } IEM_MC_ENDIF();
+
+                IEM_MC_END();
+                return VINF_SUCCESS;
+            }
+
+            case IEMMODE_64BIT:
+            {
+                IEMOP_HLP_NO_LOCK_PREFIX();
+                IEM_MC_BEGIN(4, 2);
+                IEM_MC_ARG(uint64_t *,      pu64AX,     0);
+                IEM_MC_ARG(uint64_t *,      pu64DX,     1);
+                IEM_MC_ARG(uint64_t,        u64Value,   2);
+                IEM_MC_ARG(uint32_t *,      pEFlags,    3);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
+                IEM_MC_LOCAL(int32_t,       rc);
+
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_FETCH_MEM_U64(u64Value, pIemCpu->iEffSeg, GCPtrEffDst);
+                IEM_MC_REF_GREG_U64(pu64AX, X86_GREG_xAX);
+                IEM_MC_REF_GREG_U64(pu64DX, X86_GREG_xDX);
+                IEM_MC_REF_EFLAGS(pEFlags);
+                IEM_MC_CALL_AIMPL_4(rc, pImpl->pfnU64, pu64AX, pu64DX, u64Value, pEFlags);
+                IEM_MC_IF_LOCAL_IS_Z(rc) {
+                    IEM_MC_ADVANCE_RIP();
+                } IEM_MC_ELSE() {
+                    IEM_MC_RAISE_DIVIDE_ERROR();
+                } IEM_MC_ENDIF();
+
+                IEM_MC_END();
+                return VINF_SUCCESS;
+            }
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+}
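+
+
+/*
+ * Illustration only of the status-code pattern tested by IEM_MC_IF_LOCAL_IS_Z
+ * above: a division may have no representable result, in which case the
+ * worker must report failure so #DE can be raised before any register is
+ * modified.  The helper name is invented for the example.
+ */
+static int32_t iemExampleDivU16(uint16_t *pu16AX, uint16_t *pu16DX, uint16_t u16Divisor)
+{
+    uint32_t const uDividend = ((uint32_t)*pu16DX << 16) | *pu16AX;
+    if (!u16Divisor || uDividend / u16Divisor > UINT16_MAX)
+        return -1;                                  /* divide by zero or quotient overflow -> #DE */
+    *pu16AX = (uint16_t)(uDividend / u16Divisor);   /* quotient  */
+    *pu16DX = (uint16_t)(uDividend % u16Divisor);   /* remainder */
+    return 0;
+}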
+
+/** Opcode 0xf6. */
+FNIEMOP_DEF(iemOp_Grp3_Eb)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
+    {
+        case 0:
+            return FNIEMOP_CALL_1(iemOp_grp3_test_Eb, bRm);
+        case 1:
+            return IEMOP_RAISE_INVALID_LOCK_PREFIX();
+        case 2:
+            IEMOP_MNEMONIC("not Eb");
+            return FNIEMOP_CALL_2(iemOpCommonUnaryEb, bRm, &g_iemAImpl_not);
+        case 3:
+            IEMOP_MNEMONIC("neg Eb");
+            return FNIEMOP_CALL_2(iemOpCommonUnaryEb, bRm, &g_iemAImpl_neg);
+        case 4:
+            IEMOP_MNEMONIC("mul Eb");
+            return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEb, bRm, &iemAImpl_mul_u8);
+        case 5:
+            IEMOP_MNEMONIC("imul Eb");
+            return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEb, bRm, &iemAImpl_imul_u8);
+        case 6:
+            IEMOP_MNEMONIC("div Eb");
+            return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEb, bRm, &iemAImpl_div_u8);
+        case 7:
+            IEMOP_MNEMONIC("idiv Eb");
+            return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEb, bRm, &iemAImpl_idiv_u8);
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+}
+
+
+/** Opcode 0xf7. */
+FNIEMOP_DEF(iemOp_Grp3_Ev)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
+    {
+        case 0:
+            return FNIEMOP_CALL_1(iemOp_grp3_test_Ev, bRm);
+        case 1:
+            return IEMOP_RAISE_INVALID_LOCK_PREFIX();
+        case 2:
+            IEMOP_MNEMONIC("not Ev");
+            return FNIEMOP_CALL_2(iemOpCommonUnaryEv, bRm, &g_iemAImpl_not);
+        case 3:
+            IEMOP_MNEMONIC("neg Ev");
+            return FNIEMOP_CALL_2(iemOpCommonUnaryEv, bRm, &g_iemAImpl_neg);
+        case 4:
+            IEMOP_MNEMONIC("mul Ev");
+            return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEv, bRm, &g_iemAImpl_mul);
+        case 5:
+            IEMOP_MNEMONIC("imul Ev");
+            return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEv, bRm, &g_iemAImpl_imul);
+        case 6:
+            IEMOP_MNEMONIC("div Ev");
+            return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEv, bRm, &g_iemAImpl_div);
+        case 7:
+            IEMOP_MNEMONIC("idiv Ev");
+            return FNIEMOP_CALL_2(iemOpCommonGrp3MulDivEv, bRm, &g_iemAImpl_idiv);
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+}
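+
+
+/*
+ * Sketch only: group opcodes such as 0xf6/0xf7/0xfe/0xff reuse bits 5:3 of
+ * the ModRM byte as an opcode extension, which is what the switches in the
+ * group decoders above compute.  The helper name is invented for the example.
+ */
+static unsigned iemExampleModRmRegField(uint8_t bRm)
+{
+    return (bRm >> 3) & 7;      /* == (bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK */
+}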
+
+
+/** Opcode 0xf8. */
+FNIEMOP_DEF(iemOp_clc)
+{
+    IEMOP_MNEMONIC("clc");
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_CLEAR_EFL_BIT(X86_EFL_CF);
+    IEM_MC_ADVANCE_RIP();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0xf9. */
+FNIEMOP_DEF(iemOp_stc)
+{
+    IEMOP_MNEMONIC("slc");
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_SET_EFL_BIT(X86_EFL_CF);
+    IEM_MC_ADVANCE_RIP();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0xfa. */
+FNIEMOP_DEF(iemOp_cli)
+{
+    IEMOP_MNEMONIC("cli");
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_cli);
+}
+
+
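+/** Opcode 0xfb. */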
+FNIEMOP_DEF(iemOp_sti)
+{
+    IEMOP_MNEMONIC("sti");
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_sti);
+}
+
+
+/** Opcode 0xfc. */
+FNIEMOP_DEF(iemOp_cld)
+{
+    IEMOP_MNEMONIC("cld");
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_CLEAR_EFL_BIT(X86_EFL_DF);
+    IEM_MC_ADVANCE_RIP();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0xfd. */
+FNIEMOP_DEF(iemOp_std)
+{
+    IEMOP_MNEMONIC("std");
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEM_MC_BEGIN(0, 0);
+    IEM_MC_SET_EFL_BIT(X86_EFL_DF);
+    IEM_MC_ADVANCE_RIP();
+    IEM_MC_END();
+    return VINF_SUCCESS;
+}
+
+
+/** Opcode 0xfe. */
+FNIEMOP_DEF(iemOp_Grp4)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
+    {
+        case 0:
+            IEMOP_MNEMONIC("inc Ev");
+            return FNIEMOP_CALL_2(iemOpCommonUnaryEb, bRm, &g_iemAImpl_inc);
+        case 1:
+            IEMOP_MNEMONIC("dec Ev");
+            return FNIEMOP_CALL_2(iemOpCommonUnaryEb, bRm, &g_iemAImpl_dec);
+        default:
+            IEMOP_MNEMONIC("grp4-ud");
+            return IEMOP_RAISE_INVALID_OPCODE();
+    }
+}
+
+
+/**
+ * Opcode 0xff /2.
+ * @param   bRm             The RM byte.
+ */
+FNIEMOP_DEF_1(iemOp_Grp5_calln_Ev, uint8_t, bRm)
+{
+    AssertFailed(); // FNIEMOP_STUB
+    return VERR_NOT_IMPLEMENTED;
+}
+
+
+/**
+ * Opcode 0xff /3.
+ * @param   bRm             The RM byte.
+ */
+FNIEMOP_DEF_1(iemOp_Grp5_callf_Ep, uint8_t, bRm)
+{
+    IEMOP_MNEMONIC("callf Ep");
+    IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo Too early? */
+
+    /* Registers? How?? */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /** @todo How the heck does a 'callf eax' work? Probably just have to
+         *        search the docs... */
+        AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+    }
+
+    /* Far pointer loaded from memory. */
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+            IEM_MC_BEGIN(3, 1);
+            IEM_MC_ARG(uint16_t,        u16Sel,                         0);
+            IEM_MC_ARG(uint16_t,        offSeg,                         1);
+            IEM_MC_ARG_CONST(uint16_t,  enmEffOpSize, IEMMODE_16BIT,    2);
+            IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
+            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm);
+            IEM_MC_FETCH_MEM_U16(offSeg, pIemCpu->iEffSeg, GCPtrEffSrc);
+            IEM_MC_FETCH_MEM_U16(u16Sel, pIemCpu->iEffSeg, GCPtrEffSrc + 2);
+            IEM_MC_CALL_CIMPL_3(iemCImpl_callf, u16Sel, offSeg, enmEffOpSize);
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_32BIT:
+            if (pIemCpu->enmCpuMode != IEMMODE_64BIT)
+            {
+                IEM_MC_BEGIN(3, 1);
+                IEM_MC_ARG(uint16_t,        u16Sel,                         0);
+                IEM_MC_ARG(uint32_t,        offSeg,                         1);
+                IEM_MC_ARG_CONST(uint16_t,  enmEffOpSize, IEMMODE_32BIT,    2);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm);
+                IEM_MC_FETCH_MEM_U32(offSeg, pIemCpu->iEffSeg, GCPtrEffSrc);
+                IEM_MC_FETCH_MEM_U16(u16Sel, pIemCpu->iEffSeg, GCPtrEffSrc + 4);
+                IEM_MC_CALL_CIMPL_3(iemCImpl_callf, u16Sel, offSeg, enmEffOpSize);
+                IEM_MC_END();
+            }
+            else
+            {
+                IEM_MC_BEGIN(3, 1);
+                IEM_MC_ARG(uint16_t,        u16Sel,                         0);
+                IEM_MC_ARG(uint64_t,        offSeg,                         1);
+                IEM_MC_ARG_CONST(uint16_t,  enmEffOpSize, IEMMODE_32BIT,    2);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm);
+                IEM_MC_FETCH_MEM_S32_SX_U64(offSeg, pIemCpu->iEffSeg, GCPtrEffSrc);
+                IEM_MC_FETCH_MEM_U16(u16Sel, pIemCpu->iEffSeg, GCPtrEffSrc + 4);
+                IEM_MC_CALL_CIMPL_3(iemCImpl_callf, u16Sel, offSeg, enmEffOpSize);
+                IEM_MC_END();
+            }
+            return VINF_SUCCESS;
+
+        case IEMMODE_64BIT:
+            IEM_MC_BEGIN(3, 1);
+            IEM_MC_ARG(uint16_t,        u16Sel,                         0);
+            IEM_MC_ARG(uint64_t,        offSeg,                         1);
+            IEM_MC_ARG_CONST(uint16_t,  enmEffOpSize, IEMMODE_64BIT,    2);
+            IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
+            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm);
+            IEM_MC_FETCH_MEM_U64(offSeg, pIemCpu->iEffSeg, GCPtrEffSrc);
+            IEM_MC_FETCH_MEM_U16(u16Sel, pIemCpu->iEffSeg, GCPtrEffSrc + 8);
+            IEM_MC_CALL_CIMPL_3(iemCImpl_callf, u16Sel, offSeg, enmEffOpSize);
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+}
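+
+
+/*
+ * Illustration only (hypothetical struct, not in the sources): the far
+ * pointer operand fetched above stores the offset first and the selector
+ * word right after it, which is where the +2/+4/+8 selector fetches come
+ * from.  This is the 32-bit (m16:32) case.
+ */
+#pragma pack(1)
+typedef struct IEMEXAMPLEFARPTR32
+{
+    uint32_t offSeg;            /* bytes 0..3: the offset   */
+    uint16_t uSel;              /* bytes 4..5: the selector */
+} IEMEXAMPLEFARPTR32;
+#pragma pack()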
+
+
+/**
+ * Opcode 0xff /4.
+ * @param   bRm             The RM byte.
+ */
+FNIEMOP_DEF_1(iemOp_Grp5_jmpn_Ev, uint8_t, bRm)
+{
+    IEMOP_MNEMONIC("callf Ep");
+    IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo Too early? */
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        /* The new RIP is taken from a register. */
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint16_t, u16Target);
+                IEM_MC_FETCH_GREG_U16(u16Target, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_SET_RIP_U16(u16Target);
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint32_t, u32Target);
+                IEM_MC_FETCH_GREG_U32(u32Target, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_SET_RIP_U32(u32Target);
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint64_t, u64Target);
+                IEM_MC_FETCH_GREG_U64(u64Target, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_SET_RIP_U64(u64Target);
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    else
+    {
+        /* The new RIP is taken from memory. */
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(0, 2);
+                IEM_MC_LOCAL(uint16_t, u16Target);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm);
+                IEM_MC_FETCH_MEM_U16(u16Target, pIemCpu->iEffSeg, GCPtrEffSrc);
+                IEM_MC_SET_RIP_U16(u16Target);
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(0, 2);
+                IEM_MC_LOCAL(uint32_t, u32Target);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm);
+                IEM_MC_FETCH_MEM_U32(u32Target, pIemCpu->iEffSeg, GCPtrEffSrc);
+                IEM_MC_SET_RIP_U32(u32Target);
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(0, 2);
+                IEM_MC_LOCAL(uint64_t, u64Target);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm);
+                IEM_MC_FETCH_MEM_U64(u64Target, pIemCpu->iEffSeg, GCPtrEffSrc);
+                IEM_MC_SET_RIP_U64(u64Target);
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+
+    }
+}
+
+
+/**
+ * Opcode 0xff /5.
+ * @param   bRm             The RM byte.
+ */
+FNIEMOP_DEF_1(iemOp_Grp5_jmpf_Ep, uint8_t, bRm)
+{
+    /* decode and use a C worker.  */
+    AssertFailed(); // FNIEMOP_STUB
+    return VERR_NOT_IMPLEMENTED;
+}
+
+
+/**
+ * Opcode 0xff /6.
+ * @param   bRm             The RM byte.
+ */
+FNIEMOP_DEF_1(iemOp_Grp5_push_Ev, uint8_t, bRm)
+{
+    IEMOP_MNEMONIC("push Ev");
+    IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo Too early? */
+
+    /* Registers are handled by a common worker. */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+        return FNIEMOP_CALL_1(iemOpCommonPushGReg, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+
+    /* Memory we do here. */
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    switch (pIemCpu->enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+            IEM_MC_BEGIN(0, 2);
+            IEM_MC_LOCAL(uint16_t,  u16Src);
+            IEM_MC_LOCAL(RTGCPTR,   GCPtrEffSrc);
+            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm);
+            IEM_MC_FETCH_MEM_U16(u16Src, pIemCpu->iEffSeg, GCPtrEffSrc);
+            IEM_MC_PUSH_U16(u16Src);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_32BIT:
+            IEM_MC_BEGIN(0, 2);
+            IEM_MC_LOCAL(uint32_t,  u32Src);
+            IEM_MC_LOCAL(RTGCPTR,   GCPtrEffSrc);
+            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm);
+            IEM_MC_FETCH_MEM_U32(u32Src, pIemCpu->iEffSeg, GCPtrEffSrc);
+            IEM_MC_PUSH_U32(u32Src);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+
+        case IEMMODE_64BIT:
+            IEM_MC_BEGIN(0, 2);
+            IEM_MC_LOCAL(uint64_t,  u64Src);
+            IEM_MC_LOCAL(RTGCPTR,   GCPtrEffSrc);
+            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm);
+            IEM_MC_FETCH_MEM_U64(u64Src, pIemCpu->iEffSeg, GCPtrEffSrc);
+            IEM_MC_PUSH_U64(u64Src);
+            IEM_MC_ADVANCE_RIP();
+            IEM_MC_END();
+            return VINF_SUCCESS;
+    }
+    AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+}
+
+
+/** Opcode 0xff. */
+FNIEMOP_DEF(iemOp_Grp5)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    switch ((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK)
+    {
+        case 0:
+            IEMOP_MNEMONIC("inc Ev");
+            return FNIEMOP_CALL_2(iemOpCommonUnaryEv, bRm, &g_iemAImpl_inc);
+        case 1:
+            IEMOP_MNEMONIC("dec Ev");
+            return FNIEMOP_CALL_2(iemOpCommonUnaryEv, bRm, &g_iemAImpl_dec);
+        case 2:
+            return FNIEMOP_CALL_1(iemOp_Grp5_calln_Ev, bRm);
+        case 3:
+            return FNIEMOP_CALL_1(iemOp_Grp5_callf_Ep, bRm);
+        case 4:
+            return FNIEMOP_CALL_1(iemOp_Grp5_jmpn_Ev, bRm);
+        case 5:
+            return FNIEMOP_CALL_1(iemOp_Grp5_jmpf_Ep, bRm);
+        case 6:
+            return FNIEMOP_CALL_1(iemOp_Grp5_push_Ev, bRm);
+        case 7:
+            IEMOP_MNEMONIC("grp5-ud");
+            return IEMOP_RAISE_INVALID_OPCODE();
+    }
+    AssertFailedReturn(VERR_INTERNAL_ERROR_2);
+}
+
+
+
+const PFNIEMOP g_apfnOneByteMap[256] =
+{
+    /* 0x00 */  iemOp_add_Eb_Gb,        iemOp_add_Ev_Gv,        iemOp_add_Gb_Eb,        iemOp_add_Gv_Ev,
+    /* 0x04 */  iemOp_add_Al_Ib,        iemOp_add_eAX_Iz,       iemOp_push_ES,          iemOp_pop_ES,
+    /* 0x08 */  iemOp_or_Eb_Gb,         iemOp_or_Ev_Gv,         iemOp_or_Gb_Eb,         iemOp_or_Gv_Ev,
+    /* 0x0c */  iemOp_or_Al_Ib,         iemOp_or_eAX_Iz,        iemOp_push_CS,          iemOp_2byteEscape,
+    /* 0x10 */  iemOp_adc_Eb_Gb,        iemOp_adc_Ev_Gv,        iemOp_adc_Gb_Eb,        iemOp_adc_Gv_Ev,
+    /* 0x14 */  iemOp_adc_Al_Ib,        iemOp_adc_eAX_Iz,       iemOp_push_SS,          iemOp_pop_SS,
+    /* 0x18 */  iemOp_sbb_Eb_Gb,        iemOp_sbb_Ev_Gv,        iemOp_sbb_Gb_Eb,        iemOp_sbb_Gv_Ev,
+    /* 0x1c */  iemOp_sbb_Al_Ib,        iemOp_sbb_eAX_Iz,       iemOp_push_DS,          iemOp_pop_DS,
+    /* 0x20 */  iemOp_and_Eb_Gb,        iemOp_and_Ev_Gv,        iemOp_and_Gb_Eb,        iemOp_and_Gv_Ev,
+    /* 0x24 */  iemOp_and_Al_Ib,        iemOp_and_eAX_Iz,       iemOp_seg_ES,           iemOp_daa,
+    /* 0x28 */  iemOp_sub_Eb_Gb,        iemOp_sub_Ev_Gv,        iemOp_sub_Gb_Eb,        iemOp_sub_Gv_Ev,
+    /* 0x2c */  iemOp_sub_Al_Ib,        iemOp_sub_eAX_Iz,       iemOp_seg_CS,           iemOp_das,
+    /* 0x30 */  iemOp_xor_Eb_Gb,        iemOp_xor_Ev_Gv,        iemOp_xor_Gb_Eb,        iemOp_xor_Gv_Ev,
+    /* 0x34 */  iemOp_xor_Al_Ib,        iemOp_xor_eAX_Iz,       iemOp_seg_SS,           iemOp_aaa,
+    /* 0x38 */  iemOp_cmp_Eb_Gb,        iemOp_cmp_Ev_Gv,        iemOp_cmp_Gb_Eb,        iemOp_cmp_Gv_Ev,
+    /* 0x3c */  iemOp_cmp_Al_Ib,        iemOp_cmp_eAX_Iz,       iemOp_seg_DS,           iemOp_aas,
+    /* 0x40 */  iemOp_inc_eAX,          iemOp_inc_eCX,          iemOp_inc_eDX,          iemOp_inc_eBX,
+    /* 0x44 */  iemOp_inc_eSP,          iemOp_inc_eBP,          iemOp_inc_eSI,          iemOp_inc_eDI,
+    /* 0x48 */  iemOp_dec_eAX,          iemOp_dec_eCX,          iemOp_dec_eDX,          iemOp_dec_eBX,
+    /* 0x4c */  iemOp_dec_eSP,          iemOp_dec_eBP,          iemOp_dec_eSI,          iemOp_dec_eDI,
+    /* 0x50 */  iemOp_push_eAX,         iemOp_push_eCX,         iemOp_push_eDX,         iemOp_push_eBX,
+    /* 0x54 */  iemOp_push_eSP,         iemOp_push_eBP,         iemOp_push_eSI,         iemOp_push_eDI,
+    /* 0x58 */  iemOp_pop_eAX,          iemOp_pop_eCX,          iemOp_pop_eDX,          iemOp_pop_eBX,
+    /* 0x5c */  iemOp_pop_eSP,          iemOp_pop_eBP,          iemOp_pop_eSI,          iemOp_pop_eDI,
+    /* 0x60 */  iemOp_pusha,            iemOp_popa,             iemOp_bound_Gv_Ma,      iemOp_arpl_Ew_Gw,
+    /* 0x64 */  iemOp_seg_FS,           iemOp_seg_GS,           iemOp_op_size,          iemOp_addr_size,
+    /* 0x68 */  iemOp_push_Iz,          iemOp_imul_Gv_Ev_Iz,    iemOp_push_Ib,          iemOp_imul_Gv_Ev_Ib,
+    /* 0x6c */  iemOp_insb_Yb_DX,       iemOp_inswd_Yv_DX,      iemOp_outsb_Yb_DX,      iemOp_outswd_Yv_DX,
+    /* 0x70 */  iemOp_jo_Jb,            iemOp_jno_Jb,           iemOp_jc_Jb,            iemOp_jnc_Jb,
+    /* 0x74 */  iemOp_je_Jb,            iemOp_jne_Jb,           iemOp_jbe_Jb,           iemOp_jnbe_Jb,
+    /* 0x78 */  iemOp_js_Jb,            iemOp_jns_Jb,           iemOp_jp_Jb,            iemOp_jnp_Jb,
+    /* 0x7c */  iemOp_jl_Jb,            iemOp_jnl_Jb,           iemOp_jle_Jb,           iemOp_jnle_Jb,
+    /* 0x80 */  iemOp_Grp1_Eb_Ib_80,    iemOp_Grp1_Ev_Iz,       iemOp_Grp1_Eb_Ib_82,    iemOp_Grp1_Ev_Ib,
+    /* 0x84 */  iemOp_test_Eb_Gb,       iemOp_test_Ev_Gv,       iemOp_xchg_Eb_Gb,       iemOp_xchg_Ev_Gv,
+    /* 0x88 */  iemOp_mov_Eb_Gb,        iemOp_mov_Ev_Gv,        iemOp_mov_Gb_Eb,        iemOp_mov_Gv_Ev,
+    /* 0x8c */  iemOp_mov_Ev_Sw,        iemOp_lea_Gv_M,         iemOp_mov_Sw_Ev,        iemOp_pop_Ev,
+    /* 0x90 */  iemOp_nop,              iemOp_xchg_eCX_eAX,     iemOp_xchg_eDX_eAX,     iemOp_xchg_eBX_eAX,
+    /* 0x94 */  iemOp_xchg_eSP_eAX,     iemOp_xchg_eBP_eAX,     iemOp_xchg_eSI_eAX,     iemOp_xchg_eDI_eAX,
+    /* 0x98 */  iemOp_cbw,              iemOp_cwd,              iemOp_call_Ap,          iemOp_wait,
+    /* 0x9c */  iemOp_pushf_Fv,         iemOp_popf_Fv,          iemOp_sahf,             iemOp_lahf,
+    /* 0xa0 */  iemOp_mov_Al_Ob,        iemOp_mov_rAX_Ov,       iemOp_mov_Ob_AL,        iemOp_mov_Ov_rAX,
+    /* 0xa4 */  iemOp_movsb_Xb_Yb,      iemOp_movswd_Xv_Yv,     iemOp_cmpsb_Xb_Yb,      iemOp_cmpswd_Xv_Yv,
+    /* 0xa8 */  iemOp_test_AL_Ib,       iemOp_test_eAX_Iz,      iemOp_stosb_Yb_AL,      iemOp_stoswd_Yv_eAX,
+    /* 0xac */  iemOp_lodsb_AL_Xb,      iemOp_lodswd_eAX_Xv,    iemOp_scasb_AL_Xb,      iemOp_scaswd_eAX_Xv,
+    /* 0xb0 */  iemOp_mov_AL_Ib,        iemOp_CL_Ib,            iemOp_DL_Ib,            iemOp_BL_Ib,
+    /* 0xb4 */  iemOp_mov_AH_Ib,        iemOp_CH_Ib,            iemOp_DH_Ib,            iemOp_BH_Ib,
+    /* 0xb8 */  iemOp_eAX_Iv,           iemOp_eCX_Iv,           iemOp_eDX_Iv,           iemOp_eBX_Iv,
+    /* 0xbc */  iemOp_eSP_Iv,           iemOp_eBP_Iv,           iemOp_eSI_Iv,           iemOp_eDI_Iv,
+    /* 0xc0 */  iemOp_Grp2_Eb_Ib,       iemOp_Grp2_Ev_Ib,       iemOp_retn_Iw,          iemOp_retn,
+    /* 0xc4 */  iemOp_les_Gv_Mp,        iemOp_lds_Gv_Mp,        iemOp_Grp11_Eb_Ib,      iemOp_Grp11_Ev_Iz,
+    /* 0xc8 */  iemOp_enter_Iw_Ib,      iemOp_leave,            iemOp_retf_Iw,          iemOp_retf,
+    /* 0xcc */  iemOp_int_3,            iemOp_int_Ib,           iemOp_into,             iemOp_iret,
+    /* 0xd0 */  iemOp_Grp2_Eb_1,        iemOp_Grp2_Ev_1,        iemOp_Grp2_Eb_CL,       iemOp_Grp2_Ev_CL,
+    /* 0xd4 */  iemOp_aam_Ib,           iemOp_aad_Ib,           iemOp_Invalid,          iemOp_xlat,
+    /* 0xd8 */  iemOp_EscF0,            iemOp_EscF1,            iemOp_EscF2,            iemOp_EscF3,
+    /* 0xdc */  iemOp_EscF4,            iemOp_EscF5,            iemOp_EscF6,            iemOp_EscF7,
+    /* 0xe0 */  iemOp_loopne_Jb,        iemOp_loope_Jb,         iemOp_loop_Jb,          iemOp_jecxz_Jb,
+    /* 0xe4 */  iemOp_in_AL_Ib,         iemOp_in_eAX_Ib,        iemOp_out_Ib_AL,        iemOp_out_Ib_eAX,
+    /* 0xe8 */  iemOp_call_Jv,          iemOp_jmp_Jv,           iemOp_jmp_Ap,           iemOp_jmp_Jb,
+    /* 0xec */  iemOp_in_AL_DX,         iemOp_in_eAX_DX,        iemOp_out_DX_AL,        iemOp_out_DX_eAX,
+    /* 0xf0 */  iemOp_lock,             iemOp_Invalid,          iemOp_repne,            iemOp_repe,
+    /* 0xf4 */  iemOp_hlt,              iemOp_cmc,              iemOp_Grp3_Eb,          iemOp_Grp3_Ev,
+    /* 0xf8 */  iemOp_clc,              iemOp_stc,              iemOp_cli,              iemOp_sti,
+    /* 0xfc */  iemOp_cld,              iemOp_std,              iemOp_Grp4,             iemOp_Grp5,
+};
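+
+
+/*
+ * Sketch only of how a decode step consumes the table above: fetch one opcode
+ * byte and dispatch to its handler, with prefix bytes such as 0xf0/0xf2/0xf3
+ * recursing back in.  The function name is invented for the example.
+ */
+static VBOXSTRICTRC iemExampleDecodeOneByte(PIEMCPU pIemCpu)
+{
+    uint8_t b;
+    IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &b);
+    return FNIEMOP_CALL(g_apfnOneByteMap[b]);
+}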
+
+
+/** @} */
+
Index: /trunk/src/VBox/VMM/VMMR3/DBGF.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMR3/DBGF.cpp	(revision 36767)
+++ /trunk/src/VBox/VMM/VMMR3/DBGF.cpp	(revision 36768)
@@ -244,5 +244,5 @@
 #ifndef RT_OS_L4
 
-# if !defined(DEBUG) || defined(DEBUG_sandervl) || defined(DEBUG_frank)
+# if !defined(DEBUG) || defined(DEBUG_sandervl) || defined(DEBUG_frank) || defined(IEM_VERIFICATION_MODE)
     int cWait = 10;
 # else
Index: /trunk/src/VBox/VMM/VMMR3/EM.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMR3/EM.cpp	(revision 36767)
+++ /trunk/src/VBox/VMM/VMMR3/EM.cpp	(revision 36768)
@@ -5,5 +5,5 @@
 
 /*
- * Copyright (C) 2006-2007 Oracle Corporation
+ * Copyright (C) 2006-2011 Oracle Corporation
  *
  * This file is part of VirtualBox Open Source Edition (OSE), as
@@ -54,4 +54,7 @@
 #include <VBox/vmm/hwaccm.h>
 #include <VBox/vmm/patm.h>
+#ifdef IEM_VERIFICATION_MODE
+# include <VBox/vmm/iem.h>
+#endif
 #include "EMInternal.h"
 #include "internal/em.h"
@@ -1057,4 +1060,8 @@
 EMSTATE emR3Reschedule(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
 {
+#ifdef IEM_VERIFICATION_MODE
+    return EMSTATE_REM;
+#else
+
     /*
      * When forcing raw-mode execution, things are simple.
@@ -1102,10 +1109,10 @@
     }
 
-#ifndef VBOX_RAW_V86
+# ifndef VBOX_RAW_V86
     if (EFlags.u32 & X86_EFL_VM) {
         Log2(("raw mode refused: VM_MASK\n"));
         return EMSTATE_REM;
     }
-#endif
+# endif
 
     /** @todo check up the X86_CR0_AM flag in respect to raw mode!!! We're probably not emulating it right! */
@@ -1179,5 +1186,5 @@
         }
 
-#if !defined(VBOX_ALLOW_IF0) && !defined(VBOX_RUN_INTERRUPT_GATE_HANDLERS)
+# if !defined(VBOX_ALLOW_IF0) && !defined(VBOX_RUN_INTERRUPT_GATE_HANDLERS)
         if (!(EFlags.u32 & X86_EFL_IF))
         {
@@ -1186,5 +1193,5 @@
             return EMSTATE_REM;
         }
-#endif
+# endif
 
         /** @todo still necessary??? */
@@ -1198,4 +1205,6 @@
     Assert(PGMPhysIsA20Enabled(pVCpu));
     return EMSTATE_RAW;
+#endif /* !IEM_VERIFICATION_MODE */
+
 }
 
@@ -1986,4 +1995,7 @@
                  */
                 case EMSTATE_RAW:
+#ifdef IEM_VERIFICATION_MODE /* remove later */
+                    AssertFailed();
+#endif
                     rc = emR3RawExecute(pVM, pVCpu, &fFFDone);
                     break;
@@ -1993,4 +2005,7 @@
                  */
                 case EMSTATE_HWACC:
+#ifdef IEM_VERIFICATION_MODE /* remove later */
+                    AssertFailed();
+#endif
                     rc = emR3HwAccExecute(pVM, pVCpu, &fFFDone);
                     break;
@@ -2000,5 +2015,9 @@
                  */
                 case EMSTATE_REM:
+#ifdef IEM_VERIFICATION_MODE
+                    rc = VBOXSTRICTRC_TODO(IEMExecOne(pVCpu)); fFFDone = false;
+#else
                     rc = emR3RemExecute(pVM, pVCpu, &fFFDone);
+#endif
                     Log2(("EMR3ExecuteVM: emR3RemExecute -> %Rrc\n", rc));
                     break;
Index: /trunk/src/VBox/VMM/VMMR3/VM.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMR3/VM.cpp	(revision 36767)
+++ /trunk/src/VBox/VMM/VMMR3/VM.cpp	(revision 36768)
@@ -55,4 +55,5 @@
 #include <VBox/vmm/pdmcritsect.h>
 #include <VBox/vmm/em.h>
+#include <VBox/vmm/iem.h>
 #include <VBox/vmm/rem.h>
 #include <VBox/vmm/tm.h>
@@ -926,34 +927,41 @@
                                                         if (RT_SUCCESS(rc))
                                                         {
-                                                            rc = DBGFR3Init(pVM);
+                                                            rc = IEMR3Init(pVM);
                                                             if (RT_SUCCESS(rc))
                                                             {
-                                                                rc = PDMR3Init(pVM);
+                                                                rc = DBGFR3Init(pVM);
                                                                 if (RT_SUCCESS(rc))
                                                                 {
-                                                                    rc = PGMR3InitDynMap(pVM);
-                                                                    if (RT_SUCCESS(rc))
-                                                                        rc = MMR3HyperInitFinalize(pVM);
-                                                                    if (RT_SUCCESS(rc))
-                                                                        rc = PATMR3InitFinalize(pVM);
-                                                                    if (RT_SUCCESS(rc))
-                                                                        rc = PGMR3InitFinalize(pVM);
-                                                                    if (RT_SUCCESS(rc))
-                                                                        rc = SELMR3InitFinalize(pVM);
-                                                                    if (RT_SUCCESS(rc))
-                                                                        rc = TMR3InitFinalize(pVM);
-                                                                    if (RT_SUCCESS(rc))
-                                                                        rc = REMR3InitFinalize(pVM);
-                                                                    if (RT_SUCCESS(rc))
-                                                                        rc = vmR3InitDoCompleted(pVM, VMINITCOMPLETED_RING3);
+                                                                    rc = PDMR3Init(pVM);
                                                                     if (RT_SUCCESS(rc))
                                                                     {
-                                                                        LogFlow(("vmR3InitRing3: returns %Rrc\n", VINF_SUCCESS));
-                                                                        return VINF_SUCCESS;
+                                                                        rc = PGMR3InitDynMap(pVM);
+                                                                        if (RT_SUCCESS(rc))
+                                                                            rc = MMR3HyperInitFinalize(pVM);
+                                                                        if (RT_SUCCESS(rc))
+                                                                            rc = PATMR3InitFinalize(pVM);
+                                                                        if (RT_SUCCESS(rc))
+                                                                            rc = PGMR3InitFinalize(pVM);
+                                                                        if (RT_SUCCESS(rc))
+                                                                            rc = SELMR3InitFinalize(pVM);
+                                                                        if (RT_SUCCESS(rc))
+                                                                            rc = TMR3InitFinalize(pVM);
+                                                                        if (RT_SUCCESS(rc))
+                                                                            rc = REMR3InitFinalize(pVM);
+                                                                        if (RT_SUCCESS(rc))
+                                                                            rc = vmR3InitDoCompleted(pVM, VMINITCOMPLETED_RING3);
+                                                                        if (RT_SUCCESS(rc))
+                                                                        {
+                                                                            LogFlow(("vmR3InitRing3: returns %Rrc\n", VINF_SUCCESS));
+                                                                            return VINF_SUCCESS;
+                                                                        }
+
+                                                                        int rc2 = PDMR3Term(pVM);
+                                                                        AssertRC(rc2);
                                                                     }
-                                                                    int rc2 = PDMR3Term(pVM);
+                                                                    int rc2 = DBGFR3Term(pVM);
                                                                     AssertRC(rc2);
                                                                 }
-                                                                int rc2 = DBGFR3Term(pVM);
+                                                                int rc2 = IEMR3Term(pVM);
                                                                 AssertRC(rc2);
                                                             }
@@ -1157,4 +1165,5 @@
     EMR3Relocate(pVM);
     TMR3Relocate(pVM, offDelta);
+    IEMR3Relocate(pVM);
     DBGFR3Relocate(pVM, offDelta);
     PDMR3Relocate(pVM, offDelta);
@@ -2394,4 +2403,6 @@
         rc = PDMR3Term(pVM);
         AssertRC(rc);
+        rc = IEMR3Term(pVM);
+        AssertRC(rc);
         rc = EMR3Term(pVM);
         AssertRC(rc);
Index: /trunk/src/VBox/VMM/include/IEMInternal.h
===================================================================
--- /trunk/src/VBox/VMM/include/IEMInternal.h	(revision 36768)
+++ /trunk/src/VBox/VMM/include/IEMInternal.h	(revision 36768)
@@ -0,0 +1,643 @@
+/* $Id$ */
+/** @file
+ * IEM - Internal header file.
+ */
+
+/*
+ * Copyright (C) 2011 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef ___IEMInternal_h
+#define ___IEMInternal_h
+
+#include <VBox/vmm/stam.h>
+#include <VBox/vmm/cpum.h>
+#include <VBox/param.h>
+
+
+RT_C_DECLS_BEGIN
+
+
+/** @defgroup grp_iem_int       Internals
+ * @ingroup grp_iem
+ * @internal
+ * @{
+ */
+
+
+/**
+ * Operand or addressing mode.
+ */
+typedef enum IEMMODE
+{
+    IEMMODE_16BIT = 0,
+    IEMMODE_32BIT,
+    IEMMODE_64BIT
+} IEMMODE;
+AssertCompileSize(IEMMODE, 4);
+
+/**
+ * Extended operand mode that includes a representation of 8-bit.
+ *
+ * This is used for packing down modes when invoking some C instruction
+ * implementations.
+ */
+typedef enum IEMMODEX
+{
+    IEMMODEX_16BIT = IEMMODE_16BIT,
+    IEMMODEX_32BIT = IEMMODE_32BIT,
+    IEMMODEX_64BIT = IEMMODE_64BIT,
+    IEMMODEX_8BIT
+} IEMMODEX;
+AssertCompileSize(IEMMODEX, 4);
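+
+/* Usage sketch (illustrative only; fByteOp is a hypothetical decoder flag):
+ * IEMMODEX lets the byte-sized variant of an instruction share a C worker
+ * with the word/dword/qword variants by passing the operand mode as data:
+ *
+ *      IEMMODEX enmModeX = fByteOp ? IEMMODEX_8BIT : (IEMMODEX)pIemCpu->enmEffOpSize;
+ */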
+
+
+
+/**
+ * The per-CPU IEM state.
+ */
+typedef struct IEMCPU
+{
+    /** Pointer to the CPU context - ring-3 context. */
+    R3PTRTYPE(PCPUMCTX)     pCtxR3;
+    /** Pointer to the CPU context - ring-0 context. */
+    R0PTRTYPE(PCPUMCTX)     pCtxR0;
+    /** Pointer to the CPU context - raw-mode context. */
+    RCPTRTYPE(PCPUMCTX)     pCtxRC;
+
+    /** Offset of the VMCPU structure relative to this structure (negative). */
+    int32_t                 offVMCpu;
+    /** Offset of the VM structure relative to this structure (negative). */
+    int32_t                 offVM;
+
+    /** Whether to bypass access handlers or not. */
+    bool                    fByPassHandlers;
+    /** Explicit alignment padding. */
+    bool                    afAlignment0[6];
+
+    /** The CPL. */
+    uint8_t                 uCpl;
+    /** The current CPU execution mode (CS). */
+    IEMMODE                 enmCpuMode;
+
+    /** @name Statistics
+     * @{  */
+    /** The number of instructions we've executed. */
+    uint32_t                cInstructions;
+    /** The number of potential exits. */
+    uint32_t                cPotentialExits;
+#ifdef IEM_VERIFICATION_MODE
+    /** The number of I/O port reads that have been performed. */
+    uint32_t                cIOReads;
+    /** The number of I/O port writes that have been performed. */
+    uint32_t                cIOWrites;
+    /** Hack for ignoring differences in undefined EFLAGS after MUL and DIV. */
+    bool                    fMulDivHack;
+    /** Hack for ignoring differences in OF after SHL.  At least two of the
+     * Intel CPUs this code has run on fail to set it correctly (i.e. the way
+     * AMD CPUs and QEMU do). */
+    bool                    fShlHack;
+    bool                    afAlignment1[6];
+#endif
+    /** @}  */
+
+    /** @name Decoder state.
+     * @{ */
+
+    /** The default addressing mode. */
+    IEMMODE                 enmDefAddrMode;
+    /** The effective addressing mode. */
+    IEMMODE                 enmEffAddrMode;
+    /** The default operand mode. */
+    IEMMODE                 enmDefOpSize;
+    /** The effective operand mode. */
+    IEMMODE                 enmEffOpSize;
+
+    /** The prefix mask (IEM_OP_PRF_XXX). */
+    uint32_t                fPrefixes;
+    /** The extra REX ModR/M register field bit (REX.R << 3). */
+    uint8_t                 uRexReg;
+    /** The extra REX ModR/M r/m field, SIB base and opcode reg bit
+     * (REX.B << 3). */
+    uint8_t                 uRexB;
+    /** The extra REX SIB index field bit (REX.X << 3). */
+    uint8_t                 uRexIndex;
+    /** The effective segment register (X86_SREG_XXX). */
+    uint8_t                 iEffSeg;
+
+    /** The current offset into abOpcode. */
+    uint8_t                 offOpcode;
+    /** The size of what has currently been fetched into abOpcode. */
+    uint8_t                 cbOpcode;
+    /** The opcode bytes. */
+    uint8_t                 abOpcode[15];
+
+    /** @} */
+
+    /** Alignment padding for aMemMappings. */
+    uint8_t                 abAlignment2[5];
+
+    /** The number of active guest memory mappings. */
+    uint8_t                 cActiveMappings;
+    /** The next unused mapping index. */
+    uint8_t                 iNextMapping;
+    /** Records for tracking guest memory mappings. */
+    struct
+    {
+        /** The address of the mapped bytes. */
+        void               *pv;
+#if defined(IN_RC) && HC_ARCH_BITS == 64
+        uint32_t            u32Alignment3; /**< Alignment padding. */
+#endif
+        /** The access flags (IEM_ACCESS_XXX).
+         * IEM_ACCESS_INVALID if the entry is unused. */
+        uint32_t            fAccess;
+#if HC_ARCH_BITS == 64
+        uint32_t            u32Alignment4; /**< Alignment padding. */
+#endif
+    } aMemMappings[3];
+
+    /** Bounce buffer info.
+     * This runs in parallel to aMemMappings. */
+    struct
+    {
+        /** The physical address of the first byte. */
+        RTGCPHYS            GCPhysFirst;
+        /** The physical address of the second page. */
+        RTGCPHYS            GCPhysSecond;
+        /** The number of bytes in the first page. */
+        uint16_t            cbFirst;
+        /** The number of bytes in the second page. */
+        uint16_t            cbSecond;
+        /** Whether it's unassigned memory. */
+        bool                fUnassigned;
+        /** Explicit alignment padding. */
+        bool                afAlignment5[3];
+    } aMemBbMappings[3];
+
+    /** Bounce buffer storage.
+     * This runs in parallel to aMemMappings and aMemBbMappings. */
+    struct
+    {
+        uint8_t             ab[/*PAGE_SIZE*/16];
+    } aBounceBuffers[3];
+
+} IEMCPU;
+/** Pointer to the per-CPU IEM state. */
+typedef IEMCPU *PIEMCPU;
+
+/** Converts a IEMCPU pointer to a VMCPU pointer.
+ * @returns VMCPU pointer.
+ * @param   a_pIemCpu       The IEM per CPU instance data.
+ */
+#define IEMCPU_TO_VMCPU(a_pIemCpu)  ((PVMCPU)( (uintptr_t)(a_pIemCpu) + a_pIemCpu->offVMCpu ))
+
+/** Converts a IEMCPU pointer to a VM pointer.
+ * @returns VM pointer.
+ * @param   a_pIemCpu       The IEM per CPU instance data.
+ */
+#define IEMCPU_TO_VM(a_pIemCpu)     ((PVM)( (uintptr_t)(a_pIemCpu) + a_pIemCpu->offVM ))
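+
+/* Usage sketch (illustrative only): since the offsets are relative and
+ * negative, a function holding nothing but the IEM state can recover the
+ * owning structures in any context:
+ *
+ *      PVMCPU pVCpu = IEMCPU_TO_VMCPU(pIemCpu);
+ *      PVM    pVM   = IEMCPU_TO_VM(pIemCpu);
+ */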
+
+/** @name IEM_ACCESS_XXX - Access details.
+ * @{ */
+#define IEM_ACCESS_INVALID              UINT32_C(0x000000ff)
+#define IEM_ACCESS_TYPE_READ            UINT32_C(0x00000001)
+#define IEM_ACCESS_TYPE_WRITE           UINT32_C(0x00000002)
+#define IEM_ACCESS_TYPE_EXEC            UINT32_C(0x00000004)
+#define IEM_ACCESS_TYPE_MASK            UINT32_C(0x00000007)
+#define IEM_ACCESS_WHAT_CODE            UINT32_C(0x00000010)
+#define IEM_ACCESS_WHAT_DATA            UINT32_C(0x00000020)
+#define IEM_ACCESS_WHAT_STACK           UINT32_C(0x00000030)
+#define IEM_ACCESS_WHAT_SYS             UINT32_C(0x00000040)
+#define IEM_ACCESS_WHAT_MASK            UINT32_C(0x00000070)
+/** Used in aMemMappings to indicate that the entry is bounce buffered. */
+#define IEM_ACCESS_BOUNCE_BUFFERED      UINT32_C(0x00000100)
+/** Read+write data alias. */
+#define IEM_ACCESS_DATA_RW              (IEM_ACCESS_TYPE_READ  | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_WHAT_DATA)
+/** Write data alias. */
+#define IEM_ACCESS_DATA_W               (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_WHAT_DATA)
+/** Read data alias. */
+#define IEM_ACCESS_DATA_R               (IEM_ACCESS_TYPE_READ  | IEM_ACCESS_WHAT_DATA)
+/** Instruction fetch alias. */
+#define IEM_ACCESS_INSTRUCTION          (IEM_ACCESS_TYPE_EXEC  | IEM_ACCESS_WHAT_CODE)
+/** Stack write alias. */
+#define IEM_ACCESS_STACK_W              (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_WHAT_STACK)
+/** Stack read alias. */
+#define IEM_ACCESS_STACK_R              (IEM_ACCESS_TYPE_READ  | IEM_ACCESS_WHAT_STACK)
+/** @} */
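+
+/* Usage sketch (illustrative only): an fAccess value combines one or more
+ * TYPE bits with exactly one WHAT field, so the aliases decompose cleanly:
+ *
+ *      uint32_t fAccess = IEM_ACCESS_STACK_W;
+ *      Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
+ *      Assert((fAccess & IEM_ACCESS_WHAT_MASK) == IEM_ACCESS_WHAT_STACK);
+ */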
+
+/** @name Prefix constants (IEMCPU::fPrefixes)
+ * @{ */
+#define IEM_OP_PRF_SEG_CS               RT_BIT_32(0)
+#define IEM_OP_PRF_SEG_SS               RT_BIT_32(1)
+#define IEM_OP_PRF_SEG_DS               RT_BIT_32(2)
+#define IEM_OP_PRF_SEG_ES               RT_BIT_32(3)
+#define IEM_OP_PRF_SEG_FS               RT_BIT_32(4)
+#define IEM_OP_PRF_SEG_GS               RT_BIT_32(5)
+#define IEM_OP_PRF_SEG_MASK             UINT32_C(0x3f)
+
+#define IEM_OP_PRF_SIZE_OP              RT_BIT_32(8)
+#define IEM_OP_PRF_SIZE_REX_W           RT_BIT_32(9)
+#define IEM_OP_PRF_SIZE_ADDR            RT_BIT_32(10)
+
+#define IEM_OP_PRF_LOCK                 RT_BIT_32(16)
+#define IEM_OP_PRF_REPNZ                RT_BIT_32(17)
+#define IEM_OP_PRF_REPZ                 RT_BIT_32(18)
+
+#define IEM_OP_PRF_REX                  RT_BIT_32(24)
+#define IEM_OP_PRF_REX_R                RT_BIT_32(25)
+#define IEM_OP_PRF_REX_B                RT_BIT_32(26)
+#define IEM_OP_PRF_REX_X                RT_BIT_32(27)
+/** @} */
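+
+/* Decoder sketch (illustrative only; u8PrefixByte and the surrounding loop
+ * are hypothetical): prefix bytes are expected to accumulate into
+ * IEMCPU::fPrefixes before dispatching on the opcode, along these lines: */
+#if 0 /* usage sketch, not compiled */
+    switch (u8PrefixByte)
+    {
+        case 0xf0: pIemCpu->fPrefixes |= IEM_OP_PRF_LOCK;    continue;
+        case 0xf2: pIemCpu->fPrefixes |= IEM_OP_PRF_REPNZ;   continue;
+        case 0xf3: pIemCpu->fPrefixes |= IEM_OP_PRF_REPZ;    continue;
+        case 0x66: pIemCpu->fPrefixes |= IEM_OP_PRF_SIZE_OP; continue;
+    }
+#endif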
+
+
+
+
+/** @def IEM_DECL_IMPL_TYPE
+ * For typedef'ing an instruction implementation function.
+ *
+ * @param   a_RetType           The return type.
+ * @param   a_Name              The name of the type.
+ * @param   a_ArgList           The argument list enclosed in parentheses.
+ */
+
+/** @def IEM_DECL_IMPL_DEF
+ * For defining an instruction implementation function.
+ *
+ * @param   a_RetType           The return type.
+ * @param   a_Name              The name of the function.
+ * @param   a_ArgList           The argument list enclosed in parentheses.
+ */
+
+#if defined(__GNUC__) && defined(RT_ARCH_X86)
+# define IEM_DECL_IMPL_TYPE(a_RetType, a_Name, a_ArgList) \
+    __attribute__((__fastcall__)) a_RetType (a_Name) a_ArgList
+# define IEM_DECL_IMPL_DEF(a_RetType, a_Name, a_ArgList) \
+    __attribute__((__fastcall__, __nothrow__)) a_RetType a_Name a_ArgList
+
+#elif defined(_MSC_VER) && defined(RT_ARCH_X86)
+# define IEM_DECL_IMPL_TYPE(a_RetType, a_Name, a_ArgList) \
+    a_RetType (__fastcall a_Name) a_ArgList
+# define IEM_DECL_IMPL_DEF(a_RetType, a_Name, a_ArgList) \
+    a_RetType __fastcall a_Name a_ArgList
+
+#else
+# define IEM_DECL_IMPL_TYPE(a_RetType, a_Name, a_ArgList) \
+    a_RetType (VBOXCALL a_Name) a_ArgList
+# define IEM_DECL_IMPL_DEF(a_RetType, a_Name, a_ArgList) \
+    a_RetType VBOXCALL a_Name a_ArgList
+
+#endif
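+
+/* Definition sketch (illustrative only; the worker name is hypothetical):
+ * an implementation worker matching the FNIEMAIMPLBINU8 type below would
+ * be defined like this: */
+#if 0 /* usage sketch, not compiled */
+IEM_DECL_IMPL_DEF(void, iemExampleAImpl_or_u8, (uint8_t *pu8Dst, uint8_t u8Src, uint32_t *pEFlags))
+{
+    *pu8Dst |= u8Src;
+    /* A real worker would also update the status flags in *pEFlags. */
+}
+#endif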
+
+/** @name Arithmetic assignment operations on bytes (binary).
+ * @{ */
+typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLBINU8,  (uint8_t  *pu8Dst,  uint8_t  u8Src,  uint32_t *pEFlags));
+typedef FNIEMAIMPLBINU8  *PFNIEMAIMPLBINU8;
+FNIEMAIMPLBINU8 iemAImpl_add_u8, iemAImpl_add_u8_locked;
+FNIEMAIMPLBINU8 iemAImpl_adc_u8, iemAImpl_adc_u8_locked;
+FNIEMAIMPLBINU8 iemAImpl_sub_u8, iemAImpl_sub_u8_locked;
+FNIEMAIMPLBINU8 iemAImpl_sbb_u8, iemAImpl_sbb_u8_locked;
+FNIEMAIMPLBINU8  iemAImpl_or_u8,  iemAImpl_or_u8_locked;
+FNIEMAIMPLBINU8 iemAImpl_xor_u8, iemAImpl_xor_u8_locked;
+FNIEMAIMPLBINU8 iemAImpl_and_u8, iemAImpl_and_u8_locked;
+/** @} */
+
+/** @name Arithmetic assignment operations on words (binary).
+ * @{ */
+typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLBINU16,  (uint16_t *pu16Dst, uint16_t u16Src, uint32_t *pEFlags));
+typedef FNIEMAIMPLBINU16  *PFNIEMAIMPLBINU16;
+FNIEMAIMPLBINU16 iemAImpl_add_u16, iemAImpl_add_u16_locked;
+FNIEMAIMPLBINU16 iemAImpl_adc_u16, iemAImpl_adc_u16_locked;
+FNIEMAIMPLBINU16 iemAImpl_sub_u16, iemAImpl_sub_u16_locked;
+FNIEMAIMPLBINU16 iemAImpl_sbb_u16, iemAImpl_sbb_u16_locked;
+FNIEMAIMPLBINU16  iemAImpl_or_u16,  iemAImpl_or_u16_locked;
+FNIEMAIMPLBINU16 iemAImpl_xor_u16, iemAImpl_xor_u16_locked;
+FNIEMAIMPLBINU16 iemAImpl_and_u16, iemAImpl_and_u16_locked;
+/** @}  */
+
+/** @name Arithmetic assignment operations on double words (binary).
+ * @{ */
+typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLBINU32, (uint32_t *pu32Dst, uint32_t u32Src, uint32_t *pEFlags));
+typedef FNIEMAIMPLBINU32 *PFNIEMAIMPLBINU32;
+FNIEMAIMPLBINU32 iemAImpl_add_u32, iemAImpl_add_u32_locked;
+FNIEMAIMPLBINU32 iemAImpl_adc_u32, iemAImpl_adc_u32_locked;
+FNIEMAIMPLBINU32 iemAImpl_sub_u32, iemAImpl_sub_u32_locked;
+FNIEMAIMPLBINU32 iemAImpl_sbb_u32, iemAImpl_sbb_u32_locked;
+FNIEMAIMPLBINU32  iemAImpl_or_u32,  iemAImpl_or_u32_locked;
+FNIEMAIMPLBINU32 iemAImpl_xor_u32, iemAImpl_xor_u32_locked;
+FNIEMAIMPLBINU32 iemAImpl_and_u32, iemAImpl_and_u32_locked;
+/** @}  */
+
+/** @name Arithmetic assignment operations on quad words (binary).
+ * @{ */
+typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLBINU64, (uint64_t *pu64Dst, uint64_t u64Src, uint32_t *pEFlags));
+typedef FNIEMAIMPLBINU64 *PFNIEMAIMPLBINU64;
+FNIEMAIMPLBINU64 iemAImpl_add_u64, iemAImpl_add_u64_locked;
+FNIEMAIMPLBINU64 iemAImpl_adc_u64, iemAImpl_adc_u64_locked;
+FNIEMAIMPLBINU64 iemAImpl_sub_u64, iemAImpl_sub_u64_locked;
+FNIEMAIMPLBINU64 iemAImpl_sbb_u64, iemAImpl_sbb_u64_locked;
+FNIEMAIMPLBINU64  iemAImpl_or_u64,  iemAImpl_or_u64_locked;
+FNIEMAIMPLBINU64 iemAImpl_xor_u64, iemAImpl_xor_u64_locked;
+FNIEMAIMPLBINU64 iemAImpl_and_u64, iemAImpl_and_u64_locked;
+/** @}  */
+
+/** @name Compare operations (thrown in with the binary ops).
+ * @{ */
+FNIEMAIMPLBINU8  iemAImpl_cmp_u8;
+FNIEMAIMPLBINU16 iemAImpl_cmp_u16;
+FNIEMAIMPLBINU32 iemAImpl_cmp_u32;
+FNIEMAIMPLBINU64 iemAImpl_cmp_u64;
+/** @}  */
+
+/** @name Test operations (thrown in with the binary ops).
+ * @{ */
+FNIEMAIMPLBINU8  iemAImpl_test_u8;
+FNIEMAIMPLBINU16 iemAImpl_test_u16;
+FNIEMAIMPLBINU32 iemAImpl_test_u32;
+FNIEMAIMPLBINU64 iemAImpl_test_u64;
+/** @}  */
+
+/** @name Signed multiplication operations (thrown in with the binary ops).
+ * @{ */
+FNIEMAIMPLBINU8  iemAImpl_imul_two_u8;
+FNIEMAIMPLBINU16 iemAImpl_imul_two_u16;
+FNIEMAIMPLBINU32 iemAImpl_imul_two_u32;
+FNIEMAIMPLBINU64 iemAImpl_imul_two_u64;
+/** @}  */
+
+/** @name Arithmetic assignment operations on bytes (unary).
+ * @{ */
+typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLUNARYU8,  (uint8_t  *pu8Dst,  uint32_t *pEFlags));
+typedef FNIEMAIMPLUNARYU8  *PFNIEMAIMPLUNARYU8;
+FNIEMAIMPLUNARYU8 iemAImpl_inc_u8, iemAImpl_inc_u8_locked;
+FNIEMAIMPLUNARYU8 iemAImpl_dec_u8, iemAImpl_dec_u8_locked;
+FNIEMAIMPLUNARYU8 iemAImpl_not_u8, iemAImpl_not_u8_locked;
+FNIEMAIMPLUNARYU8 iemAImpl_neg_u8, iemAImpl_neg_u8_locked;
+/** @} */
+
+/** @name Arithmetic assignment operations on words (unary).
+ * @{ */
+typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLUNARYU16,  (uint16_t  *pu16Dst,  uint32_t *pEFlags));
+typedef FNIEMAIMPLUNARYU16  *PFNIEMAIMPLUNARYU16;
+FNIEMAIMPLUNARYU16 iemAImpl_inc_u16, iemAImpl_inc_u16_locked;
+FNIEMAIMPLUNARYU16 iemAImpl_dec_u16, iemAImpl_dec_u16_locked;
+FNIEMAIMPLUNARYU16 iemAImpl_not_u16, iemAImpl_not_u16_locked;
+FNIEMAIMPLUNARYU16 iemAImpl_neg_u16, iemAImpl_neg_u16_locked;
+/** @} */
+
+/** @name Arithmetic assignment operations on double words (unary).
+ * @{ */
+typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLUNARYU32,  (uint32_t  *pu32Dst,  uint32_t *pEFlags));
+typedef FNIEMAIMPLUNARYU32  *PFNIEMAIMPLUNARYU32;
+FNIEMAIMPLUNARYU32 iemAImpl_inc_u32, iemAImpl_inc_u32_locked;
+FNIEMAIMPLUNARYU32 iemAImpl_dec_u32, iemAImpl_dec_u32_locked;
+FNIEMAIMPLUNARYU32 iemAImpl_not_u32, iemAImpl_not_u32_locked;
+FNIEMAIMPLUNARYU32 iemAImpl_neg_u32, iemAImpl_neg_u32_locked;
+/** @} */
+
+/** @name Arithmetic assignment operations on quad words (unary).
+ * @{ */
+typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLUNARYU64,  (uint64_t  *pu64Dst,  uint32_t *pEFlags));
+typedef FNIEMAIMPLUNARYU64  *PFNIEMAIMPLUNARYU64;
+FNIEMAIMPLUNARYU64 iemAImpl_inc_u64, iemAImpl_inc_u64_locked;
+FNIEMAIMPLUNARYU64 iemAImpl_dec_u64, iemAImpl_dec_u64_locked;
+FNIEMAIMPLUNARYU64 iemAImpl_not_u64, iemAImpl_not_u64_locked;
+FNIEMAIMPLUNARYU64 iemAImpl_neg_u64, iemAImpl_neg_u64_locked;
+/** @} */
+
+
+/** @name Shift operations on bytes (Group 2).
+ * @{ */
+typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLSHIFTU8,(uint8_t *pu8Dst, uint8_t cShift, uint32_t *pEFlags));
+typedef FNIEMAIMPLSHIFTU8  *PFNIEMAIMPLSHIFTU8;
+FNIEMAIMPLSHIFTU8 iemAImpl_rol_u8;
+FNIEMAIMPLSHIFTU8 iemAImpl_ror_u8;
+FNIEMAIMPLSHIFTU8 iemAImpl_rcl_u8;
+FNIEMAIMPLSHIFTU8 iemAImpl_rcr_u8;
+FNIEMAIMPLSHIFTU8 iemAImpl_shl_u8;
+FNIEMAIMPLSHIFTU8 iemAImpl_shr_u8;
+FNIEMAIMPLSHIFTU8 iemAImpl_sar_u8;
+/** @} */
+
+/** @name Shift operations on words (Group 2).
+ * @{ */
+typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLSHIFTU16,(uint16_t *pu16Dst, uint8_t cShift, uint32_t *pEFlags));
+typedef FNIEMAIMPLSHIFTU16  *PFNIEMAIMPLSHIFTU16;
+FNIEMAIMPLSHIFTU16 iemAImpl_rol_u16;
+FNIEMAIMPLSHIFTU16 iemAImpl_ror_u16;
+FNIEMAIMPLSHIFTU16 iemAImpl_rcl_u16;
+FNIEMAIMPLSHIFTU16 iemAImpl_rcr_u16;
+FNIEMAIMPLSHIFTU16 iemAImpl_shl_u16;
+FNIEMAIMPLSHIFTU16 iemAImpl_shr_u16;
+FNIEMAIMPLSHIFTU16 iemAImpl_sar_u16;
+/** @} */
+
+/** @name Shift operations on double words (Group 2).
+ * @{ */
+typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLSHIFTU32,(uint32_t *pu32Dst, uint8_t cShift, uint32_t *pEFlags));
+typedef FNIEMAIMPLSHIFTU32  *PFNIEMAIMPLSHIFTU32;
+FNIEMAIMPLSHIFTU32 iemAImpl_rol_u32;
+FNIEMAIMPLSHIFTU32 iemAImpl_ror_u32;
+FNIEMAIMPLSHIFTU32 iemAImpl_rcl_u32;
+FNIEMAIMPLSHIFTU32 iemAImpl_rcr_u32;
+FNIEMAIMPLSHIFTU32 iemAImpl_shl_u32;
+FNIEMAIMPLSHIFTU32 iemAImpl_shr_u32;
+FNIEMAIMPLSHIFTU32 iemAImpl_sar_u32;
+/** @} */
+
+/** @name Shift operations on quad words (Group 2).
+ * @{ */
+typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLSHIFTU64,(uint64_t *pu64Dst, uint8_t cShift, uint32_t *pEFlags));
+typedef FNIEMAIMPLSHIFTU64  *PFNIEMAIMPLSHIFTU64;
+FNIEMAIMPLSHIFTU64 iemAImpl_rol_u64;
+FNIEMAIMPLSHIFTU64 iemAImpl_ror_u64;
+FNIEMAIMPLSHIFTU64 iemAImpl_rcl_u64;
+FNIEMAIMPLSHIFTU64 iemAImpl_rcr_u64;
+FNIEMAIMPLSHIFTU64 iemAImpl_shl_u64;
+FNIEMAIMPLSHIFTU64 iemAImpl_shr_u64;
+FNIEMAIMPLSHIFTU64 iemAImpl_sar_u64;
+/** @} */
+
+/** @name Multiplication and division operations.
+ * @{ */
+typedef IEM_DECL_IMPL_TYPE(int, FNIEMAIMPLMULDIVU8,(uint16_t *pu16AX, uint8_t u8FactorDivisor, uint32_t *pEFlags));
+typedef FNIEMAIMPLMULDIVU8  *PFNIEMAIMPLMULDIVU8;
+FNIEMAIMPLMULDIVU8 iemAImpl_mul_u8, iemAImpl_imul_u8;
+FNIEMAIMPLMULDIVU8 iemAImpl_div_u8, iemAImpl_idiv_u8;
+
+typedef IEM_DECL_IMPL_TYPE(int, FNIEMAIMPLMULDIVU16,(uint16_t *pu16AX, uint16_t *pu16DX, uint16_t u16FactorDivisor, uint32_t *pEFlags));
+typedef FNIEMAIMPLMULDIVU16  *PFNIEMAIMPLMULDIVU16;
+FNIEMAIMPLMULDIVU16 iemAImpl_mul_u16, iemAImpl_imul_u16;
+FNIEMAIMPLMULDIVU16 iemAImpl_div_u16, iemAImpl_idiv_u16;
+
+typedef IEM_DECL_IMPL_TYPE(int, FNIEMAIMPLMULDIVU32,(uint32_t *pu32EAX, uint32_t *pu32EDX, uint32_t u32FactorDivisor, uint32_t *pEFlags));
+typedef FNIEMAIMPLMULDIVU32  *PFNIEMAIMPLMULDIVU32;
+FNIEMAIMPLMULDIVU32 iemAImpl_mul_u32, iemAImpl_imul_u32;
+FNIEMAIMPLMULDIVU32 iemAImpl_div_u32, iemAImpl_idiv_u32;
+
+typedef IEM_DECL_IMPL_TYPE(int, FNIEMAIMPLMULDIVU64,(uint64_t *pu64RAX, uint64_t *pu64RDX, uint64_t u64FactorDivisor, uint32_t *pEFlags));
+typedef FNIEMAIMPLMULDIVU64  *PFNIEMAIMPLMULDIVU64;
+FNIEMAIMPLMULDIVU64 iemAImpl_mul_u64, iemAImpl_imul_u64;
+FNIEMAIMPLMULDIVU64 iemAImpl_div_u64, iemAImpl_idiv_u64;
+/** @} */
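+
+/* Usage sketch (illustrative only; iemRaiseDivideError is a hypothetical
+ * helper): unlike the other workers these return an int status so that the
+ * caller can turn division overflow or divide-by-zero into a #DE: */
+#if 0 /* usage sketch, not compiled */
+    if (iemAImpl_div_u8(&pCtx->ax, u8Divisor, &pCtx->eflags.u32) != 0)
+        return iemRaiseDivideError(pIemCpu);
+#endif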
+
+
+/** @name C instruction implementations for anything slightly complicated.
+ * @{ */
+
+/**
+ * For typedef'ing or declaring a C instruction implementation function taking
+ * no extra arguments.
+ *
+ * @param   a_Name              The name of the type.
+ */
+# define IEM_CIMPL_DECL_TYPE_0(a_Name) \
+    IEM_DECL_IMPL_TYPE(VBOXSTRICTRC, a_Name, (PIEMCPU pIemCpu, uint8_t cbInstr))
+/**
+ * For defining a C instruction implementation function taking no extra
+ * arguments.
+ *
+ * @param   a_Name              The name of the function.
+ */
+# define IEM_CIMPL_DEF_0(a_Name) \
+    IEM_DECL_IMPL_DEF(VBOXSTRICTRC, a_Name, (PIEMCPU pIemCpu, uint8_t cbInstr))
+/**
+ * For calling a C instruction implementation function taking no extra
+ * arguments.
+ *
+ * This special call macro adds default arguments to the call and allows us to
+ * change these later.
+ *
+ * @param   a_fn                The name of the function.
+ */
+# define IEM_CIMPL_CALL_0(a_fn)            a_fn(pIemCpu, cbInstr)
+
+/**
+ * For typedef'ing or declaring a C instruction implementation function taking
+ * one extra argument.
+ *
+ * @param   a_Name              The name of the type.
+ * @param   a_Type0             The argument type.
+ * @param   a_Arg0              The argument name.
+ */
+# define IEM_CIMPL_DECL_TYPE_1(a_Name, a_Type0, a_Arg0) \
+    IEM_DECL_IMPL_TYPE(VBOXSTRICTRC, a_Name, (PIEMCPU pIemCpu, uint8_t cbInstr, a_Type0 a_Arg0))
+/**
+ * For defining a C instruction implementation function taking one extra
+ * argument.
+ *
+ * @param   a_Name              The name of the function.
+ * @param   a_Type0             The argument type.
+ * @param   a_Arg0              The argument name.
+ */
+# define IEM_CIMPL_DEF_1(a_Name, a_Type0, a_Arg0) \
+    IEM_DECL_IMPL_DEF(VBOXSTRICTRC, a_Name, (PIEMCPU pIemCpu, uint8_t cbInstr, a_Type0 a_Arg0))
+/**
+ * For calling a C instruction implementation function taking one extra
+ * argument.
+ *
+ * This special call macro adds default arguments to the call and allows us to
+ * change these later.
+ *
+ * @param   a_fn                The name of the function.
+ * @param   a0                  The name of the 1st argument.
+ */
+# define IEM_CIMPL_CALL_1(a_fn, a0)        a_fn(pIemCpu, cbInstr, (a0))
+
+/**
+ * For typedef'ing or declaring a C instruction implementation function taking
+ * two extra arguments.
+ *
+ * @param   a_Name              The name of the type.
+ * @param   a_Type0             The type of the 1st argument.
+ * @param   a_Arg0              The name of the 1st argument.
+ * @param   a_Type1             The type of the 2nd argument.
+ * @param   a_Arg1              The name of the 2nd argument.
+ */
+# define IEM_CIMPL_DECL_TYPE_2(a_Name, a_Type0, a_Arg0, a_Type1, a_Arg1) \
+    IEM_DECL_IMPL_TYPE(VBOXSTRICTRC, a_Name, (PIEMCPU pIemCpu, uint8_t cbInstr, a_Type0 a_Arg0, a_Type1 a_Arg1))
+/**
+ * For defining a C instruction implementation function taking two extra
+ * arguments.
+ *
+ * @param   a_Name              The name of the function.
+ * @param   a_Type0             The type of the 1st argument.
+ * @param   a_Arg0              The name of the 1st argument.
+ * @param   a_Type1             The type of the 2nd argument.
+ * @param   a_Arg1              The name of the 2nd argument.
+ */
+# define IEM_CIMPL_DEF_2(a_Name, a_Type0, a_Arg0, a_Type1, a_Arg1) \
+    IEM_DECL_IMPL_DEF(VBOXSTRICTRC, a_Name, (PIEMCPU pIemCpu, uint8_t cbInstr, a_Type0 a_Arg0, a_Type1 a_Arg1))
+/**
+ * For calling a C instruction implementation function taking two extra
+ * arguments.
+ *
+ * This special call macro adds default arguments to the call and allows us to
+ * change these later.
+ *
+ * @param   a_fn                The name of the function.
+ * @param   a0                  The name of the 1st argument.
+ * @param   a1                  The name of the 2nd argument.
+ */
+# define IEM_CIMPL_CALL_2(a_fn, a0, a1)    a_fn(pIemCpu, cbInstr, (a0), (a1))
+
+/**
+ * For typedef'ing or declaring a C instruction implementation function taking
+ * three extra arguments.
+ *
+ * @param   a_Name              The name of the type.
+ * @param   a_Type0             The type of the 1st argument.
+ * @param   a_Arg0              The name of the 1st argument.
+ * @param   a_Type1             The type of the 2nd argument.
+ * @param   a_Arg1              The name of the 2nd argument.
+ * @param   a_Type2             The type of the 3rd argument.
+ * @param   a_Arg2              The name of the 3rd argument.
+ */
+# define IEM_CIMPL_DECL_TYPE_3(a_Name, a_Type0, a_Arg0, a_Type1, a_Arg1, a_Type2, a_Arg2) \
+    IEM_DECL_IMPL_TYPE(VBOXSTRICTRC, a_Name, (PIEMCPU pIemCpu, uint8_t cbInstr, a_Type0 a_Arg0, a_Type1 a_Arg1, a_Type2 a_Arg2))
+/**
+ * For defining a C instruction implementation function taking three extra
+ * arguments.
+ *
+ * @param   a_Name              The name of the function.
+ * @param   a_Type0             The type of the 1st argument.
+ * @param   a_Arg0              The name of the 1st argument.
+ * @param   a_Type1             The type of the 2nd argument.
+ * @param   a_Arg1              The name of the 2nd argument.
+ * @param   a_Type2             The type of the 3rd argument.
+ * @param   a_Arg2              The name of the 3rd argument.
+ */
+# define IEM_CIMPL_DEF_3(a_Name, a_Type0, a_Arg0, a_Type1, a_Arg1, a_Type2, a_Arg2) \
+    IEM_DECL_IMPL_DEF(VBOXSTRICTRC, a_Name, (PIEMCPU pIemCpu, uint8_t cbInstr, a_Type0 a_Arg0, a_Type1 a_Arg1, a_Type2 a_Arg2))
+/**
+ * For calling a C instruction implementation function taking three extra
+ * arguments.
+ *
+ * This special call macro adds default arguments to the call and allows us to
+ * change these later.
+ *
+ * @param   a_fn                The name of the function.
+ * @param   a0                  The name of the 1st argument.
+ * @param   a1                  The name of the 2nd argument.
+ * @param   a2                  The name of the 3rd argument.
+ */
+# define IEM_CIMPL_CALL_3(a_fn, a0, a1, a2) a_fn(pIemCpu, cbInstr, (a0), (a1), (a2))
+
+/** @}  */
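+
+/* Pairing sketch (illustrative only; the helper name is hypothetical): a
+ * helper defined with IEM_CIMPL_DEF_1 receives pIemCpu and cbInstr as the
+ * hidden standard arguments, and a decoder function where those names are
+ * in scope invokes it through the matching call macro: */
+#if 0 /* usage sketch, not compiled */
+IEM_CIMPL_DEF_1(iemExampleCImpl_dummy, uint8_t, u8Arg)
+{
+    NOREF(u8Arg);
+    return VINF_SUCCESS;
+}
+
+/* ...inside a decoder function: */
+    return IEM_CIMPL_CALL_1(iemExampleCImpl_dummy, 0x42);
+#endif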
+
+
+/** @} */
+
+RT_C_DECLS_END
+
+#endif
+
+
Index: /trunk/src/VBox/VMM/testcase/Makefile.kmk
===================================================================
--- /trunk/src/VBox/VMM/testcase/Makefile.kmk	(revision 36767)
+++ /trunk/src/VBox/VMM/testcase/Makefile.kmk	(revision 36768)
@@ -43,5 +43,6 @@
   	tstCompiler \
   	tstVMMR0CallHost-1 \
-  	tstVMMR0CallHost-2
+  	tstVMMR0CallHost-2 \
+  	tstX86-1
   ifneq ($(KBUILD_TARGET),l4)
    PROGRAMS += tstAnimate
@@ -232,4 +233,8 @@
 tstCompiler_SOURCES     = tstCompiler.cpp
 tstCompiler_LIBS        = $(LIB_VMM) $(LIB_REM) $(LIB_RUNTIME)
+
+tstX86-1_TEMPLATE       = VBOXR3TSTEXE
+tstX86-1_SOURCES        = tstX86-1.cpp tstX86-1A.asm
+tstX86-1_LIBS           = $(LIB_RUNTIME)
 
 ifdef VBOX_WITH_RAW_MODE
Index: /trunk/src/VBox/VMM/testcase/tstVMStructRC.cpp
===================================================================
--- /trunk/src/VBox/VMM/testcase/tstVMStructRC.cpp	(revision 36767)
+++ /trunk/src/VBox/VMM/testcase/tstVMStructRC.cpp	(revision 36768)
@@ -77,4 +77,5 @@
 #include "CSAMInternal.h"
 #include "EMInternal.h"
+#include "IEMInternal.h"
 #include "REMInternal.h"
 #include <VBox/vmm/vm.h>
@@ -150,4 +151,22 @@
     GEN_CHECK_OFF(EMCPU, pStatsRC);
     GEN_CHECK_OFF(EMCPU, pCliStatTree);
+
+    GEN_CHECK_SIZE(IEMCPU);
+    GEN_CHECK_OFF(IEMCPU, pCtxR0);
+    GEN_CHECK_OFF(IEMCPU, pCtxR3);
+    GEN_CHECK_OFF(IEMCPU, pCtxRC);
+    GEN_CHECK_OFF(IEMCPU, offVM);
+    GEN_CHECK_OFF(IEMCPU, offVMCpu);
+    GEN_CHECK_OFF(IEMCPU, enmCpuMode);
+    GEN_CHECK_OFF(IEMCPU, fPrefixes);
+    GEN_CHECK_OFF(IEMCPU, abOpcode);
+    GEN_CHECK_OFF(IEMCPU, cActiveMappings);
+    GEN_CHECK_OFF(IEMCPU, iNextMapping);
+    GEN_CHECK_OFF(IEMCPU, aMemMappings);
+    GEN_CHECK_OFF(IEMCPU, aMemMappings[1]);
+    GEN_CHECK_OFF(IEMCPU, aBounceBuffers);
+    GEN_CHECK_OFF(IEMCPU, aBounceBuffers[1]);
+    GEN_CHECK_OFF(IEMCPU, aMemBbMappings);
+    GEN_CHECK_OFF(IEMCPU, aMemBbMappings[1]);
 
     GEN_CHECK_SIZE(IOM);
Index: /trunk/src/VBox/VMM/testcase/tstVMStructSize.cpp
===================================================================
--- /trunk/src/VBox/VMM/testcase/tstVMStructSize.cpp	(revision 36767)
+++ /trunk/src/VBox/VMM/testcase/tstVMStructSize.cpp	(revision 36768)
@@ -49,4 +49,5 @@
 #include "CSAMInternal.h"
 #include "EMInternal.h"
+#include "IEMInternal.h"
 #include "REMInternal.h"
 #include "../VMMR0/GMMR0Internal.h"
@@ -211,4 +212,5 @@
     CHECK_PADDING_VM(64, csam);
     CHECK_PADDING_VM(64, em);
+    /*CHECK_PADDING_VM(64, iem);*/
     CHECK_PADDING_VM(64, tm);
     CHECK_PADDING_VM(64, dbgf);
@@ -222,4 +224,5 @@
     CHECK_PADDING_VMCPU(64, hwaccm);
     CHECK_PADDING_VMCPU(64, em);
+    CHECK_PADDING_VMCPU(64, iem);
     CHECK_PADDING_VMCPU(64, trpm);
     CHECK_PADDING_VMCPU(64, tm);
Index: /trunk/src/VBox/VMM/testcase/tstX86-1.cpp
===================================================================
--- /trunk/src/VBox/VMM/testcase/tstX86-1.cpp	(revision 36768)
+++ /trunk/src/VBox/VMM/testcase/tstX86-1.cpp	(revision 36768)
@@ -0,0 +1,147 @@
+/* $Id$ */
+/** @file
+ * X86 instruction set exploration/testcase #1.
+ */
+
+/*
+ * Copyright (C) 2011 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*******************************************************************************
+*   Header Files                                                               *
+*******************************************************************************/
+#include <iprt/test.h>
+#include <iprt/param.h>
+
+#ifdef RT_OS_WINDOWS
+# include <Windows.h>
+#else
+# ifdef RT_OS_DARWIN
+#  define _XOPEN_SOURCE
+# endif
+# include <signal.h>
+# include <ucontext.h>
+# define USE_SIGNAL
+#endif
+
+
+/*******************************************************************************
+*   Structures and Typedefs                                                    *
+*******************************************************************************/
+typedef struct TRAPINFO
+{
+    uintptr_t   uTrapPC;
+    uintptr_t   uResumePC;
+    uint8_t     u8Trap;
+    uint8_t     cbInstr;
+    uint8_t     auAlignment[sizeof(uintptr_t) * 2 - 2];
+} TRAPINFO;
+typedef TRAPINFO const *PCTRAPINFO;
+
+/*******************************************************************************
+*   Global Variables                                                           *
+*******************************************************************************/
+RT_C_DECLS_BEGIN
+uint8_t *g_pbEfPage = NULL;
+extern TRAPINFO g_aTrapInfo[];
+RT_C_DECLS_END
+
+
+/*******************************************************************************
+*   Internal Functions                                                         *
+*******************************************************************************/
+DECLASM(int32_t) x861_Test1(void);
+
+
+
+static PCTRAPINFO findTrapInfo(uintptr_t uTrapPC)
+{
+    for (unsigned i = 0; g_aTrapInfo[i].uTrapPC; i++)
+        if (g_aTrapInfo[i].uTrapPC == uTrapPC)
+            return &g_aTrapInfo[i];
+
+    return NULL;
+}
+
+#ifdef USE_SIGNAL
+static void sigHandler(int iSig, siginfo_t *pSigInfo, void *pvSigCtx)
+{
+    ucontext_t *pCtx = (ucontext_t *)pvSigCtx;
+# if defined(RT_ARCH_AMD64) && defined(RT_OS_DARWIN)
+    uintptr_t  *puPC = (uintptr_t *)&pCtx->uc_mcontext->__ss.__rip;
+# elif defined(RT_ARCH_AMD64)
+    uintptr_t  *puPC = (uintptr_t *)&pCtx->uc_mcontext.gregs[REG_RIP];
+# elif defined(RT_ARCH_X86) && defined(RT_OS_DARWIN)
+    uintptr_t  *puPC = (uintptr_t *)&pCtx->uc_mcontext->__ss.__eip;
+# elif defined(RT_ARCH_X86)
+    uintptr_t  *puPC = (uintptr_t *)&pCtx->uc_mcontext.gregs[REG_EIP];
+# else
+    uintptr_t  *puPC = NULL;
+# endif
+
+    PCTRAPINFO  pTrapInfo = findTrapInfo(*puPC);
+    if (pTrapInfo)
+    {
+        /** @todo verify the kind of trap */
+        *puPC = pTrapInfo->uResumePC;
+        return;
+    }
+
+    /* Not one of ours - restore the default action so the process dies. */
+    signal(iSig, SIG_DFL);
+}
+#else
+
+#endif
+
+int main()
+{
+    /*
+     * Set up the test environment.
+     */
+    RTTEST hTest;
+    RTEXITCODE rcExit = RTTestInitAndCreate("tstX86-1", &hTest);
+    if (rcExit != RTEXITCODE_SUCCESS)
+        return rcExit;
+    g_pbEfPage = (uint8_t *)RTTestGuardedAllocTail(hTest, PAGE_SIZE);
+    RTTESTI_CHECK(g_pbEfPage != NULL);
+
+#ifdef USE_SIGNAL
+    static int const s_aiSigs[] = { SIGBUS, SIGSEGV, SIGFPE };
+    for (unsigned i = 0; i < RT_ELEMENTS(s_aiSigs); i++)
+    {
+        struct sigaction SigAct;
+        RTTESTI_CHECK_BREAK(sigaction(s_aiSigs[i], NULL, &SigAct) == 0);
+        SigAct.sa_sigaction = sigHandler;
+        SigAct.sa_flags    |= SA_SIGINFO;
+        RTTESTI_CHECK(sigaction(s_aiSigs[i], &SigAct, NULL) == 0);
+    }
+#else
+    /** @todo implement me. */
+#endif
+
+
+    if (!RTTestErrorCount(hTest))
+    {
+
+        /*
+         * Do the testing.
+         */
+        RTTestSub(hTest, "part 1");
+        int32_t rc = x861_Test1();
+        if (rc != 0)
+            RTTestFailed(hTest, "x861_Test1 -> %d", rc);
+    }
+
+    return RTTestSummaryAndDestroy(hTest);
+}
+
Index: /trunk/src/VBox/VMM/testcase/tstX86-1A.asm
===================================================================
--- /trunk/src/VBox/VMM/testcase/tstX86-1A.asm	(revision 36768)
+++ /trunk/src/VBox/VMM/testcase/tstX86-1A.asm	(revision 36768)
@@ -0,0 +1,482 @@
+; $Id$
+;; @file
+; X86 instruction set testcase #1.
+;
+
+;
+; Copyright (C) 2011 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;   Header Files                                                              ;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%include "iprt/asmdefs.mac"
+%include "VBox/x86.mac"
+
+;; @todo Move this to a header?
+struc TRAPINFO
+        .uTrapPC        RTCCPTR_RES 1
+        .uResumePC      RTCCPTR_RES 1
+        .u8TrapNo       resb 1
+        .cbInstr        resb 1
+        .au8Padding     resb (RTCCPTR_CB*2 - 2)
+endstruc
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;   Global Variables                                                          ;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+BEGINDATA
+extern NAME(g_pbEfPage)
+
+g_szAlpha:
+        db      "abcdefghijklmnopqrstuvwxyz", 0
+g_szAlpha_end:
+%define g_cchAlpha (g_szAlpha_end - g_szAlpha)
+        db      0, 0, 0
+
+;;
+; The last global data item. We build this as we write the code.
+GLOBALNAME g_aTrapInfo
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;   Defined Constants And Macros                                              ;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define X86_XCPT_GP 13
+%define X86_XCPT_PF 14
+
+;;
+; Macro for recording a trapping instruction (simple).
+;
+; @param        1       The trap number.
+; @param        2+      The instruction which should trap.
+%macro ShouldTrap 2+
+..trap:
+        %2
+..trap_end:
+        mov     eax, __LINE__
+        jmp     .failed
+BEGINDATA
+..trapinfo: istruc TRAPINFO
+        at TRAPINFO.uTrapPC,    RTCCPTR_DEF     ..trap
+        at TRAPINFO.uResumePC,  RTCCPTR_DEF     ..resume
+        at TRAPINFO.u8TrapNo,   db              %1
+        at TRAPINFO.cbInstr,    db              (..trap_end - ..trap)
+iend
+BEGINCODE
+..resume:
+%endmacro
+
+
+
+BEGINCODE
+
+;;
+; Loads all general registers except xBP and xSP with unique values.
+;
+x861_LoadUniqueRegValues:
+%ifdef RT_ARCH_AMD64
+        mov     rax, 00000000000000000h
+        mov     rcx, 01111111111111111h
+        mov     rdx, 02222222222222222h
+        mov     rbx, 03333333333333333h
+        mov     rsi, 06666666666666666h
+        mov     rdi, 07777777777777777h
+        mov     r8,  08888888888888888h
+        mov     r9,  09999999999999999h
+        mov     r10, 0aaaaaaaaaaaaaaaah
+        mov     r11, 0bbbbbbbbbbbbbbbbh
+        mov     r12, 0cccccccccccccccch
+        mov     r13, 0ddddddddddddddddh
+        mov     r14, 0eeeeeeeeeeeeeeeeh
+        mov     r15, 0ffffffffffffffffh
+%else
+        mov     eax, 000000000h
+        mov     ecx, 011111111h
+        mov     edx, 022222222h
+        mov     ebx, 033333333h
+        mov     esi, 066666666h
+        mov     edi, 077777777h
+%endif
+        ret
+; end x861_LoadUniqueRegValues
+
+
+;;
+; Clears all general registers except xBP and xSP.
+;
+x861_ClearRegisters:
+        xor     eax, eax
+        xor     ebx, ebx
+        xor     ecx, ecx
+        xor     edx, edx
+        xor     esi, esi
+        xor     edi, edi
+%ifdef RT_ARCH_AMD64
+        xor     r8,  r8
+        xor     r9,  r9
+        xor     r10, r10
+        xor     r11, r11
+        xor     r12, r12
+        xor     r13, r13
+        xor     r14, r14
+        xor     r15, r15
+%endif
+        ret
+; x861_ClearRegisters
+
+
+BEGINPROC x861_Test1
+        push    xBP
+        mov     xBP, xSP
+        pushf
+        push    xBX
+        push    xCX
+        push    xDX
+        push    xSI
+        push    xDI
+%ifdef RT_ARCH_AMD64
+        push    r8
+        push    r9
+        push    r10
+        push    r11
+        push    r12
+        push    r13
+        push    r14
+        push    r15
+%endif
+
+        ;
+        ; Odd push behavior
+        ;
+%ifdef RT_ARCH_X86
+        ; upper word of a 'push cs' is cleared.
+        mov     eax, __LINE__
+        mov     dword [esp - 4], 0f0f0f0fh
+        push    cs
+        pop     ecx
+        mov     bx, cs
+        and     ebx, 0000ffffh
+        cmp     ecx, ebx
+        jne     .failed
+
+        ; upper word of a 'push ds' is cleared.
+        mov     eax, __LINE__
+        mov     dword [esp - 4], 0f0f0f0fh
+        push    ds
+        pop     ecx
+        mov     bx, ds
+        and     ebx, 0000ffffh
+        cmp     ecx, ebx
+        jne     .failed
+
+        ; upper word of a 'push es' is cleared.
+        mov     eax, __LINE__
+        mov     dword [esp - 4], 0f0f0f0fh
+        push    es
+        pop     ecx
+        mov     bx, es
+        and     ebx, 0000ffffh
+        cmp     ecx, ebx
+        jne     .failed
+%endif ; RT_ARCH_X86
+
+        ; The upper part of a 'push fs' is cleared.
+        mov     eax, __LINE__
+        xor     ecx, ecx
+        not     xCX
+        push    xCX
+        pop     xCX
+        push    fs
+        pop     xCX
+        mov     bx, fs
+        and     ebx, 0000ffffh
+        cmp     xCX, xBX
+        jne     .failed
+
+        ; The upper part of a 'push gs' is cleared.
+        mov     eax, __LINE__
+        xor     ecx, ecx
+        not     xCX
+        push    xCX
+        pop     xCX
+        push    gs
+        pop     xCX
+        mov     bx, gs
+        and     ebx, 0000ffffh
+        cmp     xCX, xBX
+        jne     .failed
+
+%ifdef RT_ARCH_AMD64
+        ; REX.B works with 'push r64'.
+        call    x861_LoadUniqueRegValues
+        mov     eax, __LINE__
+        push    rcx
+        pop     rdx
+        cmp     rdx, rcx
+        jne     .failed
+
+        call    x861_LoadUniqueRegValues
+        mov     eax, __LINE__
+        db 041h                         ; REX.B
+        push    rcx
+        pop     rdx
+        cmp     rdx, r9
+        jne     .failed
+
+        call    x861_LoadUniqueRegValues
+        mov     eax, __LINE__
+        db 042h                         ; REX.X
+        push    rcx
+        pop     rdx
+        cmp     rdx, rcx
+        jne     .failed
+
+        call    x861_LoadUniqueRegValues
+        mov     eax, __LINE__
+        db 044h                         ; REX.R
+        push    rcx
+        pop     rdx
+        cmp     rdx, rcx
+        jne     .failed
+
+        call    x861_LoadUniqueRegValues
+        mov     eax, __LINE__
+        db 048h                         ; REX.W
+        push    rcx
+        pop     rdx
+        cmp     rdx, rcx
+        jne     .failed
+
+        call    x861_LoadUniqueRegValues
+        mov     eax, __LINE__
+        db 04fh                         ; REX.*
+        push    rcx
+        pop     rdx
+        cmp     rdx, r9
+        jne     .failed
+%endif
+
+        ;
+        ; Zero extending when moving from a segment register, as well as memory access sizes.
+        ;
+        call    x861_LoadUniqueRegValues
+        mov     eax, __LINE__
+        mov     ecx, ds
+        shr     xCX, 16
+        cmp     xCX, 0
+        jnz     .failed
+
+%ifdef RT_ARCH_AMD64
+        call    x861_LoadUniqueRegValues
+        mov     eax, __LINE__
+        mov     rcx, ds
+        shr     rcx, 16
+        cmp     rcx, 0
+        jnz     .failed
+%endif
+
+        call    x861_LoadUniqueRegValues
+        mov     eax, __LINE__
+        mov     xDX, xCX
+        mov     cx, ds
+        shr     xCX, 16
+        shr     xDX, 16
+        cmp     xCX, xDX
+        jnz     .failed
+
+        ; Loading is always a word access.
+        mov     eax, __LINE__
+        mov     xDI, [NAME(g_pbEfPage)]
+        lea     xDI, [xDI + 0x1000 - 2]
+        mov     xDX, es
+        mov     [xDI], dx
+        mov     es, [xDI]               ; should not crash
+
+        ; Saving is always a word access.
+        mov     eax, __LINE__
+        mov     xDI, [NAME(g_pbEfPage)]
+        mov     dword [xDI + 0x1000 - 4], -1
+        mov     [xDI + 0x1000 - 2], ss ; Should not crash.
+        mov     bx, ss
+        mov     cx, [xDI + 0x1000 - 2]
+        cmp     cx, bx
+        jne     .failed
+
+%ifdef RT_ARCH_AMD64
+        ; Check that the rex.R and rex.W bits don't have any influence over a memory write.
+        call    x861_ClearRegisters
+        mov     eax, __LINE__
+        mov     xDI, [NAME(g_pbEfPage)]
+        mov     dword [xDI + 0x1000 - 4], -1
+        db 04ah
+        mov     [xDI + 0x1000 - 2], ss ; Should not crash.
+        mov     bx, ss
+        mov     cx, [xDI + 0x1000 - 2]
+        cmp     cx, bx
+        jne     .failed
+%endif
+
+
+        ;
+        ; Check what happens when both string prefixes are used.
+        ;
+        cld
+        mov     dx, ds
+        mov     es, dx
+
+        ; check that repne scasb (al=0) behaves like expected.
+        mov     xDI, g_szAlpha
+        xor     eax, eax                ; find the end
+        mov     ecx, g_cchAlpha + 1
+        repne scasb
+        cmp     ecx, 1
+        mov     eax, __LINE__
+        jne     .failed
+
+        ; check that repe scasb (al=0) behaves like expected.
+        mov     xDI, g_szAlpha
+        xor     eax, eax                ; find the end
+        mov     ecx, g_cchAlpha + 1
+        repe scasb
+        cmp     ecx, g_cchAlpha
+        mov     eax, __LINE__
+        jne     .failed
+
+        ; repne is last, it wins.
+        mov     xDI, g_szAlpha
+        xor     eax, eax                ; find the end
+        mov     ecx, g_cchAlpha + 1
+        db 0f3h                         ; repe  - ignored
+        db 0f2h                         ; repne
+        scasb
+        cmp     ecx, 1
+        mov     eax, __LINE__
+        jne     .failed
+
+        ; repe is last, it wins.
+        mov     xDI, g_szAlpha
+        xor     eax, eax                ; find the end
+        mov     ecx, g_cchAlpha + 1
+        db 0f2h                         ; repne - ignored
+        db 0f3h                         ; repe
+        scasb
+        cmp     ecx, g_cchAlpha
+        mov     eax, __LINE__
+        jne     .failed
+
+        ;
+        ; Check if stosb works with both prefixes.
+        ;
+        cld
+        mov     dx, ds
+        mov     es, dx
+        mov     xDI, [NAME(g_pbEfPage)]
+        xor     eax, eax
+        mov     ecx, 01000h
+        rep stosb
+
+        mov     xDI, [NAME(g_pbEfPage)]
+        mov     ecx, 4
+        mov     eax, 0ffh
+        db 0f2h                         ; repne
+        stosb
+        mov     eax, __LINE__
+        cmp     ecx, 0
+        jne     .failed
+        mov     eax, __LINE__
+        mov     xDI, [NAME(g_pbEfPage)]
+        cmp     dword [xDI], 0ffffffffh
+        jne     .failed
+        cmp     dword [xDI+4], 0
+        jne     .failed
+
+        mov     xDI, [NAME(g_pbEfPage)]
+        mov     ecx, 4
+        mov     eax, 0feh
+        db 0f3h                         ; repe
+        stosb
+        mov     eax, __LINE__
+        cmp     ecx, 0
+        jne     .failed
+        mov     eax, __LINE__
+        mov     xDI, [NAME(g_pbEfPage)]
+        cmp     dword [xDI], 0fefefefeh
+        jne     .failed
+        cmp     dword [xDI+4], 0
+        jne     .failed
+
+        ;
+        ; String operations shouldn't crash because of an invalid address if rCX is 0.
+        ;
+        mov     eax, __LINE__
+        cld
+        mov     dx, ds
+        mov     es, dx
+        mov     xDI, [NAME(g_pbEfPage)]
+        xor     xCX, xCX
+        rep stosb                       ; no trap
+
+        ;
+        ; INS/OUTS will trap in ring-3 even when rCX is 0. (ASSUMES IOPL < 3)
+        ;
+        mov     eax, __LINE__
+        cld
+        mov     dx, ss
+        mov     ss, dx
+        mov     xDI, xSP
+        xor     xCX, xCX
+        ShouldTrap X86_XCPT_GP, rep insb
+
+
+
+.success:
+        xor     eax, eax
+.return:
+%ifdef RT_ARCH_AMD64
+        pop     r15
+        pop     r14
+        pop     r13
+        pop     r12
+        pop     r11
+        pop     r10
+        pop     r9
+        pop     r8
+%endif
+        pop     xDI
+        pop     xSI
+        pop     xDX
+        pop     xCX
+        pop     xBX
+        popf
+        leave
+        ret
+
+.failed2:
+        mov     eax, -1
+.failed:
+        jmp     .return
+ENDPROC   x861_Test1
+
+
+;;
+; Terminate the trap info array with a NIL entry.
+BEGINDATA
+GLOBALNAME g_aTrapInfoEnd
+istruc TRAPINFO
+        at TRAPINFO.uTrapPC,    RTCCPTR_DEF     0
+        at TRAPINFO.uResumePC,  RTCCPTR_DEF     0
+        at TRAPINFO.u8TrapNo,   db              0
+        at TRAPINFO.cbInstr,    db              0
+iend
+
Index: /trunk/src/recompiler/Makefile.kmk
===================================================================
--- /trunk/src/recompiler/Makefile.kmk	(revision 36767)
+++ /trunk/src/recompiler/Makefile.kmk	(revision 36768)
@@ -65,4 +65,7 @@
 #VBoxRemPrimary_DEFS           += DEBUG_ALL_LOGGING DEBUG_DISAS DEBUG_PCALL DEBUG_EXEC DEBUG_FLUSH DEBUG_IOPORT DEBUG_SIGNAL DEBUG_TLB_CHECK DEBUG_TB_INVALIDATE DEBUG_TLB  # Enables huge amounts of debug logging.
 #VBoxRemPrimary_DEFS           += DEBUG_TMP_LOGGING # log qemu parts to "/tmp/vbox-qemu.log" - does not work with VBoxREM2.
+ifdef IEM_VERIFICATION_MODE
+ VBoxRemPrimary_DEFS          += IEM_VERIFICATION_MODE
+endif
 VBoxRemPrimary_DEFS.linux      = _GNU_SOURCE
 ifdef VBOX_SOLARIS_10
Index: /trunk/src/recompiler/cpu-exec.c
===================================================================
--- /trunk/src/recompiler/cpu-exec.c	(revision 36767)
+++ /trunk/src/recompiler/cpu-exec.c	(revision 36768)
@@ -358,4 +358,8 @@
                         /* Clear CPU_INTERRUPT_SINGLE_INSTR and leave CPU_INTERRUPT_SINGLE_INSTR_IN_FLIGHT set. */
                         ASMAtomicAndS32((int32_t volatile *)&env->interrupt_request, ~CPU_INTERRUPT_SINGLE_INSTR);
+#ifdef IEM_VERIFICATION_MODE
+                        env->exception_index = ret = EXCP_SINGLE_INSTR;
+                        cpu_loop_exit();
+#endif
                     }
 
