Index: /trunk/src/VBox/VMM/VMMAll/IEMAll.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMAll/IEMAll.cpp	(revision 37078)
+++ /trunk/src/VBox/VMM/VMMAll/IEMAll.cpp	(revision 37079)
@@ -45,5 +45,5 @@
 *   Header Files                                                               *
 *******************************************************************************/
-#define LOG_GROUP   LOG_GROUP_EM /** @todo add log group */
+#define LOG_GROUP   LOG_GROUP_IEM
 #include <VBox/vmm/iem.h>
 #include <VBox/vmm/pgm.h>
@@ -69,4 +69,44 @@
 *   Structures and Typedefs                                                    *
 *******************************************************************************/
+/**
+ * Generic pointer union.
+ * @todo move me to iprt/types.h
+ */
+typedef union RTPTRUNION
+{
+    /** Pointer into the void... */
+    void        *pv;
+    /** Pointer to a 8-bit unsigned value. */
+    uint8_t     *pu8;
+    /** Pointer to a 16-bit unsigned value. */
+    uint16_t    *pu16;
+    /** Pointer to a 32-bit unsigned value. */
+    uint32_t    *pu32;
+    /** Pointer to a 64-bit unsigned value. */
+    uint64_t    *pu64;
+} RTPTRUNION;
+/** Pointer to a pointer union. */
+typedef RTPTRUNION *PRTPTRUNION;
+
+/**
+ * Generic const pointer union.
+ * @todo move me to iprt/types.h
+ */
+typedef union RTCPTRUNION
+{
+    /** Pointer into the void... */
+    void const       *pv;
+    /** Pointer to a 8-bit unsigned value. */
+    uint8_t const     *pu8;
+    /** Pointer to a 16-bit unsigned value. */
+    uint16_t const    *pu16;
+    /** Pointer to a 32-bit unsigned value. */
+    uint32_t const    *pu32;
+    /** Pointer to a 64-bit unsigned value. */
+    uint64_t const    *pu64;
+} RTCPTRUNION;
+/** Pointer to a const pointer union. */
+typedef RTCPTRUNION *PRTCPTRUNION;
+
 /** @typedef PFNIEMOP
  * Pointer to an opcode decoder function.
@@ -139,4 +179,13 @@
 *   Defined Constants And Macros                                               *
 *******************************************************************************/
+/** @name IEM status codes.
+ *
+ * Not quite sure how this will play out in the end, just aliasing safe status
+ * codes for now.
+ *
+ * @{ */
+#define VINF_IEM_RAISED_XCPT    VINF_EM_RESCHEDULE
+/** @} */
+
 /** Temporary hack to disable the double execution.  Will be removed in favor
  * of a dedicated execution mode in EM. */
@@ -545,6 +594,9 @@
 static VBOXSTRICTRC     iemRaiseGeneralProtectionFaultBySelector(PIEMCPU pIemCpu, RTSEL uSel);
 static VBOXSTRICTRC     iemRaiseSelectorBounds(PIEMCPU pIemCpu, uint32_t iSegReg, uint32_t fAccess);
+static VBOXSTRICTRC     iemRaiseSelectorBoundsBySelector(PIEMCPU pIemCpu, RTSEL Sel);
 static VBOXSTRICTRC     iemRaiseSelectorInvalidAccess(PIEMCPU pIemCpu, uint32_t iSegReg, uint32_t fAccess);
 static VBOXSTRICTRC     iemRaisePageFault(PIEMCPU pIemCpu, RTGCPTR GCPtrWhere, uint32_t fAccess, int rc);
+static VBOXSTRICTRC     iemMemMap(PIEMCPU pIemCpu, void **ppvMem, size_t cbMem, uint8_t iSegReg, RTGCPTR GCPtrMem, uint32_t fAccess);
+static VBOXSTRICTRC     iemMemCommitAndUnmap(PIEMCPU pIemCpu, void *pvMem, uint32_t fAccess);
 static VBOXSTRICTRC     iemMemFetchDataU32(PIEMCPU pIemCpu, uint32_t *pu32Dst, uint8_t iSegReg, RTGCPTR GCPtrMem);
 static VBOXSTRICTRC     iemMemFetchDataU64(PIEMCPU pIemCpu, uint64_t *pu64Dst, uint8_t iSegReg, RTGCPTR GCPtrMem);
@@ -552,4 +604,5 @@
 static VBOXSTRICTRC     iemMemStackPushCommitSpecial(PIEMCPU pIemCpu, void *pvMem, uint64_t uNewRsp);
 static VBOXSTRICTRC     iemMemStackPushBeginSpecial(PIEMCPU pIemCpu, size_t cbMem, void **ppvMem, uint64_t *puNewRsp);
+static VBOXSTRICTRC     iemMemMarkSelDescAccessed(PIEMCPU pIemCpu, uint16_t uSel);
 
 #ifdef IEM_VERIFICATION_MODE
@@ -1469,5 +1522,5 @@
         iemRaiseXcptAdjustState(pCtx, u8Vector);
 
-    return VINF_SUCCESS;
+    return fFlags & IEM_XCPT_FLAGS_T_CPU_XCPT ? VINF_IEM_RAISED_XCPT : VINF_SUCCESS;
 }
 
@@ -1495,7 +1548,4 @@
                             uint64_t    uCr2)
 {
-    Log(("iemRaiseXcptOrIntInProtMode: %#x at %04x:%08RGv cbInstr=%#x fFlags=%#x uErr=%#x uCr2=%llx\n",
-         u8Vector, pCtx->cs, pCtx->rip, cbInstr, fFlags, uErr, uCr2));
-
     /*
      * Read the IDT entry.
@@ -1619,8 +1669,8 @@
                            ? Idte.Gate.u16OffsetLow
                            : Idte.Gate.u16OffsetLow | ((uint32_t)Idte.Gate.u16OffsetHigh << 16);
-    uint32_t cbLimit = X86DESC_LIMIT(DescCS.Legacy);
+    uint32_t cbLimitCS = X86DESC_LIMIT(DescCS.Legacy);
     if (DescCS.Legacy.Gen.u1Granularity)
-        cbLimit = (cbLimit << PAGE_SHIFT) | PAGE_OFFSET_MASK;
-    if (uNewEip > X86DESC_LIMIT(DescCS.Legacy))
+        cbLimitCS = (cbLimitCS << PAGE_SHIFT) | PAGE_OFFSET_MASK;
+    if (uNewEip > cbLimitCS)
     {
         Log(("RaiseXcptOrIntInProtMode %#x - CS=%#x - DPL (%d) > CPL (%d) -> #GP\n",
@@ -1642,15 +1692,12 @@
     uint8_t const   uNewCpl = DescCS.Legacy.Gen.u4Type & X86_SEL_TYPE_CONF
                             ? pIemCpu->uCpl : DescCS.Legacy.Gen.u2Dpl;
-    uint32_t        uNewEsp;
-    RTSEL           NewSS;
-    uint32_t        fNewSSAttr;
-    uint32_t        cbNewSSLimit;
-    uint64_t        uNewSSBase;
-
     if (uNewCpl != pIemCpu->uCpl)
     {
+        RTSEL    NewSS;
+        uint32_t uNewEsp;
         rcStrict = iemRaiseLoadStackFromTss32Or16(pIemCpu, pCtx, uNewCpl, &NewSS, &uNewEsp);
         if (rcStrict != VINF_SUCCESS)
             return rcStrict;
+
         IEMSELDESC DescSS;
         rcStrict = iemMiscValidateNewSS(pIemCpu, pCtx, NewSS, uNewCpl, &DescSS);
@@ -1658,33 +1705,119 @@
             return rcStrict;
 
-        fNewSSAttr   = X86DESC_GET_HID_ATTR(DescSS.Legacy);
-        cbNewSSLimit = X86DESC_LIMIT(DescSS.Legacy);
+        /* Check that there is sufficient space for the stack frame. */
+        uint32_t cbLimitSS = X86DESC_LIMIT(DescSS.Legacy);
         if (DescSS.Legacy.Gen.u1Granularity)
-            cbNewSSLimit = (cbNewSSLimit << PAGE_SHIFT) | PAGE_OFFSET_MASK;
-        uNewSSBase   = X86DESC_BASE(DescSS.Legacy);
-    }
+            cbLimitSS = (cbLimitSS << PAGE_SHIFT) | PAGE_OFFSET_MASK;
+        AssertReturn(!(DescSS.Legacy.Gen.u4Type & X86_SEL_TYPE_DOWN), VERR_NOT_IMPLEMENTED);
+
+        uint8_t const cbStackFrame = fFlags & IEM_XCPT_FLAGS_ERR ? 24 : 20;
+        if (   uNewEsp - 1 > cbLimitSS
+            || uNewEsp < cbStackFrame)
+        {
+            Log(("RaiseXcptOrIntInProtMode: %#x - SS=%#x ESP=%#x cbStackFrame=%#x is out of bounds -> #GP\n",
+                 u8Vector, NewSS, uNewEsp, cbStackFrame));
+            return iemRaiseSelectorBoundsBySelector(pIemCpu, NewSS);
+        }
+
+        /*
+         * Start making changes.
+         */
+
+        /* Create the stack frame. */
+        RTPTRUNION uStackFrame;
+        rcStrict = iemMemMap(pIemCpu, &uStackFrame.pv, cbStackFrame, UINT8_MAX,
+                             uNewEsp - cbStackFrame + X86DESC_BASE(DescSS.Legacy), IEM_ACCESS_STACK_W);
+        if (rcStrict != VINF_SUCCESS)
+            return rcStrict;
+        void * const pvStackFrame = uStackFrame.pv;
+
+        if (fFlags & IEM_XCPT_FLAGS_ERR)
+            *uStackFrame.pu32++ = uErr;
+        uStackFrame.pu32[0] = pCtx->eip;
+        uStackFrame.pu32[1] = (pCtx->cs & ~X86_SEL_RPL) | pIemCpu->uCpl;
+        uStackFrame.pu32[2] = pCtx->eflags.u;
+        uStackFrame.pu32[3] = pCtx->esp;
+        uStackFrame.pu32[4] = pCtx->ss;
+        rcStrict = iemMemCommitAndUnmap(pIemCpu, pvStackFrame, IEM_ACCESS_STACK_W);
+        if (rcStrict != VINF_SUCCESS)
+            return rcStrict;
+
+        /* Mark the selectors 'accessed' (hope this is the correct time). */
+        /** @todo testcase: exactly _when_ are the accessed bits set - before or
+         *        after pushing the stack frame? (Write protect the gdt + stack to
+         *        find out.) */
+        if (!(DescCS.Legacy.Gen.u4Type & X86_SEL_TYPE_ACCESSED))
+        {
+            rcStrict = iemMemMarkSelDescAccessed(pIemCpu, NewCS);
+            if (rcStrict != VINF_SUCCESS)
+                return rcStrict;
+            DescCS.Legacy.Gen.u4Type |= X86_SEL_TYPE_ACCESSED;
+        }
+
+        if (!(DescSS.Legacy.Gen.u4Type & X86_SEL_TYPE_ACCESSED))
+        {
+            rcStrict = iemMemMarkSelDescAccessed(pIemCpu, NewSS);
+            if (rcStrict != VINF_SUCCESS)
+                return rcStrict;
+            DescSS.Legacy.Gen.u4Type |= X86_SEL_TYPE_ACCESSED;
+        }
+
+        /*
+         * Start committing the register changes (joins with the DPL=CPL branch).
+         */
+        pCtx->ss                = NewSS;
+        pCtx->ssHid.u32Limit    = cbLimitSS;
+        pCtx->ssHid.u64Base     = X86DESC_BASE(DescSS.Legacy);
+        pCtx->ssHid.Attr.u      = X86DESC_GET_HID_ATTR(DescSS.Legacy);
+        pCtx->rsp               = uNewEsp - cbStackFrame; /** @todo Is the high word cleared for 16-bit stacks and/or interrupt handlers? */
+        pIemCpu->uCpl           = uNewCpl;
+    }
+    /*
+     * Same privilege, no stack change and smaller stack frame.
+     */
     else
     {
-        uNewEsp      = pCtx->esp;
-        NewSS        = pCtx->ss;
-        fNewSSAttr   = pCtx->ssHid.Attr.u;
-        cbNewSSLimit = pCtx->ssHid.u32Limit;
-        uNewSSBase   = pCtx->ssHid.u64Base;
-    }
-
-    /*
-     * Check if we have the space for the stack frame.
-     */
-
-
-    /*
-     * Set the CS and maybe SS accessed bits.
-     */
-    /** @todo testcase: excatly when is the accessed bit set, before or after
-     *        pushing the stack frame. (write protect the gdt + stack to find
-     *        out). */
-
-
-    return VERR_NOT_IMPLEMENTED;
+        uint64_t        uNewRsp;
+        RTPTRUNION      uStackFrame;
+        uint8_t const   cbStackFrame = fFlags & IEM_XCPT_FLAGS_ERR ? 16 : 12;
+        rcStrict = iemMemStackPushBeginSpecial(pIemCpu, cbStackFrame, &uStackFrame.pv, &uNewRsp);
+        if (rcStrict != VINF_SUCCESS)
+            return rcStrict;
+        void * const pvStackFrame = uStackFrame.pv;
+
+        if (fFlags & IEM_XCPT_FLAGS_ERR)
+            *uStackFrame.pu32++ = uErr;
+        uStackFrame.pu32[0] = pCtx->eip;
+        uStackFrame.pu32[1] = (pCtx->cs & ~X86_SEL_RPL) | pIemCpu->uCpl;
+        uStackFrame.pu32[2] = pCtx->eflags.u;
+        rcStrict = iemMemCommitAndUnmap(pIemCpu, pvStackFrame, IEM_ACCESS_STACK_W); /* don't use the commit here */
+        if (rcStrict != VINF_SUCCESS)
+            return rcStrict;
+
+        /* Mark the CS selector as 'accessed'. */
+        if (!(DescCS.Legacy.Gen.u4Type & X86_SEL_TYPE_ACCESSED))
+        {
+            rcStrict = iemMemMarkSelDescAccessed(pIemCpu, NewCS);
+            if (rcStrict != VINF_SUCCESS)
+                return rcStrict;
+            DescCS.Legacy.Gen.u4Type |= X86_SEL_TYPE_ACCESSED;
+        }
+
+        /*
+         * Start committing the register changes (joins with the other branch).
+         */
+        pCtx->rsp = uNewRsp;
+    }
+
+    /* ... register committing continues. */
+    pCtx->cs                = (NewCS & ~X86_SEL_RPL) | uNewCpl;
+    pCtx->csHid.u32Limit    = cbLimitCS;
+    pCtx->csHid.u64Base     = X86DESC_BASE(DescCS.Legacy);
+    pCtx->csHid.Attr.u      = X86DESC_GET_HID_ATTR(DescCS.Legacy);
+
+    pCtx->rip               = uNewEip;
+    pCtx->rflags.u         &= ~fEflToClear;
+
+    return fFlags & IEM_XCPT_FLAGS_T_CPU_XCPT ? VINF_IEM_RAISED_XCPT : VINF_SUCCESS;
 }
 
@@ -1766,10 +1899,18 @@
                   uint64_t    uCr2)
 {
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+
     /*
      * Do recursion accounting.
      */
-    uint8_t uPrevXcpt = pIemCpu->uCurXcpt;
-    if (pIemCpu->cXcptRecursions > 0)
-    {
+    uint8_t const uPrevXcpt = pIemCpu->uCurXcpt;
+    if (pIemCpu->cXcptRecursions == 0)
+        Log(("iemRaiseXcptOrInt: %#x at %04x:%RGv cbInstr=%#x fFlags=%#x uErr=%#x uCr2=%llx\n",
+             u8Vector, pCtx->cs, pCtx->rip, cbInstr, fFlags, uErr, uCr2));
+    else
+    {
+        Log(("iemRaiseXcptOrInt: %#x at %04x:%RGv cbInstr=%#x fFlags=%#x uErr=%#x uCr2=%llx; prev=%#x depth=%d\n",
+             u8Vector, pCtx->cs, pCtx->rip, cbInstr, fFlags, uErr, uCr2, pIemCpu->uCurXcpt, pIemCpu->cXcptRecursions + 1));
+
         /** @todo double and tripple faults. */
         AssertReturn(pIemCpu->cXcptRecursions < 3, VERR_NOT_IMPLEMENTED);
@@ -1779,8 +1920,7 @@
 
     /*
-     * Call mode specific worker function.
+     * Call the mode specific worker function.
      */
     VBOXSTRICTRC    rcStrict;
-    PCPUMCTX        pCtx = pIemCpu->CTX_SUFF(pCtx);
     if (!(pCtx->cr0 & X86_CR0_PE))
         rcStrict = iemRaiseXcptOrIntInRealMode( pIemCpu, pCtx, cbInstr, u8Vector, fFlags, uErr, uCr2);
@@ -1797,4 +1937,6 @@
     pIemCpu->cXcptRecursions--;
     pIemCpu->uCurXcpt = uPrevXcpt;
+    Log(("iemRaiseXcptOrInt: returns %Rrc (vec=%#x); cs:rip=%04x:%RGv ss:rsp=%04x:%RGv\n",
+         VBOXSTRICTRC_VAL(rcStrict), u8Vector, pCtx->cs, pCtx->rip, pCtx->ss, pCtx->esp));
     return rcStrict;
 }
@@ -1904,4 +2046,12 @@
 /** \#GP(sel) - 0d.  */
 static VBOXSTRICTRC iemRaiseSelectorBounds(PIEMCPU pIemCpu, uint32_t iSegReg, uint32_t fAccess)
+{
+    AssertFailed(/** @todo implement this */);
+    return VERR_NOT_IMPLEMENTED;
+}
+
+
+/** \#GP(sel) - 0d.  */
+static VBOXSTRICTRC iemRaiseSelectorBoundsBySelector(PIEMCPU pIemCpu, RTSEL Sel)
 {
     AssertFailed(/** @todo implement this */);
@@ -3061,7 +3211,9 @@
     AssertReturn(pIemCpu->cActiveMappings < RT_ELEMENTS(pIemCpu->aMemMappings), 1024);
 
-    AssertFailed(); /** @todo implement me. */
-    return 1024;
-
+    for (unsigned i = 0; i < RT_ELEMENTS(pIemCpu->aMemMappings); i++)
+        if (pIemCpu->aMemMappings[i].fAccess == IEM_ACCESS_INVALID)
+            return i;
+
+    AssertFailedReturn(1024);
 }
 
@@ -4781,8 +4933,8 @@
 #ifdef DEBUG
 # define IEMOP_MNEMONIC(a_szMnemonic) \
-    Log2(("decode - %04x:%08RGv %s%s\n", pIemCpu->CTX_SUFF(pCtx)->cs, pIemCpu->CTX_SUFF(pCtx)->rip, \
+    Log2(("decode - %04x:%RGv %s%s\n", pIemCpu->CTX_SUFF(pCtx)->cs, pIemCpu->CTX_SUFF(pCtx)->rip, \
           pIemCpu->fPrefixes & IEM_OP_PRF_LOCK ? "lock " : "", a_szMnemonic))
 # define IEMOP_MNEMONIC2(a_szMnemonic, a_szOps) \
-    Log2(("decode - %04x:%08RGv %s%s %s\n", pIemCpu->CTX_SUFF(pCtx)->cs, pIemCpu->CTX_SUFF(pCtx)->rip, \
+    Log2(("decode - %04x:%RGv %s%s %s\n", pIemCpu->CTX_SUFF(pCtx)->cs, pIemCpu->CTX_SUFF(pCtx)->rip, \
           pIemCpu->fPrefixes & IEM_OP_PRF_LOCK ? "lock " : "", a_szMnemonic, a_szOps))
 #else
@@ -5142,6 +5294,7 @@
 static void iemExecVerificationModeSetup(PIEMCPU pIemCpu)
 {
+    PVMCPU   pVCpu   = IEMCPU_TO_VMCPU(pIemCpu);
     PCPUMCTX pOrgCtx = pIemCpu->CTX_SUFF(pCtx);
-    pIemCpu->fNoRem = !LogIsEnabled(); /* logging triggers the no-rem/rem verification stuff */
+    pIemCpu->fNoRem  = !LogIsEnabled(); /* logging triggers the no-rem/rem verification stuff */
 
 #if 0
@@ -5159,4 +5312,15 @@
     }
 #endif
+#if 0 /* auto enable on first paged protected mode interrupt */
+    if (   pIemCpu->fNoRem
+        && pOrgCtx->eflags.Bits.u1IF
+        && (pOrgCtx->cr0 & (X86_CR0_PE | X86_CR0_PG)) == (X86_CR0_PE | X86_CR0_PG)
+        && TRPMHasTrap(pVCpu)
+        && EMGetInhibitInterruptsPC(pVCpu) != pOrgCtx->rip)
+    {
+        RTLogFlags(NULL, "enabled");
+        pIemCpu->fNoRem = false;
+    }
+#endif
 
     /*
@@ -5174,5 +5338,4 @@
      * See if there is an interrupt pending in TRPM and inject it if we can.
      */
-    PVMCPU pVCpu = IEMCPU_TO_VMCPU(pIemCpu);
     if (   pOrgCtx->eflags.Bits.u1IF
         && TRPMHasTrap(pVCpu)
@@ -5635,49 +5798,44 @@
         if (memcmp(&pOrgCtx->fpu, &pDebugCtx->fpu, sizeof(pDebugCtx->fpu)))
         {
-            if (pIemCpu->cInstructions != 1)
-            {
-                RTAssertMsg2Weak("  the FPU state differs\n");
-                cDiffs++;
-                CHECK_FIELD(fpu.FCW);
-                CHECK_FIELD(fpu.FSW);
-                CHECK_FIELD(fpu.FTW);
-                CHECK_FIELD(fpu.FOP);
-                CHECK_FIELD(fpu.FPUIP);
-                CHECK_FIELD(fpu.CS);
-                CHECK_FIELD(fpu.Rsrvd1);
-                CHECK_FIELD(fpu.FPUDP);
-                CHECK_FIELD(fpu.DS);
-                CHECK_FIELD(fpu.Rsrvd2);
-                CHECK_FIELD(fpu.MXCSR);
-                CHECK_FIELD(fpu.MXCSR_MASK);
-                CHECK_FIELD(fpu.aRegs[0].au64[0]); CHECK_FIELD(fpu.aRegs[0].au64[1]);
-                CHECK_FIELD(fpu.aRegs[1].au64[0]); CHECK_FIELD(fpu.aRegs[1].au64[1]);
-                CHECK_FIELD(fpu.aRegs[2].au64[0]); CHECK_FIELD(fpu.aRegs[2].au64[1]);
-                CHECK_FIELD(fpu.aRegs[3].au64[0]); CHECK_FIELD(fpu.aRegs[3].au64[1]);
-                CHECK_FIELD(fpu.aRegs[4].au64[0]); CHECK_FIELD(fpu.aRegs[4].au64[1]);
-                CHECK_FIELD(fpu.aRegs[5].au64[0]); CHECK_FIELD(fpu.aRegs[5].au64[1]);
-                CHECK_FIELD(fpu.aRegs[6].au64[0]); CHECK_FIELD(fpu.aRegs[6].au64[1]);
-                CHECK_FIELD(fpu.aRegs[7].au64[0]); CHECK_FIELD(fpu.aRegs[7].au64[1]);
-                CHECK_FIELD(fpu.aXMM[ 0].au64[0]);  CHECK_FIELD(fpu.aXMM[ 0].au64[1]);
-                CHECK_FIELD(fpu.aXMM[ 1].au64[0]);  CHECK_FIELD(fpu.aXMM[ 1].au64[1]);
-                CHECK_FIELD(fpu.aXMM[ 2].au64[0]);  CHECK_FIELD(fpu.aXMM[ 2].au64[1]);
-                CHECK_FIELD(fpu.aXMM[ 3].au64[0]);  CHECK_FIELD(fpu.aXMM[ 3].au64[1]);
-                CHECK_FIELD(fpu.aXMM[ 4].au64[0]);  CHECK_FIELD(fpu.aXMM[ 4].au64[1]);
-                CHECK_FIELD(fpu.aXMM[ 5].au64[0]);  CHECK_FIELD(fpu.aXMM[ 5].au64[1]);
-                CHECK_FIELD(fpu.aXMM[ 6].au64[0]);  CHECK_FIELD(fpu.aXMM[ 6].au64[1]);
-                CHECK_FIELD(fpu.aXMM[ 7].au64[0]);  CHECK_FIELD(fpu.aXMM[ 7].au64[1]);
-                CHECK_FIELD(fpu.aXMM[ 8].au64[0]);  CHECK_FIELD(fpu.aXMM[ 8].au64[1]);
-                CHECK_FIELD(fpu.aXMM[ 9].au64[0]);  CHECK_FIELD(fpu.aXMM[ 9].au64[1]);
-                CHECK_FIELD(fpu.aXMM[10].au64[0]);  CHECK_FIELD(fpu.aXMM[10].au64[1]);
-                CHECK_FIELD(fpu.aXMM[11].au64[0]);  CHECK_FIELD(fpu.aXMM[11].au64[1]);
-                CHECK_FIELD(fpu.aXMM[12].au64[0]);  CHECK_FIELD(fpu.aXMM[12].au64[1]);
-                CHECK_FIELD(fpu.aXMM[13].au64[0]);  CHECK_FIELD(fpu.aXMM[13].au64[1]);
-                CHECK_FIELD(fpu.aXMM[14].au64[0]);  CHECK_FIELD(fpu.aXMM[14].au64[1]);
-                CHECK_FIELD(fpu.aXMM[15].au64[0]);  CHECK_FIELD(fpu.aXMM[15].au64[1]);
-                for (unsigned i = 0; i < RT_ELEMENTS(pOrgCtx->fpu.au32RsrvdRest); i++)
-                    CHECK_FIELD(fpu.au32RsrvdRest[i]);
-            }
-            else
-                RTAssertMsg2Weak("  the FPU state differs - happens the first time...\n");
+            RTAssertMsg2Weak("  the FPU state differs\n");
+            cDiffs++;
+            CHECK_FIELD(fpu.FCW);
+            CHECK_FIELD(fpu.FSW);
+            CHECK_FIELD(fpu.FTW);
+            CHECK_FIELD(fpu.FOP);
+            CHECK_FIELD(fpu.FPUIP);
+            CHECK_FIELD(fpu.CS);
+            CHECK_FIELD(fpu.Rsrvd1);
+            CHECK_FIELD(fpu.FPUDP);
+            CHECK_FIELD(fpu.DS);
+            CHECK_FIELD(fpu.Rsrvd2);
+            CHECK_FIELD(fpu.MXCSR);
+            CHECK_FIELD(fpu.MXCSR_MASK);
+            CHECK_FIELD(fpu.aRegs[0].au64[0]); CHECK_FIELD(fpu.aRegs[0].au64[1]);
+            CHECK_FIELD(fpu.aRegs[1].au64[0]); CHECK_FIELD(fpu.aRegs[1].au64[1]);
+            CHECK_FIELD(fpu.aRegs[2].au64[0]); CHECK_FIELD(fpu.aRegs[2].au64[1]);
+            CHECK_FIELD(fpu.aRegs[3].au64[0]); CHECK_FIELD(fpu.aRegs[3].au64[1]);
+            CHECK_FIELD(fpu.aRegs[4].au64[0]); CHECK_FIELD(fpu.aRegs[4].au64[1]);
+            CHECK_FIELD(fpu.aRegs[5].au64[0]); CHECK_FIELD(fpu.aRegs[5].au64[1]);
+            CHECK_FIELD(fpu.aRegs[6].au64[0]); CHECK_FIELD(fpu.aRegs[6].au64[1]);
+            CHECK_FIELD(fpu.aRegs[7].au64[0]); CHECK_FIELD(fpu.aRegs[7].au64[1]);
+            CHECK_FIELD(fpu.aXMM[ 0].au64[0]);  CHECK_FIELD(fpu.aXMM[ 0].au64[1]);
+            CHECK_FIELD(fpu.aXMM[ 1].au64[0]);  CHECK_FIELD(fpu.aXMM[ 1].au64[1]);
+            CHECK_FIELD(fpu.aXMM[ 2].au64[0]);  CHECK_FIELD(fpu.aXMM[ 2].au64[1]);
+            CHECK_FIELD(fpu.aXMM[ 3].au64[0]);  CHECK_FIELD(fpu.aXMM[ 3].au64[1]);
+            CHECK_FIELD(fpu.aXMM[ 4].au64[0]);  CHECK_FIELD(fpu.aXMM[ 4].au64[1]);
+            CHECK_FIELD(fpu.aXMM[ 5].au64[0]);  CHECK_FIELD(fpu.aXMM[ 5].au64[1]);
+            CHECK_FIELD(fpu.aXMM[ 6].au64[0]);  CHECK_FIELD(fpu.aXMM[ 6].au64[1]);
+            CHECK_FIELD(fpu.aXMM[ 7].au64[0]);  CHECK_FIELD(fpu.aXMM[ 7].au64[1]);
+            CHECK_FIELD(fpu.aXMM[ 8].au64[0]);  CHECK_FIELD(fpu.aXMM[ 8].au64[1]);
+            CHECK_FIELD(fpu.aXMM[ 9].au64[0]);  CHECK_FIELD(fpu.aXMM[ 9].au64[1]);
+            CHECK_FIELD(fpu.aXMM[10].au64[0]);  CHECK_FIELD(fpu.aXMM[10].au64[1]);
+            CHECK_FIELD(fpu.aXMM[11].au64[0]);  CHECK_FIELD(fpu.aXMM[11].au64[1]);
+            CHECK_FIELD(fpu.aXMM[12].au64[0]);  CHECK_FIELD(fpu.aXMM[12].au64[1]);
+            CHECK_FIELD(fpu.aXMM[13].au64[0]);  CHECK_FIELD(fpu.aXMM[13].au64[1]);
+            CHECK_FIELD(fpu.aXMM[14].au64[0]);  CHECK_FIELD(fpu.aXMM[14].au64[1]);
+            CHECK_FIELD(fpu.aXMM[15].au64[0]);  CHECK_FIELD(fpu.aXMM[15].au64[1]);
+            for (unsigned i = 0; i < RT_ELEMENTS(pOrgCtx->fpu.au32RsrvdRest); i++)
+                CHECK_FIELD(fpu.au32RsrvdRest[i]);
         }
         CHECK_FIELD(rip);
@@ -5709,8 +5867,9 @@
         }
 
-        if (pIemCpu->cIOReads != 1)
+        if (pIemCpu->cIOReads != 1 && !pIemCpu->fIgnoreRaxRdx)
             CHECK_FIELD(rax);
         CHECK_FIELD(rcx);
-        CHECK_FIELD(rdx);
+        if (!pIemCpu->fIgnoreRaxRdx)
+            CHECK_FIELD(rdx);
         CHECK_FIELD(rbx);
         CHECK_FIELD(rsp);
Index: /trunk/src/VBox/VMM/VMMAll/IEMAllCImpl.cpp.h
===================================================================
--- /trunk/src/VBox/VMM/VMMAll/IEMAllCImpl.cpp.h	(revision 37078)
+++ /trunk/src/VBox/VMM/VMMAll/IEMAllCImpl.cpp.h	(revision 37079)
@@ -955,13 +955,13 @@
             return rcStrict;
         uint32_t uNewEip;
-        uint16_t uNewCs;
+        uint16_t uNewCS;
         if (enmEffOpSize == IEMMODE_32BIT)
         {
-            uNewCs  = pu16Frame[2];
+            uNewCS  = pu16Frame[2];
             uNewEip = RT_MAKE_U32(pu16Frame[0], pu16Frame[1]);
         }
         else
         {
-            uNewCs  = pu16Frame[1];
+            uNewCS  = pu16Frame[1];
             uNewEip = pu16Frame[0];
         }
@@ -979,6 +979,6 @@
             return rcStrict;
         pCtx->rip           = uNewEip;
-        pCtx->cs            = uNewCs;
-        pCtx->csHid.u64Base = (uint32_t)uNewCs << 4;
+        pCtx->cs            = uNewCS;
+        pCtx->csHid.u64Base = (uint32_t)uNewCS << 4;
         /** @todo do we load attribs and limit as well? */
         if (cbPop)
@@ -1134,37 +1134,143 @@
 
 /**
- * Implements iret.
+ * Implements iret for real mode and V8086 mode.
  *
  * @param   enmEffOpSize    The effective operand size.
  */
-IEM_CIMPL_DEF_1(iemCImpl_iret, IEMMODE, enmEffOpSize)
-{
-    PCPUMCTX        pCtx = pIemCpu->CTX_SUFF(pCtx);
+IEM_CIMPL_DEF_1(iemCImpl_iret_real_v8086, IEMMODE, enmEffOpSize)
+{
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+
+    /*
+     * iret throws an exception if VME isn't enabled.
+     */
+    if (   pCtx->eflags.Bits.u1VM
+        && !(pCtx->cr4 & X86_CR4_VME))
+        return iemRaiseGeneralProtectionFault0(pIemCpu);
+
+    /*
+     * Do the stack bits, but don't commit RSP before everything checks
+     * out right.
+     */
+    Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
     VBOXSTRICTRC    rcStrict;
+    RTCPTRUNION     uFrame;
+    uint16_t        uNewCS;
+    uint32_t        uNewEip;
+    uint32_t        uNewFlags;
     uint64_t        uNewRsp;
-
-    /*
-     * Real mode is easy, V8086 mode is relative similar.
-     */
-    if (   pIemCpu->enmCpuMode == IEMMODE_16BIT
-        && IEM_IS_REAL_OR_V86_MODE(pIemCpu))
-    {
-        /* iret throws an exception if VME isn't enabled.  */
-        if (   pCtx->eflags.Bits.u1VM
-            && !(pCtx->cr4 & X86_CR4_VME))
+    if (enmEffOpSize == IEMMODE_32BIT)
+    {
+        rcStrict = iemMemStackPopBeginSpecial(pIemCpu, 12, &uFrame.pv, &uNewRsp);
+        if (rcStrict != VINF_SUCCESS)
+            return rcStrict;
+        uNewEip    = uFrame.pu32[0];
+        uNewCS     = (uint16_t)uFrame.pu32[1];
+        uNewFlags  = uFrame.pu32[2];
+        uNewFlags &= X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF
+                   | X86_EFL_TF | X86_EFL_IF | X86_EFL_DF | X86_EFL_OF | X86_EFL_IOPL | X86_EFL_NT
+                   | X86_EFL_RF /*| X86_EFL_VM*/ | X86_EFL_AC /*|X86_EFL_VIF*/ /*|X86_EFL_VIP*/
+                   | X86_EFL_ID;
+        uNewFlags |= pCtx->eflags.u & (X86_EFL_VM | X86_EFL_VIF | X86_EFL_VIP | X86_EFL_1);
+    }
+    else
+    {
+        rcStrict = iemMemStackPopBeginSpecial(pIemCpu, 6, &uFrame.pv, &uNewRsp);
+        if (rcStrict != VINF_SUCCESS)
+            return rcStrict;
+        uNewEip    = uFrame.pu16[0];
+        uNewCS     = uFrame.pu16[1];
+        uNewFlags  = uFrame.pu16[2];
+        uNewFlags &= X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF
+                   | X86_EFL_TF | X86_EFL_IF | X86_EFL_DF | X86_EFL_OF | X86_EFL_IOPL | X86_EFL_NT;
+        uNewFlags |= pCtx->eflags.u & (UINT32_C(0xffff0000) | X86_EFL_1);
+        /** @todo The intel pseudo code does not indicate what happens to
+         *        reserved flags. We just ignore them. */
+    }
+    /** @todo Check how this is supposed to work if sp=0xfffe. */
+
+    /*
+     * Check the limit of the new EIP.
+     */
+    /** @todo Only the AMD pseudo code checks the limit here, what's
+     *        right? */
+    if (uNewEip > pCtx->csHid.u32Limit)
+        return iemRaiseSelectorBounds(pIemCpu, X86_SREG_CS, IEM_ACCESS_INSTRUCTION);
+
+    /*
+     * V8086 checks and flag adjustments
+     */
+    if (pCtx->eflags.Bits.u1VM)
+    {
+        if (pCtx->eflags.Bits.u2IOPL == 3)
+        {
+            /* Preserve IOPL and clear RF. */
+            uNewFlags &=                 ~(X86_EFL_IOPL | X86_EFL_RF);
+            uNewFlags |= pCtx->eflags.u & (X86_EFL_IOPL);
+        }
+        else if (   enmEffOpSize == IEMMODE_16BIT
+                 && (   !(uNewFlags & X86_EFL_IF)
+                     || !pCtx->eflags.Bits.u1VIP )
+                 && !(uNewFlags & X86_EFL_TF)   )
+        {
+            /* Move IF to VIF, clear RF and preserve IF and IOPL.*/
+            uNewFlags &= ~X86_EFL_VIF;
+            uNewFlags |= (uNewFlags & X86_EFL_IF) << (19 - 9);
+            uNewFlags &=                 ~(X86_EFL_IF | X86_EFL_IOPL | X86_EFL_RF);
+            uNewFlags |= pCtx->eflags.u & (X86_EFL_IF | X86_EFL_IOPL);
+        }
+        else
             return iemRaiseGeneralProtectionFault0(pIemCpu);
-
-        /* Do the stack bits, but don't commit RSP before everything checks
-           out right. */
-        union
-        {
-            uint32_t const *pu32;
-            uint16_t const *pu16;
-            void const     *pv;
-        } uFrame;
+    }
+
+    /*
+     * Commit the operation.
+     */
+    rcStrict = iemMemStackPopCommitSpecial(pIemCpu, uFrame.pv, uNewRsp);
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+    pCtx->rip           = uNewEip;
+    pCtx->cs            = uNewCS;
+    pCtx->csHid.u64Base = (uint32_t)uNewCS << 4;
+    /** @todo do we load attribs and limit as well? */
+    Assert(uNewFlags & X86_EFL_1);
+    pCtx->eflags.u      = uNewFlags;
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Implements iret for protected mode
+ *
+ * @param   enmEffOpSize    The effective operand size.
+ */
+IEM_CIMPL_DEF_1(iemCImpl_iret_prot, IEMMODE, enmEffOpSize)
+{
+    PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
+
+    /*
+     * Nested task return.
+     */
+    if (pCtx->eflags.Bits.u1NT)
+    {
+        AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+    }
+    /*
+     * Normal return.
+     */
+    else
+    {
+        /*
+         * Do the stack bits, but don't commit RSP before everything checks
+         * out right.
+         */
         Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
-        uint16_t uNewCs;
-        uint32_t uNewEip;
-        uint32_t uNewFlags;
+        VBOXSTRICTRC    rcStrict;
+        RTCPTRUNION     uFrame;
+        uint16_t        uNewCS;
+        uint32_t        uNewEip;
+        uint32_t        uNewFlags;
+        uint64_t        uNewRsp;
         if (enmEffOpSize == IEMMODE_32BIT)
         {
@@ -1173,11 +1279,6 @@
                 return rcStrict;
             uNewEip    = uFrame.pu32[0];
-            uNewCs     = (uint16_t)uFrame.pu32[1];
+            uNewCS     = (uint16_t)uFrame.pu32[1];
             uNewFlags  = uFrame.pu32[2];
-            uNewFlags &= X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF
-                       | X86_EFL_TF | X86_EFL_IF | X86_EFL_DF | X86_EFL_OF | X86_EFL_IOPL | X86_EFL_NT
-                       | X86_EFL_RF /*| X86_EFL_VM*/ | X86_EFL_AC /*|X86_EFL_VIF*/ /*|X86_EFL_VIP*/
-                       | X86_EFL_ID;
-            uNewFlags |= pCtx->eflags.u & (X86_EFL_VM | X86_EFL_VIF | X86_EFL_VIP | X86_EFL_1);
         }
         else
@@ -1187,61 +1288,161 @@
                 return rcStrict;
             uNewEip    = uFrame.pu16[0];
-            uNewCs     = uFrame.pu16[1];
+            uNewCS     = uFrame.pu16[1];
             uNewFlags  = uFrame.pu16[2];
-            uNewFlags &= X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF
-                       | X86_EFL_TF | X86_EFL_IF | X86_EFL_DF | X86_EFL_OF | X86_EFL_IOPL | X86_EFL_NT;
-            uNewFlags |= pCtx->eflags.u & (UINT16_C(0xffff0000) | X86_EFL_1);
-            /** @todo The intel pseudo code does not indicate what happens to
-             *        reserved flags. We just ignore them. */
-        }
-        /** @todo Check how this is supposed to work if sp=0xfffe. */
-
-        /* Check the limit of the new EIP. */
-        /** @todo Only the AMD pseudo code check the limit here, what's
-         *        right? */
-        if (uNewEip > pCtx->csHid.u32Limit)
-            return iemRaiseSelectorBounds(pIemCpu, X86_SREG_CS, IEM_ACCESS_INSTRUCTION);
-
-        /* V8086 checks and flag adjustments */
-        if (pCtx->eflags.Bits.u1VM)
-        {
-            if (pCtx->eflags.Bits.u2IOPL == 3)
-            {
-                /* Preserve IOPL and clear RF. */
-                uNewFlags &=                 ~(X86_EFL_IOPL | X86_EFL_RF);
-                uNewFlags |= pCtx->eflags.u & (X86_EFL_IOPL);
-            }
-            else if (   enmEffOpSize == IEMMODE_16BIT
-                     && (   !(uNewFlags & X86_EFL_IF)
-                         || !pCtx->eflags.Bits.u1VIP )
-                     && !(uNewFlags & X86_EFL_TF)   )
-            {
-                /* Move IF to VIF, clear RF and preserve IF and IOPL.*/
-                uNewFlags &= ~X86_EFL_VIF;
-                uNewFlags |= (uNewFlags & X86_EFL_IF) << (19 - 9);
-                uNewFlags &=                 ~(X86_EFL_IF | X86_EFL_IOPL | X86_EFL_RF);
-                uNewFlags |= pCtx->eflags.u & (X86_EFL_IF | X86_EFL_IOPL);
-            }
-            else
-                return iemRaiseGeneralProtectionFault0(pIemCpu);
-        }
-
-        /* commit the operation. */
-        rcStrict = iemMemStackPopCommitSpecial(pIemCpu, uFrame.pv, uNewRsp);
+        }
+        rcStrict = iemMemCommitAndUnmap(pIemCpu, (void *)uFrame.pv, IEM_ACCESS_STACK_R); /* don't use iemMemStackPopCommitSpecial here. */
         if (rcStrict != VINF_SUCCESS)
             return rcStrict;
-        pCtx->rip           = uNewEip;
-        pCtx->cs            = uNewCs;
-        pCtx->csHid.u64Base = (uint32_t)uNewCs << 4;
-        /** @todo do we load attribs and limit as well? */
-        Assert(uNewFlags & X86_EFL_1);
-        pCtx->eflags.u      = uNewFlags;
-
-        return VINF_SUCCESS;
-    }
-
-
-    AssertFailed();
+
+        /*
+         * What are we returning to?
+         */
+        if (   (uNewFlags & X86_EFL_VM)
+            && pIemCpu->uCpl == 0)
+        {
+            /* V8086 mode! */
+            AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+        }
+        else
+        {
+            /*
+             * Protected mode.
+             */
+            /* Read the CS descriptor. */
+            if (!(uNewCS & (X86_SEL_MASK | X86_SEL_LDT)))
+            {
+                Log(("iret %04x:%08x -> invalid CS selector, #GP(0)\n", uNewCS, uNewEip));
+                return iemRaiseGeneralProtectionFault0(pIemCpu);
+            }
+
+            IEMSELDESC DescCS;
+            rcStrict = iemMemFetchSelDesc(pIemCpu, &DescCS, uNewCS);
+            if (rcStrict != VINF_SUCCESS)
+                return rcStrict;
+
+            /* Must be a code descriptor. */
+            if (!DescCS.Legacy.Gen.u1DescType)
+            {
+                Log(("iret %04x:%08x - CS is system segment (%#x) -> #GP\n", uNewCS, uNewEip, DescCS.Legacy.Gen.u4Type));
+                return iemRaiseGeneralProtectionFaultBySelector(pIemCpu, uNewCS);
+            }
+            if (!(DescCS.Legacy.Gen.u4Type & X86_SEL_TYPE_CODE))
+            {
+                Log(("iret %04x:%08x - not code segment (%#x) -> #GP\n", uNewCS, uNewEip, DescCS.Legacy.Gen.u4Type));
+                return iemRaiseGeneralProtectionFaultBySelector(pIemCpu, uNewCS);
+            }
+
+            /* Privilege checks. */
+            if ((uNewCS & X86_SEL_RPL) < pIemCpu->uCpl)
+            {
+                Log(("iret %04x:%08x - RPL < CPL (%d) -> #GP\n", uNewCS, uNewEip, pIemCpu->uCpl));
+                return iemRaiseGeneralProtectionFaultBySelector(pIemCpu, uNewCS);
+            }
+            if (   (DescCS.Legacy.Gen.u4Type & X86_SEL_TYPE_CONF)
+                && (uNewCS & X86_SEL_RPL) < DescCS.Legacy.Gen.u2Dpl)
+            {
+                Log(("iret %04x:%08x - RPL < DPL (%d) -> #GP\n", uNewCS, uNewEip, DescCS.Legacy.Gen.u2Dpl));
+                return iemRaiseGeneralProtectionFaultBySelector(pIemCpu, uNewCS);
+            }
+
+            /* Present? */
+            if (!DescCS.Legacy.Gen.u1Present)
+            {
+                Log(("iret %04x:%08x - CS not present -> #NP\n", uNewCS, uNewEip));
+                return iemRaiseSelectorNotPresentBySelector(pIemCpu, uNewCS);
+            }
+
+            uint32_t cbLimitCS = X86DESC_LIMIT(DescCS.Legacy);
+            if (DescCS.Legacy.Gen.u1Granularity)
+                cbLimitCS = (cbLimitCS << PAGE_SHIFT) | PAGE_OFFSET_MASK;
+
+            /*
+             * Different level?
+             */
+            if ((uNewCS & X86_SEL_RPL) != pIemCpu->uCpl)
+            {
+                AssertFailedReturn(VERR_NOT_IMPLEMENTED);
+            }
+            /*
+             * Same level.
+             */
+            else
+            {
+                /* Check EIP. */
+                if (uNewEip > cbLimitCS)
+                {
+                    Log(("iret %04x:%08x - EIP is out of bounds (%#x) -> #GP(0)\n", uNewCS, uNewEip, cbLimitCS));
+                    return iemRaiseSelectorBoundsBySelector(pIemCpu, uNewCS);
+                }
+
+                /*
+                 * Commit the changes, marking CS first since it may fail.
+                 */
+                if (!(DescCS.Legacy.Gen.u4Type & X86_SEL_TYPE_ACCESSED))
+                {
+                    rcStrict = iemMemMarkSelDescAccessed(pIemCpu, uNewCS);
+                    if (rcStrict != VINF_SUCCESS)
+                        return rcStrict;
+                    DescCS.Legacy.Gen.u4Type |= X86_SEL_TYPE_ACCESSED;
+                }
+
+                pCtx->rip               = uNewEip;
+                pCtx->cs                = uNewCS;
+                pCtx->csHid.Attr.u      = X86DESC_GET_HID_ATTR(DescCS.Legacy);
+                pCtx->csHid.u32Limit    = cbLimitCS;
+                pCtx->csHid.u64Base     = X86DESC_BASE(DescCS.Legacy);
+                pCtx->rsp               = uNewRsp;
+
+                uint32_t fEFlagsMask = X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF  | X86_EFL_SF
+                                     | X86_EFL_TF | X86_EFL_DF | X86_EFL_OF | X86_EFL_NT;
+                if (enmEffOpSize != IEMMODE_16BIT)
+                    fEFlagsMask |= X86_EFL_RF | X86_EFL_AC | X86_EFL_ID;
+                if (pIemCpu->uCpl == 0)
+                    fEFlagsMask |= X86_EFL_IF | X86_EFL_IOPL | X86_EFL_VIF | X86_EFL_VIP; /* VM is 0 */
+                else if (pIemCpu->uCpl <= pCtx->eflags.Bits.u2IOPL)
+                    fEFlagsMask |= X86_EFL_IF;
+                pCtx->eflags.u         &= ~fEFlagsMask;
+                pCtx->eflags.u         |= fEFlagsMask & uNewFlags;
+                /* Done! */
+            }
+        }
+    }
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Implements iret for long mode.
+ *
+ * @param   enmEffOpSize    The effective operand size.
+ */
+IEM_CIMPL_DEF_1(iemCImpl_iret_long, IEMMODE, enmEffOpSize)
+{
+    //PCPUMCTX        pCtx = pIemCpu->CTX_SUFF(pCtx);
+    //VBOXSTRICTRC    rcStrict;
+    //uint64_t        uNewRsp;
+
     return VERR_NOT_IMPLEMENTED;
+}
+
+
+/**
+ * Implements iret.
+ *
+ * @param   enmEffOpSize    The effective operand size.
+ */
+IEM_CIMPL_DEF_1(iemCImpl_iret, IEMMODE, enmEffOpSize)
+{
+    /*
+     * Call a mode specific worker.
+     */
+    if (   pIemCpu->enmCpuMode == IEMMODE_16BIT
+        && IEM_IS_REAL_OR_V86_MODE(pIemCpu))
+        return IEM_CIMPL_CALL_1(iemCImpl_iret_real_v8086, enmEffOpSize);
+    if (IEM_IS_LONG_MODE(pIemCpu))
+        return IEM_CIMPL_CALL_1(iemCImpl_iret_long, enmEffOpSize);
+
+    return     IEM_CIMPL_CALL_1(iemCImpl_iret_prot, enmEffOpSize);
 }
 
@@ -2449,4 +2650,7 @@
     pCtx->rax = (uint32_t)uTicks;
     pCtx->rdx = uTicks >> 32;
+#ifdef IEM_VERIFICATION_MODE
+    pIemCpu->fIgnoreRaxRdx = true;
+#endif
 
     iemRegAddToRip(pIemCpu, cbInstr);
@@ -2736,5 +2940,5 @@
         pCtx->fpu.FCW   = 0x37f;
         pCtx->fpu.FSW   = 0;
-        pCtx->fpu.FTW   = 0xff;
+        pCtx->fpu.FTW   = 0x00;         /* 0 - empty (abridged FXSAVE tag). */
         pCtx->fpu.FPUDP = 0;
         pCtx->fpu.DS    = 0; //??
@@ -2748,5 +2952,5 @@
         pFpu->FCW       = 0x37f;
         pFpu->FSW       = 0;
-        pFpu->FTW       = 0xffff;
+        pFpu->FTW       = 0xffff;       /* 11b (empty) for all 8 registers. */
         pFpu->FPUOO     = 0; //??
         pFpu->FPUOS     = 0; //??
Index: /trunk/src/VBox/VMM/include/IEMInternal.h
===================================================================
--- /trunk/src/VBox/VMM/include/IEMInternal.h	(revision 37078)
+++ /trunk/src/VBox/VMM/include/IEMInternal.h	(revision 37079)
@@ -172,5 +172,8 @@
      * This is used to skip past really slow bits.  */
     bool                    fNoRem;
-    bool                    afAlignment1[3];
+    /** Indicates that RAX and RDX differences should be ignored since RDTSC
+     *  and RDTSCP are timing-sensitive.  */
+    bool                    fIgnoreRaxRdx;
+    bool                    afAlignment1[2];
     /** Mask of undefined eflags.
      * The verifier will any difference in these flags. */
@@ -316,4 +319,6 @@
 /** Stack read alias. */
 #define IEM_ACCESS_STACK_R              (IEM_ACCESS_TYPE_READ  | IEM_ACCESS_WHAT_STACK)
+/** Stack read+write alias. */
+#define IEM_ACCESS_STACK_RW             (IEM_ACCESS_TYPE_READ  | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_WHAT_STACK)
 /** @} */
 
