Index: /trunk/src/VBox/VMM/VMMAll/IEMAll.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMAll/IEMAll.cpp	(revision 36779)
+++ /trunk/src/VBox/VMM/VMMAll/IEMAll.cpp	(revision 36780)
@@ -1082,4 +1082,11 @@
 
 static VBOXSTRICTRC iemRaiseGeneralProtectionFault0(PIEMCPU pIemCpu)
+{
+    AssertFailed(/** @todo implement this */);
+    return VERR_NOT_IMPLEMENTED; /* Stub: asserts and fails; no \#GP(0) is delivered yet. */
+}
+
+
+static VBOXSTRICTRC iemRaiseNotCanonical(PIEMCPU pIemCpu)
 {
     AssertFailed(/** @todo implement this */);
@@ -3852,4 +3859,7 @@
     PCPUMCTX pCtx  = pIemCpu->CTX_SUFF(pCtx);
     uint64_t OldPC = pCtx->rip + cbInstr;
+    uint64_t NewPC = OldPC + offDisp;
+    if (!IEM_IS_CANONICAL(NewPC))
+        return iemRaiseNotCanonical(pIemCpu);
 
     VBOXSTRICTRC rcStrict = iemMemStackPushU64(pIemCpu, OldPC);
@@ -3857,5 +3867,5 @@
         return rcStrict;
 
-    pCtx->rip = OldPC + offDisp;
+    pCtx->rip = NewPC;
     return VINF_SUCCESS;
 }
@@ -3974,5 +3984,5 @@
         else
         {
-            if (offSeg > offSeg)
+            if (offSeg > cbLimit)
             {
                 Log(("jmpf %04x:%08x -> out of bounds (%#x)\n", uSel, offSeg, cbLimit));
@@ -4158,5 +4168,5 @@
         /* Check the limit of the new EIP. */
         /** @todo Intel pseudo code only does the limit check for 16-bit
-         *        operands, AMD does make any distinction. What is right? */
+         *        operands, AMD does not make any distinction. What is right? */
         if (uNewEip > pCtx->csHid.u32Limit)
             return iemRaiseSelectorBounds(pIemCpu, X86_SREG_CS, IEM_ACCESS_INSTRUCTION);
@@ -4181,4 +4191,71 @@
 
 /**
+ * Implements retn.
+ *
+ * We're doing this in C because of the \#GP that might be raised if the popped
+ * program counter is out of bounds.
+ *
+ * @returns VINF_SUCCESS, or the status of the failed stack pop / raised fault.
+ * @param   enmEffOpSize    The effective operand size.
+ * @param   cbPop           The amount of arguments to pop from the stack (bytes).
+ */
+IEM_CIMPL_DEF_2(iemCImpl_retn, IEMMODE, enmEffOpSize, uint16_t, cbPop)
+{
+    PCPUMCTX        pCtx = pIemCpu->CTX_SUFF(pCtx);
+
+    /* Fetch the return address (the new RIP) from the stack. */
+    VBOXSTRICTRC    rcStrict;
+    RTUINT64U       NewRip;
+    RTUINT64U       NewRsp;
+    NewRsp.u = pCtx->rsp; /* Work on a copy; pCtx->rsp is only written at commit. */
+    switch (enmEffOpSize)
+    {
+        case IEMMODE_16BIT:
+            NewRip.u = 0; /* Only the low word is popped; keep the rest zero. */
+            rcStrict = iemMemStackPopU16Ex(pIemCpu, &NewRip.Words.w0, &NewRsp);
+            break;
+        case IEMMODE_32BIT:
+            NewRip.u = 0; /* Only the low dword is popped; keep the rest zero. */
+            rcStrict = iemMemStackPopU32Ex(pIemCpu, &NewRip.DWords.dw0, &NewRsp);
+            break;
+        case IEMMODE_64BIT:
+            rcStrict = iemMemStackPopU64Ex(pIemCpu, &NewRip.u, &NewRsp);
+            break;
+        IEM_NOT_REACHED_DEFAULT_CASE_RET();
+    }
+    if (rcStrict != VINF_SUCCESS)
+        return rcStrict;
+
+    /* Check the new RIP before loading it. */
+    /** @todo Should test this as the intel+amd pseudo code doesn't mention half
+     *        of it.  The canonical test is performed here and for call. */
+    if (enmEffOpSize != IEMMODE_64BIT)
+    {
+        if (NewRip.DWords.dw0 > pCtx->csHid.u32Limit)
+        {
+            Log(("retn newrip=%llx - out of bounds (%x) -> #GP\n", NewRip.u, pCtx->csHid.u32Limit));
+            return iemRaiseSelectorBounds(pIemCpu, X86_SREG_CS, IEM_ACCESS_INSTRUCTION);
+        }
+    }
+    else
+    {
+        if (!IEM_IS_CANONICAL(NewRip.u))
+        {
+            Log(("retn newrip=%llx - not canonical -> #GP\n", NewRip.u));
+            return iemRaiseNotCanonical(pIemCpu);
+        }
+    }
+
+    /* Commit it: nothing in the guest context has been modified before this point. */
+    pCtx->rip = NewRip.u;
+    pCtx->rsp = NewRsp.u;
+    if (cbPop) /* 'retn Iw' form: also release the cbPop argument bytes. */
+        iemRegAddToRsp(pCtx, cbPop);
+
+    return VINF_SUCCESS;
+}
+
+
+/**
  * Implements int3 and int XX.
  *
@@ -4221,8 +4298,9 @@
 
         /* load the vector address into cs:ip. */
-        pCtx->cs            = Idte.sel;
-        pCtx->csHid.u64Base = (uint32_t)Idte.sel << 4;
+        pCtx->cs               = Idte.sel;
+        pCtx->csHid.u64Base    = (uint32_t)Idte.sel << 4;
         /** @todo do we load attribs and limit as well? Should we check against limit like far jump? */
-        pCtx->rip           = Idte.off;
+        pCtx->rip              = Idte.off;
+        pCtx->eflags.Bits.u1IF = 0;
         return VINF_SUCCESS;
     }
@@ -5296,10 +5374,16 @@
 
 #define IEM_MC_FETCH_GREG_U8(a_u8Dst, a_iGReg)          (a_u8Dst)  = iemGRegFetchU8(pIemCpu, (a_iGReg))
+#define IEM_MC_FETCH_GREG_U8_ZX_U16(a_u16Dst, a_iGReg)  (a_u16Dst) = iemGRegFetchU8(pIemCpu, (a_iGReg))
+#define IEM_MC_FETCH_GREG_U8_ZX_U32(a_u32Dst, a_iGReg)  (a_u32Dst) = iemGRegFetchU8(pIemCpu, (a_iGReg))
+#define IEM_MC_FETCH_GREG_U8_ZX_U64(a_u64Dst, a_iGReg)  (a_u64Dst) = iemGRegFetchU8(pIemCpu, (a_iGReg))
 #define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg)        (a_u16Dst) = iemGRegFetchU16(pIemCpu, (a_iGReg))
+#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u32Dst, a_iGReg) (a_u32Dst) = iemGRegFetchU16(pIemCpu, (a_iGReg))
+#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u64Dst, a_iGReg) (a_u64Dst) = iemGRegFetchU16(pIemCpu, (a_iGReg))
 #define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg)        (a_u32Dst) = iemGRegFetchU32(pIemCpu, (a_iGReg))
+#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u64Dst, a_iGReg) (a_u64Dst) = iemGRegFetchU32(pIemCpu, (a_iGReg))
 #define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg)        (a_u64Dst) = iemGRegFetchU64(pIemCpu, (a_iGReg))
 #define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg)        (a_u16Dst) = iemSRegFetchU16(pIemCpu, (a_iSReg))
-#define IEM_MC_FETCH_SREG_U32_ZX(a_u32Dst, a_iSReg)     (a_u32Dst) = iemSRegFetchU16(pIemCpu, (a_iSReg))
-#define IEM_MC_FETCH_SREG_U64_ZX(a_u64Dst, a_iSReg)     (a_u64Dst) = iemSRegFetchU16(pIemCpu, (a_iSReg))
+#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg)     (a_u32Dst) = iemSRegFetchU16(pIemCpu, (a_iSReg))
+#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg)     (a_u64Dst) = iemSRegFetchU16(pIemCpu, (a_iSReg))
 #define IEM_MC_FETCH_EFLAGS(a_EFlags)                   (a_EFlags) = (pIemCpu)->CTX_SUFF(pCtx)->eflags.u
 
@@ -5352,4 +5436,41 @@
 #define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
     IEM_MC_RETURN_ON_FAILURE(iemMemFetchDataU64(pIemCpu, &(a_u64Dst), (a_iSeg), (a_GCPtrMem)))
+
+#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) /* u8 fetch widened into a_u16Dst */ \
+    do { \
+        uint8_t u8Tmp; \
+        IEM_MC_RETURN_ON_FAILURE(iemMemFetchDataU8(pIemCpu, &u8Tmp, (a_iSeg), (a_GCPtrMem))); \
+        (a_u16Dst) = u8Tmp; \
+    } while (0)
+#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) /* u8 fetch widened into a_u32Dst */ \
+    do { \
+        uint8_t u8Tmp; \
+        IEM_MC_RETURN_ON_FAILURE(iemMemFetchDataU8(pIemCpu, &u8Tmp, (a_iSeg), (a_GCPtrMem))); \
+        (a_u32Dst) = u8Tmp; \
+    } while (0)
+#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) /* u8 fetch widened into a_u64Dst */ \
+    do { \
+        uint8_t u8Tmp; \
+        IEM_MC_RETURN_ON_FAILURE(iemMemFetchDataU8(pIemCpu, &u8Tmp, (a_iSeg), (a_GCPtrMem))); \
+        (a_u64Dst) = u8Tmp; \
+    } while (0)
+#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) /* u16 fetch widened into a_u32Dst */ \
+    do { \
+        uint16_t u16Tmp; \
+        IEM_MC_RETURN_ON_FAILURE(iemMemFetchDataU16(pIemCpu, &u16Tmp, (a_iSeg), (a_GCPtrMem))); \
+        (a_u32Dst) = u16Tmp; \
+    } while (0)
+#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) /* u16 fetch widened into a_u64Dst */ \
+    do { \
+        uint16_t u16Tmp; \
+        IEM_MC_RETURN_ON_FAILURE(iemMemFetchDataU16(pIemCpu, &u16Tmp, (a_iSeg), (a_GCPtrMem))); \
+        (a_u64Dst) = u16Tmp; \
+    } while (0)
+#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) /* u32 fetch widened into a_u64Dst */ \
+    do { \
+        uint32_t u32Tmp; \
+        IEM_MC_RETURN_ON_FAILURE(iemMemFetchDataU32(pIemCpu, &u32Tmp, (a_iSeg), (a_GCPtrMem))); \
+        (a_u64Dst) = u32Tmp; \
+    } while (0)
 
 #define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
@@ -5877,4 +5998,7 @@
 static void iemExecVerificationModeSetup(PIEMCPU pIemCpu)
 {
+    /*
+     * Switch state.
+     */
     static CPUMCTX  s_DebugCtx; /* Ugly! */
 
@@ -5882,9 +6006,25 @@
     s_DebugCtx = *pOrgCtx;
     pIemCpu->CTX_SUFF(pCtx) = &s_DebugCtx;
+
+    /*
+     * See if there is an interrupt pending in TRPM and inject it if we can.
+     */
+    PVMCPU pVCpu = IEMCPU_TO_VMCPU(pIemCpu);
+    if (   pOrgCtx->eflags.Bits.u1IF
+        && TRPMHasTrap(pVCpu)
+        //&& TRPMIsSoftwareInterrupt(pVCpu)
+        && EMGetInhibitInterruptsPC(pVCpu) != pOrgCtx->rip)
+    {
+        Log(("Injecting trap %#x\n", TRPMGetTrapNo(pVCpu)));
+        iemCImpl_int(pIemCpu, 0, TRPMGetTrapNo(pVCpu), false);
+    }
+
+    /*
+     * Reset the counters.
+     */
     pIemCpu->cIOReads    = 0;
     pIemCpu->cIOWrites   = 0;
     pIemCpu->fMulDivHack = false;
     pIemCpu->fShlHack    = false;
-
 }
 
@@ -5942,5 +6082,5 @@
             }
             else
-                RTAssertMsg2Weak("  the FPU state differs - happends the first time...\n");
+                RTAssertMsg2Weak("  the FPU state differs - happens the first time...\n");
         }
         CHECK_FIELD(rip);
@@ -6067,4 +6207,8 @@
 {
     PIEMCPU  pIemCpu = &pVCpu->iem.s;
+
+#if defined(IEM_VERIFICATION_MODE) && defined(IN_RING3)
+    iemExecVerificationModeSetup(pIemCpu);
+#endif
 #ifdef DEBUG
     PCPUMCTX pCtx = pIemCpu->CTX_SUFF(pCtx);
@@ -6087,7 +6231,4 @@
           szInstr));
 #endif
-#if defined(IEM_VERIFICATION_MODE) && defined(IN_RING3)
-    iemExecVerificationModeSetup(pIemCpu);
-#endif
 
     /*
@@ -6102,4 +6243,7 @@
     if (rcStrict == VINF_SUCCESS)
         pIemCpu->cInstructions++;
+//#ifdef DEBUG
+//    AssertMsg(pIemCpu->offOpcode == cbInstr || rcStrict != VINF_SUCCESS, ("%u %u\n", pIemCpu->offOpcode, cbInstr));
+//#endif
 
     /* Execute the next instruction as well if a cli, pop ss or
@@ -6122,7 +6266,4 @@
      * Assert some sanity.
      */
-#ifdef DEBUG
-    AssertMsg(pIemCpu->offOpcode == cbInstr || rcStrict != VINF_SUCCESS, ("%u %u\n", pIemCpu->offOpcode, cbInstr));
-#endif
 #if defined(IEM_VERIFICATION_MODE) && defined(IN_RING3)
     iemExecVerificationModeCheck(pIemCpu);
Index: /trunk/src/VBox/VMM/VMMAll/IEMAllInstructions.cpp.h
===================================================================
--- /trunk/src/VBox/VMM/VMMAll/IEMAllInstructions.cpp.h	(revision 36779)
+++ /trunk/src/VBox/VMM/VMMAll/IEMAllInstructions.cpp.h	(revision 36780)
@@ -1718,6 +1718,97 @@
 /** Opcode 0x0f 0xb5. */
 FNIEMOP_STUB(iemOp_lgs_Gv_Mp);
+
+
 /** Opcode 0x0f 0xb6. */
-FNIEMOP_STUB(iemOp_movzx_Gv_Eb);
+FNIEMOP_DEF(iemOp_movzx_Gv_Eb)
+{
+    IEMOP_MNEMONIC("movzx Gv,Eb");
+
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &bRm);
+    IEMOP_HLP_NO_LOCK_PREFIX(); /** @todo should probably not be raised until we've fetched all the opcode bytes? */
+
+    /*
+     * Register source (mod == 3): byte register widened into the destination register.
+     */
+    if ((bRm & X86_MODRM_MOD_MASK) == (3 << X86_MODRM_MOD_SHIFT))
+    {
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT: /* 8-bit register -> 16-bit register. */
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint16_t, u16Value);
+                IEM_MC_FETCH_GREG_U8_ZX_U16(u16Value, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u16Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT: /* 8-bit register -> 32-bit register. */
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint32_t, u32Value);
+                IEM_MC_FETCH_GREG_U8_ZX_U32(u32Value, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u32Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT: /* 8-bit register -> 64-bit register. */
+                IEM_MC_BEGIN(0, 1);
+                IEM_MC_LOCAL(uint64_t, u64Value);
+                IEM_MC_FETCH_GREG_U8_ZX_U64(u64Value, (bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB);
+                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u64Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+    else
+    {
+        /*
+         * Memory source: fetch a byte at the effective address and widen it into the register.
+         */
+        switch (pIemCpu->enmEffOpSize)
+        {
+            case IEMMODE_16BIT:
+                IEM_MC_BEGIN(0, 2);
+                IEM_MC_LOCAL(uint16_t, u16Value);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst); /* Address of the source byte, despite the 'Dst' name. */
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_FETCH_MEM_U8_ZX_U16(u16Value, pIemCpu->iEffSeg, GCPtrEffDst);
+                IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u16Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_32BIT:
+                IEM_MC_BEGIN(0, 2);
+                IEM_MC_LOCAL(uint32_t, u32Value);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst); /* Address of the source byte, despite the 'Dst' name. */
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_FETCH_MEM_U8_ZX_U32(u32Value, pIemCpu->iEffSeg, GCPtrEffDst);
+                IEM_MC_STORE_GREG_U32(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u32Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            case IEMMODE_64BIT:
+                IEM_MC_BEGIN(0, 2);
+                IEM_MC_LOCAL(uint64_t, u64Value);
+                IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst); /* Address of the source byte, despite the 'Dst' name. */
+                IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
+                IEM_MC_FETCH_MEM_U8_ZX_U64(u64Value, pIemCpu->iEffSeg, GCPtrEffDst);
+                IEM_MC_STORE_GREG_U64(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u64Value);
+                IEM_MC_ADVANCE_RIP();
+                IEM_MC_END();
+                return VINF_SUCCESS;
+
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
+        }
+    }
+}
+
+
 /** Opcode 0x0f 0xb7. */
 FNIEMOP_STUB(iemOp_movzx_Gv_Ew);
@@ -2101,5 +2192,5 @@
             IEM_MC_BEGIN(0, 1);
             IEM_MC_LOCAL(uint32_t, u32Value);
-            IEM_MC_FETCH_SREG_U32_ZX(u32Value, iReg);
+            IEM_MC_FETCH_SREG_ZX_U32(u32Value, iReg);
             IEM_MC_PUSH_U32(u32Value);
             IEM_MC_ADVANCE_RIP();
@@ -2110,5 +2201,5 @@
             IEM_MC_BEGIN(0, 1);
             IEM_MC_LOCAL(uint64_t, u64Value);
-            IEM_MC_FETCH_SREG_U64_ZX(u64Value, iReg);
+            IEM_MC_FETCH_SREG_ZX_U64(u64Value, iReg);
             IEM_MC_PUSH_U64(u64Value);
             IEM_MC_ADVANCE_RIP();
@@ -3630,10 +3721,5 @@
 FNIEMOP_DEF(iemOp_insb_Yb_DX)
 {
-    uint8_t u8Imm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &u8Imm);
     IEMOP_HLP_NO_LOCK_PREFIX();
-
-    /*
-     * Use the C implementation if a repeate prefix is encountered.
-     */
     if (pIemCpu->fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
     {
@@ -3644,4 +3730,5 @@
             case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_ins_op8_addr32);
             case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_ins_op8_addr64);
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
         }
     }
@@ -3654,7 +3741,7 @@
             case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_ins_op8_addr32);
             case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_ins_op8_addr64);
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
         }
     }
-    AssertFailedReturn(VERR_INTERNAL_ERROR_3);
 }
 
@@ -3663,5 +3750,4 @@
 FNIEMOP_DEF(iemOp_inswd_Yv_DX)
 {
-    uint8_t u8Imm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &u8Imm);
     IEMOP_HLP_NO_LOCK_PREFIX();
     if (pIemCpu->fPrefixes & (IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
@@ -3676,4 +3762,5 @@
                     case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_ins_op16_addr32);
                     case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_ins_op16_addr64);
+                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
                 }
                 break;
@@ -3685,6 +3772,8 @@
                     case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_ins_op32_addr32);
                     case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_ins_op32_addr64);
+                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
                 }
                 break;
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
         }
     }
@@ -3700,4 +3789,5 @@
                     case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_ins_op16_addr32);
                     case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_ins_op16_addr64);
+                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
                 }
                 break;
@@ -3709,9 +3799,10 @@
                     case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_ins_op32_addr32);
                     case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_ins_op32_addr64);
+                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
                 }
                 break;
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
         }
     }
-    AssertFailedReturn(VERR_INTERNAL_ERROR_3);
 }
 
@@ -3720,10 +3811,5 @@
 FNIEMOP_DEF(iemOp_outsb_Yb_DX)
 {
-    uint8_t u8Imm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &u8Imm);
     IEMOP_HLP_NO_LOCK_PREFIX();
-
-    /*
-     * Use the C implementation if a repeate prefix is encountered.
-     */
     if (pIemCpu->fPrefixes & (IEM_OP_PRF_REPNZ | IEM_OP_PRF_REPZ))
     {
@@ -3734,4 +3820,5 @@
             case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_outs_op8_addr32);
             case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_outs_op8_addr64);
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
         }
     }
@@ -3744,7 +3831,7 @@
             case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_outs_op8_addr32);
             case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_outs_op8_addr64);
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
         }
     }
-    AssertFailedReturn(VERR_INTERNAL_ERROR_3);
 }
 
@@ -3753,5 +3840,4 @@
 FNIEMOP_DEF(iemOp_outswd_Yv_DX)
 {
-    uint8_t u8Imm; IEM_OPCODE_GET_NEXT_BYTE(pIemCpu, &u8Imm);
     IEMOP_HLP_NO_LOCK_PREFIX();
     if (pIemCpu->fPrefixes & (IEM_OP_PRF_REPZ | IEM_OP_PRF_REPNZ))
@@ -3766,4 +3852,5 @@
                     case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_outs_op16_addr32);
                     case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_outs_op16_addr64);
+                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
                 }
                 break;
@@ -3775,6 +3862,8 @@
                     case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_outs_op32_addr32);
                     case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_rep_outs_op32_addr64);
+                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
                 }
                 break;
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
         }
     }
@@ -3790,4 +3879,5 @@
                     case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_outs_op16_addr32);
                     case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_outs_op16_addr64);
+                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
                 }
                 break;
@@ -3799,9 +3889,10 @@
                     case IEMMODE_32BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_outs_op32_addr32);
                     case IEMMODE_64BIT: return IEM_MC_DEFER_TO_CIMPL_0(iemCImpl_outs_op32_addr64);
+                    IEM_NOT_REACHED_DEFAULT_CASE_RET();
                 }
                 break;
+            IEM_NOT_REACHED_DEFAULT_CASE_RET();
         }
     }
-    AssertFailedReturn(VERR_INTERNAL_ERROR_3);
 }
 
@@ -4762,7 +4853,5 @@
                 IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
                 IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm);
-Log(("GCPtrEffDst=%RGv\n", GCPtrEffDst));
                 IEM_MC_FETCH_MEM_U16(u16Value, pIemCpu->iEffSeg, GCPtrEffDst);
-Log(("u16Value=%#x\n", u16Value));
                 IEM_MC_STORE_GREG_U16(((bRm >> X86_MODRM_REG_SHIFT) & X86_MODRM_REG_SMASK) | pIemCpu->uRexReg, u16Value);
                 IEM_MC_ADVANCE_RIP();
@@ -4834,5 +4923,5 @@
                 IEM_MC_BEGIN(0, 1);
                 IEM_MC_LOCAL(uint32_t, u32Value);
-                IEM_MC_FETCH_SREG_U32_ZX(u32Value, iSegReg);
+                IEM_MC_FETCH_SREG_ZX_U32(u32Value, iSegReg);
                 IEM_MC_STORE_GREG_U32((bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB, u32Value);
                 IEM_MC_ADVANCE_RIP();
@@ -4843,5 +4932,5 @@
                 IEM_MC_BEGIN(0, 1);
                 IEM_MC_LOCAL(uint64_t, u64Value);
-                IEM_MC_FETCH_SREG_U64_ZX(u64Value, iSegReg);
+                IEM_MC_FETCH_SREG_ZX_U64(u64Value, iSegReg);
                 IEM_MC_STORE_GREG_U64((bRm & X86_MODRM_RM_MASK) | pIemCpu->uRexB, u64Value);
                 IEM_MC_ADVANCE_RIP();
@@ -6007,5 +6096,12 @@
 
 /** Opcode 0xc2. */
-FNIEMOP_STUB(iemOp_retn_Iw);
+FNIEMOP_DEF(iemOp_retn_Iw)
+{
+    IEMOP_MNEMONIC("retn Iw");
+    uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm); /* Iw: handed to iemCImpl_retn as cbPop. */
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
+    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_retn, pIemCpu->enmEffOpSize, u16Imm); /* Pop + checks are done in C. */
+}
 
 
@@ -6015,37 +6111,6 @@
     IEMOP_MNEMONIC("retn");
     IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
-    switch (pIemCpu->enmEffOpSize)
-    {
-        case IEMMODE_16BIT:
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint16_t, u16NewIP);
-            IEM_MC_POP_U16(&u16NewIP);
-            /** @todo This should raise GP(0) if u16NewIP > csHid.u32Limit.
-             *        The intel manual does not indicate that this is the
-             *        case for 32-bit or 64-bit (canonical check). Needs to
-             *        be tested. */
-            IEM_MC_SET_RIP_U16(u16NewIP);
-            IEM_MC_END()
-            return VINF_SUCCESS;
-
-        case IEMMODE_32BIT:
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint32_t, u32NewIP);
-            IEM_MC_POP_U32(&u32NewIP);
-            IEM_MC_SET_RIP_U32(u32NewIP);
-            IEM_MC_END()
-            return VINF_SUCCESS;
-
-        case IEMMODE_64BIT:
-            IEM_MC_BEGIN(0, 1);
-            IEM_MC_LOCAL(uint64_t, u64NewIP);
-            IEM_MC_POP_U64(&u64NewIP);
-            IEM_MC_SET_RIP_U64(u64NewIP);
-            IEM_MC_END()
-            return VINF_SUCCESS;
-
-        default:
-            AssertFailedReturn(VERR_INTERNAL_ERROR_2);
-    }
+    IEMOP_HLP_NO_LOCK_PREFIX();
+    return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_retn, pIemCpu->enmEffOpSize, 0);
 }
 
@@ -6186,4 +6251,5 @@
     uint16_t u16Imm; IEM_OPCODE_GET_NEXT_U16(pIemCpu, &u16Imm);
     IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
     return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_retf, pIemCpu->enmEffOpSize, u16Imm);
 }
@@ -6195,4 +6261,5 @@
     IEMOP_MNEMONIC("retf");
     IEMOP_HLP_NO_LOCK_PREFIX();
+    IEMOP_HLP_DEFAULT_64BIT_OP_SIZE();
     return IEM_MC_DEFER_TO_CIMPL_2(iemCImpl_retf, pIemCpu->enmEffOpSize, 0);
 }
@@ -6303,5 +6370,10 @@
         case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC("rcl Ev,1"); break;
         case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC("rcr Ev,1"); break;
-        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC("shl Ev,1"); break;
+        case 4:
+            pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC("shl Ev,1");
+#ifdef IEM_VERIFICATION_MODE
+            pIemCpu->fShlHack = true;
+#endif
+            break;
         case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC("shr Ev,1"); break;
         case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC("sar Ev,1"); break;
@@ -6490,5 +6562,10 @@
         case 2: pImpl = &g_iemAImpl_rcl; IEMOP_MNEMONIC("rcl Ev,CL"); break;
         case 3: pImpl = &g_iemAImpl_rcr; IEMOP_MNEMONIC("rcr Ev,CL"); break;
-        case 4: pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC("shl Ev,CL"); break;
+        case 4:
+            pImpl = &g_iemAImpl_shl; IEMOP_MNEMONIC("shl Ev,CL");
+#ifdef IEM_VERIFICATION_MODE
+            pIemCpu->fShlHack = true;
+#endif
+            break;
         case 5: pImpl = &g_iemAImpl_shr; IEMOP_MNEMONIC("shr Ev,CL"); break;
         case 7: pImpl = &g_iemAImpl_sar; IEMOP_MNEMONIC("sar Ev,CL"); break;
Index: /trunk/src/VBox/VMM/VMMR3/EM.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMR3/EM.cpp	(revision 36779)
+++ /trunk/src/VBox/VMM/VMMR3/EM.cpp	(revision 36780)
@@ -1995,9 +1995,9 @@
                  */
                 case EMSTATE_RAW:
-#ifdef IEM_VERIFICATION_MODE /* remove later */
+#ifndef IEM_VERIFICATION_MODE /* remove later */
-                    AssertFailed();
-#endif
+                    /* Compiled out in IEM verification mode, where this case label falls through. */
                     rc = emR3RawExecute(pVM, pVCpu, &fFFDone);
                     break;
+#endif
 
                 /*
@@ -2005,9 +2005,9 @@
                  */
                 case EMSTATE_HWACC:
-#ifdef IEM_VERIFICATION_MODE /* remove later */
+#ifndef IEM_VERIFICATION_MODE /* remove later */
-                    AssertFailed();
-#endif
+                    /* Compiled out in IEM verification mode, where this case label falls through. */
                     rc = emR3HwAccExecute(pVM, pVCpu, &fFFDone);
                     break;
+#endif
 
                 /*
