Index: /trunk/src/VBox/ValidationKit/bootsectors/Makefile.kmk
===================================================================
--- /trunk/src/VBox/ValidationKit/bootsectors/Makefile.kmk	(revision 61534)
+++ /trunk/src/VBox/ValidationKit/bootsectors/Makefile.kmk	(revision 61535)
@@ -270,4 +270,23 @@
 	$$(bs3-fpustate-1_0_OUTDIR)/bs3-fpustate-1-asm.o16
 
+# CPU instruction decoding experiments (disabled: the MISCBINS line below is commented out).
+#MISCBINS += bs3-cpu-decoding-1
+bs3-cpu-decoding-1_TEMPLATE = VBoxBS3KitImg
+bs3-cpu-decoding-1_INCS  = .
+bs3-cpu-decoding-1_DEFS  =  BS3_CMN_INSTANTIATE_FILE1=bs3-cpu-decoding-1-template.c
+bs3-cpu-decoding-1_DEFS += BS3_MODE_INSTANTIATE_FILE1=bs3-cpu-decoding-1-template.c
+bs3-cpu-decoding-1_SOURCES = \
+	bs3kit/bs3-first-rm.asm \
+	bs3-cpu-decoding-1.c32 \
+	bs3kit/bs3-cmn-instantiate.c16 \
+	bs3kit/bs3-cmn-instantiate.c32 \
+	bs3kit/bs3-cmn-instantiate.c64 \
+	bs3-cpu-decoding-1-asm.asm
+bs3-cpu-decoding-1-template.o:: \
+	$$(bs3-cpu-decoding-1_0_OUTDIR)/bs3kit/bs3-cmn-instantiate.o16 \
+	$$(bs3-cpu-decoding-1_0_OUTDIR)/bs3kit/bs3-cmn-instantiate.o32 \
+	$$(bs3-cpu-decoding-1_0_OUTDIR)/bs3kit/bs3-cmn-instantiate.o64 \
+	$$(bs3-cpu-decoding-1_0_OUTDIR)/bs3-cpu-decoding-1-asm.o16
+
 endif # VBOX_WITH_BS3KIT
 
Index: /trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-asm.asm
===================================================================
--- /trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-asm.asm	(revision 61535)
+++ /trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-asm.asm	(revision 61535)
@@ -0,0 +1,162 @@
+; $Id$
+;; @file
+; BS3Kit - bs3-cpu-decoding-1, assembly helpers and template instantiation.
+;
+
+;
+; Copyright (C) 2007-2016 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+; The contents of this file may alternatively be used under the terms
+; of the Common Development and Distribution License Version 1.0
+; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+; VirtualBox OSE distribution, in which case the provisions of the
+; CDDL are applicable instead of those of the GPL.
+;
+; You may elect to license modified versions of this file under the
+; terms and conditions of either the GPL or the CDDL or both.
+;
+
+
+;*********************************************************************************************************************************
+;*  Header Files                                                                                                                 *
+;*********************************************************************************************************************************
+%include "bs3kit.mac"
+
+
+;*********************************************************************************************************************************
+;*  Global Variables                                                                                                             *
+;*********************************************************************************************************************************
+BS3_BEGIN_DATA16
+;; @name Floating point constants (g_r32_* are 32-bit, g_r64_* 64-bit and g_r80_* 80-bit values).
+; @{
+g_r32_0dot1:    dd 0.1
+g_r32_3dot2:    dd 3.2
+g_r32_Zero:     dd 0.0
+g_r32_One:      dd 1.0
+g_r32_Two:      dd 2.0
+g_r32_Three:    dd 3.0
+g_r32_Ten:      dd 10.0
+g_r32_Eleven:   dd 11.0
+g_r32_ThirtyTwo:dd 32.0
+g_r32_Min:      dd 000800000h           ; smallest positive normal value
+g_r32_Max:      dd 07f7fffffh           ; largest finite value
+g_r32_Inf:      dd 07f800000h           ; +infinity
+g_r32_SNaN:     dd 07f800001h           ; signalling NaN, smallest payload
+g_r32_SNaNMax:  dd 07fbfffffh           ; signalling NaN, largest payload
+g_r32_QNaN:     dd 07fc00000h           ; quiet NaN, smallest payload
+g_r32_QNaNMax:  dd 07fffffffh           ; quiet NaN, largest payload
+g_r32_NegQNaN:  dd 0ffc00000h           ; quiet NaN with the sign bit set
+
+g_r64_0dot1:    dq 0.1
+g_r64_6dot9:    dq 6.9
+g_r64_Zero:     dq 0.0
+g_r64_One:      dq 1.0
+g_r64_Two:      dq 2.0
+g_r64_Three:    dq 3.0
+g_r64_Ten:      dq 10.0
+g_r64_Eleven:   dq 11.0
+g_r64_ThirtyTwo:dq 32.0
+g_r64_Min:      dq 00010000000000000h   ; smallest positive normal value
+g_r64_Max:      dq 07fefffffffffffffh   ; largest finite value
+g_r64_Inf:      dq 07ff0000000000000h   ; +infinity
+g_r64_SNaN:     dq 07ff0000000000001h   ; signalling NaN, smallest payload
+g_r64_SNaNMax:  dq 07ff7ffffffffffffh   ; signalling NaN, largest payload
+g_r64_NegQNaN:  dq 0fff8000000000000h   ; quiet NaN with the sign bit set
+g_r64_QNaN:     dq 07ff8000000000000h   ; quiet NaN, smallest payload
+g_r64_QNaNMax:  dq 07fffffffffffffffh   ; quiet NaN, largest payload
+g_r64_DnMin:    dq 00000000000000001h   ; smallest denormal
+g_r64_DnMax:    dq 0000fffffffffffffh   ; largest denormal
+
+
+g_r80_0dot1:    dt 0.1
+g_r80_3dot2:    dt 3.2
+g_r80_Zero:     dt 0.0
+g_r80_One:      dt 1.0
+g_r80_Two:      dt 2.0
+g_r80_Three:    dt 3.0
+g_r80_Ten:      dt 10.0
+g_r80_Eleven:   dt 11.0
+g_r80_ThirtyTwo:dt 32.0
+%ifdef __NASM__
+g_r80_Min:      dq 08000000000000000h   ; NASM branch: 64-bit significand (dq) followed by 16-bit sign+exponent (dw)
+                dw 00001h
+g_r80_Max:      dq     0ffffffffffffffffh
+                dw 07ffeh
+g_r80_Inf:      dq     08000000000000000h
+                dw 07fffh
+g_r80_QNaN:     dq     0c000000000000000h
+                dw 07fffh
+g_r80_QNaNMax:  dq     0ffffffffffffffffh
+                dw 07fffh
+g_r80_NegQNaN:  dq     0c000000000000000h
+                dw 0ffffh
+g_r80_SNaN:     dq     08000000000000001h
+                dw 07fffh
+g_r80_SNaNMax:  dq     0bfffffffffffffffh
+                dw 07fffh
+g_r80_DnMin:    dq     00000000000000001h
+                dw 00000h
+g_r80_DnMax:    dq     07fffffffffffffffh
+                dw 00000h
+%else
+g_r80_Min:      dt 000018000000000000000h ; other assemblers: the same ten values as single 80-bit hex literals
+g_r80_Max:      dt 07ffeffffffffffffffffh
+g_r80_Inf:      dt 07fff8000000000000000h
+g_r80_QNaN:     dt 07fffc000000000000000h
+g_r80_QNaNMax:  dt 07fffffffffffffffffffh
+g_r80_NegQNaN:  dt 0ffffc000000000000000h
+g_r80_SNaN:     dt 07fff8000000000000001h
+g_r80_SNaNMax:  dt 07fffbfffffffffffffffh
+g_r80_DnMin:    dt 000000000000000000001h
+g_r80_DnMax:    dt 000007fffffffffffffffh
+%endif
+
+g_r32V1:        dd 3.2                  ; g_r32V1/g_r32V2/g_r64V1/g_r80V1: generic test values
+g_r32V2:        dd -1.9
+g_r64V1:        dq 6.4
+g_r80V1:        dt 8.0
+
+; Denormal numbers (zero exponent, non-zero fraction).
+g_r32D0:        dd 000200000h
+;; @}
+
+;; @name Upconverted Floating point constants (32-bit values written out in 80-bit format; ';' entries are not assembled)
+; @{
+;g_r80_r32_0dot1:        dt 0.1
+%ifdef __NASM__
+g_r80_r32_3dot2:        dq     0cccccd0000000000h  ; the 24 significand bits of the 32-bit 3.2 (0cccccdh), left aligned
+                        dw 04000h                  ; sign 0, exponent 4000h
+%else
+g_r80_r32_3dot2:        dt 04000cccccd0000000000h  ; same value as a single 80-bit literal
+%endif
+;g_r80_r32_Zero:         dt 0.0
+;g_r80_r32_One:          dt 1.0
+;g_r80_r32_Two:          dt 2.0
+;g_r80_r32_Three:        dt 3.0
+;g_r80_r32_Ten:          dt 10.0
+;g_r80_r32_Eleven:       dt 11.0
+;g_r80_r32_ThirtyTwo:    dt 32.0
+;; @}
+
+;; @name Integer constants (each label reserves the width its name states: u64 = dq, u32 = dd).
+; @{
+g_u64Zero:      dq 0
+g_u32Zero:      dd 0
+g_u64Two:       dq 2
+g_u32Two:       dd 2
+;; @}
+
+
+;
+; Instantiate the code templates of this test (bs3-cpu-decoding-1-template.mac).
+;
+BS3_INSTANTIATE_TEMPLATE_ESSENTIALS      "bs3-cpu-decoding-1-template.mac"
+
Index: /trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-template.c
===================================================================
--- /trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-template.c	(revision 61535)
+++ /trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-template.c	(revision 61535)
@@ -0,0 +1,302 @@
+/* $Id$ */
+/** @file
+ * BS3Kit - bs3-cpu-decoding-1, C code template.
+ */
+
+/*
+ * Copyright (C) 2007-2016 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#include <iprt/asm.h>
+#include <iprt/asm-amd64-x86.h>
+#include <VBox/VMMDevTesting.h>
+
+
+/*********************************************************************************************************************************
+*   Defined Constants And Macros                                                                                                 *
+*********************************************************************************************************************************/
+
+
+#ifdef BS3_INSTANTIATING_CMN
+
+/**
+ * Prints every scalar state member, x87 register and XMM register in which the two states differ.
+ * @param   pExpected   The reference state.
+ * @param   pChecking   The state that is compared against the reference.
+ */
+# define bs3FpuState1_Diff BS3_CMN_NM(bs3FpuState1_Diff)
+BS3_DECL_NEAR(void) bs3FpuState1_Diff(X86FXSTATE const BS3_FAR *pExpected, X86FXSTATE const BS3_FAR *pChecking)
+{
+    unsigned i;
+# define CHECK(a_Member, a_Fmt) /* reports one scalar member if it differs (actual first, then expected) */ \
+        if (pExpected->a_Member != pChecking->a_Member) \
+            Bs3TestPrintf("  " #a_Member ": " a_Fmt ", expected " a_Fmt "\n", pChecking->a_Member, pExpected->a_Member); \
+        else do { } while (0)
+    CHECK(FCW,          "%#RX16");
+    CHECK(FSW,          "%#RX16");
+    CHECK(FTW,          "%#RX16");
+    CHECK(FOP,          "%#RX16");
+    CHECK(FPUIP,        "%#RX32");
+    CHECK(CS,           "%#RX16");
+    CHECK(Rsrvd1,       "%#RX16");
+    CHECK(FPUDP,        "%#RX32");
+    CHECK(DS,           "%#RX16");
+    CHECK(Rsrvd2,       "%#RX16");
+    CHECK(MXCSR,        "%#RX32");
+    CHECK(MXCSR_MASK,   "%#RX32");
+# undef CHECK
+    for (i = 0; i < RT_ELEMENTS(pExpected->aRegs); i++) /* the x87 registers, 16 bytes each */
+        if (   pChecking->aRegs[i].au64[0] != pExpected->aRegs[i].au64[0]
+            || pChecking->aRegs[i].au64[1] != pExpected->aRegs[i].au64[1])
+            Bs3TestPrintf("st%u: %.16Rhxs\n" "exp: %.16Rhxs\n",
+                          i, &pChecking->aRegs[i], &pExpected->aRegs[i]);
+    for (i = 0; i < RT_ELEMENTS(pExpected->aXMM); i++) /* the XMM registers: dump aXMM, not aRegs */
+        if (   pChecking->aXMM[i].au64[0] != pExpected->aXMM[i].au64[0]
+            || pChecking->aXMM[i].au64[1] != pExpected->aXMM[i].au64[1])
+            Bs3TestPrintf("xmm%u: %.16Rhxs\n"
+                          " %sexp: %.16Rhxs\n", /* %s pads the 'exp' line so it stays aligned for xmm10 and up */
+                          i, &pChecking->aXMM[i], i >= 10 ? " " : "", &pExpected->aXMM[i]);
+}
+
+
+#endif /* BS3_INSTANTIATING_CMN */
+
+
+/*
+ * Mode specific code.
+ * Mode specific code.
+ * Mode specific code.
+ */
+#ifdef BS3_INSTANTIATING_MODE
+# if TMPL_MODE == BS3_MODE_PE32 \
+  || TMPL_MODE == BS3_MODE_PP32 \
+  || TMPL_MODE == BS3_MODE_PAE32 \
+  || TMPL_MODE == BS3_MODE_LM64 \
+  || TMPL_MODE == BS3_MODE_RM
+
+/* Assembly helpers, one instance per mode via TMPL_NM (see the bs3FpuState1_* procedures in the .mac template): */
+BS3_DECL_NEAR(void) TMPL_NM(bs3FpuState1_InitState)(X86FXSTATE BS3_FAR *pFxState, void BS3_FAR *pvMmioReg);
+BS3_DECL_NEAR(void) TMPL_NM(bs3FpuState1_Restore)(X86FXSTATE const BS3_FAR *pFxState);
+BS3_DECL_NEAR(void) TMPL_NM(bs3FpuState1_Save)(X86FXSTATE BS3_FAR *pFxState);
+/* Workers that each perform one kind of access on the memory at pvMmioReg: */
+BS3_DECL_NEAR(void) TMPL_NM(bs3FpuState1_FNStEnv)(void BS3_FAR *pvMmioReg);
+BS3_DECL_NEAR(void) TMPL_NM(bs3FpuState1_MovDQU_Read)(void BS3_FAR *pvMmioReg);
+BS3_DECL_NEAR(void) TMPL_NM(bs3FpuState1_MovDQU_Write)(void BS3_FAR *pvMmioReg);
+BS3_DECL_NEAR(void) TMPL_NM(bs3FpuState1_FMul)(void BS3_FAR *pvMmioReg);
+
+
+/**
+ * Tests for FPU state corruption.
+ *
+ * First we don't do anything to quit guest context for a while.  Then we start testing weird MMIO
+ * accesses, some of which among other things force the use of the FPU state or host FPU to do the
+ * emulation (both are a little complicated in raw-mode and ring-0).  We ASSUME FXSAVE/FXRSTOR support.
+ *
+ * @returns 0 on success, 1 when a saved state differs from the expected one (read back errors are only reported).
+ * @param   bMode   The mode being tested (not referenced by this function).
+ */
+BS3_DECL_FAR(uint8_t) TMPL_NM(bs3FpuState1_Corruption)(uint8_t bMode)
+{
+    /* We don't need to test that many modes, probably.  */
+
+    uint8_t             abBuf[sizeof(X86FXSTATE)*2 + 32];
+    uint8_t BS3_FAR    *pbTmp = &abBuf[0x10 - (((uintptr_t)abBuf) & 0x0f)];
+    X86FXSTATE BS3_FAR *pExpected = (X86FXSTATE BS3_FAR *)pbTmp;
+    X86FXSTATE BS3_FAR *pChecking = pExpected + 1;
+    uint32_t            iLoop;
+    uint32_t            uStartTick;
+    bool                fMmioReadback;
+    bool                fReadBackError = false;
+    BS3PTRUNION         MmioReg;
+
+
+# undef  CHECK_STATE
+# define CHECK_STATE(a_Instr) \
+        do { \
+            TMPL_NM(bs3FpuState1_Save)(pChecking); \
+            if (Bs3MemCmp(pExpected, pChecking, sizeof(*pExpected)) != 0) \
+            { \
+                Bs3TestFailedF("State differs after " #a_Instr " (write) in loop #%RU32\n", iLoop); \
+                bs3FpuState1_Diff(pExpected, pChecking); \
+                Bs3PitDisable(); \
+                return 1; \
+            } \
+        } while (0)
+
+    /*
+     * Setup the test.
+     */
+
+    /* Make this code executable in raw-mode.  A bit tricky. */
+    ASMSetCR0(ASMGetCR0() | X86_CR0_WP);
+    Bs3PitSetupAndEnablePeriodTimer(20);
+    ASMIntEnable();
+# if ARCH_BITS != 64
+    ASMHalt();
+# endif
+
+    /* Figure out which MMIO region we'll be using so we can correctly initialize FPUDS. */
+# if BS3_MODE_IS_RM_OR_V86(TMPL_MODE)
+    MmioReg.pv = BS3_FP_MAKE(0xffff, VMMDEV_TESTING_MMIO_BASE - _1M + 16);
+# elif BS3_MODE_IS_16BIT_CODE(TMPL_MODE)
+    MmioReg.pv = BS3_FP_MAKE(BS3_SEL_VMMDEV_MMIO16, VMMDEV_TESTING_MMIO_BASE - _1M);
+# else
+    MmioReg.pv = (uint8_t *)VMMDEV_TESTING_MMIO_BASE;
+# endif
+    if (MmioReg.pu32[VMMDEV_TESTING_MMIO_OFF_NOP / sizeof(uint32_t)] == VMMDEV_TESTING_NOP_RET)
+    {
+        fMmioReadback = true;
+        MmioReg.pb += VMMDEV_TESTING_MMIO_OFF_READBACK;
+    }
+    else
+    {
+        Bs3TestPrintf("VMMDev MMIO not found, using VGA instead\n");
+        fMmioReadback = false;
+        MmioReg.pv = Bs3XptrFlatToCurrent(0xa7800);
+    }
+
+    /* Make 100% sure we don't trap accessing the FPU state and that we can use fxsave/fxrstor. */
+    g_usBs3TestStep = 1;
+    ASMSetCR0((ASMGetCR0() & ~(X86_CR0_TS | X86_CR0_EM)) | X86_CR0_MP);
+    ASMSetCR4(ASMGetCR4() | X86_CR4_OSFXSR /*| X86_CR4_OSXMMEEXCPT*/);
+
+    /* Come up with a distinct state. We do that from assembly (will do FPU in R0/RC). */
+    g_usBs3TestStep = 2;
+    Bs3MemSet(abBuf, 0x42, sizeof(abBuf));
+    TMPL_NM(bs3FpuState1_InitState)(pExpected, MmioReg.pb);
+
+
+    /*
+     * Test #1: Check that we can keep it consistent for a while.
+     */
+    g_usBs3TestStep = 3;
+    uStartTick = g_cBs3PitTicks;
+    for (iLoop = 0; iLoop < _16M; iLoop++)
+    {
+        CHECK_STATE(nop);
+        if (   (iLoop & 0xffff) == 0xffff
+            && g_cBs3PitTicks - uStartTick >= 20 * 20) /* 20 seconds*/
+            break;
+    }
+
+    /*
+     * Test #2: Use various FPU, SSE and weird instructions to do MMIO writes.
+     *
+     * We'll use the VMMDev readback register if possible, but make do
+     * with VGA if not configured.
+     */
+    g_usBs3TestStep = 4;
+    uStartTick = g_cBs3PitTicks;
+    for (iLoop = 0; iLoop < _1M; iLoop++)
+    {
+        unsigned off;
+        uint8_t  abCompare[64];
+        uint8_t  abReadback[64];
+
+        /* Macros  */
+# undef  CHECK_READBACK_WRITE_RUN
+# define CHECK_READBACK_WRITE_RUN(a_Instr, a_Worker, a_Type) \
+            do { \
+                off = (unsigned)(iLoop & (VMMDEV_TESTING_READBACK_SIZE / 2 - 1)); \
+                if (off + sizeof(a_Type) > VMMDEV_TESTING_READBACK_SIZE) \
+                    off = VMMDEV_TESTING_READBACK_SIZE - sizeof(a_Type); \
+                a_Worker((a_Type *)&MmioReg.pb[off]); \
+                if (fMmioReadback && (!fReadBackError || iLoop == 0)) \
+                { \
+                    a_Worker((a_Type *)&abCompare[0]); \
+                    Bs3MemCpy(abReadback, &MmioReg.pb[off], sizeof(a_Type)); \
+                    if (Bs3MemCmp(abReadback, abCompare, sizeof(a_Type)) != 0) \
+                    { \
+                        Bs3TestFailedF("Read back error for " #a_Instr " in loop #%RU32:\n%.*Rhxs expected:\n%.*Rhxs\n", \
+                                       iLoop, sizeof(a_Type), abReadback, sizeof(a_Type), abCompare); \
+                        fReadBackError = true; \
+                    } \
+                } \
+            } while (0)
+
+# undef  CHECK_READBACK_WRITE
+# define CHECK_READBACK_WRITE(a_Instr, a_Worker, a_Type) \
+            CHECK_READBACK_WRITE_RUN(a_Instr, a_Worker, a_Type); \
+            CHECK_STATE(a_Instr)
+# undef  CHECK_READBACK_WRITE_Z
+# define CHECK_READBACK_WRITE_Z(a_Instr, a_Worker, a_Type) \
+            do { \
+                if (fMmioReadback && (!fReadBackError || iLoop == 0)) \
+                { \
+                    Bs3MemZero(&abCompare[0], sizeof(a_Type)); \
+                    off = (unsigned)(iLoop & (VMMDEV_TESTING_READBACK_SIZE / 2 - 1)); \
+                    if (off + sizeof(a_Type) > VMMDEV_TESTING_READBACK_SIZE) \
+                        off = VMMDEV_TESTING_READBACK_SIZE - sizeof(a_Type); \
+                    Bs3MemZero(&MmioReg.pb[off], sizeof(a_Type)); \
+                } \
+                CHECK_READBACK_WRITE(a_Instr, a_Worker, a_Type); \
+            } while (0)
+
+# undef  CHECK_READBACK_READ_RUN
+# define CHECK_READBACK_READ_RUN(a_Instr, a_Worker, a_Type) \
+            do { \
+                off = (unsigned)(iLoop & (VMMDEV_TESTING_READBACK_SIZE / 2 - 1)); \
+                if (off + sizeof(a_Type) > VMMDEV_TESTING_READBACK_SIZE) \
+                    off = VMMDEV_TESTING_READBACK_SIZE - sizeof(a_Type); \
+                a_Worker((a_Type *)&MmioReg.pb[off]); \
+                TMPL_NM(bs3FpuState1_Save)(pChecking); \
+            } while (0)
+# undef  CHECK_READBACK_READ
+# define CHECK_READBACK_READ(a_Instr, a_Worker, a_Type) \
+            CHECK_READBACK_READ_RUN(a_Instr, a_Worker, a_Type); \
+            CHECK_STATE(a_Instr)
+
+
+        /* The tests. */
+        CHECK_READBACK_WRITE_Z(SIDT,     ASMGetIDTR,                         RTIDTR);
+        CHECK_READBACK_WRITE_Z(FNSTENV,  TMPL_NM(bs3FpuState1_FNStEnv),      X86FSTENV32P); /** @todo x86.h is missing types */
+        CHECK_READBACK_WRITE(  MOVDQU,   TMPL_NM(bs3FpuState1_MovDQU_Write), X86XMMREG);
+        CHECK_READBACK_READ(   MOVDQU,   TMPL_NM(bs3FpuState1_MovDQU_Read),  X86XMMREG);
+
+        /* Using the FPU is a little complicated, but we really need to check these things. */
+        CHECK_READBACK_READ_RUN(FMUL,    TMPL_NM(bs3FpuState1_FMul),         uint64_t);
+        pExpected->FOP    = 0x7dc;
+# if ARCH_BITS == 64
+        pExpected->FPUDP  = (uint32_t) (uintptr_t)&MmioReg.pb[off];
+        pExpected->DS     = (uint16_t)((uintptr_t)&MmioReg.pb[off] >> 32);
+        pExpected->Rsrvd2 = (uint16_t)((uintptr_t)&MmioReg.pb[off] >> 48);
+# elif BS3_MODE_IS_RM_OR_V86(TMPL_MODE)
+        pExpected->FPUDP  = Bs3SelPtrToFlat(&MmioReg.pb[off]);
+# else
+        pExpected->FPUDP  = BS3_FP_OFF(&MmioReg.pb[off]);
+# endif
+        CHECK_STATE(FMUL);
+
+        /* Check for timeout every now and then. */
+        if (   (iLoop & 0xfff) == 0xfff
+            && g_cBs3PitTicks - uStartTick >= 20 * 20) /* 20 seconds*/
+            break;
+    }
+
+    Bs3PitDisable();
+    return 0;
+}
+# endif
+#endif /* BS3_INSTANTIATING_MODE */
+
Index: /trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-template.mac
===================================================================
--- /trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-template.mac	(revision 61535)
+++ /trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1-template.mac	(revision 61535)
@@ -0,0 +1,330 @@
+; $Id$
+;; @file
+; BS3Kit - bs3-cpu-decoding-1, assembly template.
+;
+
+;
+; Copyright (C) 2007-2016 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+; The contents of this file may alternatively be used under the terms
+; of the Common Development and Distribution License Version 1.0
+; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+; VirtualBox OSE distribution, in which case the provisions of the
+; CDDL are applicable instead of those of the GPL.
+;
+; You may elect to license modified versions of this file under the
+; terms and conditions of either the GPL or the CDDL or both.
+;
+
+
+;*********************************************************************************************************************************
+;*  Header Files                                                                                                                 *
+;*********************************************************************************************************************************
+%include "bs3kit-template-header.mac"   ; setup environment
+
+
+;*********************************************************************************************************************************
+;*  External Symbols                                                                                                             *
+;*********************************************************************************************************************************
+TMPL_BEGIN_TEXT
+
+
+;;
+; Initializes the FPU state (x87 stack and XMM registers) and saves it to pFxState.
+;
+; BS3_DECL_NEAR(void) TMPL_NM(bs3FpuState1_InitState)(X86FXSTATE BS3_FAR *pFxState, void *pvMmioReg);
+;
+BS3_PROC_BEGIN_MODE bs3FpuState1_InitState, BS3_PBC_NEAR
+        BS3_CALL_CONV_PROLOG 2
+        push    xBP
+        mov     xBP, xSP
+        push    xBX
+TONLY16 push    ds
+        pushf
+TONLY64 sub     xSP, 20h
+
+        ;
+        ; x87 state.
+        ;
+        fninit
+        fld     dword [TMPL_DATA16_WRT(g_r32V1)]
+        fld     qword [TMPL_DATA16_WRT(g_r64V1)]
+        fld     tword [TMPL_DATA16_WRT(g_r80V1)]
+        fld     qword [TMPL_DATA16_WRT(g_r64V1)]
+        fld     dword [TMPL_DATA16_WRT(g_r32V2)]
+        fld     dword [TMPL_DATA16_WRT(g_r80_QNaNMax)]  ; NOTE(review): dword load from an 80-bit constant - tword intended? Confirm.
+        fld     tword [TMPL_DATA16_WRT(g_r80_SNaNMax)]
+        fld     tword [TMPL_DATA16_WRT(g_r80_ThirtyTwo)]
+
+        ;
+        ; We'll later be using FMUL to test actually using the FPU in RC & R0,
+        ; so for everything to line up correctly with FPU CS:IP and FPU DS:DP,
+        ; we'll call the function here too.  This has the beneficial side effect
+        ; of loading correct FPU CS/DS values so we can check that they don't
+        ; get lost either.  Also, we now don't have to guess whether the CPU
+        ; emulation sets CS/DS or not.
+        ;
+TONLY16 push    xPRE [xBP + xCB + cbCurRetAddr + sCB + 2]
+        push    xPRE [xBP + xCB + cbCurRetAddr + sCB]   ; the second argument (pvMmioReg) is passed on
+        BS3_CALL TMPL_NM(bs3FpuState1_FMul), 1
+        add     xSP, sCB
+
+        ;
+        ; SSE state
+        ;
+        movdqu  xmm0, [TMPL_DATA16_WRT(g_r32_0dot1)]
+        movdqu  xmm1, [TMPL_DATA16_WRT(g_r32_Two)]
+        movdqu  xmm2, [TMPL_DATA16_WRT(g_r32_ThirtyTwo)]
+        movdqu  xmm3, [TMPL_DATA16_WRT(g_r32_ThirtyTwo)]
+        movdqu  xmm4, [TMPL_DATA16_WRT(g_r32_SNaN)]
+        movdqu  xmm5, [TMPL_DATA16_WRT(g_r32_NegQNaN)]
+        movdqu  xmm6, [TMPL_DATA16_WRT(g_r64_Zero)]
+        movdqu  xmm7, [TMPL_DATA16_WRT(g_r64_Two)]
+%if TMPL_BITS == 64
+        movdqu  xmm8, [TMPL_DATA16_WRT(g_r64_Ten)]
+        movdqu  xmm9, [TMPL_DATA16_WRT(g_r64_ThirtyTwo)]
+        movdqu  xmm10, [TMPL_DATA16_WRT(g_r64_Max)]
+        movdqu  xmm11, [TMPL_DATA16_WRT(g_r64_SNaN)]
+        movdqu  xmm12, [TMPL_DATA16_WRT(g_r64_NegQNaN)]
+        movdqu  xmm13, [TMPL_DATA16_WRT(g_r64_QNaNMax)]
+        movdqu  xmm14, [TMPL_DATA16_WRT(g_r64_DnMax)]
+        movdqu  xmm15, [TMPL_DATA16_WRT(g_r80_Eleven)]
+%endif
+
+        ;; @todo status regs
+
+        ;
+        ; Save it.  Note that DS is no longer valid in 16-bit code.
+        ; To be on the safe side, we load and save the state once again.
+        ;
+TONLY16 mov     ds, [xBP + xCB + cbCurRetAddr + 2]
+        mov     xBX, [xBP + xCB + cbCurRetAddr]         ; the first argument (pFxState)
+        cli                                             ; the popf below restores the flags saved on entry
+%if TMPL_BITS == 64
+        o64 fxsave [xBX]
+        fninit
+        o64 fxrstor [xBX]
+        o64 fxsave [xBX]
+%else
+        fxsave  [xBX]
+        fninit
+        fxrstor [xBX]
+        fxsave  [xBX]
+%endif
+
+.return:
+TONLY64 add     xSP, 20h
+        popf
+TONLY16 pop     ds
+        pop     xBX
+        mov     xSP, xBP
+        pop     xBP
+        BS3_CALL_CONV_EPILOG 2
+        BS3_HYBRID_RET
+BS3_PROC_END_MODE   bs3FpuState1_InitState
+
+
+;; Reloads the FPU state from the image at pFxState using FXRSTOR.
+; BS3_DECL_NEAR(void) TMPL_NM(bs3FpuState1_Restore)(X86FXSTATE const BS3_FAR *pFxState);
+;
+BS3_PROC_BEGIN_MODE bs3FpuState1_Restore, BS3_PBC_NEAR
+        push    xBP
+        mov     xBP, xSP
+
+%if TMPL_BITS == 64
+        o64     fxrstor [rcx]           ; pFxState is taken straight from rcx
+%elif TMPL_BITS == 32
+        mov     eax, [xBP + xCB*2]      ; pFxState from the stack
+        fxrstor [eax]
+%elif TMPL_BITS == 16
+        push    bx                      ; preserve bx and ds the same way bs3FpuState1_Save does
+        push    ds
+        mov     ds, [xBP + xCB + cbCurRetAddr + 2]
+        mov     bx, [xBP + xCB + cbCurRetAddr]
+        fxrstor [bx]
+        pop     ds
+        pop     bx
+%else
+ %error TMPL_BITS
+%endif
+
+        mov     xSP, xBP
+        pop     xBP
+        BS3_HYBRID_RET
+BS3_PROC_END_MODE   bs3FpuState1_Restore
+
+;; Saves the current FPU state to the buffer at pFxState using FXSAVE.
+; BS3_DECL_NEAR(void) TMPL_NM(bs3FpuState1_Save)(X86FXSTATE BS3_FAR *pFxState);
+;
+BS3_PROC_BEGIN_MODE bs3FpuState1_Save, BS3_PBC_NEAR
+        push    xBP
+        mov     xBP, xSP
+
+%if TMPL_BITS == 64
+        o64     fxsave [rcx]            ; pFxState is taken straight from rcx
+
+%elif TMPL_BITS == 32
+        mov     eax, [xBP + xCB*2]      ; pFxState from the stack
+        fxsave  [eax]
+
+%elif TMPL_BITS == 16
+        push    bx                      ; bx and ds are used for the far pointer and restored below
+        push    ds
+        mov     ds, [xBP + xCB + cbCurRetAddr + 2]
+        mov     bx, [xBP + xCB + cbCurRetAddr]
+        fxsave  [bx]
+        pop     ds
+        pop     bx
+%else
+ %error TMPL_BITS
+%endif
+
+        mov     xSP, xBP
+        pop     xBP
+        BS3_HYBRID_RET
+BS3_PROC_END_MODE   bs3FpuState1_Save
+
+
+;;
+; Performs a MOVDQU write (16 bytes from xmm3) on the specified memory.
+;
+; BS3_DECL_NEAR(void) TMPL_NM(bs3FpuState1_MovDQU_Write)(void *pvMmioReg);
+;
+BS3_PROC_BEGIN_MODE bs3FpuState1_MovDQU_Write, BS3_PBC_NEAR
+        BS3_CALL_CONV_PROLOG 1
+        push    xBP
+        mov     xBP, xSP
+        push    xBX
+TONLY16 push    ds
+
+        ; Load the register pointer.
+        mov     xBX, [xBP + xCB + cbCurRetAddr]
+TONLY16 mov     ds,  [xBP + xCB + cbCurRetAddr + 2]
+
+        ; Do the write.
+        movdqu  [xBX], xmm3
+
+TONLY16 pop     ds
+        pop     xBX
+        leave
+        BS3_CALL_CONV_EPILOG 1
+        BS3_HYBRID_RET
+BS3_PROC_END_MODE   bs3FpuState1_MovDQU_Write
+
+
+;;
+; Performs a MOVDQU read from the specified memory (into xmm2, whose old value is restored afterwards).
+;
+; BS3_DECL_NEAR(void) TMPL_NM(bs3FpuState1_MovDQU_Read)(void *pvMmioReg);
+;
+BS3_PROC_BEGIN_MODE bs3FpuState1_MovDQU_Read, BS3_PBC_NEAR
+        BS3_CALL_CONV_PROLOG 1
+        push    xBP
+        mov     xBP, xSP
+        push    xBX
+TONLY16 push    ds
+        sub     xSP, 20h                ; stack space for saving xmm2
+%if TMPL_BITS == 16
+        movdqu  [xBP - xCB - xCB - 2 - 18h], xmm2
+%else
+        movdqu  [xSP], xmm2
+%endif
+
+        ; Load the register pointer.
+        mov     xBX, [xBP + xCB + cbCurRetAddr]
+TONLY16 mov     ds,  [xBP + xCB + cbCurRetAddr + 2]
+
+
+        ; Do read.
+        movdqu  xmm2, [xBX]
+
+%if TMPL_BITS == 16
+        movdqu  xmm2, [xBP - xCB - xCB - 2 - 18h]   ; put the saved xmm2 value back
+%else
+        movdqu  xmm2, [xSP]                         ; put the saved xmm2 value back
+%endif
+        add     xSP, 20h
+TONLY16 pop     ds
+        pop     xBX
+        mov     xSP, xBP
+        pop     xBP
+        BS3_CALL_CONV_EPILOG 1
+        BS3_HYBRID_RET
+BS3_PROC_END_MODE   bs3FpuState1_MovDQU_Read
+
+
+;;
+; Performs a FNSTENV write (stores the x87 environment) on the specified memory.
+;
+; BS3_DECL_NEAR(void) TMPL_NM(bs3FpuState1_FNStEnv)(void *pvMmioReg);
+;
+BS3_PROC_BEGIN_MODE bs3FpuState1_FNStEnv, BS3_PBC_NEAR
+        BS3_CALL_CONV_PROLOG 1
+        push    xBP
+        mov     xBP, xSP
+        push    xBX
+TONLY16 push    ds
+
+        ; Load the register pointer.
+        mov     xBX, [xBP + xCB + cbCurRetAddr]
+TONLY16 mov     ds,  [xBP + xCB + cbCurRetAddr + 2]
+
+        ; Just write.
+        fnstenv [xBX]
+
+TONLY16 pop     ds
+        pop     xBX
+        mov     xSP, xBP
+        pop     xBP
+        BS3_CALL_CONV_EPILOG 1
+        BS3_HYBRID_RET
+BS3_PROC_END_MODE   bs3FpuState1_FNStEnv
+
+
+;;
+; Performs a FMUL on the specified memory, after writing a 64-bit value (g_r64_One) to it first.
+;
+; BS3_DECL_NEAR(void) TMPL_NM(bs3FpuState1_FMul)(void *pvMmioReg);
+;
+BS3_PROC_BEGIN_MODE bs3FpuState1_FMul, BS3_PBC_NEAR
+        BS3_CALL_CONV_PROLOG 1
+        push    xBP
+        mov     xBP, xSP
+        push    xBX
+TONLY16 push    ds
+
+        ; Load the value we'll be multiplying with into register(s) while ds is DATA16.
+        mov     sAX, [TMPL_DATA16_WRT(g_r64_One)]
+TNOT64  mov     edx, [4 + TMPL_DATA16_WRT(g_r64_One)]   ; upper half when sAX cannot hold all 64 bits
+
+        ; Load the register pointer.
+        mov     xBX, [xBP + xCB + cbCurRetAddr]
+TONLY16 mov     ds,  [xBP + xCB + cbCurRetAddr + 2]
+
+        ; Write the value to the target memory, then do the FMUL on it via .do_it.
+        mov     [xBX], sAX
+TNOT64  mov     [xBX + 4], edx
+        call    .do_it
+
+TONLY16 pop     ds
+        pop     xBX
+        mov     xSP, xBP
+        pop     xBP
+        BS3_CALL_CONV_EPILOG 1
+        BS3_HYBRID_RET
+.do_it:
+        fmul    qword [xBX]             ; st0 *= 64-bit value at [xBX]
+        ret
+BS3_PROC_END_MODE   bs3FpuState1_FMul
+
+
+%include "bs3kit-template-footer.mac"   ; reset environment
+
Index: /trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1.c32
===================================================================
--- /trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1.c32	(revision 61535)
+++ /trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-decoding-1.c32	(revision 61535)
@@ -0,0 +1,45 @@
+/* $Id$ */
+/** @file
+ * BS3Kit - bs3-cpu-decoding-1, 32-bit C code.
+ */
+
+/*
+ * Copyright (C) 2007-2016 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#include <bs3kit.h>
+#include <iprt/asm-amd64-x86.h>
+
+/** Test entry point: starts the "bs3-cpu-decoding-1" test, prints the detected CPU value and ends the test. */
+BS3_DECL(void) Main_pe32(void)
+{
+    Bs3TestInit("bs3-cpu-decoding-1");
+    Bs3TestPrintf("g_uBs3CpuDetected=%#x\n", g_uBs3CpuDetected);
+
+    /** @todo No sub-tests are run yet; sketched as: Bs3TestDoModes_rm(g_aModeTest, RT_ELEMENTS(g_aModeTest)); */
+
+    Bs3TestTerm();
+}
+
+
