Index: /trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2-template.c
===================================================================
--- /trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2-template.c	(revision 60678)
+++ /trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2-template.c	(revision 60679)
@@ -73,4 +73,5 @@
 #endif
 
+
 /*********************************************************************************************************************************
 *   Structures and Typedefs                                                                                                      *
@@ -121,10 +122,13 @@
 extern FNBS3FAR     bs3CpuBasic2_sgdt_bx_ud2_c32;
 extern FNBS3FAR     bs3CpuBasic2_sgdt_bx_ud2_c64;
+extern FNBS3FAR     bs3CpuBasic2_sgdt_ss_bx_ud2_c16;
+extern FNBS3FAR     bs3CpuBasic2_sgdt_ss_bx_ud2_c32;
+extern FNBS3FAR     bs3CpuBasic2_sgdt_rexw_bx_ud2_c64;
 extern FNBS3FAR     bs3CpuBasic2_sgdt_opsize_bx_ud2_c16;
 extern FNBS3FAR     bs3CpuBasic2_sgdt_opsize_bx_ud2_c32;
 extern FNBS3FAR     bs3CpuBasic2_sgdt_opsize_bx_ud2_c64;
-extern FNBS3FAR     bs3CpuBasic2_lidt_bx_ud2_c16;
-extern FNBS3FAR     bs3CpuBasic2_lidt_bx_ud2_c32;
-extern FNBS3FAR     bs3CpuBasic2_lidt_bx_ud2_c64;
+extern FNBS3FAR     bs3CpuBasic2_sgdt_opsize_ss_bx_ud2_c16;
+extern FNBS3FAR     bs3CpuBasic2_sgdt_opsize_ss_bx_ud2_c32;
+extern FNBS3FAR     bs3CpuBasic2_sgdt_opsize_rexw_bx_ud2_c64;
 #endif
 
@@ -145,11 +149,11 @@
 {
     { bs3CpuBasic2_sidt_bx_ud2_c16,             3, false,   BS3_MODE_CODE_16 | BS3_MODE_CODE_V86 },
-//    { bs3CpuBasic2_sidt_ss_bx_ud2_c16,          4, true,    BS3_MODE_CODE_16 | BS3_MODE_CODE_V86 },
+    { bs3CpuBasic2_sidt_ss_bx_ud2_c16,          4, true,    BS3_MODE_CODE_16 | BS3_MODE_CODE_V86 },
     { bs3CpuBasic2_sidt_opsize_bx_ud2_c16,      4, false,   BS3_MODE_CODE_16 | BS3_MODE_CODE_V86 },
-//    { bs3CpuBasic2_sidt_opsize_ss_bx_ud2_c16,   5, true,    BS3_MODE_CODE_16 | BS3_MODE_CODE_V86 },
+    { bs3CpuBasic2_sidt_opsize_ss_bx_ud2_c16,   5, true,    BS3_MODE_CODE_16 | BS3_MODE_CODE_V86 },
     { bs3CpuBasic2_sidt_bx_ud2_c32,             3, false,   BS3_MODE_CODE_32 },
-//    { bs3CpuBasic2_sidt_ss_bx_ud2_c32,          4, true,    BS3_MODE_CODE_32 },
+    { bs3CpuBasic2_sidt_ss_bx_ud2_c32,          4, true,    BS3_MODE_CODE_32 },
     { bs3CpuBasic2_sidt_opsize_bx_ud2_c32,      4, false,   BS3_MODE_CODE_32 },
-//    { bs3CpuBasic2_sidt_opsize_ss_bx_ud2_c32,   5, true,    BS3_MODE_CODE_32 },
+    { bs3CpuBasic2_sidt_opsize_ss_bx_ud2_c32,   5, true,    BS3_MODE_CODE_32 },
     { bs3CpuBasic2_sidt_bx_ud2_c64,             3, false,   BS3_MODE_CODE_64 },
     { bs3CpuBasic2_sidt_rexw_bx_ud2_c64,        4, false,   BS3_MODE_CODE_64 },
@@ -158,19 +162,20 @@
 };
 
-#if 0
-static BS3CB2SIDTSGDT const g_aSgdtNormal[3] =
-{
-    { bs3CpuBasic2_sgdt_bx_ud2_c16, bs3CpuBasic2_sgdt_ss_bx_ud2_c16, BS3_MODE_CODE_16 | BS3_MODE_CODE_V86 },
-    { bs3CpuBasic2_sgdt_bx_ud2_c32, bs3CpuBasic2_sgdt_ss_bx_ud2_c32, BS3_MODE_CODE_32 },
-    { bs3CpuBasic2_sgdt_bx_ud2_c64, bs3CpuBasic2_sgdt_rexw_bx_ud2_c64, BS3_MODE_CODE_64 },
+/** SGDT workers: { worker code, 2nd column (looks like the UD2 offset AssertCompile'd in the .mac file - confirm), fSs, bMode }. */
+static BS3CB2SIDTSGDT const g_aSgdtWorkers[] =
+{
+    { bs3CpuBasic2_sgdt_bx_ud2_c16,             3, false,   BS3_MODE_CODE_16 | BS3_MODE_CODE_V86 },
+    { bs3CpuBasic2_sgdt_ss_bx_ud2_c16,          4, true,    BS3_MODE_CODE_16 | BS3_MODE_CODE_V86 },
+    { bs3CpuBasic2_sgdt_opsize_bx_ud2_c16,      4, false,   BS3_MODE_CODE_16 | BS3_MODE_CODE_V86 },
+    { bs3CpuBasic2_sgdt_opsize_ss_bx_ud2_c16,   5, true,    BS3_MODE_CODE_16 | BS3_MODE_CODE_V86 },
+    { bs3CpuBasic2_sgdt_bx_ud2_c32,             3, false,   BS3_MODE_CODE_32 },
+    { bs3CpuBasic2_sgdt_ss_bx_ud2_c32,          4, true,    BS3_MODE_CODE_32 },
+    { bs3CpuBasic2_sgdt_opsize_bx_ud2_c32,      4, false,   BS3_MODE_CODE_32 },
+    { bs3CpuBasic2_sgdt_opsize_ss_bx_ud2_c32,   5, true,    BS3_MODE_CODE_32 },
+    { bs3CpuBasic2_sgdt_bx_ud2_c64,             3, false,   BS3_MODE_CODE_64 },
+    { bs3CpuBasic2_sgdt_rexw_bx_ud2_c64,        4, false,   BS3_MODE_CODE_64 },
+    { bs3CpuBasic2_sgdt_opsize_bx_ud2_c64,      4, false,   BS3_MODE_CODE_64 },
+    { bs3CpuBasic2_sgdt_opsize_rexw_bx_ud2_c64, 5, false,   BS3_MODE_CODE_64 },
 };
-
-static BS3CB2SIDTSGDT const g_aSgdtOpSize[3] =
-{
-    { bs3CpuBasic2_sgdt_opsize_bx_ud2_c16, bs3CpuBasic2_sgdt_ss_bx_ud2_c16, BS3_MODE_CODE_16 | BS3_MODE_CODE_V86 },
-    { bs3CpuBasic2_sgdt_opsize_bx_ud2_c32, bs3CpuBasic2_sgdt_ss_bx_ud2_c32, BS3_MODE_CODE_32 },
-    { bs3CpuBasic2_sgdt_opsize_bx_ud2_c64, bs3CpuBasic2_sgdt_rexw_bx_ud2_c64, BS3_MODE_CODE_64 },
-};
-#endif
 
 
@@ -1388,5 +1393,4 @@
     int                 off;
     unsigned            cb;
-    uint8_t             bDpl;
     uint8_t BS3_FAR    *pbTest;
 Bs3TestPrintf("bs3CpuBasic2_sidt_sgdt_One: %p bTestMode=%#x bRing=%d\n",  pWorker, bTestMode, bRing);
@@ -1402,5 +1406,5 @@
        at our SIDT [xBX] + UD2 combo, and point DS:xBX at abBuf. */
     Bs3RegCtxSaveEx(&Ctx, bTestMode, 256 /*cbExtraStack*/);
-    Bs3RegCtxSetGrpDsFromCurPtr(&Ctx, &Ctx.rbx, abBuf);
+    Bs3RegCtxSetGrpSegFromCurPtr(&Ctx, &Ctx.rbx, pWorker->fSs ? &Ctx.ss : &Ctx.ds, abBuf);
     Bs3RegCtxSetRipCsFromLnkPtr(&Ctx, pWorker->fpfnWorker);
     if (BS3_MODE_IS_16BIT_SYS(bTestMode))
@@ -1458,5 +1462,5 @@
     {
         pbBuf = &abBuf[off];
-        Bs3RegCtxSetGrpDsFromCurPtr(&Ctx, &Ctx.rbx, &abBuf[off]);
+        Bs3RegCtxSetGrpSegFromCurPtr(&Ctx, &Ctx.rbx, pWorker->fSs ? &Ctx.ss : &Ctx.ds, &abBuf[off]);
         CtxUdExpected.rbx.u = Ctx.rbx.u;
 
@@ -1497,5 +1501,5 @@
     }
     pbBuf = abBuf;
-    Bs3RegCtxSetGrpDsFromCurPtr(&Ctx, &Ctx.rbx, abBuf);
+    Bs3RegCtxSetGrpSegFromCurPtr(&Ctx, &Ctx.rbx, pWorker->fSs ? &Ctx.ss : &Ctx.ds, abBuf);
     CtxUdExpected.rbx.u = Ctx.rbx.u;
 
@@ -1515,5 +1519,8 @@
         Bs3GdteTestPage00.Gen.u8BaseHigh2 = (uint8_t)(uFlatBuf >> 24);
 
-        CtxUdExpected.ds = Ctx.ds = BS3_SEL_TEST_PAGE_00 | bRing;
+        if (pWorker->fSs)
+            CtxUdExpected.ss = Ctx.ss = BS3_SEL_TEST_PAGE_00 | bRing;
+        else
+            CtxUdExpected.ds = Ctx.ds = BS3_SEL_TEST_PAGE_00 | bRing;
 
         /* Expand up (normal). */
@@ -1539,5 +1546,8 @@
                 else
                 {
-                    bs3CpuBasic2_CompareGpCtx(&TrapCtx, &Ctx, 0);
+                    if (pWorker->fSs)
+                        bs3CpuBasic2_CompareSsCtx(&TrapCtx, &Ctx, 0, false /*f486ResumeFlagHint*/);
+                    else
+                        bs3CpuBasic2_CompareGpCtx(&TrapCtx, &Ctx, 0);
                     if (off + 2 <= cbLimit + 1)
                     {
@@ -1596,5 +1606,8 @@
                 else
                 {
-                    bs3CpuBasic2_CompareGpCtx(&TrapCtx, &Ctx, 0);
+                    if (pWorker->fSs)
+                        bs3CpuBasic2_CompareSsCtx(&TrapCtx, &Ctx, 0, false /*f486ResumeFlagHint*/);
+                    else
+                        bs3CpuBasic2_CompareGpCtx(&TrapCtx, &Ctx, 0);
                     if (!ASMMemIsAllU8(abBuf, sizeof(abBuf), bFiller))
                         Bs3TestFailedF("Bytes touched on #GP: cbIdtr=%u off=%u cbLimit=%u bFiller=%#x abBuf=%.*Rhxs\n",
@@ -1613,5 +1626,8 @@
         }
 
-        Bs3RegCtxSetGrpDsFromCurPtr(&Ctx, &Ctx.rbx, pbBuf);
+        Bs3RegCtxSetGrpSegFromCurPtr(&Ctx, &Ctx.rbx, pWorker->fSs ? &Ctx.ss : &Ctx.ds, abBuf);
+        CtxUdExpected.rbx.u = Ctx.rbx.u;
+        CtxUdExpected.ss = Ctx.ss;
+        CtxUdExpected.ds = Ctx.ds;
     }
 
@@ -1620,7 +1636,9 @@
      */
     if (   BS3_MODE_IS_PAGED(bTestMode)
+        && (!pWorker->fSs || bRing == 3) /* SS.DPL == CPL, we'll get some tiled ring-3 selector here.  */
         && (pbTest = (uint8_t BS3_FAR *)Bs3MemGuardedTestPageAlloc(BS3MEMKIND_TILED)) != NULL)
     {
         RTCCUINTXREG uFlatTest = Bs3SelPtrToFlat(pbTest);
+Bs3TestPrintf("g_usBs3TestStep=%u line=%d\n", g_usBs3TestStep, __LINE__);
 
         /*
@@ -1631,9 +1649,12 @@
         {
             Bs3MemSet(&pbTest[X86_PAGE_SIZE - cbIdtr * 2], bFiller, cbIdtr * 2);
-            Bs3RegCtxSetGrpDsFromCurPtr(&Ctx, &Ctx.rbx, &pbTest[off]);
+            Bs3RegCtxSetGrpSegFromCurPtr(&Ctx, &Ctx.rbx, pWorker->fSs ? &Ctx.ss : &Ctx.ds, &pbTest[off]);
+if (pWorker->fSs)
+    Bs3RegCtxPrint(&Ctx);
             Bs3TrapSetJmpAndRestore(&Ctx, &TrapCtx);
             if (off + cbIdtr <= X86_PAGE_SIZE)
             {
                 CtxUdExpected.rbx = Ctx.rbx;
+                CtxUdExpected.ss  = Ctx.ss;
                 CtxUdExpected.ds  = Ctx.ds;
                 bs3CpuBasic2_CompareUdCtx(&TrapCtx, &CtxUdExpected);
@@ -1656,5 +1677,7 @@
                     Bs3TestPrintf("Wrote partial limit on #PF (#10): Expected %02x, got %02x\n", bFiller, pbTest[off]);
             }
-        }
+            g_usBs3TestStep++;
+        }
+Bs3TestPrintf("g_usBs3TestStep=%u line=%d\n", g_usBs3TestStep, __LINE__);
 
         /*
@@ -1665,9 +1688,10 @@
         {
             Bs3MemSet(pbTest, bFiller, 32);
-            Bs3RegCtxSetGrpDsFromCurPtr(&Ctx, &Ctx.rbx, &pbTest[off]);
+            Bs3RegCtxSetGrpSegFromCurPtr(&Ctx, &Ctx.rbx, pWorker->fSs ? &Ctx.ss : &Ctx.ds, &pbTest[off]);
             Bs3TrapSetJmpAndRestore(&Ctx, &TrapCtx);
             if (off >= 0)
             {
                 CtxUdExpected.rbx = Ctx.rbx;
+                CtxUdExpected.ss  = Ctx.ss;
                 CtxUdExpected.ds  = Ctx.ds;
                 bs3CpuBasic2_CompareUdCtx(&TrapCtx, &CtxUdExpected);
@@ -1686,4 +1710,5 @@
                 Bs3TestPrintf("Wrote beyond expected area (#13): bFiller=%#x, found %.16Rhxs; off=%d\n",
                               bFiller, &pbTest[RT_MAX(cbIdtr + off, 0)], off);
+            g_usBs3TestStep++;
         }
 
@@ -1704,5 +1729,8 @@
             Bs3GdteTestPage00.Gen.u8BaseHigh2 = (uint8_t)(uFlatTest >> 24);
 
-            CtxUdExpected.ds = Ctx.ds = BS3_SEL_TEST_PAGE_00 | bRing;
+            if (pWorker->fSs)
+                CtxUdExpected.ss = Ctx.ss = BS3_SEL_TEST_PAGE_00 | bRing;
+            else
+                CtxUdExpected.ds = Ctx.ds = BS3_SEL_TEST_PAGE_00 | bRing;
 
             /* Expand up (normal), approaching tail guard page. */
@@ -1747,5 +1775,8 @@
                         if (off <= X86_PAGE_SIZE - 2)
                         {
-                            bs3CpuBasic2_CompareGpCtx(&TrapCtx, &Ctx, 0);
+                            if (pWorker->fSs)
+                                bs3CpuBasic2_CompareSsCtx(&TrapCtx, &Ctx, 0, false /*f486ResumeFlagHint*/);
+                            else
+                                bs3CpuBasic2_CompareGpCtx(&TrapCtx, &Ctx, 0);
                             if (Bs3MemCmp(&pbTest[off], pabExpected, 2) != 0)
                                 Bs3TestPrintf("Mismatch (#16): Expected limit %.2Rhxs, got %.2Rhxs; off=%#x\n",
@@ -1769,6 +1800,9 @@
                     else
                     {
-                        /* #GP on limit. */
-                        bs3CpuBasic2_CompareGpCtx(&TrapCtx, &Ctx, 0);
+                        /* #GP/#SS on limit. */
+                        if (pWorker->fSs)
+                            bs3CpuBasic2_CompareSsCtx(&TrapCtx, &Ctx, 0, false /*f486ResumeFlagHint*/);
+                        else
+                            bs3CpuBasic2_CompareGpCtx(&TrapCtx, &Ctx, 0);
                         if (   off < X86_PAGE_SIZE
                             && !ASMMemIsAllU8(&pbTest[off], X86_PAGE_SIZE - off, bFiller))
@@ -1785,9 +1819,12 @@
 
                     /* Set DS to 0 and check that we get #GP(0). */
-                    Ctx.ds = 0;
-                    Bs3TrapSetJmpAndRestore(&Ctx, &TrapCtx);
-                    bs3CpuBasic2_CompareGpCtx(&TrapCtx, &Ctx, 0);
-                    g_usBs3TestStep++;
-                    Ctx.ds = BS3_SEL_TEST_PAGE_00 | bRing;
+                    if (!pWorker->fSs)
+                    {
+                        Ctx.ds = 0;
+                        Bs3TrapSetJmpAndRestore(&Ctx, &TrapCtx);
+                        bs3CpuBasic2_CompareGpCtx(&TrapCtx, &Ctx, 0);
+                        Ctx.ds = BS3_SEL_TEST_PAGE_00 | bRing;
+                        g_usBs3TestStep++;
+                    }
                 }
             }
@@ -1826,4 +1863,6 @@
                             bs3CpuBasic2_ComparePfCtx(&TrapCtx, &Ctx, X86_TRAP_PF_RW | (Ctx.bCpl == 3 ? X86_TRAP_PF_US : 0),
                                                       uFlatTest + off);
+                        else if (pWorker->fSs)
+                            bs3CpuBasic2_CompareSsCtx(&TrapCtx, &Ctx, 0, false /*f486ResumeFlagHint*/);
                         else
                             bs3CpuBasic2_CompareGpCtx(&TrapCtx, &Ctx, 0);
@@ -1858,11 +1897,14 @@
 {
     unsigned idx;
+    unsigned bRing;
     unsigned iStep = 0;
-    unsigned bRing = 0;
-
+
+    /* Note! We skip the SS checks for ring-0 since we badly mess up SS in the
+             test and don't want to bother with double faults. */
     for (bRing = 0; bRing <= 3; bRing++)
     {
         for (idx = 0; idx < cWorkers; idx++)
-            if (paWorkers[idx].bMode & (bTestMode & BS3_MODE_CODE_MASK))
+            if (    (paWorkers[idx].bMode & (bTestMode & BS3_MODE_CODE_MASK))
+                && (!paWorkers[idx].fSs || bRing != 0))
             {
                 g_usBs3TestStep = iStep;
@@ -2155,4 +2197,33 @@
 }
 
+/** Runs the g_aSgdtWorkers tests for this template mode. NOTE(review): named "gidt" but tests SGDT - presumably meant "sgdt"; confirm with callers. */
+BS3_DECL_FAR(uint8_t) TMPL_NM(bs3CpuBasic2_gidt)(uint8_t bMode)
+{
+    union
+    {
+        RTGDTR  Gdtr;
+        uint8_t ab[16];                 /* Byte view of Gdtr; this is what gets handed to the common worker. */
+    } Expected;
+
+    g_pszTestMode = TMPL_NM(g_szBs3ModeName);               /* Record the mode under test in the g_* test state. */
+    g_bTestMode   = bMode;
+    g_f16BitSys   = BS3_MODE_IS_16BIT_SYS(TMPL_MODE);
+
+    BS3_ASSERT(bMode == TMPL_MODE);     /* The caller must pass the mode this template instance was built for. */
+
+    /*
+     * Capture the current GDTR as the expected bytes and pass them to the common worker, which is only compiled once per mode.
+     */
+    Bs3MemZero(&Expected, sizeof(Expected));    /* Zero all 16 bytes first so anything ASMGetGDTR leaves unwritten is deterministic. */
+    ASMGetGDTR(&Expected.Gdtr);
+    bs3CpuBasic2_sidt_sgdt_Common(bMode, g_aSgdtWorkers, RT_ELEMENTS(g_aSgdtWorkers), Expected.ab);
+
+    /*
+     * Re-initialize the IDT (done by calling Bs3TrapInit once the common worker has returned).
+     */
+    Bs3TrapInit();
+    return 0;
+}
+
 #endif /* BS3_INSTANTIATING_MODE */
 
Index: /trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2-template.mac
===================================================================
--- /trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2-template.mac	(revision 60678)
+++ /trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-basic-2-template.mac	(revision 60679)
@@ -172,4 +172,13 @@
 AssertCompile(.again - BS3_CMN_NM(bs3CpuBasic2_sgdt_rexw_bx_ud2) == 4)
 BS3_PROC_END_CMN   bs3CpuBasic2_sgdt_rexw_bx_ud2
+
+BS3_PROC_BEGIN_CMN bs3CpuBasic2_sgdt_opsize_rexw_bx_ud2, BS3_PBC_NEAR
+        db      X86_OP_PRF_SIZE_OP      ; SGDT [xBX] + UD2 worker: operand-size prefix emitted as a raw byte ...
+        db      X86_OP_REX_W            ; ... then a REX.W prefix, also as a raw byte, both ahead of the SGDT.
+        sgdt    [xBX]                   ; The instruction under test; stores the GDTR at [xBX].
+.again: ud2                             ; Must sit at offset 5 from the procedure start (asserted below).
+        jmp     .again                  ; Jumps back to the UD2.
+AssertCompile(.again - BS3_CMN_NM(bs3CpuBasic2_sgdt_opsize_rexw_bx_ud2) == 5)
+BS3_PROC_END_CMN   bs3CpuBasic2_sgdt_opsize_rexw_bx_ud2
 %endif
 
@@ -181,4 +190,12 @@
 AssertCompile(.again - BS3_CMN_NM(bs3CpuBasic2_sgdt_ss_bx_ud2) == 4)
 BS3_PROC_END_CMN   bs3CpuBasic2_sgdt_ss_bx_ud2
+
+BS3_PROC_BEGIN_CMN bs3CpuBasic2_sgdt_opsize_ss_bx_ud2, BS3_PBC_NEAR
+        db      X86_OP_PRF_SIZE_OP      ; SGDT [ss:xBX] + UD2 worker: operand-size prefix emitted as a raw byte ahead of the SGDT.
+        sgdt    [ss:xBX]                ; The instruction under test, with an explicit SS segment override on the operand.
+.again: ud2                             ; Must sit at offset 5 from the procedure start (asserted below).
+        jmp     .again                  ; Jumps back to the UD2.
+AssertCompile(.again - BS3_CMN_NM(bs3CpuBasic2_sgdt_opsize_ss_bx_ud2) == 5)
+BS3_PROC_END_CMN   bs3CpuBasic2_sgdt_opsize_ss_bx_ud2
 %endif
 
Index: /trunk/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelFlatDataToProtFar16.asm
===================================================================
--- /trunk/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelFlatDataToProtFar16.asm	(revision 60678)
+++ /trunk/src/VBox/ValidationKit/bootsectors/bs3kit/bs3-cmn-SelFlatDataToProtFar16.asm	(revision 60679)
@@ -56,6 +56,39 @@
 
         ;
+        ; Check if we can use the protected mode stack or data selector.
+        ; The latter ensures the usability of this function for setting SS.
+        ;
+%if TMPL_BITS == 16
+        mov     ax, [xBP + xCB + cbCurRetAddr]
+        mov     dx, [xBP + xCB + cbCurRetAddr + 2]
+        test    dx, dx
+        jnz     .not_stack
+        mov     dx, BS3_SEL_R0_SS16
+%else
+        mov     eax, [xBP + xCB + cbCurRetAddr]
+        test    eax, 0ffff0000h
+        jnz     .not_stack
+        or      eax, BS3_SEL_R0_SS16 << 16
+%endif
+        jmp     .return
+
+.not_stack:
+%if TMPL_BITS == 16
+        sub     ax, BS3_ADDR_BS3DATA16 & 0xffff
+        sbb     dx, BS3_ADDR_BS3DATA16 >> 16
+        jnz     .do_tiled
+        mov     dx, BS3_SEL_R0_DS16
+%else
+        sub     eax, BS3_ADDR_BS3DATA16
+        test    eax, 0ffff0000h
+        jnz     .do_tiled
+        or      eax, BS3_SEL_R0_DS16 << 16
+%endif
+        jmp     .return
+
+        ;
         ; Just translate the address to tiled.
         ;
+.do_tiled:
 %if TMPL_BITS == 16
         ; Convert upper 16-bit to a tiled selector.
@@ -95,4 +128,5 @@
 %endif
 
+.return:
         pop     xBP
         BS3_HYBRID_RET
