Index: /trunk/include/VBox/vmm/cpum.h
===================================================================
--- /trunk/include/VBox/vmm/cpum.h	(revision 61347)
+++ /trunk/include/VBox/vmm/cpum.h	(revision 61348)
@@ -1487,4 +1487,5 @@
 VMMR0_INT_DECL(int)     CPUMR0InitVM(PVM pVM);
 DECLASM(void)           CPUMR0RegisterVCpuThread(PVMCPU pVCpu);
+DECLASM(void)           CPUMR0TouchHostFpu(void);
 VMMR0_INT_DECL(int)     CPUMR0Trap07Handler(PVM pVM, PVMCPU pVCpu);
 VMMR0_INT_DECL(int)     CPUMR0LoadGuestFPU(PVM pVM, PVMCPU pVCpu);
Index: /trunk/src/VBox/VMM/Makefile.kmk
===================================================================
--- /trunk/src/VBox/VMM/Makefile.kmk	(revision 61347)
+++ /trunk/src/VBox/VMM/Makefile.kmk	(revision 61348)
@@ -568,4 +568,7 @@
   VMMR0_DEFS    += VMM_R0_SWITCH_STACK
  endif
+ if1of ($(KBUILD_TARGET), darwin linux win)
+  VMMR0_DEFS    += VMM_R0_TOUCH_FPU
+ endif
  VMMR0_DEFS.darwin = VMM_R0_SWITCH_STACK
  VMMR0_DEFS.win.amd64  = VBOX_WITH_KERNEL_USING_XMM
Index: /trunk/src/VBox/VMM/VMMR0/CPUMR0A.asm
===================================================================
--- /trunk/src/VBox/VMM/VMMR0/CPUMR0A.asm	(revision 61347)
+++ /trunk/src/VBox/VMM/VMMR0/CPUMR0A.asm	(revision 61348)
@@ -49,6 +49,6 @@
 SEH64_END_PROLOGUE
 
-%ifdef CPUM_CAN_USE_FPU_IN_R0
-        movaps  xmm0, xmm0
+%ifdef VMM_R0_TOUCH_FPU
+        movdqa  xmm0, xmm0              ; hope this is harmless.
 %endif
 
@@ -60,4 +60,26 @@
 
 
+%ifdef VMM_R0_TOUCH_FPU
+;;
+; Touches the host FPU state.
+;
+; @uses nothing (well, maybe cr0)
+;
+ALIGNCODE(16)
+BEGINPROC CPUMR0TouchHostFpu
+        push    xBP
+        SEH64_PUSH_xBP
+        mov     xBP, xSP
+        SEH64_SET_FRAME_xBP 0
+SEH64_END_PROLOGUE
+
+        movdqa  xmm0, xmm0              ; Hope this is harmless.
+
+        leave
+        ret
+ENDPROC   CPUMR0TouchHostFpu
+%endif ; VMM_R0_TOUCH_FPU
+
+
 ;;
 ; Saves the host FPU/SSE/AVX state and restores the guest FPU/SSE/AVX state.
@@ -66,5 +88,5 @@
 ; @param    pCpumCpu  x86:[ebp+8] gcc:rdi msc:rcx     CPUMCPU pointer
 ;
-align 16
+ALIGNCODE(16)
 BEGINPROC cpumR0SaveHostRestoreGuestFPUState
         push    xBP
@@ -102,13 +124,5 @@
         jnz     .already_saved_host
 
-%ifndef CPUM_CAN_USE_FPU_IN_R0
-        ; On systems where the kernel doesn't necessarily allow us to use the FPU
-        ; in ring-0 context, we have to disable FPU traps before doing fxsave/xsave
-        ; here.  (xCX is 0 if no CR0 was necessary.)  We leave it like that so IEM
-        ; can use the FPU/SSE/AVX host CPU features directly.
-        SAVE_CR0_CLEAR_FPU_TRAPS xCX, xAX
-        mov     [pCpumCpu + CPUMCPU.Host.cr0Fpu], xCX
-        ;; @todo What about XCR0?
-%endif
+        CPUMRZ_TOUCH_FPU_CLEAR_CR0_FPU_TRAPS_SET_RC xCX, xAX, pCpumCpu ; xCX is the return value for VT-x; xAX is scratch.
 
         CPUMR0_SAVE_HOST
@@ -157,9 +171,5 @@
         popf
 
-%ifndef CPUM_CAN_USE_FPU_IN_R0
-        test    ecx, ecx
-        jnz     .modified_cr0
-%endif
-        xor     eax, eax
+        mov     eax, ecx
 .return:
 %ifdef RT_ARCH_X86
@@ -169,10 +179,4 @@
         leave
         ret
-
-%ifndef CPUM_CAN_USE_FPU_IN_R0
-.modified_cr0:
-        mov     eax, VINF_CPUM_HOST_CR0_MODIFIED
-        jmp     .return
-%endif
 ENDPROC   cpumR0SaveHostRestoreGuestFPUState
 
@@ -183,5 +187,5 @@
 ; @param    pCpumCpu  x86:[ebp+8] gcc:rdi msc:rcx     CPUMCPU pointer
 ;
-align 16
+ALIGNCODE(16)
 BEGINPROC cpumR0SaveGuestRestoreHostFPUState
         push    xBP
@@ -264,10 +268,8 @@
         CPUMR0_LOAD_HOST
 
-%ifndef CPUM_CAN_USE_FPU_IN_R0
         ; Restore the CR0 value we saved in cpumR0SaveHostRestoreGuestFPUState or
         ; in cpumRZSaveHostFPUState.
         mov     xCX, [pCpumCpu + CPUMCPU.Host.cr0Fpu]
-        RESTORE_CR0 xCX
-%endif
+        CPUMRZ_RESTORE_CR0_IF_TS_OR_EM_SET xCX
         and     dword [pCpumCpu + CPUMCPU.fUseFlags], ~(CPUM_USED_FPU_GUEST | CPUM_USED_FPU_HOST)
 
@@ -291,5 +293,5 @@
 ; @param    pCpumCpu  x86:[ebp+8] gcc:rdi msc:rcx     CPUMCPU pointer
 ;
-align 16
+ALIGNCODE(16)
 BEGINPROC cpumR0RestoreHostFPUState
         ;
@@ -312,11 +314,10 @@
         CPUMR0_LOAD_HOST
 
-%ifndef CPUM_CAN_USE_FPU_IN_R0
         ; Restore the CR0 value we saved in cpumR0SaveHostRestoreGuestFPUState or
         ; in cpumRZSaveHostFPUState.
         ;; @todo What about XCR0?
         mov     xCX, [pCpumCpu + CPUMCPU.Host.cr0Fpu]
-        RESTORE_CR0 xCX
-%endif
+        CPUMRZ_RESTORE_CR0_IF_TS_OR_EM_SET xCX
+
         and     dword [pCpumCpu + CPUMCPU.fUseFlags], ~CPUM_USED_FPU_HOST
         popf
Index: /trunk/src/VBox/VMM/VMMR0/VMMR0.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMR0/VMMR0.cpp	(revision 61347)
+++ /trunk/src/VBox/VMM/VMMR0/VMMR0.cpp	(revision 61348)
@@ -1126,4 +1126,12 @@
 #endif
 
+#ifdef VMM_R0_TOUCH_FPU
+                /*
+                 * Make sure we've got the FPU state loaded so we don't need to clear
+                 * CR0.TS and get out of sync with the host kernel when loading the guest
+                 * FPU state.  @ref sec_cpum_fpu (CPUM.cpp) and @bugref{4053}.
+                 */
+                CPUMR0TouchHostFpu();
+#endif
                 int  rc;
                 bool fPreemptRestored = false;
Index: /trunk/src/VBox/VMM/VMMRZ/CPUMRZA.asm
===================================================================
--- /trunk/src/VBox/VMM/VMMRZ/CPUMRZA.asm	(revision 61347)
+++ /trunk/src/VBox/VMM/VMMRZ/CPUMRZA.asm	(revision 61348)
@@ -72,17 +72,10 @@
         cli                             ; interrupt occurs while we're doing fxsave/fxrstor/cr0.
 
-%ifndef CPUM_CAN_USE_FPU_IN_R0
-        ;
-        ; In raw-mode context and on systems where the kernel doesn't necessarily
-        ; allow us to use the FPU in ring-0 context, we have to disable FPU traps
-        ; before doing fxsave/xsave here.  (xCX is 0 if no CR0 was necessary.)  We
-        ; leave it like that so IEM can use the FPU/SSE/AVX host CPU features directly.
-        ;
-        SAVE_CR0_CLEAR_FPU_TRAPS xCX, xAX               ; xCX must be preserved!
-        ;; @todo What about XCR0?
- %ifdef IN_RING0
-        mov     [pCpumCpu + CPUMCPU.Host.cr0Fpu], xCX
- %endif
-%endif
+        ;
+        ; We may have to update CR0, indirectly or directly.  We must report any
+        ; changes to the VT-x code.
+        ;
+        CPUMRZ_TOUCH_FPU_CLEAR_CR0_FPU_TRAPS_SET_RC xCX, xAX, pCpumCpu ; xCX is the return value (xAX scratch)
+
         ;
         ; Save the host state (xsave/fxsave will cause thread FPU state to be
@@ -94,12 +87,5 @@
         popf
 
-%ifndef CPUM_CAN_USE_FPU_IN_R0
-        ; Figure the return code.
-        test    ecx, ecx
-        jnz     .modified_cr0
-%endif
-        xor     eax, eax
-.return:
-
+        mov     eax, ecx                ; The return value from above.
 %ifdef RT_ARCH_X86
         pop     esi
@@ -108,10 +94,4 @@
         leave
         ret
-
-%ifndef CPUM_CAN_USE_FPU_IN_R0
-.modified_cr0:
-        mov     eax, VINF_CPUM_HOST_CR0_MODIFIED
-        jmp     .return
-%endif
 %undef pCpumCpu
 %undef pXState
@@ -156,5 +136,11 @@
 
  %ifdef IN_RC
-        SAVE_CR0_CLEAR_FPU_TRAPS xCX, xAX ; xCX must be preserved until CR0 is restored!
+        mov     ecx, cr0                ; ecx = saved cr0
+        test    ecx, X86_CR0_TS | X86_CR0_EM
+        jz      .skip_cr0_write
+        mov     eax, ecx
+        and     eax, ~(X86_CR0_TS | X86_CR0_EM)
+        mov     cr0, eax
+.skip_cr0_write:
  %endif
 
@@ -221,5 +207,5 @@
         test    byte [ebp + 0ch], 1     ; fLeaveFpuAccessible
         jz      .no_cr0_restore
-        RESTORE_CR0 xCX
+        CPUMRZ_RESTORE_CR0_IF_TS_OR_EM_SET ecx
 .no_cr0_restore:
  %endif
@@ -272,5 +258,11 @@
 %ifdef IN_RC
         ; Temporarily grant access to the SSE state. xDX must be preserved until CR0 is restored!
-        SAVE_CR0_CLEAR_FPU_TRAPS xDX, xAX
+        mov     edx, cr0
+        test    edx, X86_CR0_TS | X86_CR0_EM
+        jz      .skip_cr0_write
+        mov     eax, edx
+        and     eax, ~(X86_CR0_TS | X86_CR0_EM)
+        mov     cr0, eax
+.skip_cr0_write:
  %endif
 
@@ -298,5 +289,5 @@
 
  %ifdef IN_RC
-        RESTORE_CR0 xDX                 ; Restore CR0 if we changed it above.
+        CPUMRZ_RESTORE_CR0_IF_TS_OR_EM_SET edx  ; Restore CR0 if we changed it above.
  %endif
 
Index: /trunk/src/VBox/VMM/include/CPUMInternal.mac
===================================================================
--- /trunk/src/VBox/VMM/include/CPUMInternal.mac	(revision 61347)
+++ /trunk/src/VBox/VMM/include/CPUMInternal.mac	(revision 61348)
@@ -32,34 +32,4 @@
  %define CPUM_IS_AMD64      0
 %endif
-
-;; @def CPUM_CAN_USE_FPU_IN_R0
-; Indicates that we can use the FPU directly in ring-0.
-; Only defined in ring-0.
-%ifdef VBOX_WITH_KERNEL_USING_XMM
- ; Systems using XMM registers as part of their kernel calling convention must
- ; support saving and restoring the state while in ring-0.  64-bit Windows will
- ; always switch the FPU state when context switching.
- %define CPUM_CAN_USE_FPU_IN_R0 1
-%endif
-%ifdef RT_OS_WINDOWS
- ; 32-bit Windows will load the FPU context of the current thread (user land).
- %define CPUM_CAN_USE_FPU_IN_R0 1
-%endif
-%ifdef RT_OS_DARWIN
- ; Intel Darwin kernels will load the FPU context of the current thread (user land).
- ;; @todo we still need to check CR0 and tell HMVMX when CR0 changes!
- ;%define CPUM_CAN_USE_FPU_IN_R0 1
-%endif
-%ifdef RT_OS_LINUX
- ; Intel Linux kernels will load the FPU context of the current thread (user land),
- ; at least that what my LXR research on 2.6.18+ indicates.  It's possible this was
- ; done differently at some point, I seems to recall issues with it ages and ages ago.
- ;; @todo We still need to check CR0 and tell HMVMX when CR0 changes!
- ;%define CPUM_CAN_USE_FPU_IN_R0 1
-%endif
-%ifndef IN_RING0
- %undef CPUM_CAN_USE_FPU_IN_R0
-%endif
-
 
 
@@ -588,31 +558,71 @@
 
 ;;
-; Clears CR0.TS and CR0.EM if necessary, saving the previous result.
-;
-; This is used to avoid FPU exceptions when touching the FPU state.
-;
-; @param    %1      Register to save the old CR0 in (pass to RESTORE_CR0).
-; @param    %2      Temporary scratch register.
-; @uses     EFLAGS, CR0
-;
-%macro SAVE_CR0_CLEAR_FPU_TRAPS 2
-        xor     %1, %1
+; Makes sure we don't trap (#NM) accessing the FPU.
+;
+; In ring-0 this is a bit of work since we may have to convince the host kernel
+; to do the work for us, also, we must report any CR0 changes back to HMR0VMX
+; via the VINF_CPUM_HOST_CR0_MODIFIED status code.
+;
+; If we end up clearing CR0.TS/EM ourselves in ring-0, we'll save the original
+; value in CPUMCPU.Host.cr0Fpu.  If we don't, we'll store zero there.  (See also
+; CPUMRZ_RESTORE_CR0_IF_TS_OR_EM_SET.)
+;
+; In raw-mode we will always have to clear TS and it will be recalculated
+; elsewhere and thus needs no saving.
+;
+; @param    %1          Register to return the return status code in.
+; @param    %2          Temporary scratch register.
+; @param    %3          Ring-0 only, register pointing to the CPUMCPU structure
+;                       of the EMT we're on.
+; @uses     EFLAGS, CR0, %1, %2
+;
+%macro CPUMRZ_TOUCH_FPU_CLEAR_CR0_FPU_TRAPS_SET_RC 3
+ %ifdef IN_RC
+        ;
+        ; raw-mode - always clear it.  We won't be here otherwise.
+        ;
+        mov     %2, cr0
+        and     %2, ~(X86_CR0_TS | X86_CR0_EM)
+        mov     cr0, %2
+
+ %else
+        ;
+        ; ring-0 - slightly complicated.
+        ;
+        xor     %1, %1                          ; 0 / VINF_SUCCESS. Wishing for no CR0 changes.
+        mov     [%3 + CPUMCPU.Host.cr0Fpu], %1
+
         mov     %2, cr0
         test    %2, X86_CR0_TS | X86_CR0_EM ; Make sure its safe to access the FPU state.
-        jz      %%skip_cr0_write
-        mov     %1, %2                  ; Save old CR0
+        jz      %%no_cr0_change
+
+  %ifdef VMM_R0_TOUCH_FPU
+        ; Touch the state and check that the kernel updated CR0 for us.
+        movdqa  xmm0, xmm0
+        mov     %2, cr0
+        test    %2, X86_CR0_TS | X86_CR0_EM
+        jz      %%cr0_changed
+  %endif
+
+        ; Save CR0 and clear them flags ourselves.
+        mov     [%3 + CPUMCPU.Host.cr0Fpu], %2
         and     %2, ~(X86_CR0_TS | X86_CR0_EM)
         mov     cr0, %2
-%%skip_cr0_write:
+ %endif ; IN_RING0
+
+%%cr0_changed:
+        mov     %1,  VINF_CPUM_HOST_CR0_MODIFIED
+%%no_cr0_change:
 %endmacro
 
-;;
-; Restore CR0.TS and CR0.EM state if SAVE_CR0_CLEAR_FPU_TRAPS change it.
-;
-; @param    %1      The register that SAVE_CR0_CLEAR_FPU_TRAPS saved the old CR0 in.
-;
-%macro RESTORE_CR0 1
-        cmp     %1, 0
-        je      %%skip_cr0_restore
+
+;;
+; Restore CR0 if CR0.TS or CR0.EM were non-zero in the original state.
+;
+; @param    %1      The original state to restore (or zero).
+;
+%macro CPUMRZ_RESTORE_CR0_IF_TS_OR_EM_SET 1
+        test    %1, X86_CR0_TS | X86_CR0_EM
+        jz      %%skip_cr0_restore
         mov     cr0, %1
 %%skip_cr0_restore:
