Index: /trunk/src/VBox/VMM/Makefile.kmk
===================================================================
--- /trunk/src/VBox/VMM/Makefile.kmk	(revision 48220)
+++ /trunk/src/VBox/VMM/Makefile.kmk	(revision 48221)
@@ -62,4 +62,7 @@
 ifdef VBOX_WITH_RAW_RING1
  VMM_COMMON_DEFS += VBOX_WITH_RAW_RING1
+endif
+ifdef VBOX_WITH_64ON32_IDT
+ VMM_COMMON_DEFS += VBOX_WITH_64ON32_IDT
 endif
 
Index: /trunk/src/VBox/VMM/VMMR3/VMMSwitcher.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMR3/VMMSwitcher.cpp	(revision 48220)
+++ /trunk/src/VBox/VMM/VMMR3/VMMSwitcher.cpp	(revision 48221)
@@ -134,4 +134,76 @@
 
 
+# ifdef VBOX_WITH_64ON32_IDT
+/**
+ * Initializes the 64-bit IDT for 64-bit guest on 32-bit host switchers.
+ *
+ * This is only used as a debugging aid when we cannot find out why something
+ * goes haywire in the intermediate context.
+ *
+ * @param   pVM         The cross context VM structure.
+ * @param   pSwitcher   The switcher descriptor.
+ * @param   pbDst       Where the switcher code was just copied.
+ * @param   HCPhysDst   The host physical address corresponding to @a pbDst.
+ */
+static void vmmR3Switcher32On64IdtInit(PVM pVM, PVMMSWITCHERDEF pSwitcher, uint8_t *pbDst, RTHCPHYS HCPhysDst)
+{
+    AssertRelease(pSwitcher->offGCCode > 0 && pSwitcher->offGCCode < pSwitcher->cbCode);
+    AssertRelease(pSwitcher->cbCode < _64K);
+    RTSEL uCs64 = SELMGetHyperCS64(pVM);
+
+    PX86DESC64GATE paIdt = (PX86DESC64GATE)(pbDst + pSwitcher->offGCCode);
+    for (uint32_t i = 0 ; i < 256; i++)
+    {
+        AssertRelease(((uint64_t *)&paIdt[i])[0] < pSwitcher->cbCode);
+        AssertRelease(((uint64_t *)&paIdt[i])[1] == 0);
+        uint64_t uHandler = HCPhysDst + paIdt[i].u16OffsetLow;
+        paIdt[i].u16OffsetLow   = (uint16_t)uHandler;
+        paIdt[i].u16Sel         = uCs64;
+        paIdt[i].u3IST          = 0;
+        paIdt[i].u5Reserved     = 0;
+        paIdt[i].u4Type         = AMD64_SEL_TYPE_SYS_INT_GATE;
+        paIdt[i].u1DescType     = 0 /* system */;
+        paIdt[i].u2Dpl          = 3;
+        paIdt[i].u1Present      = 1;
+        paIdt[i].u16OffsetHigh  = (uint16_t)(uHandler >> 16);
+        paIdt[i].u32Reserved    = (uint32_t)(uHandler >> 32);
+    }
+
+    for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++)
+    {
+        uint64_t uIdtr = HCPhysDst + pSwitcher->offGCCode; AssertRelease(uIdtr < UINT32_MAX);
+        CPUMSetHyperIDTR(&pVM->aCpus[iCpu], uIdtr, 16*256 + iCpu);
+    }
+}
+
+
+/**
+ * Relocates the 64-bit IDT for 64-bit guest on 32-bit host switchers.
+ *
+ * @param   pVM         The cross context VM structure.
+ * @param   pSwitcher   The switcher descriptor.
+ * @param   pbDst       Where the switcher code was just copied.
+ * @param   HCPhysDst   The host physical address corresponding to @a pbDst.
+ */
+static void vmmR3Switcher32On64IdtRelocate(PVM pVM, PVMMSWITCHERDEF pSwitcher, uint8_t *pbDst, RTHCPHYS HCPhysDst)
+{
+    AssertRelease(pSwitcher->offGCCode > 0 && pSwitcher->offGCCode < pSwitcher->cbCode && pSwitcher->cbCode < _64K);
+
+    /* The intermediate context doesn't move, but the CS may. */
+    RTSEL uCs64 = SELMGetHyperCS64(pVM);
+    PX86DESC64GATE paIdt = (PX86DESC64GATE)(pbDst + pSwitcher->offGCCode);
+    for (uint32_t i = 0 ; i < 256; i++)
+        paIdt[i].u16Sel = uCs64;
+
+    /* Just in case... */
+    for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++)
+    {
+        uint64_t uIdtr = HCPhysDst + pSwitcher->offGCCode; AssertRelease(uIdtr < UINT32_MAX);
+        CPUMSetHyperIDTR(&pVM->aCpus[iCpu], uIdtr, 16*256 + iCpu);
+    }
+}
+# endif /* VBOX_WITH_64ON32_IDT */
+
+
 /**
  * VMMR3Init worker that initiates the switcher code (aka core code).
@@ -225,5 +297,5 @@
     {
         /*
-         * copy the code.
+         * Copy the code.
          */
         for (unsigned iSwitcher = 0; iSwitcher < VMMSWITCHER_MAX; iSwitcher++)
@@ -231,6 +303,14 @@
             PVMMSWITCHERDEF pSwitcher = papSwitchers[iSwitcher];
             if (pSwitcher)
-                memcpy((uint8_t *)pVM->vmm.s.pvCoreCodeR3 + pVM->vmm.s.aoffSwitchers[iSwitcher],
-                       pSwitcher->pvCode, pSwitcher->cbCode);
+            {
+                uint8_t *pbDst = (uint8_t *)pVM->vmm.s.pvCoreCodeR3 + pVM->vmm.s.aoffSwitchers[iSwitcher];
+                memcpy(pbDst, pSwitcher->pvCode, pSwitcher->cbCode);
+# ifdef VBOX_WITH_64ON32_IDT
+                if (   pSwitcher->enmType == VMMSWITCHER_32_TO_AMD64
+                    || pSwitcher->enmType == VMMSWITCHER_PAE_TO_AMD64)
+                    vmmR3Switcher32On64IdtInit(pVM, pSwitcher, pbDst,
+                                               pVM->vmm.s.HCPhysCoreCode + pVM->vmm.s.aoffSwitchers[iSwitcher]);
+# endif
+            }
         }
 
@@ -299,4 +379,11 @@
                                    pVM->vmm.s.pvCoreCodeRC + off,
                                    pVM->vmm.s.HCPhysCoreCode + off);
+# ifdef VBOX_WITH_64ON32_IDT
+            if (   pSwitcher->enmType == VMMSWITCHER_32_TO_AMD64
+                || pSwitcher->enmType == VMMSWITCHER_PAE_TO_AMD64)
+                vmmR3Switcher32On64IdtRelocate(pVM, pSwitcher,
+                                               (uint8_t *)pVM->vmm.s.pvCoreCodeR3 + off,
+                                               pVM->vmm.s.HCPhysCoreCode + off);
+# endif
         }
     }
Index: /trunk/src/VBox/VMM/VMMSwitcher/LegacyandAMD64.mac
===================================================================
--- /trunk/src/VBox/VMM/VMMSwitcher/LegacyandAMD64.mac	(revision 48220)
+++ /trunk/src/VBox/VMM/VMMSwitcher/LegacyandAMD64.mac	(revision 48221)
@@ -537,4 +537,8 @@
 %endif
 
+%ifdef VBOX_WITH_64ON32_IDT
+    ; Set up emergency trap handlers.
+    lidt    [rdx + CPUMCPU.Hyper.idtr]
+%endif
 
     ; load the hypervisor function address
@@ -812,9 +816,18 @@
     vmwrite rax, rdx
 
-    sub     rsp, 8*2
-    sgdt    [rsp]
+    sub     rsp, 16
+    sgdt    [rsp + 6]                   ; (The 64-bit base should be aligned, not the word.)
     mov     eax, VMX_VMCS_HOST_GDTR_BASE
-    vmwrite rax, [rsp+2]
-    add     rsp, 8*2
+    vmwrite rax, [rsp + 6 + 2]
+    add     rsp, 16
+
+%ifdef VBOX_WITH_64ON32_IDT
+    sub     rsp, 16
+    sidt    [rsp + 6]
+    mov     eax, VMX_VMCS_HOST_IDTR_BASE
+    vmwrite rax, [rsp + 6 + 2] ; [rsi + CPUMCPU.Hyper.idtr + 2] - why doesn't this work?
+    add     rsp, 16
+    ;call NAME(vmm64On32PrintIdtr)
+%endif
 
 %ifdef VBOX_WITH_CRASHDUMP_MAGIC
@@ -892,4 +905,10 @@
 ALIGNCODE(16)
 .vmlaunch64_done:
+%if 0 ;fixme later - def VBOX_WITH_64ON32_IDT
+    push    rdx
+    mov     rdx, [rsp + 8]         ; pCtx
+    lidt    [rdx + CPUMCPU.Hyper.idtr]
+    pop     rdx
+%endif
     jc      near .vmstart64_invalid_vmcs_ptr
     jz      near .vmstart64_start_failed
@@ -1201,4 +1220,216 @@
 
 
+%ifdef VBOX_WITH_64ON32_IDT
+;
+; Trap handling.
+;
+
+;; Here follows an array of trap handler entry points, 8 bytes in size.
+BEGINPROC vmm64On32TrapHandlers
+%macro vmm64On32TrapEntry 1
+GLOBALNAME vmm64On32Trap %+ i
+    db 06ah, i                          ; push imm8 - note that this is a sign-extended value.
+    jmp   NAME(%1)
+    ALIGNCODE(8)
+%assign i i+1
+%endmacro
+%assign i 0                                 ; start counter.
+    vmm64On32TrapEntry vmm64On32Trap        ; 0
+    vmm64On32TrapEntry vmm64On32Trap        ; 1
+    vmm64On32TrapEntry vmm64On32Trap        ; 2
+    vmm64On32TrapEntry vmm64On32Trap        ; 3
+    vmm64On32TrapEntry vmm64On32Trap        ; 4
+    vmm64On32TrapEntry vmm64On32Trap        ; 5
+    vmm64On32TrapEntry vmm64On32Trap        ; 6
+    vmm64On32TrapEntry vmm64On32Trap        ; 7
+    vmm64On32TrapEntry vmm64On32TrapErrCode ; 8
+    vmm64On32TrapEntry vmm64On32Trap        ; 9
+    vmm64On32TrapEntry vmm64On32TrapErrCode ; a
+    vmm64On32TrapEntry vmm64On32TrapErrCode ; b
+    vmm64On32TrapEntry vmm64On32TrapErrCode ; c
+    vmm64On32TrapEntry vmm64On32TrapErrCode ; d
+    vmm64On32TrapEntry vmm64On32TrapErrCode ; e
+    vmm64On32TrapEntry vmm64On32Trap        ; f  (reserved)
+    vmm64On32TrapEntry vmm64On32Trap        ; 10
+    vmm64On32TrapEntry vmm64On32TrapErrCode ; 11
+    vmm64On32TrapEntry vmm64On32Trap        ; 12
+    vmm64On32TrapEntry vmm64On32Trap        ; 13
+%rep (0x100 - 0x14)
+    vmm64On32TrapEntry vmm64On32Trap
+%endrep
+ENDPROC vmm64On32TrapHandlers
+
+;; Fake an error code and jump to the real thing.
+BEGINPROC vmm64On32Trap
+    push    qword [rsp]
+    jmp     NAME(vmm64On32TrapErrCode)
+ENDPROC vmm64On32Trap
+
+
+;;
+; Trap frame:
+;   [rbp + 38h] = ss
+;   [rbp + 30h] = rsp
+;   [rbp + 28h] = eflags
+;   [rbp + 20h] = cs
+;   [rbp + 18h] = rip
+;   [rbp + 10h] = error code (or trap number)
+;   [rbp + 08h] = trap number
+;   [rbp + 00h] = rbp
+;   [rbp - 08h] = rax
+;   [rbp - 10h] = rbx
+;   [rbp - 18h] = ds
+;
+BEGINPROC vmm64On32TrapErrCode
+    push    rbp
+    mov     rbp, rsp
+    push    rax
+    push    rbx
+    mov     ax, ds
+    push    rax
+    sub     rsp, 20h
+
+    mov     ax, cs
+    mov     ds, ax
+
+%if 1
+    COM64_S_NEWLINE
+    COM64_S_CHAR '!'
+    COM64_S_CHAR 't'
+    COM64_S_CHAR 'r'
+    COM64_S_CHAR 'a'
+    COM64_S_CHAR 'p'
+    movzx   eax, byte [rbp + 08h]
+    COM64_S_DWORD_REG eax
+    COM64_S_CHAR '!'
+%endif
+
+%if 0 ;; @todo Figure the offset of the CPUMCPU relative to CPUM
+    sidt    [rsp]
+    movsx   eax, word [rsp]
+    shr     eax, 12                     ; div by 16 * 256 (0x1000).
+%else
+    ; hardcoded VCPU(0) for now...
+    mov     rbx, [NAME(pCpumIC) wrt rip]
+    mov     eax, [rbx + CPUM.offCPUMCPU0]
+%endif
+    push    rax                         ; Save the offset for rbp later.
+
+    add     rbx, rax                    ; rbx = CPUMCPU
+
+    ;
+    ; Deal with recursive traps due to vmxoff (lazy bird).
+    ;
+    lea     rax, [.vmxoff_trap_location wrt rip]
+    cmp     rax, [rbp + 18h]
+    je      .not_vmx_root
+
+    ;
+    ; Save the context.
+    ;
+    mov     rax, [rbp - 8]
+    mov     [rbx + CPUMCPU.Hyper.eax], rax
+    mov     [rbx + CPUMCPU.Hyper.ecx], rcx
+    mov     [rbx + CPUMCPU.Hyper.edx], rdx
+    mov     rax, [rbp - 10h]
+    mov     [rbx + CPUMCPU.Hyper.ebx], rax
+    mov     rax, [rbp]
+    mov     [rbx + CPUMCPU.Hyper.ebp], rax
+    mov     rax, [rbp + 30h]
+    mov     [rbx + CPUMCPU.Hyper.esp], rax
+    mov     [rbx + CPUMCPU.Hyper.edi], rdi
+    mov     [rbx + CPUMCPU.Hyper.esi], rsi
+    mov     [rbx + CPUMCPU.Hyper.r8], r8
+    mov     [rbx + CPUMCPU.Hyper.r9], r9
+    mov     [rbx + CPUMCPU.Hyper.r10], r10
+    mov     [rbx + CPUMCPU.Hyper.r11], r11
+    mov     [rbx + CPUMCPU.Hyper.r12], r12
+    mov     [rbx + CPUMCPU.Hyper.r13], r13
+    mov     [rbx + CPUMCPU.Hyper.r14], r14
+    mov     [rbx + CPUMCPU.Hyper.r15], r15
+
+    mov     rax, [rbp + 18h]
+    mov     [rbx + CPUMCPU.Hyper.eip], rax
+    movzx   ax, [rbp + 20h]
+    mov     [rbx + CPUMCPU.Hyper.cs.Sel], ax
+    mov     ax, [rbp + 38h]
+    mov     [rbx + CPUMCPU.Hyper.ss.Sel], ax
+    mov     ax, [rbp - 18h]
+    mov     [rbx + CPUMCPU.Hyper.ds.Sel], ax
+
+    mov     rax, [rbp + 28h]
+    mov     [rbx + CPUMCPU.Hyper.eflags], rax
+
+    mov     rax, cr2
+    mov     [rbx + CPUMCPU.Hyper.cr2], rax
+
+    mov     rax, [rbp + 10h]
+    mov     [rbx + CPUMCPU.Hyper.r14], rax ; r14 = error code
+    movzx   eax, byte [rbp + 08h]
+    mov     [rbx + CPUMCPU.Hyper.r15], rax ; r15 = trap number
+
+    ;
+    ; Finally, leave VMX root operation before trying to return to the host.
+    ;
+    mov     rax, cr4
+    test    rax, X86_CR4_VMXE
+    jz      .not_vmx_root
+.vmxoff_trap_location:
+    vmxoff
+.not_vmx_root:
+
+    ;
+    ; Go back to the host.
+    ;
+    pop     rbp
+    mov     dword [rbx + CPUMCPU.u32RetCode], VERR_TRPM_DONT_PANIC
+    jmp     NAME(vmmRCToHostAsm)
+ENDPROC vmm64On32TrapErrCode
+
+;; We allocate the IDT here to avoid having to allocate memory separately somewhere.
+ALIGNCODE(16)
+GLOBALNAME vmm64On32Idt
+%assign i 0
+%rep 256
+    dq NAME(vmm64On32Trap %+ i) - NAME(Start) ; Relative trap handler offsets.
+    dq 0
+%assign i (i + 1)
+%endrep
+
+
+ %if 0
+;; For debugging purposes.
+BEGINPROC vmm64On32PrintIdtr
+    push    rax
+    push    rsi                         ; paranoia
+    push    rdi                         ; ditto
+    sub rsp, 16
+
+    COM64_S_CHAR ';'
+    COM64_S_CHAR 'i'
+    COM64_S_CHAR 'd'
+    COM64_S_CHAR 't'
+    COM64_S_CHAR 'r'
+    COM64_S_CHAR '='
+    sidt [rsp + 6]
+    mov eax, [rsp + 8 + 4]
+    COM64_S_DWORD_REG eax
+    mov eax, [rsp + 8]
+    COM64_S_DWORD_REG eax
+    COM64_S_CHAR ':'
+    movzx eax, word [rsp + 6]
+    COM64_S_DWORD_REG eax
+    COM64_S_CHAR '!'
+
+    add rsp, 16
+    pop     rdi
+    pop     rsi
+    pop     rax
+    ret
+ENDPROC   vmm64On32PrintIdtr
+ %endif
+
+%endif ; VBOX_WITH_64ON32_IDT
+
 
 
@@ -1261,6 +1492,6 @@
 ; been messing with the guest at all.
 ;
-; @param    eax     Return code.
-; @uses     eax, edx, ecx (or it may use them in the future)
+; @param    rbp     The virtual cpu number.
+; @param
 ;
 BITS 64
@@ -1330,11 +1561,11 @@
 
     ;;
-    ;; When we arrive at this label we're at the
-    ;; intermediate mapping of the switching code.
+    ;; When we arrive at this label we're at the host mapping of the
+    ;; switcher code, but with intermediate page tables.
     ;;
 BITS 32
 ALIGNCODE(16)
 GLOBALNAME ICExitTarget
-    DEBUG32_CHAR('8')
+    DEBUG32_CHAR('9')
 
     ; load the hypervisor data selector into ds & es
@@ -1343,10 +1574,14 @@
     mov     ds, eax
     mov     es, eax
+    DEBUG32_CHAR('a')
 
     FIXUP FIX_GC_CPUM_OFF, 1, 0
     mov     edx, 0ffffffffh
     CPUMCPU_FROM_CPUM_WITH_OFFSET edx, ebp
+
+    DEBUG32_CHAR('b')
     mov     esi, [edx + CPUMCPU.Host.cr3]
     mov     cr3, esi
+    DEBUG32_CHAR('c')
 
     ;; now we're in host memory context, let's restore regs
@@ -1354,4 +1589,5 @@
     mov     edx, 0ffffffffh
     CPUMCPU_FROM_CPUM_WITH_OFFSET edx, ebp
+    DEBUG32_CHAR('e')
 
     ; restore the host EFER
@@ -1360,6 +1596,8 @@
     mov     eax, [ebx + CPUMCPU.Host.efer]
     mov     edx, [ebx + CPUMCPU.Host.efer + 4]
+    DEBUG32_CHAR('f')
     wrmsr
     mov     edx, ebx
+    DEBUG32_CHAR('g')
 
     ; activate host gdt and idt
@@ -1455,5 +1693,9 @@
         at VMMSWITCHERDEF.offIDCode1,                   dd NAME(IDExitTarget)               - NAME(Start)
         at VMMSWITCHERDEF.cbIDCode1,                    dd NAME(ICExitTarget)               - NAME(Start)
+%ifdef VBOX_WITH_64ON32_IDT ; Hack! Use offGCCode to find the IDT.
+        at VMMSWITCHERDEF.offGCCode,                    dd NAME(vmm64On32Idt)               - NAME(Start)
+%else
         at VMMSWITCHERDEF.offGCCode,                    dd 0
+%endif
         at VMMSWITCHERDEF.cbGCCode,                     dd 0
 
Index: /trunk/src/VBox/VMM/include/CPUMInternal.mac
===================================================================
--- /trunk/src/VBox/VMM/include/CPUMInternal.mac	(revision 48220)
+++ /trunk/src/VBox/VMM/include/CPUMInternal.mac	(revision 48221)
@@ -456,5 +456,5 @@
 ;;
 ; Converts the CPUM pointer to CPUMCPU
-; @param   %1   register name (PVM)
+; @param   %1   register name (CPUM)
 ; @param   %2   register name (CPUMCPU offset)
 %macro CPUMCPU_FROM_CPUM_WITH_OFFSET 2
@@ -471,5 +471,5 @@
 ;;
 ; Converts the CPUMCPU pointer to CPUM
-; @param   %1   register name (PVM)
+; @param   %1   register name (CPUM)
 ; @param   %2   register name (CPUMCPU offset)
 %macro CPUM_FROM_CPUMCPU_WITH_OFFSET 2
