Index: /trunk/include/VBox/iommu-amd.h
===================================================================
--- /trunk/include/VBox/iommu-amd.h	(revision 87816)
+++ /trunk/include/VBox/iommu-amd.h	(revision 87817)
@@ -545,42 +545,44 @@
 /** Mask of valid  bits for EPHSUP (Enhanced Peripheral Page Request Handling
  *  Support) feature (bits 52:53). */
-#define IOMMU_DTE_QWORD_0_FEAT_EPHSUP_MASK      UINT64_C(0x0030000000000000)
+#define IOMMU_DTE_QWORD_0_FEAT_EPHSUP_MASK              UINT64_C(0x0030000000000000)
 
 /** Mask of valid bits for GTSup (Guest Translation Support) feature (bits 55:60,
  *  bits 80:95). */
-#define IOMMU_DTE_QWORD_0_FEAT_GTSUP_MASK       UINT64_C(0x1f80000000000000)
-#define IOMMU_DTE_QWORD_1_FEAT_GTSUP_MASK       UINT64_C(0x00000000ffff0000)
+#define IOMMU_DTE_QWORD_0_FEAT_GTSUP_MASK               UINT64_C(0x1f80000000000000)
+#define IOMMU_DTE_QWORD_1_FEAT_GTSUP_MASK               UINT64_C(0x00000000ffff0000)
 
 /** Mask of valid bits for GIoSup (Guest I/O Protection Support) feature (bit 54). */
-#define IOMMU_DTE_QWORD_0_FEAT_GIOSUP_MASK      UINT64_C(0x0040000000000000)
+#define IOMMU_DTE_QWORD_0_FEAT_GIOSUP_MASK              UINT64_C(0x0040000000000000)
 
 /** Mask of valid DTE feature bits. */
-#define IOMMU_DTE_QWORD_0_FEAT_MASK             (  IOMMU_DTE_QWORD_0_FEAT_EPHSUP_MASK \
-                                                 | IOMMU_DTE_QWORD_0_FEAT_GTSUP_MASK  \
-                                                 | IOMMU_DTE_QWORD_0_FEAT_GIOSUP_MASK)
-#define IOMMU_DTE_QWORD_1_FEAT_MASK             (IOMMU_DTE_QWORD_0_FEAT_GIOSUP_MASK)
+#define IOMMU_DTE_QWORD_0_FEAT_MASK                     (  IOMMU_DTE_QWORD_0_FEAT_EPHSUP_MASK \
+                                                         | IOMMU_DTE_QWORD_0_FEAT_GTSUP_MASK  \
+                                                         | IOMMU_DTE_QWORD_0_FEAT_GIOSUP_MASK)
+#define IOMMU_DTE_QWORD_1_FEAT_MASK                     IOMMU_DTE_QWORD_0_FEAT_GIOSUP_MASK
 
 /** Mask of all valid DTE bits (including all feature bits). */
-#define IOMMU_DTE_QWORD_0_VALID_MASK            UINT64_C(0x7fffffffffffff83)
-#define IOMMU_DTE_QWORD_1_VALID_MASK            UINT64_C(0xfffffbffffffffff)
-#define IOMMU_DTE_QWORD_2_VALID_MASK            UINT64_C(0xff0fffffffffffff)
-#define IOMMU_DTE_QWORD_3_VALID_MASK            UINT64_C(0xffc0000000000000)
+#define IOMMU_DTE_QWORD_0_VALID_MASK                    UINT64_C(0x7fffffffffffff83)
+#define IOMMU_DTE_QWORD_1_VALID_MASK                    UINT64_C(0xfffffbffffffffff)
+#define IOMMU_DTE_QWORD_2_VALID_MASK                    UINT64_C(0xff0fffffffffffff)
+#define IOMMU_DTE_QWORD_3_VALID_MASK                    UINT64_C(0xffc0000000000000)
 
 /** Mask of the interrupt table root pointer. */
-#define IOMMU_DTE_IRTE_ROOT_PTR_MASK            UINT64_C(0x000fffffffffffc0)
+#define IOMMU_DTE_IRTE_ROOT_PTR_MASK                    UINT64_C(0x000fffffffffffc0)
 /** Number of bits to shift to get the interrupt root table pointer at
    qword 2 (qword 0 being the first one) - 128-byte aligned. */
-#define IOMMU_DTE_IRTE_ROOT_PTR_SHIFT           6
+#define IOMMU_DTE_IRTE_ROOT_PTR_SHIFT                   6
 
 /** Maximum encoded IRTE length (exclusive). */
-#define IOMMU_DTE_INTR_TAB_LEN_MAX              12
+#define IOMMU_DTE_INTR_TAB_LEN_MAX                      12
 /** Gets the interrupt table entries (in bytes) given the DTE pointer. */
-#define IOMMU_GET_INTR_TAB_ENTRIES(a_pDte)      (UINT64_C(1) << (a_pDte)->n.u4IntrTableLength)
+#define IOMMU_DTE_GET_INTR_TAB_ENTRIES(a_pDte)          (UINT64_C(1) << (a_pDte)->n.u4IntrTableLength)
 /** Gets the interrupt table length (in bytes) given the DTE pointer. */
-#define IOMMU_GET_INTR_TAB_LEN(a_pDte)          (IOMMU_GET_INTR_TAB_ENTRIES(a_pDte) * sizeof(IRTE_T))
+#define IOMMU_DTE_GET_INTR_TAB_LEN(a_pDte)              (IOMMU_DTE_GET_INTR_TAB_ENTRIES(a_pDte) * sizeof(IRTE_T))
 /** Mask of interrupt control bits. */
-#define IOMMU_DTE_INTR_CTRL_MASK                0x3
-/** Gets the interrupt control bits given the DTE pointer. */
-#define IOMMU_GET_INTR_CTRL(a_pDte)             (((a_pDte)->au64[2] >> 60) & IOMMU_DTE_INTR_CTRL_MASK)
+#define IOMMU_DTE_INTR_CTRL_MASK                        0x3
+/** Gets the interrupt control bits from the DTE. */
+#define IOMMU_DTE_GET_INTR_CTRL(a_pDte)                 (((a_pDte)->au64[2] >> 60) & IOMMU_DTE_INTR_CTRL_MASK)
+/** Gets the ignore unmapped interrupt bit from the DTE. */
+#define IOMMU_DTE_GET_IG(a_pDte)                        (((a_pDte)->au64[2] >> 5) & 0x1)
 
 /**
@@ -699,5 +701,5 @@
 #define IOMMU_MSI_DATA_IRTE_OFFSET_MASK     UINT32_C(0x000007ff)
 /** Gets the IRTE offset from the originating MSI interrupt message. */
-#define IOMMU_GET_IRTE_OFF(a_u32MsiData)    (((a_u32MsiData) & IOMMU_MSI_DATA_IRTE_OFFSET_MASK) * sizeof(IRTE_T));
+#define IOMMU_GET_IRTE_OFF(a_u32MsiData)    (((a_u32MsiData) & IOMMU_MSI_DATA_IRTE_OFFSET_MASK) * sizeof(IRTE_T))
 
 /**
Index: /trunk/src/VBox/Devices/Bus/DevIommuAmd.cpp
===================================================================
--- /trunk/src/VBox/Devices/Bus/DevIommuAmd.cpp	(revision 87816)
+++ /trunk/src/VBox/Devices/Bus/DevIommuAmd.cpp	(revision 87817)
@@ -47,6 +47,8 @@
 #define IOMMU_MAGIC                                 0x10acce55
 
-/** Enable the IOTLBE cache. */
-#define IOMMU_WITH_IOTLBE_CACHE
+/** Enable the IOTLBE cache only in ring-3 for now, see @bugref{9654#c95}. */
+#ifdef IN_RING3
+# define IOMMU_WITH_IOTLBE_CACHE
+#endif
 /** Enable the interrupt cache. */
 #define IOMMU_WITH_IRTE_CACHE
@@ -55,14 +57,13 @@
 #if defined(IOMMU_WITH_IOTLBE_CACHE) || defined(IOMMU_WITH_IRTE_CACHE)
 # define IOMMU_WITH_DTE_CACHE
+/** The maximum number of device IDs in the cache. */
+# define IOMMU_DEV_CACHE_COUNT                      16
+/** An empty device ID. */
+# define IOMMU_DTE_CACHE_KEY_NIL                    0
 #endif
 
 #ifdef IOMMU_WITH_IRTE_CACHE
-/** The maximum number of interrupt cache entries configurable through CFGM. */
-# define IOMMU_IRTE_CACHE_MAX                       32
-/** The default number of interrupt cache entries. */
-# define IOMMU_IRTE_CACHE_DEFAULT                   16
-/** The minimum number of interrupt cache entries configurable through CFGM. */
-# define IOMMU_IRTE_CACHE_MIN                       8
-
+/** The maximum number of IRTE cache entries. */
+# define IOMMU_IRTE_CACHE_COUNT                     32
 /** A NIL IRTE cache entry key. */
 # define IOMMU_IRTE_CACHE_KEY_NIL                   (~(uint32_t)0U)
@@ -108,8 +109,5 @@
 
 #ifdef IOMMU_WITH_DTE_CACHE
-/** The maximum number of DTE entries. */
-# define IOMMU_DTE_CACHE_MAX                        UINT16_MAX
-
-/** @name IOMMU_DTECACHE_F_XXX: DTE cache flags.
+/** @name IOMMU_DTE_CACHE_F_XXX: DTE cache flags.
  *
  *  Some of these flags are "basic" i.e. they correspond directly to their bits in
@@ -131,45 +129,47 @@
  *  @{ */
 /** The DTE is present. */
-# define IOMMU_DTECACHE_F_PRESENT                       RT_BIT(0)
+# define IOMMU_DTE_CACHE_F_PRESENT                       RT_BIT(0)
 /** The DTE is valid. */
-# define IOMMU_DTECACHE_F_VALID                         RT_BIT(1)
+# define IOMMU_DTE_CACHE_F_VALID                         RT_BIT(1)
 /** The DTE permissions apply for address translations. */
-# define IOMMU_DTECACHE_F_IO_PERM                       RT_BIT(2)
+# define IOMMU_DTE_CACHE_F_IO_PERM                       RT_BIT(2)
 /** DTE permission - I/O read allowed. */
-# define IOMMU_DTECACHE_F_IO_PERM_READ                  RT_BIT(3)
+# define IOMMU_DTE_CACHE_F_IO_PERM_READ                  RT_BIT(3)
 /** DTE permission - I/O write allowed. */
-# define IOMMU_DTECACHE_F_IO_PERM_WRITE                 RT_BIT(4)
+# define IOMMU_DTE_CACHE_F_IO_PERM_WRITE                 RT_BIT(4)
 /** DTE permission - reserved. */
-# define IOMMU_DTECACHE_F_IO_PERM_RSVD                  RT_BIT(5)
+# define IOMMU_DTE_CACHE_F_IO_PERM_RSVD                  RT_BIT(5)
 /** Address translation required. */
-# define IOMMU_DTECACHE_F_ADDR_TRANSLATE                RT_BIT(6)
+# define IOMMU_DTE_CACHE_F_ADDR_TRANSLATE                RT_BIT(6)
 /** Suppress all I/O page faults. */
-# define IOMMU_DTECACHE_F_SUPPRESS_ALL_IOPF             RT_BIT(7)
+# define IOMMU_DTE_CACHE_F_SUPPRESS_ALL_IOPF             RT_BIT(7)
 /** Suppress I/O page faults. */
-# define IOMMU_DTECACHE_F_SUPPRESS_IOPF                 RT_BIT(8)
+# define IOMMU_DTE_CACHE_F_SUPPRESS_IOPF                 RT_BIT(8)
 /** Interrupt map valid. */
-# define IOMMU_DTECACHE_F_INTR_MAP_VALID                RT_BIT(9)
+# define IOMMU_DTE_CACHE_F_INTR_MAP_VALID                RT_BIT(9)
 /** Ignore unmapped interrupts. */
-# define IOMMU_DTECACHE_F_IGNORE_UNMAPPED_INTR          RT_BIT(10)
+# define IOMMU_DTE_CACHE_F_IGNORE_UNMAPPED_INTR          RT_BIT(10)
 /** An I/O page fault has been raised for this device. */
-# define IOMMU_DTECACHE_F_IO_PAGE_FAULT_RAISED          RT_BIT(11)
+# define IOMMU_DTE_CACHE_F_IO_PAGE_FAULT_RAISED          RT_BIT(11)
 /** Fixed and arbitrary interrupt control: Target Abort. */
-# define IOMMU_DTECACHE_F_INTR_CTRL_TARGET_ABORT        RT_BIT(12)
+# define IOMMU_DTE_CACHE_F_INTR_CTRL_TARGET_ABORT        RT_BIT(12)
 /** Fixed and arbitrary interrupt control: Forward unmapped. */
-# define IOMMU_DTECACHE_F_INTR_CTRL_FWD_UNMAPPED        RT_BIT(13)
+# define IOMMU_DTE_CACHE_F_INTR_CTRL_FWD_UNMAPPED        RT_BIT(13)
 /** Fixed and arbitrary interrupt control: Remapped. */
-# define IOMMU_DTECACHE_F_INTR_CTRL_REMAPPED            RT_BIT(14)
+# define IOMMU_DTE_CACHE_F_INTR_CTRL_REMAPPED            RT_BIT(14)
 /** Fixed and arbitrary interrupt control: Reserved. */
-# define IOMMU_DTECACHE_F_INTR_CTRL_RSVD                RT_BIT(15)
+# define IOMMU_DTE_CACHE_F_INTR_CTRL_RSVD                RT_BIT(15)
 /** @} */
 
 /** The number of bits to shift I/O device flags for DTE permissions. */
-# define IOMMU_DTECACHE_F_IO_PERM_SHIFT                 3
+# define IOMMU_DTE_CACHE_F_IO_PERM_SHIFT                 3
 /** The mask of DTE permissions in I/O device flags. */
-# define IOMMU_DTECACHE_F_IO_PERM_MASK                  0x3
+# define IOMMU_DTE_CACHE_F_IO_PERM_MASK                  0x3
 /** The number of bits to shift I/O device flags for interrupt control bits. */
-# define IOMMU_DTECACHE_F_INTR_CTRL_SHIFT               12
+# define IOMMU_DTE_CACHE_F_INTR_CTRL_SHIFT               12
 /** The mask of interrupt control bits in I/O device flags. */
-# define IOMMU_DTECACHE_F_INTR_CTRL_MASK                0x3
+# define IOMMU_DTE_CACHE_F_INTR_CTRL_MASK                0x3
+/** The number of bits to shift for ignore-unmapped interrupts bit. */
+# define IOMMU_DTE_CACHE_F_IGNORE_UNMAPPED_INTR_SHIFT    10
 
 /** Acquires the cache lock. */
@@ -283,5 +283,5 @@
 typedef struct DTECACHE
 {
-    /** This device's flags, see IOMMU_DTECACHE_F_XXX. */
+    /** This device's flags, see IOMMU_DTE_CACHE_F_XXX. */
     uint16_t        fFlags;
     /** The domain ID assigned for this device by software. */
@@ -325,8 +325,8 @@
 typedef struct IRTECACHE
 {
+    /** The key, see IOMMU_IRTE_CACHE_KEY_MAKE. */
+    uint32_t            uKey;
     /** The IRTE. */
     IRTE_T              Irte;
-    /** The key, see IOMMU_IRTE_CACHE_KEY_MAKE. */
-    uint32_t            uKey;
 } IRTECACHE;
 /** Pointer to an IRTE cache struct. */
@@ -364,26 +364,12 @@
     /** The critsect that protects the cache from concurrent access. */
     PDMCRITSECT                 CritSectCache;
-    /** Maps [DeviceId] to [DomainId]. */
-    PDTECACHE                   paDteCache;
-#endif
-#ifdef IOMMU_WITH_IOTLBE_CACHE
-    /** Pointer to array of pre-allocated IOTLBEs. */
-    PIOTLBE                     paIotlbes;
-    /** Maps [DomainId,Iova] to [IOTLBE]. */
-    AVLU64TREE                  TreeIotlbe;
-    /** LRU list anchor for IOTLB entries. */
-    RTLISTANCHOR                LstLruIotlbe;
-    /** Index of the next unused IOTLB. */
-    uint32_t                    idxUnusedIotlbe;
-    /** Number of cached IOTLB entries in the tree. */
-    uint32_t                    cCachedIotlbes;
+    /** Array of device IDs. */
+    uint16_t                    aDeviceIds[IOMMU_DEV_CACHE_COUNT];
+    /** Array of DTE cache entries. */
+    DTECACHE                    aDteCache[IOMMU_DEV_CACHE_COUNT];
 #endif
 #ifdef IOMMU_WITH_IRTE_CACHE
-    /** Maps [DeviceId] to [IRTE]. */
-    PIRTECACHE                  paIrteCache;
-    /** Maximum number of entries in the IRTE cache. */
-    uint16_t                    cIrteCache;
-    /** Padding. */
-    uint16_t                    auPadding[3];
+    /** Array of IRTE cache entries. */
+    IRTECACHE                   aIrteCache[IOMMU_IRTE_CACHE_COUNT];
 #endif
 
@@ -568,13 +554,9 @@
 AssertCompileMemberAlignment(IOMMU, hMmio, 8);
 #ifdef IOMMU_WITH_DTE_CACHE
-AssertCompileMemberAlignment(IOMMU, paDteCache, 8);
-#endif
-#ifdef IOMMU_WITH_IOTLBE_CACHE
-AssertCompileMemberAlignment(IOMMU, paIotlbes, 8);
-AssertCompileMemberAlignment(IOMMU, TreeIotlbe, 8);
-AssertCompileMemberAlignment(IOMMU, LstLruIotlbe, 8);
+AssertCompileMemberAlignment(IOMMU, aDeviceIds, 8);
+AssertCompileMemberAlignment(IOMMU, aDteCache, 8);
 #endif
 #ifdef IOMMU_WITH_IRTE_CACHE
-AssertCompileMemberAlignment(IOMMU, paIrteCache, 8);
+AssertCompileMemberAlignment(IOMMU, aIrteCache, 8);
 #endif
 AssertCompileMemberAlignment(IOMMU, IommuBar, 8);
@@ -594,7 +576,24 @@
     /** The command thread handle. */
     R3PTRTYPE(PPDMTHREAD)       pCmdThread;
+#ifdef IOMMU_WITH_IOTLBE_CACHE
+    /** Pointer to array of pre-allocated IOTLBEs. */
+    PIOTLBE                     paIotlbes;
+    /** Maps [DomainId,Iova] to [IOTLBE]. */
+    AVLU64TREE                  TreeIotlbe;
+    /** LRU list anchor for IOTLB entries. */
+    RTLISTANCHOR                LstLruIotlbe;
+    /** Index of the next unused IOTLB. */
+    uint32_t                    idxUnusedIotlbe;
+    /** Number of cached IOTLB entries in the tree. */
+    uint32_t                    cCachedIotlbes;
+#endif
 } IOMMUR3;
 /** Pointer to the ring-3 IOMMU device state. */
 typedef IOMMUR3 *PIOMMUR3;
+#ifdef IOMMU_WITH_IOTLBE_CACHE
+AssertCompileMemberAlignment(IOMMUR3, paIotlbes, 8);
+AssertCompileMemberAlignment(IOMMUR3, TreeIotlbe, 8);
+AssertCompileMemberAlignment(IOMMUR3, LstLruIotlbe, 8);
+#endif
 
 /**
@@ -649,6 +648,6 @@
 typedef struct IOTLBEFLUSHARG
 {
-    /** The IOMMU device state. */
-    PIOMMU              pIommu;
+    /** The ring-3 IOMMU device state. */
+    PIOMMUR3            pIommuR3;
     /** The domain ID to flush. */
     uint16_t            uDomainId;
@@ -664,6 +663,6 @@
 typedef struct IOTLBEINFOARG
 {
-    /** The IOMMU device state. */
-    PIOMMU              pIommu;
+    /** The ring-3 IOMMU device state. */
+    PIOMMUR3            pIommuR3;
     /** The info helper. */
     PCDBGFINFOHLP       pHlp;
@@ -766,5 +765,5 @@
  *
  * @returns The number of entries in the event log.
- * @param   pThis     The IOMMU device state.
+ * @param   pThis   The shared IOMMU device state.
  */
 static uint32_t iommuAmdGetEvtLogEntryCount(PIOMMU pThis)
@@ -833,14 +832,15 @@
     if (pDte->n.u1Valid)
     {
-        fFlags |= IOMMU_DTECACHE_F_VALID;
-
+        fFlags |= IOMMU_DTE_CACHE_F_VALID;
+
+        /** @todo Skip the if checks here (shift/mask the relevant bits over).  */
         if (pDte->n.u1SuppressAllPfEvents)
-            fFlags |= IOMMU_DTECACHE_F_SUPPRESS_ALL_IOPF;
+            fFlags |= IOMMU_DTE_CACHE_F_SUPPRESS_ALL_IOPF;
         if (pDte->n.u1SuppressPfEvents)
-            fFlags |= IOMMU_DTECACHE_F_SUPPRESS_IOPF;
+            fFlags |= IOMMU_DTE_CACHE_F_SUPPRESS_IOPF;
 
         uint16_t const fDtePerm = (pDte->au64[0] >> IOMMU_IO_PERM_SHIFT) & IOMMU_IO_PERM_MASK;
-        AssertCompile(IOMMU_DTECACHE_F_IO_PERM_MASK == IOMMU_IO_PERM_MASK);
-        fFlags |= fDtePerm << IOMMU_DTECACHE_F_IO_PERM_SHIFT;
+        AssertCompile(IOMMU_DTE_CACHE_F_IO_PERM_MASK == IOMMU_IO_PERM_MASK);
+        fFlags |= fDtePerm << IOMMU_DTE_CACHE_F_IO_PERM_SHIFT;
     }
 
@@ -848,11 +848,13 @@
     if (pDte->n.u1IntrMapValid)
     {
-        fFlags |= IOMMU_DTECACHE_F_INTR_MAP_VALID;
+        fFlags |= IOMMU_DTE_CACHE_F_INTR_MAP_VALID;
+
+        /** @todo Skip the if check here (shift/mask the relevant bit over).  */
         if (pDte->n.u1IgnoreUnmappedIntrs)
-            fFlags |= IOMMU_DTECACHE_F_IGNORE_UNMAPPED_INTR;
-
-        uint16_t const fIntrCtrl = IOMMU_GET_INTR_CTRL(pDte);
-        AssertCompile(IOMMU_DTECACHE_F_INTR_CTRL_MASK == IOMMU_DTE_INTR_CTRL_MASK);
-        fFlags |= fIntrCtrl << IOMMU_DTECACHE_F_INTR_CTRL_SHIFT;
+            fFlags |= IOMMU_DTE_CACHE_F_IGNORE_UNMAPPED_INTR;
+
+        uint16_t const fIntrCtrl = IOMMU_DTE_GET_INTR_CTRL(pDte);
+        AssertCompile(IOMMU_DTE_CACHE_F_INTR_CTRL_MASK == IOMMU_DTE_INTR_CTRL_MASK);
+        fFlags |= fIntrCtrl << IOMMU_DTE_CACHE_F_INTR_CTRL_SHIFT;
     }
     return fFlags;
@@ -880,17 +882,166 @@
 
 
+#ifdef IOMMU_WITH_DTE_CACHE
+/**
+ * Looks up an entry in the DTE cache for the given device ID.
+ *
+ * @returns The index of the entry, or the cache capacity if no entry was found.
+ * @param   pThis   The shared IOMMU device state.
+ * @param   uDevId  The device ID (bus, device, function).
+ */
+DECLINLINE(uint16_t) iommuAmdDteCacheEntryLookup(PIOMMU pThis, uint16_t uDevId)
+{
+    uint16_t const cDeviceIds = RT_ELEMENTS(pThis->aDeviceIds);
+    for (uint16_t i = 0; i < cDeviceIds; i++)
+    {
+        if (pThis->aDeviceIds[i] == uDevId)
+            return i;
+    }
+    return cDeviceIds;
+}
+
+
+/**
+ * Gets a free/unused DTE cache entry.
+ *
+ * @returns The index of an unused entry, or cache capacity if the cache is full.
+ * @param   pThis   The shared IOMMU device state.
+ */
+DECLINLINE(uint16_t) iommuAmdDteCacheEntryGetUnused(PCIOMMU pThis)
+{
+    /*
+     * ASSUMES device ID 0 is the PCI host bridge or the IOMMU itself
+     * (the latter being an ugly hack) and cannot be a valid device ID.
+     */
+    uint16_t const cDeviceIds = RT_ELEMENTS(pThis->aDeviceIds);
+    for (uint16_t i = 0; i < cDeviceIds; i++)
+    {
+        if (!pThis->aDeviceIds[i])
+            return i;
+    }
+    return cDeviceIds;
+}
+
+
+/**
+ * Adds or updates the I/O device flags for the given device ID.
+ *
+ * @returns VBox status code.
+ * @retval  VERR_OUT_OF_RESOURCES if the cache is full.
+ *
+ * @param   pDevIns     The IOMMU instance data.
+ * @param   uDevId      The device ID (bus, device, function).
+ * @param   pDte        The device table entry.
+ * @param   fOrMask     The device flags (usually compound flags) to OR in with the
+ *                      basic flags, see IOMMU_DTE_CACHE_F_XXX.
+ */
+static int iommuAmdDteCacheAdd(PPDMDEVINS pDevIns, uint16_t uDevId, PCDTE_T pDte, uint16_t fOrMask)
+{
+    Assert(pDte);
+    Assert(uDevId);
+
+    int rc = VINF_SUCCESS;
+    uint16_t const fFlags    = iommuAmdGetBasicDevFlags(pDte) | IOMMU_DTE_CACHE_F_PRESENT | fOrMask;
+    uint16_t const uDomainId = pDte->n.u16DomainId;
+
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
+
+    uint16_t const cDteCache = RT_ELEMENTS(pThis->aDteCache);
+    uint16_t idxDte = iommuAmdDteCacheEntryLookup(pThis, uDevId);
+    if (idxDte < cDteCache)
+    {
+        pThis->aDteCache[idxDte].fFlags    = fFlags;
+        pThis->aDteCache[idxDte].uDomainId = uDomainId;
+    }
+    else if ((idxDte = iommuAmdDteCacheEntryGetUnused(pThis)) < cDteCache)
+    {
+        pThis->aDeviceIds[idxDte] = uDevId;
+        pThis->aDteCache[idxDte].fFlags    = fFlags;
+        pThis->aDteCache[idxDte].uDomainId = uDomainId;
+    }
+    else
+        rc = VERR_OUT_OF_RESOURCES;
+
+    IOMMU_UNLOCK_CACHE(pDevIns, pThis);
+    return rc;
+}
+
+
+/**
+ * Adds one or more I/O device flags if the device is already present in the cache.
+ *
+ * @param   pDevIns     The IOMMU instance data.
+ * @param   uDevId      The device ID (bus, device, function).
+ * @param   fFlags      Additional device flags to OR with existing flags, see
+ *                      IOMMU_DTE_CACHE_F_XXX.
+ */
+static void iommuAmdDteCacheAddFlags(PPDMDEVINS pDevIns, uint16_t uDevId, uint16_t fFlags)
+{
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
+
+    uint16_t const cDteCache = RT_ELEMENTS(pThis->aDteCache);
+    uint16_t const idxDte = iommuAmdDteCacheEntryLookup(pThis, uDevId);
+    if (   idxDte < cDteCache
+        && (pThis->aDteCache[idxDte].fFlags & IOMMU_DTE_CACHE_F_PRESENT))
+        pThis->aDteCache[idxDte].fFlags |= fFlags;
+
+    IOMMU_UNLOCK_CACHE(pDevIns, pThis);
+}
+
+
+/**
+ * Removes a DTE cache entry.
+ *
+ * @param   pDevIns     The IOMMU instance data.
+ * @param   uDevId      The device ID to remove cache entries for.
+ */
+static void iommuAmdDteCacheRemove(PPDMDEVINS pDevIns, uint16_t uDevId)
+{
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
+
+    uint16_t const cDteCache = RT_ELEMENTS(pThis->aDteCache);
+    uint16_t const idxDte    = iommuAmdDteCacheEntryLookup(pThis, uDevId);
+    if (idxDte < cDteCache)
+    {
+        pThis->aDteCache[idxDte].fFlags    = 0;
+        pThis->aDteCache[idxDte].uDomainId = 0;
+    }
+
+    IOMMU_UNLOCK_CACHE(pDevIns, pThis);
+}
+
+
+/**
+ * Removes all entries in the device table entry cache.
+ *
+ * @param   pDevIns     The IOMMU instance data.
+ */
+static void iommuAmdDteCacheRemoveAll(PPDMDEVINS pDevIns)
+{
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
+    RT_ZERO(pThis->aDeviceIds);
+    RT_ZERO(pThis->aDteCache);
+    IOMMU_UNLOCK_CACHE(pDevIns, pThis);
+}
+#endif  /* IOMMU_WITH_DTE_CACHE */
+
+
 #ifdef IOMMU_WITH_IOTLBE_CACHE
 /**
  * Moves the IOTLB entry to the least recently used slot.
  *
- * @param   pThis       The IOMMU device state.
- * @param   pIotlbe     The IOTLB entry.
- */
-static void iommuAmdIotlbEntryMoveToLru(PIOMMU pThis, PIOTLBE pIotlbe)
-{
-    if (!RTListNodeIsFirst(&pThis->LstLruIotlbe, &pIotlbe->NdLru))
+ * @param   pThisR3     The ring-3 IOMMU device state.
+ * @param   pIotlbe     The IOTLB entry to move.
+ */
+static void iommuAmdIotlbEntryMoveToLru(PIOMMUR3 pThisR3, PIOTLBE pIotlbe)
+{
+    if (!RTListNodeIsFirst(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru))
     {
         RTListNodeRemove(&pIotlbe->NdLru);
-        RTListPrepend(&pThis->LstLruIotlbe, &pIotlbe->NdLru);
+        RTListPrepend(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru);
     }
 }
@@ -900,13 +1051,13 @@
  * Moves the IOTLB entry to the most recently used slot.
  *
- * @param   pThis       The IOMMU device state.
- * @param   pIotlbe     The IOTLB entry.
- */
-static void iommuAmdIotlbEntryMoveToMru(PIOMMU pThis, PIOTLBE pIotlbe)
-{
-    if (!RTListNodeIsLast(&pThis->LstLruIotlbe, &pIotlbe->NdLru))
+ * @param   pThisR3     The ring-3 IOMMU device state.
+ * @param   pIotlbe     The IOTLB entry to move.
+ */
+DECLINLINE(void) iommuAmdIotlbEntryMoveToMru(PIOMMUR3 pThisR3, PIOTLBE pIotlbe)
+{
+    if (!RTListNodeIsLast(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru))
     {
         RTListNodeRemove(&pIotlbe->NdLru);
-        RTListAppend(&pThis->LstLruIotlbe, &pIotlbe->NdLru);
+        RTListAppend(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru);
     }
 }
@@ -918,5 +1069,5 @@
  *
  * @returns VINF_SUCCESS.
- * @param   pNode       Pointer to an IOTLBE.
+ * @param   pNode       Pointer to an IOTLB entry to dump info.
  * @param   pvUser      Pointer to an IOTLBEINFOARG.
  */
@@ -926,7 +1077,7 @@
     PCIOTLBEINFOARG pArgs = (PCIOTLBEINFOARG)pvUser;
     AssertPtr(pArgs);
-    AssertPtr(pArgs->pIommu);
+    AssertPtr(pArgs->pIommuR3);
     AssertPtr(pArgs->pHlp);
-    Assert(pArgs->pIommu->u32Magic == IOMMU_MAGIC);
+    //Assert(pArgs->pIommuR3->u32Magic == IOMMU_MAGIC);
 
     uint16_t const uDomainId = IOMMU_IOTLB_KEY_GET_DOMAIN_ID(pNode->Key);
@@ -968,6 +1119,6 @@
     PCIOTLBEFLUSHARG pArgs = (PCIOTLBEFLUSHARG)pvUser;
     AssertPtr(pArgs);
-    AssertPtr(pArgs->pIommu);
-    Assert(pArgs->pIommu->u32Magic == IOMMU_MAGIC);
+    AssertPtr(pArgs->pIommuR3);
+    //Assert(pArgs->pIommuR3->u32Magic == IOMMU_MAGIC);
 
     uint16_t const uDomainId = IOMMU_IOTLB_KEY_GET_DOMAIN_ID(pNode->Key);
@@ -977,5 +1128,5 @@
         PIOTLBE pIotlbe = (PIOTLBE)pNode;
         pIotlbe->fEvictPending = true;
-        iommuAmdIotlbEntryMoveToLru(pArgs->pIommu, (PIOTLBE)pNode);
+        iommuAmdIotlbEntryMoveToLru(pArgs->pIommuR3, (PIOTLBE)pNode);
     }
     return VINF_SUCCESS;
@@ -986,5 +1137,6 @@
  * Inserts an IOTLB entry into the cache.
  *
- * @param   pThis           The IOMMU device state.
+ * @param   pThis           The shared IOMMU device state.
+ * @param   pThisR3         The ring-3 IOMMU device state.
  * @param   pIotlbe         The IOTLB entry to initialize and insert.
  * @param   uDomainId       The domain ID.
@@ -992,5 +1144,5 @@
  * @param   pPageLookup     The I/O page lookup result of the access.
  */
-static void iommuAmdIotlbEntryInsert(PIOMMU pThis, PIOTLBE pIotlbe, uint16_t uDomainId, uint64_t uIova,
+static void iommuAmdIotlbEntryInsert(PIOMMU pThis, PIOMMUR3 pThisR3, PIOTLBE pIotlbe, uint16_t uDomainId, uint64_t uIova,
                                      PCIOPAGELOOKUP pPageLookup)
 {
@@ -1004,12 +1156,12 @@
 
     /* Check if the entry already exists. */
-    PIOTLBE pFound = (PIOTLBE)RTAvlU64Get(&pThis->TreeIotlbe, pIotlbe->Core.Key);
+    PIOTLBE pFound = (PIOTLBE)RTAvlU64Get(&pThisR3->TreeIotlbe, pIotlbe->Core.Key);
     if (!pFound)
     {
         /* Insert the entry into the cache. */
-        bool const fInserted = RTAvlU64Insert(&pThis->TreeIotlbe, &pIotlbe->Core);
+        bool const fInserted = RTAvlU64Insert(&pThisR3->TreeIotlbe, &pIotlbe->Core);
         Assert(fInserted); NOREF(fInserted);
-        Assert(pThis->cCachedIotlbes < IOMMU_IOTLBE_MAX);
-        ++pThis->cCachedIotlbes;
+        Assert(pThisR3->cCachedIotlbes < IOMMU_IOTLBE_MAX);
+        ++pThisR3->cCachedIotlbes;
         STAM_COUNTER_INC(&pThis->StatIotlbeCached);
     }
@@ -1034,10 +1186,11 @@
  * @returns Pointer to the removed IOTLB entry, NULL if the entry wasn't found in
  *          the tree.
- * @param   pThis   The IOMMU device state.
- * @param   uKey    The key of the IOTLB entry to remove.
- */
-static PIOTLBE iommuAmdIotlbEntryRemove(PIOMMU pThis, AVLU64KEY uKey)
-{
-    PIOTLBE pIotlbe = (PIOTLBE)RTAvlU64Remove(&pThis->TreeIotlbe, uKey);
+ * @param   pThis       The shared IOMMU device state.
+ * @param   pThisR3     The ring-3 IOMMU device state.
+ * @param   uKey        The key of the IOTLB entry to remove.
+ */
+static PIOTLBE iommuAmdIotlbEntryRemove(PIOMMU pThis, PIOMMUR3 pThisR3, AVLU64KEY uKey)
+{
+    PIOTLBE pIotlbe = (PIOTLBE)RTAvlU64Remove(&pThisR3->TreeIotlbe, uKey);
     if (pIotlbe)
     {
@@ -1051,6 +1204,6 @@
         Assert(pIotlbe->Core.Key == IOMMU_IOTLB_KEY_NIL);
 
-        Assert(pThis->cCachedIotlbes > 0);
-        --pThis->cCachedIotlbes;
+        Assert(pThisR3->cCachedIotlbes > 0);
+        --pThisR3->cCachedIotlbes;
         STAM_COUNTER_DEC(&pThis->StatIotlbeCached);
     }
@@ -1063,12 +1216,15 @@
  *
  * @returns Pointer to IOTLB entry if found, NULL otherwise.
- * @param   pThis           The IOMMU device state.
- * @param   uDomainId       The domain ID.
- * @param   uIova           The I/O virtual address.
- */
-static PIOTLBE iommuAmdIotlbLookup(PIOMMU pThis, uint64_t uDomainId, uint64_t uIova)
-{
+ * @param   pThis       The shared IOMMU device state.
+ * @param   pThisR3     The ring-3 IOMMU device state.
+ * @param   uDomainId   The domain ID.
+ * @param   uIova       The I/O virtual address.
+ */
+static PIOTLBE iommuAmdIotlbLookup(PIOMMU pThis, PIOMMUR3 pThisR3, uint64_t uDomainId, uint64_t uIova)
+{
+    RT_NOREF(pThis);
+
     uint64_t const uKey = IOMMU_IOTLB_KEY_MAKE(uDomainId, uIova);
-    PIOTLBE pIotlbe = (PIOTLBE)RTAvlU64Get(&pThis->TreeIotlbe, uKey);
+    PIOTLBE pIotlbe = (PIOTLBE)RTAvlU64Get(&pThisR3->TreeIotlbe, uKey);
     if (    pIotlbe
         && !pIotlbe->fEvictPending)
@@ -1087,10 +1243,11 @@
  * Adds an IOTLB entry to the cache.
  *
- * @param   pThis           The IOMMU device state.
+ * @param   pThis           The shared IOMMU device state.
+ * @param   pThisR3         The ring-3 IOMMU device state.
  * @param   uDomainId       The domain ID.
  * @param   uIova           The I/O virtual address.
  * @param   pPageLookup     The I/O page lookup result of the access.
  */
-static void iommuAmdIotlbAdd(PIOMMU pThis, uint16_t uDomainId, uint64_t uIova, PCIOPAGELOOKUP pPageLookup)
+static void iommuAmdIotlbAdd(PIOMMU pThis, PIOMMUR3 pThisR3, uint16_t uDomainId, uint64_t uIova, PCIOPAGELOOKUP pPageLookup)
 {
     Assert(!(uIova & X86_PAGE_4K_OFFSET_MASK));
@@ -1103,31 +1260,31 @@
      * Otherwise, get a new IOTLB entry from the pre-allocated list.
      */
-    if (pThis->idxUnusedIotlbe == IOMMU_IOTLBE_MAX)
+    if (pThisR3->idxUnusedIotlbe == IOMMU_IOTLBE_MAX)
     {
         /* Grab the least recently used entry. */
-        PIOTLBE pIotlbe = RTListGetFirst(&pThis->LstLruIotlbe, IOTLBE, NdLru);
+        PIOTLBE pIotlbe = RTListGetFirst(&pThisR3->LstLruIotlbe, IOTLBE, NdLru);
         Assert(pIotlbe);
 
         /* If the entry is in the cache, remove it. */
         if (pIotlbe->Core.Key != IOMMU_IOTLB_KEY_NIL)
-            iommuAmdIotlbEntryRemove(pThis, pIotlbe->Core.Key);
+            iommuAmdIotlbEntryRemove(pThis, pThisR3, pIotlbe->Core.Key);
 
         /* Initialize and insert the IOTLB entry into the cache. */
-        iommuAmdIotlbEntryInsert(pThis, pIotlbe, uDomainId, uIova, pPageLookup);
+        iommuAmdIotlbEntryInsert(pThis, pThisR3, pIotlbe, uDomainId, uIova, pPageLookup);
 
         /* Move the entry to the most recently used slot. */
-        iommuAmdIotlbEntryMoveToMru(pThis, pIotlbe);
+        iommuAmdIotlbEntryMoveToMru(pThisR3, pIotlbe);
     }
     else
     {
         /* Grab an unused IOTLB entry from the pre-allocated list. */
-        PIOTLBE pIotlbe = &pThis->paIotlbes[pThis->idxUnusedIotlbe];
-        ++pThis->idxUnusedIotlbe;
+        PIOTLBE pIotlbe = &pThisR3->paIotlbes[pThisR3->idxUnusedIotlbe];
+        ++pThisR3->idxUnusedIotlbe;
 
         /* Initialize and insert the IOTLB entry into the cache. */
-        iommuAmdIotlbEntryInsert(pThis, pIotlbe, uDomainId, uIova, pPageLookup);
+        iommuAmdIotlbEntryInsert(pThis, pThisR3, pIotlbe, uDomainId, uIova, pPageLookup);
 
         /* Add the entry to the most recently used slot. */
-        RTListAppend(&pThis->LstLruIotlbe, &pIotlbe->NdLru);
+        RTListAppend(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru);
     }
 }
@@ -1141,15 +1298,16 @@
 static void iommuAmdIotlbRemoveAll(PPDMDEVINS pDevIns)
 {
-    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    PIOMMU   pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    PIOMMUCC pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
     IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
 
-    if (pThis->cCachedIotlbes > 0)
-    {
-        pThis->idxUnusedIotlbe = 0;
+    if (pThisR3->cCachedIotlbes > 0)
+    {
         size_t const cbIotlbes = sizeof(IOTLBE) * IOMMU_IOTLBE_MAX;
-        RT_BZERO(pThis->paIotlbes, cbIotlbes);
-        pThis->cCachedIotlbes  = 0;
+        RT_BZERO(pThisR3->paIotlbes, cbIotlbes);
+        pThisR3->idxUnusedIotlbe = 0;
+        pThisR3->cCachedIotlbes  = 0;
         STAM_COUNTER_RESET(&pThis->StatIotlbeCached);
-        RTListInit(&pThis->LstLruIotlbe);
+        RTListInit(&pThisR3->LstLruIotlbe);
     }
 
@@ -1174,5 +1332,6 @@
     Assert(cbInvalidate >= X86_PAGE_4K_SIZE);
 
-    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    PIOMMU   pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
     IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
 
@@ -1180,7 +1339,7 @@
     {
         uint64_t const uKey = IOMMU_IOTLB_KEY_MAKE(uDomainId, uIova);
-        PIOTLBE pIotlbe = iommuAmdIotlbEntryRemove(pThis, uKey);
+        PIOTLBE pIotlbe = iommuAmdIotlbEntryRemove(pThis, pThisR3, uKey);
         if (pIotlbe)
-            iommuAmdIotlbEntryMoveToLru(pThis, pIotlbe);
+            iommuAmdIotlbEntryMoveToLru(pThisR3, pIotlbe);
         uIova        += X86_PAGE_4K_SIZE;
         cbInvalidate -= X86_PAGE_4K_SIZE;
@@ -1205,11 +1364,12 @@
      * so they will eventually get evicted and re-cycled as the cache gets re-populated.
      */
-    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    PIOMMU   pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
     IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
 
     IOTLBEFLUSHARG Args;
-    Args.pIommu    = pThis;
+    Args.pIommuR3  = pThisR3;
     Args.uDomainId = uDomainId;
-    RTAvlU64DoWithAll(&pThis->TreeIotlbe, true /* fFromLeft */, iommuAmdIotlbEntryRemoveDomainId, &Args);
+    RTAvlU64DoWithAll(&pThisR3->TreeIotlbe, true /* fFromLeft */, iommuAmdIotlbEntryRemoveDomainId, &Args);
 
     IOMMU_UNLOCK_CACHE(pDevIns, pThis);
@@ -1235,5 +1395,6 @@
     Assert(cbAccess >= X86_PAGE_4K_SIZE);
 
-    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    PIOMMU   pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
 
     /* Add IOTLB entries for every page in the access. */
@@ -1251,5 +1412,5 @@
     do
     {
-        iommuAmdIotlbAdd(pThis, uDomainId, uIova, &PageLookup);
+        iommuAmdIotlbAdd(pThis, pThisR3, uDomainId, uIova, &PageLookup);
         uIova                += X86_PAGE_4K_SIZE;
         PageLookup.GCPhysSpa += X86_PAGE_4K_SIZE;
@@ -1266,5 +1427,5 @@
  *
  * @returns Index of the found entry, or cache capacity if not found.
- * @param   pThis       The IOMMU device state.
+ * @param   pThis       The shared IOMMU device state.
  * @param   uDevId      The device ID (bus, device, function).
  * @param   offIrte     The offset into the interrupt remap table.
@@ -1277,11 +1438,8 @@
      *  often. */
     uint32_t const uKey = IOMMU_IRTE_CACHE_KEY_MAKE(uDevId, offIrte);
-    uint16_t const cIrteCache = pThis->cIrteCache;
+    uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache);
     for (uint16_t i = 0; i < cIrteCache; i++)
-    {
-        PCIRTECACHE pIrteCache = &pThis->paIrteCache[i];
-        if (pIrteCache->uKey == uKey)
+        if (pThis->aIrteCache[i].uKey == uKey)
             return i;
-    }
     return cIrteCache;
 }
@@ -1289,21 +1447,18 @@
 
 /**
- * Gets an free/unused IRTE cache entry.
+ * Gets a free/unused IRTE cache entry.
  *
  * @returns The index of an unused entry, or cache capacity if the cache is full.
- * @param   pThis       The IOMMU device state.
+ * @param   pThis   The shared IOMMU device state.
  */
 static uint16_t iommuAmdIrteCacheEntryGetUnused(PCIOMMU pThis)
 {
-    uint16_t const cIrteCache = pThis->cIrteCache;
+    uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache);
     for (uint16_t i = 0; i < cIrteCache; i++)
-    {
-        PCIRTECACHE pIrteCache = &pThis->paIrteCache[i];
-        if (pIrteCache->uKey == IOMMU_IRTE_CACHE_KEY_NIL)
-        {
-            Assert(!pIrteCache->Irte.u32);
+        if (pThis->aIrteCache[i].uKey == IOMMU_IRTE_CACHE_KEY_NIL)
+        {
+            Assert(!pThis->aIrteCache[i].Irte.u32);
             return i;
         }
-    }
     return cIrteCache;
 }
@@ -1334,39 +1489,45 @@
     IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
 
-    PCDTECACHE pDteCache = &pThis->paDteCache[uDevId];
-    if ((pDteCache->fFlags & (IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_INTR_MAP_VALID))
-                          == (IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_INTR_MAP_VALID))
-    {
-        Assert((pMsiIn->Addr.u64 & VBOX_MSI_ADDR_ADDR_MASK) == VBOX_MSI_ADDR_BASE);        /* Paranoia. */
-
-        /* Currently, we only cache remapping of fixed and arbitrated interrupts. */
-        uint8_t const u8DeliveryMode = pMsiIn->Data.n.u3DeliveryMode;
-        if (u8DeliveryMode <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO)
-        {
-            uint8_t const uIntrCtrl = (pDteCache->fFlags >> IOMMU_DTECACHE_F_INTR_CTRL_SHIFT)
-                                    & IOMMU_DTECACHE_F_INTR_CTRL_MASK;
-            if (uIntrCtrl == IOMMU_INTR_CTRL_REMAP)
+    uint16_t const idxDteCache = iommuAmdDteCacheEntryLookup(pThis, uDevId);
+    if (idxDteCache < RT_ELEMENTS(pThis->aDteCache))
+    {
+        PCDTECACHE pDteCache = &pThis->aDteCache[idxDteCache];
+        if ((pDteCache->fFlags & (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_INTR_MAP_VALID))
+                              == (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_INTR_MAP_VALID))
+        {
+            Assert((pMsiIn->Addr.u64 & VBOX_MSI_ADDR_ADDR_MASK) == VBOX_MSI_ADDR_BASE);        /* Paranoia. */
+
+            /* Currently, we only cache remapping of fixed and arbitrated interrupts. */
+            uint8_t const u8DeliveryMode = pMsiIn->Data.n.u3DeliveryMode;
+            if (u8DeliveryMode <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO)
             {
-                /* Interrupt table length has been verified prior to adding entries to the cache. */
-                uint16_t const offIrte = IOMMU_GET_IRTE_OFF(pMsiIn->Data.u32);
-                uint16_t const idxIrteCache = iommuAmdIrteCacheEntryLookup(pThis, uDevId, offIrte);
-                if (idxIrteCache < pThis->cIrteCache)
+                uint8_t const uIntrCtrl = (pDteCache->fFlags >> IOMMU_DTE_CACHE_F_INTR_CTRL_SHIFT)
+                                        & IOMMU_DTE_CACHE_F_INTR_CTRL_MASK;
+                if (uIntrCtrl == IOMMU_INTR_CTRL_REMAP)
                 {
-                    PCIRTE_T pIrte = &pThis->paIrteCache[idxIrteCache].Irte;
-                    iommuAmdIrteRemapMsi(pMsiIn, pMsiOut, pIrte);
+                    /* Interrupt table length has been verified prior to adding entries to the cache. */
+                    uint16_t const offIrte      = IOMMU_GET_IRTE_OFF(pMsiIn->Data.u32);
+                    uint16_t const idxIrteCache = iommuAmdIrteCacheEntryLookup(pThis, uDevId, offIrte);
+                    if (idxIrteCache < RT_ELEMENTS(pThis->aIrteCache))
+                    {
+                        PCIRTE_T pIrte = &pThis->aIrteCache[idxIrteCache].Irte;
+                        Assert(pIrte->n.u1RemapEnable);
+                        Assert(pIrte->n.u3IntrType <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO);
+                        iommuAmdIrteRemapMsi(pMsiIn, pMsiOut, pIrte);
+                        rc = VINF_SUCCESS;
+                    }
+                }
+                else if (uIntrCtrl == IOMMU_INTR_CTRL_FWD_UNMAPPED)
+                {
+                    *pMsiOut = *pMsiIn;
                     rc = VINF_SUCCESS;
                 }
             }
-            else if (uIntrCtrl == IOMMU_INTR_CTRL_FWD_UNMAPPED)
-            {
-                *pMsiOut = *pMsiIn;
-                rc = VINF_SUCCESS;
-            }
-        }
-    }
-    else if (pDteCache->fFlags & IOMMU_DTECACHE_F_PRESENT)
-    {
-        *pMsiOut = *pMsiIn;
-        rc = VINF_SUCCESS;
+        }
+        else if (pDteCache->fFlags & IOMMU_DTE_CACHE_F_PRESENT)
+        {
+            *pMsiOut = *pMsiIn;
+            rc = VINF_SUCCESS;
+        }
     }
 
@@ -1391,21 +1552,16 @@
     Assert(offIrte != 0xffff);  /* Shouldn't be a valid IRTE table offset since sizeof(IRTE) is a multiple of 4. */
 
+    int rc = VINF_SUCCESS;
     PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
     IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
 
     /* Find an existing entry or get an unused slot. */
-    uint16_t const cIrteCache = pThis->cIrteCache;
-    uint16_t idxIrteCache = iommuAmdIrteCacheEntryLookup(pThis, uDevId, offIrte);
-    if (idxIrteCache == pThis->cIrteCache)
-        idxIrteCache = iommuAmdIrteCacheEntryGetUnused(pThis);
-
-    /* Update the cache entry. */
-    int rc;
-    if (idxIrteCache < cIrteCache)
-    {
-        PIRTECACHE pIrteCache = &pThis->paIrteCache[idxIrteCache];
-        pIrteCache->uKey      = IOMMU_IRTE_CACHE_KEY_MAKE(uDevId, offIrte);
-        pIrteCache->Irte.u32  = pIrte->u32;
-        rc = VINF_SUCCESS;
+    uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache);
+    uint16_t idxIrteCache     = iommuAmdIrteCacheEntryLookup(pThis, uDevId, offIrte);
+    if (   idxIrteCache < cIrteCache
+        || (idxIrteCache = iommuAmdIrteCacheEntryGetUnused(pThis)) < cIrteCache)
+    {
+        pThis->aIrteCache[idxIrteCache].uKey = IOMMU_IRTE_CACHE_KEY_MAKE(uDevId, offIrte);
+        pThis->aIrteCache[idxIrteCache].Irte = *pIrte;
     }
     else
@@ -1427,12 +1583,13 @@
     PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
     IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
-    uint16_t const cIrteCache = pThis->cIrteCache;
+    uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache);
     for (uint16_t i = 0; i < cIrteCache; i++)
     {
-        PIRTECACHE pIrteCache = &pThis->paIrteCache[i];
+        PIRTECACHE pIrteCache = &pThis->aIrteCache[i];
         if (uDevId == IOMMU_IRTE_CACHE_KEY_GET_DEVICE_ID(pIrteCache->uKey))
         {
-            pIrteCache->uKey      = IOMMU_IRTE_CACHE_KEY_NIL;
-            pIrteCache->Irte.u32  = 0;
+            pIrteCache->uKey     = IOMMU_IRTE_CACHE_KEY_NIL;
+            pIrteCache->Irte.u32 = 0;
+            /* There could be multiple IRTE entries for a device ID, continue searching. */
         }
     }
@@ -1450,10 +1607,9 @@
     PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
     IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
-    uint16_t const cIrteCache = pThis->cIrteCache;
+    uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache);
     for (uint16_t i = 0; i < cIrteCache; i++)
     {
-        PIRTECACHE pIrteCache = &pThis->paIrteCache[i];
-        pIrteCache->uKey = IOMMU_IRTE_CACHE_KEY_NIL;
-        pIrteCache->Irte.u32 = 0;
+        pThis->aIrteCache[i].uKey     = IOMMU_IRTE_CACHE_KEY_NIL;
+        pThis->aIrteCache[i].Irte.u32 = 0;
     }
     IOMMU_UNLOCK_CACHE(pDevIns, pThis);
@@ -1462,79 +1618,9 @@
 
 
-#ifdef IOMMU_WITH_DTE_CACHE
-/**
- * Updates the I/O device flags for the given device ID.
- *
- * @param   pDevIns     The IOMMU instance data.
- * @param   uDevId      The device ID (bus, device, function).
- * @param   pDte        The device table entry. Can be NULL only when @a fFlags is
- *                      0.
- * @param   fOrMask     The device flags (usually compound flags) to OR in with the
- *                      basic flags, see IOMMU_DTECACHE_F_XXX. Pass 0 to flush the DTE
- *                      from the cache.
- */
-static void iommuAmdDteCacheUpdate(PPDMDEVINS pDevIns, uint16_t uDevId, PCDTE_T pDte, uint16_t fOrMask)
-{
-    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
-    IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
-
-    if (fOrMask & IOMMU_DTECACHE_F_PRESENT)
-    {
-        Assert(pDte);
-        pThis->paDteCache[uDevId].fFlags    = iommuAmdGetBasicDevFlags(pDte) | fOrMask;
-        pThis->paDteCache[uDevId].uDomainId = pDte->n.u16DomainId;
-    }
-    else
-    {
-        pThis->paDteCache[uDevId].fFlags    = 0;
-        pThis->paDteCache[uDevId].uDomainId = 0;
-    }
-
-    IOMMU_UNLOCK_CACHE(pDevIns, pThis);
-}
-
-
-/**
- * Sets one or more I/O device flags if the device is present in the cache.
- *
- * @param   pDevIns         The IOMMU instance data.
- * @param   uDevId          The device ID (bus, device, function).
- * @param   fDevIoFlags     The device flags to set.
- */
-static void iommuAmdDteCacheSetFlags(PPDMDEVINS pDevIns, uint16_t uDevId, uint16_t fDevIoFlags)
-{
-    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
-    IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
-
-    if (fDevIoFlags & IOMMU_DTECACHE_F_PRESENT)
-        pThis->paDteCache[uDevId].fFlags |= fDevIoFlags;
-
-    IOMMU_UNLOCK_CACHE(pDevIns, pThis);
-}
-
-
-/**
- * Removes all entries in the device table entry cache.
- *
- * @param   pDevIns     The IOMMU instance data.
- */
-static void iommuAmdDteCacheRemoveAll(PPDMDEVINS pDevIns)
-{
-    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
-    IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
-
-    size_t const cbDteCache = sizeof(DTECACHE) * IOMMU_DTE_CACHE_MAX;
-    RT_BZERO(pThis->paDteCache, cbDteCache);
-
-    IOMMU_UNLOCK_CACHE(pDevIns, pThis);
-}
-#endif  /* IOMMU_WITH_DTE_CACHE */
-
-
 /**
  * Atomically reads the control register without locking the IOMMU device.
  *
  * @returns The control register.
- * @param   pThis     The IOMMU device state.
+ * @param   pThis   The shared IOMMU device state.
  */
 DECL_FORCE_INLINE(IOMMU_CTRL_T) iommuAmdGetCtrlUnlocked(PCIOMMU pThis)
@@ -3187,5 +3273,5 @@
  *
  * @param   pDevIns             The IOMMU instance data.
- * @param   fIoDevFlags         The I/O device flags, see IOMMU_DTECACHE_F_XXX.
+ * @param   fIoDevFlags         The I/O device flags, see IOMMU_DTE_CACHE_F_XXX.
  * @param   pIrte               The interrupt remapping table entry, can be NULL.
  * @param   enmOp               The IOMMU operation being performed.
@@ -3201,7 +3287,7 @@
     PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtIoPageFault;
 
-#ifdef IOMMU_WITH_IOTLBE_CACHE
-# define IOMMU_DTE_CACHE_SET_PF_RAISED(a_pDevIns, a_DevId)  iommuAmdDteCacheSetFlags((a_pDevIns), (a_DevId), \
-                                                                                     IOMMU_DTECACHE_F_IO_PAGE_FAULT_RAISED)
+#ifdef IOMMU_WITH_DTE_CACHE
+# define IOMMU_DTE_CACHE_SET_PF_RAISED(a_pDevIns, a_DevId)  iommuAmdDteCacheAddFlags((a_pDevIns), (a_DevId), \
+                                                                                     IOMMU_DTE_CACHE_F_IO_PAGE_FAULT_RAISED)
 #else
 # define IOMMU_DTE_CACHE_SET_PF_RAISED(a_pDevIns, a_DevId)  do { } while (0)
@@ -3212,9 +3298,7 @@
         || enmOp == IOMMUOP_MEM_WRITE)
     {
-        uint16_t const fSuppressIopf    = IOMMU_DTECACHE_F_VALID
-                                        | IOMMU_DTECACHE_F_SUPPRESS_IOPF
-                                        | IOMMU_DTECACHE_F_IO_PAGE_FAULT_RAISED;
-        uint16_t const fSuppressAllIopf = IOMMU_DTECACHE_F_VALID
-                                        | IOMMU_DTECACHE_F_SUPPRESS_ALL_IOPF;
+        uint16_t const fSuppressIopf    = IOMMU_DTE_CACHE_F_VALID
+                                        | IOMMU_DTE_CACHE_F_SUPPRESS_IOPF | IOMMU_DTE_CACHE_F_IO_PAGE_FAULT_RAISED;
+        uint16_t const fSuppressAllIopf = IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_SUPPRESS_ALL_IOPF;
         if (   (fIoDevFlags & fSuppressAllIopf) == fSuppressAllIopf
             || (fIoDevFlags & fSuppressIopf) == fSuppressIopf)
@@ -3225,10 +3309,8 @@
     else if (enmOp == IOMMUOP_INTR_REQ)
     {
-        uint16_t const fSuppressIopf = IOMMU_DTECACHE_F_VALID
-                                     | IOMMU_DTECACHE_F_INTR_MAP_VALID
-                                     | IOMMU_DTECACHE_F_IGNORE_UNMAPPED_INTR;
+        uint16_t const fSuppressIopf = IOMMU_DTE_CACHE_F_INTR_MAP_VALID | IOMMU_DTE_CACHE_F_IGNORE_UNMAPPED_INTR;
         if ((fIoDevFlags & fSuppressIopf) == fSuppressIopf)
             fSuppressEvtLogging = true;
-        else if (pIrte)
+        else if (pIrte)     /** @todo Make this compulsory and assert if it isn't provided. */
             fSuppressEvtLogging = pIrte->n.u1SuppressIoPf;
     }
@@ -3890,9 +3972,9 @@
                         STAM_COUNTER_INC(&pThis->StatAccessDtePermDenied);
 
-#if defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)
+#ifdef IOMMU_WITH_IOTLBE_CACHE
                     if (RT_SUCCESS(rc))
                     {
                         /* Update that addresses requires translation (cumulative permissions of DTE and I/O page tables). */
-                        iommuAmdDteCacheUpdate(pDevIns, uDevId, &Dte, IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_ADDR_TRANSLATE);
+                        iommuAmdDteCacheAdd(pDevIns, uDevId, &Dte, IOMMU_DTE_CACHE_F_ADDR_TRANSLATE);
                         /* Update IOTLB for the contiguous range of I/O virtual addresses. */
                         iommuAmdIotlbAddRange(pDevIns, Dte.n.u16DomainId, uIova & X86_PAGE_4K_BASE_MASK, cbPages,
@@ -3911,7 +3993,7 @@
                     rc = VINF_SUCCESS;
 
-#if defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)
+#ifdef IOMMU_WITH_IOTLBE_CACHE
                     /* Update that addresses permissions of DTE apply (but omit address translation). */
-                    iommuAmdDteCacheUpdate(pDevIns, uDevId, &Dte, IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_IO_PERM);
+                    iommuAmdDteCacheAdd(pDevIns, uDevId, &Dte, IOMMU_DTE_CACHE_F_IO_PERM);
 #endif
                 }
@@ -3944,7 +4026,7 @@
             cbContiguous = cbAccess;
 
-#if defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)
+#ifdef IOMMU_WITH_IOTLBE_CACHE
             /* Update that addresses don't require translation (nor permission checks) but a DTE is present. */
-            iommuAmdDteCacheUpdate(pDevIns, uDevId, &Dte, IOMMU_DTECACHE_F_PRESENT);
+            iommuAmdDteCacheAdd(pDevIns, uDevId, &Dte, 0 /* fFlags */);
 #endif
         }
@@ -3963,5 +4045,5 @@
 
 
-#if defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)
+#ifdef IOMMU_WITH_IOTLBE_CACHE
 /**
  * I/O page lookup callback for finding an I/O page from the IOTLB.
@@ -3988,8 +4070,9 @@
     Assert(!(uIovaPage & X86_PAGE_4K_OFFSET_MASK));
 
-    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    PIOMMU   pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
 
     STAM_PROFILE_ADV_START(&pThis->StatProfIotlbeLookup, a);
-    PCIOTLBE pIotlbe = iommuAmdIotlbLookup(pThis, pAux->uDomainId, uIovaPage);
+    PCIOTLBE pIotlbe = iommuAmdIotlbLookup(pThis, pThisR3, pAux->uDomainId, uIovaPage);
     STAM_PROFILE_ADV_STOP(&pThis->StatProfIotlbeLookup, a);
     if (pIotlbe)
@@ -4035,4 +4118,11 @@
     PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
 
+#define IOMMU_IOTLB_LOOKUP_FAILED(a_rc)   \
+    do {                                  \
+        *pGCPhysSpa    = NIL_RTGCPHYS;    \
+        *pcbContiguous = 0;               \
+        rc = (a_rc);                      \
+    } while (0)
+
     /*
      * We hold the cache lock across both the DTE and the IOTLB lookups (if any) because
@@ -4042,34 +4132,47 @@
 
     /* Lookup the DTE cache entry. */
-    PCDTECACHE pDteCache = &pThis->paDteCache[uDevId];
-    if ((pDteCache->fFlags & (IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_VALID | IOMMU_DTECACHE_F_ADDR_TRANSLATE))
-                          == (IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_VALID | IOMMU_DTECACHE_F_ADDR_TRANSLATE))
-    {
-        /* Lookup IOTLB entries. */
-        IOADDRRANGE AddrIn;
-        AddrIn.uAddr = uIova;
-        AddrIn.cb    = cbAccess;
-        AddrIn.fPerm = fPerm;
-
-        IOMMUOPAUX Aux;
-        Aux.enmOp     = enmOp;
-        Aux.pDte      = NULL;
-        Aux.uDeviceId = uDevId;
-        Aux.uDomainId = pDteCache->uDomainId;
-
-        IOADDRRANGE AddrOut;
-        rc = iommuAmdLookupIoAddrRange(pDevIns, iommuAmdCacheLookupPage, &AddrIn, &Aux, &AddrOut, NULL /* pcbPages */);
-        Assert(AddrOut.cb <= cbAccess);
-        *pGCPhysSpa    = AddrOut.uAddr;
-        *pcbContiguous = AddrOut.cb;
-    }
-    else if ((pDteCache->fFlags & (IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_VALID | IOMMU_DTECACHE_F_IO_PERM))
-                               == (IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_VALID | IOMMU_DTECACHE_F_IO_PERM))
-    {
-        /* Address translation is disabled, but DTE permissions apply. */
-        Assert(!(pDteCache->fFlags & IOMMU_DTECACHE_F_ADDR_TRANSLATE));
-        uint8_t const fDtePerm = (pDteCache->fFlags >> IOMMU_DTECACHE_F_IO_PERM_SHIFT) & IOMMU_DTECACHE_F_IO_PERM_MASK;
-        if ((fDtePerm & fPerm) == fPerm)
-        {
+    uint16_t const idxDteCache = iommuAmdDteCacheEntryLookup(pThis, uDevId);
+    if (idxDteCache < RT_ELEMENTS(pThis->aDteCache))
+    {
+        PCDTECACHE pDteCache = &pThis->aDteCache[idxDteCache];
+        if ((pDteCache->fFlags & (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_ADDR_TRANSLATE))
+                              == (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_ADDR_TRANSLATE))
+        {
+            /* Lookup IOTLB entries. */
+            IOADDRRANGE AddrIn;
+            AddrIn.uAddr = uIova;
+            AddrIn.cb    = cbAccess;
+            AddrIn.fPerm = fPerm;
+
+            IOMMUOPAUX Aux;
+            Aux.enmOp     = enmOp;
+            Aux.pDte      = NULL;
+            Aux.uDeviceId = uDevId;
+            Aux.uDomainId = pDteCache->uDomainId;
+
+            IOADDRRANGE AddrOut;
+            rc = iommuAmdLookupIoAddrRange(pDevIns, iommuAmdCacheLookupPage, &AddrIn, &Aux, &AddrOut, NULL /* pcbPages */);
+            Assert(AddrOut.cb <= cbAccess);
+            *pGCPhysSpa    = AddrOut.uAddr;
+            *pcbContiguous = AddrOut.cb;
+        }
+        else if ((pDteCache->fFlags & (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_IO_PERM))
+                                   == (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_IO_PERM))
+        {
+            /* Address translation is disabled, but DTE permissions apply. */
+            Assert(!(pDteCache->fFlags & IOMMU_DTE_CACHE_F_ADDR_TRANSLATE));
+            uint8_t const fDtePerm = (pDteCache->fFlags >> IOMMU_DTE_CACHE_F_IO_PERM_SHIFT) & IOMMU_DTE_CACHE_F_IO_PERM_MASK;
+            if ((fDtePerm & fPerm) == fPerm)
+            {
+                *pGCPhysSpa    = uIova;
+                *pcbContiguous = cbAccess;
+                rc = VINF_SUCCESS;
+            }
+            else
+                IOMMU_IOTLB_LOOKUP_FAILED(VERR_IOMMU_ADDR_ACCESS_DENIED);
+        }
+        else if (pDteCache->fFlags & IOMMU_DTE_CACHE_F_PRESENT)
+        {
+            /* Forward addresses untranslated, without checking permissions. */
             *pGCPhysSpa    = uIova;
             *pcbContiguous = cbAccess;
@@ -4077,39 +4180,16 @@
         }
         else
-        {
-            *pGCPhysSpa    = NIL_RTGCPHYS;
-            *pcbContiguous = 0;
-            rc = VERR_IOMMU_ADDR_ACCESS_DENIED;
-        }
-    }
-    else if (pDteCache->fFlags & IOMMU_DTECACHE_F_PRESENT)
-    {
-        /* Forward addresses untranslated, without checking permissions. */
-        *pGCPhysSpa    = uIova;
-        *pcbContiguous = cbAccess;
-        rc = VINF_SUCCESS;
+            IOMMU_IOTLB_LOOKUP_FAILED(VERR_NOT_FOUND);
     }
     else
-    {
-        rc = VERR_NOT_FOUND;
-        *pGCPhysSpa    = NIL_RTGCPHYS;
-        *pcbContiguous = 0;
-    }
+        IOMMU_IOTLB_LOOKUP_FAILED(VERR_NOT_FOUND);
 
     IOMMU_UNLOCK_CACHE(pDevIns, pThis);
 
-    /* Raise event if address translation resulted in a permission failure. */
-    if (rc == VERR_IOMMU_ADDR_ACCESS_DENIED)
-    {
-        EVT_IO_PAGE_FAULT_T EvtIoPageFault;
-        iommuAmdIoPageFaultEventInit(uDevId, pDteCache->uDomainId, uIova, true /* fPresent */,
-                                     false /* fRsvdNotZero */, true /* fPermDenied */, enmOp, &EvtIoPageFault);
-        iommuAmdIoPageFaultEventRaise(pDevIns, pDteCache->fFlags, NULL /* pIrte */, enmOp, &EvtIoPageFault,
-                                      kIoPageFaultType_PermDenied);
-    }
-
     return rc;
-}
-#endif /* IN_RING3 && IOMMU_WITH_IOTLBE_CACHE */
+
+#undef IOMMU_IOTLB_LOOKUP_FAILED
+}
+#endif /* IOMMU_WITH_IOTLBE_CACHE */
 
 
@@ -4176,5 +4256,5 @@
 
         int rc;
-#if defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)
+#ifdef IOMMU_WITH_IOTLBE_CACHE
         /* Lookup the IOVA from the cache. */
         rc = iommuAmdCacheLookup(pDevIns, uDevId, uIova, cbAccess, fPerm, enmOp, pGCPhysSpa, pcbContiguous);
@@ -4187,5 +4267,7 @@
             return rc;
         }
-        if (rc == VERR_OUT_OF_RANGE)
+        if (rc != VERR_OUT_OF_RANGE)
+        { /* likely */ }
+        else
         {
             /* Access stopped when translations resulted in non-contiguous memory, let caller resume access. */
@@ -4194,16 +4276,14 @@
             return VINF_SUCCESS;
         }
-        if (rc == VERR_IOMMU_ADDR_ACCESS_DENIED)
-        {
-            /* Access denied due to insufficient permissions. */
-            STAM_COUNTER_INC(&pThis->StatAccessCachePermDenied);
-            return rc;
-        }
-
-        /* Access incomplete as not all pages were in the cache. Lookup the rest from the device table. */
-        AssertMsg(rc == VERR_NOT_FOUND, ("Invalid cache lookup result: %Rrc\n", rc));
+
+        /*
+         * Access incomplete as not all pages were in the cache.
+         * Or permissions were denied for the access (which typically doesn't happen)
+         * so go through the slower path and raise the required event.
+         */
         AssertMsg(*pcbContiguous < cbAccess, ("Invalid size: cbContiguous=%zu cbAccess=%zu\n", *pcbContiguous, cbAccess));
         uIova    += *pcbContiguous;
         cbAccess -= *pcbContiguous;
+        /* FYI: We currently would also be including permission denied as cache misses. */
         STAM_COUNTER_INC(&pThis->StatAccessCacheMiss);
 #endif
@@ -4312,5 +4392,5 @@
 
     RTGCPHYS const GCPhysIntrTable = pDte->au64[2] & IOMMU_DTE_IRTE_ROOT_PTR_MASK;
-    uint16_t const cbIntrTable     = IOMMU_GET_INTR_TAB_LEN(pDte);
+    uint16_t const cbIntrTable     = IOMMU_DTE_GET_INTR_TAB_LEN(pDte);
     uint16_t const offIrte         = IOMMU_GET_IRTE_OFF(uDataIn);
     RTGCPHYS const GCPhysIrte      = GCPhysIntrTable + offIrte;
@@ -4378,9 +4458,5 @@
                     iommuAmdIrteRemapMsi(pMsiIn, pMsiOut, &Irte);
 #ifdef IOMMU_WITH_IRTE_CACHE
-                    /* Add/Update the interrupt cache with the remapped results. */
-                    uint16_t const offIrte = IOMMU_GET_IRTE_OFF(uMsiInData);
-                    int const rcUpdate = iommuAmdIrteCacheAdd(pDevIns, uDevId, offIrte, &Irte);
-                    if (RT_FAILURE(rcUpdate))
-                        LogRelMax(1, ("%s: Warning! Interrupt cache full. Consider increasing cache capacity.\n", IOMMU_LOG_PFX));
+                    iommuAmdIrteCacheAdd(pDevIns, uDevId, IOMMU_GET_IRTE_OFF(uMsiInData), &Irte);
 #endif
                     return VINF_SUCCESS;
@@ -4430,13 +4506,20 @@
 static int iommuAmdIntrTableLookup(PPDMDEVINS pDevIns, uint16_t uDevId, IOMMUOP enmOp, PCMSIMSG pMsiIn, PMSIMSG pMsiOut)
 {
-    /* Read the device table entry from memory. */
     LogFlowFunc(("uDevId=%#x (%#x:%#x:%#x) enmOp=%u\n", uDevId,
                  ((uDevId >> VBOX_PCI_BUS_SHIFT) & VBOX_PCI_BUS_MASK),
                  ((uDevId >> VBOX_PCI_DEVFN_DEV_SHIFT) & VBOX_PCI_DEVFN_DEV_MASK), (uDevId & VBOX_PCI_DEVFN_FUN_MASK), enmOp));
 
+    /* Read the device table entry from memory. */
     DTE_T Dte;
     int rc = iommuAmdDteRead(pDevIns, uDevId, enmOp, &Dte);
     if (RT_SUCCESS(rc))
     {
+#ifdef IOMMU_WITH_IRTE_CACHE
+        int rc2 = iommuAmdDteCacheAdd(pDevIns, uDevId, &Dte, 0 /* fFlags */);
+        if (RT_FAILURE(rc2))
+        {
+            LogRelMax(10, ("%s: IOMMU DTE cache is full.\n", IOMMU_LOG_PFX));
+        }
+#endif
         /* If the DTE is not valid, all interrupts are forwarded without remapping. */
         if (Dte.n.u1IntrMapValid)
@@ -4445,11 +4528,9 @@
             uint64_t const fRsvd0 = Dte.au64[2] & ~IOMMU_DTE_QWORD_2_VALID_MASK;
             uint64_t const fRsvd1 = Dte.au64[3] & ~IOMMU_DTE_QWORD_3_VALID_MASK;
-            if (RT_LIKELY(   !fRsvd0
-                          && !fRsvd1))
+            if (RT_LIKELY(!fRsvd0 && !fRsvd1))
             { /* likely */ }
             else
             {
-                LogFunc(("Invalid reserved bits in DTE (u64[2]=%#RX64 u64[3]=%#RX64) -> Illegal DTE\n", fRsvd0,
-                     fRsvd1));
+                LogFunc(("Invalid reserved bits in DTE (u64[2]=%#RX64 u64[3]=%#RX64) -> Illegal DTE\n", fRsvd0, fRsvd1));
                 EVT_ILLEGAL_DTE_T Event;
                 iommuAmdIllegalDteEventInit(uDevId, pMsiIn->Addr.u64, true /* fRsvdNotZero */, enmOp, &Event);
@@ -4457,9 +4538,4 @@
                 return VERR_IOMMU_INTR_REMAP_FAILED;
             }
-
-#ifdef IOMMU_WITH_IRTE_CACHE
-            /* Update the DTE cache -after- we've checked reserved bits (above) when the interrupt map is valid. */
-            iommuAmdDteCacheUpdate(pDevIns, uDevId, &Dte, IOMMU_DTECACHE_F_PRESENT);
-#endif
 
             /*
@@ -4585,8 +4661,4 @@
         else
         {
-#ifdef IOMMU_WITH_IRTE_CACHE
-            /* Update the DTE cache that the interrupt map isn't valid. */
-            iommuAmdDteCacheUpdate(pDevIns, uDevId, &Dte, IOMMU_DTECACHE_F_PRESENT);
-#endif
             LogFlowFunc(("DTE interrupt map not valid\n"));
             *pMsiOut = *pMsiIn;
@@ -4755,5 +4827,4 @@
         {
             STAM_COUNTER_INC(&pThis->StatCmdInvDte);
-#ifdef IOMMU_WITH_IOTLBE_CACHE
             PCCMD_INV_DTE_T pCmdInvDte = (PCCMD_INV_DTE_T)pCmd;
             AssertCompile(sizeof(*pCmdInvDte) == sizeof(*pCmd));
@@ -4763,12 +4834,11 @@
                 && !(pCmdInvDte->au64[1] & ~IOMMU_CMD_INV_DTE_QWORD_1_VALID_MASK))
             {
-                iommuAmdDteCacheUpdate(pDevIns, pCmdInvDte->n.u16DevId, NULL /* pDte */, 0 /* fFlags */);
+#ifdef IOMMU_WITH_DTE_CACHE
+                iommuAmdDteCacheRemove(pDevIns, pCmdInvDte->n.u16DevId);
+#endif
                 return VINF_SUCCESS;
             }
             iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
             return VERR_IOMMU_CMD_INVALID_FORMAT;
-#else
-            return VINF_SUCCESS;
-#endif
         }
 
@@ -4776,5 +4846,4 @@
         {
             STAM_COUNTER_INC(&pThis->StatCmdInvIommuPages);
-#ifdef IOMMU_WITH_IOTLBE_CACHE
             PCCMD_INV_IOMMU_PAGES_T pCmdInvPages = (PCCMD_INV_IOMMU_PAGES_T)pCmd;
             AssertCompile(sizeof(*pCmdInvPages) == sizeof(*pCmd));
@@ -4784,4 +4853,5 @@
                 && !(pCmdInvPages->au64[1] & ~IOMMU_CMD_INV_IOMMU_PAGES_QWORD_1_VALID_MASK))
             {
+#ifdef IOMMU_WITH_IOTLBE_CACHE
                 uint64_t const uIova = RT_MAKE_U64(pCmdInvPages->n.u20AddrLo << X86_PAGE_4K_SHIFT, pCmdInvPages->n.u32AddrHi);
                 uint16_t const uDomainId = pCmdInvPages->n.u16DomainId;
@@ -4830,12 +4900,9 @@
                     iommuAmdIotlbRemoveDomainId(pDevIns, uDomainId);
                 }
-
+#endif
                 return VINF_SUCCESS;
             }
             iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
             return VERR_IOMMU_CMD_INVALID_FORMAT;
-#else
-            return VINF_SUCCESS;
-#endif
         }
 
@@ -4898,5 +4965,4 @@
             if (pThis->ExtFeat.n.u1InvAllSup)
             {
-#ifdef IOMMU_WITH_IOTLBE_CACHE
                 PCCMD_INV_IOMMU_ALL_T pCmdInvAll = (PCCMD_INV_IOMMU_ALL_T)pCmd;
                 AssertCompile(sizeof(*pCmdInvAll) == sizeof(*pCmd));
@@ -4906,13 +4972,14 @@
                     && !(pCmdInvAll->au64[1] & ~IOMMU_CMD_INV_IOMMU_ALL_QWORD_1_VALID_MASK))
                 {
+#ifdef IOMMU_WITH_DTE_CACHE
                     iommuAmdDteCacheRemoveAll(pDevIns);
+#endif
+#ifdef IOMMU_WITH_IOTLBE_CACHE
                     iommuAmdIotlbRemoveAll(pDevIns);
+#endif
                     return VINF_SUCCESS;
                 }
                 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
                 return VERR_IOMMU_CMD_INVALID_FORMAT;
-#else
-                return VINF_SUCCESS;
-#endif
             }
             iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
@@ -5809,6 +5876,6 @@
     if (uIntrTabLen < IOMMU_DTE_INTR_TAB_LEN_MAX)
     {
-        uint16_t const cEntries    = IOMMU_GET_INTR_TAB_ENTRIES(pDte);
-        uint16_t const cbIntrTable = IOMMU_GET_INTR_TAB_LEN(pDte);
+        uint16_t const cEntries    = IOMMU_DTE_GET_INTR_TAB_ENTRIES(pDte);
+        uint16_t const cbIntrTable = IOMMU_DTE_GET_INTR_TAB_LEN(pDte);
         pHlp->pfnPrintf(pHlp, "%sInterrupt Table Length     = %#x (%u entries, %u bytes)\n", pszPrefix, uIntrTabLen, cEntries,
                         cbIntrTable);
@@ -5865,4 +5932,37 @@
 
 
+# ifdef IOMMU_WITH_DTE_CACHE
+/**
+ * @callback_method_impl{FNDBGFHANDLERDEV}
+ */
+static DECLCALLBACK(void) iommuAmdR3DbgInfoDteCache(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs)
+{
+    RT_NOREF(pszArgs);
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
+
+    uint16_t const cDteCache = RT_ELEMENTS(pThis->aDeviceIds);
+    pHlp->pfnPrintf(pHlp, "DTE Cache: Capacity=%u entries\n", cDteCache);
+    for (uint16_t i = 0; i < cDteCache; i++)
+    {
+        uint16_t const uDeviceId = pThis->aDeviceIds[i];
+        if (uDeviceId)
+        {
+            pHlp->pfnPrintf(pHlp, " Entry[%u]: Device=%#x (BDF %02x:%02x.%d)\n", i, uDeviceId,
+                            (uDeviceId >> VBOX_PCI_BUS_SHIFT) & VBOX_PCI_BUS_MASK,
+                            (uDeviceId >> VBOX_PCI_DEVFN_DEV_SHIFT) & VBOX_PCI_DEVFN_DEV_MASK,
+                            uDeviceId & VBOX_PCI_DEVFN_FUN_MASK);
+
+            PCDTECACHE pDteCache = &pThis->aDteCache[i];
+            pHlp->pfnPrintf(pHlp, "  Flags            = %#x\n", pDteCache->fFlags);
+            pHlp->pfnPrintf(pHlp, "  Domain Id        = %u\n",  pDteCache->uDomainId);
+            pHlp->pfnPrintf(pHlp, "\n");
+        }
+    }
+    IOMMU_UNLOCK_CACHE(pDevIns, pThis);
+}
+# endif /* IOMMU_WITH_DTE_CACHE */
+
+
 # ifdef IOMMU_WITH_IOTLBE_CACHE
 /**
@@ -5878,12 +5978,13 @@
         {
             pHlp->pfnPrintf(pHlp, "IOTLBEs for domain %u (%#x):\n", uDomainId, uDomainId);
-            PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+            PIOMMU   pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+            PIOMMUCC pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
             IOTLBEINFOARG Args;
-            Args.pIommu    = pThis;
+            Args.pIommuR3  = pThisR3;
             Args.pHlp      = pHlp;
             Args.uDomainId = uDomainId;
 
             IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
-            RTAvlU64DoWithAll(&pThis->TreeIotlbe, true /* fFromLeft */, iommuAmdR3IotlbEntryInfo, &Args);
+            RTAvlU64DoWithAll(&pThisR3->TreeIotlbe, true /* fFromLeft */, iommuAmdR3IotlbEntryInfo, &Args);
             IOMMU_UNLOCK_CACHE(pDevIns, pThis);
         }
@@ -5894,8 +5995,8 @@
         pHlp->pfnPrintf(pHlp, "Missing domain ID.\n");
 }
-# endif
-
-
-#ifdef IOMMU_WITH_IRTE_CACHE
+# endif  /* IOMMU_WITH_IOTLBE_CACHE */
+
+
+# ifdef IOMMU_WITH_IRTE_CACHE
 /**
  * Gets the interrupt type name for an interrupt type in the IRTE.
@@ -5917,5 +6018,5 @@
  * @callback_method_impl{FNDBGFHANDLERDEV}
  */
-static DECLCALLBACK(void) iommuAmdR3DbgInfoIrtes(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs)
+static DECLCALLBACK(void) iommuAmdR3DbgInfoIrteCache(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs)
 {
     RT_NOREF(pszArgs);
@@ -5924,9 +6025,9 @@
     IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
 
-    uint16_t const cIrteCache = pThis->cIrteCache;
+    uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache);
     pHlp->pfnPrintf(pHlp, "IRTE Cache: Capacity=%u entries\n", cIrteCache);
     for (uint16_t idxIrte = 0; idxIrte < cIrteCache; idxIrte++)
     {
-        PCIRTECACHE pIrteCache = &pThis->paIrteCache[idxIrte];
+        PCIRTECACHE pIrteCache = &pThis->aIrteCache[idxIrte];
         uint32_t const uKey = pIrteCache->uKey;
         if (uKey != IOMMU_IRTE_CACHE_KEY_NIL)
@@ -5954,5 +6055,5 @@
     IOMMU_UNLOCK_CACHE(pDevIns, pThis);
 }
-#endif
+# endif /* IOMMU_WITH_IRTE_CACHE */
 
 
@@ -6166,5 +6267,5 @@
 {
     PDMDEV_CHECK_VERSIONS_RETURN_QUIET(pDevIns);
-    PIOMMU    pThis  = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    PIOMMU   pThis  = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
     PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
     LogFlowFunc(("\n"));
@@ -6179,29 +6280,11 @@
     }
 
-#ifdef IOMMU_WITH_DTE_CACHE
-    /* Destroy the DTE cache. */
-    if (pThis->paDteCache)
-    {
-        PDMDevHlpMMHeapFree(pDevIns, pThis->paDteCache);
-        pThis->paDteCache = NULL;
-    }
-#endif
-
 #ifdef IOMMU_WITH_IOTLBE_CACHE
     /* Destroy the IOTLB cache. */
-    if (pThis->paIotlbes)
-    {
-        PDMDevHlpMMHeapFree(pDevIns, pThis->paIotlbes);
-        pThis->paIotlbes = NULL;
-        pThis->idxUnusedIotlbe = 0;
-    }
-#endif
-
-#ifdef IOMMU_WITH_IRTE_CACHE
-    /* Destroy the interrupt cache. */
-    if (pThis->paIrteCache)
-    {
-        PDMDevHlpMMHeapFree(pDevIns, pThis->paIrteCache);
-        pThis->paIrteCache = NULL;
+    if (pThisCC->paIotlbes)
+    {
+        PDMDevHlpMMHeapFree(pDevIns, pThisCC->paIotlbes);
+        pThisCC->paIotlbes = NULL;
+        pThisCC->idxUnusedIotlbe = 0;
     }
 #endif
@@ -6385,11 +6468,14 @@
      */
     PDMDevHlpDBGFInfoRegister(pDevIns, "iommu",    "Display IOMMU state.", iommuAmdR3DbgInfo);
-    PDMDevHlpDBGFInfoRegister(pDevIns, "iommudte", "Display the DTE for a device. Arguments: DeviceID.", iommuAmdR3DbgInfoDte);
-    PDMDevHlpDBGFInfoRegister(pDevIns, "iommudevtabs", "Display active IOMMU device tables.", iommuAmdR3DbgInfoDevTabs);
+    PDMDevHlpDBGFInfoRegister(pDevIns, "iommudte", "Display the DTE for a device (from memory). Arguments: DeviceID.", iommuAmdR3DbgInfoDte);
+    PDMDevHlpDBGFInfoRegister(pDevIns, "iommudevtabs", "Display I/O device tables with translation enabled.", iommuAmdR3DbgInfoDevTabs);
 #ifdef IOMMU_WITH_IOTLBE_CACHE
     PDMDevHlpDBGFInfoRegister(pDevIns, "iommutlb", "Display IOTLBs for a domain. Arguments: DomainID.", iommuAmdR3DbgInfoIotlb);
 #endif
+#ifdef IOMMU_WITH_DTE_CACHE
+    PDMDevHlpDBGFInfoRegister(pDevIns, "iommudtecache", "Display the DTE cache.", iommuAmdR3DbgInfoDteCache);
+#endif
 #ifdef IOMMU_WITH_IRTE_CACHE
-    PDMDevHlpDBGFInfoRegister(pDevIns, "iommuirtes", "Display the IRTE cache.", iommuAmdR3DbgInfoIrtes);
+    PDMDevHlpDBGFInfoRegister(pDevIns, "iommuirtecache", "Display the IRTE cache.", iommuAmdR3DbgInfoIrteCache);
 #endif
 
@@ -6439,14 +6525,14 @@
     PDMDevHlpSTAMRegister(pDevIns, &pThis->StatProfIrteCacheLookup, STAMTYPE_PROFILE, "Profile/IrteCacheLookup", STAMUNIT_TICKS_PER_CALL, "Profiling IRTE cache lookup.");
 
-    PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheHit, STAMTYPE_COUNTER, "Access/CacheHit", STAMUNIT_OCCURENCES, "Number of cache hits.");
-    PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheMiss, STAMTYPE_COUNTER, "Access/CacheMiss", STAMUNIT_OCCURENCES, "Number of cache misses.");
-    PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheHitFull, STAMTYPE_COUNTER, "Access/CacheHitFull", STAMUNIT_OCCURENCES, "Number of accesses that was entirely in the cache.");
-    PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheNonContig, STAMTYPE_COUNTER, "Access/CacheNonContig", STAMUNIT_OCCURENCES, "Number of cache accesses that resulted in non-contiguous translated regions.");
-    PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCachePermDenied, STAMTYPE_COUNTER, "Access/CacheAddrDenied", STAMUNIT_OCCURENCES, "Number of cache accesses that resulted in denied permissions.");
-    PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessDteNonContig, STAMTYPE_COUNTER, "Access/DteNonContig", STAMUNIT_OCCURENCES, "Number of DTE accesses that resulted in non-contiguous translated regions.");
-    PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessDtePermDenied, STAMTYPE_COUNTER, "Access/DtePermDenied", STAMUNIT_OCCURENCES, "Number of DTE accesses that resulted in denied permissions.");
-
-    PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIntrCacheHit, STAMTYPE_COUNTER, "Intr/CacheHit", STAMUNIT_OCCURENCES, "Number of cache hits.");
-    PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIntrCacheMiss, STAMTYPE_COUNTER, "Intr/CacheMiss", STAMUNIT_OCCURENCES, "Number of cache misses.");
+    PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheHit, STAMTYPE_COUNTER, "MemAccess/CacheHit", STAMUNIT_OCCURENCES, "Number of cache hits.");
+    PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheMiss, STAMTYPE_COUNTER, "MemAccess/CacheMiss", STAMUNIT_OCCURENCES, "Number of cache misses.");
+    PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheHitFull, STAMTYPE_COUNTER, "MemAccess/CacheHitFull", STAMUNIT_OCCURENCES, "Number of accesses that was entirely in the cache.");
+    PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheNonContig, STAMTYPE_COUNTER, "MemAccess/CacheNonContig", STAMUNIT_OCCURENCES, "Number of cache accesses that resulted in non-contiguous translated regions.");
+    PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCachePermDenied, STAMTYPE_COUNTER, "MemAccess/CacheAddrDenied", STAMUNIT_OCCURENCES, "Number of cache accesses that resulted in denied permissions.");
+    PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessDteNonContig, STAMTYPE_COUNTER, "MemAccess/DteNonContig", STAMUNIT_OCCURENCES, "Number of DTE accesses that resulted in non-contiguous translated regions.");
+    PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessDtePermDenied, STAMTYPE_COUNTER, "MemAccess/DtePermDenied", STAMUNIT_OCCURENCES, "Number of DTE accesses that resulted in denied permissions.");
+
+    PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIntrCacheHit, STAMTYPE_COUNTER, "Interrupt/CacheHit", STAMUNIT_OCCURENCES, "Number of cache hits.");
+    PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIntrCacheMiss, STAMTYPE_COUNTER, "Interrupt/CacheMiss", STAMUNIT_OCCURENCES, "Number of cache misses.");
 # endif
 
@@ -6471,18 +6557,6 @@
     AssertLogRelRCReturn(rc, rc);
 
-    /*
-     * Allocate the device table entry cache.
-     * PCI devices are hotpluggable and we don't have a way of querying the bus for all
-     * assigned PCI BDF slots. So while this wastes some memory, it should work regardless
-     * of how code, features and devices around the IOMMU change.
-     */
-    size_t cbCache = 0;
-    size_t const cbDteCache = sizeof(DTECACHE) * IOMMU_DTE_CACHE_MAX;
-    AssertCompile(IOMMU_DTE_CACHE_MAX >= UINT16_MAX);
-    pThis->paDteCache = (PDTECACHE)PDMDevHlpMMHeapAllocZ(pDevIns, cbDteCache);
-    if (!pThis->paDteCache)
-        return PDMDevHlpVMSetError(pDevIns, VERR_NO_MEMORY, RT_SRC_POS,
-                                   N_("Failed to allocate %zu bytes from the hyperheap for the DTE cache."), cbDteCache);
-    cbCache += cbDteCache;
+    /* Several places in this code relies on this basic assumption - assert it! */
+    AssertCompile(RT_ELEMENTS(pThis->aDeviceIds) == RT_ELEMENTS(pThis->aDteCache));
 #endif
 
@@ -6496,46 +6570,10 @@
      */
     size_t const cbIotlbes = sizeof(IOTLBE) * IOMMU_IOTLBE_MAX;
-    pThis->paIotlbes = (PIOTLBE)PDMDevHlpMMHeapAllocZ(pDevIns, cbIotlbes);
-    if (!pThis->paIotlbes)
+    pThisCC->paIotlbes = (PIOTLBE)PDMDevHlpMMHeapAllocZ(pDevIns, cbIotlbes);
+    if (!pThisCC->paIotlbes)
         return PDMDevHlpVMSetError(pDevIns, VERR_NO_MEMORY, RT_SRC_POS,
                                    N_("Failed to allocate %zu bytes from the hyperheap for the IOTLB cache."), cbIotlbes);
-    RTListInit(&pThis->LstLruIotlbe);
-    cbCache += cbIotlbes;
-#endif
-
-#ifdef IOMMU_WITH_IRTE_CACHE
-    /* Maximum number of elements in the IRTE cache. */
-    PCPDMDEVHLPR3 pHlp = pDevIns->pHlpR3;
-    rc = pHlp->pfnCFGMQueryU16Def(pCfg, "InterruptCacheCount", &pThis->cIrteCache, IOMMU_IRTE_CACHE_DEFAULT);
-    if (RT_FAILURE(rc))
-        return PDMDevHlpVMSetError(pDevIns, rc, RT_SRC_POS, N_("IOMMU: failed to read InterruptCacheCount as integer"));
-    AssertCompile(IOMMU_IRTE_CACHE_DEFAULT >= IOMMU_IRTE_CACHE_MIN);
-    AssertCompile(IOMMU_IRTE_CACHE_DEFAULT <= IOMMU_IRTE_CACHE_MAX);
-    if (   pThis->cIrteCache < IOMMU_IRTE_CACHE_MIN
-        || pThis->cIrteCache > IOMMU_IRTE_CACHE_MAX)
-        return PDMDevHlpVMSetError(pDevIns, VERR_INVALID_PARAMETER, RT_SRC_POS,
-                                   N_("IOMMU: InterruptCacheCount invalid (must be between %u and %u)."),
-                                   IOMMU_IRTE_CACHE_MIN, IOMMU_IRTE_CACHE_MAX);
-
-    /*
-     * Allocate the interrupt remapping cache.
-     * This is an array of devices and their corresponding interrupt remap table entries.
-     * Typically only a handful of PCI devices are used in VMs so this is kept rather small.
-     * If we ever need to support a vast number of interrupt-remapped devices, we can
-     * implement a more sophisticated cache solution then.
-     *
-     * NOTE: IRTE cache entry keys are initialized later in this function by calling
-     *       iommuAmdR3Reset() -> iommuAmdIrteCacheRemoveAll().
-     */
-    size_t const cbIrteCache = sizeof(IRTECACHE) * pThis->cIrteCache;
-    pThis->paIrteCache = (PIRTECACHE)PDMDevHlpMMHeapAllocZ(pDevIns, cbIrteCache);
-    if (!pThis->paIrteCache)
-        return PDMDevHlpVMSetError(pDevIns, VERR_NO_MEMORY, RT_SRC_POS,
-                                   N_("Failed to allocate %zu bytes from the hyperheap for the interrupt cache."), cbIrteCache);
-    cbCache += cbIrteCache;
-#endif
-
-#ifdef IOMMU_WITH_DTE_CACHE
-    LogRel(("%s: Allocated %zu bytes from the hyperheap for the IOMMU cache\n", IOMMU_LOG_PFX, cbCache));
+    RTListInit(&pThisCC->LstLruIotlbe);
+    LogRel(("%s: Allocated %zu bytes from the hyperheap for the IOTLB cache\n", IOMMU_LOG_PFX, cbIotlbes));
 #endif
 
