Index: /trunk/include/VBox/iommu-amd.h
===================================================================
--- /trunk/include/VBox/iommu-amd.h	(revision 87785)
+++ /trunk/include/VBox/iommu-amd.h	(revision 87786)
@@ -579,4 +579,8 @@
 /** Gets the interrupt table length (in bytes) given the DTE pointer. */
 #define IOMMU_GET_INTR_TAB_LEN(a_pDte)          (IOMMU_GET_INTR_TAB_ENTRIES(a_pDte) * sizeof(IRTE_T))
+/** Mask of interrupt control bits. */
+#define IOMMU_DTE_INTR_CTRL_MASK                0x3
+/** Gets the interrupt control bits given the DTE pointer. */
+#define IOMMU_GET_INTR_CTRL(a_pDte)             (((a_pDte)->au64[2] >> 60) & IOMMU_DTE_INTR_CTRL_MASK)
 
 /**
@@ -694,4 +698,6 @@
  *  interrupt message. See AMD IOMMU spec. 2.2.5 "Interrupt Remapping Tables". */
 #define IOMMU_MSI_DATA_IRTE_OFFSET_MASK     UINT32_C(0x000007ff)
+/** Gets the IRTE offset from the originating MSI interrupt message. */
+#define IOMMU_GET_IRTE_OFF(a_u32MsiData)    (((a_u32MsiData) & IOMMU_MSI_DATA_IRTE_OFFSET_MASK) * sizeof(IRTE_T))
 
 /**
@@ -882,4 +888,10 @@
 } CMD_INV_INTR_TABLE_T;
 AssertCompileSize(CMD_INV_INTR_TABLE_T, 16);
+/** Pointer to a invalidate interrupt table command. */
+typedef CMD_INV_INTR_TABLE_T *PCMD_INV_INTR_TABLE_T;
+/** Pointer to a const invalidate interrupt table command. */
+typedef CMD_INV_INTR_TABLE_T const *PCCMD_INV_INTR_TABLE_T;
+#define IOMMU_CMD_INV_INTR_TABLE_QWORD_0_VALID_MASK         UINT64_C(0xf00000000000ffff)
+#define IOMMU_CMD_INV_INTR_TABLE_QWORD_1_VALID_MASK         UINT64_C(0x0000000000000000)
 
 /**
Index: /trunk/src/VBox/Devices/Bus/DevIommuAmd.cpp
===================================================================
--- /trunk/src/VBox/Devices/Bus/DevIommuAmd.cpp	(revision 87785)
+++ /trunk/src/VBox/Devices/Bus/DevIommuAmd.cpp	(revision 87786)
@@ -46,10 +46,38 @@
 /** The IOMMU device instance magic. */
 #define IOMMU_MAGIC                                 0x10acce55
+
 /** Enable the IOTLBE cache. */
 #define IOMMU_WITH_IOTLBE_CACHE
+/** Enable the interrupt cache. */
+#define IOMMU_WITH_IRTE_CACHE
+
+/* The DTE cache is mandatory for the IOTLB or interrupt cache to work. */
+#if defined(IOMMU_WITH_IOTLBE_CACHE) || defined(IOMMU_WITH_IRTE_CACHE)
+# define IOMMU_WITH_DTE_CACHE
+#endif
+
+#ifdef IOMMU_WITH_IRTE_CACHE
+/** The maximum number of interrupt cache entries configurable through CFGM. */
+# define IOMMU_IRTE_CACHE_MAX                       32
+/** The default number of interrupt cache entries. */
+# define IOMMU_IRTE_CACHE_DEFAULT                   16
+/** The minimum number of interrupt cache entries configurable through CFGM. */
+# define IOMMU_IRTE_CACHE_MIN                       8
+
+/** A NIL IRTE cache entry key. */
+# define IOMMU_IRTE_CACHE_KEY_NIL                   (~(uint32_t)0U)
+/** Gets the device ID from an IRTE cache entry key. */
+# define IOMMU_IRTE_CACHE_KEY_GET_DEVICE_ID(a_Key)  RT_HIWORD(a_Key)
+/** Gets the IRTE offset from an IRTE cache entry key. */
+# define IOMMU_IRTE_CACHE_KEY_GET_OFF(a_Key)        RT_LOWORD(a_Key)
+/** Makes an IRTE cache entry key.
+ *
+ * Bits 31:16 is the device ID (Bus, Device, Function).
+ * Bits  15:0 is the offset into the IRTE table.
+ */
+# define IOMMU_IRTE_CACHE_KEY_MAKE(a_DevId, a_off)  RT_MAKE_U32(a_off, a_DevId)
+#endif  /* IOMMU_WITH_IRTE_CACHE */
 
 #ifdef IOMMU_WITH_IOTLBE_CACHE
-/** The maximum number of DTE entries. */
-# define IOMMU_DTE_CACHE_MAX                        UINT16_MAX
 /** The maximum number of IOTLB entries. */
 # define IOMMU_IOTLBE_MAX                           96
@@ -60,5 +88,5 @@
 /** The number of bits to shift for the domain ID of the IOTLBE key. */
 # define IOMMU_IOTLB_DOMAIN_ID_SHIFT                40
-/** The mask of bits for the domain ID of the IOTLBE key. */
+/** A NIL IOTLB key. */
 # define IOMMU_IOTLB_KEY_NIL                        UINT64_C(0)
 /** Gets the domain ID from an IOTLB entry key. */
@@ -77,4 +105,71 @@
 # define IOMMU_IOTLB_KEY_MAKE(a_DomainId, a_uIova)  (  ((uint64_t)(a_DomainId) << IOMMU_IOTLB_DOMAIN_ID_SHIFT) \
                                                      | (((a_uIova) >> X86_PAGE_4K_SHIFT) & IOMMU_IOTLB_IOVA_MASK))
+#endif  /* IOMMU_WITH_IOTLBE_CACHE */
+
+#ifdef IOMMU_WITH_DTE_CACHE
+/** The maximum number of DTE entries. */
+# define IOMMU_DTE_CACHE_MAX                        UINT16_MAX
+
+/** @name IOMMU_DTECACHE_F_XXX: DTE cache flags.
+ *
+ *  Some of these flags are "basic" i.e. they correspond directly to their bits in
+ *  the DTE. The rest of the flags are based on checks or operations on several DTE
+ *  bits.
+ *
+ *  The basic flags are:
+ *    - VALID                (DTE.V)
+ *    - IO_PERM_READ         (DTE.IR)
+ *    - IO_PERM_WRITE        (DTE.IW)
+ *    - IO_PERM_RSVD         (bit following DTE.IW reserved for future & to keep
+ *                            masking consistent)
+ *    - SUPPRESS_ALL_IOPF    (DTE.SA)
+ *    - SUPPRESS_IOPF        (DTE.SE)
+ *    - INTR_MAP_VALID       (DTE.IV)
+ *    - IGNORE_UNMAPPED_INTR (DTE.IG)
+ *
+ *  @see iommuAmdGetBasicDevFlags()
+ *  @{ */
+/** The DTE is present. */
+# define IOMMU_DTECACHE_F_PRESENT                       RT_BIT(0)
+/** The DTE is valid. */
+# define IOMMU_DTECACHE_F_VALID                         RT_BIT(1)
+/** The DTE permissions apply for address translations. */
+# define IOMMU_DTECACHE_F_IO_PERM                       RT_BIT(2)
+/** DTE permission - I/O read allowed. */
+# define IOMMU_DTECACHE_F_IO_PERM_READ                  RT_BIT(3)
+/** DTE permission - I/O write allowed. */
+# define IOMMU_DTECACHE_F_IO_PERM_WRITE                 RT_BIT(4)
+/** DTE permission - reserved. */
+# define IOMMU_DTECACHE_F_IO_PERM_RSVD                  RT_BIT(5)
+/** Address translation required. */
+# define IOMMU_DTECACHE_F_ADDR_TRANSLATE                RT_BIT(6)
+/** Suppress all I/O page faults. */
+# define IOMMU_DTECACHE_F_SUPPRESS_ALL_IOPF             RT_BIT(7)
+/** Suppress I/O page faults. */
+# define IOMMU_DTECACHE_F_SUPPRESS_IOPF                 RT_BIT(8)
+/** Interrupt map valid. */
+# define IOMMU_DTECACHE_F_INTR_MAP_VALID                RT_BIT(9)
+/** Ignore unmapped interrupts. */
+# define IOMMU_DTECACHE_F_IGNORE_UNMAPPED_INTR          RT_BIT(10)
+/** An I/O page fault has been raised for this device. */
+# define IOMMU_DTECACHE_F_IO_PAGE_FAULT_RAISED          RT_BIT(11)
+/** Fixed and arbitrary interrupt control: Target Abort. */
+# define IOMMU_DTECACHE_F_INTR_CTRL_TARGET_ABORT        RT_BIT(12)
+/** Fixed and arbitrary interrupt control: Forward unmapped. */
+# define IOMMU_DTECACHE_F_INTR_CTRL_FWD_UNMAPPED        RT_BIT(13)
+/** Fixed and arbitrary interrupt control: Remapped. */
+# define IOMMU_DTECACHE_F_INTR_CTRL_REMAPPED            RT_BIT(14)
+/** Fixed and arbitrary interrupt control: Reserved. */
+# define IOMMU_DTECACHE_F_INTR_CTRL_RSVD                RT_BIT(15)
+/** @} */
+
+/** The number of bits to shift I/O device flags for DTE permissions. */
+# define IOMMU_DTECACHE_F_IO_PERM_SHIFT                 3
+/** The mask of DTE permissions in I/O device flags. */
+# define IOMMU_DTECACHE_F_IO_PERM_MASK                  0x3
+/** The number of bits to shift I/O device flags for interrupt control bits. */
+# define IOMMU_DTECACHE_F_INTR_CTRL_SHIFT               12
+/** The mask of interrupt control bits in I/O device flags. */
+# define IOMMU_DTECACHE_F_INTR_CTRL_MASK                0x3
 
 /** Acquires the cache lock. */
@@ -100,54 +195,5 @@
 /** Releases the cache lock.  */
 # define IOMMU_UNLOCK_CACHE(a_pDevIns, a_pThis)     PDMDevHlpCritSectLeave((a_pDevIns), &(a_pThis)->CritSectCache)
-#endif
-
-/** @name IOMMU_DEV_F_XXX: I/O device flags.
- *
- *  Some of these flags are "basic" i.e. they correspond directly to their bits in
- *  the DTE. The rest of the flags are based on checks or operations on several DTE
- *  bits.
- *
- *  The basic flags are:
- *    - VALID                (DTE.V)
- *    - IO_PERM_READ         (DTE.IR)
- *    - IO_PERM_WRITE        (DTE.IW)
- *    - IO_PERM_RSVD         (bit following DTW.IW reserved for future & to keep
- *                            masking consistent)
- *    - SUPPRESS_ALL_IOPF    (DTE.SA)
- *    - SUPPRESS_IOPF        (DTE.SE)
- *    - INTR_MAP_VALID       (DTE.IV)
- *    - IGNORE_UNMAPPED_INTR (DTE.IG)
- *
- *  @sa iommuAmdGetBasicDevFlags()
- *  @{ */
-/** The DTE is present. */
-#define IOMMU_DEV_F_PRESENT                         RT_BIT(0)
-/** The DTE is valid. */
-#define IOMMU_DEV_F_VALID                           RT_BIT(1)
-/** DTE permissions apply for address translations. */
-#define IOMMU_DEV_F_IO_PERM                         RT_BIT(2)
-/** DTE permission - I/O read allowed. */
-#define IOMMU_DEV_F_IO_PERM_READ                    RT_BIT(3)
-/** DTE permission - I/O write allowed. */
-#define IOMMU_DEV_F_IO_PERM_WRITE                   RT_BIT(4)
-/** DTE permission - reserved. */
-#define IOMMU_DEV_F_IO_PERM_RSVD                    RT_BIT(5)
-/** Address translation required. */
-#define IOMMU_DEV_F_ADDR_TRANSLATE                  RT_BIT(6)
-/** Suppress all I/O page faults. */
-#define IOMMU_DEV_F_SUPPRESS_ALL_IOPF               RT_BIT(7)
-/** Suppress I/O page faults. */
-#define IOMMU_DEV_F_SUPPRESS_IOPF                   RT_BIT(8)
-/** Interrupt map valid. */
-#define IOMMU_DEV_F_INTR_MAP_VALID                  RT_BIT(9)
-/** Ignore unmapped interrupts. */
-#define IOMMU_DEV_F_IGNORE_UNMAPPED_INTR            RT_BIT(10)
-/** An I/O page fault has been raised for this device. */
-#define IOMMU_DEV_F_IO_PAGE_FAULT_RAISED            RT_BIT(11)
-/** @} */
-/** The number of bits to shift I/O device flags for DTE permissions. */
-#define IOMMU_DEV_F_IO_PERM_SHIFT                   3
-/** The mask of DTE permissions in I/O device flags. */
-#define IOMMU_DEV_F_IO_PERM_MASK                    0x3
+#endif  /* IOMMU_WITH_DTE_CACHE */
 
 /** Gets the page offset mask given the number of bits to shift. */
@@ -231,24 +277,25 @@
 typedef IOADDRRANGE const *PCIOADDRRANGE;
 
-/**
- * IOMMU I/O Device.
- * Used for caching as well as passing flags to events.
- */
-typedef struct IODEVICE
-{
-    /** This device's flags, see IOMMU_DEV_F_XXX. */
-    uint16_t         fFlags;
+#ifdef IOMMU_WITH_DTE_CACHE
+/**
+ * Device Table Entry Cache.
+ */
+typedef struct DTECACHE
+{
+    /** This device's flags, see IOMMU_DTECACHE_F_XXX. */
+    uint16_t        fFlags;
     /** The domain ID assigned for this device by software. */
     uint16_t        uDomainId;
-} IODEVICE;
+} DTECACHE;
 /** Pointer to an I/O device struct. */
-typedef IODEVICE *PIODEVICE;
+typedef DTECACHE *PDTECACHE;
 /** Pointer to a const I/O device struct. */
-typedef IODEVICE *PCIODEVICE;
-AssertCompileSize(IODEVICE, 4);
+typedef DTECACHE const *PCDTECACHE;
+AssertCompileSize(DTECACHE, 4);
+#endif  /* IOMMU_WITH_DTE_CACHE */
 
 #ifdef IOMMU_WITH_IOTLBE_CACHE
 /**
- * IOMMU I/O TLB Entry.
+ * I/O TLB Entry.
  * Keep this as small and aligned as possible.
  */
@@ -272,4 +319,22 @@
 #endif  /* IOMMU_WITH_IOTLBE_CACHE */
 
+#ifdef IOMMU_WITH_IRTE_CACHE
+/**
+ * Interrupt Remap Table Entry Cache.
+ */
+typedef struct IRTECACHE
+{
+    /** The IRTE. */
+    IRTE_T              Irte;
+    /** The key, see IOMMU_IRTE_CACHE_KEY_MAKE. */
+    uint32_t            uKey;
+} IRTECACHE;
+/** Pointer to an IRTE cache struct. */
+typedef IRTECACHE *PIRTECACHE;
+/** Pointer to a const IRTE cache struct. */
+typedef IRTECACHE const *PCIRTECACHE;
+AssertCompileSizeAlignment(IRTECACHE, 4);
+#endif /* IOMMU_WITH_IRTE_CACHE */
+
 /**
  * The shared IOMMU device state.
@@ -296,12 +361,14 @@
     IOMMMIOHANDLE               hMmio;
 
-#ifdef IOMMU_WITH_IOTLBE_CACHE
+#ifdef IOMMU_WITH_DTE_CACHE
     /** The critsect that protects the cache from concurrent access. */
     PDMCRITSECT                 CritSectCache;
-    /** L1 Cache - Maps [DeviceId] to [DomainId]. */
-    PIODEVICE                   paDevices;
+    /** Maps [DeviceId] to [DTE cache entry]. */
+    PDTECACHE                   paDteCache;
+#endif
+#ifdef IOMMU_WITH_IOTLBE_CACHE
     /** Pointer to array of pre-allocated IOTLBEs. */
     PIOTLBE                     paIotlbes;
-    /** L2 Cache - Maps [DomainId,Iova] to [IOTLBE]. */
+    /** Maps [DomainId,Iova] to [IOTLBE]. */
     AVLU64TREE                  TreeIotlbe;
     /** LRU list anchor for IOTLB entries. */
@@ -311,4 +378,12 @@
     /** Number of cached IOTLB entries in the tree. */
     uint32_t                    cCachedIotlbes;
+#endif
+#ifdef IOMMU_WITH_IRTE_CACHE
+    /** Maps [DeviceId] to [IRTE]. */
+    PIRTECACHE                  paIrteCache;
+    /** Maximum number of entries in the IRTE cache. */
+    uint16_t                    cIrteCache;
+    /** Padding. */
+    uint16_t                    auPadding[3];
 #endif
 
@@ -465,6 +540,9 @@
     STAMCOUNTER                 StatIotlbeLazyEvictReuse;  /**< Number of IOTLB entries re-used after lazy eviction. */
 
+    STAMPROFILEADV              StatProfDteLookup;         /**< Profiling of I/O page walk (from memory). */
     STAMPROFILEADV              StatProfIotlbeLookup;      /**< Profiling of IOTLB entry lookup (from cache). */
-    STAMPROFILEADV              StatProfDteLookup;         /**< Profiling of I/O page walk (from memory). */
+
+    STAMPROFILEADV              StatProfIrteLookup;        /**< Profiling of IRTE entry lookup (from memory). */
+    STAMPROFILEADV              StatProfIrteCacheLookup;   /**< Profiling of IRTE entry lookup (from cache). */
 
     STAMCOUNTER                 StatAccessCacheHit;        /**< Number of IOTLB cache hits. */
@@ -475,4 +553,7 @@
     STAMCOUNTER                 StatAccessDteNonContig;    /**< Number of DTE accesses resulting in non-contiguous access. */
     STAMCOUNTER                 StatAccessDtePermDenied;   /**< Number of DTE accesses resulting in insufficient permissions. */
+
+    STAMCOUNTER                 StatIntrCacheHit;          /**< Number of interrupt cache hits. */
+    STAMCOUNTER                 StatIntrCacheMiss;         /**< Number of interrupt cache misses. */
     /** @} */
 #endif
@@ -486,9 +567,14 @@
 AssertCompileMemberAlignment(IOMMU, hEvtCmdThread, 8);
 AssertCompileMemberAlignment(IOMMU, hMmio, 8);
+#ifdef IOMMU_WITH_DTE_CACHE
+AssertCompileMemberAlignment(IOMMU, paDteCache, 8);
+#endif
 #ifdef IOMMU_WITH_IOTLBE_CACHE
-AssertCompileMemberAlignment(IOMMU, paDevices, 8);
 AssertCompileMemberAlignment(IOMMU, paIotlbes, 8);
 AssertCompileMemberAlignment(IOMMU, TreeIotlbe, 8);
 AssertCompileMemberAlignment(IOMMU, LstLruIotlbe, 8);
+#endif
+#ifdef IOMMU_WITH_IRTE_CACHE
+AssertCompileMemberAlignment(IOMMU, paIrteCache, 8);
 #endif
 AssertCompileMemberAlignment(IOMMU, IommuBar, 8);
@@ -708,24 +794,4 @@
 
 
-#if 0
-/**
- * Gets the number of (unconsumed) commands in the command buffer.
- *
- * @returns The number of commands in the command buffer.
- * @param   pThis     The IOMMU device state.
- */
-static uint32_t iommuAmdGetCmdBufEntryCount(PIOMMU pThis)
-{
-    uint32_t const idxTail = pThis->CmdBufTailPtr.n.off >> IOMMU_CMD_GENERIC_SHIFT;
-    uint32_t const idxHead = pThis->CmdBufHeadPtr.n.off >> IOMMU_CMD_GENERIC_SHIFT;
-    if (idxTail >= idxHead)
-        return idxTail - idxHead;
-
-    uint32_t const cMaxCmds = iommuAmdGetBufMaxEntries(pThis->CmdBufBaseAddr.n.u4Len);
-    return cMaxCmds - idxHead + idxTail;
-}
-#endif
-
-
 /**
  * Checks whether two consecutive I/O page lookup results translates to a physically
@@ -767,14 +833,14 @@
     if (pDte->n.u1Valid)
     {
-        fFlags |= IOMMU_DEV_F_VALID;
+        fFlags |= IOMMU_DTECACHE_F_VALID;
 
         if (pDte->n.u1SuppressAllPfEvents)
-            fFlags |= IOMMU_DEV_F_SUPPRESS_ALL_IOPF;
+            fFlags |= IOMMU_DTECACHE_F_SUPPRESS_ALL_IOPF;
         if (pDte->n.u1SuppressPfEvents)
-            fFlags |= IOMMU_DEV_F_SUPPRESS_IOPF;
+            fFlags |= IOMMU_DTECACHE_F_SUPPRESS_IOPF;
 
         uint16_t const fDtePerm = (pDte->au64[0] >> IOMMU_IO_PERM_SHIFT) & IOMMU_IO_PERM_MASK;
-        AssertCompile(IOMMU_DEV_F_IO_PERM_MASK == IOMMU_IO_PERM_MASK);
-        fFlags |= fDtePerm << IOMMU_DEV_F_IO_PERM_SHIFT;
+        AssertCompile(IOMMU_DTECACHE_F_IO_PERM_MASK == IOMMU_IO_PERM_MASK);
+        fFlags |= fDtePerm << IOMMU_DTECACHE_F_IO_PERM_SHIFT;
     }
 
@@ -782,9 +848,33 @@
     if (pDte->n.u1IntrMapValid)
     {
-        fFlags |= IOMMU_DEV_F_INTR_MAP_VALID;
+        fFlags |= IOMMU_DTECACHE_F_INTR_MAP_VALID;
         if (pDte->n.u1IgnoreUnmappedIntrs)
-            fFlags |= IOMMU_DEV_F_IGNORE_UNMAPPED_INTR;
+            fFlags |= IOMMU_DTECACHE_F_IGNORE_UNMAPPED_INTR;
+
+        uint16_t const fIntrCtrl = IOMMU_GET_INTR_CTRL(pDte);
+        AssertCompile(IOMMU_DTECACHE_F_INTR_CTRL_MASK == IOMMU_DTE_INTR_CTRL_MASK);
+        fFlags |= fIntrCtrl << IOMMU_DTECACHE_F_INTR_CTRL_SHIFT;
     }
     return fFlags;
+}
+
+
+/**
+ * Remaps the source MSI to the destination MSI given the IRTE.
+ *
+ * @param   pMsiIn      The source MSI.
+ * @param   pMsiOut     Where to store the remapped MSI.
+ * @param   pIrte       The IRTE used for the remapping.
+ */
+static void iommuAmdIrteRemapMsi(PCMSIMSG pMsiIn, PMSIMSG pMsiOut, PCIRTE_T pIrte)
+{
+    /* Preserve all bits from the source MSI address and data that don't map 1:1 from the IRTE. */
+    *pMsiOut = *pMsiIn;
+
+    pMsiOut->Addr.n.u1DestMode = pIrte->n.u1DestMode;
+    pMsiOut->Addr.n.u8DestId   = pIrte->n.u8Dest;
+
+    pMsiOut->Data.n.u8Vector       = pIrte->n.u8Vector;
+    pMsiOut->Data.n.u3DeliveryMode = pIrte->n.u3IntrType;
 }
 
@@ -823,5 +913,5 @@
 
 
-#ifdef IN_RING3
+# ifdef IN_RING3
 /**
  * Dumps the IOTLB entry via the debug info helper.
@@ -863,5 +953,5 @@
     return VINF_SUCCESS;
 }
-#endif /* IN_RING3 */
+# endif /* IN_RING3 */
 
 
@@ -1158,5 +1248,5 @@
 
     IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
-    /** @todo Check level 1 cache? */
+    /** @todo Re-check DTE cache? */
     do
     {
@@ -1168,6 +1258,209 @@
     IOMMU_UNLOCK_CACHE(pDevIns, pThis);
 }
-
-
+#endif  /* IOMMU_WITH_IOTLBE_CACHE */
+
+
+#ifdef IOMMU_WITH_IRTE_CACHE
+/**
+ * Looks up an IRTE cache entry.
+ *
+ * @returns Index of the found entry, or cache capacity if not found.
+ * @param   pThis       The IOMMU device state.
+ * @param   uDevId      The device ID (bus, device, function).
+ * @param   offIrte     The offset into the interrupt remap table.
+ */
+static uint16_t iommuAmdIrteCacheEntryLookup(PCIOMMU pThis, uint16_t uDevId, uint16_t offIrte)
+{
+    /** @todo Consider sorting and binary search when the cache capacity grows.
+     *  For the IRTE cache this should be okay since typically guests do not alter the
+     *  interrupt remapping once programmed, so hopefully sorting shouldn't happen
+     *  often. */
+    uint32_t const uKey = IOMMU_IRTE_CACHE_KEY_MAKE(uDevId, offIrte);
+    uint16_t const cIrteCache = pThis->cIrteCache;
+    for (uint16_t i = 0; i < cIrteCache; i++)
+    {
+        PCIRTECACHE pIrteCache = &pThis->paIrteCache[i];
+        if (pIrteCache->uKey == uKey)
+            return i;
+    }
+    return cIrteCache;
+}
+
+
+/**
+ * Gets a free/unused IRTE cache entry.
+ *
+ * @returns The index of an unused entry, or cache capacity if the cache is full.
+ * @param   pThis       The IOMMU device state.
+ */
+static uint16_t iommuAmdIrteCacheEntryGetUnused(PCIOMMU pThis)
+{
+    uint16_t const cIrteCache = pThis->cIrteCache;
+    for (uint16_t i = 0; i < cIrteCache; i++)
+    {
+        PCIRTECACHE pIrteCache = &pThis->paIrteCache[i];
+        if (pIrteCache->uKey == IOMMU_IRTE_CACHE_KEY_NIL)
+        {
+            Assert(!pIrteCache->Irte.u32);
+            return i;
+        }
+    }
+    return cIrteCache;
+}
+
+
+/**
+ * Looks up the IRTE cache for the given MSI.
+ *
+ * @returns VBox status code.
+ * @param   pDevIns     The IOMMU instance data.
+ * @param   uDevId      The device ID (bus, device, function).
+ * @param   enmOp       The IOMMU operation being performed.
+ * @param   pMsiIn      The source MSI.
+ * @param   pMsiOut     Where to store the remapped MSI.
+ */
+static int iommuAmdIrteCacheLookup(PPDMDEVINS pDevIns, uint16_t uDevId, IOMMUOP enmOp, PCMSIMSG pMsiIn, PMSIMSG pMsiOut)
+{
+    RT_NOREF(enmOp); /* May need it if we have to report errors (currently we fallback to the slower path to do that). */
+
+    int rc = VERR_NOT_FOUND;
+    /* Deal with such cases in the slower/fallback path. */
+    if ((pMsiIn->Addr.u64 & VBOX_MSI_ADDR_ADDR_MASK) == VBOX_MSI_ADDR_BASE)
+    { /* likely */ }
+    else
+        return rc;
+
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
+
+    PCDTECACHE pDteCache = &pThis->paDteCache[uDevId];
+    if ((pDteCache->fFlags & (IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_INTR_MAP_VALID))
+                          == (IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_INTR_MAP_VALID))
+    {
+        Assert((pMsiIn->Addr.u64 & VBOX_MSI_ADDR_ADDR_MASK) == VBOX_MSI_ADDR_BASE);        /* Paranoia. */
+
+        /* Currently, we only cache remapping of fixed and arbitrated interrupts. */
+        uint8_t const u8DeliveryMode = pMsiIn->Data.n.u3DeliveryMode;
+        if (u8DeliveryMode <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO)
+        {
+            uint8_t const uIntrCtrl = (pDteCache->fFlags >> IOMMU_DTECACHE_F_INTR_CTRL_SHIFT)
+                                    & IOMMU_DTECACHE_F_INTR_CTRL_MASK;
+            if (uIntrCtrl == IOMMU_INTR_CTRL_REMAP)
+            {
+                /* Interrupt table length has been verified prior to adding entries to the cache. */
+                uint16_t const offIrte = IOMMU_GET_IRTE_OFF(pMsiIn->Data.u32);
+                uint16_t const idxIrteCache = iommuAmdIrteCacheEntryLookup(pThis, uDevId, offIrte);
+                if (idxIrteCache < pThis->cIrteCache)
+                {
+                    PCIRTE_T pIrte = &pThis->paIrteCache[idxIrteCache].Irte;
+                    iommuAmdIrteRemapMsi(pMsiIn, pMsiOut, pIrte);
+                    rc = VINF_SUCCESS;
+                }
+            }
+            else if (uIntrCtrl == IOMMU_INTR_CTRL_FWD_UNMAPPED)
+            {
+                *pMsiOut = *pMsiIn;
+                rc = VINF_SUCCESS;
+            }
+        }
+    }
+    else if (pDteCache->fFlags & IOMMU_DTECACHE_F_PRESENT)
+    {
+        *pMsiOut = *pMsiIn;
+        rc = VINF_SUCCESS;
+    }
+
+    IOMMU_UNLOCK_CACHE(pDevIns, pThis);
+    return rc;
+}
+
+
+/**
+ * Adds or updates the IRTE cache for the given IRTE.
+ *
+ * @returns VBox status code.
+ * @retval  VERR_OUT_OF_RESOURCES if the cache is full.
+ *
+ * @param   pDevIns     The IOMMU instance data.
+ * @param   uDevId      The device ID (bus, device, function).
+ * @param   offIrte     The offset into the interrupt remap table.
+ * @param   pIrte       The IRTE to cache.
+ */
+static int iommuAmdIrteCacheAdd(PPDMDEVINS pDevIns, uint16_t uDevId, uint16_t offIrte, PCIRTE_T pIrte)
+{
+    Assert(offIrte != 0xffff);  /* Shouldn't be a valid IRTE table offset since sizeof(IRTE) is a multiple of 4. */
+
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
+
+    /* Find an existing entry or get an unused slot. */
+    uint16_t const cIrteCache = pThis->cIrteCache;
+    uint16_t idxIrteCache = iommuAmdIrteCacheEntryLookup(pThis, uDevId, offIrte);
+    if (idxIrteCache == pThis->cIrteCache)
+        idxIrteCache = iommuAmdIrteCacheEntryGetUnused(pThis);
+
+    /* Update the cache entry. */
+    int rc;
+    if (idxIrteCache < cIrteCache)
+    {
+        PIRTECACHE pIrteCache = &pThis->paIrteCache[idxIrteCache];
+        pIrteCache->uKey      = IOMMU_IRTE_CACHE_KEY_MAKE(uDevId, offIrte);
+        pIrteCache->Irte.u32  = pIrte->u32;
+        rc = VINF_SUCCESS;
+    }
+    else
+        rc = VERR_OUT_OF_RESOURCES;
+
+    IOMMU_UNLOCK_CACHE(pDevIns, pThis);
+    return rc;
+}
+
+
+/**
+ * Removes IRTE cache entries for the given device ID.
+ *
+ * @param   pDevIns     The IOMMU instance data.
+ * @param   uDevId      The device ID (bus, device, function).
+ */
+static void iommuAmdIrteCacheRemove(PPDMDEVINS pDevIns, uint16_t uDevId)
+{
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
+    uint16_t const cIrteCache = pThis->cIrteCache;
+    for (uint16_t i = 0; i < cIrteCache; i++)
+    {
+        PIRTECACHE pIrteCache = &pThis->paIrteCache[i];
+        if (uDevId == IOMMU_IRTE_CACHE_KEY_GET_DEVICE_ID(pIrteCache->uKey))
+        {
+            pIrteCache->uKey      = IOMMU_IRTE_CACHE_KEY_NIL;
+            pIrteCache->Irte.u32  = 0;
+        }
+    }
+    IOMMU_UNLOCK_CACHE(pDevIns, pThis);
+}
+
+
+/**
+ * Removes all IRTE cache entries.
+ *
+ * @param   pDevIns     The IOMMU instance data.
+ */
+static void iommuAmdIrteCacheRemoveAll(PPDMDEVINS pDevIns)
+{
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
+    uint16_t const cIrteCache = pThis->cIrteCache;
+    for (uint16_t i = 0; i < cIrteCache; i++)
+    {
+        PIRTECACHE pIrteCache = &pThis->paIrteCache[i];
+        pIrteCache->uKey = IOMMU_IRTE_CACHE_KEY_NIL;
+        pIrteCache->Irte.u32 = 0;
+    }
+    IOMMU_UNLOCK_CACHE(pDevIns, pThis);
+}
+#endif  /* IOMMU_WITH_IRTE_CACHE */
+
+
+#ifdef IOMMU_WITH_DTE_CACHE
 /**
  * Updates the I/O device flags for the given device ID.
@@ -1178,5 +1471,5 @@
  *                      0.
  * @param   fOrMask     The device flags (usually compound flags) to OR in with the
- *                      basic flags, see IOMMU_DEV_F_XXX. Pass 0 to flush the DTE
+ *                      basic flags, see IOMMU_DTECACHE_F_XXX. Pass 0 to flush the DTE
  *                      from the cache.
  */
@@ -1186,14 +1479,14 @@
     IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
 
-    if (fOrMask & IOMMU_DEV_F_PRESENT)
+    if (fOrMask & IOMMU_DTECACHE_F_PRESENT)
     {
         Assert(pDte);
-        pThis->paDevices[uDevId].fFlags    = iommuAmdGetBasicDevFlags(pDte) | fOrMask;
-        pThis->paDevices[uDevId].uDomainId = pDte->n.u16DomainId;
+        pThis->paDteCache[uDevId].fFlags    = iommuAmdGetBasicDevFlags(pDte) | fOrMask;
+        pThis->paDteCache[uDevId].uDomainId = pDte->n.u16DomainId;
     }
     else
     {
-        pThis->paDevices[uDevId].fFlags    = 0;
-        pThis->paDevices[uDevId].uDomainId = 0;
+        pThis->paDteCache[uDevId].fFlags    = 0;
+        pThis->paDteCache[uDevId].uDomainId = 0;
     }
 
@@ -1214,6 +1507,6 @@
     IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
 
-    if (fDevIoFlags & IOMMU_DEV_F_PRESENT)
-        pThis->paDevices[uDevId].fFlags |= fDevIoFlags;
+    if (fDevIoFlags & IOMMU_DTECACHE_F_PRESENT)
+        pThis->paDteCache[uDevId].fFlags |= fDevIoFlags;
 
     IOMMU_UNLOCK_CACHE(pDevIns, pThis);
@@ -1231,12 +1524,18 @@
     IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
 
-    size_t const cbDevices = sizeof(IODEVICE) * IOMMU_DTE_CACHE_MAX;
-    RT_BZERO(pThis->paDevices, cbDevices);
+    size_t const cbDteCache = sizeof(DTECACHE) * IOMMU_DTE_CACHE_MAX;
+    RT_BZERO(pThis->paDteCache, cbDteCache);
 
     IOMMU_UNLOCK_CACHE(pDevIns, pThis);
 }
-#endif  /* IOMMU_WITH_IOTLBE_CACHE */
-
-
+#endif  /* IOMMU_WITH_DTE_CACHE */
+
+
+/**
+ * Atomically reads the control register without locking the IOMMU device.
+ *
+ * @returns The control register.
+ * @param   pThis     The IOMMU device state.
+ */
 DECL_FORCE_INLINE(IOMMU_CTRL_T) iommuAmdGetCtrlUnlocked(PCIOMMU pThis)
 {
@@ -2888,5 +3187,5 @@
  *
  * @param   pDevIns             The IOMMU instance data.
- * @param   fIoDevFlags         The I/O device flags, see IOMMU_DEV_F_XXX.
+ * @param   fIoDevFlags         The I/O device flags, see IOMMU_DTECACHE_F_XXX.
  * @param   pIrte               The interrupt remapping table entry, can be NULL.
  * @param   enmOp               The IOMMU operation being performed.
@@ -2904,5 +3203,5 @@
 #ifdef IOMMU_WITH_IOTLBE_CACHE
 # define IOMMU_DTE_CACHE_SET_PF_RAISED(a_pDevIns, a_DevId)  iommuAmdDteCacheSetFlags((a_pDevIns), (a_DevId), \
-                                                                                     IOMMU_DEV_F_IO_PAGE_FAULT_RAISED)
+                                                                                     IOMMU_DTECACHE_F_IO_PAGE_FAULT_RAISED)
 #else
 # define IOMMU_DTE_CACHE_SET_PF_RAISED(a_pDevIns, a_DevId)  do { } while (0)
@@ -2913,6 +3212,9 @@
         || enmOp == IOMMUOP_MEM_WRITE)
     {
-        uint16_t const fSuppressIopf    = IOMMU_DEV_F_VALID | IOMMU_DEV_F_SUPPRESS_IOPF | IOMMU_DEV_F_IO_PAGE_FAULT_RAISED;
-        uint16_t const fSuppressAllIopf = IOMMU_DEV_F_VALID | IOMMU_DEV_F_SUPPRESS_ALL_IOPF;
+        uint16_t const fSuppressIopf    = IOMMU_DTECACHE_F_VALID
+                                        | IOMMU_DTECACHE_F_SUPPRESS_IOPF
+                                        | IOMMU_DTECACHE_F_IO_PAGE_FAULT_RAISED;
+        uint16_t const fSuppressAllIopf = IOMMU_DTECACHE_F_VALID
+                                        | IOMMU_DTECACHE_F_SUPPRESS_ALL_IOPF;
         if (   (fIoDevFlags & fSuppressAllIopf) == fSuppressAllIopf
             || (fIoDevFlags & fSuppressIopf) == fSuppressIopf)
@@ -2923,5 +3225,7 @@
     else if (enmOp == IOMMUOP_INTR_REQ)
     {
-        uint16_t const fSuppressIopf = IOMMU_DEV_F_VALID | IOMMU_DEV_F_INTR_MAP_VALID | IOMMU_DEV_F_IGNORE_UNMAPPED_INTR;
+        uint16_t const fSuppressIopf = IOMMU_DTECACHE_F_VALID
+                                     | IOMMU_DTECACHE_F_INTR_MAP_VALID
+                                     | IOMMU_DTECACHE_F_IGNORE_UNMAPPED_INTR;
         if ((fIoDevFlags & fSuppressIopf) == fSuppressIopf)
             fSuppressEvtLogging = true;
@@ -3590,5 +3894,5 @@
                     {
                         /* Update that addresses requires translation (cumulative permissions of DTE and I/O page tables). */
-                        iommuAmdDteCacheUpdate(pDevIns, uDevId, &Dte, IOMMU_DEV_F_PRESENT | IOMMU_DEV_F_ADDR_TRANSLATE);
+                        iommuAmdDteCacheUpdate(pDevIns, uDevId, &Dte, IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_ADDR_TRANSLATE);
                         /* Update IOTLB for the contiguous range of I/O virtual addresses. */
                         iommuAmdIotlbAddRange(pDevIns, Dte.n.u16DomainId, uIova & X86_PAGE_4K_BASE_MASK, cbPages,
@@ -3609,5 +3913,5 @@
 #if defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)
                     /* Update that addresses permissions of DTE apply (but omit address translation). */
-                    iommuAmdDteCacheUpdate(pDevIns, uDevId, &Dte, IOMMU_DEV_F_PRESENT | IOMMU_DEV_F_IO_PERM);
+                    iommuAmdDteCacheUpdate(pDevIns, uDevId, &Dte, IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_IO_PERM);
 #endif
                 }
@@ -3642,5 +3946,5 @@
 #if defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)
             /* Update that addresses don't require translation (nor permission checks) but a DTE is present. */
-            iommuAmdDteCacheUpdate(pDevIns, uDevId, &Dte, IOMMU_DEV_F_PRESENT);
+            iommuAmdDteCacheUpdate(pDevIns, uDevId, &Dte, IOMMU_DTECACHE_F_PRESENT);
 #endif
         }
@@ -3732,15 +4036,15 @@
 
     /*
-     * We hold the cache lock across both the device and the IOTLB lookups (if any) because
-     * we don't want the device cache to be invalidate while we perform IOTBL lookups.
+     * We hold the cache lock across both the DTE and the IOTLB lookups (if any) because
+     * we don't want the DTE cache to be invalidated while we perform IOTLB lookups.
      */
     IOMMU_LOCK_CACHE(pDevIns, pThis);
 
-    /* Lookup the device from the level 1 cache. */
-    PCIODEVICE pDevice = &pThis->paDevices[uDevId];
-    if ((pDevice->fFlags & (IOMMU_DEV_F_PRESENT | IOMMU_DEV_F_VALID | IOMMU_DEV_F_ADDR_TRANSLATE))
-                        == (IOMMU_DEV_F_PRESENT | IOMMU_DEV_F_VALID | IOMMU_DEV_F_ADDR_TRANSLATE))
-    {
-        /* Lookup the IOTLB entries from the level 2 cache. */
+    /* Lookup the DTE cache entry. */
+    PCDTECACHE pDteCache = &pThis->paDteCache[uDevId];
+    if ((pDteCache->fFlags & (IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_VALID | IOMMU_DTECACHE_F_ADDR_TRANSLATE))
+                          == (IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_VALID | IOMMU_DTECACHE_F_ADDR_TRANSLATE))
+    {
+        /* Lookup IOTLB entries. */
         IOADDRRANGE AddrIn;
         AddrIn.uAddr = uIova;
@@ -3752,5 +4056,5 @@
         Aux.pDte      = NULL;
         Aux.uDeviceId = uDevId;
-        Aux.uDomainId = pDevice->uDomainId;
+        Aux.uDomainId = pDteCache->uDomainId;
 
         IOADDRRANGE AddrOut;
@@ -3760,10 +4064,10 @@
         *pcbContiguous = AddrOut.cb;
     }
-    else if ((pDevice->fFlags & (IOMMU_DEV_F_PRESENT | IOMMU_DEV_F_VALID | IOMMU_DEV_F_IO_PERM))
-                             == (IOMMU_DEV_F_PRESENT | IOMMU_DEV_F_VALID | IOMMU_DEV_F_IO_PERM))
+    else if ((pDteCache->fFlags & (IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_VALID | IOMMU_DTECACHE_F_IO_PERM))
+                               == (IOMMU_DTECACHE_F_PRESENT | IOMMU_DTECACHE_F_VALID | IOMMU_DTECACHE_F_IO_PERM))
     {
         /* Address translation is disabled, but DTE permissions apply. */
-        Assert(!(pDevice->fFlags & IOMMU_DEV_F_ADDR_TRANSLATE));
-        uint8_t const fDtePerm = (pDevice->fFlags >> IOMMU_DEV_F_IO_PERM_SHIFT) & IOMMU_DEV_F_IO_PERM_MASK;
+        Assert(!(pDteCache->fFlags & IOMMU_DTECACHE_F_ADDR_TRANSLATE));
+        uint8_t const fDtePerm = (pDteCache->fFlags >> IOMMU_DTECACHE_F_IO_PERM_SHIFT) & IOMMU_DTECACHE_F_IO_PERM_MASK;
         if ((fDtePerm & fPerm) == fPerm)
         {
@@ -3779,5 +4083,5 @@
         }
     }
-    else if (pDevice->fFlags & IOMMU_DEV_F_PRESENT)
+    else if (pDteCache->fFlags & IOMMU_DTECACHE_F_PRESENT)
     {
         /* Forward addresses untranslated, without checking permissions. */
@@ -3799,7 +4103,7 @@
     {
         EVT_IO_PAGE_FAULT_T EvtIoPageFault;
-        iommuAmdIoPageFaultEventInit(uDevId, pDevice->uDomainId, uIova, true /* fPresent */,
+        iommuAmdIoPageFaultEventInit(uDevId, pDteCache->uDomainId, uIova, true /* fPresent */,
                                      false /* fRsvdNotZero */, true /* fPermDenied */, enmOp, &EvtIoPageFault);
-        iommuAmdIoPageFaultEventRaise(pDevIns, pDevice->fFlags, NULL /* pIrte */, enmOp, &EvtIoPageFault,
+        iommuAmdIoPageFaultEventRaise(pDevIns, pDteCache->fFlags, NULL /* pIrte */, enmOp, &EvtIoPageFault,
                                       kIoPageFaultType_PermDenied);
     }
@@ -3807,5 +4111,5 @@
     return rc;
 }
-#endif /* IOMMU_WITH_IOTLBE_CACHE */
+#endif /* IN_RING3 && IOMMU_WITH_IOTLBE_CACHE */
 
 
@@ -4009,5 +4313,5 @@
     RTGCPHYS const GCPhysIntrTable = pDte->au64[2] & IOMMU_DTE_IRTE_ROOT_PTR_MASK;
     uint16_t const cbIntrTable     = IOMMU_GET_INTR_TAB_LEN(pDte);
-    uint16_t const offIrte         = (uDataIn & IOMMU_MSI_DATA_IRTE_OFFSET_MASK) * sizeof(IRTE_T);
+    uint16_t const offIrte         = IOMMU_GET_IRTE_OFF(uDataIn);
     RTGCPHYS const GCPhysIrte      = GCPhysIntrTable + offIrte;
 
@@ -4062,5 +4366,6 @@
 
     IRTE_T Irte;
-    int rc = iommuAmdIrteRead(pDevIns, uDevId, pDte, pMsiIn->Addr.u64, pMsiIn->Data.u32, enmOp, &Irte);
+    uint32_t const uMsiInData = pMsiIn->Data.u32;
+    int rc = iommuAmdIrteRead(pDevIns, uDevId, pDte, pMsiIn->Addr.u64, uMsiInData, enmOp, &Irte);
     if (RT_SUCCESS(rc))
     {
@@ -4071,12 +4376,12 @@
                 if (Irte.n.u3IntrType <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO)
                 {
-                    /* Preserve all bits from the source MSI address and data that don't map 1:1 from the IRTE. */
-                    *pMsiOut = *pMsiIn;
-
-                    pMsiOut->Addr.n.u1DestMode = Irte.n.u1DestMode;
-                    pMsiOut->Addr.n.u8DestId   = Irte.n.u8Dest;
-
-                    pMsiOut->Data.n.u8Vector       = Irte.n.u8Vector;
-                    pMsiOut->Data.n.u3DeliveryMode = Irte.n.u3IntrType;
+                    iommuAmdIrteRemapMsi(pMsiIn, pMsiOut, &Irte);
+#ifdef IOMMU_WITH_IRTE_CACHE
+                    /* Add/Update the interrupt cache with the remapped results. */
+                    uint16_t const offIrte = IOMMU_GET_IRTE_OFF(uMsiInData);
+                    int const rcUpdate = iommuAmdIrteCacheAdd(pDevIns, uDevId, offIrte, &Irte);
+                    if (RT_FAILURE(rcUpdate))
+                        LogRelMax(1, ("%s: Warning! Interrupt cache full. Consider increasing cache capacity.\n", IOMMU_LOG_PFX));
+#endif
                     return VINF_SUCCESS;
                 }
@@ -4152,4 +4457,9 @@
                 return VERR_IOMMU_INTR_REMAP_FAILED;
             }
+
+#ifdef IOMMU_WITH_IRTE_CACHE
+            /* Update the DTE cache -after- we've checked reserved bits (above) when the interrupt map is valid. */
+            iommuAmdDteCacheUpdate(pDevIns, uDevId, &Dte, IOMMU_DTECACHE_F_PRESENT);
+#endif
 
             /*
@@ -4268,4 +4578,5 @@
             else
             {
+                /** @todo should we cause a PCI target abort here? */
                 LogFunc(("MSI address region invalid %#RX64\n", pMsiIn->Addr.u64));
                 return VERR_IOMMU_INTR_REMAP_FAILED;
@@ -4274,5 +4585,8 @@
         else
         {
-            /** @todo IOMMU: Add to interrupt remapping cache. */
+#ifdef IOMMU_WITH_IRTE_CACHE
+            /* Record in the DTE cache that the interrupt map isn't valid. */
+            iommuAmdDteCacheUpdate(pDevIns, uDevId, &Dte, IOMMU_DTECACHE_F_PRESENT);
+#endif
             LogFlowFunc(("DTE interrupt map not valid\n"));
             *pMsiOut = *pMsiIn;
@@ -4309,7 +4623,22 @@
     {
         STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMsiRemap));
-        /** @todo Cache? */
-
-        return iommuAmdIntrTableLookup(pDevIns, uDevId, IOMMUOP_INTR_REQ, pMsiIn, pMsiOut);
+
+        int rc;
+#ifdef IOMMU_WITH_IRTE_CACHE
+        STAM_PROFILE_ADV_START(&pThis->StatProfIrteCacheLookup, a);
+        rc = iommuAmdIrteCacheLookup(pDevIns, uDevId, IOMMUOP_INTR_REQ, pMsiIn, pMsiOut);
+        STAM_PROFILE_ADV_STOP(&pThis->StatProfIrteCacheLookup, a);
+        if (RT_SUCCESS(rc))
+        {
+            STAM_COUNTER_INC(&pThis->StatIntrCacheHit);
+            return VINF_SUCCESS;
+        }
+        STAM_COUNTER_INC(&pThis->StatIntrCacheMiss);
+#endif
+
+        STAM_PROFILE_ADV_START(&pThis->StatProfIrteLookup, a);
+        rc = iommuAmdIntrTableLookup(pDevIns, uDevId, IOMMUOP_INTR_REQ, pMsiIn, pMsiOut);
+        STAM_PROFILE_ADV_STOP(&pThis->StatProfIrteLookup, a);
+        return rc;
     }
 
@@ -4527,8 +4856,20 @@
         case IOMMU_CMD_INV_INTR_TABLE:
         {
-            /** @todo IOMMU: Implement this once we implement IOTLB. Pretend success until
-             *        then. */
             STAM_COUNTER_INC(&pThis->StatCmdInvIntrTable);
-            return VINF_SUCCESS;
+
+            PCCMD_INV_INTR_TABLE_T pCmdInvIntrTable = (PCCMD_INV_INTR_TABLE_T)pCmd;
+            AssertCompile(sizeof(*pCmdInvIntrTable) == sizeof(*pCmd));
+
+            /* Validate reserved bits in the command. */
+            if (   !(pCmdInvIntrTable->au64[0] & ~IOMMU_CMD_INV_INTR_TABLE_QWORD_0_VALID_MASK)
+                && !(pCmdInvIntrTable->au64[1] & ~IOMMU_CMD_INV_INTR_TABLE_QWORD_1_VALID_MASK))
+            {
+#ifdef IOMMU_WITH_IRTE_CACHE
+                iommuAmdIrteCacheRemove(pDevIns, pCmdInvIntrTable->u.u16DevId);
+#endif
+                return VINF_SUCCESS;
+            }
+            iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
+            return VERR_IOMMU_CMD_INVALID_FORMAT;
         }
 
@@ -4570,5 +4911,5 @@
                 }
                 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
-                return VERR_IOMMU_CMD_NOT_SUPPORTED;
+                return VERR_IOMMU_CMD_INVALID_FORMAT;
 #else
                 return VINF_SUCCESS;
@@ -5524,5 +5865,5 @@
 
 
-#if defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)
+# ifdef IOMMU_WITH_IOTLBE_CACHE
 /**
  * @callback_method_impl{FNDBGFHANDLERDEV}
@@ -5552,4 +5893,64 @@
     else
         pHlp->pfnPrintf(pHlp, "Missing domain ID.\n");
+}
+# endif
+
+
+#ifdef IOMMU_WITH_IRTE_CACHE
+/**
+ * Gets the interrupt type name for an interrupt type in the IRTE.
+ *
+ * @returns The interrupt type name.
+ * @param   uIntrType       The interrupt type (as specified in the IRTE).
+ */
+static const char *iommuAmdIrteGetIntrTypeName(uint8_t uIntrType)
+{
+    switch (uIntrType)
+    {
+        case VBOX_MSI_DELIVERY_MODE_FIXED:          return "Fixed";
+        case VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO:    return "Arbitrated";
+        default:                                    return "<Reserved>";
+    }
+}
+
+/**
+ * @callback_method_impl{FNDBGFHANDLERDEV}
+ */
+static DECLCALLBACK(void) iommuAmdR3DbgInfoIrtes(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs)
+{
+    RT_NOREF(pszArgs);
+
+    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
+    IOMMU_LOCK_CACHE_NORET(pDevIns, pThis);
+
+    uint16_t const cIrteCache = pThis->cIrteCache;
+    pHlp->pfnPrintf(pHlp, "IRTE Cache: Capacity=%u entries\n", cIrteCache);
+    for (uint16_t idxIrte = 0; idxIrte < cIrteCache; idxIrte++)
+    {
+        PCIRTECACHE pIrteCache = &pThis->paIrteCache[idxIrte];
+        uint32_t const uKey = pIrteCache->uKey;
+        if (uKey != IOMMU_IRTE_CACHE_KEY_NIL)
+        {
+            uint16_t const uDeviceId = IOMMU_IRTE_CACHE_KEY_GET_DEVICE_ID(uKey);
+            uint16_t const offIrte   = IOMMU_IRTE_CACHE_KEY_GET_OFF(uKey);
+            pHlp->pfnPrintf(pHlp, " Entry[%u]: Offset=%#x Device=%#x (BDF %02x:%02x.%d)\n",
+                            idxIrte, offIrte, uDeviceId,
+                            (uDeviceId >> VBOX_PCI_BUS_SHIFT) & VBOX_PCI_BUS_MASK,
+                            (uDeviceId >> VBOX_PCI_DEVFN_DEV_SHIFT) & VBOX_PCI_DEVFN_DEV_MASK,
+                            uDeviceId & VBOX_PCI_DEVFN_FUN_MASK);
+
+            PCIRTE_T pIrte = &pIrteCache->Irte;
+            pHlp->pfnPrintf(pHlp, "  Remap Enable     = %RTbool\n", pIrte->n.u1RemapEnable);
+            pHlp->pfnPrintf(pHlp, "  Suppress IOPF    = %RTbool\n", pIrte->n.u1SuppressIoPf);
+            pHlp->pfnPrintf(pHlp, "  Interrupt Type   = %#x (%s)\n", pIrte->n.u3IntrType,
+                            iommuAmdIrteGetIntrTypeName(pIrte->n.u3IntrType));
+            pHlp->pfnPrintf(pHlp, "  Request EOI      = %RTbool\n", pIrte->n.u1ReqEoi);
+            pHlp->pfnPrintf(pHlp, "  Destination mode = %s\n", pIrte->n.u1DestMode ? "Logical" : "Physical");
+            pHlp->pfnPrintf(pHlp, "  Destination Id   = %u\n", pIrte->n.u8Dest);
+            pHlp->pfnPrintf(pHlp, "  Vector           = %#x (%u)\n", pIrte->n.u8Vector, pIrte->n.u8Vector);
+            pHlp->pfnPrintf(pHlp, "\n");
+        }
+    }
+    IOMMU_UNLOCK_CACHE(pDevIns, pThis);
 }
 #endif
@@ -5747,7 +6148,12 @@
     IOMMU_UNLOCK(pDevIns, pThisCC);
 
+#ifdef IOMMU_WITH_DTE_CACHE
+    iommuAmdDteCacheRemoveAll(pDevIns);
+#endif
 #ifdef IOMMU_WITH_IOTLBE_CACHE
-    iommuAmdDteCacheRemoveAll(pDevIns);
     iommuAmdIotlbRemoveAll(pDevIns);
+#endif
+#ifdef IOMMU_WITH_IRTE_CACHE
+    iommuAmdIrteCacheRemoveAll(pDevIns);
 #endif
 }
@@ -5773,17 +6179,29 @@
     }
 
+#ifdef IOMMU_WITH_DTE_CACHE
+    /* Destroy the DTE cache. */
+    if (pThis->paDteCache)
+    {
+        PDMDevHlpMMHeapFree(pDevIns, pThis->paDteCache);
+        pThis->paDteCache = NULL;
+    }
+#endif
+
 #ifdef IOMMU_WITH_IOTLBE_CACHE
-    /* Destroy level 1 cache. */
-    if (pThis->paDevices)
-    {
-        PDMDevHlpMMHeapFree(pDevIns, pThis->paDevices);
-        pThis->paDevices = NULL;
-    }
-
-    /* Destroy level 2 cache. */
+    /* Destroy the IOTLB cache. */
     if (pThis->paIotlbes)
     {
         PDMDevHlpMMHeapFree(pDevIns, pThis->paIotlbes);
         pThis->paIotlbes = NULL;
+        pThis->idxUnusedIotlbe = 0;
+    }
+#endif
+
+#ifdef IOMMU_WITH_IRTE_CACHE
+    /* Destroy the interrupt cache. */
+    if (pThis->paIrteCache)
+    {
+        PDMDevHlpMMHeapFree(pDevIns, pThis->paIrteCache);
+        pThis->paIrteCache = NULL;
     }
 #endif
@@ -5972,4 +6390,7 @@
     PDMDevHlpDBGFInfoRegister(pDevIns, "iommutlb", "Display IOTLBs for a domain. Arguments: DomainID.", iommuAmdR3DbgInfoIotlb);
 #endif
+#ifdef IOMMU_WITH_IRTE_CACHE
+    PDMDevHlpDBGFInfoRegister(pDevIns, "iommuirtes", "Display the IRTE cache.", iommuAmdR3DbgInfoIrtes);
+#endif
 
 # ifdef VBOX_WITH_STATISTICS
@@ -6015,4 +6436,7 @@
     PDMDevHlpSTAMRegister(pDevIns, &pThis->StatProfIotlbeLookup, STAMTYPE_PROFILE, "Profile/IotlbeLookup", STAMUNIT_TICKS_PER_CALL, "Profiling IOTLBE lookup.");
 
+    PDMDevHlpSTAMRegister(pDevIns, &pThis->StatProfIrteLookup, STAMTYPE_PROFILE, "Profile/IrteLookup", STAMUNIT_TICKS_PER_CALL, "Profiling IRTE lookup.");
+    PDMDevHlpSTAMRegister(pDevIns, &pThis->StatProfIrteCacheLookup, STAMTYPE_PROFILE, "Profile/IrteCacheLookup", STAMUNIT_TICKS_PER_CALL, "Profiling IRTE cache lookup.");
+
     PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheHit, STAMTYPE_COUNTER, "Access/CacheHit", STAMUNIT_OCCURENCES, "Number of cache hits.");
     PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheMiss, STAMTYPE_COUNTER, "Access/CacheMiss", STAMUNIT_OCCURENCES, "Number of cache misses.");
@@ -6022,4 +6446,7 @@
     PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessDteNonContig, STAMTYPE_COUNTER, "Access/DteNonContig", STAMUNIT_OCCURENCES, "Number of DTE accesses that resulted in non-contiguous translated regions.");
     PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessDtePermDenied, STAMTYPE_COUNTER, "Access/DtePermDenied", STAMUNIT_OCCURENCES, "Number of DTE accesses that resulted in denied permissions.");
+
+    PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIntrCacheHit, STAMTYPE_COUNTER, "Intr/CacheHit", STAMUNIT_OCCURENCES, "Number of cache hits.");
+    PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIntrCacheMiss, STAMTYPE_COUNTER, "Intr/CacheMiss", STAMUNIT_OCCURENCES, "Number of cache misses.");
 # endif
 
@@ -6037,5 +6464,5 @@
     AssertLogRelRCReturn(rc, rc);
 
-#ifdef IOMMU_WITH_IOTLBE_CACHE
+#ifdef IOMMU_WITH_DTE_CACHE
     /*
      * Initialize the critsect of the cache.
@@ -6045,37 +6472,70 @@
 
     /*
-     * Allocate the level 1 cache (device ID to domain ID mapping).
-     * PCI devices are hotpluggable, plus we don't have a way of querying the bus for all
+     * Allocate the device table entry cache.
+     * PCI devices are hotpluggable and we don't have a way of querying the bus for all
      * assigned PCI BDF slots. So while this wastes some memory, it should work regardless
-     * of how code, features and devices around the IOMMU changes.
+     * of how code, features and devices around the IOMMU change.
      */
-    size_t const cbDevices = sizeof(IODEVICE) * IOMMU_DTE_CACHE_MAX;
+    size_t cbCache = 0;
+    size_t const cbDteCache = sizeof(DTECACHE) * IOMMU_DTE_CACHE_MAX;
     AssertCompile(IOMMU_DTE_CACHE_MAX >= UINT16_MAX);
-    pThis->paDevices = (PIODEVICE)PDMDevHlpMMHeapAllocZ(pDevIns, cbDevices);
-    if (!pThis->paDevices)
-    {
+    pThis->paDteCache = (PDTECACHE)PDMDevHlpMMHeapAllocZ(pDevIns, cbDteCache);
+    if (!pThis->paDteCache)
         return PDMDevHlpVMSetError(pDevIns, VERR_NO_MEMORY, RT_SRC_POS,
-                                   N_("Failed to allocate %zu bytes from the hyperheap for the IOMMU level 1 cache."), cbDevices);
-    }
-
+                                   N_("Failed to allocate %zu bytes from the hyperheap for the DTE cache."), cbDteCache);
+    cbCache += cbDteCache;
+#endif
+
+#ifdef IOMMU_WITH_IOTLBE_CACHE
     /*
-     * Allocate the level 2 cache (IOTLB entries).
+     * Allocate IOTLB entries.
      * This is allocated upfront since we expect a relatively small number of entries,
      * is more cache-line efficient and easier to track least recently used entries for
-     * eviction when the cache is full. This also prevents unpredictable behavior during
-     * the lifetime of the VM if the hyperheap gets full as allocation would fail upfront
-     * or not at all.
+     * eviction when the cache is full. This also avoids unpredictable behavior during
+     * the lifetime of the VM if the hyperheap gets full.
      */
     size_t const cbIotlbes = sizeof(IOTLBE) * IOMMU_IOTLBE_MAX;
     pThis->paIotlbes = (PIOTLBE)PDMDevHlpMMHeapAllocZ(pDevIns, cbIotlbes);
     if (!pThis->paIotlbes)
-    {
         return PDMDevHlpVMSetError(pDevIns, VERR_NO_MEMORY, RT_SRC_POS,
-                                   N_("Failed to allocate %zu bytes from the hyperheap for the IOMMU level 2 cache."),
-                                   cbIotlbes);
-    }
+                                   N_("Failed to allocate %zu bytes from the hyperheap for the IOTLB cache."), cbIotlbes);
     RTListInit(&pThis->LstLruIotlbe);
-
-    LogRel(("%s: Allocated %zu bytes from the hyperheap for the IOTLB cache\n", IOMMU_LOG_PFX, cbDevices + cbIotlbes));
+    cbCache += cbIotlbes;
+#endif
+
+#ifdef IOMMU_WITH_IRTE_CACHE
+    /* Maximum number of elements in the IRTE cache. */
+    PCPDMDEVHLPR3 pHlp = pDevIns->pHlpR3;
+    rc = pHlp->pfnCFGMQueryU16Def(pCfg, "InterruptCacheCount", &pThis->cIrteCache, IOMMU_IRTE_CACHE_DEFAULT);
+    if (RT_FAILURE(rc))
+        return PDMDevHlpVMSetError(pDevIns, rc, RT_SRC_POS, N_("IOMMU: failed to read InterruptCacheCount as integer"));
+    AssertCompile(IOMMU_IRTE_CACHE_DEFAULT >= IOMMU_IRTE_CACHE_MIN);
+    AssertCompile(IOMMU_IRTE_CACHE_DEFAULT <= IOMMU_IRTE_CACHE_MAX);
+    if (   pThis->cIrteCache < IOMMU_IRTE_CACHE_MIN
+        || pThis->cIrteCache > IOMMU_IRTE_CACHE_MAX)
+        return PDMDevHlpVMSetError(pDevIns, VERR_INVALID_PARAMETER, RT_SRC_POS,
+                                   N_("IOMMU: InterruptCacheCount invalid (must be between %u and %u)."),
+                                   IOMMU_IRTE_CACHE_MIN, IOMMU_IRTE_CACHE_MAX);
+
+    /*
+     * Allocate the interrupt remapping cache.
+     * This is an array of devices and their corresponding interrupt remap table entries.
+     * Typically only a handful of PCI devices are used in VMs so this is kept rather small.
+     * If we ever need to support a vast number of interrupt-remapped devices, we can
+     * implement a more sophisticated cache solution then.
+     *
+     * NOTE: IRTE cache entry keys are initialized later in this function by calling
+     *       iommuAmdR3Reset() -> iommuAmdIrteCacheRemoveAll().
+     */
+    size_t const cbIrteCache = sizeof(IRTECACHE) * pThis->cIrteCache;
+    pThis->paIrteCache = (PIRTECACHE)PDMDevHlpMMHeapAllocZ(pDevIns, cbIrteCache);
+    if (!pThis->paIrteCache)
+        return PDMDevHlpVMSetError(pDevIns, VERR_NO_MEMORY, RT_SRC_POS,
+                                   N_("Failed to allocate %zu bytes from the hyperheap for the interrupt cache."), cbIrteCache);
+    cbCache += cbIrteCache;
+#endif
+
+#ifdef IOMMU_WITH_DTE_CACHE
+    LogRel(("%s: Allocated %zu bytes from the hyperheap for the IOMMU cache\n", IOMMU_LOG_PFX, cbCache));
 #endif
 
@@ -6141,4 +6601,5 @@
     /*
      * Initialize parts of the IOMMU state as it would during reset.
+     * Also initializes non-zero initial values like IRTE cache keys.
      * Must be called -after- initializing PCI config. space registers.
      */
