VirtualBox

source: vbox/trunk/src/VBox/Devices/Bus/DevIommuAmd.cpp@ 103914

Last change on this file since 103914 was 99281, checked in by vboxsync, 18 months ago

Devices/Bus/DevIommuAmd: Memory cleanup on the semevent wait failure case.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 310.9 KB
Line 
1/* $Id: DevIommuAmd.cpp 99281 2023-04-04 13:06:21Z vboxsync $ */
2/** @file
3 * IOMMU - Input/Output Memory Management Unit - AMD implementation.
4 */
5
6/*
7 * Copyright (C) 2020-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_DEV_IOMMU
33#include <VBox/msi.h>
34#include <VBox/iommu-amd.h>
35#include <VBox/vmm/pdmdev.h>
36
37#include <iprt/x86.h>
38#include <iprt/string.h>
39#include <iprt/avl.h>
40#ifdef IN_RING3
41# include <iprt/mem.h>
42#endif
43
44#include "VBoxDD.h"
45#include "DevIommuAmd.h"
46
47
48/*********************************************************************************************************************************
49* Defined Constants And Macros *
50*********************************************************************************************************************************/
/** Prefix string used for release log messages. */
#define IOMMU_LOG_PFX                               "AMD-IOMMU"
/** Version number of the saved state currently in use. */
#define IOMMU_SAVED_STATE_VERSION                   1
/** Magic value identifying an IOMMU device instance. */
#define IOMMU_MAGIC                                 0x10acce55
57
/** The IOTLBE cache is only enabled in ring-3 for now, see @bugref{9654#c95}. */
#if defined(IN_RING3)
# define IOMMU_WITH_IOTLBE_CACHE
#endif
/** The interrupt (IRTE) cache is always enabled. */
#define IOMMU_WITH_IRTE_CACHE

/* Neither the IOTLB cache nor the interrupt cache works without the DTE cache,
   so it is enabled whenever either of them is. */
#if defined(IOMMU_WITH_IOTLBE_CACHE) || defined(IOMMU_WITH_IRTE_CACHE)
# define IOMMU_WITH_DTE_CACHE
/** Maximum number of device IDs held in the cache. */
# define IOMMU_DEV_CACHE_COUNT                      16
/** Device ID value denoting an empty (unused) cache slot. */
# define IOMMU_DTE_CACHE_KEY_NIL                    0
#endif
73
#ifdef IOMMU_WITH_IRTE_CACHE
/** Maximum number of entries in the IRTE cache. */
# define IOMMU_IRTE_CACHE_COUNT                     32
/** Key value denoting a NIL (unused) IRTE cache entry. */
# define IOMMU_IRTE_CACHE_KEY_NIL                   (~(uint32_t)0U)
/** Extracts the device ID (high word) from an IRTE cache entry key. */
# define IOMMU_IRTE_CACHE_KEY_GET_DEVICE_ID(a_Key)  RT_HIWORD(a_Key)
/** Extracts the IRTE table offset (low word) from an IRTE cache entry key. */
# define IOMMU_IRTE_CACHE_KEY_GET_OFF(a_Key)        RT_LOWORD(a_Key)
/** Builds an IRTE cache entry key.
 *
 * Bits 31:16 hold the device ID (Bus, Device, Function).
 * Bits 15:0 hold the offset into the IRTE table.
 */
# define IOMMU_IRTE_CACHE_KEY_MAKE(a_DevId, a_off)  RT_MAKE_U32(a_off, a_DevId)
#endif /* IOMMU_WITH_IRTE_CACHE */
90
#ifdef IOMMU_WITH_IOTLBE_CACHE
/** Maximum number of IOTLB entries. */
# define IOMMU_IOTLBE_MAX                           64
/** Mask of the IOTLBE key bits that hold the domain ID (key bits 63:40). */
# define IOMMU_IOTLB_DOMAIN_ID_MASK                 UINT64_C(0xffffff0000000000)
/** Mask of the IOTLBE key bits that hold the IOVA (key bits 39:0). */
# define IOMMU_IOTLB_IOVA_MASK                      (~IOMMU_IOTLB_DOMAIN_ID_MASK)
/** Bit position of the domain ID within the IOTLBE key. */
# define IOMMU_IOTLB_DOMAIN_ID_SHIFT                40
/** Key value denoting a NIL IOTLB entry. */
# define IOMMU_IOTLB_KEY_NIL                        UINT64_C(0)
/** Extracts the domain ID from an IOTLB entry key. */
# define IOMMU_IOTLB_KEY_GET_DOMAIN_ID(a_Key)       ((a_Key) >> IOMMU_IOTLB_DOMAIN_ID_SHIFT)
/** Extracts the (4K page aligned) IOVA from an IOTLB entry key. */
# define IOMMU_IOTLB_KEY_GET_IOVA(a_Key)            (((a_Key) & IOMMU_IOTLB_IOVA_MASK) << X86_PAGE_4K_SHIFT)
/** Builds an IOTLB entry key.
 *
 * Address bits 63:52 of the IOVA are zero extended, so the top 12 bits are free.
 * Address bits 11:0 of the IOVA are the offset into the minimum page size of 4K,
 * so the bottom 12 bits are free.
 *
 * The top 24 bits of the key therefore hold bits 15:0 of the domain ID, and the
 * bottom 40 bits of the key hold bits 51:12 of the IOVA.
 */
# define IOMMU_IOTLB_KEY_MAKE(a_DomainId, a_uIova)  (  ((uint64_t)(a_DomainId) << IOMMU_IOTLB_DOMAIN_ID_SHIFT) \
                                                     | (((a_uIova) >> X86_PAGE_4K_SHIFT) & IOMMU_IOTLB_IOVA_MASK))
#endif /* IOMMU_WITH_IOTLBE_CACHE */
118
#ifdef IOMMU_WITH_DTE_CACHE
/** @name IOMMU_DTE_CACHE_F_XXX: DTE cache flags.
 *
 * Some of these flags are "basic", i.e. they map directly onto a bit of the
 * DTE. The remaining flags are derived from checks or operations involving
 * several DTE bits.
 *
 * The basic flags are:
 *   - VALID                (DTE.V)
 *   - IO_PERM_READ         (DTE.IR)
 *   - IO_PERM_WRITE        (DTE.IW)
 *   - IO_PERM_RSVD         (bit following DTE.IW, reserved for future use and
 *                           to keep masking consistent)
 *   - SUPPRESS_ALL_IOPF    (DTE.SA)
 *   - SUPPRESS_IOPF        (DTE.SE)
 *   - INTR_MAP_VALID       (DTE.IV)
 *   - IGNORE_UNMAPPED_INTR (DTE.IG)
 *
 * @see iommuAmdGetBasicDevFlags()
 * @{ */
/** The DTE is present. */
# define IOMMU_DTE_CACHE_F_PRESENT                      RT_BIT(0)
/** The DTE is valid. */
# define IOMMU_DTE_CACHE_F_VALID                        RT_BIT(1)
/** The DTE permissions apply for address translations. */
# define IOMMU_DTE_CACHE_F_IO_PERM                      RT_BIT(2)
/** DTE permission - I/O read allowed. */
# define IOMMU_DTE_CACHE_F_IO_PERM_READ                 RT_BIT(3)
/** DTE permission - I/O write allowed. */
# define IOMMU_DTE_CACHE_F_IO_PERM_WRITE                RT_BIT(4)
/** DTE permission - reserved. */
# define IOMMU_DTE_CACHE_F_IO_PERM_RSVD                 RT_BIT(5)
/** Address translation required. */
# define IOMMU_DTE_CACHE_F_ADDR_TRANSLATE               RT_BIT(6)
/** Suppress all I/O page faults. */
# define IOMMU_DTE_CACHE_F_SUPPRESS_ALL_IOPF            RT_BIT(7)
/** Suppress I/O page faults. */
# define IOMMU_DTE_CACHE_F_SUPPRESS_IOPF                RT_BIT(8)
/** Interrupt map valid. */
# define IOMMU_DTE_CACHE_F_INTR_MAP_VALID               RT_BIT(9)
/** Ignore unmapped interrupts. */
# define IOMMU_DTE_CACHE_F_IGNORE_UNMAPPED_INTR         RT_BIT(10)
/** An I/O page fault has been raised for this device. */
# define IOMMU_DTE_CACHE_F_IO_PAGE_FAULT_RAISED         RT_BIT(11)
/** Fixed and arbitrary interrupt control: Target Abort. */
# define IOMMU_DTE_CACHE_F_INTR_CTRL_TARGET_ABORT       RT_BIT(12)
/** Fixed and arbitrary interrupt control: Forward unmapped. */
# define IOMMU_DTE_CACHE_F_INTR_CTRL_FWD_UNMAPPED       RT_BIT(13)
/** Fixed and arbitrary interrupt control: Remapped. */
# define IOMMU_DTE_CACHE_F_INTR_CTRL_REMAPPED           RT_BIT(14)
/** Fixed and arbitrary interrupt control: Reserved. */
# define IOMMU_DTE_CACHE_F_INTR_CTRL_RSVD               RT_BIT(15)
/** @} */

/** Bit position of the DTE permission bits within the I/O device flags
 *  (i.e. the position of IOMMU_DTE_CACHE_F_IO_PERM_READ). */
# define IOMMU_DTE_CACHE_F_IO_PERM_SHIFT                3
/** Mask of the DTE permission bits (applied after shifting). */
# define IOMMU_DTE_CACHE_F_IO_PERM_MASK                 0x3
/** Bit position of the interrupt control bits within the I/O device flags
 *  (i.e. the position of IOMMU_DTE_CACHE_F_INTR_CTRL_TARGET_ABORT). */
# define IOMMU_DTE_CACHE_F_INTR_CTRL_SHIFT              12
/** Mask of the interrupt control bits (applied after shifting). */
# define IOMMU_DTE_CACHE_F_INTR_CTRL_MASK               0x3
/** Bit position of the ignore-unmapped-interrupts flag. */
# define IOMMU_DTE_CACHE_F_IGNORE_UNMAPPED_INTR_SHIFT   10

/** Enters the cache critical section; the status code is release-asserted. */
# define IOMMU_CACHE_LOCK(a_pDevIns, a_pThis) \
    do \
    { \
        int const rcLock = PDMDevHlpCritSectEnter((a_pDevIns), &(a_pThis)->CritSectCache, VINF_SUCCESS); \
        PDM_CRITSECT_RELEASE_ASSERT_RC_DEV((a_pDevIns), &(a_pThis)->CritSectCache, rcLock); \
    } while (0)

/** Leaves the cache critical section. */
# define IOMMU_CACHE_UNLOCK(a_pDevIns, a_pThis)         PDMDevHlpCritSectLeave((a_pDevIns), &(a_pThis)->CritSectCache)
#endif /* IOMMU_WITH_DTE_CACHE */
194
/** Acquires the IOMMU lock; on anything but VINF_SUCCESS (e.g. a_rcBusy on
 *  contention) the status is returned from the calling function. */
#define IOMMU_LOCK_RET(a_pDevIns, a_pThisCC, a_rcBusy) \
    do \
    { \
        int const rcLock = (a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnLock((a_pDevIns), (a_rcBusy)); \
        if (!RT_LIKELY(rcLock == VINF_SUCCESS)) \
            return rcLock; \
    } while (0)
204
/** Acquires the IOMMU lock and release-asserts the status (acquisition can
 *  fail under extraordinary circumstances in ring-0). */
#define IOMMU_LOCK(a_pDevIns, a_pThisCC) \
    do \
    { \
        int const rcLock = (a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnLock((a_pDevIns), VINF_SUCCESS); \
        PDM_CRITSECT_RELEASE_ASSERT_RC_DEV((a_pDevIns), NULL, rcLock); \
    } while (0)
211
/** Asserts that the current thread owns the PDM lock. */
#define IOMMU_ASSERT_LOCK_IS_OWNER(a_pDevIns, a_pThisCC) \
    do { \
        /* Keeps a_pThisCC referenced when Assert() is compiled out. */ \
        NOREF(a_pThisCC); \
        Assert((a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnLockIsOwner((a_pDevIns))); \
    } while (0)
219
/** Releases the PDM lock via the IOMMU helper of the current context. */
#define IOMMU_UNLOCK(a_pDevIns, a_pThisCC) \
    (a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnUnlock((a_pDevIns))
222
/** Gets the maximum valid IOVA for the given I/O page-table level.
 *
 * Each level adds 9 address bits on top of the 12-bit 4K page offset. The page
 * size is widened to 64 bits before shifting so that the shift is performed in
 * unsigned 64-bit arithmetic regardless of the type of X86_PAGE_4K_SIZE; with a
 * plain int operand the shift would overflow from level 3 upwards. For levels
 * where the range exceeds 64 bits (level 6) the result is all ones. */
#define IOMMU_GET_MAX_VALID_IOVA(a_Level)           (((uint64_t)X86_PAGE_4K_SIZE << ((a_Level) * 9)) - 1)
225
226
227/*********************************************************************************************************************************
228* Structures and Typedefs *
229*********************************************************************************************************************************/
/**
 * IOMMU operation (transaction) kinds.
 */
typedef enum IOMMUOP
{
    /** Address translation request. */
    IOMMUOP_TRANSLATE_REQ = 0,
    /** Memory read request. */
    IOMMUOP_MEM_READ      = 1,
    /** Memory write request. */
    IOMMUOP_MEM_WRITE     = 2,
    /** Interrupt request. */
    IOMMUOP_INTR_REQ      = 3,
    /** Command. */
    IOMMUOP_CMD           = 4
} IOMMUOP;
/** Pointer to an IOMMU operation. */
typedef IOMMUOP *PIOMMUOP;
248
249/**
250 * I/O page lookup.
251 */
252typedef struct IOPAGELOOKUP
253{
254 /** The translated system physical address. */
255 RTGCPHYS GCPhysSpa;
256 /** The number of offset bits in the system physical address. */
257 uint8_t cShift;
258 /** The I/O permissions for this translation, see IOMMU_IO_PERM_XXX. */
259 uint8_t fPerm;
260} IOPAGELOOKUP;
261/** Pointer to an I/O page lookup. */
262typedef IOPAGELOOKUP *PIOPAGELOOKUP;
263/** Pointer to a const I/O page lookup. */
264typedef IOPAGELOOKUP const *PCIOPAGELOOKUP;
265
/**
 * An I/O address range together with the access permissions it is used with.
 */
typedef struct IOADDRRANGE
{
    /** Start address of the range (virtual or physical). */
    uint64_t        uAddr;
    /** Size of the access in bytes. */
    size_t          cb;
    /** The I/O permissions for this translation, see IOMMU_IO_PERM_XXX. */
    uint8_t         fPerm;
} IOADDRRANGE;
/** Pointer to an I/O address range. */
typedef IOADDRRANGE *PIOADDRRANGE;
/** Pointer to a const I/O address range. */
typedef IOADDRRANGE const *PCIOADDRRANGE;
282
#ifdef IOMMU_WITH_DTE_CACHE
/**
 * Device Table Entry Cache.
 */
typedef struct DTECACHE
{
    /** This device's flags, see IOMMU_DTE_CACHE_F_XXX. */
    uint16_t        fFlags;
    /** The domain ID assigned for this device by software. */
    uint16_t        idDomain;
} DTECACHE;
/** Pointer to a DTE cache entry. */
typedef DTECACHE *PDTECACHE;
/** Pointer to a const DTE cache entry (const-qualified like the other PCxxx
 *  typedefs in this file). */
typedef DTECACHE const *PCDTECACHE;
AssertCompileSize(DTECACHE, 4);
#endif /* IOMMU_WITH_DTE_CACHE */
300
#ifdef IOMMU_WITH_IOTLBE_CACHE
/**
 * I/O TLB Entry.
 *
 * Keep this as small and aligned as possible. The AVL node core must remain
 * the first member (asserted below).
 */
typedef struct IOTLBE
{
    /** The AVL tree node. */
    AVLU64NODECORE  Core;
    /** The least recently used (LRU) list node. */
    RTLISTNODE      NdLru;
    /** The I/O page lookup results of the translation. */
    IOPAGELOOKUP    PageLookup;
    /** Whether the entry needs to be evicted from the cache. */
    bool            fEvictPending;
} IOTLBE;
/** Pointer to an IOMMU I/O TLB entry struct. */
typedef IOTLBE *PIOTLBE;
/** Pointer to a const IOMMU I/O TLB entry struct. */
typedef IOTLBE const *PCIOTLBE;
AssertCompileSizeAlignment(IOTLBE, 8);
AssertCompileMemberOffset(IOTLBE, Core, 0);
#endif /* IOMMU_WITH_IOTLBE_CACHE */
324
#ifdef IOMMU_WITH_IRTE_CACHE
/**
 * Interrupt Remap Table Entry Cache.
 */
typedef struct IRTECACHE
{
    /** The key identifying this entry, see IOMMU_IRTE_CACHE_KEY_MAKE. */
    uint32_t        uKey;
    /** The cached IRTE. */
    IRTE_T          Irte;
} IRTECACHE;
/** Pointer to an IRTE cache struct. */
typedef IRTECACHE *PIRTECACHE;
/** Pointer to a const IRTE cache struct. */
typedef IRTECACHE const *PCIRTECACHE;
AssertCompileSizeAlignment(IRTECACHE, 4);
#endif /* IOMMU_WITH_IRTE_CACHE */
342
343/**
344 * The shared IOMMU device state.
345 */
346typedef struct IOMMU
347{
348 /** IOMMU device index (0 is at the top of the PCI tree hierarchy). */
349 uint32_t idxIommu;
350 /** IOMMU magic. */
351 uint32_t u32Magic;
352
353 /** The MMIO handle. */
354 IOMMMIOHANDLE hMmio;
355 /** The event semaphore the command thread waits on. */
356 SUPSEMEVENT hEvtCmdThread;
357 /** Whether the command thread has been signaled for wake up. */
358 bool volatile fCmdThreadSignaled;
359 /** Padding. */
360 bool afPadding0[3];
361 /** The IOMMU PCI address. */
362 PCIBDF uPciAddress;
363
364#ifdef IOMMU_WITH_DTE_CACHE
365 /** The critsect that protects the cache from concurrent access. */
366 PDMCRITSECT CritSectCache;
367 /** Array of device IDs. */
368 uint16_t aDeviceIds[IOMMU_DEV_CACHE_COUNT];
369 /** Array of DTE cache entries. */
370 DTECACHE aDteCache[IOMMU_DEV_CACHE_COUNT];
371#endif
372#ifdef IOMMU_WITH_IRTE_CACHE
373 /** Array of IRTE cache entries. */
374 IRTECACHE aIrteCache[IOMMU_IRTE_CACHE_COUNT];
375#endif
376
377 /** @name PCI: Base capability block registers.
378 * @{ */
379 IOMMU_BAR_T IommuBar; /**< IOMMU base address register. */
380 /** @} */
381
382 /** @name MMIO: Control and status registers.
383 * @{ */
384 DEV_TAB_BAR_T aDevTabBaseAddrs[8]; /**< Device table base address registers. */
385 CMD_BUF_BAR_T CmdBufBaseAddr; /**< Command buffer base address register. */
386 EVT_LOG_BAR_T EvtLogBaseAddr; /**< Event log base address register. */
387 IOMMU_CTRL_T Ctrl; /**< IOMMU control register. */
388 IOMMU_EXCL_RANGE_BAR_T ExclRangeBaseAddr; /**< IOMMU exclusion range base register. */
389 IOMMU_EXCL_RANGE_LIMIT_T ExclRangeLimit; /**< IOMMU exclusion range limit. */
390 IOMMU_EXT_FEAT_T ExtFeat; /**< IOMMU extended feature register. */
391 /** @} */
392
393 /** @name MMIO: Peripheral Page Request (PPR) Log registers.
394 * @{ */
395 PPR_LOG_BAR_T PprLogBaseAddr; /**< PPR Log base address register. */
396 IOMMU_HW_EVT_HI_T HwEvtHi; /**< IOMMU hardware event register (Hi). */
397 IOMMU_HW_EVT_LO_T HwEvtLo; /**< IOMMU hardware event register (Lo). */
398 IOMMU_HW_EVT_STATUS_T HwEvtStatus; /**< IOMMU hardware event status. */
399 /** @} */
400
401 /** @todo IOMMU: SMI filter. */
402
403 /** @name MMIO: Guest Virtual-APIC Log registers.
404 * @{ */
405 GALOG_BAR_T GALogBaseAddr; /**< Guest Virtual-APIC Log base address register. */
406 GALOG_TAIL_ADDR_T GALogTailAddr; /**< Guest Virtual-APIC Log Tail address register. */
407 /** @} */
408
409 /** @name MMIO: Alternate PPR and Event Log registers.
410 * @{ */
411 PPR_LOG_B_BAR_T PprLogBBaseAddr; /**< PPR Log B base address register. */
412 EVT_LOG_B_BAR_T EvtLogBBaseAddr; /**< Event Log B base address register. */
413 /** @} */
414
415 /** @name MMIO: Device-specific feature registers.
416 * @{ */
417 DEV_SPECIFIC_FEAT_T DevSpecificFeat; /**< Device-specific feature extension register (DSFX). */
418 DEV_SPECIFIC_CTRL_T DevSpecificCtrl; /**< Device-specific control extension register (DSCX). */
419 DEV_SPECIFIC_STATUS_T DevSpecificStatus; /**< Device-specific status extension register (DSSX). */
420 /** @} */
421
422 /** @name MMIO: MSI Capability Block registers.
423 * @{ */
424 MSI_MISC_INFO_T MiscInfo; /**< MSI Misc. info registers / MSI Vector registers. */
425 /** @} */
426
427 /** @name MMIO: Performance Optimization Control registers.
428 * @{ */
429 IOMMU_PERF_OPT_CTRL_T PerfOptCtrl; /**< IOMMU Performance optimization control register. */
430 /** @} */
431
432 /** @name MMIO: x2APIC Control registers.
433 * @{ */
434 IOMMU_XT_GEN_INTR_CTRL_T XtGenIntrCtrl; /**< IOMMU X2APIC General interrupt control register. */
435 IOMMU_XT_PPR_INTR_CTRL_T XtPprIntrCtrl; /**< IOMMU X2APIC PPR interrupt control register. */
436 IOMMU_XT_GALOG_INTR_CTRL_T XtGALogIntrCtrl; /**< IOMMU X2APIC Guest Log interrupt control register. */
437 /** @} */
438
439 /** @name MMIO: Memory Address Routing & Control (MARC) registers.
440 * @{ */
441 MARC_APER_T aMarcApers[4]; /**< MARC Aperture Registers. */
442 /** @} */
443
444 /** @name MMIO: Reserved register.
445 * @{ */
446 IOMMU_RSVD_REG_T RsvdReg; /**< IOMMU Reserved Register. */
447 /** @} */
448
449 /** @name MMIO: Command and Event Log pointer registers.
450 * @{ */
451 CMD_BUF_HEAD_PTR_T CmdBufHeadPtr; /**< Command buffer head pointer register. */
452 CMD_BUF_TAIL_PTR_T CmdBufTailPtr; /**< Command buffer tail pointer register. */
453 EVT_LOG_HEAD_PTR_T EvtLogHeadPtr; /**< Event log head pointer register. */
454 EVT_LOG_TAIL_PTR_T EvtLogTailPtr; /**< Event log tail pointer register. */
455 /** @} */
456
457 /** @name MMIO: Command and Event Status register.
458 * @{ */
459 IOMMU_STATUS_T Status; /**< IOMMU status register. */
460 /** @} */
461
462 /** @name MMIO: PPR Log Head and Tail pointer registers.
463 * @{ */
464 PPR_LOG_HEAD_PTR_T PprLogHeadPtr; /**< IOMMU PPR log head pointer register. */
465 PPR_LOG_TAIL_PTR_T PprLogTailPtr; /**< IOMMU PPR log tail pointer register. */
466 /** @} */
467
468 /** @name MMIO: Guest Virtual-APIC Log Head and Tail pointer registers.
469 * @{ */
470 GALOG_HEAD_PTR_T GALogHeadPtr; /**< Guest Virtual-APIC log head pointer register. */
471 GALOG_TAIL_PTR_T GALogTailPtr; /**< Guest Virtual-APIC log tail pointer register. */
472 /** @} */
473
474 /** @name MMIO: PPR Log B Head and Tail pointer registers.
475 * @{ */
476 PPR_LOG_B_HEAD_PTR_T PprLogBHeadPtr; /**< PPR log B head pointer register. */
477 PPR_LOG_B_TAIL_PTR_T PprLogBTailPtr; /**< PPR log B tail pointer register. */
478 /** @} */
479
480 /** @name MMIO: Event Log B Head and Tail pointer registers.
481 * @{ */
482 EVT_LOG_B_HEAD_PTR_T EvtLogBHeadPtr; /**< Event log B head pointer register. */
483 EVT_LOG_B_TAIL_PTR_T EvtLogBTailPtr; /**< Event log B tail pointer register. */
484 /** @} */
485
486 /** @name MMIO: PPR Log Overflow protection registers.
487 * @{ */
488 PPR_LOG_AUTO_RESP_T PprLogAutoResp; /**< PPR Log Auto Response register. */
489 PPR_LOG_OVERFLOW_EARLY_T PprLogOverflowEarly; /**< PPR Log Overflow Early Indicator register. */
490 PPR_LOG_B_OVERFLOW_EARLY_T PprLogBOverflowEarly; /**< PPR Log B Overflow Early Indicator register. */
491 /** @} */
492
493 /** @todo IOMMU: IOMMU Event counter registers. */
494
495#ifdef VBOX_WITH_STATISTICS
496 /** @name IOMMU: Stat counters.
497 * @{ */
498 STAMCOUNTER StatMmioReadR3; /**< Number of MMIO reads in R3. */
499 STAMCOUNTER StatMmioReadRZ; /**< Number of MMIO reads in RZ. */
500 STAMCOUNTER StatMmioWriteR3; /**< Number of MMIO writes in R3. */
501 STAMCOUNTER StatMmioWriteRZ; /**< Number of MMIO writes in RZ. */
502
503 STAMCOUNTER StatMsiRemapR3; /**< Number of MSI remap requests in R3. */
504 STAMCOUNTER StatMsiRemapRZ; /**< Number of MSI remap requests in RZ. */
505
506 STAMCOUNTER StatMemReadR3; /**< Number of memory read translation requests in R3. */
507 STAMCOUNTER StatMemReadRZ; /**< Number of memory read translation requests in RZ. */
508 STAMCOUNTER StatMemWriteR3; /**< Number of memory write translation requests in R3. */
509 STAMCOUNTER StatMemWriteRZ; /**< Number of memory write translation requests in RZ. */
510
511 STAMCOUNTER StatMemBulkReadR3; /**< Number of memory read bulk translation requests in R3. */
512 STAMCOUNTER StatMemBulkReadRZ; /**< Number of memory read bulk translation requests in RZ. */
513 STAMCOUNTER StatMemBulkWriteR3; /**< Number of memory write bulk translation requests in R3. */
514 STAMCOUNTER StatMemBulkWriteRZ; /**< Number of memory write bulk translation requests in RZ. */
515
516 STAMCOUNTER StatCmd; /**< Number of commands processed in total. */
517 STAMCOUNTER StatCmdCompWait; /**< Number of Completion Wait commands processed. */
518 STAMCOUNTER StatCmdInvDte; /**< Number of Invalidate DTE commands processed. */
519 STAMCOUNTER StatCmdInvIommuPages; /**< Number of Invalidate IOMMU pages commands processed. */
520 STAMCOUNTER StatCmdInvIotlbPages; /**< Number of Invalidate IOTLB pages commands processed. */
521 STAMCOUNTER StatCmdInvIntrTable; /**< Number of Invalidate Interrupt Table commands processed. */
522 STAMCOUNTER StatCmdPrefIommuPages; /**< Number of Prefetch IOMMU Pages commands processed. */
523 STAMCOUNTER StatCmdCompletePprReq; /**< Number of Complete PPR Requests commands processed. */
524 STAMCOUNTER StatCmdInvIommuAll; /**< Number of Invalidate IOMMU All commands processed. */
525
526 STAMCOUNTER StatIotlbeCached; /**< Number of IOTLB entries in the cache. */
527 STAMCOUNTER StatIotlbeLazyEvictReuse; /**< Number of IOTLB entries re-used after lazy eviction. */
528
529 STAMPROFILEADV StatProfDteLookup; /**< Profiling of I/O page walk (from memory). */
530 STAMPROFILEADV StatProfIotlbeLookup; /**< Profiling of IOTLB entry lookup (from cache). */
531
532 STAMPROFILEADV StatProfIrteLookup; /**< Profiling of IRTE entry lookup (from memory). */
533 STAMPROFILEADV StatProfIrteCacheLookup; /**< Profiling of IRTE entry lookup (from cache). */
534
535 STAMCOUNTER StatAccessCacheHit; /**< Number of IOTLB cache hits. */
536 STAMCOUNTER StatAccessCacheHitFull; /**< Number of accesses that were fully looked up from the cache. */
537 STAMCOUNTER StatAccessCacheMiss; /**< Number of cache misses (resulting in DTE lookups). */
538 STAMCOUNTER StatAccessCacheNonContig; /**< Number of cache accesses resulting in non-contiguous access. */
539 STAMCOUNTER StatAccessCachePermDenied; /**< Number of cache accesses resulting in insufficient permissions. */
540 STAMCOUNTER StatAccessDteNonContig; /**< Number of DTE accesses resulting in non-contiguous access. */
541 STAMCOUNTER StatAccessDtePermDenied; /**< Number of DTE accesses resulting in insufficient permissions. */
542
543 STAMCOUNTER StatIntrCacheHit; /**< Number of interrupt cache hits. */
544 STAMCOUNTER StatIntrCacheMiss; /**< Number of interrupt cache misses. */
545
546 STAMCOUNTER StatNonStdPageSize; /**< Number of non-standard page size translations. */
547 STAMCOUNTER StatIopfs; /**< Number of I/O page faults. */
548 /** @} */
549#endif
550} IOMMU;
551/** Pointer to the IOMMU device state. */
552typedef IOMMU *PIOMMU;
553/** Pointer to the const IOMMU device state. */
554typedef const IOMMU *PCIOMMU;
555AssertCompileMemberAlignment(IOMMU, hMmio, 8);
556#ifdef IOMMU_WITH_DTE_CACHE
557AssertCompileMemberAlignment(IOMMU, CritSectCache, 8);
558AssertCompileMemberAlignment(IOMMU, aDeviceIds, 8);
559AssertCompileMemberAlignment(IOMMU, aDteCache, 8);
560#endif
561#ifdef IOMMU_WITH_IRTE_CACHE
562AssertCompileMemberAlignment(IOMMU, aIrteCache, 8);
563#endif
564AssertCompileMemberAlignment(IOMMU, IommuBar, 8);
565AssertCompileMemberAlignment(IOMMU, aDevTabBaseAddrs, 8);
566AssertCompileMemberAlignment(IOMMU, CmdBufHeadPtr, 8);
567AssertCompileMemberAlignment(IOMMU, Status, 8);
568
569/**
570 * The ring-3 IOMMU device state.
571 */
572typedef struct IOMMUR3
573{
574 /** Device instance. */
575 PPDMDEVINSR3 pDevInsR3;
576 /** The IOMMU helpers. */
577 R3PTRTYPE(PCPDMIOMMUHLPR3) pIommuHlpR3;
578 /** The command thread handle. */
579 R3PTRTYPE(PPDMTHREAD) pCmdThread;
580#ifdef IOMMU_WITH_IOTLBE_CACHE
581 /** Pointer to array of pre-allocated IOTLBEs. */
582 PIOTLBE paIotlbes;
583 /** Maps [DomainId,Iova] to [IOTLBE]. */
584 AVLU64TREE TreeIotlbe;
585 /** LRU list anchor for IOTLB entries. */
586 RTLISTANCHOR LstLruIotlbe;
587 /** Index of the next unused IOTLB. */
588 uint32_t idxUnusedIotlbe;
589 /** Number of cached IOTLB entries in the tree. */
590 uint32_t cCachedIotlbes;
591#endif
592} IOMMUR3;
593/** Pointer to the ring-3 IOMMU device state. */
594typedef IOMMUR3 *PIOMMUR3;
595/** Pointer to the const ring-3 IOMMU device state. */
596typedef const IOMMUR3 *PCIOMMUR3;
597#ifdef IOMMU_WITH_IOTLBE_CACHE
598AssertCompileMemberAlignment(IOMMUR3, paIotlbes, 8);
599AssertCompileMemberAlignment(IOMMUR3, TreeIotlbe, 8);
600AssertCompileMemberAlignment(IOMMUR3, LstLruIotlbe, 8);
601#endif
602
603/**
604 * The ring-0 IOMMU device state.
605 */
606typedef struct IOMMUR0
607{
608 /** Device instance. */
609 PPDMDEVINSR0 pDevInsR0;
610 /** The IOMMU helpers. */
611 R0PTRTYPE(PCPDMIOMMUHLPR0) pIommuHlpR0;
612} IOMMUR0;
613/** Pointer to the ring-0 IOMMU device state. */
614typedef IOMMUR0 *PIOMMUR0;
615
616/**
617 * The raw-mode IOMMU device state.
618 */
619typedef struct IOMMURC
620{
621 /** Device instance. */
622 PPDMDEVINSRC pDevInsRC;
623 /** The IOMMU helpers. */
624 RCPTRTYPE(PCPDMIOMMUHLPRC) pIommuHlpRC;
625} IOMMURC;
626/** Pointer to the raw-mode IOMMU device state. */
627typedef IOMMURC *PIOMMURC;
628
629/** The IOMMU device state for the current context. */
630typedef CTX_SUFF(IOMMU) IOMMUCC;
631/** Pointer to the IOMMU device state for the current context. */
632typedef CTX_SUFF(PIOMMU) PIOMMUCC;
633
634/**
635 * IOMMU register access.
636 */
637typedef struct IOMMUREGACC
638{
639 const char *pszName;
640 VBOXSTRICTRC (*pfnRead)(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value);
641 VBOXSTRICTRC (*pfnWrite)(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value);
642} IOMMUREGACC;
643/** Pointer to an IOMMU register access. */
644typedef IOMMUREGACC *PIOMMUREGACC;
645/** Pointer to a const IOMMU register access. */
646typedef IOMMUREGACC const *PCIOMMUREGACC;
647
#ifdef IOMMU_WITH_IOTLBE_CACHE
/**
 * IOTLBE flush argument.
 */
typedef struct IOTLBEFLUSHARG
{
    /** The ring-3 IOMMU device state. */
    PIOMMUR3        pIommuR3;
    /** The domain ID to flush. */
    uint16_t        idDomain;
} IOTLBEFLUSHARG;
/** Pointer to an IOTLBE flush argument. */
typedef IOTLBEFLUSHARG *PIOTLBEFLUSHARG;
/** Pointer to a const IOTLBE flush argument. */
typedef IOTLBEFLUSHARG const *PCIOTLBEFLUSHARG;

/**
 * IOTLBE info argument.
 */
typedef struct IOTLBEINFOARG
{
    /** The ring-3 IOMMU device state. */
    PIOMMUR3        pIommuR3;
    /** The info helper. */
    PCDBGFINFOHLP   pHlp;
    /** The domain ID whose IOTLB entries are to be dumped. */
    uint16_t        idDomain;
} IOTLBEINFOARG;
/** Pointer to an IOTLBE info argument. */
typedef IOTLBEINFOARG *PIOTLBEINFOARG;
/** Pointer to a const IOTLBE info argument. */
typedef IOTLBEINFOARG const *PCIOTLBEINFOARG;
#endif
681
682/**
683 * IOMMU operation auxiliary info.
684 */
685typedef struct IOMMUOPAUX
686{
687 /** The IOMMU operation being performed. */
688 IOMMUOP enmOp;
689 /** The device table entry (can be NULL). */
690 PCDTE_T pDte;
691 /** The device ID (bus, device, function). */
692 uint16_t idDevice;
693 /** The domain ID (when the DTE isn't provided). */
694 uint16_t idDomain;
695} IOMMUOPAUX;
696/** Pointer to an I/O address lookup struct. */
697typedef IOMMUOPAUX *PIOMMUOPAUX;
698/** Pointer to a const I/O address lookup struct. */
699typedef IOMMUOPAUX const *PCIOMMUOPAUX;
700
701typedef DECLCALLBACKTYPE(int, FNIOPAGELOOKUP,(PPDMDEVINS pDevIns, uint64_t uIovaPage, uint8_t fPerm, PCIOMMUOPAUX pAux,
702 PIOPAGELOOKUP pPageLookup));
703typedef FNIOPAGELOOKUP *PFNIOPAGELOOKUP;
704
705
706/*********************************************************************************************************************************
707* Global Variables *
708*********************************************************************************************************************************/
#ifdef IN_RING3
/**
 * The number of device table segments supported, indexed by u2DevTabSegSup.
 */
static uint8_t const g_acDevTabSegs[] =
{
    0,
    2,
    4,
    8
};
#endif
716
#if (defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)) || defined(LOG_ENABLED)
/**
 * The IOMMU I/O permission names, indexed by the I/O permission bits.
 */
static const char * const g_aszPerm[] =
{
    "none",
    "read",
    "write",
    "read+write"
};
#endif
723
/**
 * The masks used to select the device table segment index from a device ID.
 */
static uint16_t const g_auDevTabSegMasks[] =
{
    0x0000,
    0x8000,
    0xc000,
    0xe000
};
728
/**
 * The shift counts used to select the device table segment index from a
 * device ID.
 */
static uint8_t const g_auDevTabSegShifts[] =
{
    0,
    15,
    14,
    13
};
734
/**
 * The maximum size (inclusive) of each device table segment (0 to 7), indexed
 * by the device table segment index.
 */
static uint16_t const g_auDevTabSegMaxSizes[] =
{
    0x1ff,  /* segment 0 */
    0xff,   /* segment 1 */
    0x7f,   /* segment 2 */
    0x7f,   /* segment 3 */
    0x3f,   /* segment 4 */
    0x3f,   /* segment 5 */
    0x3f,   /* segment 6 */
    0x3f    /* segment 7 */
};
740
741
742#ifndef VBOX_DEVICE_STRUCT_TESTCASE
743/**
744 * Gets the maximum number of buffer entries for the given buffer length.
745 *
746 * @returns Number of buffer entries.
747 * @param uEncodedLen The length (power-of-2 encoded).
748 */
749DECLINLINE(uint32_t) iommuAmdGetBufMaxEntries(uint8_t uEncodedLen)
750{
751 Assert(uEncodedLen > 7);
752 Assert(uEncodedLen < 16);
753 return 2 << (uEncodedLen - 1);
754}
755
756
757/**
758 * Gets the total length of the buffer given a base register's encoded length.
759 *
760 * @returns The length of the buffer in bytes.
761 * @param uEncodedLen The length (power-of-2 encoded).
762 */
763DECLINLINE(uint32_t) iommuAmdGetTotalBufLength(uint8_t uEncodedLen)
764{
765 Assert(uEncodedLen > 7);
766 Assert(uEncodedLen < 16);
767 return (2 << (uEncodedLen - 1)) << 4;
768}
769
770
771/**
772 * Gets the number of (unconsumed) entries in the event log.
773 *
774 * @returns The number of entries in the event log.
775 * @param pThis The shared IOMMU device state.
776 */
777static uint32_t iommuAmdGetEvtLogEntryCount(PIOMMU pThis)
778{
779 uint32_t const idxTail = pThis->EvtLogTailPtr.n.off >> IOMMU_EVT_GENERIC_SHIFT;
780 uint32_t const idxHead = pThis->EvtLogHeadPtr.n.off >> IOMMU_EVT_GENERIC_SHIFT;
781 if (idxTail >= idxHead)
782 return idxTail - idxHead;
783
784 uint32_t const cMaxEvts = iommuAmdGetBufMaxEntries(pThis->EvtLogBaseAddr.n.u4Len);
785 return cMaxEvts - idxHead + idxTail;
786}
787
788
#if (defined(IN_RING3) && defined(IOMMU_WITH_IOTLBE_CACHE)) || defined(LOG_ENABLED)
/**
 * Gets the descriptive I/O permission name for a memory access.
 *
 * @returns The I/O permission name (an entry of g_aszPerm).
 * @param   fPerm   The I/O permissions for the access, see IOMMU_IO_PERM_XXX.
 */
static const char *iommuAmdMemAccessGetPermName(uint8_t fPerm)
{
    /* An access is expected to be either a read or a write. The assertion
       rejects "none" (0) and values beyond the name table.
       NOTE(review): it does accept the "read+write" index even though such an
       access isn't expected either -- confirm whether that is intentional. */
    Assert(fPerm > 0 && fPerm < RT_ELEMENTS(g_aszPerm));

    /* Mask the permissions so that the table index stays within bounds. */
    uint8_t const idxName = fPerm & IOMMU_IO_PERM_MASK;
    return g_aszPerm[idxName];
}
#endif
803
804
#ifdef IOMMU_WITH_DTE_CACHE
/**
 * Gets the basic I/O device flags for the given device table entry.
 *
 * Only flags that correspond directly to DTE bits are produced here. The
 * translation related flags are only extracted when the DTE is valid and the
 * interrupt related flags only when the interrupt map is valid.
 *
 * @returns The basic I/O device flags, see IOMMU_DTE_CACHE_F_XXX.
 * @param   pDte    The device table entry.
 */
static uint16_t iommuAmdGetBasicDevFlags(PCDTE_T pDte)
{
    uint16_t fFlags = 0;

    /*
     * Basic flags from bits 127:0 of the DTE.
     */
    if (pDte->n.u1Valid)
    {
        /* The I/O permission bits are moved over as a group. */
        uint16_t const fDtePerm = (pDte->au64[0] >> IOMMU_IO_PERM_SHIFT) & IOMMU_IO_PERM_MASK;
        AssertCompile(IOMMU_DTE_CACHE_F_IO_PERM_MASK == IOMMU_IO_PERM_MASK);

        fFlags |= IOMMU_DTE_CACHE_F_VALID;
        fFlags |= fDtePerm << IOMMU_DTE_CACHE_F_IO_PERM_SHIFT;

        /** @todo Skip the conditionals here (shift/mask the relevant bits over). */
        fFlags |= pDte->n.u1SuppressAllPfEvents ? IOMMU_DTE_CACHE_F_SUPPRESS_ALL_IOPF : 0;
        fFlags |= pDte->n.u1SuppressPfEvents    ? IOMMU_DTE_CACHE_F_SUPPRESS_IOPF     : 0;
    }

    /*
     * Basic flags from bits 255:128 of the DTE.
     */
    if (pDte->n.u1IntrMapValid)
    {
        /* The interrupt control bits are moved over as a group. */
        uint16_t const fIntrCtrl = IOMMU_DTE_GET_INTR_CTRL(pDte);
        AssertCompile(IOMMU_DTE_CACHE_F_INTR_CTRL_MASK == IOMMU_DTE_INTR_CTRL_MASK);

        fFlags |= IOMMU_DTE_CACHE_F_INTR_MAP_VALID;
        fFlags |= fIntrCtrl << IOMMU_DTE_CACHE_F_INTR_CTRL_SHIFT;

        /** @todo Skip the conditional here (shift/mask the relevant bit over). */
        fFlags |= pDte->n.u1IgnoreUnmappedIntrs ? IOMMU_DTE_CACHE_F_IGNORE_UNMAPPED_INTR : 0;
    }

    return fFlags;
}
#endif
847
848
849/**
850 * Remaps the source MSI to the destination MSI given the IRTE.
851 *
852 * @param pMsiIn The source MSI.
853 * @param pMsiOut Where to store the remapped MSI.
854 * @param pIrte The IRTE used for the remapping.
855 */
856static void iommuAmdIrteRemapMsi(PCMSIMSG pMsiIn, PMSIMSG pMsiOut, PCIRTE_T pIrte)
857{
858 /* Preserve all bits from the source MSI address and data that don't map 1:1 from the IRTE. */
859 *pMsiOut = *pMsiIn;
860
861 pMsiOut->Addr.n.u1DestMode = pIrte->n.u1DestMode;
862 pMsiOut->Addr.n.u8DestId = pIrte->n.u8Dest;
863
864 pMsiOut->Data.n.u8Vector = pIrte->n.u8Vector;
865 pMsiOut->Data.n.u3DeliveryMode = pIrte->n.u3IntrType;
866}
867
868
869#ifdef IOMMU_WITH_DTE_CACHE
870/**
871 * Looks up an entry in the DTE cache for the given device ID.
872 *
873 * @returns The index of the entry, or the cache capacity if no entry was found.
874 * @param pThis The shared IOMMU device state.
875 * @param idDevice The device ID (bus, device, function).
876 */
877DECLINLINE(uint16_t) iommuAmdDteCacheEntryLookup(PIOMMU pThis, uint16_t idDevice)
878{
879 uint16_t const cDeviceIds = RT_ELEMENTS(pThis->aDeviceIds);
880 for (uint16_t i = 0; i < cDeviceIds; i++)
881 {
882 if (pThis->aDeviceIds[i] == idDevice)
883 return i;
884 }
885 return cDeviceIds;
886}
887
888
889/**
890 * Gets an free/unused DTE cache entry.
891 *
892 * @returns The index of an unused entry, or cache capacity if the cache is full.
893 * @param pThis The shared IOMMU device state.
894 */
895DECLINLINE(uint16_t) iommuAmdDteCacheEntryGetUnused(PCIOMMU pThis)
896{
897 /*
898 * ASSUMES device ID 0 is the PCI host bridge or the IOMMU itself
899 * (the latter being an ugly hack) and cannot be a valid device ID.
900 */
901 uint16_t const cDeviceIds = RT_ELEMENTS(pThis->aDeviceIds);
902 for (uint16_t i = 0; i < cDeviceIds; i++)
903 {
904 if (!pThis->aDeviceIds[i])
905 return i;
906 }
907 return cDeviceIds;
908}
909
910
911/**
912 * Adds a DTE cache entry at the given index.
913 *
914 * @param pThis The shared IOMMU device state.
915 * @param idxDte The index of the DTE cache entry.
916 * @param idDevice The device ID (bus, device, function).
917 * @param fFlags Device flags to set, see IOMMU_DTE_CACHE_F_XXX.
918 * @param idDomain The domain ID.
919 *
920 * @remarks Requires the cache lock to be taken.
921 */
922DECL_FORCE_INLINE(void) iommuAmdDteCacheAddAtIndex(PIOMMU pThis, uint16_t idxDte, uint16_t idDevice, uint16_t fFlags,
923 uint16_t idDomain)
924{
925 pThis->aDeviceIds[idxDte] = idDevice;
926 pThis->aDteCache[idxDte].fFlags = fFlags;
927 pThis->aDteCache[idxDte].idDomain = idDomain;
928}
929
930
931/**
932 * Adds a DTE cache entry.
933 *
934 * @param pDevIns The IOMMU instance data.
935 * @param idDevice The device ID (bus, device, function).
936 * @param pDte The device table entry.
937 */
938static void iommuAmdDteCacheAdd(PPDMDEVINS pDevIns, uint16_t idDevice, PCDTE_T pDte)
939{
940 uint16_t const fFlags = iommuAmdGetBasicDevFlags(pDte) | IOMMU_DTE_CACHE_F_PRESENT;
941 uint16_t const idDomain = pDte->n.u16DomainId;
942
943 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
944 IOMMU_CACHE_LOCK(pDevIns, pThis);
945
946 uint16_t const cDteCache = RT_ELEMENTS(pThis->aDteCache);
947 uint16_t idxDte = iommuAmdDteCacheEntryLookup(pThis, idDevice);
948 if ( idxDte >= cDteCache /* Not found. */
949 && (idxDte = iommuAmdDteCacheEntryGetUnused(pThis)) < cDteCache) /* Get new/unused slot index. */
950 iommuAmdDteCacheAddAtIndex(pThis, idxDte, idDevice, fFlags, idDomain);
951
952 IOMMU_CACHE_UNLOCK(pDevIns, pThis);
953}
954
955
956/**
957 * Updates flags for an existing DTE cache entry given its index.
958 *
959 * @param pThis The shared IOMMU device state.
960 * @param idxDte The index of the DTE cache entry.
961 * @param fOrMask Device flags to add to the existing flags, see
962 * IOMMU_DTE_CACHE_F_XXX.
963 * @param fAndMask Device flags to remove from the existing flags, see
964 * IOMMU_DTE_CACHE_F_XXX.
965 *
966 * @remarks Requires the cache lock to be taken.
967 */
968DECL_FORCE_INLINE(void) iommuAmdDteCacheUpdateFlagsForIndex(PIOMMU pThis, uint16_t idxDte, uint16_t fOrMask, uint16_t fAndMask)
969{
970 uint16_t const fOldFlags = pThis->aDteCache[idxDte].fFlags;
971 uint16_t const fNewFlags = (fOldFlags | fOrMask) & ~fAndMask;
972 Assert(fOldFlags & IOMMU_DTE_CACHE_F_PRESENT);
973 pThis->aDteCache[idxDte].fFlags = fNewFlags;
974}
975
976
#ifdef IOMMU_WITH_IOTLBE_CACHE
/**
 * Updates the flags of a device's DTE cache entry, creating the entry first if
 * the device is not cached yet.
 *
 * If the device is not cached and no unused slot is left, nothing happens.
 *
 * @param   pDevIns     The IOMMU instance data.
 * @param   pDte        The device table entry.
 * @param   idDevice    The device ID (bus, device, function).
 * @param   fOrMask     Device flags to add to the existing flags, see
 *                      IOMMU_DTE_CACHE_F_XXX.
 * @param   fAndMask    Device flags to remove from the existing flags, see
 *                      IOMMU_DTE_CACHE_F_XXX.
 */
static void iommuAmdDteCacheAddOrUpdateFlags(PPDMDEVINS pDevIns, PCDTE_T pDte, uint16_t idDevice, uint16_t fOrMask,
                                             uint16_t fAndMask)
{
    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
    IOMMU_CACHE_LOCK(pDevIns, pThis);

    uint16_t const cDteCache   = RT_ELEMENTS(pThis->aDteCache);
    uint16_t const idxExisting = iommuAmdDteCacheEntryLookup(pThis, idDevice);
    if (idxExisting >= cDteCache)
    {
        /* Not cached yet: build the flags from the DTE and add a new entry if there is room. */
        uint16_t const idxFree = iommuAmdDteCacheEntryGetUnused(pThis);
        if (idxFree < cDteCache)
        {
            uint16_t const fBasic = iommuAmdGetBasicDevFlags(pDte) | IOMMU_DTE_CACHE_F_PRESENT;
            uint16_t const fFlags = (fBasic | fOrMask) & ~fAndMask;
            iommuAmdDteCacheAddAtIndex(pThis, idxFree, idDevice, fFlags, pDte->n.u16DomainId);
        }
        /* else: cache is full, shouldn't really happen. */
    }
    else
    {
        /* Already cached: only adjust the flags. */
        iommuAmdDteCacheUpdateFlagsForIndex(pThis, idxExisting, fOrMask, fAndMask);
    }

    IOMMU_CACHE_UNLOCK(pDevIns, pThis);
}
#endif
1010
1011
1012/**
1013 * Updates flags for an existing DTE cache entry.
1014 *
1015 * @param pDevIns The IOMMU instance data.
1016 * @param idDevice The device ID (bus, device, function).
1017 * @param fOrMask Device flags to add to the existing flags, see
1018 * IOMMU_DTE_CACHE_F_XXX.
1019 * @param fAndMask Device flags to remove from the existing flags, see
1020 * IOMMU_DTE_CACHE_F_XXX.
1021 */
1022static void iommuAmdDteCacheUpdateFlags(PPDMDEVINS pDevIns, uint16_t idDevice, uint16_t fOrMask, uint16_t fAndMask)
1023{
1024 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1025 IOMMU_CACHE_LOCK(pDevIns, pThis);
1026
1027 uint16_t const cDteCache = RT_ELEMENTS(pThis->aDteCache);
1028 uint16_t const idxDte = iommuAmdDteCacheEntryLookup(pThis, idDevice);
1029 if (idxDte < cDteCache)
1030 iommuAmdDteCacheUpdateFlagsForIndex(pThis, idxDte, fOrMask, fAndMask);
1031
1032 IOMMU_CACHE_UNLOCK(pDevIns, pThis);
1033}
1034
1035
1036# ifdef IN_RING3
1037/**
1038 * Removes a DTE cache entry.
1039 *
1040 * @param pDevIns The IOMMU instance data.
1041 * @param idDevice The device ID to remove cache entries for.
1042 */
1043static void iommuAmdDteCacheRemove(PPDMDEVINS pDevIns, uint16_t idDevice)
1044{
1045 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1046 IOMMU_CACHE_LOCK(pDevIns, pThis);
1047
1048 uint16_t const cDteCache = RT_ELEMENTS(pThis->aDteCache);
1049 uint16_t const idxDte = iommuAmdDteCacheEntryLookup(pThis, idDevice);
1050 if (idxDte < cDteCache)
1051 {
1052 pThis->aDteCache[idxDte].fFlags = 0;
1053 pThis->aDteCache[idxDte].idDomain = 0;
1054 }
1055
1056 IOMMU_CACHE_UNLOCK(pDevIns, pThis);
1057}
1058
1059
1060/**
1061 * Removes all entries in the device table entry cache.
1062 *
1063 * @param pDevIns The IOMMU instance data.
1064 */
1065static void iommuAmdDteCacheRemoveAll(PPDMDEVINS pDevIns)
1066{
1067 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1068 IOMMU_CACHE_LOCK(pDevIns, pThis);
1069 RT_ZERO(pThis->aDeviceIds);
1070 RT_ZERO(pThis->aDteCache);
1071 IOMMU_CACHE_UNLOCK(pDevIns, pThis);
1072}
1073# endif /* IN_RING3 */
1074#endif /* IOMMU_WITH_DTE_CACHE */
1075
1076
1077#ifdef IOMMU_WITH_IOTLBE_CACHE
1078/**
1079 * Moves the IOTLB entry to the least recently used slot.
1080 *
1081 * @param pThisR3 The ring-3 IOMMU device state.
1082 * @param pIotlbe The IOTLB entry to move.
1083 */
1084DECLINLINE(void) iommuAmdIotlbEntryMoveToLru(PIOMMUR3 pThisR3, PIOTLBE pIotlbe)
1085{
1086 if (!RTListNodeIsFirst(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru))
1087 {
1088 RTListNodeRemove(&pIotlbe->NdLru);
1089 RTListPrepend(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru);
1090 }
1091}
1092
1093
1094/**
1095 * Moves the IOTLB entry to the most recently used slot.
1096 *
1097 * @param pThisR3 The ring-3 IOMMU device state.
1098 * @param pIotlbe The IOTLB entry to move.
1099 */
1100DECLINLINE(void) iommuAmdIotlbEntryMoveToMru(PIOMMUR3 pThisR3, PIOTLBE pIotlbe)
1101{
1102 if (!RTListNodeIsLast(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru))
1103 {
1104 RTListNodeRemove(&pIotlbe->NdLru);
1105 RTListAppend(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru);
1106 }
1107}
1108
1109
# ifdef IN_RING3
/**
 * Tree enumeration callback that prints an IOTLB entry via the debug info
 * helper if the entry belongs to the requested domain.
 *
 * @returns VINF_SUCCESS.
 * @param   pNode       Pointer to an IOTLB entry to dump info.
 * @param   pvUser      Pointer to an IOTLBEINFOARG.
 */
static DECLCALLBACK(int) iommuAmdR3IotlbEntryInfo(PAVLU64NODECORE pNode, void *pvUser)
{
    /* Sanity check the callback arguments. */
    PCIOTLBEINFOARG pArgs = (PCIOTLBEINFOARG)pvUser;
    AssertPtr(pArgs);
    AssertPtr(pArgs->pIommuR3);
    AssertPtr(pArgs->pHlp);
    //Assert(pArgs->pIommuR3->u32Magic == IOMMU_MAGIC);

    /* Entries belonging to other domains are silently skipped. */
    uint16_t const idDomain = IOMMU_IOTLB_KEY_GET_DOMAIN_ID(pNode->Key);
    if (idDomain != pArgs->idDomain)
        return VINF_SUCCESS;

    /* Gather everything to print. */
    PCIOTLBE        pIotlbe       = (PCIOTLBE)pNode;
    bool const      fEvictPending = pIotlbe->fEvictPending;
    uint8_t const   fPerm         = pIotlbe->PageLookup.fPerm;
    const char     *pszPerm       = iommuAmdMemAccessGetPermName(fPerm);
    uint8_t const   cShift        = pIotlbe->PageLookup.cShift;
    size_t const    cbPage        = RT_BIT_64(cShift);
    RTGCPHYS const  GCPhysSpa     = pIotlbe->PageLookup.GCPhysSpa;
    AVLU64KEY const uKey          = pIotlbe->Core.Key;
    uint64_t const  uIova         = IOMMU_IOTLB_KEY_GET_IOVA(uKey);

    /* Print the entry. */
    PCDBGFINFOHLP pHlp = pArgs->pHlp;
    pHlp->pfnPrintf(pHlp, " Key = %#RX64 (%#RX64)\n", uKey, uIova);
    pHlp->pfnPrintf(pHlp, " GCPhys = %#RGp\n", GCPhysSpa);
    pHlp->pfnPrintf(pHlp, " cShift = %u (%zu bytes)\n", cShift, cbPage);
    pHlp->pfnPrintf(pHlp, " fPerm = %#x (%s)\n", fPerm, pszPerm);
    pHlp->pfnPrintf(pHlp, " fEvictPending = %RTbool\n", fEvictPending);

    return VINF_SUCCESS;
}
# endif /* IN_RING3 */
1151
1152
1153/**
1154 * Removes the IOTLB entry if it's associated with the specified domain ID.
1155 *
1156 * @returns VINF_SUCCESS.
1157 * @param pNode Pointer to an IOTLBE.
1158 * @param pvUser Pointer to an IOTLBEFLUSHARG containing the domain ID.
1159 */
1160static DECLCALLBACK(int) iommuAmdIotlbEntryRemoveDomainId(PAVLU64NODECORE pNode, void *pvUser)
1161{
1162 /* Validate. */
1163 PCIOTLBEFLUSHARG pArgs = (PCIOTLBEFLUSHARG)pvUser;
1164 AssertPtr(pArgs);
1165 AssertPtr(pArgs->pIommuR3);
1166 //Assert(pArgs->pIommuR3->u32Magic == IOMMU_MAGIC);
1167
1168 uint16_t const idDomain = IOMMU_IOTLB_KEY_GET_DOMAIN_ID(pNode->Key);
1169 if (idDomain == pArgs->idDomain)
1170 {
1171 /* Mark this entry is as invalidated and needs to be evicted later. */
1172 PIOTLBE pIotlbe = (PIOTLBE)pNode;
1173 pIotlbe->fEvictPending = true;
1174 iommuAmdIotlbEntryMoveToLru(pArgs->pIommuR3, (PIOTLBE)pNode);
1175 }
1176 return VINF_SUCCESS;
1177}
1178
1179
1180/**
1181 * Destroys an IOTLB entry that's in the tree.
1182 *
1183 * @returns VINF_SUCCESS.
1184 * @param pNode Pointer to an IOTLBE.
1185 * @param pvUser Opaque data. Currently not used, will be NULL.
1186 */
1187static DECLCALLBACK(int) iommuAmdIotlbEntryDestroy(PAVLU64NODECORE pNode, void *pvUser)
1188{
1189 RT_NOREF(pvUser);
1190 PIOTLBE pIotlbe = (PIOTLBE)pNode;
1191 Assert(pIotlbe);
1192 pIotlbe->NdLru.pNext = NULL;
1193 pIotlbe->NdLru.pPrev = NULL;
1194 RT_ZERO(pIotlbe->PageLookup);
1195 pIotlbe->fEvictPending = false;
1196 return VINF_SUCCESS;
1197}
1198
1199
1200/**
1201 * Inserts an IOTLB entry into the cache.
1202 *
1203 * @param pThis The shared IOMMU device state.
1204 * @param pThisR3 The ring-3 IOMMU device state.
1205 * @param pIotlbe The IOTLB entry to initialize and insert.
1206 * @param idDomain The domain ID.
1207 * @param uIova The I/O virtual address.
1208 * @param pPageLookup The I/O page lookup result of the access.
1209 */
1210static void iommuAmdIotlbEntryInsert(PIOMMU pThis, PIOMMUR3 pThisR3, PIOTLBE pIotlbe, uint16_t idDomain, uint64_t uIova,
1211 PCIOPAGELOOKUP pPageLookup)
1212{
1213 /* Initialize the IOTLB entry with results of the I/O page walk. */
1214 AVLU64KEY const uKey = IOMMU_IOTLB_KEY_MAKE(idDomain, uIova);
1215 Assert(uKey != IOMMU_IOTLB_KEY_NIL);
1216
1217 /* Check if the entry already exists. */
1218 PIOTLBE pFound = (PIOTLBE)RTAvlU64Get(&pThisR3->TreeIotlbe, uKey);
1219 if (!pFound)
1220 {
1221 /* Insert the entry into the cache. */
1222 pIotlbe->Core.Key = uKey;
1223 pIotlbe->PageLookup = *pPageLookup;
1224 Assert(!pIotlbe->fEvictPending);
1225
1226 bool const fInserted = RTAvlU64Insert(&pThisR3->TreeIotlbe, &pIotlbe->Core);
1227 Assert(fInserted); NOREF(fInserted);
1228 Assert(pThisR3->cCachedIotlbes < IOMMU_IOTLBE_MAX);
1229 ++pThisR3->cCachedIotlbes;
1230 STAM_COUNTER_INC(&pThis->StatIotlbeCached); NOREF(pThis);
1231 }
1232 else
1233 {
1234 /* Update the existing entry. */
1235 Assert(pFound->Core.Key == uKey);
1236 if (pFound->fEvictPending)
1237 {
1238 pFound->fEvictPending = false;
1239 STAM_COUNTER_INC(&pThis->StatIotlbeLazyEvictReuse); NOREF(pThis);
1240 }
1241 pFound->PageLookup = *pPageLookup;
1242 }
1243}
1244
1245
1246/**
1247 * Removes an IOTLB entry from the cache for the given key.
1248 *
1249 * @returns Pointer to the removed IOTLB entry, NULL if the entry wasn't found in
1250 * the tree.
1251 * @param pThis The shared IOMMU device state.
1252 * @param pThisR3 The ring-3 IOMMU device state.
1253 * @param uKey The key of the IOTLB entry to remove.
1254 */
1255static PIOTLBE iommuAmdIotlbEntryRemove(PIOMMU pThis, PIOMMUR3 pThisR3, AVLU64KEY uKey)
1256{
1257 PIOTLBE pIotlbe = (PIOTLBE)RTAvlU64Remove(&pThisR3->TreeIotlbe, uKey);
1258 if (pIotlbe)
1259 {
1260 if (pIotlbe->fEvictPending)
1261 STAM_COUNTER_INC(&pThis->StatIotlbeLazyEvictReuse);
1262
1263 RT_ZERO(pIotlbe->Core);
1264 RT_ZERO(pIotlbe->PageLookup);
1265 /* We must not erase the LRU node connections here! */
1266 pIotlbe->fEvictPending = false;
1267 Assert(pIotlbe->Core.Key == IOMMU_IOTLB_KEY_NIL);
1268
1269 Assert(pThisR3->cCachedIotlbes > 0);
1270 --pThisR3->cCachedIotlbes;
1271 STAM_COUNTER_DEC(&pThis->StatIotlbeCached); NOREF(pThis);
1272 }
1273 return pIotlbe;
1274}
1275
1276
1277/**
1278 * Looks up an IOTLB from the cache.
1279 *
1280 * @returns Pointer to IOTLB entry if found, NULL otherwise.
1281 * @param pThis The shared IOMMU device state.
1282 * @param pThisR3 The ring-3 IOMMU device state.
1283 * @param idDomain The domain ID.
1284 * @param uIova The I/O virtual address.
1285 */
1286static PIOTLBE iommuAmdIotlbLookup(PIOMMU pThis, PIOMMUR3 pThisR3, uint64_t idDomain, uint64_t uIova)
1287{
1288 RT_NOREF(pThis);
1289
1290 uint64_t const uKey = IOMMU_IOTLB_KEY_MAKE(idDomain, uIova);
1291 PIOTLBE pIotlbe = (PIOTLBE)RTAvlU64Get(&pThisR3->TreeIotlbe, uKey);
1292 if ( pIotlbe
1293 && !pIotlbe->fEvictPending)
1294 return pIotlbe;
1295
1296 /*
1297 * Domain Id wildcard invalidations only marks entries for eviction later but doesn't remove
1298 * them from the cache immediately. We found an entry pending eviction, just return that
1299 * nothing was found (rather than evicting now).
1300 */
1301 return NULL;
1302}
1303
1304
1305/**
1306 * Adds an IOTLB entry to the cache.
1307 *
1308 * @param pThis The shared IOMMU device state.
1309 * @param pThisR3 The ring-3 IOMMU device state.
1310 * @param idDomain The domain ID.
1311 * @param uIovaPage The I/O virtual address (must be 4K aligned).
1312 * @param pPageLookup The I/O page lookup result of the access.
1313 */
1314static void iommuAmdIotlbAdd(PIOMMU pThis, PIOMMUR3 pThisR3, uint16_t idDomain, uint64_t uIovaPage, PCIOPAGELOOKUP pPageLookup)
1315{
1316 Assert(!(uIovaPage & X86_PAGE_4K_OFFSET_MASK));
1317 Assert(pPageLookup);
1318 Assert(pPageLookup->cShift <= 51);
1319 Assert(pPageLookup->fPerm != IOMMU_IO_PERM_NONE);
1320
1321 /*
1322 * If there are no unused IOTLB entries, evict the LRU entry.
1323 * Otherwise, get a new IOTLB entry from the pre-allocated list.
1324 */
1325 if (pThisR3->idxUnusedIotlbe == IOMMU_IOTLBE_MAX)
1326 {
1327 /* Grab the least recently used entry. */
1328 PIOTLBE pIotlbe = RTListGetFirst(&pThisR3->LstLruIotlbe, IOTLBE, NdLru);
1329 Assert(pIotlbe);
1330
1331 /* If the entry is in the cache, remove it. */
1332 if (pIotlbe->Core.Key != IOMMU_IOTLB_KEY_NIL)
1333 iommuAmdIotlbEntryRemove(pThis, pThisR3, pIotlbe->Core.Key);
1334
1335 /* Initialize and insert the IOTLB entry into the cache. */
1336 iommuAmdIotlbEntryInsert(pThis, pThisR3, pIotlbe, idDomain, uIovaPage, pPageLookup);
1337
1338 /* Move the entry to the most recently used slot. */
1339 iommuAmdIotlbEntryMoveToMru(pThisR3, pIotlbe);
1340 }
1341 else
1342 {
1343 /* Grab an unused IOTLB entry from the pre-allocated list. */
1344 PIOTLBE pIotlbe = &pThisR3->paIotlbes[pThisR3->idxUnusedIotlbe];
1345 ++pThisR3->idxUnusedIotlbe;
1346
1347 /* Initialize and insert the IOTLB entry into the cache. */
1348 iommuAmdIotlbEntryInsert(pThis, pThisR3, pIotlbe, idDomain, uIovaPage, pPageLookup);
1349
1350 /* Add the entry to the most recently used slot. */
1351 RTListAppend(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru);
1352 }
1353}
1354
1355
1356/**
1357 * Removes all IOTLB entries from the cache.
1358 *
1359 * @param pDevIns The IOMMU instance data.
1360 */
1361static void iommuAmdIotlbRemoveAll(PPDMDEVINS pDevIns)
1362{
1363 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1364 PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
1365 IOMMU_CACHE_LOCK(pDevIns, pThis);
1366
1367 if (pThisR3->cCachedIotlbes > 0)
1368 {
1369 RTAvlU64Destroy(&pThisR3->TreeIotlbe, iommuAmdIotlbEntryDestroy, NULL /* pvParam */);
1370 RTListInit(&pThisR3->LstLruIotlbe);
1371 pThisR3->idxUnusedIotlbe = 0;
1372 pThisR3->cCachedIotlbes = 0;
1373 STAM_COUNTER_RESET(&pThis->StatIotlbeCached);
1374 }
1375
1376 IOMMU_CACHE_UNLOCK(pDevIns, pThis);
1377}
1378
1379
1380/**
1381 * Removes IOTLB entries for the range of I/O virtual addresses and the specified
1382 * domain ID from the cache.
1383 *
1384 * @param pDevIns The IOMMU instance data.
1385 * @param idDomain The domain ID.
1386 * @param uIova The I/O virtual address to invalidate.
1387 * @param cbInvalidate The size of the invalidation (must be 4K aligned).
1388 */
1389static void iommuAmdIotlbRemoveRange(PPDMDEVINS pDevIns, uint16_t idDomain, uint64_t uIova, size_t cbInvalidate)
1390{
1391 /* Validate. */
1392 Assert(!(uIova & X86_PAGE_4K_OFFSET_MASK));
1393 Assert(!(cbInvalidate & X86_PAGE_4K_OFFSET_MASK));
1394 Assert(cbInvalidate >= X86_PAGE_4K_SIZE);
1395
1396 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1397 PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
1398 IOMMU_CACHE_LOCK(pDevIns, pThis);
1399
1400 do
1401 {
1402 uint64_t const uKey = IOMMU_IOTLB_KEY_MAKE(idDomain, uIova);
1403 PIOTLBE pIotlbe = iommuAmdIotlbEntryRemove(pThis, pThisR3, uKey);
1404 if (pIotlbe)
1405 iommuAmdIotlbEntryMoveToLru(pThisR3, pIotlbe);
1406 uIova += X86_PAGE_4K_SIZE;
1407 cbInvalidate -= X86_PAGE_4K_SIZE;
1408 } while (cbInvalidate > 0);
1409
1410 IOMMU_CACHE_UNLOCK(pDevIns, pThis);
1411}
1412
1413
1414/**
1415 * Removes all IOTLB entries for the specified domain ID.
1416 *
1417 * @param pDevIns The IOMMU instance data.
1418 * @param idDomain The domain ID.
1419 */
1420static void iommuAmdIotlbRemoveDomainId(PPDMDEVINS pDevIns, uint16_t idDomain)
1421{
1422 /*
1423 * We need to iterate the tree and search based on the domain ID.
1424 * But it seems we cannot remove items while iterating the tree.
1425 * Thus, we simply mark entries for eviction later but move them to the LRU
1426 * so they will eventually get evicted and re-cycled as the cache gets re-populated.
1427 */
1428 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1429 PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
1430 IOMMU_CACHE_LOCK(pDevIns, pThis);
1431
1432 IOTLBEFLUSHARG Args;
1433 Args.pIommuR3 = pThisR3;
1434 Args.idDomain = idDomain;
1435 RTAvlU64DoWithAll(&pThisR3->TreeIotlbe, true /* fFromLeft */, iommuAmdIotlbEntryRemoveDomainId, &Args);
1436
1437 IOMMU_CACHE_UNLOCK(pDevIns, pThis);
1438}
1439
1440
1441/**
1442 * Adds or updates IOTLB entries for the given range of I/O virtual addresses.
1443 *
1444 * @param pDevIns The IOMMU instance data.
1445 * @param idDomain The domain ID.
1446 * @param uIovaPage The I/O virtual address (must be 4K aligned).
1447 * @param cbContiguous The size of the access.
1448 * @param pAddrOut The translated I/O address lookup.
1449 *
1450 * @remarks All pages in the range specified by @c cbContiguous must have identical
1451 * permissions and page sizes.
1452 */
1453static void iommuAmdIotlbAddRange(PPDMDEVINS pDevIns, uint16_t idDomain, uint64_t uIovaPage, size_t cbContiguous,
1454 PCIOPAGELOOKUP pAddrOut)
1455{
1456 Assert(!(uIovaPage & X86_PAGE_4K_OFFSET_MASK));
1457
1458 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1459 PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
1460
1461 IOPAGELOOKUP PageLookup;
1462 PageLookup.GCPhysSpa = pAddrOut->GCPhysSpa & X86_PAGE_4K_BASE_MASK;
1463 PageLookup.cShift = pAddrOut->cShift;
1464 PageLookup.fPerm = pAddrOut->fPerm;
1465
1466 size_t const cbIova = RT_ALIGN_Z(cbContiguous, X86_PAGE_4K_SIZE);
1467 Assert(!(cbIova & X86_PAGE_4K_OFFSET_MASK));
1468 Assert(cbIova >= X86_PAGE_4K_SIZE);
1469
1470 size_t cPages = cbIova / X86_PAGE_4K_SIZE;
1471 cPages = RT_MIN(cPages, IOMMU_IOTLBE_MAX);
1472
1473 IOMMU_CACHE_LOCK(pDevIns, pThis);
1474 /** @todo Re-check DTE cache? */
1475 /*
1476 * Add IOTLB entries for every page in the access.
1477 * The page size and permissions are assumed to be identical to every
1478 * page in this access.
1479 */
1480 while (cPages > 0)
1481 {
1482 iommuAmdIotlbAdd(pThis, pThisR3, idDomain, uIovaPage, &PageLookup);
1483 uIovaPage += X86_PAGE_4K_SIZE;
1484 PageLookup.GCPhysSpa += X86_PAGE_4K_SIZE;
1485 --cPages;
1486 }
1487 IOMMU_CACHE_UNLOCK(pDevIns, pThis);
1488}
1489#endif /* IOMMU_WITH_IOTLBE_CACHE */
1490
1491
1492#ifdef IOMMU_WITH_IRTE_CACHE
1493/**
1494 * Looks up an IRTE cache entry.
1495 *
1496 * @returns Index of the found entry, or cache capacity if not found.
1497 * @param pThis The shared IOMMU device state.
1498 * @param idDevice The device ID (bus, device, function).
1499 * @param offIrte The offset into the interrupt remap table.
1500 */
1501static uint16_t iommuAmdIrteCacheEntryLookup(PCIOMMU pThis, uint16_t idDevice, uint16_t offIrte)
1502{
1503 /** @todo Consider sorting and binary search when the cache capacity grows.
1504 * For the IRTE cache this should be okay since typically guests do not alter the
1505 * interrupt remapping once programmed, so hopefully sorting shouldn't happen
1506 * often. */
1507 uint32_t const uKey = IOMMU_IRTE_CACHE_KEY_MAKE(idDevice, offIrte);
1508 uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache);
1509 for (uint16_t i = 0; i < cIrteCache; i++)
1510 if (pThis->aIrteCache[i].uKey == uKey)
1511 return i;
1512 return cIrteCache;
1513}
1514
1515
1516/**
1517 * Gets a free/unused IRTE cache entry.
1518 *
1519 * @returns The index of an unused entry, or cache capacity if the cache is full.
1520 * @param pThis The shared IOMMU device state.
1521 */
1522static uint16_t iommuAmdIrteCacheEntryGetUnused(PCIOMMU pThis)
1523{
1524 uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache);
1525 for (uint16_t i = 0; i < cIrteCache; i++)
1526 if (pThis->aIrteCache[i].uKey == IOMMU_IRTE_CACHE_KEY_NIL)
1527 {
1528 Assert(!pThis->aIrteCache[i].Irte.u32);
1529 return i;
1530 }
1531 return cIrteCache;
1532}
1533
1534
1535/**
1536 * Looks up the IRTE cache for the given MSI.
1537 *
1538 * @returns VBox status code.
1539 * @param pDevIns The IOMMU instance data.
1540 * @param idDevice The device ID (bus, device, function).
1541 * @param enmOp The IOMMU operation being performed.
1542 * @param pMsiIn The source MSI.
1543 * @param pMsiOut Where to store the remapped MSI.
1544 */
1545static int iommuAmdIrteCacheLookup(PPDMDEVINS pDevIns, uint16_t idDevice, IOMMUOP enmOp, PCMSIMSG pMsiIn, PMSIMSG pMsiOut)
1546{
1547 RT_NOREF(enmOp); /* May need it if we have to report errors (currently we fallback to the slower path to do that). */
1548
1549 int rc = VERR_NOT_FOUND;
1550 /* Deal with such cases in the slower/fallback path. */
1551 if ((pMsiIn->Addr.u64 & VBOX_MSI_ADDR_ADDR_MASK) == VBOX_MSI_ADDR_BASE)
1552 { /* likely */ }
1553 else
1554 return rc;
1555
1556 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1557 IOMMU_CACHE_LOCK(pDevIns, pThis);
1558
1559 uint16_t const idxDteCache = iommuAmdDteCacheEntryLookup(pThis, idDevice);
1560 if (idxDteCache < RT_ELEMENTS(pThis->aDteCache))
1561 {
1562 PCDTECACHE pDteCache = &pThis->aDteCache[idxDteCache];
1563 if ((pDteCache->fFlags & (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_INTR_MAP_VALID))
1564 == (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_INTR_MAP_VALID))
1565 {
1566 Assert((pMsiIn->Addr.u64 & VBOX_MSI_ADDR_ADDR_MASK) == VBOX_MSI_ADDR_BASE); /* Paranoia. */
1567
1568 /* Currently, we only cache remapping of fixed and arbitrated interrupts. */
1569 uint8_t const u8DeliveryMode = pMsiIn->Data.n.u3DeliveryMode;
1570 if (u8DeliveryMode <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO)
1571 {
1572 uint8_t const uIntrCtrl = (pDteCache->fFlags >> IOMMU_DTE_CACHE_F_INTR_CTRL_SHIFT)
1573 & IOMMU_DTE_CACHE_F_INTR_CTRL_MASK;
1574 if (uIntrCtrl == IOMMU_INTR_CTRL_REMAP)
1575 {
1576 /* Interrupt table length has been verified prior to adding entries to the cache. */
1577 uint16_t const offIrte = IOMMU_GET_IRTE_OFF(pMsiIn->Data.u32);
1578 uint16_t const idxIrteCache = iommuAmdIrteCacheEntryLookup(pThis, idDevice, offIrte);
1579 if (idxIrteCache < RT_ELEMENTS(pThis->aIrteCache))
1580 {
1581 PCIRTE_T pIrte = &pThis->aIrteCache[idxIrteCache].Irte;
1582 Assert(pIrte->n.u1RemapEnable);
1583 Assert(pIrte->n.u3IntrType <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO);
1584 iommuAmdIrteRemapMsi(pMsiIn, pMsiOut, pIrte);
1585 rc = VINF_SUCCESS;
1586 }
1587 }
1588 else if (uIntrCtrl == IOMMU_INTR_CTRL_FWD_UNMAPPED)
1589 {
1590 *pMsiOut = *pMsiIn;
1591 rc = VINF_SUCCESS;
1592 }
1593 }
1594 }
1595 else if (pDteCache->fFlags & IOMMU_DTE_CACHE_F_PRESENT)
1596 {
1597 *pMsiOut = *pMsiIn;
1598 rc = VINF_SUCCESS;
1599 }
1600 }
1601
1602 IOMMU_CACHE_UNLOCK(pDevIns, pThis);
1603 return rc;
1604}
1605
1606
1607/**
1608 * Adds or updates the IRTE cache for the given IRTE.
1609 *
1610 * @returns VBox status code.
1611 * @retval VERR_OUT_OF_RESOURCES if the cache is full.
1612 *
1613 * @param pDevIns The IOMMU instance data.
1614 * @param idDevice The device ID (bus, device, function).
1615 * @param offIrte The offset into the interrupt remap table.
1616 * @param pIrte The IRTE to cache.
1617 */
1618static int iommuAmdIrteCacheAdd(PPDMDEVINS pDevIns, uint16_t idDevice, uint16_t offIrte, PCIRTE_T pIrte)
1619{
1620 Assert(offIrte != 0xffff); /* Shouldn't be a valid IRTE table offset since sizeof(IRTE) is a multiple of 4. */
1621
1622 int rc = VINF_SUCCESS;
1623 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1624 Assert(idDevice != pThis->uPciAddress);
1625 IOMMU_CACHE_LOCK(pDevIns, pThis);
1626
1627 /* Find an existing entry or get an unused slot. */
1628 uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache);
1629 uint16_t idxIrteCache = iommuAmdIrteCacheEntryLookup(pThis, idDevice, offIrte);
1630 if ( idxIrteCache < cIrteCache
1631 || (idxIrteCache = iommuAmdIrteCacheEntryGetUnused(pThis)) < cIrteCache)
1632 {
1633 pThis->aIrteCache[idxIrteCache].uKey = IOMMU_IRTE_CACHE_KEY_MAKE(idDevice, offIrte);
1634 pThis->aIrteCache[idxIrteCache].Irte = *pIrte;
1635 }
1636 else
1637 rc = VERR_OUT_OF_RESOURCES;
1638
1639 IOMMU_CACHE_UNLOCK(pDevIns, pThis);
1640 return rc;
1641}
1642
1643
1644# ifdef IN_RING3
1645/**
1646 * Removes IRTE cache entries for the given device ID.
1647 *
1648 * @param pDevIns The IOMMU instance data.
1649 * @param idDevice The device ID (bus, device, function).
1650 */
1651static void iommuAmdIrteCacheRemove(PPDMDEVINS pDevIns, uint16_t idDevice)
1652{
1653 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1654 IOMMU_CACHE_LOCK(pDevIns, pThis);
1655 uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache);
1656 for (uint16_t i = 0; i < cIrteCache; i++)
1657 {
1658 PIRTECACHE pIrteCache = &pThis->aIrteCache[i];
1659 if (idDevice == IOMMU_IRTE_CACHE_KEY_GET_DEVICE_ID(pIrteCache->uKey))
1660 {
1661 pIrteCache->uKey = IOMMU_IRTE_CACHE_KEY_NIL;
1662 pIrteCache->Irte.u32 = 0;
1663 /* There could multiple IRTE entries for a device ID, continue searching. */
1664 }
1665 }
1666 IOMMU_CACHE_UNLOCK(pDevIns, pThis);
1667}
1668
1669
1670/**
1671 * Removes all IRTE cache entries.
1672 *
1673 * @param pDevIns The IOMMU instance data.
1674 */
1675static void iommuAmdIrteCacheRemoveAll(PPDMDEVINS pDevIns)
1676{
1677 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1678 IOMMU_CACHE_LOCK(pDevIns, pThis);
1679 uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache);
1680 for (uint16_t i = 0; i < cIrteCache; i++)
1681 {
1682 pThis->aIrteCache[i].uKey = IOMMU_IRTE_CACHE_KEY_NIL;
1683 pThis->aIrteCache[i].Irte.u32 = 0;
1684 }
1685 IOMMU_CACHE_UNLOCK(pDevIns, pThis);
1686}
1687# endif /* IN_RING3 */
1688#endif /* IOMMU_WITH_IRTE_CACHE */
1689
1690
1691/**
1692 * Atomically reads the control register without locking the IOMMU device.
1693 *
1694 * @returns The control register.
1695 * @param pThis The shared IOMMU device state.
1696 */
1697DECL_FORCE_INLINE(IOMMU_CTRL_T) iommuAmdGetCtrlUnlocked(PCIOMMU pThis)
1698{
1699 IOMMU_CTRL_T Ctrl;
1700 Ctrl.u64 = ASMAtomicReadU64((volatile uint64_t *)&pThis->Ctrl.u64);
1701 return Ctrl;
1702}
1703
1704
1705/**
1706 * Returns whether MSI is enabled for the IOMMU.
1707 *
1708 * @returns Whether MSI is enabled.
1709 * @param pDevIns The IOMMU device instance.
1710 *
1711 * @note There should be a PCIDevXxx function for this.
1712 */
1713static bool iommuAmdIsMsiEnabled(PPDMDEVINS pDevIns)
1714{
1715 MSI_CAP_HDR_T MsiCapHdr;
1716 MsiCapHdr.u32 = PDMPciDevGetDWord(pDevIns->apPciDevs[0], IOMMU_PCI_OFF_MSI_CAP_HDR);
1717 return MsiCapHdr.n.u1MsiEnable;
1718}
1719
1720
1721/**
1722 * Signals a PCI target abort.
1723 *
1724 * @param pDevIns The IOMMU device instance.
1725 */
1726static void iommuAmdSetPciTargetAbort(PPDMDEVINS pDevIns)
1727{
1728 PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
1729 uint16_t const u16Status = PDMPciDevGetStatus(pPciDev) | VBOX_PCI_STATUS_SIG_TARGET_ABORT;
1730 PDMPciDevSetStatus(pPciDev, u16Status);
1731}
1732
1733
1734/**
1735 * Wakes up the command thread if there are commands to be processed.
1736 *
1737 * @param pDevIns The IOMMU device instance.
1738 *
1739 * @remarks The IOMMU lock must be held while calling this!
1740 */
1741static void iommuAmdCmdThreadWakeUpIfNeeded(PPDMDEVINS pDevIns)
1742{
1743 Log4Func(("\n"));
1744
1745 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
1746 if ( pThis->Status.n.u1CmdBufRunning
1747 && pThis->CmdBufTailPtr.n.off != pThis->CmdBufHeadPtr.n.off
1748 && !ASMAtomicXchgBool(&pThis->fCmdThreadSignaled, true))
1749 {
1750 Log4Func(("Signaling command thread\n"));
1751 PDMDevHlpSUPSemEventSignal(pDevIns, pThis->hEvtCmdThread);
1752 }
1753}
1754
1755
1756/**
1757 * Reads the Device Table Base Address Register.
1758 */
1759static VBOXSTRICTRC iommuAmdDevTabBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1760{
1761 RT_NOREF(pDevIns, offReg);
1762 *pu64Value = pThis->aDevTabBaseAddrs[0].u64;
1763 return VINF_SUCCESS;
1764}
1765
1766
1767/**
1768 * Reads the Command Buffer Base Address Register.
1769 */
1770static VBOXSTRICTRC iommuAmdCmdBufBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1771{
1772 RT_NOREF(pDevIns, offReg);
1773 *pu64Value = pThis->CmdBufBaseAddr.u64;
1774 return VINF_SUCCESS;
1775}
1776
1777
1778/**
1779 * Reads the Event Log Base Address Register.
1780 */
1781static VBOXSTRICTRC iommuAmdEvtLogBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1782{
1783 RT_NOREF(pDevIns, offReg);
1784 *pu64Value = pThis->EvtLogBaseAddr.u64;
1785 return VINF_SUCCESS;
1786}
1787
1788
1789/**
1790 * Reads the Control Register.
1791 */
1792static VBOXSTRICTRC iommuAmdCtrl_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1793{
1794 RT_NOREF(pDevIns, offReg);
1795 *pu64Value = pThis->Ctrl.u64;
1796 return VINF_SUCCESS;
1797}
1798
1799
1800/**
1801 * Reads the Exclusion Range Base Address Register.
1802 */
1803static VBOXSTRICTRC iommuAmdExclRangeBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1804{
1805 RT_NOREF(pDevIns, offReg);
1806 *pu64Value = pThis->ExclRangeBaseAddr.u64;
1807 return VINF_SUCCESS;
1808}
1809
1810
1811/**
1812 * Reads to the Exclusion Range Limit Register.
1813 */
1814static VBOXSTRICTRC iommuAmdExclRangeLimit_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1815{
1816 RT_NOREF(pDevIns, offReg);
1817 *pu64Value = pThis->ExclRangeLimit.u64;
1818 return VINF_SUCCESS;
1819}
1820
1821
1822/**
1823 * Reads to the Extended Feature Register.
1824 */
1825static VBOXSTRICTRC iommuAmdExtFeat_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1826{
1827 RT_NOREF(pDevIns, offReg);
1828 *pu64Value = pThis->ExtFeat.u64;
1829 return VINF_SUCCESS;
1830}
1831
1832
1833/**
1834 * Reads to the PPR Log Base Address Register.
1835 */
1836static VBOXSTRICTRC iommuAmdPprLogBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1837{
1838 RT_NOREF(pDevIns, offReg);
1839 *pu64Value = pThis->PprLogBaseAddr.u64;
1840 return VINF_SUCCESS;
1841}
1842
1843
1844/**
1845 * Writes the Hardware Event Register (Hi).
1846 */
1847static VBOXSTRICTRC iommuAmdHwEvtHi_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1848{
1849 RT_NOREF(pDevIns, offReg);
1850 *pu64Value = pThis->HwEvtHi.u64;
1851 return VINF_SUCCESS;
1852}
1853
1854
1855/**
1856 * Reads the Hardware Event Register (Lo).
1857 */
1858static VBOXSTRICTRC iommuAmdHwEvtLo_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1859{
1860 RT_NOREF(pDevIns, offReg);
1861 *pu64Value = pThis->HwEvtLo;
1862 return VINF_SUCCESS;
1863}
1864
1865
1866/**
1867 * Reads the Hardware Event Status Register.
1868 */
1869static VBOXSTRICTRC iommuAmdHwEvtStatus_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1870{
1871 RT_NOREF(pDevIns, offReg);
1872 *pu64Value = pThis->HwEvtStatus.u64;
1873 return VINF_SUCCESS;
1874}
1875
1876
1877/**
1878 * Reads to the GA Log Base Address Register.
1879 */
1880static VBOXSTRICTRC iommuAmdGALogBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1881{
1882 RT_NOREF(pDevIns, offReg);
1883 *pu64Value = pThis->GALogBaseAddr.u64;
1884 return VINF_SUCCESS;
1885}
1886
1887
1888/**
1889 * Reads to the PPR Log B Base Address Register.
1890 */
1891static VBOXSTRICTRC iommuAmdPprLogBBaseAddr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1892{
1893 RT_NOREF(pDevIns, offReg);
1894 *pu64Value = pThis->PprLogBBaseAddr.u64;
1895 return VINF_SUCCESS;
1896}
1897
1898
1899/**
1900 * Reads to the Event Log B Base Address Register.
1901 */
1902static VBOXSTRICTRC iommuAmdEvtLogBBaseAddr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1903{
1904 RT_NOREF(pDevIns, offReg);
1905 *pu64Value = pThis->EvtLogBBaseAddr.u64;
1906 return VINF_SUCCESS;
1907}
1908
1909
1910/**
1911 * Reads the Device Table Segment Base Address Register.
1912 */
1913static VBOXSTRICTRC iommuAmdDevTabSegBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1914{
1915 RT_NOREF(pDevIns);
1916
1917 /* Figure out which segment is being written. */
1918 uint8_t const offSegment = (offReg - IOMMU_MMIO_OFF_DEV_TAB_SEG_FIRST) >> 3;
1919 uint8_t const idxSegment = offSegment + 1;
1920 Assert(idxSegment < RT_ELEMENTS(pThis->aDevTabBaseAddrs));
1921
1922 *pu64Value = pThis->aDevTabBaseAddrs[idxSegment].u64;
1923 return VINF_SUCCESS;
1924}
1925
1926
1927/**
1928 * Reads the Device Specific Feature Extension (DSFX) Register.
1929 */
1930static VBOXSTRICTRC iommuAmdDevSpecificFeat_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1931{
1932 RT_NOREF(pDevIns, offReg);
1933 *pu64Value = pThis->DevSpecificFeat.u64;
1934 return VINF_SUCCESS;
1935}
1936
1937/**
1938 * Reads the Device Specific Control Extension (DSCX) Register.
1939 */
1940static VBOXSTRICTRC iommuAmdDevSpecificCtrl_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1941{
1942 RT_NOREF(pDevIns, offReg);
1943 *pu64Value = pThis->DevSpecificCtrl.u64;
1944 return VINF_SUCCESS;
1945}
1946
1947
1948/**
1949 * Reads the Device Specific Status Extension (DSSX) Register.
1950 */
1951static VBOXSTRICTRC iommuAmdDevSpecificStatus_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1952{
1953 RT_NOREF(pDevIns, offReg);
1954 *pu64Value = pThis->DevSpecificStatus.u64;
1955 return VINF_SUCCESS;
1956}
1957
1958
1959/**
1960 * Reads the MSI Vector Register 0 (32-bit) and the MSI Vector Register 1 (32-bit).
1961 */
1962static VBOXSTRICTRC iommuAmdDevMsiVector_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1963{
1964 RT_NOREF(pDevIns, offReg);
1965 uint32_t const uLo = pThis->MiscInfo.au32[0];
1966 uint32_t const uHi = pThis->MiscInfo.au32[1];
1967 *pu64Value = RT_MAKE_U64(uLo, uHi);
1968 return VINF_SUCCESS;
1969}
1970
1971
1972/**
1973 * Reads the MSI Capability Header Register (32-bit) and the MSI Address (Lo)
1974 * Register (32-bit).
1975 */
1976static VBOXSTRICTRC iommuAmdMsiCapHdrAndAddrLo_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1977{
1978 RT_NOREF(pThis, offReg);
1979 PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
1980 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
1981 uint32_t const uLo = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_CAP_HDR);
1982 uint32_t const uHi = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_LO);
1983 *pu64Value = RT_MAKE_U64(uLo, uHi);
1984 return VINF_SUCCESS;
1985}
1986
1987
1988/**
1989 * Reads the MSI Address (Hi) Register (32-bit) and the MSI data register (32-bit).
1990 */
1991static VBOXSTRICTRC iommuAmdMsiAddrHiAndData_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
1992{
1993 RT_NOREF(pThis, offReg);
1994 PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
1995 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
1996 uint32_t const uLo = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_HI);
1997 uint32_t const uHi = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_DATA);
1998 *pu64Value = RT_MAKE_U64(uLo, uHi);
1999 return VINF_SUCCESS;
2000}
2001
2002
2003/**
2004 * Reads the Command Buffer Head Pointer Register.
2005 */
2006static VBOXSTRICTRC iommuAmdCmdBufHeadPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
2007{
2008 RT_NOREF(pDevIns, offReg);
2009 *pu64Value = pThis->CmdBufHeadPtr.u64;
2010 return VINF_SUCCESS;
2011}
2012
2013
2014/**
2015 * Reads the Command Buffer Tail Pointer Register.
2016 */
2017static VBOXSTRICTRC iommuAmdCmdBufTailPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
2018{
2019 RT_NOREF(pDevIns, offReg);
2020 *pu64Value = pThis->CmdBufTailPtr.u64;
2021 return VINF_SUCCESS;
2022}
2023
2024
2025/**
2026 * Reads the Event Log Head Pointer Register.
2027 */
2028static VBOXSTRICTRC iommuAmdEvtLogHeadPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
2029{
2030 RT_NOREF(pDevIns, offReg);
2031 *pu64Value = pThis->EvtLogHeadPtr.u64;
2032 return VINF_SUCCESS;
2033}
2034
2035
2036/**
2037 * Reads the Event Log Tail Pointer Register.
2038 */
2039static VBOXSTRICTRC iommuAmdEvtLogTailPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
2040{
2041 RT_NOREF(pDevIns, offReg);
2042 *pu64Value = pThis->EvtLogTailPtr.u64;
2043 return VINF_SUCCESS;
2044}
2045
2046
2047/**
2048 * Reads the Status Register.
2049 */
2050static VBOXSTRICTRC iommuAmdStatus_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
2051{
2052 RT_NOREF(pDevIns, offReg);
2053 *pu64Value = pThis->Status.u64;
2054 return VINF_SUCCESS;
2055}
2056
2057
2058/**
2059 * Writes the Device Table Base Address Register.
2060 */
2061static VBOXSTRICTRC iommuAmdDevTabBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2062{
2063 RT_NOREF(pDevIns, offReg);
2064
2065 /* Mask out all unrecognized bits. */
2066 u64Value &= IOMMU_DEV_TAB_BAR_VALID_MASK;
2067
2068 /* Update the register. */
2069 pThis->aDevTabBaseAddrs[0].u64 = u64Value;
2070
2071 /* Paranoia. */
2072 Assert(pThis->aDevTabBaseAddrs[0].n.u9Size <= g_auDevTabSegMaxSizes[0]);
2073 return VINF_SUCCESS;
2074}
2075
2076
2077/**
2078 * Writes the Command Buffer Base Address Register.
2079 */
2080static VBOXSTRICTRC iommuAmdCmdBufBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2081{
2082 RT_NOREF(pDevIns, offReg);
2083
2084 /*
2085 * While this is not explicitly specified like the event log base address register,
2086 * the AMD IOMMU spec. does specify "CmdBufRun must be 0b to modify the command buffer registers properly".
2087 * Inconsistent specs :/
2088 */
2089 if (pThis->Status.n.u1CmdBufRunning)
2090 {
2091 LogFunc(("Setting CmdBufBar (%#RX64) when command buffer is running -> Ignored\n", u64Value));
2092 return VINF_SUCCESS;
2093 }
2094
2095 /* Mask out all unrecognized bits. */
2096 CMD_BUF_BAR_T CmdBufBaseAddr;
2097 CmdBufBaseAddr.u64 = u64Value & IOMMU_CMD_BUF_BAR_VALID_MASK;
2098
2099 /* Validate the length. */
2100 if (CmdBufBaseAddr.n.u4Len >= 8)
2101 {
2102 /* Update the register. */
2103 pThis->CmdBufBaseAddr.u64 = CmdBufBaseAddr.u64;
2104
2105 /*
2106 * Writing the command buffer base address, clears the command buffer head and tail pointers.
2107 * See AMD IOMMU spec. 2.4 "Commands".
2108 */
2109 pThis->CmdBufHeadPtr.u64 = 0;
2110 pThis->CmdBufTailPtr.u64 = 0;
2111 }
2112 else
2113 LogFunc(("Command buffer length (%#x) invalid -> Ignored\n", CmdBufBaseAddr.n.u4Len));
2114
2115 return VINF_SUCCESS;
2116}
2117
2118
2119/**
2120 * Writes the Event Log Base Address Register.
2121 */
2122static VBOXSTRICTRC iommuAmdEvtLogBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2123{
2124 RT_NOREF(pDevIns, offReg);
2125
2126 /*
2127 * IOMMU behavior is undefined when software writes this register when event logging is running.
2128 * In our emulation, we ignore the write entirely.
2129 * See AMD IOMMU spec. "Event Log Base Address Register".
2130 */
2131 if (pThis->Status.n.u1EvtLogRunning)
2132 {
2133 LogFunc(("Setting EvtLogBar (%#RX64) when event logging is running -> Ignored\n", u64Value));
2134 return VINF_SUCCESS;
2135 }
2136
2137 /* Mask out all unrecognized bits. */
2138 u64Value &= IOMMU_EVT_LOG_BAR_VALID_MASK;
2139 EVT_LOG_BAR_T EvtLogBaseAddr;
2140 EvtLogBaseAddr.u64 = u64Value;
2141
2142 /* Validate the length. */
2143 if (EvtLogBaseAddr.n.u4Len >= 8)
2144 {
2145 /* Update the register. */
2146 pThis->EvtLogBaseAddr.u64 = EvtLogBaseAddr.u64;
2147
2148 /*
2149 * Writing the event log base address, clears the event log head and tail pointers.
2150 * See AMD IOMMU spec. 2.5 "Event Logging".
2151 */
2152 pThis->EvtLogHeadPtr.u64 = 0;
2153 pThis->EvtLogTailPtr.u64 = 0;
2154 }
2155 else
2156 LogFunc(("Event log length (%#x) invalid -> Ignored\n", EvtLogBaseAddr.n.u4Len));
2157
2158 return VINF_SUCCESS;
2159}
2160
2161
2162/**
2163 * Writes the Control Register.
2164 */
2165static VBOXSTRICTRC iommuAmdCtrl_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2166{
2167 RT_NOREF(pDevIns, offReg);
2168
2169 /* Mask out all unrecognized bits. */
2170 u64Value &= IOMMU_CTRL_VALID_MASK;
2171 IOMMU_CTRL_T NewCtrl;
2172 NewCtrl.u64 = u64Value;
2173
2174 /* Ensure the device table segments are within limits. */
2175 if (NewCtrl.n.u3DevTabSegEn <= pThis->ExtFeat.n.u2DevTabSegSup)
2176 {
2177 IOMMU_CTRL_T const OldCtrl = pThis->Ctrl;
2178
2179 /* Update the register. */
2180 ASMAtomicWriteU64(&pThis->Ctrl.u64, NewCtrl.u64);
2181
2182 bool const fNewIommuEn = NewCtrl.n.u1IommuEn;
2183 bool const fOldIommuEn = OldCtrl.n.u1IommuEn;
2184
2185 /* Enable or disable event logging when the bit transitions. */
2186 bool const fOldEvtLogEn = OldCtrl.n.u1EvtLogEn;
2187 bool const fNewEvtLogEn = NewCtrl.n.u1EvtLogEn;
2188 if ( fOldEvtLogEn != fNewEvtLogEn
2189 || fOldIommuEn != fNewIommuEn)
2190 {
2191 if ( fNewIommuEn
2192 && fNewEvtLogEn)
2193 {
2194 ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_EVT_LOG_OVERFLOW);
2195 ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_EVT_LOG_RUNNING);
2196 }
2197 else
2198 ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_EVT_LOG_RUNNING);
2199 }
2200
2201 /* Enable or disable command buffer processing when the bit transitions. */
2202 bool const fOldCmdBufEn = OldCtrl.n.u1CmdBufEn;
2203 bool const fNewCmdBufEn = NewCtrl.n.u1CmdBufEn;
2204 if ( fOldCmdBufEn != fNewCmdBufEn
2205 || fOldIommuEn != fNewIommuEn)
2206 {
2207 if ( fNewCmdBufEn
2208 && fNewIommuEn)
2209 {
2210 ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_CMD_BUF_RUNNING);
2211 LogFunc(("Command buffer enabled\n"));
2212
2213 /* Wake up the command thread to start processing commands if any. */
2214 iommuAmdCmdThreadWakeUpIfNeeded(pDevIns);
2215 }
2216 else
2217 {
2218 ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_CMD_BUF_RUNNING);
2219 LogFunc(("Command buffer disabled\n"));
2220 }
2221 }
2222 }
2223 else
2224 {
2225 LogFunc(("Invalid number of device table segments enabled, exceeds %#x (%#RX64) -> Ignored!\n",
2226 pThis->ExtFeat.n.u2DevTabSegSup, NewCtrl.u64));
2227 }
2228
2229 return VINF_SUCCESS;
2230}
2231
2232
2233/**
2234 * Writes to the Exclusion Range Base Address Register.
2235 */
2236static VBOXSTRICTRC iommuAmdExclRangeBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2237{
2238 RT_NOREF(pDevIns, offReg);
2239 pThis->ExclRangeBaseAddr.u64 = u64Value & IOMMU_EXCL_RANGE_BAR_VALID_MASK;
2240 return VINF_SUCCESS;
2241}
2242
2243
2244/**
2245 * Writes to the Exclusion Range Limit Register.
2246 */
2247static VBOXSTRICTRC iommuAmdExclRangeLimit_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2248{
2249 RT_NOREF(pDevIns, offReg);
2250 u64Value &= IOMMU_EXCL_RANGE_LIMIT_VALID_MASK;
2251 u64Value |= UINT64_C(0xfff);
2252 pThis->ExclRangeLimit.u64 = u64Value;
2253 return VINF_SUCCESS;
2254}
2255
2256
2257/**
2258 * Writes the Hardware Event Register (Hi).
2259 */
2260static VBOXSTRICTRC iommuAmdHwEvtHi_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2261{
2262 /** @todo IOMMU: Why the heck is this marked read/write by the AMD IOMMU spec? */
2263 RT_NOREF(pDevIns, offReg);
2264 LogFlowFunc(("Writing %#RX64 to hardware event (Hi) register!\n", u64Value));
2265 pThis->HwEvtHi.u64 = u64Value;
2266 return VINF_SUCCESS;
2267}
2268
2269
2270/**
2271 * Writes the Hardware Event Register (Lo).
2272 */
2273static VBOXSTRICTRC iommuAmdHwEvtLo_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2274{
2275 /** @todo IOMMU: Why the heck is this marked read/write by the AMD IOMMU spec? */
2276 RT_NOREF(pDevIns, offReg);
2277 LogFlowFunc(("Writing %#RX64 to hardware event (Lo) register!\n", u64Value));
2278 pThis->HwEvtLo = u64Value;
2279 return VINF_SUCCESS;
2280}
2281
2282
2283/**
2284 * Writes the Hardware Event Status Register.
2285 */
2286static VBOXSTRICTRC iommuAmdHwEvtStatus_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2287{
2288 RT_NOREF(pDevIns, offReg);
2289
2290 /* Mask out all unrecognized bits. */
2291 u64Value &= IOMMU_HW_EVT_STATUS_VALID_MASK;
2292
2293 /*
2294 * The two bits (HEO and HEV) are RW1C (Read/Write 1-to-Clear; writing 0 has no effect).
2295 * If the current status bits or the bits being written are both 0, we've nothing to do.
2296 * The Overflow bit (bit 1) is only valid when the Valid bit (bit 0) is 1.
2297 */
2298 uint64_t HwStatus = pThis->HwEvtStatus.u64;
2299 if (!(HwStatus & RT_BIT(0)))
2300 return VINF_SUCCESS;
2301 if (u64Value & HwStatus & RT_BIT_64(0))
2302 HwStatus &= ~RT_BIT_64(0);
2303 if (u64Value & HwStatus & RT_BIT_64(1))
2304 HwStatus &= ~RT_BIT_64(1);
2305
2306 /* Update the register. */
2307 pThis->HwEvtStatus.u64 = HwStatus;
2308 return VINF_SUCCESS;
2309}
2310
2311
2312/**
2313 * Writes the Device Table Segment Base Address Register.
2314 */
2315static VBOXSTRICTRC iommuAmdDevTabSegBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2316{
2317 RT_NOREF(pDevIns);
2318
2319 /* Figure out which segment is being written. */
2320 uint8_t const offSegment = (offReg - IOMMU_MMIO_OFF_DEV_TAB_SEG_FIRST) >> 3;
2321 uint8_t const idxSegment = offSegment + 1;
2322 Assert(idxSegment < RT_ELEMENTS(pThis->aDevTabBaseAddrs));
2323
2324 /* Mask out all unrecognized bits. */
2325 u64Value &= IOMMU_DEV_TAB_SEG_BAR_VALID_MASK;
2326 DEV_TAB_BAR_T DevTabSegBar;
2327 DevTabSegBar.u64 = u64Value;
2328
2329 /* Validate the size. */
2330 uint16_t const uSegSize = DevTabSegBar.n.u9Size;
2331 uint16_t const uMaxSegSize = g_auDevTabSegMaxSizes[idxSegment];
2332 if (uSegSize <= uMaxSegSize)
2333 {
2334 /* Update the register. */
2335 pThis->aDevTabBaseAddrs[idxSegment].u64 = u64Value;
2336 }
2337 else
2338 LogFunc(("Device table segment (%u) size invalid (%#RX32) -> Ignored\n", idxSegment, uSegSize));
2339
2340 return VINF_SUCCESS;
2341}
2342
2343
2344/**
2345 * Writes the MSI Vector Register 0 (32-bit) and the MSI Vector Register 1 (32-bit).
2346 */
2347static VBOXSTRICTRC iommuAmdDevMsiVector_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2348{
2349 RT_NOREF(pDevIns, offReg);
2350
2351 /* MSI Vector Register 0 is read-only. */
2352 /* MSI Vector Register 1. */
2353 uint32_t const uReg = u64Value >> 32;
2354 pThis->MiscInfo.au32[1] = uReg & IOMMU_MSI_VECTOR_1_VALID_MASK;
2355 return VINF_SUCCESS;
2356}
2357
2358
2359/**
2360 * Writes the MSI Capability Header Register (32-bit) or the MSI Address (Lo)
2361 * Register (32-bit).
2362 */
2363static VBOXSTRICTRC iommuAmdMsiCapHdrAndAddrLo_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2364{
2365 RT_NOREF(pThis, offReg);
2366 PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
2367 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
2368
2369 /* MSI capability header. */
2370 {
2371 uint32_t const uReg = u64Value;
2372 MSI_CAP_HDR_T MsiCapHdr;
2373 MsiCapHdr.u32 = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_CAP_HDR);
2374 MsiCapHdr.n.u1MsiEnable = RT_BOOL(uReg & IOMMU_MSI_CAP_HDR_MSI_EN_MASK);
2375 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_CAP_HDR, MsiCapHdr.u32);
2376 }
2377
2378 /* MSI Address Lo. */
2379 {
2380 uint32_t const uReg = u64Value >> 32;
2381 uint32_t const uMsiAddrLo = uReg & VBOX_MSI_ADDR_VALID_MASK;
2382 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_LO, uMsiAddrLo);
2383 }
2384
2385 return VINF_SUCCESS;
2386}
2387
2388
2389/**
2390 * Writes the MSI Address (Hi) Register (32-bit) or the MSI data register (32-bit).
2391 */
2392static VBOXSTRICTRC iommuAmdMsiAddrHiAndData_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2393{
2394 RT_NOREF(pThis, offReg);
2395 PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
2396 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
2397
2398 /* MSI Address Hi. */
2399 {
2400 uint32_t const uReg = u64Value;
2401 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_HI, uReg);
2402 }
2403
2404 /* MSI Data. */
2405 {
2406 uint32_t const uReg = u64Value >> 32;
2407 uint32_t const uMsiData = uReg & VBOX_MSI_DATA_VALID_MASK;
2408 PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_DATA, uMsiData);
2409 }
2410
2411 return VINF_SUCCESS;
2412}
2413
2414
2415/**
2416 * Writes the Command Buffer Head Pointer Register.
2417 */
2418static VBOXSTRICTRC iommuAmdCmdBufHeadPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2419{
2420 RT_NOREF(pDevIns, offReg);
2421
2422 /*
2423 * IOMMU behavior is undefined when software writes this register when the command buffer is running.
2424 * In our emulation, we ignore the write entirely.
2425 * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers".
2426 */
2427 if (pThis->Status.n.u1CmdBufRunning)
2428 {
2429 LogFunc(("Setting CmdBufHeadPtr (%#RX64) when command buffer is running -> Ignored\n", u64Value));
2430 return VINF_SUCCESS;
2431 }
2432
2433 /*
2434 * IOMMU behavior is undefined when software writes a value outside the buffer length.
2435 * In our emulation, we ignore the write entirely.
2436 */
2437 uint32_t const offBuf = u64Value & IOMMU_CMD_BUF_HEAD_PTR_VALID_MASK;
2438 uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len);
2439 Assert(cbBuf <= _512K);
2440 if (offBuf >= cbBuf)
2441 {
2442 LogFunc(("Setting CmdBufHeadPtr (%#RX32) to a value that exceeds buffer length (%#RX23) -> Ignored\n", offBuf, cbBuf));
2443 return VINF_SUCCESS;
2444 }
2445
2446 /* Update the register. */
2447 pThis->CmdBufHeadPtr.au32[0] = offBuf;
2448
2449 iommuAmdCmdThreadWakeUpIfNeeded(pDevIns);
2450
2451 Log4Func(("Set CmdBufHeadPtr to %#RX32\n", offBuf));
2452 return VINF_SUCCESS;
2453}
2454
2455
2456/**
2457 * Writes the Command Buffer Tail Pointer Register.
2458 */
2459static VBOXSTRICTRC iommuAmdCmdBufTailPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2460{
2461 RT_NOREF(pDevIns, offReg);
2462
2463 /*
2464 * IOMMU behavior is undefined when software writes a value outside the buffer length.
2465 * In our emulation, we ignore the write entirely.
2466 * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers".
2467 */
2468 uint32_t const offBuf = u64Value & IOMMU_CMD_BUF_TAIL_PTR_VALID_MASK;
2469 uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len);
2470 Assert(cbBuf <= _512K);
2471 if (offBuf >= cbBuf)
2472 {
2473 LogFunc(("Setting CmdBufTailPtr (%#RX32) to a value that exceeds buffer length (%#RX32) -> Ignored\n", offBuf, cbBuf));
2474 return VINF_SUCCESS;
2475 }
2476
2477 /*
2478 * IOMMU behavior is undefined if software advances the tail pointer equal to or beyond the
2479 * head pointer after adding one or more commands to the buffer.
2480 *
2481 * However, we cannot enforce this strictly because it's legal for software to shrink the
2482 * command queue (by reducing the offset) as well as wrap around the pointer (when head isn't
2483 * at 0). Software might even make the queue empty by making head and tail equal which is
2484 * allowed. I don't think we can or should try too hard to prevent software shooting itself
2485 * in the foot here. As long as we make sure the offset value is within the circular buffer
2486 * bounds (which we do by masking bits above) it should be sufficient.
2487 */
2488 pThis->CmdBufTailPtr.au32[0] = offBuf;
2489
2490 iommuAmdCmdThreadWakeUpIfNeeded(pDevIns);
2491
2492 Log4Func(("Set CmdBufTailPtr to %#RX32\n", offBuf));
2493 return VINF_SUCCESS;
2494}
2495
2496
2497/**
2498 * Writes the Event Log Head Pointer Register.
2499 */
2500static VBOXSTRICTRC iommuAmdEvtLogHeadPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2501{
2502 RT_NOREF(pDevIns, offReg);
2503
2504 /*
2505 * IOMMU behavior is undefined when software writes a value outside the buffer length.
2506 * In our emulation, we ignore the write entirely.
2507 * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers".
2508 */
2509 uint32_t const offBuf = u64Value & IOMMU_EVT_LOG_HEAD_PTR_VALID_MASK;
2510 uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len);
2511 Assert(cbBuf <= _512K);
2512 if (offBuf >= cbBuf)
2513 {
2514 LogFunc(("Setting EvtLogHeadPtr (%#RX32) to a value that exceeds buffer length (%#RX32) -> Ignored\n", offBuf, cbBuf));
2515 return VINF_SUCCESS;
2516 }
2517
2518 /* Update the register. */
2519 pThis->EvtLogHeadPtr.au32[0] = offBuf;
2520
2521 Log4Func(("Set EvtLogHeadPtr to %#RX32\n", offBuf));
2522 return VINF_SUCCESS;
2523}
2524
2525
2526/**
2527 * Writes the Event Log Tail Pointer Register.
2528 */
2529static VBOXSTRICTRC iommuAmdEvtLogTailPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2530{
2531 RT_NOREF(pDevIns, offReg);
2532 NOREF(pThis);
2533
2534 /*
2535 * IOMMU behavior is undefined when software writes this register when the event log is running.
2536 * In our emulation, we ignore the write entirely.
2537 * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers".
2538 */
2539 if (pThis->Status.n.u1EvtLogRunning)
2540 {
2541 LogFunc(("Setting EvtLogTailPtr (%#RX64) when event log is running -> Ignored\n", u64Value));
2542 return VINF_SUCCESS;
2543 }
2544
2545 /*
2546 * IOMMU behavior is undefined when software writes a value outside the buffer length.
2547 * In our emulation, we ignore the write entirely.
2548 */
2549 uint32_t const offBuf = u64Value & IOMMU_EVT_LOG_TAIL_PTR_VALID_MASK;
2550 uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len);
2551 Assert(cbBuf <= _512K);
2552 if (offBuf >= cbBuf)
2553 {
2554 LogFunc(("Setting EvtLogTailPtr (%#RX32) to a value that exceeds buffer length (%#RX32) -> Ignored\n", offBuf, cbBuf));
2555 return VINF_SUCCESS;
2556 }
2557
2558 /* Update the register. */
2559 pThis->EvtLogTailPtr.au32[0] = offBuf;
2560
2561 Log4Func(("Set EvtLogTailPtr to %#RX32\n", offBuf));
2562 return VINF_SUCCESS;
2563}
2564
2565
2566/**
2567 * Writes the Status Register.
2568 */
2569static VBOXSTRICTRC iommuAmdStatus_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
2570{
2571 RT_NOREF(pDevIns, offReg);
2572
2573 /* Mask out all unrecognized bits. */
2574 u64Value &= IOMMU_STATUS_VALID_MASK;
2575
2576 /*
2577 * Compute RW1C (read-only, write-1-to-clear) bits and preserve the rest (which are read-only).
2578 * Writing 0 to an RW1C bit has no effect. Writing 1 to an RW1C bit, clears the bit if it's already 1.
2579 */
2580 IOMMU_STATUS_T const OldStatus = pThis->Status;
2581 uint64_t const fOldRw1cBits = (OldStatus.u64 & IOMMU_STATUS_RW1C_MASK);
2582 uint64_t const fOldRoBits = (OldStatus.u64 & ~IOMMU_STATUS_RW1C_MASK);
2583 uint64_t const fNewRw1cBits = (u64Value & IOMMU_STATUS_RW1C_MASK);
2584
2585 uint64_t const uNewStatus = (fOldRw1cBits & ~fNewRw1cBits) | fOldRoBits;
2586
2587 /* Update the register. */
2588 ASMAtomicWriteU64(&pThis->Status.u64, uNewStatus);
2589 return VINF_SUCCESS;
2590}
2591
2592
/**
 * Register access table 0.
 * The MMIO offset of each entry must be a multiple of 8!
 *
 * One entry per 8-byte register slot, in ascending offset order; each entry
 * holds the register name followed by its read and write handlers. The offset
 * in the leading comment of each row is informational only, the position in
 * the array is what matters (see the AssertCompile below).
 *
 * A NULL handler means this table provides no function for that direction, and
 * a NULL name marks a slot without a named register. How the caller treats
 * such entries is not visible here -- NOTE(review): presumably as
 * unimplemented/reserved; confirm against the MMIO dispatcher.
 */
static const IOMMUREGACC g_aRegAccess0[] =
{
    /* MMIO off.   Register name                           Read function                  Write function */
    { /* 0x00  */  "DEV_TAB_BAR",                          iommuAmdDevTabBar_r,           iommuAmdDevTabBar_w           },
    { /* 0x08  */  "CMD_BUF_BAR",                          iommuAmdCmdBufBar_r,           iommuAmdCmdBufBar_w           },
    { /* 0x10  */  "EVT_LOG_BAR",                          iommuAmdEvtLogBar_r,           iommuAmdEvtLogBar_w           },
    { /* 0x18  */  "CTRL",                                 iommuAmdCtrl_r,                iommuAmdCtrl_w                },
    { /* 0x20  */  "EXCL_BAR",                             iommuAmdExclRangeBar_r,        iommuAmdExclRangeBar_w        },
    { /* 0x28  */  "EXCL_RANGE_LIMIT",                     iommuAmdExclRangeLimit_r,      iommuAmdExclRangeLimit_w      },
    { /* 0x30  */  "EXT_FEAT",                             iommuAmdExtFeat_r,             NULL                          },
    { /* 0x38  */  "PPR_LOG_BAR",                          iommuAmdPprLogBar_r,           NULL                          },
    { /* 0x40  */  "HW_EVT_HI",                            iommuAmdHwEvtHi_r,             iommuAmdHwEvtHi_w             },
    { /* 0x48  */  "HW_EVT_LO",                            iommuAmdHwEvtLo_r,             iommuAmdHwEvtLo_w             },
    { /* 0x50  */  "HW_EVT_STATUS",                        iommuAmdHwEvtStatus_r,         iommuAmdHwEvtStatus_w         },
    { /* 0x58  */  NULL,                                   NULL,                          NULL                          },

    /* SMI filter registers: named, but no handlers in this table. */
    { /* 0x60  */  "SMI_FLT_0",                            NULL,                          NULL                          },
    { /* 0x68  */  "SMI_FLT_1",                            NULL,                          NULL                          },
    { /* 0x70  */  "SMI_FLT_2",                            NULL,                          NULL                          },
    { /* 0x78  */  "SMI_FLT_3",                            NULL,                          NULL                          },
    { /* 0x80  */  "SMI_FLT_4",                            NULL,                          NULL                          },
    { /* 0x88  */  "SMI_FLT_5",                            NULL,                          NULL                          },
    { /* 0x90  */  "SMI_FLT_6",                            NULL,                          NULL                          },
    { /* 0x98  */  "SMI_FLT_7",                            NULL,                          NULL                          },
    { /* 0xa0  */  "SMI_FLT_8",                            NULL,                          NULL                          },
    { /* 0xa8  */  "SMI_FLT_9",                            NULL,                          NULL                          },
    { /* 0xb0  */  "SMI_FLT_10",                           NULL,                          NULL                          },
    { /* 0xb8  */  "SMI_FLT_11",                           NULL,                          NULL                          },
    { /* 0xc0  */  "SMI_FLT_12",                           NULL,                          NULL                          },
    { /* 0xc8  */  "SMI_FLT_13",                           NULL,                          NULL                          },
    { /* 0xd0  */  "SMI_FLT_14",                           NULL,                          NULL                          },
    { /* 0xd8  */  "SMI_FLT_15",                           NULL,                          NULL                          },

    /* NOTE(review): the 0xf8 entry is named "PPR_EVT_B_BAR" but is served by the Event Log B
       base address read handler -- confirm the intended register name. */
    { /* 0xe0  */  "GALOG_BAR",                            iommuAmdGALogBar_r,            NULL                          },
    { /* 0xe8  */  "GALOG_TAIL_ADDR",                      NULL,                          NULL                          },
    { /* 0xf0  */  "PPR_LOG_B_BAR",                        iommuAmdPprLogBBaseAddr_r,     NULL                          },
    { /* 0xf8  */  "PPR_EVT_B_BAR",                        iommuAmdEvtLogBBaseAddr_r,     NULL                          },

    /* Device table segments 1 thru 7 share one handler pair; the handlers derive the segment
       index from the register offset. */
    { /* 0x100 */  "DEV_TAB_SEG_1",                        iommuAmdDevTabSegBar_r,        iommuAmdDevTabSegBar_w        },
    { /* 0x108 */  "DEV_TAB_SEG_2",                        iommuAmdDevTabSegBar_r,        iommuAmdDevTabSegBar_w        },
    { /* 0x110 */  "DEV_TAB_SEG_3",                        iommuAmdDevTabSegBar_r,        iommuAmdDevTabSegBar_w        },
    { /* 0x118 */  "DEV_TAB_SEG_4",                        iommuAmdDevTabSegBar_r,        iommuAmdDevTabSegBar_w        },
    { /* 0x120 */  "DEV_TAB_SEG_5",                        iommuAmdDevTabSegBar_r,        iommuAmdDevTabSegBar_w        },
    { /* 0x128 */  "DEV_TAB_SEG_6",                        iommuAmdDevTabSegBar_r,        iommuAmdDevTabSegBar_w        },
    { /* 0x130 */  "DEV_TAB_SEG_7",                        iommuAmdDevTabSegBar_r,        iommuAmdDevTabSegBar_w        },

    /* Device specific extension registers: readable only. */
    { /* 0x138 */  "DEV_SPECIFIC_FEAT",                    iommuAmdDevSpecificFeat_r,     NULL                          },
    { /* 0x140 */  "DEV_SPECIFIC_CTRL",                    iommuAmdDevSpecificCtrl_r,     NULL                          },
    { /* 0x148 */  "DEV_SPECIFIC_STATUS",                  iommuAmdDevSpecificStatus_r,   NULL                          },

    /* Each of these 8-byte slots covers a pair of 32-bit registers, hence the "A or B" names. */
    { /* 0x150 */  "MSI_VECTOR_0 or MSI_VECTOR_1",         iommuAmdDevMsiVector_r,        iommuAmdDevMsiVector_w        },
    { /* 0x158 */  "MSI_CAP_HDR or MSI_ADDR_LO",           iommuAmdMsiCapHdrAndAddrLo_r,  iommuAmdMsiCapHdrAndAddrLo_w  },
    { /* 0x160 */  "MSI_ADDR_HI or MSI_DATA",              iommuAmdMsiAddrHiAndData_r,    iommuAmdMsiAddrHiAndData_w    },
    { /* 0x168 */  "MSI_MAPPING_CAP_HDR or PERF_OPT_CTRL", NULL,                          NULL                          },

    { /* 0x170 */  "XT_GEN_INTR_CTRL",                     NULL,                          NULL                          },
    { /* 0x178 */  "XT_PPR_INTR_CTRL",                     NULL,                          NULL                          },
    { /* 0x180 */  "XT_GALOG_INT_CTRL",                    NULL,                          NULL                          },
};
/* The table must have exactly one entry per 8-byte slot of the table 0 MMIO range. */
AssertCompile(RT_ELEMENTS(g_aRegAccess0) == (IOMMU_MMIO_OFF_QWORD_TABLE_0_END - IOMMU_MMIO_OFF_QWORD_TABLE_0_START) / 8);
2657
/**
 * Register access table 1.
 * The MMIO offset of each entry must be a multiple of 8!
 *
 * One entry per 8-byte register slot, in ascending offset order; each entry
 * holds the register name followed by its read and write handlers. All
 * registers in this table (the MARC aperture base, relocation and length
 * registers 0 thru 3) are named but have no read or write function here.
 * NOTE(review): presumably these are not implemented by the emulation; confirm
 * against the MMIO dispatcher.
 */
static const IOMMUREGACC g_aRegAccess1[] =
{
    /* MMIO offset   Register name           Read function   Write function */
    { /* 0x200 */    "MARC_APER_BAR_0",      NULL,           NULL },
    { /* 0x208 */    "MARC_APER_RELOC_0",    NULL,           NULL },
    { /* 0x210 */    "MARC_APER_LEN_0",      NULL,           NULL },
    { /* 0x218 */    "MARC_APER_BAR_1",      NULL,           NULL },
    { /* 0x220 */    "MARC_APER_RELOC_1",    NULL,           NULL },
    { /* 0x228 */    "MARC_APER_LEN_1",      NULL,           NULL },
    { /* 0x230 */    "MARC_APER_BAR_2",      NULL,           NULL },
    { /* 0x238 */    "MARC_APER_RELOC_2",    NULL,           NULL },
    { /* 0x240 */    "MARC_APER_LEN_2",      NULL,           NULL },
    { /* 0x248 */    "MARC_APER_BAR_3",      NULL,           NULL },
    { /* 0x250 */    "MARC_APER_RELOC_3",    NULL,           NULL },
    { /* 0x258 */    "MARC_APER_LEN_3",      NULL,           NULL }
};
/* The table must have exactly one entry per 8-byte slot of the table 1 MMIO range. */
AssertCompile(RT_ELEMENTS(g_aRegAccess1) == (IOMMU_MMIO_OFF_QWORD_TABLE_1_END - IOMMU_MMIO_OFF_QWORD_TABLE_1_START) / 8);
2679
2680/**
2681 * Register access table 2.
2682 * The MMIO offset of each entry must be a multiple of 8!
2683 */
2684static const IOMMUREGACC g_aRegAccess2[] =
2685{
2686 /* MMIO offset Register name Read Function Write function */
2687 { /* 0x1ff8 */ "RSVD_REG", NULL, NULL },
2688
2689 { /* 0x2000 */ "CMD_BUF_HEAD_PTR", iommuAmdCmdBufHeadPtr_r, iommuAmdCmdBufHeadPtr_w },
2690 { /* 0x2008 */ "CMD_BUF_TAIL_PTR", iommuAmdCmdBufTailPtr_r , iommuAmdCmdBufTailPtr_w },
2691 { /* 0x2010 */ "EVT_LOG_HEAD_PTR", iommuAmdEvtLogHeadPtr_r, iommuAmdEvtLogHeadPtr_w },
2692 { /* 0x2018 */ "EVT_LOG_TAIL_PTR", iommuAmdEvtLogTailPtr_r, iommuAmdEvtLogTailPtr_w },
2693
2694 { /* 0x2020 */ "STATUS", iommuAmdStatus_r, iommuAmdStatus_w },
2695 { /* 0x2028 */ NULL, NULL, NULL },
2696
2697 { /* 0x2030 */ "PPR_LOG_HEAD_PTR", NULL, NULL },
2698 { /* 0x2038 */ "PPR_LOG_TAIL_PTR", NULL, NULL },
2699
2700 { /* 0x2040 */ "GALOG_HEAD_PTR", NULL, NULL },
2701 { /* 0x2048 */ "GALOG_TAIL_PTR", NULL, NULL },
2702
2703 { /* 0x2050 */ "PPR_LOG_B_HEAD_PTR", NULL, NULL },
2704 { /* 0x2058 */ "PPR_LOG_B_TAIL_PTR", NULL, NULL },
2705
2706 { /* 0x2060 */ NULL, NULL, NULL },
2707 { /* 0x2068 */ NULL, NULL, NULL },
2708
2709 { /* 0x2070 */ "EVT_LOG_B_HEAD_PTR", NULL, NULL },
2710 { /* 0x2078 */ "EVT_LOG_B_TAIL_PTR", NULL, NULL },
2711
2712 { /* 0x2080 */ "PPR_LOG_AUTO_RESP", NULL, NULL },
2713 { /* 0x2088 */ "PPR_LOG_OVERFLOW_EARLY", NULL, NULL },
2714 { /* 0x2090 */ "PPR_LOG_B_OVERFLOW_EARLY", NULL, NULL }
2715};
2716AssertCompile(RT_ELEMENTS(g_aRegAccess2) == (IOMMU_MMIO_OFF_QWORD_TABLE_2_END - IOMMU_MMIO_OFF_QWORD_TABLE_2_START) / 8);
2717
2718
2719/**
2720 * Gets the register access structure given its MMIO offset.
2721 *
2722 * @returns The register access structure, or NULL if the offset is invalid.
2723 * @param off The MMIO offset of the register being accessed.
2724 */
2725static PCIOMMUREGACC iommuAmdGetRegAccess(uint32_t off)
2726{
2727 /* Figure out which table the register belongs to and validate its index. */
2728 PCIOMMUREGACC pReg;
2729 if (off < IOMMU_MMIO_OFF_QWORD_TABLE_0_END)
2730 {
2731 uint32_t const idxReg = off >> 3;
2732 Assert(idxReg < RT_ELEMENTS(g_aRegAccess0));
2733 pReg = &g_aRegAccess0[idxReg];
2734 }
2735 else if ( off < IOMMU_MMIO_OFF_QWORD_TABLE_1_END
2736 && off >= IOMMU_MMIO_OFF_QWORD_TABLE_1_START)
2737 {
2738 uint32_t const idxReg = (off - IOMMU_MMIO_OFF_QWORD_TABLE_1_START) >> 3;
2739 Assert(idxReg < RT_ELEMENTS(g_aRegAccess1));
2740 pReg = &g_aRegAccess1[idxReg];
2741 }
2742 else if ( off < IOMMU_MMIO_OFF_QWORD_TABLE_2_END
2743 && off >= IOMMU_MMIO_OFF_QWORD_TABLE_2_START)
2744 {
2745 uint32_t const idxReg = (off - IOMMU_MMIO_OFF_QWORD_TABLE_2_START) >> 3;
2746 Assert(idxReg < RT_ELEMENTS(g_aRegAccess2));
2747 pReg = &g_aRegAccess2[idxReg];
2748 }
2749 else
2750 pReg = NULL;
2751 return pReg;
2752}
2753
2754
2755/**
2756 * Writes an IOMMU register (32-bit and 64-bit).
2757 *
2758 * @returns Strict VBox status code.
2759 * @param pDevIns The IOMMU device instance.
2760 * @param off MMIO byte offset to the register.
2761 * @param cb The size of the write access.
2762 * @param uValue The value being written.
2763 *
2764 * @thread EMT.
2765 */
2766static VBOXSTRICTRC iommuAmdRegisterWrite(PPDMDEVINS pDevIns, uint32_t off, uint8_t cb, uint64_t uValue)
2767{
2768 /*
2769 * Validate the access in case of IOM bug or incorrect assumption.
2770 */
2771 Assert(off < IOMMU_MMIO_REGION_SIZE);
2772 AssertMsgReturn(cb == 4 || cb == 8, ("Invalid access size %u\n", cb), VINF_SUCCESS);
2773 AssertMsgReturn(!(off & 3), ("Invalid offset %#x\n", off), VINF_SUCCESS);
2774
2775 Log4Func(("off=%#x cb=%u uValue=%#RX64\n", off, cb, uValue));
2776
2777 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
2778 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
2779 PCIOMMUREGACC pReg = iommuAmdGetRegAccess(off);
2780 if (pReg)
2781 { /* likely */ }
2782 else
2783 {
2784 LogFunc(("Writing unknown register %#x with %#RX64 -> Ignored\n", off, uValue));
2785 return VINF_SUCCESS;
2786 }
2787
2788 /* If a write handler doesn't exist, it's either a reserved or read-only register. */
2789 if (pReg->pfnWrite)
2790 { /* likely */ }
2791 else
2792 {
2793 LogFunc(("Writing reserved or read-only register off=%#x (cb=%u) with %#RX64 -> Ignored\n", off, cb, uValue));
2794 return VINF_SUCCESS;
2795 }
2796
2797 /*
2798 * If the write access is 64-bits and aligned on a 64-bit boundary, dispatch right away.
2799 * This handles writes to 64-bit registers as well as aligned, 64-bit writes to two
2800 * consecutive 32-bit registers.
2801 */
2802 if (cb == 8)
2803 {
2804 if (!(off & 7))
2805 {
2806 IOMMU_LOCK_RET(pDevIns, pThisCC, VINF_IOM_R3_MMIO_WRITE);
2807 VBOXSTRICTRC rcStrict = pReg->pfnWrite(pDevIns, pThis, off, uValue);
2808 IOMMU_UNLOCK(pDevIns, pThisCC);
2809 return rcStrict;
2810 }
2811
2812 LogFunc(("Misaligned access while writing register at off=%#x (cb=%u) with %#RX64 -> Ignored\n", off, cb, uValue));
2813 return VINF_SUCCESS;
2814 }
2815
2816 /* We shouldn't get sizes other than 32 bits here as we've specified so with IOM. */
2817 Assert(cb == 4);
2818 if (!(off & 7))
2819 {
2820 VBOXSTRICTRC rcStrict;
2821 IOMMU_LOCK_RET(pDevIns, pThisCC, VINF_IOM_R3_MMIO_WRITE);
2822
2823 /*
2824 * Lower 32 bits of a 64-bit register or a 32-bit register is being written.
2825 * Merge with higher 32 bits (after reading the full 64-bits) and perform a 64-bit write.
2826 */
2827 uint64_t u64Read;
2828 if (pReg->pfnRead)
2829 rcStrict = pReg->pfnRead(pDevIns, pThis, off, &u64Read);
2830 else
2831 {
2832 rcStrict = VINF_SUCCESS;
2833 u64Read = 0;
2834 }
2835
2836 if (RT_SUCCESS(rcStrict))
2837 {
2838 uValue = (u64Read & UINT64_C(0xffffffff00000000)) | uValue;
2839 rcStrict = pReg->pfnWrite(pDevIns, pThis, off, uValue);
2840 }
2841 else
2842 LogFunc(("Reading off %#x during split write failed! rc=%Rrc\n -> Ignored", off, VBOXSTRICTRC_VAL(rcStrict)));
2843
2844 IOMMU_UNLOCK(pDevIns, pThisCC);
2845 return rcStrict;
2846 }
2847
2848 /*
2849 * Higher 32 bits of a 64-bit register or a 32-bit register at a 32-bit boundary is being written.
2850 * Merge with lower 32 bits (after reading the full 64-bits) and perform a 64-bit write.
2851 */
2852 VBOXSTRICTRC rcStrict;
2853 Assert(!(off & 3));
2854 Assert(off & 7);
2855 Assert(off >= 4);
2856 uint64_t u64Read;
2857 IOMMU_LOCK_RET(pDevIns, pThisCC, VINF_IOM_R3_MMIO_WRITE);
2858 if (pReg->pfnRead)
2859 rcStrict = pReg->pfnRead(pDevIns, pThis, off - 4, &u64Read);
2860 else
2861 {
2862 rcStrict = VINF_SUCCESS;
2863 u64Read = 0;
2864 }
2865
2866 if (RT_SUCCESS(rcStrict))
2867 {
2868 uValue = (uValue << 32) | (u64Read & UINT64_C(0xffffffff));
2869 rcStrict = pReg->pfnWrite(pDevIns, pThis, off - 4, uValue);
2870 }
2871 else
2872 LogFunc(("Reading off %#x during split write failed! rc=%Rrc\n -> Ignored", off, VBOXSTRICTRC_VAL(rcStrict)));
2873
2874 IOMMU_UNLOCK(pDevIns, pThisCC);
2875 return rcStrict;
2876}
2877
2878
2879/**
2880 * Reads an IOMMU register (64-bit) given its MMIO offset.
2881 *
2882 * All reads are 64-bit but reads to 32-bit registers that are aligned on an 8-byte
2883 * boundary include the lower half of the subsequent register.
2884 *
2885 * This is because most registers are 64-bit and aligned on 8-byte boundaries but
2886 * some are really 32-bit registers aligned on an 8-byte boundary. We cannot assume
2887 * software will only perform 32-bit reads on those 32-bit registers that are
2888 * aligned on 8-byte boundaries.
2889 *
2890 * @returns Strict VBox status code.
2891 * @param pDevIns The IOMMU device instance.
2892 * @param off The MMIO offset of the register in bytes.
2893 * @param puResult Where to store the value being read.
2894 *
2895 * @thread EMT.
2896 */
2897static VBOXSTRICTRC iommuAmdRegisterRead(PPDMDEVINS pDevIns, uint32_t off, uint64_t *puResult)
2898{
2899 Assert(off < IOMMU_MMIO_REGION_SIZE);
2900 Assert(!(off & 7) || !(off & 3));
2901
2902 Log4Func(("off=%#x\n", off));
2903
2904 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
2905 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
2906 PCPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
2907 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev); NOREF(pPciDev);
2908
2909 PCIOMMUREGACC pReg = iommuAmdGetRegAccess(off);
2910 if (pReg)
2911 { /* likely */ }
2912 else
2913 {
2914 LogFunc(("Reading unknown register %#x -> Ignored\n", off));
2915 return VINF_IOM_MMIO_UNUSED_FF;
2916 }
2917
2918 /* If a read handler doesn't exist, it's a reserved or unknown register. */
2919 if (pReg->pfnRead)
2920 { /* likely */ }
2921 else
2922 {
2923 LogFunc(("Reading reserved or unknown register off=%#x -> returning 0s\n", off));
2924 return VINF_IOM_MMIO_UNUSED_00;
2925 }
2926
2927 /*
2928 * If the read access is aligned on a 64-bit boundary, read the full 64-bits and return.
2929 * The caller takes care of truncating upper 32 bits for 32-bit reads.
2930 */
2931 if (!(off & 7))
2932 {
2933 IOMMU_LOCK_RET(pDevIns, pThisCC, VINF_IOM_R3_MMIO_READ);
2934 VBOXSTRICTRC rcStrict = pReg->pfnRead(pDevIns, pThis, off, puResult);
2935 IOMMU_UNLOCK(pDevIns, pThisCC);
2936 return rcStrict;
2937 }
2938
2939 /*
2940 * High 32 bits of a 64-bit register or a 32-bit register at a non 64-bit boundary is being read.
2941 * Read full 64 bits at the previous 64-bit boundary but return only the high 32 bits.
2942 */
2943 Assert(!(off & 3));
2944 Assert(off & 7);
2945 Assert(off >= 4);
2946 IOMMU_LOCK_RET(pDevIns, pThisCC, VINF_IOM_R3_MMIO_READ);
2947 VBOXSTRICTRC rcStrict = pReg->pfnRead(pDevIns, pThis, off - 4, puResult);
2948 IOMMU_UNLOCK(pDevIns, pThisCC);
2949 if (RT_SUCCESS(rcStrict))
2950 *puResult >>= 32;
2951 else
2952 {
2953 *puResult = 0;
2954 LogFunc(("Reading off %#x during split read failed! rc=%Rrc\n -> Ignored", off, VBOXSTRICTRC_VAL(rcStrict)));
2955 }
2956
2957 return rcStrict;
2958}
2959
2960
2961/**
2962 * Raises the MSI interrupt for the IOMMU device.
2963 *
2964 * @param pDevIns The IOMMU device instance.
2965 *
2966 * @thread Any.
2967 * @remarks The IOMMU lock may or may not be held.
2968 */
2969static void iommuAmdMsiInterruptRaise(PPDMDEVINS pDevIns)
2970{
2971 LogFlowFunc(("\n"));
2972 if (iommuAmdIsMsiEnabled(pDevIns))
2973 {
2974 LogFunc(("Raising MSI\n"));
2975 PDMDevHlpPCISetIrq(pDevIns, 0, PDM_IRQ_LEVEL_HIGH);
2976 }
2977}
2978
#if 0
/**
 * Clears the MSI interrupt for the IOMMU device.
 *
 * Currently compiled out.  Does nothing when iommuAmdIsMsiEnabled() reports that
 * MSI is not enabled.
 *
 * @param   pDevIns     The IOMMU device instance.
 *
 * @thread  Any.
 * @remarks The IOMMU lock may or may not be held.
 */
static void iommuAmdMsiInterruptClear(PPDMDEVINS pDevIns)
{
    if (!iommuAmdIsMsiEnabled(pDevIns))
        return;

    PDMDevHlpPCISetIrq(pDevIns, 0, PDM_IRQ_LEVEL_LOW);
}
#endif
2994
2995/**
2996 * Writes an entry to the event log in memory.
2997 *
2998 * @returns VBox status code.
2999 * @param pDevIns The IOMMU device instance.
3000 * @param pEvent The event to log.
3001 *
3002 * @thread Any.
3003 * @remarks The IOMMU lock must be held while calling this function.
3004 */
3005static int iommuAmdEvtLogEntryWrite(PPDMDEVINS pDevIns, PCEVT_GENERIC_T pEvent)
3006{
3007 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
3008 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
3009
3010 IOMMU_LOCK(pDevIns, pThisCC);
3011
3012 /* Check if event logging is active and the log has not overflowed. */
3013 IOMMU_STATUS_T const Status = pThis->Status;
3014 if ( Status.n.u1EvtLogRunning
3015 && !Status.n.u1EvtOverflow)
3016 {
3017 uint32_t const cbEvt = sizeof(*pEvent);
3018
3019 /* Get the offset we need to write the event to in memory (circular buffer offset). */
3020 uint32_t const offEvt = pThis->EvtLogTailPtr.n.off;
3021 Assert(!(offEvt & ~IOMMU_EVT_LOG_TAIL_PTR_VALID_MASK));
3022
3023 /* Ensure we have space in the event log. */
3024 uint32_t const cMaxEvts = iommuAmdGetBufMaxEntries(pThis->EvtLogBaseAddr.n.u4Len);
3025 uint32_t const cEvts = iommuAmdGetEvtLogEntryCount(pThis);
3026 if (cEvts + 1 < cMaxEvts)
3027 {
3028 /* Write the event log entry to memory. */
3029 RTGCPHYS const GCPhysEvtLog = pThis->EvtLogBaseAddr.n.u40Base << X86_PAGE_4K_SHIFT;
3030 RTGCPHYS const GCPhysEvtLogEntry = GCPhysEvtLog + offEvt;
3031 int rc = PDMDevHlpPCIPhysWrite(pDevIns, GCPhysEvtLogEntry, pEvent, cbEvt);
3032 if (RT_FAILURE(rc))
3033 LogFunc(("Failed to write event log entry at %#RGp. rc=%Rrc\n", GCPhysEvtLogEntry, rc));
3034
3035 /* Increment the event log tail pointer. */
3036 uint32_t const cbEvtLog = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len);
3037 pThis->EvtLogTailPtr.n.off = (offEvt + cbEvt) % cbEvtLog;
3038
3039 /* Indicate that an event log entry was written. */
3040 ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_EVT_LOG_INTR);
3041
3042 /* Check and signal an interrupt if software wants to receive one when an event log entry is written. */
3043 if (pThis->Ctrl.n.u1EvtIntrEn)
3044 iommuAmdMsiInterruptRaise(pDevIns);
3045 }
3046 else
3047 {
3048 /* Indicate that the event log has overflowed. */
3049 ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_EVT_LOG_OVERFLOW);
3050
3051 /* Check and signal an interrupt if software wants to receive one when the event log has overflowed. */
3052 if (pThis->Ctrl.n.u1EvtIntrEn)
3053 iommuAmdMsiInterruptRaise(pDevIns);
3054 }
3055 }
3056
3057 IOMMU_UNLOCK(pDevIns, pThisCC);
3058
3059 return VINF_SUCCESS;
3060}
3061
3062
3063/**
3064 * Sets an event in the hardware error registers.
3065 *
3066 * @param pDevIns The IOMMU device instance.
3067 * @param pEvent The event.
3068 *
3069 * @thread Any.
3070 */
3071static void iommuAmdHwErrorSet(PPDMDEVINS pDevIns, PCEVT_GENERIC_T pEvent)
3072{
3073 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
3074 if (pThis->ExtFeat.n.u1HwErrorSup)
3075 {
3076 if (pThis->HwEvtStatus.n.u1Valid)
3077 pThis->HwEvtStatus.n.u1Overflow = 1;
3078 pThis->HwEvtStatus.n.u1Valid = 1;
3079 pThis->HwEvtHi.u64 = RT_MAKE_U64(pEvent->au32[0], pEvent->au32[1]);
3080 pThis->HwEvtLo = RT_MAKE_U64(pEvent->au32[2], pEvent->au32[3]);
3081 Assert( pThis->HwEvtHi.n.u4EvtCode == IOMMU_EVT_DEV_TAB_HW_ERROR
3082 || pThis->HwEvtHi.n.u4EvtCode == IOMMU_EVT_PAGE_TAB_HW_ERROR
3083 || pThis->HwEvtHi.n.u4EvtCode == IOMMU_EVT_COMMAND_HW_ERROR);
3084 }
3085}
3086
3087
3088/**
3089 * Initializes a PAGE_TAB_HARDWARE_ERROR event.
3090 *
3091 * @param idDevice The device ID (bus, device, function).
3092 * @param idDomain The domain ID.
3093 * @param GCPhysPtEntity The system physical address of the page table
3094 * entity.
3095 * @param enmOp The IOMMU operation being performed.
3096 * @param pEvtPageTabHwErr Where to store the initialized event.
3097 */
3098static void iommuAmdPageTabHwErrorEventInit(uint16_t idDevice, uint16_t idDomain, RTGCPHYS GCPhysPtEntity, IOMMUOP enmOp,
3099 PEVT_PAGE_TAB_HW_ERR_T pEvtPageTabHwErr)
3100{
3101 memset(pEvtPageTabHwErr, 0, sizeof(*pEvtPageTabHwErr));
3102 pEvtPageTabHwErr->n.u16DevId = idDevice;
3103 pEvtPageTabHwErr->n.u16DomainOrPasidLo = idDomain;
3104 pEvtPageTabHwErr->n.u1GuestOrNested = 0;
3105 pEvtPageTabHwErr->n.u1Interrupt = RT_BOOL(enmOp == IOMMUOP_INTR_REQ);
3106 pEvtPageTabHwErr->n.u1ReadWrite = RT_BOOL(enmOp == IOMMUOP_MEM_WRITE);
3107 pEvtPageTabHwErr->n.u1Translation = RT_BOOL(enmOp == IOMMUOP_TRANSLATE_REQ);
3108 pEvtPageTabHwErr->n.u2Type = enmOp == IOMMUOP_CMD ? HWEVTTYPE_DATA_ERROR : HWEVTTYPE_TARGET_ABORT;
3109 pEvtPageTabHwErr->n.u4EvtCode = IOMMU_EVT_PAGE_TAB_HW_ERROR;
3110 pEvtPageTabHwErr->n.u64Addr = GCPhysPtEntity;
3111}
3112
3113
3114/**
3115 * Raises a PAGE_TAB_HARDWARE_ERROR event.
3116 *
3117 * @param pDevIns The IOMMU device instance.
3118 * @param enmOp The IOMMU operation being performed.
3119 * @param pEvtPageTabHwErr The page table hardware error event.
3120 *
3121 * @thread Any.
3122 */
3123static void iommuAmdPageTabHwErrorEventRaise(PPDMDEVINS pDevIns, IOMMUOP enmOp, PEVT_PAGE_TAB_HW_ERR_T pEvtPageTabHwErr)
3124{
3125 AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_PAGE_TAB_HW_ERR_T));
3126 PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtPageTabHwErr;
3127
3128 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
3129 IOMMU_LOCK(pDevIns, pThisCC);
3130
3131 iommuAmdHwErrorSet(pDevIns, (PCEVT_GENERIC_T)pEvent);
3132 iommuAmdEvtLogEntryWrite(pDevIns, (PCEVT_GENERIC_T)pEvent);
3133 if (enmOp != IOMMUOP_CMD)
3134 iommuAmdSetPciTargetAbort(pDevIns);
3135
3136 IOMMU_UNLOCK(pDevIns, pThisCC);
3137
3138 LogFunc(("Raised PAGE_TAB_HARDWARE_ERROR. idDevice=%#x idDomain=%#x GCPhysPtEntity=%#RGp enmOp=%u u2Type=%u\n",
3139 pEvtPageTabHwErr->n.u16DevId, pEvtPageTabHwErr->n.u16DomainOrPasidLo, pEvtPageTabHwErr->n.u64Addr, enmOp,
3140 pEvtPageTabHwErr->n.u2Type));
3141}
3142
3143
3144#ifdef IN_RING3
3145/**
3146 * Initializes a COMMAND_HARDWARE_ERROR event.
3147 *
3148 * @param GCPhysAddr The system physical address the IOMMU attempted to access.
3149 * @param pEvtCmdHwErr Where to store the initialized event.
3150 */
3151static void iommuAmdCmdHwErrorEventInit(RTGCPHYS GCPhysAddr, PEVT_CMD_HW_ERR_T pEvtCmdHwErr)
3152{
3153 memset(pEvtCmdHwErr, 0, sizeof(*pEvtCmdHwErr));
3154 pEvtCmdHwErr->n.u2Type = HWEVTTYPE_DATA_ERROR;
3155 pEvtCmdHwErr->n.u4EvtCode = IOMMU_EVT_COMMAND_HW_ERROR;
3156 pEvtCmdHwErr->n.u64Addr = GCPhysAddr;
3157}
3158
3159
3160/**
3161 * Raises a COMMAND_HARDWARE_ERROR event.
3162 *
3163 * @param pDevIns The IOMMU device instance.
3164 * @param pEvtCmdHwErr The command hardware error event.
3165 *
3166 * @thread Any.
3167 */
3168static void iommuAmdCmdHwErrorEventRaise(PPDMDEVINS pDevIns, PCEVT_CMD_HW_ERR_T pEvtCmdHwErr)
3169{
3170 AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_CMD_HW_ERR_T));
3171 PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtCmdHwErr;
3172 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
3173
3174 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
3175 IOMMU_LOCK(pDevIns, pThisCC);
3176
3177 iommuAmdHwErrorSet(pDevIns, (PCEVT_GENERIC_T)pEvent);
3178 iommuAmdEvtLogEntryWrite(pDevIns, (PCEVT_GENERIC_T)pEvent);
3179 ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_CMD_BUF_RUNNING);
3180
3181 IOMMU_UNLOCK(pDevIns, pThisCC);
3182
3183 LogFunc(("Raised COMMAND_HARDWARE_ERROR. GCPhysCmd=%#RGp u2Type=%u\n", pEvtCmdHwErr->n.u64Addr, pEvtCmdHwErr->n.u2Type));
3184}
3185#endif /* IN_RING3 */
3186
3187
3188/**
3189 * Initializes a DEV_TAB_HARDWARE_ERROR event.
3190 *
3191 * @param idDevice The device ID (bus, device, function).
3192 * @param GCPhysDte The system physical address of the failed device table
3193 * access.
3194 * @param enmOp The IOMMU operation being performed.
3195 * @param pEvtDevTabHwErr Where to store the initialized event.
3196 */
3197static void iommuAmdDevTabHwErrorEventInit(uint16_t idDevice, RTGCPHYS GCPhysDte, IOMMUOP enmOp,
3198 PEVT_DEV_TAB_HW_ERROR_T pEvtDevTabHwErr)
3199{
3200 memset(pEvtDevTabHwErr, 0, sizeof(*pEvtDevTabHwErr));
3201 pEvtDevTabHwErr->n.u16DevId = idDevice;
3202 pEvtDevTabHwErr->n.u1Intr = RT_BOOL(enmOp == IOMMUOP_INTR_REQ);
3203 /** @todo IOMMU: Any other transaction type that can set read/write bit? */
3204 pEvtDevTabHwErr->n.u1ReadWrite = RT_BOOL(enmOp == IOMMUOP_MEM_WRITE);
3205 pEvtDevTabHwErr->n.u1Translation = RT_BOOL(enmOp == IOMMUOP_TRANSLATE_REQ);
3206 pEvtDevTabHwErr->n.u2Type = enmOp == IOMMUOP_CMD ? HWEVTTYPE_DATA_ERROR : HWEVTTYPE_TARGET_ABORT;
3207 pEvtDevTabHwErr->n.u4EvtCode = IOMMU_EVT_DEV_TAB_HW_ERROR;
3208 pEvtDevTabHwErr->n.u64Addr = GCPhysDte;
3209}
3210
3211
3212/**
3213 * Raises a DEV_TAB_HARDWARE_ERROR event.
3214 *
3215 * @param pDevIns The IOMMU device instance.
3216 * @param enmOp The IOMMU operation being performed.
3217 * @param pEvtDevTabHwErr The device table hardware error event.
3218 *
3219 * @thread Any.
3220 */
3221static void iommuAmdDevTabHwErrorEventRaise(PPDMDEVINS pDevIns, IOMMUOP enmOp, PEVT_DEV_TAB_HW_ERROR_T pEvtDevTabHwErr)
3222{
3223 AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_DEV_TAB_HW_ERROR_T));
3224 PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtDevTabHwErr;
3225
3226 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
3227 IOMMU_LOCK(pDevIns, pThisCC);
3228
3229 iommuAmdHwErrorSet(pDevIns, (PCEVT_GENERIC_T)pEvent);
3230 iommuAmdEvtLogEntryWrite(pDevIns, (PCEVT_GENERIC_T)pEvent);
3231 if (enmOp != IOMMUOP_CMD)
3232 iommuAmdSetPciTargetAbort(pDevIns);
3233
3234 IOMMU_UNLOCK(pDevIns, pThisCC);
3235
3236 LogFunc(("Raised DEV_TAB_HARDWARE_ERROR. idDevice=%#x GCPhysDte=%#RGp enmOp=%u u2Type=%u\n", pEvtDevTabHwErr->n.u16DevId,
3237 pEvtDevTabHwErr->n.u64Addr, enmOp, pEvtDevTabHwErr->n.u2Type));
3238}
3239
3240
3241#ifdef IN_RING3
3242/**
3243 * Initializes an ILLEGAL_COMMAND_ERROR event.
3244 *
3245 * @param GCPhysCmd The system physical address of the failed command
3246 * access.
3247 * @param pEvtIllegalCmd Where to store the initialized event.
3248 */
3249static void iommuAmdIllegalCmdEventInit(RTGCPHYS GCPhysCmd, PEVT_ILLEGAL_CMD_ERR_T pEvtIllegalCmd)
3250{
3251 Assert(!(GCPhysCmd & UINT64_C(0xf)));
3252 memset(pEvtIllegalCmd, 0, sizeof(*pEvtIllegalCmd));
3253 pEvtIllegalCmd->n.u4EvtCode = IOMMU_EVT_ILLEGAL_CMD_ERROR;
3254 pEvtIllegalCmd->n.u64Addr = GCPhysCmd;
3255}
3256
3257
3258/**
3259 * Raises an ILLEGAL_COMMAND_ERROR event.
3260 *
3261 * @param pDevIns The IOMMU device instance.
3262 * @param pEvtIllegalCmd The illegal command error event.
3263 */
3264static void iommuAmdIllegalCmdEventRaise(PPDMDEVINS pDevIns, PCEVT_ILLEGAL_CMD_ERR_T pEvtIllegalCmd)
3265{
3266 AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_ILLEGAL_DTE_T));
3267 PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtIllegalCmd;
3268 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
3269
3270 iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
3271 ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_CMD_BUF_RUNNING);
3272
3273 LogFunc(("Raised ILLEGAL_COMMAND_ERROR. Addr=%#RGp\n", pEvtIllegalCmd->n.u64Addr));
3274}
3275#endif /* IN_RING3 */
3276
3277
3278/**
3279 * Initializes an ILLEGAL_DEV_TABLE_ENTRY event.
3280 *
3281 * @param idDevice The device ID (bus, device, function).
3282 * @param uIova The I/O virtual address.
3283 * @param fRsvdNotZero Whether reserved bits are not zero. Pass @c false if the
3284 * event was caused by an invalid level encoding in the
3285 * DTE.
3286 * @param enmOp The IOMMU operation being performed.
3287 * @param pEvtIllegalDte Where to store the initialized event.
3288 */
3289static void iommuAmdIllegalDteEventInit(uint16_t idDevice, uint64_t uIova, bool fRsvdNotZero, IOMMUOP enmOp,
3290 PEVT_ILLEGAL_DTE_T pEvtIllegalDte)
3291{
3292 memset(pEvtIllegalDte, 0, sizeof(*pEvtIllegalDte));
3293 pEvtIllegalDte->n.u16DevId = idDevice;
3294 pEvtIllegalDte->n.u1Interrupt = RT_BOOL(enmOp == IOMMUOP_INTR_REQ);
3295 pEvtIllegalDte->n.u1ReadWrite = RT_BOOL(enmOp == IOMMUOP_MEM_WRITE);
3296 pEvtIllegalDte->n.u1RsvdNotZero = fRsvdNotZero;
3297 pEvtIllegalDte->n.u1Translation = RT_BOOL(enmOp == IOMMUOP_TRANSLATE_REQ);
3298 pEvtIllegalDte->n.u4EvtCode = IOMMU_EVT_ILLEGAL_DEV_TAB_ENTRY;
3299 pEvtIllegalDte->n.u64Addr = uIova & ~UINT64_C(0x3);
3300 /** @todo r=ramshankar: Not sure why the last 2 bits are marked as reserved by the
3301 * IOMMU spec here but not for this field for I/O page fault event. */
3302 Assert(!(uIova & UINT64_C(0x3)));
3303}
3304
3305
3306/**
3307 * Raises an ILLEGAL_DEV_TABLE_ENTRY event.
3308 *
3309 * @param pDevIns The IOMMU instance data.
3310 * @param enmOp The IOMMU operation being performed.
3311 * @param pEvtIllegalDte The illegal device table entry event.
3312 * @param enmEvtType The illegal device table entry event type.
3313 *
3314 * @thread Any.
3315 */
3316static void iommuAmdIllegalDteEventRaise(PPDMDEVINS pDevIns, IOMMUOP enmOp, PCEVT_ILLEGAL_DTE_T pEvtIllegalDte,
3317 EVT_ILLEGAL_DTE_TYPE_T enmEvtType)
3318{
3319 AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_ILLEGAL_DTE_T));
3320 PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtIllegalDte;
3321
3322 iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
3323 if (enmOp != IOMMUOP_CMD)
3324 iommuAmdSetPciTargetAbort(pDevIns);
3325
3326 LogFunc(("Raised ILLEGAL_DTE_EVENT. idDevice=%#x uIova=%#RX64 enmOp=%u enmEvtType=%u\n", pEvtIllegalDte->n.u16DevId,
3327 pEvtIllegalDte->n.u64Addr, enmOp, enmEvtType));
3328 NOREF(enmEvtType);
3329}
3330
3331
3332/**
3333 * Initializes an IO_PAGE_FAULT event.
3334 *
3335 * @param idDevice The device ID (bus, device, function).
3336 * @param idDomain The domain ID.
3337 * @param uIova The I/O virtual address being accessed.
3338 * @param fPresent Transaction to a page marked as present (including
3339 * DTE.V=1) or interrupt marked as remapped
3340 * (IRTE.RemapEn=1).
3341 * @param fRsvdNotZero Whether reserved bits are not zero. Pass @c false if
3342 * the I/O page fault was caused by invalid level
3343 * encoding.
3344 * @param fPermDenied Permission denied for the address being accessed.
3345 * @param enmOp The IOMMU operation being performed.
3346 * @param pEvtIoPageFault Where to store the initialized event.
3347 */
3348static void iommuAmdIoPageFaultEventInit(uint16_t idDevice, uint16_t idDomain, uint64_t uIova, bool fPresent, bool fRsvdNotZero,
3349 bool fPermDenied, IOMMUOP enmOp, PEVT_IO_PAGE_FAULT_T pEvtIoPageFault)
3350{
3351 Assert(!fPermDenied || fPresent);
3352 memset(pEvtIoPageFault, 0, sizeof(*pEvtIoPageFault));
3353 pEvtIoPageFault->n.u16DevId = idDevice;
3354 //pEvtIoPageFault->n.u4PasidHi = 0;
3355 pEvtIoPageFault->n.u16DomainOrPasidLo = idDomain;
3356 //pEvtIoPageFault->n.u1GuestOrNested = 0;
3357 //pEvtIoPageFault->n.u1NoExecute = 0;
3358 //pEvtIoPageFault->n.u1User = 0;
3359 pEvtIoPageFault->n.u1Interrupt = RT_BOOL(enmOp == IOMMUOP_INTR_REQ);
3360 pEvtIoPageFault->n.u1Present = fPresent;
3361 pEvtIoPageFault->n.u1ReadWrite = RT_BOOL(enmOp == IOMMUOP_MEM_WRITE);
3362 pEvtIoPageFault->n.u1PermDenied = fPermDenied;
3363 pEvtIoPageFault->n.u1RsvdNotZero = fRsvdNotZero;
3364 pEvtIoPageFault->n.u1Translation = RT_BOOL(enmOp == IOMMUOP_TRANSLATE_REQ);
3365 pEvtIoPageFault->n.u4EvtCode = IOMMU_EVT_IO_PAGE_FAULT;
3366 pEvtIoPageFault->n.u64Addr = uIova;
3367}
3368
3369
3370/**
3371 * Raises an IO_PAGE_FAULT event.
3372 *
3373 * @param pDevIns The IOMMU instance data.
3374 * @param fIoDevFlags The I/O device flags, see IOMMU_DTE_CACHE_F_XXX.
3375 * @param pIrte The interrupt remapping table entry, can be NULL.
3376 * @param enmOp The IOMMU operation being performed.
3377 * @param pEvtIoPageFault The I/O page fault event.
3378 * @param enmEvtType The I/O page fault event type.
3379 *
3380 * @thread Any.
3381 */
3382static void iommuAmdIoPageFaultEventRaise(PPDMDEVINS pDevIns, uint16_t fIoDevFlags, PCIRTE_T pIrte, IOMMUOP enmOp,
3383 PCEVT_IO_PAGE_FAULT_T pEvtIoPageFault, EVT_IO_PAGE_FAULT_TYPE_T enmEvtType)
3384{
3385 AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_IO_PAGE_FAULT_T));
3386 PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtIoPageFault;
3387 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
3388 STAM_COUNTER_INC(&pThis->StatIopfs); NOREF(pThis);
3389
3390#ifdef IOMMU_WITH_DTE_CACHE
3391# define IOMMU_DTE_CACHE_SET_PF_RAISED(a_pDevIns, a_DevId) iommuAmdDteCacheUpdateFlags((a_pDevIns), (a_DevId), \
3392 IOMMU_DTE_CACHE_F_IO_PAGE_FAULT_RAISED, \
3393 0 /* fAndMask */)
3394#else
3395# define IOMMU_DTE_CACHE_SET_PF_RAISED(a_pDevIns, a_DevId) do { } while (0)
3396#endif
3397
3398 bool fSuppressEvtLogging = false;
3399 if ( enmOp == IOMMUOP_MEM_READ
3400 || enmOp == IOMMUOP_MEM_WRITE)
3401 {
3402 uint16_t const fSuppressIopf = IOMMU_DTE_CACHE_F_VALID
3403 | IOMMU_DTE_CACHE_F_SUPPRESS_IOPF | IOMMU_DTE_CACHE_F_IO_PAGE_FAULT_RAISED;
3404 uint16_t const fSuppressAllIopf = IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_SUPPRESS_ALL_IOPF;
3405 if ( (fIoDevFlags & fSuppressAllIopf) == fSuppressAllIopf
3406 || (fIoDevFlags & fSuppressIopf) == fSuppressIopf)
3407 {
3408 fSuppressEvtLogging = true;
3409 }
3410 }
3411 else if (enmOp == IOMMUOP_INTR_REQ)
3412 {
3413 uint16_t const fSuppressIopf = IOMMU_DTE_CACHE_F_INTR_MAP_VALID | IOMMU_DTE_CACHE_F_IGNORE_UNMAPPED_INTR;
3414 if ((fIoDevFlags & fSuppressIopf) == fSuppressIopf)
3415 fSuppressEvtLogging = true;
3416 else if (pIrte) /** @todo Make this compulsary and assert if it isn't provided. */
3417 fSuppressEvtLogging = pIrte->n.u1SuppressIoPf;
3418 }
3419 /* else: Events are never suppressed for commands. */
3420
3421 switch (enmEvtType)
3422 {
3423 case kIoPageFaultType_PermDenied:
3424 {
3425 /* Cannot be triggered by a command. */
3426 Assert(enmOp != IOMMUOP_CMD);
3427 RT_FALL_THRU();
3428 }
3429 case kIoPageFaultType_DteRsvdPagingMode:
3430 case kIoPageFaultType_PteInvalidPageSize:
3431 case kIoPageFaultType_PteInvalidLvlEncoding:
3432 case kIoPageFaultType_SkippedLevelIovaNotZero:
3433 case kIoPageFaultType_PteRsvdNotZero:
3434 case kIoPageFaultType_PteValidNotSet:
3435 case kIoPageFaultType_DteTranslationDisabled:
3436 case kIoPageFaultType_PasidInvalidRange:
3437 {
3438 /*
3439 * For a translation request, the IOMMU doesn't signal an I/O page fault nor does it
3440 * create an event log entry. See AMD IOMMU spec. 2.1.3.2 "I/O Page Faults".
3441 */
3442 if (enmOp != IOMMUOP_TRANSLATE_REQ)
3443 {
3444 if (!fSuppressEvtLogging)
3445 {
3446 iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
3447 IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId);
3448 }
3449 if (enmOp != IOMMUOP_CMD)
3450 iommuAmdSetPciTargetAbort(pDevIns);
3451 }
3452 break;
3453 }
3454
3455 case kIoPageFaultType_UserSupervisor:
3456 {
3457 /* Access is blocked and only creates an event log entry. */
3458 if (!fSuppressEvtLogging)
3459 {
3460 iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
3461 IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId);
3462 }
3463 break;
3464 }
3465
3466 case kIoPageFaultType_IrteAddrInvalid:
3467 case kIoPageFaultType_IrteRsvdNotZero:
3468 case kIoPageFaultType_IrteRemapEn:
3469 case kIoPageFaultType_IrteRsvdIntType:
3470 case kIoPageFaultType_IntrReqAborted:
3471 case kIoPageFaultType_IntrWithPasid:
3472 {
3473 /* Only trigerred by interrupt requests. */
3474 Assert(enmOp == IOMMUOP_INTR_REQ);
3475 if (!fSuppressEvtLogging)
3476 {
3477 iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
3478 IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId);
3479 }
3480 iommuAmdSetPciTargetAbort(pDevIns);
3481 break;
3482 }
3483
3484 case kIoPageFaultType_SmiFilterMismatch:
3485 {
3486 /* Not supported and probably will never be, assert. */
3487 AssertMsgFailed(("kIoPageFaultType_SmiFilterMismatch - Upstream SMI requests not supported/implemented."));
3488 break;
3489 }
3490
3491 case kIoPageFaultType_DevId_Invalid:
3492 {
3493 /* Cannot be triggered by a command. */
3494 Assert(enmOp != IOMMUOP_CMD);
3495 Assert(enmOp != IOMMUOP_TRANSLATE_REQ); /** @todo IOMMU: We don't support translation requests yet. */
3496 if (!fSuppressEvtLogging)
3497 {
3498 iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
3499 IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId);
3500 }
3501 if ( enmOp == IOMMUOP_MEM_READ
3502 || enmOp == IOMMUOP_MEM_WRITE)
3503 iommuAmdSetPciTargetAbort(pDevIns);
3504 break;
3505 }
3506 }
3507
3508#undef IOMMU_DTE_CACHE_SET_PF_RAISED
3509}
3510
3511
3512/**
3513 * Raises an IO_PAGE_FAULT event given the DTE.
3514 *
3515 * @param pDevIns The IOMMU instance data.
3516 * @param pDte The device table entry.
3517 * @param pIrte The interrupt remapping table entry, can be NULL.
3518 * @param enmOp The IOMMU operation being performed.
3519 * @param pEvtIoPageFault The I/O page fault event.
3520 * @param enmEvtType The I/O page fault event type.
3521 *
3522 * @thread Any.
3523 */
3524static void iommuAmdIoPageFaultEventRaiseWithDte(PPDMDEVINS pDevIns, PCDTE_T pDte, PCIRTE_T pIrte, IOMMUOP enmOp,
3525 PCEVT_IO_PAGE_FAULT_T pEvtIoPageFault, EVT_IO_PAGE_FAULT_TYPE_T enmEvtType)
3526{
3527 Assert(pDte);
3528 uint16_t const fIoDevFlags = iommuAmdGetBasicDevFlags(pDte);
3529 return iommuAmdIoPageFaultEventRaise(pDevIns, fIoDevFlags, pIrte, enmOp, pEvtIoPageFault, enmEvtType);
3530}
3531
3532
3533/**
3534 * Reads a device table entry for the given the device ID.
3535 *
3536 * @returns VBox status code.
3537 * @param pDevIns The IOMMU device instance.
3538 * @param idDevice The device ID (bus, device, function).
3539 * @param enmOp The IOMMU operation being performed.
3540 * @param pDte Where to store the device table entry.
3541 *
3542 * @thread Any.
3543 */
3544static int iommuAmdDteRead(PPDMDEVINS pDevIns, uint16_t idDevice, IOMMUOP enmOp, PDTE_T pDte)
3545{
3546 PCIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
3547 PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
3548
3549 IOMMU_LOCK(pDevIns, pThisCC);
3550
3551 /* Figure out which device table segment is being accessed. */
3552 uint8_t const idxSegsEn = pThis->Ctrl.n.u3DevTabSegEn;
3553 Assert(idxSegsEn < RT_ELEMENTS(g_auDevTabSegShifts));
3554
3555 uint8_t const idxSeg = (idDevice & g_auDevTabSegMasks[idxSegsEn]) >> g_auDevTabSegShifts[idxSegsEn];
3556 Assert(idxSeg < RT_ELEMENTS(pThis->aDevTabBaseAddrs));
3557 AssertCompile(RT_ELEMENTS(g_auDevTabSegShifts) == RT_ELEMENTS(g_auDevTabSegMasks));
3558
3559 RTGCPHYS const GCPhysDevTab = pThis->aDevTabBaseAddrs[idxSeg].n.u40Base << X86_PAGE_4K_SHIFT;
3560 uint32_t const offDte = (idDevice & ~g_auDevTabSegMasks[idxSegsEn]) * sizeof(DTE_T);
3561 RTGCPHYS const GCPhysDte = GCPhysDevTab + offDte;
3562
3563 /* Ensure the DTE falls completely within the device table segment. */
3564 uint32_t const cbDevTabSeg = (pThis->aDevTabBaseAddrs[idxSeg].n.u9Size + 1) << X86_PAGE_4K_SHIFT;
3565
3566 IOMMU_UNLOCK(pDevIns, pThisCC);
3567
3568 if (offDte + sizeof(DTE_T) <= cbDevTabSeg)
3569 {
3570 /* Read the device table entry from guest memory. */
3571 Assert(!(GCPhysDevTab & X86_PAGE_4K_OFFSET_MASK));
3572 int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysDte, pDte, sizeof(*pDte));
3573 if (RT_SUCCESS(rc))
3574 return VINF_SUCCESS;
3575
3576 /* Raise a device table hardware error. */
3577 LogFunc(("Failed to read device table entry at %#RGp. rc=%Rrc -> DevTabHwError\n", GCPhysDte, rc));
3578
3579 EVT_DEV_TAB_HW_ERROR_T EvtDevTabHwErr;
3580 iommuAmdDevTabHwErrorEventInit(idDevice, GCPhysDte, enmOp, &EvtDevTabHwErr);
3581 iommuAmdDevTabHwErrorEventRaise(pDevIns, enmOp, &EvtDevTabHwErr);
3582 return VERR_IOMMU_DTE_READ_FAILED;
3583 }
3584
3585 /* Raise an I/O page fault for out-of-bounds acccess. */
3586 LogFunc(("Out-of-bounds device table entry. idDevice=%#x offDte=%u cbDevTabSeg=%u -> IOPF\n", idDevice, offDte, cbDevTabSeg));
3587 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3588 iommuAmdIoPageFaultEventInit(idDevice, 0 /* idDomain */, 0 /* uIova */, false /* fPresent */, false /* fRsvdNotZero */,
3589 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3590 iommuAmdIoPageFaultEventRaise(pDevIns, 0 /* fIoDevFlags */, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3591 kIoPageFaultType_DevId_Invalid);
3592 return VERR_IOMMU_DTE_BAD_OFFSET;
3593}
3594
3595
3596/**
3597 * Performs pre-translation checks for the given device table entry.
3598 *
3599 * @returns VBox status code.
3600 * @retval VINF_SUCCESS if the DTE is valid and supports address translation.
3601 * @retval VINF_IOMMU_ADDR_TRANSLATION_DISABLED if the DTE is valid but address
3602 * translation is disabled.
3603 * @retval VERR_IOMMU_ADDR_TRANSLATION_FAILED if an error occurred and any
3604 * corresponding event was raised.
3605 * @retval VERR_IOMMU_ADDR_ACCESS_DENIED if the DTE denies the requested
3606 * permissions.
3607 *
3608 * @param pDevIns The IOMMU device instance.
3609 * @param uIova The I/O virtual address to translate.
3610 * @param idDevice The device ID (bus, device, function).
3611 * @param fPerm The I/O permissions for this access, see
3612 * IOMMU_IO_PERM_XXX.
3613 * @param pDte The device table entry.
3614 * @param enmOp The IOMMU operation being performed.
3615 *
3616 * @thread Any.
3617 */
3618static int iommuAmdPreTranslateChecks(PPDMDEVINS pDevIns, uint16_t idDevice, uint64_t uIova, uint8_t fPerm, PCDTE_T pDte,
3619 IOMMUOP enmOp)
3620{
3621 /*
3622 * Check if the translation is valid, otherwise raise an I/O page fault.
3623 */
3624 if (pDte->n.u1TranslationValid)
3625 { /* likely */ }
3626 else
3627 {
3628 /** @todo r=ramshankar: The AMD IOMMU spec. says page walk is terminated but
3629 * doesn't explicitly say whether an I/O page fault is raised. From other
3630 * places in the spec. it seems early page walk terminations (starting with
3631 * the DTE) return the state computed so far and raises an I/O page fault. So
3632 * returning an invalid translation rather than skipping translation. */
3633 LogFunc(("Translation valid bit not set -> IOPF\n"));
3634 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3635 iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, false /* fPresent */, false /* fRsvdNotZero */,
3636 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3637 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3638 kIoPageFaultType_DteTranslationDisabled);
3639 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3640 }
3641
3642 /*
3643 * Check permissions bits in the DTE.
3644 * Note: This MUST be checked prior to checking the root page table level below!
3645 */
3646 uint8_t const fDtePerm = (pDte->au64[0] >> IOMMU_IO_PERM_SHIFT) & IOMMU_IO_PERM_MASK;
3647 if ((fPerm & fDtePerm) == fPerm)
3648 { /* likely */ }
3649 else
3650 {
3651 LogFunc(("Permission denied by DTE (fPerm=%#x fDtePerm=%#x) -> IOPF\n", fPerm, fDtePerm));
3652 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3653 iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
3654 true /* fPermDenied */, enmOp, &EvtIoPageFault);
3655 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3656 kIoPageFaultType_PermDenied);
3657 return VERR_IOMMU_ADDR_ACCESS_DENIED;
3658 }
3659
3660 /*
3661 * If the root page table level is 0, translation is disabled and GPA=SPA and
3662 * the DTE.IR and DTE.IW bits control permissions (verified above).
3663 */
3664 uint8_t const uMaxLevel = pDte->n.u3Mode;
3665 if (uMaxLevel != 0)
3666 { /* likely */ }
3667 else
3668 {
3669 Assert((fPerm & fDtePerm) == fPerm); /* Verify we've checked permissions. */
3670 return VINF_IOMMU_ADDR_TRANSLATION_DISABLED;
3671 }
3672
3673 /*
3674 * If the root page table level exceeds the allowed host-address translation level,
3675 * page walk is terminated and translation fails.
3676 */
3677 if (uMaxLevel <= IOMMU_MAX_HOST_PT_LEVEL)
3678 { /* likely */ }
3679 else
3680 {
3681 /** @todo r=ramshankar: I cannot make out from the AMD IOMMU spec. if I should be
3682 * raising an ILLEGAL_DEV_TABLE_ENTRY event or an IO_PAGE_FAULT event here.
3683 * I'm just going with I/O page fault. */
3684 LogFunc(("Invalid root page table level %#x (idDevice=%#x) -> IOPF\n", uMaxLevel, idDevice));
3685 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3686 iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
3687 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3688 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3689 kIoPageFaultType_PteInvalidLvlEncoding);
3690 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3691 }
3692
3693 /* The DTE allows translations for this device. */
3694 return VINF_SUCCESS;
3695}
3696
3697
3698/**
3699 * Walks the I/O page table to translate the I/O virtual address to a system
3700 * physical address.
3701 *
3702 * @returns VBox status code.
3703 * @param pDevIns The IOMMU device instance.
3704 * @param uIova The I/O virtual address to translate. Must be 4K aligned.
3705 * @param fPerm The I/O permissions for this access, see
3706 * IOMMU_IO_PERM_XXX.
3707 * @param idDevice The device ID (bus, device, function).
3708 * @param pDte The device table entry.
3709 * @param enmOp The IOMMU operation being performed.
3710 * @param pPageLookup Where to store the results of the I/O page lookup. This
3711 * is only updated when VINF_SUCCESS is returned.
3712 *
3713 * @thread Any.
3714 */
3715static int iommuAmdIoPageTableWalk(PPDMDEVINS pDevIns, uint64_t uIova, uint8_t fPerm, uint16_t idDevice, PCDTE_T pDte,
3716 IOMMUOP enmOp, PIOPAGELOOKUP pPageLookup)
3717{
3718 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
3719 Assert(pDte->n.u1Valid);
3720 Assert(!(uIova & X86_PAGE_4K_OFFSET_MASK));
3721
3722 /* The virtual address bits indexing table. */
3723 static uint8_t const s_acIovaLevelShifts[] = { 0, 12, 21, 30, 39, 48, 57, 0 };
3724 AssertCompile(RT_ELEMENTS(s_acIovaLevelShifts) > IOMMU_MAX_HOST_PT_LEVEL);
3725
3726 /*
3727 * Traverse the I/O page table starting with the page directory in the DTE.
3728 *
3729 * The Valid (Present bit), Translation Valid and Mode (Next-Level bits) in
3730 * the DTE have been validated already, see iommuAmdPreTranslateChecks.
3731 */
3732 IOPTENTITY_T PtEntity;
3733 PtEntity.u64 = pDte->au64[0];
3734 for (;;)
3735 {
3736 uint8_t const uLevel = PtEntity.n.u3NextLevel;
3737
3738 /* Read the page table entity at the current level. */
3739 {
3740 Assert(uLevel > 0 && uLevel < RT_ELEMENTS(s_acIovaLevelShifts));
3741 Assert(uLevel <= IOMMU_MAX_HOST_PT_LEVEL);
3742 uint16_t const idxPte = (uIova >> s_acIovaLevelShifts[uLevel]) & UINT64_C(0x1ff);
3743 uint64_t const offPte = idxPte << 3;
3744 RTGCPHYS const GCPhysPtEntity = (PtEntity.u64 & IOMMU_PTENTITY_ADDR_MASK) + offPte;
3745 int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysPtEntity, &PtEntity.u64, sizeof(PtEntity));
3746 if (RT_FAILURE(rc))
3747 {
3748 LogFunc(("Failed to read page table entry at %#RGp. rc=%Rrc -> PageTabHwError\n", GCPhysPtEntity, rc));
3749 EVT_PAGE_TAB_HW_ERR_T EvtPageTabHwErr;
3750 iommuAmdPageTabHwErrorEventInit(idDevice, pDte->n.u16DomainId, GCPhysPtEntity, enmOp, &EvtPageTabHwErr);
3751 iommuAmdPageTabHwErrorEventRaise(pDevIns, enmOp, &EvtPageTabHwErr);
3752 return VERR_IOMMU_IPE_2;
3753 }
3754 }
3755
3756 /* Check present bit. */
3757 if (PtEntity.n.u1Present)
3758 { /* likely */ }
3759 else
3760 {
3761 LogFunc(("Page table entry not present. idDevice=%#x uIova=%#RX64 -> IOPF\n", idDevice, uIova));
3762 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3763 iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, false /* fPresent */, false /* fRsvdNotZero */,
3764 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3765 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3766 kIoPageFaultType_PermDenied);
3767 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3768 }
3769
3770 /* Validate the encoding of the next level. */
3771 uint8_t const uNextLevel = PtEntity.n.u3NextLevel;
3772#if IOMMU_MAX_HOST_PT_LEVEL < 6
3773 if (uNextLevel <= IOMMU_MAX_HOST_PT_LEVEL)
3774 { /* likely */ }
3775 else
3776 {
3777 LogFunc(("Next-level/paging-mode field of the paging entity invalid. uNextLevel=%#x -> IOPF\n", uNextLevel));
3778 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3779 iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, true /* fRsvdNotZero */,
3780 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3781 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3782 kIoPageFaultType_PteInvalidLvlEncoding);
3783 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3784 }
3785#endif
3786
3787 /* Check reserved bits. */
3788 uint64_t const fRsvdMask = uNextLevel == 0 || uNextLevel == 7 ? IOMMU_PTE_RSVD_MASK : IOMMU_PDE_RSVD_MASK;
3789 if (!(PtEntity.u64 & fRsvdMask))
3790 { /* likely */ }
3791 else
3792 {
3793 LogFunc(("Page table entity (%#RX64 level=%u) reserved bits set -> IOPF\n", PtEntity.u64, uNextLevel));
3794 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3795 iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, true /* fRsvdNotZero */,
3796 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3797 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3798 kIoPageFaultType_PteRsvdNotZero);
3799 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3800 }
3801
3802 /* Check permission bits. */
3803 uint8_t const fPtePerm = (PtEntity.u64 >> IOMMU_IO_PERM_SHIFT) & IOMMU_IO_PERM_MASK;
3804 if ((fPerm & fPtePerm) == fPerm)
3805 { /* likely */ }
3806 else
3807 {
3808 LogFunc(("Page table entry access denied. idDevice=%#x fPerm=%#x fPtePerm=%#x -> IOPF\n", idDevice, fPerm, fPtePerm));
3809 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3810 iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
3811 true /* fPermDenied */, enmOp, &EvtIoPageFault);
3812 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3813 kIoPageFaultType_PermDenied);
3814 return VERR_IOMMU_ADDR_ACCESS_DENIED;
3815 }
3816
3817 /* If the next level is 0 or 7, this is the final level PTE. */
3818 if (uNextLevel == 0)
3819 {
3820 /* The page size of the translation is the default size for the level. */
3821 uint8_t const cShift = s_acIovaLevelShifts[uLevel];
3822 RTGCPHYS const GCPhysPte = PtEntity.u64 & IOMMU_PTENTITY_ADDR_MASK;
3823 pPageLookup->GCPhysSpa = GCPhysPte & X86_GET_PAGE_BASE_MASK(cShift);
3824 pPageLookup->cShift = cShift;
3825 pPageLookup->fPerm = fPtePerm;
3826 return VINF_SUCCESS;
3827 }
3828 if (uNextLevel == 7)
3829 {
3830 /* The default page size of the translation is overridden. */
3831 uint8_t cShift = X86_PAGE_4K_SHIFT;
3832 RTGCPHYS const GCPhysPte = PtEntity.u64 & IOMMU_PTENTITY_ADDR_MASK;
3833 while (GCPhysPte & RT_BIT_64(cShift++))
3834 ;
3835
3836 /* The page size must be larger than the default size and lower than the default size of the higher level. */
3837 if ( cShift > s_acIovaLevelShifts[uLevel]
3838 && cShift < s_acIovaLevelShifts[uLevel + 1])
3839 {
3840 pPageLookup->GCPhysSpa = GCPhysPte & X86_GET_PAGE_BASE_MASK(cShift);
3841 pPageLookup->cShift = cShift;
3842 pPageLookup->fPerm = fPtePerm;
3843 STAM_COUNTER_INC(&pThis->StatNonStdPageSize); NOREF(pThis);
3844 return VINF_SUCCESS;
3845 }
3846
3847 LogFunc(("Page size invalid. idDevice=%#x cShift=%u -> IOPF\n", idDevice, cShift));
3848 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3849 iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
3850 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3851 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3852 kIoPageFaultType_PteInvalidPageSize);
3853 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3854 }
3855
3856 /* Validate level transition. */
3857 if (uNextLevel < uLevel)
3858 { /* likely */ }
3859 else
3860 {
3861 LogFunc(("Next level (%#x) must be less than the current level (%#x) -> IOPF\n", uNextLevel, uLevel));
3862 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3863 iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
3864 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3865 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3866 kIoPageFaultType_PteInvalidLvlEncoding);
3867 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3868 }
3869
3870 /* Ensure IOVA bits of skipped levels (if any) are zero. */
3871 uint64_t const fIovaSkipMask = IOMMU_GET_MAX_VALID_IOVA(uLevel - 1) - IOMMU_GET_MAX_VALID_IOVA(uNextLevel);
3872 if (!(uIova & fIovaSkipMask))
3873 { /* likely */ }
3874 else
3875 {
3876 LogFunc(("IOVA of skipped levels are not zero. uIova=%#RX64 fSkipMask=%#RX64 -> IOPF\n", uIova, fIovaSkipMask));
3877 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
3878 iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */,
3879 false /* fPermDenied */, enmOp, &EvtIoPageFault);
3880 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
3881 kIoPageFaultType_SkippedLevelIovaNotZero);
3882 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
3883 }
3884
3885 /* Traverse to the next level. */
3886 }
3887}
3888
3889
3890/**
3891 * Page lookup callback for finding an I/O page from guest memory.
3892 *
3893 * @returns VBox status code.
3894 * @retval VINF_SUCCESS when the page is found and has the right permissions.
3895 * @retval VERR_IOMMU_ADDR_TRANSLATION_FAILED when address translation fails.
3896 * @retval VERR_IOMMU_ADDR_ACCESS_DENIED when the page is found but permissions are
3897 * insufficient to what is requested.
3898 *
3899 * @param pDevIns The IOMMU instance data.
3900 * @param uIovaPage The I/O virtual address to lookup in the cache (must be
3901 * 4K aligned).
3902 * @param fPerm The I/O permissions for this access, see
3903 * IOMMU_IO_PERM_XXX.
3904 * @param pAux The auxiliary information required during lookup.
3905 * @param pPageLookup Where to store the looked up I/O page.
3906 */
3907static DECLCALLBACK(int) iommuAmdDteLookupPage(PPDMDEVINS pDevIns, uint64_t uIovaPage, uint8_t fPerm, PCIOMMUOPAUX pAux,
3908 PIOPAGELOOKUP pPageLookup)
3909{
3910 AssertPtr(pAux);
3911 AssertPtr(pPageLookup);
3912 Assert(!(uIovaPage & X86_PAGE_4K_OFFSET_MASK));
3913
3914 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
3915 STAM_PROFILE_ADV_START(&pThis->StatProfDteLookup, a);
3916 int rc = iommuAmdIoPageTableWalk(pDevIns, uIovaPage, fPerm, pAux->idDevice, pAux->pDte, pAux->enmOp, pPageLookup);
3917 STAM_PROFILE_ADV_STOP(&pThis->StatProfDteLookup, a); NOREF(pThis);
3918 return rc;
3919}
3920
3921
3922/**
3923 * Looks up a range of I/O virtual addresses.
3924 *
3925 * @returns VBox status code.
3926 * @param pDevIns The IOMMU instance data.
3927 * @param pfnIoPageLookup The lookup function to use.
3928 * @param pAddrIn The I/O address range to lookup.
3929 * @param pAux The auxiliary information required by the lookup
3930 * function.
3931 * @param pAddrOut Where to store the translated I/O address page
3932 * lookup.
3933 * @param pcbContiguous Where to store the size of the access.
3934 */
3935static int iommuAmdLookupIoAddrRange(PPDMDEVINS pDevIns, PFNIOPAGELOOKUP pfnIoPageLookup, PCIOADDRRANGE pAddrIn,
3936 PCIOMMUOPAUX pAux, PIOPAGELOOKUP pAddrOut, size_t *pcbContiguous)
3937{
3938 int rc;
3939 size_t const cbIova = pAddrIn->cb;
3940 uint8_t const fPerm = pAddrIn->fPerm;
3941 uint64_t const uIova = pAddrIn->uAddr;
3942 RTGCPHYS GCPhysSpa = NIL_RTGCPHYS;
3943 size_t cbRemaining = cbIova;
3944 uint64_t uIovaPage = pAddrIn->uAddr & X86_PAGE_4K_BASE_MASK;
3945 uint64_t offIova = pAddrIn->uAddr & X86_PAGE_4K_OFFSET_MASK;
3946 size_t const cbPage = X86_PAGE_4K_SIZE;
3947
3948 IOPAGELOOKUP PageLookupPrev;
3949 RT_ZERO(PageLookupPrev);
3950 for (;;)
3951 {
3952 /* Lookup the physical page corresponding to the I/O virtual address. */
3953 IOPAGELOOKUP PageLookup;
3954 rc = pfnIoPageLookup(pDevIns, uIovaPage, fPerm, pAux, &PageLookup);
3955 if (RT_SUCCESS(rc))
3956 {
3957 /*
3958 * Validate results of the translation.
3959 */
3960 /* The IOTLB cache preserves the original page sizes even though the IOVAs are split into 4K pages. */
3961 Assert(PageLookup.cShift >= X86_PAGE_4K_SHIFT && PageLookup.cShift <= 51);
3962 Assert( pfnIoPageLookup != iommuAmdDteLookupPage
3963 || !(PageLookup.GCPhysSpa & X86_GET_PAGE_OFFSET_MASK(PageLookup.cShift)));
3964 Assert((PageLookup.fPerm & fPerm) == fPerm);
3965
3966 /* Store the translated address before continuing to access more pages. */
3967 if (cbRemaining == cbIova)
3968 {
3969 uint64_t const offSpa = uIova & X86_GET_PAGE_OFFSET_MASK(PageLookup.cShift);
3970 GCPhysSpa = PageLookup.GCPhysSpa | offSpa;
3971 }
3972 /*
3973 * Check if translated address results in a physically contiguous region.
3974 *
3975 * Also ensure that the permissions for all pages in this range are identical
3976 * because we specify a common permission while adding pages in this range
3977 * to the IOTLB cache.
3978 *
3979 * The page size must also be identical since we need to know how many offset
3980 * bits to copy into the final translated address (while retrieving 4K sized
3981 * pages from the IOTLB cache).
3982 */
3983 else if ( PageLookup.GCPhysSpa == PageLookupPrev.GCPhysSpa + cbPage
3984 && PageLookup.fPerm == PageLookupPrev.fPerm
3985 && PageLookup.cShift == PageLookupPrev.cShift)
3986 { /* likely */ }
3987 else
3988 {
3989 Assert(cbRemaining > 0);
3990 rc = VERR_OUT_OF_RANGE;
3991 break;
3992 }
3993
3994 /* Store the page lookup result from the first/previous page. */
3995 PageLookupPrev = PageLookup;
3996
3997 /* Check if we need to access more pages. */
3998 if (cbRemaining > cbPage - offIova)
3999 {
4000 cbRemaining -= (cbPage - offIova); /* Calculate how much more we need to access. */
4001 uIovaPage += cbPage; /* Update address of the next access. */
4002 offIova = 0; /* After the first page, remaining pages are accessed from offset 0. */
4003 }
4004 else
4005 {
4006 /* Caller (PDM) doesn't expect more data accessed than what was requested. */
4007 cbRemaining = 0;
4008 break;
4009 }
4010 }
4011 else
4012 break;
4013 }
4014
4015 pAddrOut->GCPhysSpa = GCPhysSpa; /* Update the translated address. */
4016 pAddrOut->cShift = PageLookupPrev.cShift; /* Update the page size of the lookup. */
4017 pAddrOut->fPerm = PageLookupPrev.fPerm; /* Update the allowed permissions for this access. */
4018 *pcbContiguous = cbIova - cbRemaining; /* Update the size of the contiguous memory region. */
4019 return rc;
4020}
4021
4022
4023/**
4024 * Looks up an I/O virtual address from the device table.
4025 *
4026 * @returns VBox status code.
4027 * @param pDevIns The IOMMU instance data.
4028 * @param idDevice The device ID (bus, device, function).
4029 * @param uIova The I/O virtual address to lookup.
4030 * @param cbIova The size of the access.
4031 * @param fPerm The I/O permissions for this access, see
4032 * IOMMU_IO_PERM_XXX.
4033 * @param enmOp The IOMMU operation being performed.
4034 * @param pGCPhysSpa Where to store the translated system physical address.
4035 * @param pcbContiguous Where to store the number of contiguous bytes translated
4036 * and permission-checked.
4037 *
4038 * @thread Any.
4039 */
4040static int iommuAmdDteLookup(PPDMDEVINS pDevIns, uint16_t idDevice, uint64_t uIova, size_t cbIova, uint8_t fPerm, IOMMUOP enmOp,
4041 PRTGCPHYS pGCPhysSpa, size_t *pcbContiguous)
4042{
4043 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4044 RTGCPHYS GCPhysSpa = NIL_RTGCPHYS;
4045 size_t cbContiguous = 0;
4046
4047 /* Read the device table entry from memory. */
4048 DTE_T Dte;
4049 int rc = iommuAmdDteRead(pDevIns, idDevice, enmOp, &Dte);
4050 if (RT_SUCCESS(rc))
4051 {
4052 if (Dte.n.u1Valid)
4053 {
4054 /* Validate bits 127:0 of the device table entry when DTE.V is 1. */
4055 uint64_t const fRsvd0 = Dte.au64[0] & ~(IOMMU_DTE_QWORD_0_VALID_MASK & ~IOMMU_DTE_QWORD_0_FEAT_MASK);
4056 uint64_t const fRsvd1 = Dte.au64[1] & ~(IOMMU_DTE_QWORD_1_VALID_MASK & ~IOMMU_DTE_QWORD_1_FEAT_MASK);
4057 if (RT_LIKELY(!fRsvd0 && !fRsvd1))
4058 {
4059 /*
4060 * Check if the DTE is configured for translating addresses.
4061 * Note: Addresses cannot be subject to exclusion as we do -not- support remote IOTLBs,
4062 * so there's no need to check the address exclusion base/limit here.
4063 */
4064 rc = iommuAmdPreTranslateChecks(pDevIns, idDevice, uIova, fPerm, &Dte, enmOp);
4065 if (rc == VINF_SUCCESS)
4066 {
4067 IOADDRRANGE AddrIn;
4068 AddrIn.uAddr = uIova;
4069 AddrIn.cb = cbIova;
4070 AddrIn.fPerm = fPerm;
4071
4072 IOMMUOPAUX Aux;
4073 Aux.enmOp = enmOp;
4074 Aux.pDte = &Dte;
4075 Aux.idDevice = idDevice;
4076 Aux.idDomain = Dte.n.u16DomainId;
4077
4078 /* Lookup the address from the DTE and I/O page tables.*/
4079 IOPAGELOOKUP AddrOut;
4080 rc = iommuAmdLookupIoAddrRange(pDevIns, iommuAmdDteLookupPage, &AddrIn, &Aux, &AddrOut, &cbContiguous);
4081 GCPhysSpa = AddrOut.GCPhysSpa;
4082
4083 /*
4084 * If we stopped since translation resulted in non-contiguous physical addresses
4085 * or permissions aren't identical for all pages in the access, what we translated
4086 * thus far is still valid.
4087 */
4088 if (rc == VERR_OUT_OF_RANGE)
4089 {
4090 Assert(cbContiguous > 0 && cbContiguous < cbIova);
4091 rc = VINF_SUCCESS;
4092 STAM_COUNTER_INC(&pThis->StatAccessDteNonContig); NOREF(pThis);
4093 }
4094 else if (rc == VERR_IOMMU_ADDR_ACCESS_DENIED)
4095 STAM_COUNTER_INC(&pThis->StatAccessDtePermDenied);
4096
4097#ifdef IOMMU_WITH_IOTLBE_CACHE
4098 if (RT_SUCCESS(rc))
4099 {
4100 /* Update that addresses requires translation (cumulative permissions of DTE and I/O page tables). */
4101 iommuAmdDteCacheAddOrUpdateFlags(pDevIns, &Dte, idDevice, IOMMU_DTE_CACHE_F_ADDR_TRANSLATE,
4102 0 /* fAndMask */);
4103 /* Update IOTLB for the contiguous range of I/O virtual addresses. */
4104 iommuAmdIotlbAddRange(pDevIns, Aux.idDomain, uIova & X86_PAGE_4K_BASE_MASK, cbContiguous, &AddrOut);
4105 }
4106#endif
4107 }
4108 else if (rc == VINF_IOMMU_ADDR_TRANSLATION_DISABLED)
4109 {
4110 /*
4111 * Translation is disabled for this device (root paging mode is 0).
4112 * GPA=SPA, but the permission bits are important and controls accesses.
4113 */
4114 GCPhysSpa = uIova;
4115 cbContiguous = cbIova;
4116 rc = VINF_SUCCESS;
4117
4118#ifdef IOMMU_WITH_IOTLBE_CACHE
4119 /* Update that addresses permissions of DTE apply (but omit address translation). */
4120 iommuAmdDteCacheAddOrUpdateFlags(pDevIns, &Dte, idDevice, IOMMU_DTE_CACHE_F_IO_PERM,
4121 IOMMU_DTE_CACHE_F_ADDR_TRANSLATE);
4122#endif
4123 }
4124 else
4125 {
4126 /* Address translation failed or access is denied. */
4127 Assert(rc == VERR_IOMMU_ADDR_ACCESS_DENIED || rc == VERR_IOMMU_ADDR_TRANSLATION_FAILED);
4128 GCPhysSpa = NIL_RTGCPHYS;
4129 cbContiguous = 0;
4130 STAM_COUNTER_INC(&pThis->StatAccessDtePermDenied);
4131 }
4132 }
4133 else
4134 {
4135 /* Invalid reserved bits in the DTE, raise an error event. */
4136 LogFunc(("Invalid DTE reserved bits (u64[0]=%#RX64 u64[1]=%#RX64) -> Illegal DTE\n", fRsvd0, fRsvd1));
4137 EVT_ILLEGAL_DTE_T Event;
4138 iommuAmdIllegalDteEventInit(idDevice, uIova, true /* fRsvdNotZero */, enmOp, &Event);
4139 iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdNotZero);
4140 rc = VERR_IOMMU_ADDR_TRANSLATION_FAILED;
4141 }
4142 }
4143 else
4144 {
4145 /*
4146 * The DTE is not valid, forward addresses untranslated.
4147 * See AMD IOMMU spec. "Table 5: Feature Enablement for Address Translation".
4148 */
4149 GCPhysSpa = uIova;
4150 cbContiguous = cbIova;
4151 }
4152 }
4153 else
4154 {
4155 LogFunc(("Failed to read device table entry. idDevice=%#x rc=%Rrc\n", idDevice, rc));
4156 rc = VERR_IOMMU_ADDR_TRANSLATION_FAILED;
4157 }
4158
4159 *pGCPhysSpa = GCPhysSpa;
4160 *pcbContiguous = cbContiguous;
4161 AssertMsg(rc != VINF_SUCCESS || cbContiguous > 0, ("cbContiguous=%zu\n", cbContiguous));
4162 return rc;
4163}
4164
4165
4166#ifdef IOMMU_WITH_IOTLBE_CACHE
4167/**
4168 * I/O page lookup callback for finding an I/O page from the IOTLB.
4169 *
4170 * @returns VBox status code.
4171 * @retval VINF_SUCCESS when the page is found and has the right permissions.
4172 * @retval VERR_NOT_FOUND when the page is not found.
4173 * @retval VERR_IOMMU_ADDR_ACCESS_DENIED when the page is found but permissions are
4174 * insufficient to what is requested.
4175 *
4176 * @param pDevIns The IOMMU instance data.
4177 * @param uIovaPage The I/O virtual address to lookup in the cache (must be
4178 * 4K aligned).
4179 * @param fPerm The I/O permissions for this access, see
4180 * IOMMU_IO_PERM_XXX.
4181 * @param pAux The auxiliary information required during lookup.
4182 * @param pPageLookup Where to store the looked up I/O page.
4183 */
4184static DECLCALLBACK(int) iommuAmdCacheLookupPage(PPDMDEVINS pDevIns, uint64_t uIovaPage, uint8_t fPerm, PCIOMMUOPAUX pAux,
4185 PIOPAGELOOKUP pPageLookup)
4186{
4187 Assert(pAux);
4188 Assert(pPageLookup);
4189 Assert(!(uIovaPage & X86_PAGE_4K_OFFSET_MASK));
4190
4191 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4192 PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
4193
4194 STAM_PROFILE_ADV_START(&pThis->StatProfIotlbeLookup, a);
4195 PCIOTLBE pIotlbe = iommuAmdIotlbLookup(pThis, pThisR3, pAux->idDomain, uIovaPage);
4196 STAM_PROFILE_ADV_STOP(&pThis->StatProfIotlbeLookup, a);
4197 if (pIotlbe)
4198 {
4199 *pPageLookup = pIotlbe->PageLookup;
4200 if ((pPageLookup->fPerm & fPerm) == fPerm)
4201 {
4202 STAM_COUNTER_INC(&pThis->StatAccessCacheHit);
4203 return VINF_SUCCESS;
4204 }
4205 return VERR_IOMMU_ADDR_ACCESS_DENIED;
4206 }
4207 return VERR_NOT_FOUND;
4208}
4209
4210
4211/**
4212 * Lookups a memory access from the IOTLB cache.
4213 *
4214 * @returns VBox status code.
4215 * @retval VINF_SUCCESS if the access was cached and permissions are verified.
4216 * @retval VERR_OUT_OF_RANGE if the access resulted in a non-contiguous physical
4217 * address region.
4218 * @retval VERR_NOT_FOUND if the access was not cached.
4219 * @retval VERR_IOMMU_ADDR_ACCESS_DENIED if the access was cached but permissions
4220 * are insufficient.
4221 *
4222 * @param pDevIns The IOMMU instance data.
4223 * @param idDevice The device ID (bus, device, function).
4224 * @param uIova The I/O virtual address to lookup.
4225 * @param cbIova The size of the access.
4226 * @param fPerm The I/O permissions for this access, see
4227 * IOMMU_IO_PERM_XXX.
4228 * @param enmOp The IOMMU operation being performed.
4229 * @param pGCPhysSpa Where to store the translated system physical address.
4230 * @param pcbContiguous Where to store the number of contiguous bytes translated
4231 * and permission-checked.
4232 */
4233static int iommuAmdIotlbCacheLookup(PPDMDEVINS pDevIns, uint16_t idDevice, uint64_t uIova, size_t cbIova, uint8_t fPerm,
4234 IOMMUOP enmOp, PRTGCPHYS pGCPhysSpa, size_t *pcbContiguous)
4235{
4236 int rc;
4237 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4238
4239#define IOMMU_IOTLB_LOOKUP_FAILED(a_rc) \
4240 do { \
4241 *pGCPhysSpa = NIL_RTGCPHYS; \
4242 *pcbContiguous = 0; \
4243 rc = (a_rc); \
4244 } while (0)
4245
4246 /*
4247 * We hold the cache lock across both the DTE and the IOTLB lookups (if any) because
4248 * we don't want the DTE cache to be invalidate while we perform IOTBL lookups.
4249 */
4250 IOMMU_CACHE_LOCK(pDevIns, pThis);
4251
4252 /* Lookup the DTE cache entry. */
4253 uint16_t const idxDteCache = iommuAmdDteCacheEntryLookup(pThis, idDevice);
4254 if (idxDteCache < RT_ELEMENTS(pThis->aDteCache))
4255 {
4256 PCDTECACHE pDteCache = &pThis->aDteCache[idxDteCache];
4257 if ((pDteCache->fFlags & (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_ADDR_TRANSLATE))
4258 == (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_ADDR_TRANSLATE))
4259 {
4260 /* Lookup IOTLB entries. */
4261 IOADDRRANGE AddrIn;
4262 AddrIn.uAddr = uIova;
4263 AddrIn.cb = cbIova;
4264 AddrIn.fPerm = fPerm;
4265
4266 IOMMUOPAUX Aux;
4267 Aux.enmOp = enmOp;
4268 Aux.pDte = NULL;
4269 Aux.idDevice = idDevice;
4270 Aux.idDomain = pDteCache->idDomain;
4271
4272 IOPAGELOOKUP AddrOut;
4273 rc = iommuAmdLookupIoAddrRange(pDevIns, iommuAmdCacheLookupPage, &AddrIn, &Aux, &AddrOut, pcbContiguous);
4274 *pGCPhysSpa = AddrOut.GCPhysSpa;
4275 Assert(*pcbContiguous <= cbIova);
4276 }
4277 else if ((pDteCache->fFlags & (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_IO_PERM))
4278 == (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_IO_PERM))
4279 {
4280 /* Address translation is disabled, but DTE permissions apply. */
4281 Assert(!(pDteCache->fFlags & IOMMU_DTE_CACHE_F_ADDR_TRANSLATE));
4282 uint8_t const fDtePerm = (pDteCache->fFlags >> IOMMU_DTE_CACHE_F_IO_PERM_SHIFT) & IOMMU_DTE_CACHE_F_IO_PERM_MASK;
4283 if ((fDtePerm & fPerm) == fPerm)
4284 {
4285 *pGCPhysSpa = uIova;
4286 *pcbContiguous = cbIova;
4287 rc = VINF_SUCCESS;
4288 }
4289 else
4290 IOMMU_IOTLB_LOOKUP_FAILED(VERR_IOMMU_ADDR_ACCESS_DENIED);
4291 }
4292 else if (pDteCache->fFlags & IOMMU_DTE_CACHE_F_PRESENT)
4293 {
4294 /* Forward addresses untranslated, without checking permissions. */
4295 *pGCPhysSpa = uIova;
4296 *pcbContiguous = cbIova;
4297 rc = VINF_SUCCESS;
4298 }
4299 else
4300 IOMMU_IOTLB_LOOKUP_FAILED(VERR_NOT_FOUND);
4301 }
4302 else
4303 IOMMU_IOTLB_LOOKUP_FAILED(VERR_NOT_FOUND);
4304
4305 IOMMU_CACHE_UNLOCK(pDevIns, pThis);
4306
4307 return rc;
4308
4309#undef IOMMU_IOTLB_LOOKUP_FAILED
4310}
4311#endif /* IOMMU_WITH_IOTLBE_CACHE */
4312
4313
4314/**
4315 * Gets the I/O permission and IOMMU operation type for the given access flags.
4316 *
4317 * @param pThis The shared IOMMU device state.
4318 * @param fFlags The PDM IOMMU flags, PDMIOMMU_MEM_F_XXX.
4319 * @param penmOp Where to store the IOMMU operation.
4320 * @param pfPerm Where to store the IOMMU I/O permission.
4321 * @param fBulk Whether this is a bulk read or write.
4322 */
4323DECLINLINE(void) iommuAmdMemAccessGetPermAndOp(PIOMMU pThis, uint32_t fFlags, PIOMMUOP penmOp, uint8_t *pfPerm, bool fBulk)
4324{
4325 if (fFlags & PDMIOMMU_MEM_F_WRITE)
4326 {
4327 *penmOp = IOMMUOP_MEM_WRITE;
4328 *pfPerm = IOMMU_IO_PERM_WRITE;
4329#ifdef VBOX_WITH_STATISTICS
4330 if (!fBulk)
4331 STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMemWrite));
4332 else
4333 STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMemBulkWrite));
4334#else
4335 RT_NOREF2(pThis, fBulk);
4336#endif
4337 }
4338 else
4339 {
4340 Assert(fFlags & PDMIOMMU_MEM_F_READ);
4341 *penmOp = IOMMUOP_MEM_READ;
4342 *pfPerm = IOMMU_IO_PERM_READ;
4343#ifdef VBOX_WITH_STATISTICS
4344 if (!fBulk)
4345 STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMemRead));
4346 else
4347 STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMemBulkRead));
4348#else
4349 RT_NOREF2(pThis, fBulk);
4350#endif
4351 }
4352}
4353
4354
4355/**
4356 * Memory access transaction from a device.
4357 *
4358 * @returns VBox status code.
4359 * @param pDevIns The IOMMU device instance.
4360 * @param idDevice The device ID (bus, device, function).
4361 * @param uIova The I/O virtual address being accessed.
4362 * @param cbIova The size of the access.
4363 * @param fFlags The access flags, see PDMIOMMU_MEM_F_XXX.
4364 * @param pGCPhysSpa Where to store the translated system physical address.
4365 * @param pcbContiguous Where to store the number of contiguous bytes translated
4366 * and permission-checked.
4367 *
4368 * @thread Any.
4369 */
4370static DECLCALLBACK(int) iommuAmdMemAccess(PPDMDEVINS pDevIns, uint16_t idDevice, uint64_t uIova, size_t cbIova,
4371 uint32_t fFlags, PRTGCPHYS pGCPhysSpa, size_t *pcbContiguous)
4372{
4373 /* Validate. */
4374 AssertPtr(pDevIns);
4375 AssertPtr(pGCPhysSpa);
4376 Assert(cbIova > 0);
4377 Assert(!(fFlags & ~PDMIOMMU_MEM_F_VALID_MASK));
4378
4379 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4380 IOMMU_CTRL_T const Ctrl = iommuAmdGetCtrlUnlocked(pThis);
4381 if (Ctrl.n.u1IommuEn)
4382 {
4383 IOMMUOP enmOp;
4384 uint8_t fPerm;
4385 iommuAmdMemAccessGetPermAndOp(pThis, fFlags, &enmOp, &fPerm, false /* fBulk */);
4386 LogFlowFunc(("%s: idDevice=%#x uIova=%#RX64 cb=%zu\n", iommuAmdMemAccessGetPermName(fPerm), idDevice, uIova, cbIova));
4387
4388 int rc;
4389#ifdef IOMMU_WITH_IOTLBE_CACHE
4390 /* Lookup the IOVA from the cache. */
4391 rc = iommuAmdIotlbCacheLookup(pDevIns, idDevice, uIova, cbIova, fPerm, enmOp, pGCPhysSpa, pcbContiguous);
4392 if (rc == VINF_SUCCESS)
4393 {
4394 /* All pages in the access were found in the cache with sufficient permissions. */
4395 Assert(*pcbContiguous == cbIova);
4396 Assert(*pGCPhysSpa != NIL_RTGCPHYS);
4397 STAM_COUNTER_INC(&pThis->StatAccessCacheHitFull);
4398 return VINF_SUCCESS;
4399 }
4400 if (rc != VERR_OUT_OF_RANGE)
4401 { /* likely */ }
4402 else
4403 {
4404 /* Access stopped since translations resulted in non-contiguous memory, let caller resume access. */
4405 Assert(*pcbContiguous > 0 && *pcbContiguous < cbIova);
4406 STAM_COUNTER_INC(&pThis->StatAccessCacheNonContig);
4407 return VINF_SUCCESS;
4408 }
4409
4410 /*
4411 * Access incomplete as not all pages were in the cache.
4412 * Or permissions were denied for the access (which typically doesn't happen)
4413 * so go through the slower path and raise the required event.
4414 */
4415 AssertMsg(*pcbContiguous < cbIova, ("Invalid size: cbContiguous=%zu cbIova=%zu\n", *pcbContiguous, cbIova));
4416 uIova += *pcbContiguous;
4417 cbIova -= *pcbContiguous;
4418 /* We currently are including any permission denied pages as cache misses too.*/
4419 STAM_COUNTER_INC(&pThis->StatAccessCacheMiss);
4420#endif
4421
4422 /* Lookup the IOVA from the device table. */
4423 rc = iommuAmdDteLookup(pDevIns, idDevice, uIova, cbIova, fPerm, enmOp, pGCPhysSpa, pcbContiguous);
4424 if (RT_SUCCESS(rc))
4425 { /* likely */ }
4426 else
4427 {
4428 Assert(rc != VERR_OUT_OF_RANGE);
4429 LogFunc(("DTE lookup failed! idDevice=%#x uIova=%#RX64 fPerm=%u cbIova=%zu rc=%#Rrc\n", idDevice, uIova, fPerm,
4430 cbIova, rc));
4431 }
4432
4433 return rc;
4434 }
4435
4436 /* Addresses are forwarded without translation when the IOMMU is disabled. */
4437 *pGCPhysSpa = uIova;
4438 *pcbContiguous = cbIova;
4439 return VINF_SUCCESS;
4440}
4441
4442
4443/**
4444 * Memory access bulk (one or more 4K pages) request from a device.
4445 *
4446 * @returns VBox status code.
4447 * @param pDevIns The IOMMU device instance.
4448 * @param idDevice The device ID (bus, device, function).
4449 * @param cIovas The number of addresses being accessed.
4450 * @param pauIovas The I/O virtual addresses for each page being accessed.
4451 * @param fFlags The access flags, see PDMIOMMU_MEM_F_XXX.
4452 * @param paGCPhysSpa Where to store the translated physical addresses.
4453 *
4454 * @thread Any.
4455 */
4456static DECLCALLBACK(int) iommuAmdMemBulkAccess(PPDMDEVINS pDevIns, uint16_t idDevice, size_t cIovas, uint64_t const *pauIovas,
4457 uint32_t fFlags, PRTGCPHYS paGCPhysSpa)
4458{
4459 /* Validate. */
4460 AssertPtr(pDevIns);
4461 Assert(cIovas > 0);
4462 AssertPtr(pauIovas);
4463 AssertPtr(paGCPhysSpa);
4464 Assert(!(fFlags & ~PDMIOMMU_MEM_F_VALID_MASK));
4465
4466 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4467 IOMMU_CTRL_T const Ctrl = iommuAmdGetCtrlUnlocked(pThis);
4468 if (Ctrl.n.u1IommuEn)
4469 {
4470 IOMMUOP enmOp;
4471 uint8_t fPerm;
4472 iommuAmdMemAccessGetPermAndOp(pThis, fFlags, &enmOp, &fPerm, true /* fBulk */);
4473 LogFlowFunc(("%s: idDevice=%#x cIovas=%zu\n", iommuAmdMemAccessGetPermName(fPerm), idDevice, cIovas));
4474
4475 for (size_t i = 0; i < cIovas; i++)
4476 {
4477 int rc;
4478 size_t cbContig;
4479
4480#ifdef IOMMU_WITH_IOTLBE_CACHE
4481 /* Lookup the IOVA from the IOTLB cache. */
4482 rc = iommuAmdIotlbCacheLookup(pDevIns, idDevice, pauIovas[i], X86_PAGE_SIZE, fPerm, enmOp, &paGCPhysSpa[i],
4483 &cbContig);
4484 if (rc == VINF_SUCCESS)
4485 {
4486 Assert(cbContig == X86_PAGE_SIZE);
4487 Assert(paGCPhysSpa[i] != NIL_RTGCPHYS);
4488 STAM_COUNTER_INC(&pThis->StatAccessCacheHitFull);
4489 continue;
4490 }
4491 Assert(rc == VERR_NOT_FOUND || rc == VERR_IOMMU_ADDR_ACCESS_DENIED);
4492 STAM_COUNTER_INC(&pThis->StatAccessCacheMiss);
4493#endif
4494
4495 /* Lookup the IOVA from the device table. */
4496 rc = iommuAmdDteLookup(pDevIns, idDevice, pauIovas[i], X86_PAGE_SIZE, fPerm, enmOp, &paGCPhysSpa[i], &cbContig);
4497 if (RT_SUCCESS(rc))
4498 { /* likely */ }
4499 else
4500 {
4501 LogFunc(("Failed! idDevice=%#x uIova=%#RX64 fPerm=%u rc=%Rrc\n", idDevice, pauIovas[i], fPerm, rc));
4502 return rc;
4503 }
4504 Assert(cbContig == X86_PAGE_SIZE);
4505 }
4506 }
4507 else
4508 {
4509 /* Addresses are forwarded without translation when the IOMMU is disabled. */
4510 for (size_t i = 0; i < cIovas; i++)
4511 paGCPhysSpa[i] = pauIovas[i];
4512 }
4513
4514 return VINF_SUCCESS;
4515}
4516
4517
4518/**
4519 * Reads an interrupt remapping table entry from guest memory given its DTE.
4520 *
4521 * @returns VBox status code.
4522 * @param pDevIns The IOMMU device instance.
4523 * @param idDevice The device ID (bus, device, function).
4524 * @param pDte The device table entry.
4525 * @param GCPhysIn The source MSI address (used for reporting errors).
4526 * @param uDataIn The source MSI data.
4527 * @param enmOp The IOMMU operation being performed.
4528 * @param pIrte Where to store the interrupt remapping table entry.
4529 *
4530 * @thread Any.
4531 */
4532static int iommuAmdIrteRead(PPDMDEVINS pDevIns, uint16_t idDevice, PCDTE_T pDte, RTGCPHYS GCPhysIn, uint32_t uDataIn,
4533 IOMMUOP enmOp, PIRTE_T pIrte)
4534{
4535 /* Ensure the IRTE length is valid. */
4536 Assert(pDte->n.u4IntrTableLength < IOMMU_DTE_INTR_TAB_LEN_MAX);
4537
4538 RTGCPHYS const GCPhysIntrTable = pDte->au64[2] & IOMMU_DTE_IRTE_ROOT_PTR_MASK;
4539 uint16_t const cbIntrTable = IOMMU_DTE_GET_INTR_TAB_LEN(pDte);
4540 uint16_t const offIrte = IOMMU_GET_IRTE_OFF(uDataIn);
4541 RTGCPHYS const GCPhysIrte = GCPhysIntrTable + offIrte;
4542
4543 /* Ensure the IRTE falls completely within the interrupt table. */
4544 if (offIrte + sizeof(IRTE_T) <= cbIntrTable)
4545 { /* likely */ }
4546 else
4547 {
4548 LogFunc(("IRTE exceeds table length (GCPhysIntrTable=%#RGp cbIntrTable=%u offIrte=%#x uDataIn=%#x) -> IOPF\n",
4549 GCPhysIntrTable, cbIntrTable, offIrte, uDataIn));
4550
4551 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
4552 iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, GCPhysIn, false /* fPresent */, false /* fRsvdNotZero */,
4553 false /* fPermDenied */, enmOp, &EvtIoPageFault);
4554 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault,
4555 kIoPageFaultType_IrteAddrInvalid);
4556 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
4557 }
4558
4559 /* Read the IRTE from memory. */
4560 Assert(!(GCPhysIrte & 3));
4561 int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysIrte, pIrte, sizeof(*pIrte));
4562 if (RT_SUCCESS(rc))
4563 return VINF_SUCCESS;
4564
4565 /** @todo The IOMMU spec. does not tell what kind of error is reported in this
4566 * situation. Is it an I/O page fault or a device table hardware error?
4567 * There's no interrupt table hardware error event, but it's unclear what
4568 * we should do here. */
4569 LogFunc(("Failed to read interrupt table entry at %#RGp. rc=%Rrc -> ???\n", GCPhysIrte, rc));
4570 return VERR_IOMMU_IPE_4;
4571}
4572
4573
4574/**
4575 * Remaps the interrupt using the interrupt remapping table.
4576 *
4577 * @returns VBox status code.
4578 * @param pDevIns The IOMMU instance data.
4579 * @param idDevice The device ID (bus, device, function).
4580 * @param pDte The device table entry.
4581 * @param enmOp The IOMMU operation being performed.
4582 * @param pMsiIn The source MSI.
4583 * @param pMsiOut Where to store the remapped MSI.
4584 *
4585 * @thread Any.
4586 */
4587static int iommuAmdIntrRemap(PPDMDEVINS pDevIns, uint16_t idDevice, PCDTE_T pDte, IOMMUOP enmOp, PCMSIMSG pMsiIn,
4588 PMSIMSG pMsiOut)
4589{
4590 Assert(pDte->n.u2IntrCtrl == IOMMU_INTR_CTRL_REMAP);
4591
4592 IRTE_T Irte;
4593 uint32_t const uMsiInData = pMsiIn->Data.u32;
4594 int rc = iommuAmdIrteRead(pDevIns, idDevice, pDte, pMsiIn->Addr.u64, uMsiInData, enmOp, &Irte);
4595 if (RT_SUCCESS(rc))
4596 {
4597 if (Irte.n.u1RemapEnable)
4598 {
4599 if (!Irte.n.u1GuestMode)
4600 {
4601 if (Irte.n.u3IntrType <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO)
4602 {
4603 iommuAmdIrteRemapMsi(pMsiIn, pMsiOut, &Irte);
4604#ifdef IOMMU_WITH_IRTE_CACHE
4605 iommuAmdIrteCacheAdd(pDevIns, idDevice, IOMMU_GET_IRTE_OFF(uMsiInData), &Irte);
4606#endif
4607 return VINF_SUCCESS;
4608 }
4609
4610 LogFunc(("Interrupt type (%#x) invalid -> IOPF\n", Irte.n.u3IntrType));
4611 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
4612 iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, pMsiIn->Addr.u64, Irte.n.u1RemapEnable,
4613 true /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault);
4614 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, &Irte, enmOp, &EvtIoPageFault,
4615 kIoPageFaultType_IrteRsvdIntType);
4616 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
4617 }
4618
4619 LogFunc(("Guest mode not supported -> IOPF\n"));
4620 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
4621 iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, pMsiIn->Addr.u64, Irte.n.u1RemapEnable,
4622 true /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault);
4623 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, &Irte, enmOp, &EvtIoPageFault, kIoPageFaultType_IrteRsvdNotZero);
4624 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
4625 }
4626
4627 LogFunc(("Remapping disabled -> IOPF\n"));
4628 EVT_IO_PAGE_FAULT_T EvtIoPageFault;
4629 iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, pMsiIn->Addr.u64, Irte.n.u1RemapEnable,
4630 false /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault);
4631 iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, &Irte, enmOp, &EvtIoPageFault, kIoPageFaultType_IrteRemapEn);
4632 return VERR_IOMMU_ADDR_TRANSLATION_FAILED;
4633 }
4634
4635 return rc;
4636}
4637
4638
4639/**
4640 * Looks up an MSI interrupt from the interrupt remapping table.
4641 *
4642 * @returns VBox status code.
4643 * @param pDevIns The IOMMU instance data.
4644 * @param idDevice The device ID (bus, device, function).
4645 * @param enmOp The IOMMU operation being performed.
4646 * @param pMsiIn The source MSI.
4647 * @param pMsiOut Where to store the remapped MSI.
4648 *
4649 * @thread Any.
4650 */
4651static int iommuAmdIntrTableLookup(PPDMDEVINS pDevIns, uint16_t idDevice, IOMMUOP enmOp, PCMSIMSG pMsiIn, PMSIMSG pMsiOut)
4652{
4653 LogFlowFunc(("idDevice=%#x (%#x:%#x:%#x) enmOp=%u\n", idDevice, ((idDevice >> VBOX_PCI_BUS_SHIFT) & VBOX_PCI_BUS_MASK),
4654 ((idDevice >> VBOX_PCI_DEVFN_DEV_SHIFT) & VBOX_PCI_DEVFN_DEV_MASK), (idDevice & VBOX_PCI_DEVFN_FUN_MASK),
4655 enmOp));
4656
4657 /* Read the device table entry from memory. */
4658 DTE_T Dte;
4659 int rc = iommuAmdDteRead(pDevIns, idDevice, enmOp, &Dte);
4660 if (RT_SUCCESS(rc))
4661 {
4662#ifdef IOMMU_WITH_IRTE_CACHE
4663 iommuAmdDteCacheAdd(pDevIns, idDevice, &Dte);
4664#endif
4665 /* If the DTE is not valid, all interrupts are forwarded without remapping. */
4666 if (Dte.n.u1IntrMapValid)
4667 {
4668 /* Validate bits 255:128 of the device table entry when DTE.IV is 1. */
4669 uint64_t const fRsvd0 = Dte.au64[2] & ~IOMMU_DTE_QWORD_2_VALID_MASK;
4670 uint64_t const fRsvd1 = Dte.au64[3] & ~IOMMU_DTE_QWORD_3_VALID_MASK;
4671 if (RT_LIKELY(!fRsvd0 && !fRsvd1))
4672 { /* likely */ }
4673 else
4674 {
4675 LogFunc(("Invalid reserved bits in DTE (u64[2]=%#RX64 u64[3]=%#RX64) -> Illegal DTE\n", fRsvd0, fRsvd1));
4676 EVT_ILLEGAL_DTE_T Event;
4677 iommuAmdIllegalDteEventInit(idDevice, pMsiIn->Addr.u64, true /* fRsvdNotZero */, enmOp, &Event);
4678 iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdNotZero);
4679 return VERR_IOMMU_INTR_REMAP_FAILED;
4680 }
4681
4682 /*
4683 * LINT0/LINT1 pins cannot be driven by PCI(e) devices. Perhaps for a Southbridge
4684 * that's connected through HyperTransport it might be possible; but for us, it
4685 * doesn't seem we need to specially handle these pins.
4686 */
4687
4688 /*
4689 * Validate the MSI source address.
4690 *
4691 * 64-bit MSIs are supported by the PCI and AMD IOMMU spec. However as far as the
4692 * CPU is concerned, the MSI region is fixed and we must ensure no other device
4693 * claims the region as I/O space.
4694 *
4695 * See PCI spec. 6.1.4. "Message Signaled Interrupt (MSI) Support".
4696 * See AMD IOMMU spec. 2.8 "IOMMU Interrupt Support".
4697 * See Intel spec. 10.11.1 "Message Address Register Format".
4698 */
4699 if ((pMsiIn->Addr.u64 & VBOX_MSI_ADDR_ADDR_MASK) == VBOX_MSI_ADDR_BASE)
4700 {
4701 /*
4702 * The IOMMU remaps fixed and arbitrated interrupts using the IRTE.
4703 * See AMD IOMMU spec. "2.2.5.1 Interrupt Remapping Tables, Guest Virtual APIC Not Enabled".
4704 */
4705 uint8_t const u8DeliveryMode = pMsiIn->Data.n.u3DeliveryMode;
4706 bool fPassThru = false;
4707 switch (u8DeliveryMode)
4708 {
4709 case VBOX_MSI_DELIVERY_MODE_FIXED:
4710 case VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO:
4711 {
4712 uint8_t const uIntrCtrl = Dte.n.u2IntrCtrl;
4713 if (uIntrCtrl == IOMMU_INTR_CTRL_REMAP)
4714 {
4715 /* Validate the encoded interrupt table length when IntCtl specifies remapping. */
4716 uint8_t const uIntrTabLen = Dte.n.u4IntrTableLength;
4717 if (uIntrTabLen < IOMMU_DTE_INTR_TAB_LEN_MAX)
4718 {
4719 /*
4720 * We don't support guest interrupt remapping yet. When we do, we'll need to
4721 * check Ctrl.u1GstVirtApicEn and use the guest Virtual APIC Table Root Pointer
4722 * in the DTE rather than the Interrupt Root Table Pointer. Since the caller
4723 * already reads the control register, add that as a parameter when we eventually
4724 * support guest interrupt remapping. For now, just assert.
4725 */
4726 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4727 Assert(!pThis->ExtFeat.n.u1GstVirtApicSup);
4728 NOREF(pThis);
4729
4730 return iommuAmdIntrRemap(pDevIns, idDevice, &Dte, enmOp, pMsiIn, pMsiOut);
4731 }
4732
4733 LogFunc(("Invalid interrupt table length %#x -> Illegal DTE\n", uIntrTabLen));
4734 EVT_ILLEGAL_DTE_T Event;
4735 iommuAmdIllegalDteEventInit(idDevice, pMsiIn->Addr.u64, false /* fRsvdNotZero */, enmOp, &Event);
4736 iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdIntTabLen);
4737 return VERR_IOMMU_INTR_REMAP_FAILED;
4738 }
4739
4740 if (uIntrCtrl == IOMMU_INTR_CTRL_FWD_UNMAPPED)
4741 {
4742 fPassThru = true;
4743 break;
4744 }
4745
4746 if (uIntrCtrl == IOMMU_INTR_CTRL_TARGET_ABORT)
4747 {
4748 LogRelMax(10, ("%s: Remapping disallowed for fixed/arbitrated interrupt %#x -> Target abort\n",
4749 IOMMU_LOG_PFX, pMsiIn->Data.n.u8Vector));
4750 iommuAmdSetPciTargetAbort(pDevIns);
4751 return VERR_IOMMU_INTR_REMAP_DENIED;
4752 }
4753
4754 Assert(uIntrCtrl == IOMMU_INTR_CTRL_RSVD); /* Paranoia. */
4755 LogRelMax(10, ("%s: IntCtl mode invalid %#x -> Illegal DTE\n", IOMMU_LOG_PFX, uIntrCtrl));
4756 EVT_ILLEGAL_DTE_T Event;
4757 iommuAmdIllegalDteEventInit(idDevice, pMsiIn->Addr.u64, true /* fRsvdNotZero */, enmOp, &Event);
4758 iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdIntCtl);
4759 return VERR_IOMMU_INTR_REMAP_FAILED;
4760 }
4761
4762 /* SMIs are passed through unmapped. We don't implement SMI filters. */
4763 case VBOX_MSI_DELIVERY_MODE_SMI: fPassThru = true; break;
4764 case VBOX_MSI_DELIVERY_MODE_NMI: fPassThru = Dte.n.u1NmiPassthru; break;
4765 case VBOX_MSI_DELIVERY_MODE_INIT: fPassThru = Dte.n.u1InitPassthru; break;
4766 case VBOX_MSI_DELIVERY_MODE_EXT_INT: fPassThru = Dte.n.u1ExtIntPassthru; break;
4767 default:
4768 {
4769 LogRelMax(10, ("%s: MSI data delivery mode invalid %#x -> Target abort\n", IOMMU_LOG_PFX,
4770 u8DeliveryMode));
4771 iommuAmdSetPciTargetAbort(pDevIns);
4772 return VERR_IOMMU_INTR_REMAP_FAILED;
4773 }
4774 }
4775
4776 /*
4777 * For those other than fixed and arbitrated interrupts, destination mode must be 0 (physical).
4778 * See AMD IOMMU spec. The note below Table 19: "IOMMU Controls and Actions for Upstream Interrupts".
4779 */
4780 if ( u8DeliveryMode <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO
4781 || !pMsiIn->Addr.n.u1DestMode)
4782 {
4783 if (fPassThru)
4784 {
4785 *pMsiOut = *pMsiIn;
4786 return VINF_SUCCESS;
4787 }
4788 LogRelMax(10, ("%s: Remapping/passthru disallowed for interrupt %#x -> Target abort\n", IOMMU_LOG_PFX,
4789 pMsiIn->Data.n.u8Vector));
4790 }
4791 else
4792 LogRelMax(10, ("%s: Logical destination mode invalid for delivery mode %#x\n -> Target abort\n",
4793 IOMMU_LOG_PFX, u8DeliveryMode));
4794
4795 iommuAmdSetPciTargetAbort(pDevIns);
4796 return VERR_IOMMU_INTR_REMAP_DENIED;
4797 }
4798 else
4799 {
4800 /** @todo should be cause a PCI target abort here? */
4801 LogRelMax(10, ("%s: MSI address region invalid %#RX64\n", IOMMU_LOG_PFX, pMsiIn->Addr.u64));
4802 return VERR_IOMMU_INTR_REMAP_FAILED;
4803 }
4804 }
4805 else
4806 {
4807 LogFlowFunc(("DTE interrupt map not valid\n"));
4808 *pMsiOut = *pMsiIn;
4809 return VINF_SUCCESS;
4810 }
4811 }
4812
4813 LogFunc(("Failed to read device table entry. idDevice=%#x rc=%Rrc\n", idDevice, rc));
4814 return VERR_IOMMU_INTR_REMAP_FAILED;
4815}
4816
4817
4818/**
4819 * Interrupt remap request from a device.
4820 *
4821 * @returns VBox status code.
4822 * @param pDevIns The IOMMU device instance.
4823 * @param idDevice The device ID (bus, device, function).
4824 * @param pMsiIn The source MSI.
4825 * @param pMsiOut Where to store the remapped MSI.
4826 */
4827static DECLCALLBACK(int) iommuAmdMsiRemap(PPDMDEVINS pDevIns, uint16_t idDevice, PCMSIMSG pMsiIn, PMSIMSG pMsiOut)
4828{
4829 /* Validate. */
4830 Assert(pDevIns);
4831 Assert(pMsiIn);
4832 Assert(pMsiOut);
4833
4834 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4835
4836 /* If this MSI was generated by the IOMMU itself, it's not subject to remapping, see @bugref{9654#c104}. */
4837 if (idDevice == pThis->uPciAddress)
4838 return VERR_IOMMU_CANNOT_CALL_SELF;
4839
4840 /* Interrupts are forwarded with remapping when the IOMMU is disabled. */
4841 IOMMU_CTRL_T const Ctrl = iommuAmdGetCtrlUnlocked(pThis);
4842 if (Ctrl.n.u1IommuEn)
4843 {
4844 STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMsiRemap));
4845
4846 int rc;
4847#ifdef IOMMU_WITH_IRTE_CACHE
4848 STAM_PROFILE_ADV_START(&pThis->StatProfIrteCacheLookup, a);
4849 rc = iommuAmdIrteCacheLookup(pDevIns, idDevice, IOMMUOP_INTR_REQ, pMsiIn, pMsiOut);
4850 STAM_PROFILE_ADV_STOP(&pThis->StatProfIrteCacheLookup, a);
4851 if (RT_SUCCESS(rc))
4852 {
4853 STAM_COUNTER_INC(&pThis->StatIntrCacheHit);
4854 return VINF_SUCCESS;
4855 }
4856 STAM_COUNTER_INC(&pThis->StatIntrCacheMiss);
4857#endif
4858
4859 STAM_PROFILE_ADV_START(&pThis->StatProfIrteLookup, a);
4860 rc = iommuAmdIntrTableLookup(pDevIns, idDevice, IOMMUOP_INTR_REQ, pMsiIn, pMsiOut);
4861 STAM_PROFILE_ADV_STOP(&pThis->StatProfIrteLookup, a);
4862 return rc;
4863 }
4864
4865 *pMsiOut = *pMsiIn;
4866 return VINF_SUCCESS;
4867}
4868
4869
4870/**
4871 * @callback_method_impl{FNIOMMMIONEWWRITE}
4872 */
4873static DECLCALLBACK(VBOXSTRICTRC) iommuAmdMmioWrite(PPDMDEVINS pDevIns, void *pvUser, RTGCPHYS off, void const *pv, unsigned cb)
4874{
4875 NOREF(pvUser);
4876 Assert(cb == 4 || cb == 8);
4877 Assert(!(off & (cb - 1)));
4878
4879 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4880 STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMmioWrite)); NOREF(pThis);
4881
4882 uint64_t const uValue = cb == 8 ? *(uint64_t const *)pv : *(uint32_t const *)pv;
4883 return iommuAmdRegisterWrite(pDevIns, off, cb, uValue);
4884}
4885
4886
4887/**
4888 * @callback_method_impl{FNIOMMMIONEWREAD}
4889 */
4890static DECLCALLBACK(VBOXSTRICTRC) iommuAmdMmioRead(PPDMDEVINS pDevIns, void *pvUser, RTGCPHYS off, void *pv, unsigned cb)
4891{
4892 NOREF(pvUser);
4893 Assert(cb == 4 || cb == 8);
4894 Assert(!(off & (cb - 1)));
4895
4896 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4897 STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMmioRead)); NOREF(pThis);
4898
4899 uint64_t uResult;
4900 VBOXSTRICTRC rcStrict = iommuAmdRegisterRead(pDevIns, off, &uResult);
4901 if (rcStrict == VINF_SUCCESS)
4902 {
4903 if (cb == 8)
4904 *(uint64_t *)pv = uResult;
4905 else
4906 *(uint32_t *)pv = (uint32_t)uResult;
4907 }
4908
4909 return rcStrict;
4910}
4911
4912
4913#ifdef IN_RING3
4914/**
4915 * Processes an IOMMU command.
4916 *
4917 * @returns VBox status code.
4918 * @param pDevIns The IOMMU device instance.
4919 * @param pCmd The command to process.
4920 * @param GCPhysCmd The system physical address of the command.
4921 * @param pEvtError Where to store the error event in case of failures.
4922 *
4923 * @thread Command thread.
4924 */
4925static int iommuAmdR3CmdProcess(PPDMDEVINS pDevIns, PCCMD_GENERIC_T pCmd, RTGCPHYS GCPhysCmd, PEVT_GENERIC_T pEvtError)
4926{
4927 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
4928 PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
4929
4930 STAM_COUNTER_INC(&pThis->StatCmd);
4931
4932 uint8_t const bCmd = pCmd->n.u4Opcode;
4933 switch (bCmd)
4934 {
4935 case IOMMU_CMD_COMPLETION_WAIT:
4936 {
4937 STAM_COUNTER_INC(&pThis->StatCmdCompWait);
4938
4939 PCCMD_COMWAIT_T pCmdComWait = (PCCMD_COMWAIT_T)pCmd;
4940 AssertCompile(sizeof(*pCmdComWait) == sizeof(*pCmd));
4941
4942 /* Validate reserved bits in the command. */
4943 if (!(pCmdComWait->au64[0] & ~IOMMU_CMD_COM_WAIT_QWORD_0_VALID_MASK))
4944 {
4945 /* If Completion Store is requested, write the StoreData to the specified address. */
4946 if (pCmdComWait->n.u1Store)
4947 {
4948 RTGCPHYS const GCPhysStore = RT_MAKE_U64(pCmdComWait->n.u29StoreAddrLo << 3, pCmdComWait->n.u20StoreAddrHi);
4949 uint64_t const u64Data = pCmdComWait->n.u64StoreData;
4950 int rc = PDMDevHlpPCIPhysWrite(pDevIns, GCPhysStore, &u64Data, sizeof(u64Data));
4951 if (RT_FAILURE(rc))
4952 {
4953 LogFunc(("Cmd(%#x): Failed to write StoreData (%#RX64) to %#RGp, rc=%Rrc\n", bCmd, u64Data,
4954 GCPhysStore, rc));
4955 iommuAmdCmdHwErrorEventInit(GCPhysStore, (PEVT_CMD_HW_ERR_T)pEvtError);
4956 return VERR_IOMMU_CMD_HW_ERROR;
4957 }
4958 }
4959
4960 /* If the command requests an interrupt and completion wait interrupts are enabled, raise it. */
4961 if (pCmdComWait->n.u1Interrupt)
4962 {
4963 IOMMU_LOCK(pDevIns, pThisR3);
4964 ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_COMPLETION_WAIT_INTR);
4965 bool const fRaiseInt = pThis->Ctrl.n.u1CompWaitIntrEn;
4966 IOMMU_UNLOCK(pDevIns, pThisR3);
4967 if (fRaiseInt)
4968 iommuAmdMsiInterruptRaise(pDevIns);
4969 }
4970 return VINF_SUCCESS;
4971 }
4972 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
4973 return VERR_IOMMU_CMD_INVALID_FORMAT;
4974 }
4975
4976 case IOMMU_CMD_INV_DEV_TAB_ENTRY:
4977 {
4978 STAM_COUNTER_INC(&pThis->StatCmdInvDte);
4979 PCCMD_INV_DTE_T pCmdInvDte = (PCCMD_INV_DTE_T)pCmd;
4980 AssertCompile(sizeof(*pCmdInvDte) == sizeof(*pCmd));
4981
4982 /* Validate reserved bits in the command. */
4983 if ( !(pCmdInvDte->au64[0] & ~IOMMU_CMD_INV_DTE_QWORD_0_VALID_MASK)
4984 && !(pCmdInvDte->au64[1] & ~IOMMU_CMD_INV_DTE_QWORD_1_VALID_MASK))
4985 {
4986#ifdef IOMMU_WITH_DTE_CACHE
4987 iommuAmdDteCacheRemove(pDevIns, pCmdInvDte->n.u16DevId);
4988#endif
4989 return VINF_SUCCESS;
4990 }
4991 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
4992 return VERR_IOMMU_CMD_INVALID_FORMAT;
4993 }
4994
4995 case IOMMU_CMD_INV_IOMMU_PAGES:
4996 {
4997 STAM_COUNTER_INC(&pThis->StatCmdInvIommuPages);
4998 PCCMD_INV_IOMMU_PAGES_T pCmdInvPages = (PCCMD_INV_IOMMU_PAGES_T)pCmd;
4999 AssertCompile(sizeof(*pCmdInvPages) == sizeof(*pCmd));
5000
5001 /* Validate reserved bits in the command. */
5002 if ( !(pCmdInvPages->au64[0] & ~IOMMU_CMD_INV_IOMMU_PAGES_QWORD_0_VALID_MASK)
5003 && !(pCmdInvPages->au64[1] & ~IOMMU_CMD_INV_IOMMU_PAGES_QWORD_1_VALID_MASK))
5004 {
5005#ifdef IOMMU_WITH_IOTLBE_CACHE
5006 uint64_t const uIova = RT_MAKE_U64(pCmdInvPages->n.u20AddrLo << X86_PAGE_4K_SHIFT, pCmdInvPages->n.u32AddrHi);
5007 uint16_t const idDomain = pCmdInvPages->n.u16DomainId;
5008 uint8_t cShift;
5009 if (!pCmdInvPages->n.u1Size)
5010 cShift = X86_PAGE_4K_SHIFT;
5011 else
5012 {
5013 /* Find the first clear bit starting from bit 12 to 64 of the I/O virtual address. */
5014 unsigned const uFirstZeroBit = ASMBitLastSetU64(~(uIova >> X86_PAGE_4K_SHIFT));
5015 cShift = X86_PAGE_4K_SHIFT + uFirstZeroBit;
5016
5017 /*
5018 * For the address 0x7ffffffffffff000, cShift would be 76 (12+64) and the code below
5019 * would do the right thing by clearing the entire cache for the specified domain ID.
5020 *
5021 * However, for the address 0xfffffffffffff000, cShift would be computed as 12.
5022 * IOMMU behavior is undefined in this case, so it's safe to invalidate just one page.
5023 * A debug-time assert is in place here to let us know if any software tries this.
5024 *
5025 * See AMD IOMMU spec. 2.4.3 "INVALIDATE_IOMMU_PAGES".
5026 * See AMD IOMMU spec. Table 14: "Example Page Size Encodings".
5027 */
5028 Assert(uIova != UINT64_C(0xfffffffffffff000));
5029 }
5030
5031 /*
5032 * Validate invalidation size.
5033 * See AMD IOMMU spec. 2.2.3 "I/O Page Tables for Host Translations".
5034 */
5035 if ( cShift >= 12 /* 4 KB */
5036 && cShift <= 51 /* 2 PB */)
5037 {
5038 /* Remove the range of I/O virtual addresses requesting to be invalidated. */
5039 size_t const cbIova = RT_BIT_64(cShift);
5040 iommuAmdIotlbRemoveRange(pDevIns, idDomain, uIova, cbIova);
5041 }
5042 else
5043 {
5044 /*
5045 * The guest provided size is invalid or exceeds the largest, meaningful page size.
5046 * In such situations we must remove all ranges for the specified domain ID.
5047 */
5048 iommuAmdIotlbRemoveDomainId(pDevIns, idDomain);
5049 }
5050#endif
5051 return VINF_SUCCESS;
5052 }
5053 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
5054 return VERR_IOMMU_CMD_INVALID_FORMAT;
5055 }
5056
5057 case IOMMU_CMD_INV_IOTLB_PAGES:
5058 {
5059 STAM_COUNTER_INC(&pThis->StatCmdInvIotlbPages);
5060
5061 uint32_t const uCapHdr = PDMPciDevGetDWord(pDevIns->apPciDevs[0], IOMMU_PCI_OFF_CAP_HDR);
5062 if (RT_BF_GET(uCapHdr, IOMMU_BF_CAPHDR_IOTLB_SUP))
5063 {
5064 /** @todo IOMMU: Implement remote IOTLB invalidation. */
5065 return VERR_NOT_IMPLEMENTED;
5066 }
5067 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
5068 return VERR_IOMMU_CMD_NOT_SUPPORTED;
5069 }
5070
5071 case IOMMU_CMD_INV_INTR_TABLE:
5072 {
5073 STAM_COUNTER_INC(&pThis->StatCmdInvIntrTable);
5074
5075 PCCMD_INV_INTR_TABLE_T pCmdInvIntrTable = (PCCMD_INV_INTR_TABLE_T)pCmd;
5076 AssertCompile(sizeof(*pCmdInvIntrTable) == sizeof(*pCmd));
5077
5078 /* Validate reserved bits in the command. */
5079 if ( !(pCmdInvIntrTable->au64[0] & ~IOMMU_CMD_INV_INTR_TABLE_QWORD_0_VALID_MASK)
5080 && !(pCmdInvIntrTable->au64[1] & ~IOMMU_CMD_INV_INTR_TABLE_QWORD_1_VALID_MASK))
5081 {
5082#ifdef IOMMU_WITH_IRTE_CACHE
5083 iommuAmdIrteCacheRemove(pDevIns, pCmdInvIntrTable->u.u16DevId);
5084#endif
5085 return VINF_SUCCESS;
5086 }
5087 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
5088 return VERR_IOMMU_CMD_INVALID_FORMAT;
5089 }
5090
5091 case IOMMU_CMD_PREFETCH_IOMMU_PAGES:
5092 {
5093 /* Linux doesn't use prefetching of IOMMU pages, so we don't bother for now. */
5094 STAM_COUNTER_INC(&pThis->StatCmdPrefIommuPages);
5095 Assert(!pThis->ExtFeat.n.u1PrefetchSup);
5096 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
5097 return VERR_IOMMU_CMD_NOT_SUPPORTED;
5098 }
5099
5100 case IOMMU_CMD_COMPLETE_PPR_REQ:
5101 {
5102 STAM_COUNTER_INC(&pThis->StatCmdCompletePprReq);
5103
5104 /* We don't support PPR requests yet. */
5105 Assert(!pThis->ExtFeat.n.u1PprSup);
5106 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
5107 return VERR_IOMMU_CMD_NOT_SUPPORTED;
5108 }
5109
5110 case IOMMU_CMD_INV_IOMMU_ALL:
5111 {
5112 STAM_COUNTER_INC(&pThis->StatCmdInvIommuAll);
5113 if (pThis->ExtFeat.n.u1InvAllSup)
5114 {
5115 PCCMD_INV_IOMMU_ALL_T pCmdInvAll = (PCCMD_INV_IOMMU_ALL_T)pCmd;
5116 AssertCompile(sizeof(*pCmdInvAll) == sizeof(*pCmd));
5117
5118 /* Validate reserved bits in the command. */
5119 if ( !(pCmdInvAll->au64[0] & ~IOMMU_CMD_INV_IOMMU_ALL_QWORD_0_VALID_MASK)
5120 && !(pCmdInvAll->au64[1] & ~IOMMU_CMD_INV_IOMMU_ALL_QWORD_1_VALID_MASK))
5121 {
5122#ifdef IOMMU_WITH_DTE_CACHE
5123 iommuAmdDteCacheRemoveAll(pDevIns);
5124#endif
5125#ifdef IOMMU_WITH_IOTLBE_CACHE
5126 iommuAmdIotlbRemoveAll(pDevIns);
5127#endif
5128 return VINF_SUCCESS;
5129 }
5130 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
5131 return VERR_IOMMU_CMD_INVALID_FORMAT;
5132 }
5133 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
5134 return VERR_IOMMU_CMD_NOT_SUPPORTED;
5135 }
5136 }
5137
5138 STAM_COUNTER_DEC(&pThis->StatCmd);
5139 LogFunc(("Cmd(%#x): Unrecognized\n", bCmd));
5140 iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError);
5141 return VERR_IOMMU_CMD_NOT_SUPPORTED;
5142}
5143
5144
5145/**
5146 * The IOMMU command thread.
5147 *
5148 * @returns VBox status code.
5149 * @param pDevIns The IOMMU device instance.
5150 * @param pThread The command thread.
5151 */
5152static DECLCALLBACK(int) iommuAmdR3CmdThread(PPDMDEVINS pDevIns, PPDMTHREAD pThread)
5153{
5154 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
5155 PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
5156
5157 if (pThread->enmState == PDMTHREADSTATE_INITIALIZING)
5158 return VINF_SUCCESS;
5159
5160 /*
5161 * Pre-allocate the maximum command buffer size supported by the IOMMU.
5162 * This avoid trashing the heap as well as not wasting time allocating
5163 * and freeing buffers while processing commands.
5164 */
5165 size_t const cbMaxCmdBuf = sizeof(CMD_GENERIC_T) * iommuAmdGetBufMaxEntries(15);
5166 void *pvCmds = RTMemAllocZ(cbMaxCmdBuf);
5167 AssertPtrReturn(pvCmds, VERR_NO_MEMORY);
5168
5169 while (pThread->enmState == PDMTHREADSTATE_RUNNING)
5170 {
5171 /*
5172 * Sleep perpetually until we are woken up to process commands.
5173 */
5174 bool const fSignaled = ASMAtomicXchgBool(&pThis->fCmdThreadSignaled, false);
5175 if (!fSignaled)
5176 {
5177 int rc = PDMDevHlpSUPSemEventWaitNoResume(pDevIns, pThis->hEvtCmdThread, RT_INDEFINITE_WAIT);
5178 AssertLogRelMsgReturnStmt(RT_SUCCESS(rc) || rc == VERR_INTERRUPTED, ("%Rrc\n", rc), RTMemFree(pvCmds), rc);
5179 if (RT_UNLIKELY(pThread->enmState != PDMTHREADSTATE_RUNNING))
5180 break;
5181 Log4Func(("Woken up with rc=%Rrc\n", rc));
5182 ASMAtomicWriteBool(&pThis->fCmdThreadSignaled, false);
5183 }
5184
5185 /*
5186 * Fetch and process IOMMU commands.
5187 */
5188 /** @todo r=ramshankar: We currently copy all commands from guest memory into a
5189 * temporary host buffer before processing them as a batch. If we want to
5190 * save on host memory a bit, we could (once PGM has the necessary APIs)
5191 * lock the page mappings page mappings and access them directly. */
5192 IOMMU_LOCK(pDevIns, pThisR3);
5193
5194 if (pThis->Status.n.u1CmdBufRunning)
5195 {
5196 /* Get the offsets we need to read commands from memory (circular buffer offset). */
5197 uint32_t const cbCmdBuf = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len);
5198 uint32_t const offTail = pThis->CmdBufTailPtr.n.off;
5199 uint32_t offHead = pThis->CmdBufHeadPtr.n.off;
5200
5201 /* Validate. */
5202 Assert(!(offHead & ~IOMMU_CMD_BUF_HEAD_PTR_VALID_MASK));
5203 Assert(offHead < cbCmdBuf);
5204 Assert(cbCmdBuf <= cbMaxCmdBuf);
5205
5206 if (offHead != offTail)
5207 {
5208 /* Read the entire command buffer from memory (avoids multiple PGM calls). */
5209 RTGCPHYS const GCPhysCmdBufBase = pThis->CmdBufBaseAddr.n.u40Base << X86_PAGE_4K_SHIFT;
5210
5211 IOMMU_UNLOCK(pDevIns, pThisR3);
5212 int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysCmdBufBase, pvCmds, cbCmdBuf);
5213 IOMMU_LOCK(pDevIns, pThisR3);
5214
5215 if (RT_SUCCESS(rc))
5216 {
5217 /* Indicate to software we've fetched all commands from the buffer. */
5218 pThis->CmdBufHeadPtr.n.off = offTail;
5219
5220 /* Allow IOMMU to do other work while we process commands. */
5221 IOMMU_UNLOCK(pDevIns, pThisR3);
5222
5223 /* Process the fetched commands. */
5224 EVT_GENERIC_T EvtError;
5225 do
5226 {
5227 PCCMD_GENERIC_T pCmd = (PCCMD_GENERIC_T)((uintptr_t)pvCmds + offHead);
5228 rc = iommuAmdR3CmdProcess(pDevIns, pCmd, GCPhysCmdBufBase + offHead, &EvtError);
5229 if (RT_FAILURE(rc))
5230 {
5231 if ( rc == VERR_IOMMU_CMD_NOT_SUPPORTED
5232 || rc == VERR_IOMMU_CMD_INVALID_FORMAT)
5233 {
5234 Assert(EvtError.n.u4EvtCode == IOMMU_EVT_ILLEGAL_CMD_ERROR);
5235 iommuAmdIllegalCmdEventRaise(pDevIns, (PCEVT_ILLEGAL_CMD_ERR_T)&EvtError);
5236 }
5237 else if (rc == VERR_IOMMU_CMD_HW_ERROR)
5238 {
5239 Assert(EvtError.n.u4EvtCode == IOMMU_EVT_COMMAND_HW_ERROR);
5240 LogFunc(("Raising command hardware error. Cmd=%#x -> COMMAND_HW_ERROR\n", pCmd->n.u4Opcode));
5241 iommuAmdCmdHwErrorEventRaise(pDevIns, (PCEVT_CMD_HW_ERR_T)&EvtError);
5242 }
5243 break;
5244 }
5245
5246 /* Move to the next command in the circular buffer. */
5247 offHead = (offHead + sizeof(CMD_GENERIC_T)) % cbCmdBuf;
5248 } while (offHead != offTail);
5249 }
5250 else
5251 {
5252 LogFunc(("Failed to read command at %#RGp. rc=%Rrc -> COMMAND_HW_ERROR\n", GCPhysCmdBufBase, rc));
5253 EVT_CMD_HW_ERR_T EvtCmdHwErr;
5254 iommuAmdCmdHwErrorEventInit(GCPhysCmdBufBase, &EvtCmdHwErr);
5255 iommuAmdCmdHwErrorEventRaise(pDevIns, &EvtCmdHwErr);
5256
5257 IOMMU_UNLOCK(pDevIns, pThisR3);
5258 }
5259 }
5260 else
5261 IOMMU_UNLOCK(pDevIns, pThisR3);
5262 }
5263 else
5264 IOMMU_UNLOCK(pDevIns, pThisR3);
5265 }
5266
5267 RTMemFree(pvCmds);
5268 LogFlowFunc(("Command thread terminating\n"));
5269 return VINF_SUCCESS;
5270}
5271
5272
5273/**
5274 * Wakes up the command thread so it can respond to a state change.
5275 *
5276 * @returns VBox status code.
5277 * @param pDevIns The IOMMU device instance.
5278 * @param pThread The command thread.
5279 */
5280static DECLCALLBACK(int) iommuAmdR3CmdThreadWakeUp(PPDMDEVINS pDevIns, PPDMTHREAD pThread)
5281{
5282 RT_NOREF(pThread);
5283 Log4Func(("\n"));
5284 PCIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
5285 return PDMDevHlpSUPSemEventSignal(pDevIns, pThis->hEvtCmdThread);
5286}
5287
5288
5289/**
5290 * @callback_method_impl{FNPCICONFIGREAD}
5291 */
5292static DECLCALLBACK(VBOXSTRICTRC) iommuAmdR3PciConfigRead(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t uAddress,
5293 unsigned cb, uint32_t *pu32Value)
5294{
5295 /** @todo IOMMU: PCI config read stat counter. */
5296 VBOXSTRICTRC rcStrict = PDMDevHlpPCIConfigRead(pDevIns, pPciDev, uAddress, cb, pu32Value);
5297 Log3Func(("uAddress=%#x (cb=%u) -> %#x. rc=%Rrc\n", uAddress, cb, *pu32Value, VBOXSTRICTRC_VAL(rcStrict)));
5298 return rcStrict;
5299}
5300
5301
5302/**
5303 * Sets up the IOMMU MMIO region (usually in response to an IOMMU base address
5304 * register write).
5305 *
5306 * @returns VBox status code.
5307 * @param pDevIns The IOMMU instance data.
5308 *
5309 * @remarks Call this function only when the IOMMU BAR is enabled.
5310 */
5311static int iommuAmdR3MmioSetup(PPDMDEVINS pDevIns)
5312{
5313 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
5314 Assert(pThis->IommuBar.n.u1Enable);
5315 Assert(pThis->hMmio != NIL_IOMMMIOHANDLE); /* Paranoia. Ensure we have a valid IOM MMIO handle. */
5316 Assert(!pThis->ExtFeat.n.u1PerfCounterSup); /* Base is 16K aligned when performance counters aren't supported. */
5317 RTGCPHYS const GCPhysMmioBase = RT_MAKE_U64(pThis->IommuBar.au32[0] & 0xffffc000, pThis->IommuBar.au32[1]);
5318 RTGCPHYS const GCPhysMmioBasePrev = PDMDevHlpMmioGetMappingAddress(pDevIns, pThis->hMmio);
5319
5320 /* If the MMIO region is already mapped at the specified address, we're done. */
5321 Assert(GCPhysMmioBase != NIL_RTGCPHYS);
5322 if (GCPhysMmioBasePrev == GCPhysMmioBase)
5323 return VINF_SUCCESS;
5324
5325 /* Unmap the previous MMIO region (which is at a different address). */
5326 if (GCPhysMmioBasePrev != NIL_RTGCPHYS)
5327 {
5328 LogFlowFunc(("Unmapping previous MMIO region at %#RGp\n", GCPhysMmioBasePrev));
5329 int rc = PDMDevHlpMmioUnmap(pDevIns, pThis->hMmio);
5330 if (RT_FAILURE(rc))
5331 {
5332 LogFunc(("Failed to unmap MMIO region at %#RGp. rc=%Rrc\n", GCPhysMmioBasePrev, rc));
5333 return rc;
5334 }
5335 }
5336
5337 /* Map the newly specified MMIO region. */
5338 LogFlowFunc(("Mapping MMIO region at %#RGp\n", GCPhysMmioBase));
5339 int rc = PDMDevHlpMmioMap(pDevIns, pThis->hMmio, GCPhysMmioBase);
5340 if (RT_FAILURE(rc))
5341 {
5342 LogFunc(("Failed to unmap MMIO region at %#RGp. rc=%Rrc\n", GCPhysMmioBase, rc));
5343 return rc;
5344 }
5345
5346 return VINF_SUCCESS;
5347}
5348
5349
5350/**
5351 * @callback_method_impl{FNPCICONFIGWRITE}
5352 */
5353static DECLCALLBACK(VBOXSTRICTRC) iommuAmdR3PciConfigWrite(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t uAddress,
5354 unsigned cb, uint32_t u32Value)
5355{
5356 PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
5357
5358 /*
5359 * Discard writes to read-only registers that are specific to the IOMMU.
5360 * Other common PCI registers are handled by the generic code, see devpciR3IsConfigByteWritable().
5361 * See PCI spec. 6.1. "Configuration Space Organization".
5362 */
5363 switch (uAddress)
5364 {
5365 case IOMMU_PCI_OFF_CAP_HDR: /* All bits are read-only. */
5366 case IOMMU_PCI_OFF_RANGE_REG: /* We don't have any devices integrated with the IOMMU. */
5367 case IOMMU_PCI_OFF_MISCINFO_REG_0: /* We don't support MSI-X. */
5368 case IOMMU_PCI_OFF_MISCINFO_REG_1: /* We don't support guest-address translation. */
5369 {
5370 LogFunc(("PCI config write (%#RX32) to read-only register %#x -> Ignored\n", u32Value, uAddress));
5371 return VINF_SUCCESS;
5372 }
5373 }
5374
5375 PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
5376 IOMMU_LOCK(pDevIns, pThisR3);
5377
5378 VBOXSTRICTRC rcStrict;
5379 switch (uAddress)
5380 {
5381 case IOMMU_PCI_OFF_BASE_ADDR_REG_LO:
5382 {
5383 if (!pThis->IommuBar.n.u1Enable)
5384 {
5385 pThis->IommuBar.au32[0] = u32Value & IOMMU_BAR_VALID_MASK;
5386 if (pThis->IommuBar.n.u1Enable)
5387 rcStrict = iommuAmdR3MmioSetup(pDevIns);
5388 else
5389 rcStrict = VINF_SUCCESS;
5390 }
5391 else
5392 {
5393 LogFunc(("Writing Base Address (Lo) when it's already enabled -> Ignored\n"));
5394 rcStrict = VINF_SUCCESS;
5395 }
5396 break;
5397 }
5398
5399 case IOMMU_PCI_OFF_BASE_ADDR_REG_HI:
5400 {
5401 if (!pThis->IommuBar.n.u1Enable)
5402 {
5403 AssertCompile((IOMMU_BAR_VALID_MASK >> 32) == 0xffffffff);
5404 pThis->IommuBar.au32[1] = u32Value;
5405 }
5406 else
5407 LogFunc(("Writing Base Address (Hi) when it's already enabled -> Ignored\n"));
5408 rcStrict = VINF_SUCCESS;
5409 break;
5410 }
5411
5412 case IOMMU_PCI_OFF_MSI_CAP_HDR:
5413 {
5414 u32Value |= RT_BIT(23); /* 64-bit MSI addressess must always be enabled for IOMMU. */
5415 RT_FALL_THRU();
5416 }
5417 default:
5418 {
5419 rcStrict = PDMDevHlpPCIConfigWrite(pDevIns, pPciDev, uAddress, cb, u32Value);
5420 break;
5421 }
5422 }
5423
5424 IOMMU_UNLOCK(pDevIns, pThisR3);
5425
5426 Log3Func(("uAddress=%#x (cb=%u) with %#x. rc=%Rrc\n", uAddress, cb, u32Value, VBOXSTRICTRC_VAL(rcStrict)));
5427 return rcStrict;
5428}
5429
5430
5431/**
5432 * @callback_method_impl{FNDBGFHANDLERDEV}
5433 */
5434static DECLCALLBACK(void) iommuAmdR3DbgInfo(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs)
5435{
5436 PCIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
5437 PCPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
5438 PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
5439
5440 bool const fVerbose = RTStrCmp(pszArgs, "verbose") == 0;
5441
5442 pHlp->pfnPrintf(pHlp, "AMD-IOMMU:\n");
5443 /* Device Table Base Addresses (all segments). */
5444 for (unsigned i = 0; i < RT_ELEMENTS(pThis->aDevTabBaseAddrs); i++)
5445 {
5446 DEV_TAB_BAR_T const DevTabBar = pThis->aDevTabBaseAddrs[i];
5447 pHlp->pfnPrintf(pHlp, " Device Table BAR %u = %#RX64\n", i, DevTabBar.u64);
5448 if (fVerbose)
5449 {
5450 pHlp->pfnPrintf(pHlp, " Size = %#x (%u bytes)\n", DevTabBar.n.u9Size,
5451 IOMMU_GET_DEV_TAB_LEN(&DevTabBar));
5452 pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n",
5453 DevTabBar.n.u40Base << X86_PAGE_4K_SHIFT);
5454 }
5455 }
5456 /* Command Buffer Base Address Register. */
5457 {
5458 CMD_BUF_BAR_T const CmdBufBar = pThis->CmdBufBaseAddr;
5459 uint8_t const uEncodedLen = CmdBufBar.n.u4Len;
5460 uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen);
5461 uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen);
5462 pHlp->pfnPrintf(pHlp, " Command Buffer BAR = %#RX64\n", CmdBufBar.u64);
5463 if (fVerbose)
5464 {
5465 pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n",
5466 CmdBufBar.n.u40Base << X86_PAGE_4K_SHIFT);
5467 pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen,
5468 cEntries, cbBuffer);
5469 }
5470 }
5471 /* Event Log Base Address Register. */
5472 {
5473 EVT_LOG_BAR_T const EvtLogBar = pThis->EvtLogBaseAddr;
5474 uint8_t const uEncodedLen = EvtLogBar.n.u4Len;
5475 uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen);
5476 uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen);
5477 pHlp->pfnPrintf(pHlp, " Event Log BAR = %#RX64\n", EvtLogBar.u64);
5478 if (fVerbose)
5479 {
5480 pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n",
5481 EvtLogBar.n.u40Base << X86_PAGE_4K_SHIFT);
5482 pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen,
5483 cEntries, cbBuffer);
5484 }
5485 }
5486 /* IOMMU Control Register. */
5487 {
5488 IOMMU_CTRL_T const Ctrl = pThis->Ctrl;
5489 pHlp->pfnPrintf(pHlp, " Control = %#RX64\n", Ctrl.u64);
5490 if (fVerbose)
5491 {
5492 pHlp->pfnPrintf(pHlp, " IOMMU enable = %RTbool\n", Ctrl.n.u1IommuEn);
5493 pHlp->pfnPrintf(pHlp, " HT Tunnel translation enable = %RTbool\n", Ctrl.n.u1HtTunEn);
5494 pHlp->pfnPrintf(pHlp, " Event log enable = %RTbool\n", Ctrl.n.u1EvtLogEn);
5495 pHlp->pfnPrintf(pHlp, " Event log interrupt enable = %RTbool\n", Ctrl.n.u1EvtIntrEn);
5496 pHlp->pfnPrintf(pHlp, " Completion wait interrupt enable = %RTbool\n", Ctrl.n.u1EvtIntrEn);
5497 pHlp->pfnPrintf(pHlp, " Invalidation timeout = %u\n", Ctrl.n.u3InvTimeOut);
5498 pHlp->pfnPrintf(pHlp,