/* $Id: DevIommuAmd.cpp 88818 2021-05-03 10:19:31Z vboxsync $ */ /** @file * IOMMU - Input/Output Memory Management Unit - AMD implementation. */ /* * Copyright (C) 2020 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; * you can redistribute it and/or modify it under the terms of the GNU * General Public License (GPL) as published by the Free Software * Foundation, in version 2 as it comes in the "COPYING" file of the * VirtualBox OSE distribution. VirtualBox OSE is distributed in the * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. */ /********************************************************************************************************************************* * Header Files * *********************************************************************************************************************************/ #define LOG_GROUP LOG_GROUP_DEV_IOMMU #include #include #include #include #include #include #ifdef IN_RING3 # include #endif #include "VBoxDD.h" #include "DevIommuAmd.h" /********************************************************************************************************************************* * Defined Constants And Macros * *********************************************************************************************************************************/ /** Release log prefix string. */ #define IOMMU_LOG_PFX "AMD-IOMMU" /** The current saved state version. */ #define IOMMU_SAVED_STATE_VERSION 1 /** The IOMMU device instance magic. */ #define IOMMU_MAGIC 0x10acce55 /** Enable the IOTLBE cache only in ring-3 for now, see @bugref{9654#c95}. */ #ifdef IN_RING3 # define IOMMU_WITH_IOTLBE_CACHE #endif /** Enable the interrupt cache. */ #define IOMMU_WITH_IRTE_CACHE /* The DTE cache is mandatory for the IOTLB or interrupt cache to work. 
*/
#if defined(IOMMU_WITH_IOTLBE_CACHE) || defined(IOMMU_WITH_IRTE_CACHE)
# define IOMMU_WITH_DTE_CACHE
/** The maximum number of device IDs in the cache. */
# define IOMMU_DEV_CACHE_COUNT                      16
/** An empty device ID (a device ID slot holding this value is unused). */
# define IOMMU_DTE_CACHE_KEY_NIL                    0
#endif

#ifdef IOMMU_WITH_IRTE_CACHE
/** The maximum number of IRTE cache entries. */
# define IOMMU_IRTE_CACHE_COUNT                     32
/** A NIL IRTE cache entry key. */
# define IOMMU_IRTE_CACHE_KEY_NIL                   (~(uint32_t)0U)
/** Gets the device ID from an IRTE cache entry key. */
#define IOMMU_IRTE_CACHE_KEY_GET_DEVICE_ID(a_Key)   RT_HIWORD(a_Key)
/** Gets the offset into the IRTE table from an IRTE cache entry key. */
# define IOMMU_IRTE_CACHE_KEY_GET_OFF(a_Key)        RT_LOWORD(a_Key)
/** Makes an IRTE cache entry key.
 *
 * Bits 31:16 is the device ID (Bus, Device, Function).
 * Bits  15:0 is the offset into the IRTE table.
 */
# define IOMMU_IRTE_CACHE_KEY_MAKE(a_DevId, a_off)  RT_MAKE_U32(a_off, a_DevId)
#endif  /* IOMMU_WITH_IRTE_CACHE */

#ifdef IOMMU_WITH_IOTLBE_CACHE
/** The maximum number of IOTLB entries. */
# define IOMMU_IOTLBE_MAX                           96
/** The mask of bits covering the domain ID in the IOTLBE key. */
# define IOMMU_IOTLB_DOMAIN_ID_MASK                 UINT64_C(0xffffff0000000000)
/** The mask of bits covering the IOVA in the IOTLBE key. */
# define IOMMU_IOTLB_IOVA_MASK                      (~IOMMU_IOTLB_DOMAIN_ID_MASK)
/** The number of bits to shift for the domain ID of the IOTLBE key. */
# define IOMMU_IOTLB_DOMAIN_ID_SHIFT                40
/** A NIL IOTLB key. */
# define IOMMU_IOTLB_KEY_NIL                        UINT64_C(0)
/** Gets the domain ID from an IOTLB entry key. */
# define IOMMU_IOTLB_KEY_GET_DOMAIN_ID(a_Key)       ((a_Key) >> IOMMU_IOTLB_DOMAIN_ID_SHIFT)
/** Gets the IOVA from the IOTLB entry key (page number shifted back to an address). */
# define IOMMU_IOTLB_KEY_GET_IOVA(a_Key)            (((a_Key) & IOMMU_IOTLB_IOVA_MASK) << X86_PAGE_4K_SHIFT)
/** Makes an IOTLB entry key.
 *
 * Address bits 63:52 of the IOVA are zero extended, so top 12 bits are free.
 * Address bits 11:0 of the IOVA are offset into the minimum page size of 4K,
 * so bottom 12 bits are free.
 *
 * Thus we use the top 24 bits of key to hold bits 15:0 of the domain ID.
 * We use the bottom 40 bits of the key to hold bits 51:12 of the IOVA.
 */
# define IOMMU_IOTLB_KEY_MAKE(a_DomainId, a_uIova)  (  ((uint64_t)(a_DomainId) << IOMMU_IOTLB_DOMAIN_ID_SHIFT) \
                                                     | (((a_uIova) >> X86_PAGE_4K_SHIFT) & IOMMU_IOTLB_IOVA_MASK))
#endif  /* IOMMU_WITH_IOTLBE_CACHE */

#ifdef IOMMU_WITH_DTE_CACHE
/** @name IOMMU_DTE_CACHE_F_XXX: DTE cache flags.
 *
 * Some of these flags are "basic" i.e. they correspond directly to their bits in
 * the DTE. The rest of the flags are based on checks or operations on several DTE
 * bits.
 *
 * The basic flags are:
 *   - VALID                (DTE.V)
 *   - IO_PERM_READ         (DTE.IR)
 *   - IO_PERM_WRITE        (DTE.IW)
 *   - IO_PERM_RSVD         (bit following DTE.IW reserved for future & to keep
 *                           masking consistent)
 *   - SUPPRESS_ALL_IOPF    (DTE.SA)
 *   - SUPPRESS_IOPF        (DTE.SE)
 *   - INTR_MAP_VALID       (DTE.IV)
 *   - IGNORE_UNMAPPED_INTR (DTE.IG)
 *
 * NOTE(review): iommuAmdGetBasicDevFlags() stores the interrupt control field as
 * a 2-bit value at IOMMU_DTE_CACHE_F_INTR_CTRL_SHIFT rather than setting exactly
 * one of the four INTR_CTRL_XXX bits below - confirm that readers decode it via
 * the SHIFT/MASK pair.
 *
 * @see iommuAmdGetBasicDevFlags()
 * @{ */
/** The DTE is present. */
# define IOMMU_DTE_CACHE_F_PRESENT                      RT_BIT(0)
/** The DTE is valid. */
# define IOMMU_DTE_CACHE_F_VALID                        RT_BIT(1)
/** The DTE permissions apply for address translations. */
# define IOMMU_DTE_CACHE_F_IO_PERM                      RT_BIT(2)
/** DTE permission - I/O read allowed. */
# define IOMMU_DTE_CACHE_F_IO_PERM_READ                 RT_BIT(3)
/** DTE permission - I/O write allowed. */
# define IOMMU_DTE_CACHE_F_IO_PERM_WRITE                RT_BIT(4)
/** DTE permission - reserved. */
# define IOMMU_DTE_CACHE_F_IO_PERM_RSVD                 RT_BIT(5)
/** Address translation required. */
# define IOMMU_DTE_CACHE_F_ADDR_TRANSLATE               RT_BIT(6)
/** Suppress all I/O page faults. */
# define IOMMU_DTE_CACHE_F_SUPPRESS_ALL_IOPF            RT_BIT(7)
/** Suppress I/O page faults. */
# define IOMMU_DTE_CACHE_F_SUPPRESS_IOPF                RT_BIT(8)
/** Interrupt map valid. */
# define IOMMU_DTE_CACHE_F_INTR_MAP_VALID               RT_BIT(9)
/** Ignore unmapped interrupts. */
# define IOMMU_DTE_CACHE_F_IGNORE_UNMAPPED_INTR         RT_BIT(10)
/** An I/O page fault has been raised for this device. */
# define IOMMU_DTE_CACHE_F_IO_PAGE_FAULT_RAISED         RT_BIT(11)
/** Fixed and arbitrary interrupt control: Target Abort. */
# define IOMMU_DTE_CACHE_F_INTR_CTRL_TARGET_ABORT       RT_BIT(12)
/** Fixed and arbitrary interrupt control: Forward unmapped. */
# define IOMMU_DTE_CACHE_F_INTR_CTRL_FWD_UNMAPPED       RT_BIT(13)
/** Fixed and arbitrary interrupt control: Remapped. */
# define IOMMU_DTE_CACHE_F_INTR_CTRL_REMAPPED           RT_BIT(14)
/** Fixed and arbitrary interrupt control: Reserved. */
# define IOMMU_DTE_CACHE_F_INTR_CTRL_RSVD               RT_BIT(15)
/** @} */

/** The number of bits to shift I/O device flags for DTE permissions. */
# define IOMMU_DTE_CACHE_F_IO_PERM_SHIFT                3
/** The mask of DTE permissions in I/O device flags. */
# define IOMMU_DTE_CACHE_F_IO_PERM_MASK                 0x3
/** The number of bits to shift I/O device flags for interrupt control bits. */
# define IOMMU_DTE_CACHE_F_INTR_CTRL_SHIFT              12
/** The mask of interrupt control bits in I/O device flags. */
# define IOMMU_DTE_CACHE_F_INTR_CTRL_MASK               0x3
/** The number of bits to shift for ignore-unmapped interrupts bit. */
# define IOMMU_DTE_CACHE_F_IGNORE_UNMAPPED_INTR_SHIFT   10

/** Acquires the cache lock (enters CritSectCache and asserts that entering succeeded). */
#define IOMMU_CACHE_LOCK(a_pDevIns, a_pThis) \
    do { \
        int const rcLock = PDMDevHlpCritSectEnter((a_pDevIns), &(a_pThis)->CritSectCache, VINF_SUCCESS); \
        AssertRC(rcLock); \
    } while (0)

/** Releases the cache lock. */
# define IOMMU_CACHE_UNLOCK(a_pDevIns, a_pThis)     PDMDevHlpCritSectLeave((a_pDevIns), &(a_pThis)->CritSectCache)
#endif  /* IOMMU_WITH_DTE_CACHE */

/** Gets the page offset mask given the number of bits to shift. */
#define IOMMU_GET_PAGE_OFF_MASK(a_cShift)           (~(UINT64_C(0xffffffffffffffff) << (a_cShift)))

/** Acquires the PDM lock (returns a_rcBusy on contention).
*/
#define IOMMU_LOCK_RET(a_pDevIns, a_pThisCC, a_rcBusy)  \
    do { \
        int const rcLock = (a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnLock((a_pDevIns), (a_rcBusy)); \
        if (RT_LIKELY(rcLock == VINF_SUCCESS)) \
        { /* likely */ } \
        else \
            return rcLock; \
    } while (0)

/** Acquires the PDM lock (shouldn't really fail).
 *
 * In ring-3 the lock status is not examined; in the other contexts it is
 * asserted to be a success status. */
#ifdef IN_RING3
# define IOMMU_LOCK(a_pDevIns, a_pThisCC)           (a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnLock((a_pDevIns), VERR_IGNORED)
#else
# define IOMMU_LOCK(a_pDevIns, a_pThisCC) \
    do { \
        int const rcLock = (a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnLock((a_pDevIns), VINF_SUCCESS); \
        AssertRC(rcLock); \
    } while (0)
#endif

/** Asserts that the current thread owns the PDM lock. */
# define IOMMU_ASSERT_LOCK_IS_OWNER(a_pDevIns, a_pThisCC) \
    do \
    { \
        Assert((a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnLockIsOwner((a_pDevIns))); \
        NOREF(a_pThisCC); \
    } while (0)

/** Releases the PDM lock. */
# define IOMMU_UNLOCK(a_pDevIns, a_pThisCC)         (a_pThisCC)->CTX_SUFF(pIommuHlp)->pfnUnlock((a_pDevIns))


/*********************************************************************************************************************************
*   Structures and Typedefs                                                                                                      *
*********************************************************************************************************************************/
/**
 * IOMMU operation (transaction).
 */
typedef enum IOMMUOP
{
    /** Address translation request. */
    IOMMUOP_TRANSLATE_REQ = 0,
    /** Memory read request. */
    IOMMUOP_MEM_READ,
    /** Memory write request. */
    IOMMUOP_MEM_WRITE,
    /** Interrupt request. */
    IOMMUOP_INTR_REQ,
    /** Command. */
    IOMMUOP_CMD
} IOMMUOP;
/** Pointer to a IOMMU operation. */
typedef IOMMUOP *PIOMMUOP;

/**
 * I/O page lookup.
 */
typedef struct IOPAGELOOKUP
{
    /** The translated system physical address. */
    RTGCPHYS        GCPhysSpa;
    /** The number of offset bits in the system physical address. */
    uint8_t         cShift;
    /** The I/O permissions for this translation, see IOMMU_IO_PERM_XXX. */
    uint8_t         fPerm;
} IOPAGELOOKUP;
/** Pointer to an I/O page lookup.
*/ typedef IOPAGELOOKUP *PIOPAGELOOKUP; /** Pointer to a const I/O page lookup. */ typedef IOPAGELOOKUP const *PCIOPAGELOOKUP; /** * I/O address range. */ typedef struct IOADDRRANGE { /** The address (virtual or physical). */ uint64_t uAddr; /** The size of the access in bytes. */ size_t cb; /** The I/O permissions for this translation, see IOMMU_IO_PERM_XXX. */ uint8_t fPerm; } IOADDRRANGE; /** Pointer to an I/O address range. */ typedef IOADDRRANGE *PIOADDRRANGE; /** Pointer to a const I/O address range. */ typedef IOADDRRANGE const *PCIOADDRRANGE; #ifdef IOMMU_WITH_DTE_CACHE /** * Device Table Entry Cache. */ typedef struct DTECACHE { /** This device's flags, see IOMMU_DTE_CACHE_F_XXX. */ uint16_t fFlags; /** The domain ID assigned for this device by software. */ uint16_t idDomain; } DTECACHE; /** Pointer to an I/O device struct. */ typedef DTECACHE *PDTECACHE; /** Pointer to a const I/O device struct. */ typedef DTECACHE *PCDTECACHE; AssertCompileSize(DTECACHE, 4); #endif /* IOMMU_WITH_DTE_CACHE */ #ifdef IOMMU_WITH_IOTLBE_CACHE /** * I/O TLB Entry. * Keep this as small and aligned as possible. */ typedef struct IOTLBE { /** The AVL tree node. */ AVLU64NODECORE Core; /** The least recently used (LRU) list node. */ RTLISTNODE NdLru; /** The I/O page lookup results of the translation. */ IOPAGELOOKUP PageLookup; /** Whether the entry needs to be evicted from the cache. */ bool fEvictPending; } IOTLBE; /** Pointer to an IOMMU I/O TLB entry struct. */ typedef IOTLBE *PIOTLBE; /** Pointer to a const IOMMU I/O TLB entry struct. */ typedef IOTLBE const *PCIOTLBE; AssertCompileSizeAlignment(IOTLBE, 8); AssertCompileMemberOffset(IOTLBE, Core, 0); #endif /* IOMMU_WITH_IOTLBE_CACHE */ #ifdef IOMMU_WITH_IRTE_CACHE /** * Interrupt Remap Table Entry Cache. */ typedef struct IRTECACHE { /** The key, see IOMMU_IRTE_CACHE_KEY_MAKE. */ uint32_t uKey; /** The IRTE. */ IRTE_T Irte; } IRTECACHE; /** Pointer to an IRTE cache struct. 
*/
typedef IRTECACHE *PIRTECACHE;
/** Pointer to a const IRTE cache struct. */
typedef IRTECACHE const *PCIRTECACHE;
AssertCompileSizeAlignment(IRTECACHE, 4);
#endif  /* IOMMU_WITH_IRTE_CACHE */

/**
 * The shared IOMMU device state.
 *
 * Holds the handles, the optional DTE/IRTE caches, the emulated register
 * values and (when statistics are enabled) the statistics counters.
 */
typedef struct IOMMU
{
    /** IOMMU device index (0 is at the top of the PCI tree hierarchy). */
    uint32_t                    idxIommu;
    /** IOMMU magic. */
    uint32_t                    u32Magic;

    /** The MMIO handle. */
    IOMMMIOHANDLE               hMmio;
    /** The event semaphore the command thread waits on. */
    SUPSEMEVENT                 hEvtCmdThread;
    /** Whether the command thread has been signaled for wake up. */
    bool volatile               fCmdThreadSignaled;
    /** Padding. */
    bool                        afPadding0[7];

#ifdef IOMMU_WITH_DTE_CACHE
    /** The critsect that protects the cache from concurrent access. */
    PDMCRITSECT                 CritSectCache;
    /** Array of device IDs (entry i is the key for aDteCache[i]). */
    uint16_t                    aDeviceIds[IOMMU_DEV_CACHE_COUNT];
    /** Array of DTE cache entries. */
    DTECACHE                    aDteCache[IOMMU_DEV_CACHE_COUNT];
#endif
#ifdef IOMMU_WITH_IRTE_CACHE
    /** Array of IRTE cache entries. */
    IRTECACHE                   aIrteCache[IOMMU_IRTE_CACHE_COUNT];
#endif

    /** @name PCI: Base capability block registers.
     * @{ */
    IOMMU_BAR_T                 IommuBar;               /**< IOMMU base address register. */
    /** @} */

    /** @name MMIO: Control and status registers.
     * @{ */
    DEV_TAB_BAR_T               aDevTabBaseAddrs[8];    /**< Device table base address registers. */
    CMD_BUF_BAR_T               CmdBufBaseAddr;         /**< Command buffer base address register. */
    EVT_LOG_BAR_T               EvtLogBaseAddr;         /**< Event log base address register. */
    IOMMU_CTRL_T                Ctrl;                   /**< IOMMU control register. */
    IOMMU_EXCL_RANGE_BAR_T      ExclRangeBaseAddr;      /**< IOMMU exclusion range base register. */
    IOMMU_EXCL_RANGE_LIMIT_T    ExclRangeLimit;         /**< IOMMU exclusion range limit. */
    IOMMU_EXT_FEAT_T            ExtFeat;                /**< IOMMU extended feature register. */
    /** @} */

    /** @name MMIO: Peripheral Page Request (PPR) Log registers.
     * @{ */
    PPR_LOG_BAR_T               PprLogBaseAddr;         /**< PPR Log base address register. */
    IOMMU_HW_EVT_HI_T           HwEvtHi;                /**< IOMMU hardware event register (Hi). */
    IOMMU_HW_EVT_LO_T           HwEvtLo;                /**< IOMMU hardware event register (Lo). */
    IOMMU_HW_EVT_STATUS_T       HwEvtStatus;            /**< IOMMU hardware event status. */
    /** @} */

    /** @todo IOMMU: SMI filter. */

    /** @name MMIO: Guest Virtual-APIC Log registers.
     * @{ */
    GALOG_BAR_T                 GALogBaseAddr;          /**< Guest Virtual-APIC Log base address register. */
    GALOG_TAIL_ADDR_T           GALogTailAddr;          /**< Guest Virtual-APIC Log Tail address register. */
    /** @} */

    /** @name MMIO: Alternate PPR and Event Log registers.
     *  @{ */
    PPR_LOG_B_BAR_T             PprLogBBaseAddr;        /**< PPR Log B base address register. */
    EVT_LOG_B_BAR_T             EvtLogBBaseAddr;        /**< Event Log B base address register. */
    /** @} */

    /** @name MMIO: Device-specific feature registers.
     * @{ */
    DEV_SPECIFIC_FEAT_T         DevSpecificFeat;        /**< Device-specific feature extension register (DSFX). */
    DEV_SPECIFIC_CTRL_T         DevSpecificCtrl;        /**< Device-specific control extension register (DSCX). */
    DEV_SPECIFIC_STATUS_T       DevSpecificStatus;      /**< Device-specific status extension register (DSSX). */
    /** @} */

    /** @name MMIO: MSI Capability Block registers.
     * @{ */
    MSI_MISC_INFO_T             MiscInfo;               /**< MSI Misc. info registers / MSI Vector registers. */
    /** @} */

    /** @name MMIO: Performance Optimization Control registers.
     *  @{ */
    IOMMU_PERF_OPT_CTRL_T       PerfOptCtrl;            /**< IOMMU Performance optimization control register. */
    /** @} */

    /** @name MMIO: x2APIC Control registers.
     * @{ */
    IOMMU_XT_GEN_INTR_CTRL_T    XtGenIntrCtrl;          /**< IOMMU X2APIC General interrupt control register. */
    IOMMU_XT_PPR_INTR_CTRL_T    XtPprIntrCtrl;          /**< IOMMU X2APIC PPR interrupt control register. */
    IOMMU_XT_GALOG_INTR_CTRL_T  XtGALogIntrCtrl;        /**< IOMMU X2APIC Guest Log interrupt control register. */
    /** @} */

    /** @name MMIO: Memory Address Routing & Control (MARC) registers.
     * @{ */
    MARC_APER_T                 aMarcApers[4];          /**< MARC Aperture Registers. */
    /** @} */

    /** @name MMIO: Reserved register.
     *  @{ */
    IOMMU_RSVD_REG_T            RsvdReg;                /**< IOMMU Reserved Register. */
    /** @} */

    /** @name MMIO: Command and Event Log pointer registers.
     * @{ */
    CMD_BUF_HEAD_PTR_T          CmdBufHeadPtr;          /**< Command buffer head pointer register. */
    CMD_BUF_TAIL_PTR_T          CmdBufTailPtr;          /**< Command buffer tail pointer register. */
    EVT_LOG_HEAD_PTR_T          EvtLogHeadPtr;          /**< Event log head pointer register. */
    EVT_LOG_TAIL_PTR_T          EvtLogTailPtr;          /**< Event log tail pointer register. */
    /** @} */

    /** @name MMIO: Command and Event Status register.
     * @{ */
    IOMMU_STATUS_T              Status;                 /**< IOMMU status register. */
    /** @} */

    /** @name MMIO: PPR Log Head and Tail pointer registers.
     * @{ */
    PPR_LOG_HEAD_PTR_T          PprLogHeadPtr;          /**< IOMMU PPR log head pointer register. */
    PPR_LOG_TAIL_PTR_T          PprLogTailPtr;          /**< IOMMU PPR log tail pointer register. */
    /** @} */

    /** @name MMIO: Guest Virtual-APIC Log Head and Tail pointer registers.
     * @{ */
    GALOG_HEAD_PTR_T            GALogHeadPtr;           /**< Guest Virtual-APIC log head pointer register. */
    GALOG_TAIL_PTR_T            GALogTailPtr;           /**< Guest Virtual-APIC log tail pointer register. */
    /** @} */

    /** @name MMIO: PPR Log B Head and Tail pointer registers.
     *  @{ */
    PPR_LOG_B_HEAD_PTR_T        PprLogBHeadPtr;         /**< PPR log B head pointer register. */
    PPR_LOG_B_TAIL_PTR_T        PprLogBTailPtr;         /**< PPR log B tail pointer register. */
    /** @} */

    /** @name MMIO: Event Log B Head and Tail pointer registers.
     * @{ */
    EVT_LOG_B_HEAD_PTR_T        EvtLogBHeadPtr;         /**< Event log B head pointer register. */
    EVT_LOG_B_TAIL_PTR_T        EvtLogBTailPtr;         /**< Event log B tail pointer register. */
    /** @} */

    /** @name MMIO: PPR Log Overflow protection registers.
     * @{ */
    PPR_LOG_AUTO_RESP_T         PprLogAutoResp;         /**< PPR Log Auto Response register. */
    PPR_LOG_OVERFLOW_EARLY_T    PprLogOverflowEarly;    /**< PPR Log Overflow Early Indicator register. */
    PPR_LOG_B_OVERFLOW_EARLY_T  PprLogBOverflowEarly;   /**< PPR Log B Overflow Early Indicator register. */
    /** @} */

    /** @todo IOMMU: IOMMU Event counter registers. */

#ifdef VBOX_WITH_STATISTICS
    /** @name IOMMU: Stat counters.
     * @{ */
    STAMCOUNTER             StatMmioReadR3;             /**< Number of MMIO reads in R3. */
    STAMCOUNTER             StatMmioReadRZ;             /**< Number of MMIO reads in RZ. */
    STAMCOUNTER             StatMmioWriteR3;            /**< Number of MMIO writes in R3. */
    STAMCOUNTER             StatMmioWriteRZ;            /**< Number of MMIO writes in RZ. */

    STAMCOUNTER             StatMsiRemapR3;             /**< Number of MSI remap requests in R3. */
    STAMCOUNTER             StatMsiRemapRZ;             /**< Number of MSI remap requests in RZ. */

    STAMCOUNTER             StatMemReadR3;              /**< Number of memory read translation requests in R3. */
    STAMCOUNTER             StatMemReadRZ;              /**< Number of memory read translation requests in RZ. */
    STAMCOUNTER             StatMemWriteR3;             /**< Number of memory write translation requests in R3. */
    STAMCOUNTER             StatMemWriteRZ;             /**< Number of memory write translation requests in RZ. */

    STAMCOUNTER             StatMemBulkReadR3;          /**< Number of memory read bulk translation requests in R3. */
    STAMCOUNTER             StatMemBulkReadRZ;          /**< Number of memory read bulk translation requests in RZ. */
    STAMCOUNTER             StatMemBulkWriteR3;         /**< Number of memory write bulk translation requests in R3. */
    STAMCOUNTER             StatMemBulkWriteRZ;         /**< Number of memory write bulk translation requests in RZ. */

    STAMCOUNTER             StatCmd;                    /**< Number of commands processed in total. */
    STAMCOUNTER             StatCmdCompWait;            /**< Number of Completion Wait commands processed. */
    STAMCOUNTER             StatCmdInvDte;              /**< Number of Invalidate DTE commands processed. */
    STAMCOUNTER             StatCmdInvIommuPages;       /**< Number of Invalidate IOMMU pages commands processed. */
    STAMCOUNTER             StatCmdInvIotlbPages;       /**< Number of Invalidate IOTLB pages commands processed. */
    STAMCOUNTER             StatCmdInvIntrTable;        /**< Number of Invalidate Interrupt Table commands processed. */
    STAMCOUNTER             StatCmdPrefIommuPages;      /**< Number of Prefetch IOMMU Pages commands processed. */
    STAMCOUNTER             StatCmdCompletePprReq;      /**< Number of Complete PPR Requests commands processed. */
    STAMCOUNTER             StatCmdInvIommuAll;         /**< Number of Invalidate IOMMU All commands processed. */

    STAMCOUNTER             StatIotlbeCached;           /**< Number of IOTLB entries in the cache. */
    STAMCOUNTER             StatIotlbeLazyEvictReuse;   /**< Number of IOTLB entries re-used after lazy eviction. */

    STAMPROFILEADV          StatProfDteLookup;          /**< Profiling of I/O page walk (from memory). */
    STAMPROFILEADV          StatProfIotlbeLookup;       /**< Profiling of IOTLB entry lookup (from cache). */

    STAMPROFILEADV          StatProfIrteLookup;         /**< Profiling of IRTE entry lookup (from memory). */
    STAMPROFILEADV          StatProfIrteCacheLookup;    /**< Profiling of IRTE entry lookup (from cache). */

    STAMCOUNTER             StatAccessCacheHit;         /**< Number of IOTLB cache hits. */
    STAMCOUNTER             StatAccessCacheHitFull;     /**< Number of accesses that were fully looked up from the cache. */
    STAMCOUNTER             StatAccessCacheMiss;        /**< Number of cache misses (resulting in DTE lookups). */
    STAMCOUNTER             StatAccessCacheNonContig;   /**< Number of cache accesses resulting in non-contiguous access. */
    STAMCOUNTER             StatAccessCachePermDenied;  /**< Number of cache accesses resulting in insufficient permissions. */
    STAMCOUNTER             StatAccessDteNonContig;     /**< Number of DTE accesses resulting in non-contiguous access. */
    STAMCOUNTER             StatAccessDtePermDenied;    /**< Number of DTE accesses resulting in insufficient permissions. */

    STAMCOUNTER             StatIntrCacheHit;           /**< Number of interrupt cache hits. */
    STAMCOUNTER             StatIntrCacheMiss;          /**< Number of interrupt cache misses. */
    /** @} */
#endif
} IOMMU;
/** Pointer to the IOMMU device state. */
typedef IOMMU *PIOMMU;
/** Pointer to the const IOMMU device state.
*/
typedef const IOMMU *PCIOMMU;
AssertCompileMemberAlignment(IOMMU, hMmio, 8);
#ifdef IOMMU_WITH_DTE_CACHE
AssertCompileMemberAlignment(IOMMU, CritSectCache, 8);
AssertCompileMemberAlignment(IOMMU, aDeviceIds, 8);
AssertCompileMemberAlignment(IOMMU, aDteCache, 8);
#endif
#ifdef IOMMU_WITH_IRTE_CACHE
AssertCompileMemberAlignment(IOMMU, aIrteCache, 8);
#endif
AssertCompileMemberAlignment(IOMMU, IommuBar, 8);
AssertCompileMemberAlignment(IOMMU, aDevTabBaseAddrs, 8);
AssertCompileMemberAlignment(IOMMU, CmdBufHeadPtr, 8);
AssertCompileMemberAlignment(IOMMU, Status, 8);

/**
 * The ring-3 IOMMU device state.
 */
typedef struct IOMMUR3
{
    /** Device instance. */
    PPDMDEVINSR3                pDevInsR3;
    /** The IOMMU helpers. */
    R3PTRTYPE(PCPDMIOMMUHLPR3)  pIommuHlpR3;
    /** The command thread handle. */
    R3PTRTYPE(PPDMTHREAD)       pCmdThread;
#ifdef IOMMU_WITH_IOTLBE_CACHE
    /** Pointer to array of pre-allocated IOTLBEs. */
    PIOTLBE                     paIotlbes;
    /** Maps [DomainId,Iova] to [IOTLBE]. */
    AVLU64TREE                  TreeIotlbe;
    /** LRU list anchor for IOTLB entries. */
    RTLISTANCHOR                LstLruIotlbe;
    /** Index of the next unused IOTLB. */
    uint32_t                    idxUnusedIotlbe;
    /** Number of cached IOTLB entries in the tree. */
    uint32_t                    cCachedIotlbes;
#endif
} IOMMUR3;
/** Pointer to the ring-3 IOMMU device state. */
typedef IOMMUR3 *PIOMMUR3;
/** Pointer to the const ring-3 IOMMU device state. */
typedef const IOMMUR3 *PCIOMMUR3;
#ifdef IOMMU_WITH_IOTLBE_CACHE
AssertCompileMemberAlignment(IOMMUR3, paIotlbes, 8);
AssertCompileMemberAlignment(IOMMUR3, TreeIotlbe, 8);
AssertCompileMemberAlignment(IOMMUR3, LstLruIotlbe, 8);
#endif

/**
 * The ring-0 IOMMU device state.
 */
typedef struct IOMMUR0
{
    /** Device instance. */
    PPDMDEVINSR0                pDevInsR0;
    /** The IOMMU helpers. */
    R0PTRTYPE(PCPDMIOMMUHLPR0)  pIommuHlpR0;
} IOMMUR0;
/** Pointer to the ring-0 IOMMU device state. */
typedef IOMMUR0 *PIOMMUR0;

/**
 * The raw-mode IOMMU device state.
 */
typedef struct IOMMURC
{
    /** Device instance. */
    PPDMDEVINSRC                pDevInsRC;
    /** The IOMMU helpers. */
    RCPTRTYPE(PCPDMIOMMUHLPRC)  pIommuHlpRC;
} IOMMURC;
/** Pointer to the raw-mode IOMMU device state. */
typedef IOMMURC *PIOMMURC;

/** The IOMMU device state for the current context. */
typedef CTX_SUFF(IOMMU) IOMMUCC;
/** Pointer to the IOMMU device state for the current context. */
typedef CTX_SUFF(PIOMMU) PIOMMUCC;

/**
 * IOMMU register access.
 */
typedef struct IOMMUREGACC
{
    /** The register name. */
    const char   *pszName;
    /** Read handler: receives the register offset and a location for the 64-bit value. */
    VBOXSTRICTRC (*pfnRead)(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value);
    /** Write handler: receives the register offset and the 64-bit value. */
    VBOXSTRICTRC (*pfnWrite)(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value);
} IOMMUREGACC;
/** Pointer to an IOMMU register access. */
typedef IOMMUREGACC *PIOMMUREGACC;
/** Pointer to a const IOMMU register access. */
typedef IOMMUREGACC const *PCIOMMUREGACC;

#ifdef IOMMU_WITH_IOTLBE_CACHE
/**
 * IOTLBE flush argument.
 */
typedef struct IOTLBEFLUSHARG
{
    /** The ring-3 IOMMU device state. */
    PIOMMUR3            pIommuR3;
    /** The domain ID to flush. */
    uint16_t            idDomain;
} IOTLBEFLUSHARG;
/** Pointer to an IOTLBE flush argument. */
typedef IOTLBEFLUSHARG *PIOTLBEFLUSHARG;
/** Pointer to a const IOTLBE flush argument. */
typedef IOTLBEFLUSHARG const *PCIOTLBEFLUSHARG;

/**
 * IOTLBE Info. argument.
 */
typedef struct IOTLBEINFOARG
{
    /** The ring-3 IOMMU device state. */
    PIOMMUR3            pIommuR3;
    /** The info helper. */
    PCDBGFINFOHLP       pHlp;
    /** The domain ID to dump IOTLB entry. */
    uint16_t            idDomain;
} IOTLBEINFOARG;
/** Pointer to an IOTLBE info argument. */
typedef IOTLBEINFOARG *PIOTLBEINFOARG;
/** Pointer to a const IOTLBE info argument. */
typedef IOTLBEINFOARG const *PCIOTLBEINFOARG;
#endif

/**
 * IOMMU operation auxiliary info.
 */
typedef struct IOMMUOPAUX
{
    /** The IOMMU operation being performed. */
    IOMMUOP             enmOp;
    /** The device table entry (can be NULL). */
    PCDTE_T             pDte;
    /** The device ID (bus, device, function). */
    uint16_t            idDevice;
    /** The domain ID (when the DTE isn't provided). */
    uint16_t            idDomain;
} IOMMUOPAUX;
/** Pointer to an IOMMU operation auxiliary info struct.
*/ typedef IOMMUOPAUX *PIOMMUOPAUX; /** Pointer to a const I/O address lookup struct. */ typedef IOMMUOPAUX const *PCIOMMUOPAUX; typedef DECLCALLBACKTYPE(int, FNIOPAGELOOKUP,(PPDMDEVINS pDevIns, uint64_t uIovaPage, uint8_t fPerm, PCIOMMUOPAUX pAux, PIOPAGELOOKUP pPageLookup)); typedef FNIOPAGELOOKUP *PFNIOPAGELOOKUP; /********************************************************************************************************************************* * Global Variables * *********************************************************************************************************************************/ #ifdef IN_RING3 /** * An array of the number of device table segments supported. * Indexed by u2DevTabSegSup. */ static uint8_t const g_acDevTabSegs[] = { 0, 2, 4, 8 }; #endif #if defined(IN_RING3) || defined(LOG_ENABLED) /** * The IOMMU I/O permission names. */ static const char * const g_aszPerm[] = { "none", "read", "write", "read+write" }; #endif /** * An array of the masks to select the device table segment index from a device ID. */ static uint16_t const g_auDevTabSegMasks[] = { 0x0, 0x8000, 0xc000, 0xe000 }; /** * An array of the shift values to select the device table segment index from a * device ID. */ static uint8_t const g_auDevTabSegShifts[] = { 0, 15, 14, 13 }; /** * The maximum size (inclusive) of each device table segment (0 to 7). * Indexed by the device table segment index. */ static uint16_t const g_auDevTabSegMaxSizes[] = { 0x1ff, 0xff, 0x7f, 0x7f, 0x3f, 0x3f, 0x3f, 0x3f }; #ifndef VBOX_DEVICE_STRUCT_TESTCASE /** * Gets the maximum number of buffer entries for the given buffer length. * * @returns Number of buffer entries. * @param uEncodedLen The length (power-of-2 encoded). */ DECLINLINE(uint32_t) iommuAmdGetBufMaxEntries(uint8_t uEncodedLen) { Assert(uEncodedLen > 7); Assert(uEncodedLen < 16); return 2 << (uEncodedLen - 1); } /** * Gets the total length of the buffer given a base register's encoded length. 
* * @returns The length of the buffer in bytes. * @param uEncodedLen The length (power-of-2 encoded). */ DECLINLINE(uint32_t) iommuAmdGetTotalBufLength(uint8_t uEncodedLen) { Assert(uEncodedLen > 7); Assert(uEncodedLen < 16); return (2 << (uEncodedLen - 1)) << 4; } /** * Gets the number of (unconsumed) entries in the event log. * * @returns The number of entries in the event log. * @param pThis The shared IOMMU device state. */ static uint32_t iommuAmdGetEvtLogEntryCount(PIOMMU pThis) { uint32_t const idxTail = pThis->EvtLogTailPtr.n.off >> IOMMU_EVT_GENERIC_SHIFT; uint32_t const idxHead = pThis->EvtLogHeadPtr.n.off >> IOMMU_EVT_GENERIC_SHIFT; if (idxTail >= idxHead) return idxTail - idxHead; uint32_t const cMaxEvts = iommuAmdGetBufMaxEntries(pThis->EvtLogBaseAddr.n.u4Len); return cMaxEvts - idxHead + idxTail; } #if defined(IN_RING3) || defined(LOG_ENABLED) /** * Gets the descriptive I/O permission name for a memory access. * * @returns The I/O permission name. * @param fPerm The I/O permissions for the access, see IOMMU_IO_PERM_XXX. */ static const char *iommuAmdMemAccessGetPermName(uint8_t fPerm) { /* We shouldn't construct an access with "none" or "read+write" (must be read or write) permissions. */ Assert(fPerm > 0 && fPerm < RT_ELEMENTS(g_aszPerm)); return g_aszPerm[fPerm & IOMMU_IO_PERM_MASK]; } #endif /** * Checks whether two consecutive I/O page lookup results translates to a physically * contiguous region. * * @returns @c true if they are contiguous, @c false otherwise. * @param pPageLookupPrev The I/O page lookup result of the previous page. * @param pPageLookup The I/O page lookup result of the current page. 
*/ static bool iommuAmdLookupIsAccessContig(PCIOPAGELOOKUP pPageLookupPrev, PCIOPAGELOOKUP pPageLookup) { Assert(pPageLookupPrev->fPerm == pPageLookup->fPerm); size_t const cbPrev = RT_BIT_64(pPageLookupPrev->cShift); RTGCPHYS const GCPhysPrev = pPageLookupPrev->GCPhysSpa; RTGCPHYS const GCPhys = pPageLookup->GCPhysSpa; uint64_t const offMaskPrev = IOMMU_GET_PAGE_OFF_MASK(pPageLookupPrev->cShift); uint64_t const offMask = IOMMU_GET_PAGE_OFF_MASK(pPageLookup->cShift); /* Paranoia: Ensure offset bits are 0. */ Assert(!(GCPhysPrev & offMaskPrev)); Assert(!(GCPhys & offMask)); if ((GCPhysPrev & ~offMaskPrev) + cbPrev == (GCPhys & ~offMask)) return true; return false; } /** * Gets the basic I/O device flags for the given device table entry. * * @returns The basic I/O device flags. * @param pDte The device table entry. */ static uint16_t iommuAmdGetBasicDevFlags(PCDTE_T pDte) { /* Extract basic flags from bits 127:0 of the DTE. */ uint16_t fFlags = 0; if (pDte->n.u1Valid) { fFlags |= IOMMU_DTE_CACHE_F_VALID; /** @todo Skip the if checks here (shift/mask the relevant bits over). */ if (pDte->n.u1SuppressAllPfEvents) fFlags |= IOMMU_DTE_CACHE_F_SUPPRESS_ALL_IOPF; if (pDte->n.u1SuppressPfEvents) fFlags |= IOMMU_DTE_CACHE_F_SUPPRESS_IOPF; uint16_t const fDtePerm = (pDte->au64[0] >> IOMMU_IO_PERM_SHIFT) & IOMMU_IO_PERM_MASK; AssertCompile(IOMMU_DTE_CACHE_F_IO_PERM_MASK == IOMMU_IO_PERM_MASK); fFlags |= fDtePerm << IOMMU_DTE_CACHE_F_IO_PERM_SHIFT; } /* Extract basic flags from bits 255:128 of the DTE. */ if (pDte->n.u1IntrMapValid) { fFlags |= IOMMU_DTE_CACHE_F_INTR_MAP_VALID; /** @todo Skip the if check here (shift/mask the relevant bit over). 
*/ if (pDte->n.u1IgnoreUnmappedIntrs) fFlags |= IOMMU_DTE_CACHE_F_IGNORE_UNMAPPED_INTR; uint16_t const fIntrCtrl = IOMMU_DTE_GET_INTR_CTRL(pDte); AssertCompile(IOMMU_DTE_CACHE_F_INTR_CTRL_MASK == IOMMU_DTE_INTR_CTRL_MASK); fFlags |= fIntrCtrl << IOMMU_DTE_CACHE_F_INTR_CTRL_SHIFT; } return fFlags; } /** * Remaps the source MSI to the destination MSI given the IRTE. * * @param pMsiIn The source MSI. * @param pMsiOut Where to store the remapped MSI. * @param pIrte The IRTE used for the remapping. */ static void iommuAmdIrteRemapMsi(PCMSIMSG pMsiIn, PMSIMSG pMsiOut, PCIRTE_T pIrte) { /* Preserve all bits from the source MSI address and data that don't map 1:1 from the IRTE. */ *pMsiOut = *pMsiIn; pMsiOut->Addr.n.u1DestMode = pIrte->n.u1DestMode; pMsiOut->Addr.n.u8DestId = pIrte->n.u8Dest; pMsiOut->Data.n.u8Vector = pIrte->n.u8Vector; pMsiOut->Data.n.u3DeliveryMode = pIrte->n.u3IntrType; } #ifdef IOMMU_WITH_DTE_CACHE /** * Looks up an entry in the DTE cache for the given device ID. * * @returns The index of the entry, or the cache capacity if no entry was found. * @param pThis The shared IOMMU device state. * @param idDevice The device ID (bus, device, function). */ DECLINLINE(uint16_t) iommuAmdDteCacheEntryLookup(PIOMMU pThis, uint16_t idDevice) { uint16_t const cDeviceIds = RT_ELEMENTS(pThis->aDeviceIds); for (uint16_t i = 0; i < cDeviceIds; i++) { if (pThis->aDeviceIds[i] == idDevice) return i; } return cDeviceIds; } /** * Gets an free/unused DTE cache entry. * * @returns The index of an unused entry, or cache capacity if the cache is full. * @param pThis The shared IOMMU device state. */ DECLINLINE(uint16_t) iommuAmdDteCacheEntryGetUnused(PCIOMMU pThis) { /* * ASSUMES device ID 0 is the PCI host bridge or the IOMMU itself * (the latter being an ugly hack) and cannot be a valid device ID. 
*/ uint16_t const cDeviceIds = RT_ELEMENTS(pThis->aDeviceIds); for (uint16_t i = 0; i < cDeviceIds; i++) { if (!pThis->aDeviceIds[i]) return i; } return cDeviceIds; } /** * Adds or updates the I/O device flags for the given device ID. * * @returns VBox status code. * @retval VERR_OUT_OF_RESOURCES if the cache is full. * * @param pDevIns The IOMMU instance data. * @param idDevice The device ID (bus, device, function). * @param pDte The device table entry. * @param fOrMask The device flags (usually compound flags) to OR in with the * basic flags, see IOMMU_DTE_CACHE_F_XXX. */ static int iommuAmdDteCacheAdd(PPDMDEVINS pDevIns, uint16_t idDevice, PCDTE_T pDte, uint16_t fOrMask) { Assert(pDte); Assert(idDevice); int rc = VINF_SUCCESS; uint16_t const fFlags = iommuAmdGetBasicDevFlags(pDte) | IOMMU_DTE_CACHE_F_PRESENT | fOrMask; uint16_t const idDomain = pDte->n.u16DomainId; PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); IOMMU_CACHE_LOCK(pDevIns, pThis); uint16_t const cDteCache = RT_ELEMENTS(pThis->aDteCache); uint16_t idxDte = iommuAmdDteCacheEntryLookup(pThis, idDevice); if (idxDte < cDteCache) { pThis->aDteCache[idxDte].fFlags = fFlags; pThis->aDteCache[idxDte].idDomain = idDomain; } else if ((idxDte = iommuAmdDteCacheEntryGetUnused(pThis)) < cDteCache) { pThis->aDeviceIds[idxDte] = idDevice; pThis->aDteCache[idxDte].fFlags = fFlags; pThis->aDteCache[idxDte].idDomain = idDomain; } else rc = VERR_OUT_OF_RESOURCES; IOMMU_CACHE_UNLOCK(pDevIns, pThis); return rc; } /** * Adds one or more I/O device flags if the device is already present in the cache. * * @param pDevIns The IOMMU instance data. * @param idDevice The device ID (bus, device, function). * @param fFlags Additional device flags to OR with existing flags, see * IOMMU_DTE_CACHE_F_XXX. 
*/ static void iommuAmdDteCacheAddFlags(PPDMDEVINS pDevIns, uint16_t idDevice, uint16_t fFlags) { PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); IOMMU_CACHE_LOCK(pDevIns, pThis); uint16_t const cDteCache = RT_ELEMENTS(pThis->aDteCache); uint16_t const idxDte = iommuAmdDteCacheEntryLookup(pThis, idDevice); if ( idxDte < cDteCache && (pThis->aDteCache[idxDte].fFlags & IOMMU_DTE_CACHE_F_PRESENT)) pThis->aDteCache[idxDte].fFlags |= fFlags; IOMMU_CACHE_UNLOCK(pDevIns, pThis); } # ifdef IN_RING3 /** * Removes a DTE cache entry. * * @param pDevIns The IOMMU instance data. * @param idDevice The device ID to remove cache entries for. */ static void iommuAmdDteCacheRemove(PPDMDEVINS pDevIns, uint16_t idDevice) { PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); IOMMU_CACHE_LOCK(pDevIns, pThis); uint16_t const cDteCache = RT_ELEMENTS(pThis->aDteCache); uint16_t const idxDte = iommuAmdDteCacheEntryLookup(pThis, idDevice); if (idxDte < cDteCache) { pThis->aDteCache[idxDte].fFlags = 0; pThis->aDteCache[idxDte].idDomain = 0; } IOMMU_CACHE_UNLOCK(pDevIns, pThis); } /** * Removes all entries in the device table entry cache. * * @param pDevIns The IOMMU instance data. */ static void iommuAmdDteCacheRemoveAll(PPDMDEVINS pDevIns) { PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); IOMMU_CACHE_LOCK(pDevIns, pThis); RT_ZERO(pThis->aDeviceIds); RT_ZERO(pThis->aDteCache); IOMMU_CACHE_UNLOCK(pDevIns, pThis); } # endif /* IN_RING3 */ #endif /* IOMMU_WITH_DTE_CACHE */ #ifdef IOMMU_WITH_IOTLBE_CACHE /** * Moves the IOTLB entry to the least recently used slot. * * @param pThisR3 The ring-3 IOMMU device state. * @param pIotlbe The IOTLB entry to move. */ DECLINLINE(void) iommuAmdIotlbEntryMoveToLru(PIOMMUR3 pThisR3, PIOTLBE pIotlbe) { if (!RTListNodeIsFirst(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru)) { RTListNodeRemove(&pIotlbe->NdLru); RTListPrepend(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru); } } /** * Moves the IOTLB entry to the most recently used slot. 
* * @param pThisR3 The ring-3 IOMMU device state. * @param pIotlbe The IOTLB entry to move. */ DECLINLINE(void) iommuAmdIotlbEntryMoveToMru(PIOMMUR3 pThisR3, PIOTLBE pIotlbe) { if (!RTListNodeIsLast(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru)) { RTListNodeRemove(&pIotlbe->NdLru); RTListAppend(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru); } } # ifdef IN_RING3 /** * Dumps the IOTLB entry via the debug info helper. * * @returns VINF_SUCCESS. * @param pNode Pointer to an IOTLB entry to dump info. * @param pvUser Pointer to an IOTLBEINFOARG. */ static DECLCALLBACK(int) iommuAmdR3IotlbEntryInfo(PAVLU64NODECORE pNode, void *pvUser) { /* Validate. */ PCIOTLBEINFOARG pArgs = (PCIOTLBEINFOARG)pvUser; AssertPtr(pArgs); AssertPtr(pArgs->pIommuR3); AssertPtr(pArgs->pHlp); //Assert(pArgs->pIommuCC->u32Magic == IOMMU_MAGIC); uint16_t const idDomain = IOMMU_IOTLB_KEY_GET_DOMAIN_ID(pNode->Key); if (idDomain == pArgs->idDomain) { PCIOTLBE pIotlbe = (PCIOTLBE)pNode; AVLU64KEY const uKey = pIotlbe->Core.Key; uint64_t const uIova = IOMMU_IOTLB_KEY_GET_IOVA(uKey); RTGCPHYS const GCPhysSpa = pIotlbe->PageLookup.GCPhysSpa; uint8_t const cShift = pIotlbe->PageLookup.cShift; size_t const cbPage = RT_BIT_64(cShift); uint8_t const fPerm = pIotlbe->PageLookup.fPerm; const char *pszPerm = iommuAmdMemAccessGetPermName(fPerm); bool const fEvictPending = pIotlbe->fEvictPending; PCDBGFINFOHLP pHlp = pArgs->pHlp; pHlp->pfnPrintf(pHlp, " Key = %#RX64 (%#RX64)\n", uKey, uIova); pHlp->pfnPrintf(pHlp, " GCPhys = %#RGp\n", GCPhysSpa); pHlp->pfnPrintf(pHlp, " cShift = %u (%zu bytes)\n", cShift, cbPage); pHlp->pfnPrintf(pHlp, " fPerm = %#x (%s)\n", fPerm, pszPerm); pHlp->pfnPrintf(pHlp, " fEvictPending = %RTbool\n", fEvictPending); } return VINF_SUCCESS; } # endif /* IN_RING3 */ /** * Removes the IOTLB entry if it's associated with the specified domain ID. * * @returns VINF_SUCCESS. * @param pNode Pointer to an IOTLBE. * @param pvUser Pointer to an IOTLBEFLUSHARG containing the domain ID. 
*/ static DECLCALLBACK(int) iommuAmdIotlbEntryRemoveDomainId(PAVLU64NODECORE pNode, void *pvUser) { /* Validate. */ PCIOTLBEFLUSHARG pArgs = (PCIOTLBEFLUSHARG)pvUser; AssertPtr(pArgs); AssertPtr(pArgs->pIommuR3); //Assert(pArgs->pIommuR3->u32Magic == IOMMU_MAGIC); uint16_t const idDomain = IOMMU_IOTLB_KEY_GET_DOMAIN_ID(pNode->Key); if (idDomain == pArgs->idDomain) { /* Mark this entry is as invalidated and needs to be evicted later. */ PIOTLBE pIotlbe = (PIOTLBE)pNode; pIotlbe->fEvictPending = true; iommuAmdIotlbEntryMoveToLru(pArgs->pIommuR3, (PIOTLBE)pNode); } return VINF_SUCCESS; } /** * Inserts an IOTLB entry into the cache. * * @param pThis The shared IOMMU device state. * @param pThisR3 The ring-3 IOMMU device state. * @param pIotlbe The IOTLB entry to initialize and insert. * @param idDomain The domain ID. * @param uIova The I/O virtual address. * @param pPageLookup The I/O page lookup result of the access. */ static void iommuAmdIotlbEntryInsert(PIOMMU pThis, PIOMMUR3 pThisR3, PIOTLBE pIotlbe, uint16_t idDomain, uint64_t uIova, PCIOPAGELOOKUP pPageLookup) { /* Initialize the IOTLB entry with results of the I/O page walk. */ pIotlbe->Core.Key = IOMMU_IOTLB_KEY_MAKE(idDomain, uIova); pIotlbe->PageLookup = *pPageLookup; /* Validate. */ Assert(pIotlbe->Core.Key != IOMMU_IOTLB_KEY_NIL); Assert(!pIotlbe->fEvictPending); /* Check if the entry already exists. */ PIOTLBE pFound = (PIOTLBE)RTAvlU64Get(&pThisR3->TreeIotlbe, pIotlbe->Core.Key); if (!pFound) { /* Insert the entry into the cache. */ bool const fInserted = RTAvlU64Insert(&pThisR3->TreeIotlbe, &pIotlbe->Core); Assert(fInserted); NOREF(fInserted); Assert(pThisR3->cCachedIotlbes < IOMMU_IOTLBE_MAX); ++pThisR3->cCachedIotlbes; STAM_COUNTER_INC(&pThis->StatIotlbeCached); NOREF(pThis); } else { /* Update the existing entry. 
*/ if (pFound->fEvictPending) { pFound->fEvictPending = false; STAM_COUNTER_INC(&pThis->StatIotlbeLazyEvictReuse); NOREF(pThis); } Assert(pFound->PageLookup.cShift == pPageLookup->cShift); pFound->PageLookup.fPerm = pPageLookup->fPerm; pFound->PageLookup.GCPhysSpa = pPageLookup->GCPhysSpa; } } /** * Removes an IOTLB entry from the cache for the given key. * * @returns Pointer to the removed IOTLB entry, NULL if the entry wasn't found in * the tree. * @param pThis The shared IOMMU device state. * @param pThisR3 The ring-3 IOMMU device state. * @param uKey The key of the IOTLB entry to remove. */ static PIOTLBE iommuAmdIotlbEntryRemove(PIOMMU pThis, PIOMMUR3 pThisR3, AVLU64KEY uKey) { PIOTLBE pIotlbe = (PIOTLBE)RTAvlU64Remove(&pThisR3->TreeIotlbe, uKey); if (pIotlbe) { if (pIotlbe->fEvictPending) STAM_COUNTER_INC(&pThis->StatIotlbeLazyEvictReuse); RT_ZERO(pIotlbe->Core); RT_ZERO(pIotlbe->PageLookup); /* We must not erase the LRU node connections here! */ pIotlbe->fEvictPending = false; Assert(pIotlbe->Core.Key == IOMMU_IOTLB_KEY_NIL); Assert(pThisR3->cCachedIotlbes > 0); --pThisR3->cCachedIotlbes; STAM_COUNTER_DEC(&pThis->StatIotlbeCached); NOREF(pThis); } return pIotlbe; } /** * Looks up an IOTLB from the cache. * * @returns Pointer to IOTLB entry if found, NULL otherwise. * @param pThis The shared IOMMU device state. * @param pThisR3 The ring-3 IOMMU device state. * @param idDomain The domain ID. * @param uIova The I/O virtual address. */ static PIOTLBE iommuAmdIotlbLookup(PIOMMU pThis, PIOMMUR3 pThisR3, uint64_t idDomain, uint64_t uIova) { RT_NOREF(pThis); uint64_t const uKey = IOMMU_IOTLB_KEY_MAKE(idDomain, uIova); PIOTLBE pIotlbe = (PIOTLBE)RTAvlU64Get(&pThisR3->TreeIotlbe, uKey); if ( pIotlbe && !pIotlbe->fEvictPending) return pIotlbe; /* * Domain Id wildcard invalidations only marks entries for eviction later but doesn't remove * them from the cache immediately. 
We found an entry pending eviction, just return that * nothing was found (rather than evicting now). */ return NULL; } /** * Adds an IOTLB entry to the cache. * * @param pThis The shared IOMMU device state. * @param pThisR3 The ring-3 IOMMU device state. * @param idDomain The domain ID. * @param uIova The I/O virtual address. * @param pPageLookup The I/O page lookup result of the access. */ static void iommuAmdIotlbAdd(PIOMMU pThis, PIOMMUR3 pThisR3, uint16_t idDomain, uint64_t uIova, PCIOPAGELOOKUP pPageLookup) { Assert(!(uIova & X86_PAGE_4K_OFFSET_MASK)); Assert(pPageLookup); Assert(pPageLookup->cShift <= 31); Assert(pPageLookup->fPerm != IOMMU_IO_PERM_NONE); /* * If there are no unused IOTLB entries, evict the LRU entry. * Otherwise, get a new IOTLB entry from the pre-allocated list. */ if (pThisR3->idxUnusedIotlbe == IOMMU_IOTLBE_MAX) { /* Grab the least recently used entry. */ PIOTLBE pIotlbe = RTListGetFirst(&pThisR3->LstLruIotlbe, IOTLBE, NdLru); Assert(pIotlbe); /* If the entry is in the cache, remove it. */ if (pIotlbe->Core.Key != IOMMU_IOTLB_KEY_NIL) iommuAmdIotlbEntryRemove(pThis, pThisR3, pIotlbe->Core.Key); /* Initialize and insert the IOTLB entry into the cache. */ iommuAmdIotlbEntryInsert(pThis, pThisR3, pIotlbe, idDomain, uIova, pPageLookup); /* Move the entry to the most recently used slot. */ iommuAmdIotlbEntryMoveToMru(pThisR3, pIotlbe); } else { /* Grab an unused IOTLB entry from the pre-allocated list. */ PIOTLBE pIotlbe = &pThisR3->paIotlbes[pThisR3->idxUnusedIotlbe]; ++pThisR3->idxUnusedIotlbe; /* Initialize and insert the IOTLB entry into the cache. */ iommuAmdIotlbEntryInsert(pThis, pThisR3, pIotlbe, idDomain, uIova, pPageLookup); /* Add the entry to the most recently used slot. */ RTListAppend(&pThisR3->LstLruIotlbe, &pIotlbe->NdLru); } } /** * Removes all IOTLB entries from the cache. * * @param pDevIns The IOMMU instance data. 
*/ static void iommuAmdIotlbRemoveAll(PPDMDEVINS pDevIns) { PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3); IOMMU_CACHE_LOCK(pDevIns, pThis); if (pThisR3->cCachedIotlbes > 0) { size_t const cbIotlbes = sizeof(IOTLBE) * IOMMU_IOTLBE_MAX; RT_BZERO(pThisR3->paIotlbes, cbIotlbes); pThisR3->idxUnusedIotlbe = 0; pThisR3->cCachedIotlbes = 0; STAM_COUNTER_RESET(&pThis->StatIotlbeCached); RTListInit(&pThisR3->LstLruIotlbe); } IOMMU_CACHE_UNLOCK(pDevIns, pThis); } /** * Removes IOTLB entries for the range of I/O virtual addresses and the specified * domain ID from the cache. * * @param pDevIns The IOMMU instance data. * @param idDomain The domain ID. * @param uIova The I/O virtual address to invalidate. * @param cbInvalidate The size of the invalidation (must be 4K aligned). */ static void iommuAmdIotlbRemoveRange(PPDMDEVINS pDevIns, uint16_t idDomain, uint64_t uIova, size_t cbInvalidate) { /* Validate. */ Assert(!(uIova & X86_PAGE_4K_OFFSET_MASK)); Assert(!(cbInvalidate & X86_PAGE_4K_OFFSET_MASK)); Assert(cbInvalidate >= X86_PAGE_4K_SIZE); PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3); IOMMU_CACHE_LOCK(pDevIns, pThis); do { uint64_t const uKey = IOMMU_IOTLB_KEY_MAKE(idDomain, uIova); PIOTLBE pIotlbe = iommuAmdIotlbEntryRemove(pThis, pThisR3, uKey); if (pIotlbe) iommuAmdIotlbEntryMoveToLru(pThisR3, pIotlbe); uIova += X86_PAGE_4K_SIZE; cbInvalidate -= X86_PAGE_4K_SIZE; } while (cbInvalidate > 0); IOMMU_CACHE_UNLOCK(pDevIns, pThis); } /** * Removes all IOTLB entries for the specified domain ID. * * @param pDevIns The IOMMU instance data. * @param idDomain The domain ID. */ static void iommuAmdIotlbRemoveDomainId(PPDMDEVINS pDevIns, uint16_t idDomain) { /* * We need to iterate the tree and search based on the domain ID. * But it seems we cannot remove items while iterating the tree. 
* Thus, we simply mark entries for eviction later but move them to the LRU * so they will eventually get evicted and re-cycled as the cache gets re-populated. */ PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3); IOMMU_CACHE_LOCK(pDevIns, pThis); IOTLBEFLUSHARG Args; Args.pIommuR3 = pThisR3; Args.idDomain = idDomain; RTAvlU64DoWithAll(&pThisR3->TreeIotlbe, true /* fFromLeft */, iommuAmdIotlbEntryRemoveDomainId, &Args); IOMMU_CACHE_UNLOCK(pDevIns, pThis); } /** * Adds or updates IOTLB entries for the given range of I/O virtual addresses. * * @param pDevIns The IOMMU instance data. * @param idDomain The domain ID. * @param uIova The I/O virtual address. * @param cbIova The size of the access (must be 4K aligned). * @param GCPhysSpa The translated system-physical address. * @param fPerm The I/O permissions for the access, see IOMMU_IO_PERM_XXX. */ static void iommuAmdIotlbAddRange(PPDMDEVINS pDevIns, uint16_t idDomain, uint64_t uIova, size_t cbIova, RTGCPHYS GCPhysSpa, uint8_t fPerm) { Assert(!(uIova & X86_PAGE_4K_OFFSET_MASK)); Assert(!(GCPhysSpa & X86_PAGE_4K_OFFSET_MASK)); Assert(!(cbIova & X86_PAGE_4K_OFFSET_MASK)); Assert(cbIova >= X86_PAGE_4K_SIZE); PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3); /* Add IOTLB entries for every page in the access. */ IOPAGELOOKUP PageLookup; RT_ZERO(PageLookup); PageLookup.cShift = X86_PAGE_4K_SHIFT; PageLookup.fPerm = fPerm; PageLookup.GCPhysSpa = GCPhysSpa; size_t cPages = cbIova / X86_PAGE_4K_SIZE; cPages = RT_MIN(cPages, IOMMU_IOTLBE_MAX); IOMMU_CACHE_LOCK(pDevIns, pThis); /** @todo Re-check DTE cache? */ do { iommuAmdIotlbAdd(pThis, pThisR3, idDomain, uIova, &PageLookup); uIova += X86_PAGE_4K_SIZE; PageLookup.GCPhysSpa += X86_PAGE_4K_SIZE; --cPages; } while (cPages > 0); IOMMU_CACHE_UNLOCK(pDevIns, pThis); } #endif /* IOMMU_WITH_IOTLBE_CACHE */ #ifdef IOMMU_WITH_IRTE_CACHE /** * Looks up an IRTE cache entry. 
* * @returns Index of the found entry, or cache capacity if not found. * @param pThis The shared IOMMU device state. * @param idDevice The device ID (bus, device, function). * @param offIrte The offset into the interrupt remap table. */ static uint16_t iommuAmdIrteCacheEntryLookup(PCIOMMU pThis, uint16_t idDevice, uint16_t offIrte) { /** @todo Consider sorting and binary search when the cache capacity grows. * For the IRTE cache this should be okay since typically guests do not alter the * interrupt remapping once programmed, so hopefully sorting shouldn't happen * often. */ uint32_t const uKey = IOMMU_IRTE_CACHE_KEY_MAKE(idDevice, offIrte); uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache); for (uint16_t i = 0; i < cIrteCache; i++) if (pThis->aIrteCache[i].uKey == uKey) return i; return cIrteCache; } /** * Gets a free/unused IRTE cache entry. * * @returns The index of an unused entry, or cache capacity if the cache is full. * @param pThis The shared IOMMU device state. */ static uint16_t iommuAmdIrteCacheEntryGetUnused(PCIOMMU pThis) { uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache); for (uint16_t i = 0; i < cIrteCache; i++) if (pThis->aIrteCache[i].uKey == IOMMU_IRTE_CACHE_KEY_NIL) { Assert(!pThis->aIrteCache[i].Irte.u32); return i; } return cIrteCache; } /** * Looks up the IRTE cache for the given MSI. * * @returns VBox status code. * @param pDevIns The IOMMU instance data. * @param idDevice The device ID (bus, device, function). * @param enmOp The IOMMU operation being performed. * @param pMsiIn The source MSI. * @param pMsiOut Where to store the remapped MSI. */ static int iommuAmdIrteCacheLookup(PPDMDEVINS pDevIns, uint16_t idDevice, IOMMUOP enmOp, PCMSIMSG pMsiIn, PMSIMSG pMsiOut) { RT_NOREF(enmOp); /* May need it if we have to report errors (currently we fallback to the slower path to do that). */ int rc = VERR_NOT_FOUND; /* Deal with such cases in the slower/fallback path. 
*/
    /* MSI addresses outside the MSI base range are not handled here: bail out with VERR_NOT_FOUND. */
    if ((pMsiIn->Addr.u64 & VBOX_MSI_ADDR_ADDR_MASK) == VBOX_MSI_ADDR_BASE)
    { /* likely */ }
    else
        return rc;

    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
    IOMMU_CACHE_LOCK(pDevIns, pThis);

    /* Everything below depends on the device having an entry in the DTE cache. */
    uint16_t const idxDteCache = iommuAmdDteCacheEntryLookup(pThis, idDevice);
    if (idxDteCache < RT_ELEMENTS(pThis->aDteCache))
    {
        PCDTECACHE pDteCache = &pThis->aDteCache[idxDteCache];
        /* Case 1: the cached DTE is present and has a valid interrupt map. */
        if ((pDteCache->fFlags & (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_INTR_MAP_VALID))
                              == (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_INTR_MAP_VALID))
        {
            Assert((pMsiIn->Addr.u64 & VBOX_MSI_ADDR_ADDR_MASK) == VBOX_MSI_ADDR_BASE);   /* Paranoia. */

            /* Currently, we only cache remapping of fixed and arbitrated interrupts. */
            uint8_t const u8DeliveryMode = pMsiIn->Data.n.u3DeliveryMode;
            if (u8DeliveryMode <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO)
            {
                /* The interrupt control field is stored in the cached flags. */
                uint8_t const uIntrCtrl = (pDteCache->fFlags >> IOMMU_DTE_CACHE_F_INTR_CTRL_SHIFT)
                                        & IOMMU_DTE_CACHE_F_INTR_CTRL_MASK;
                if (uIntrCtrl == IOMMU_INTR_CTRL_REMAP)
                {
                    /* Interrupt table length has been verified prior to adding entries to the cache. */
                    uint16_t const offIrte      = IOMMU_GET_IRTE_OFF(pMsiIn->Data.u32);
                    uint16_t const idxIrteCache = iommuAmdIrteCacheEntryLookup(pThis, idDevice, offIrte);
                    /* Only a cached IRTE yields success; a miss leaves rc at VERR_NOT_FOUND. */
                    if (idxIrteCache < RT_ELEMENTS(pThis->aIrteCache))
                    {
                        PCIRTE_T pIrte = &pThis->aIrteCache[idxIrteCache].Irte;
                        Assert(pIrte->n.u1RemapEnable);
                        Assert(pIrte->n.u3IntrType <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO);
                        iommuAmdIrteRemapMsi(pMsiIn, pMsiOut, pIrte);
                        rc = VINF_SUCCESS;
                    }
                }
                else if (uIntrCtrl == IOMMU_INTR_CTRL_FWD_UNMAPPED)
                {
                    /* Interrupts are forwarded unmapped: pass the MSI through as is. */
                    *pMsiOut = *pMsiIn;
                    rc = VINF_SUCCESS;
                }
                /* Any other interrupt control value leaves rc at VERR_NOT_FOUND. */
            }
        }
        /* Case 2: the cached DTE is present but has no valid interrupt map: pass the MSI through as is. */
        else if (pDteCache->fFlags & IOMMU_DTE_CACHE_F_PRESENT)
        {
            *pMsiOut = *pMsiIn;
            rc = VINF_SUCCESS;
        }
    }

    IOMMU_CACHE_UNLOCK(pDevIns, pThis);
    return rc;
}


/**
 * Adds or updates the IRTE cache for the given IRTE.
 *
 * @returns VBox status code.
 * @retval  VERR_OUT_OF_RESOURCES if the cache is full.
 *
 * @param   pDevIns     The IOMMU instance data.
 * @param   idDevice    The device ID (bus, device, function).
* @param offIrte The offset into the interrupt remap table. * @param pIrte The IRTE to cache. */ static int iommuAmdIrteCacheAdd(PPDMDEVINS pDevIns, uint16_t idDevice, uint16_t offIrte, PCIRTE_T pIrte) { Assert(offIrte != 0xffff); /* Shouldn't be a valid IRTE table offset since sizeof(IRTE) is a multiple of 4. */ int rc = VINF_SUCCESS; PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); IOMMU_CACHE_LOCK(pDevIns, pThis); /* Find an existing entry or get an unused slot. */ uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache); uint16_t idxIrteCache = iommuAmdIrteCacheEntryLookup(pThis, idDevice, offIrte); if ( idxIrteCache < cIrteCache || (idxIrteCache = iommuAmdIrteCacheEntryGetUnused(pThis)) < cIrteCache) { pThis->aIrteCache[idxIrteCache].uKey = IOMMU_IRTE_CACHE_KEY_MAKE(idDevice, offIrte); pThis->aIrteCache[idxIrteCache].Irte = *pIrte; } else rc = VERR_OUT_OF_RESOURCES; IOMMU_CACHE_UNLOCK(pDevIns, pThis); return rc; } # ifdef IN_RING3 /** * Removes IRTE cache entries for the given device ID. * * @param pDevIns The IOMMU instance data. * @param idDevice The device ID (bus, device, function). */ static void iommuAmdIrteCacheRemove(PPDMDEVINS pDevIns, uint16_t idDevice) { PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); IOMMU_CACHE_LOCK(pDevIns, pThis); uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache); for (uint16_t i = 0; i < cIrteCache; i++) { PIRTECACHE pIrteCache = &pThis->aIrteCache[i]; if (idDevice == IOMMU_IRTE_CACHE_KEY_GET_DEVICE_ID(pIrteCache->uKey)) { pIrteCache->uKey = IOMMU_IRTE_CACHE_KEY_NIL; pIrteCache->Irte.u32 = 0; /* There could multiple IRTE entries for a device ID, continue searching. */ } } IOMMU_CACHE_UNLOCK(pDevIns, pThis); } /** * Removes all IRTE cache entries. * * @param pDevIns The IOMMU instance data. 
*/ static void iommuAmdIrteCacheRemoveAll(PPDMDEVINS pDevIns) { PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); IOMMU_CACHE_LOCK(pDevIns, pThis); uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache); for (uint16_t i = 0; i < cIrteCache; i++) { pThis->aIrteCache[i].uKey = IOMMU_IRTE_CACHE_KEY_NIL; pThis->aIrteCache[i].Irte.u32 = 0; } IOMMU_CACHE_UNLOCK(pDevIns, pThis); } # endif /* IN_RING3 */ #endif /* IOMMU_WITH_IRTE_CACHE */ /** * Atomically reads the control register without locking the IOMMU device. * * @returns The control register. * @param pThis The shared IOMMU device state. */ DECL_FORCE_INLINE(IOMMU_CTRL_T) iommuAmdGetCtrlUnlocked(PCIOMMU pThis) { IOMMU_CTRL_T Ctrl; Ctrl.u64 = ASMAtomicReadU64((volatile uint64_t *)&pThis->Ctrl.u64); return Ctrl; } /** * Returns whether MSI is enabled for the IOMMU. * * @returns Whether MSI is enabled. * @param pDevIns The IOMMU device instance. * * @note There should be a PCIDevXxx function for this. */ static bool iommuAmdIsMsiEnabled(PPDMDEVINS pDevIns) { MSI_CAP_HDR_T MsiCapHdr; MsiCapHdr.u32 = PDMPciDevGetDWord(pDevIns->apPciDevs[0], IOMMU_PCI_OFF_MSI_CAP_HDR); return MsiCapHdr.n.u1MsiEnable; } /** * Signals a PCI target abort. * * @param pDevIns The IOMMU device instance. */ static void iommuAmdSetPciTargetAbort(PPDMDEVINS pDevIns) { PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0]; uint16_t const u16Status = PDMPciDevGetStatus(pPciDev) | VBOX_PCI_STATUS_SIG_TARGET_ABORT; PDMPciDevSetStatus(pPciDev, u16Status); } /** * Wakes up the command thread if there are commands to be processed. * * @param pDevIns The IOMMU device instance. * * @remarks The IOMMU lock must be held while calling this! 
*/
static void iommuAmdCmdThreadWakeUpIfNeeded(PPDMDEVINS pDevIns)
{
    Log4Func(("\n"));

    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);

    /* Nothing to do unless the command buffer is running... */
    if (!pThis->Status.n.u1CmdBufRunning)
        return;

    /* ... and holds commands (head and tail differ)... */
    if (pThis->CmdBufTailPtr.n.off == pThis->CmdBufHeadPtr.n.off)
        return;

    /* ... and the thread has not been signaled already. */
    if (ASMAtomicXchgBool(&pThis->fCmdThreadSignaled, true))
        return;

    Log4Func(("Signaling command thread\n"));
    PDMDevHlpSUPSemEventSignal(pDevIns, pThis->hEvtCmdThread);
}


/**
 * Reads the Device Table Base Address Register.
 *
 * @returns VINF_SUCCESS.
 * @param   pu64Value   Where to store the register value.
 */
static VBOXSTRICTRC iommuAmdDevTabBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);
    *pu64Value = pThis->aDevTabBaseAddrs[0].u64;
    return VINF_SUCCESS;
}


/**
 * Reads the Command Buffer Base Address Register.
 *
 * @returns VINF_SUCCESS.
 * @param   pu64Value   Where to store the register value.
 */
static VBOXSTRICTRC iommuAmdCmdBufBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);
    *pu64Value = pThis->CmdBufBaseAddr.u64;
    return VINF_SUCCESS;
}


/**
 * Reads the Event Log Base Address Register.
 *
 * @returns VINF_SUCCESS.
 * @param   pu64Value   Where to store the register value.
 */
static VBOXSTRICTRC iommuAmdEvtLogBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);
    *pu64Value = pThis->EvtLogBaseAddr.u64;
    return VINF_SUCCESS;
}


/**
 * Reads the Control Register.
 *
 * @returns VINF_SUCCESS.
 * @param   pu64Value   Where to store the register value.
 */
static VBOXSTRICTRC iommuAmdCtrl_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);
    *pu64Value = pThis->Ctrl.u64;
    return VINF_SUCCESS;
}


/**
 * Reads the Exclusion Range Base Address Register.
 *
 * @returns VINF_SUCCESS.
 * @param   pu64Value   Where to store the register value.
 */
static VBOXSTRICTRC iommuAmdExclRangeBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);
    *pu64Value = pThis->ExclRangeBaseAddr.u64;
    return VINF_SUCCESS;
}


/**
 * Reads the Exclusion Range Limit Register.
 *
 * @returns VINF_SUCCESS.
 * @param   pu64Value   Where to store the register value.
 */
static VBOXSTRICTRC iommuAmdExclRangeLimit_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);
    *pu64Value = pThis->ExclRangeLimit.u64;
    return VINF_SUCCESS;
}


/**
 * Reads the Extended Feature Register.
*/
static VBOXSTRICTRC iommuAmdExtFeat_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);
    *pu64Value = pThis->ExtFeat.u64;
    return VINF_SUCCESS;
}


/**
 * Reads the PPR Log Base Address Register.
 *
 * @returns VINF_SUCCESS.
 * @param   pu64Value   Where to store the register value.
 */
static VBOXSTRICTRC iommuAmdPprLogBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);
    *pu64Value = pThis->PprLogBaseAddr.u64;
    return VINF_SUCCESS;
}


/**
 * Reads the Hardware Event Register (Hi).
 *
 * @returns VINF_SUCCESS.
 * @param   pu64Value   Where to store the register value.
 */
static VBOXSTRICTRC iommuAmdHwEvtHi_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);
    *pu64Value = pThis->HwEvtHi.u64;
    return VINF_SUCCESS;
}


/**
 * Reads the Hardware Event Register (Lo).
 *
 * @returns VINF_SUCCESS.
 * @param   pu64Value   Where to store the register value.
 */
static VBOXSTRICTRC iommuAmdHwEvtLo_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);
    *pu64Value = pThis->HwEvtLo;
    return VINF_SUCCESS;
}


/**
 * Reads the Hardware Event Status Register.
 *
 * @returns VINF_SUCCESS.
 * @param   pu64Value   Where to store the register value.
 */
static VBOXSTRICTRC iommuAmdHwEvtStatus_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);
    *pu64Value = pThis->HwEvtStatus.u64;
    return VINF_SUCCESS;
}


/**
 * Reads the GA Log Base Address Register.
 *
 * @returns VINF_SUCCESS.
 * @param   pu64Value   Where to store the register value.
 */
static VBOXSTRICTRC iommuAmdGALogBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);
    *pu64Value = pThis->GALogBaseAddr.u64;
    return VINF_SUCCESS;
}


/**
 * Reads the PPR Log B Base Address Register.
 *
 * @returns VINF_SUCCESS.
 * @param   pu64Value   Where to store the register value.
 */
static VBOXSTRICTRC iommuAmdPprLogBBaseAddr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);
    *pu64Value = pThis->PprLogBBaseAddr.u64;
    return VINF_SUCCESS;
}


/**
 * Reads the Event Log B Base Address Register.
*/
static VBOXSTRICTRC iommuAmdEvtLogBBaseAddr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);
    *pu64Value = pThis->EvtLogBBaseAddr.u64;
    return VINF_SUCCESS;
}


/**
 * Reads the Device Table Segment Base Address Register.
 *
 * @returns VINF_SUCCESS.
 * @param   pThis       The shared IOMMU device state.
 * @param   offReg      The MMIO offset of the segment register being read.
 * @param   pu64Value   Where to store the register value.
 */
static VBOXSTRICTRC iommuAmdDevTabSegBar_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);

    /*
     * Figure out which segment is being read. Segment registers are 8 bytes
     * apart and map to aDevTabBaseAddrs entries starting at index 1.
     */
    uint8_t const idxReg     = (offReg - IOMMU_MMIO_OFF_DEV_TAB_SEG_FIRST) >> 3;
    uint8_t const idxSegment = idxReg + 1;
    Assert(idxSegment < RT_ELEMENTS(pThis->aDevTabBaseAddrs));

    *pu64Value = pThis->aDevTabBaseAddrs[idxSegment].u64;
    return VINF_SUCCESS;
}


/**
 * Reads the Device Specific Feature Extension (DSFX) Register.
 *
 * @returns VINF_SUCCESS.
 * @param   pu64Value   Where to store the register value.
 */
static VBOXSTRICTRC iommuAmdDevSpecificFeat_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);
    *pu64Value = pThis->DevSpecificFeat.u64;
    return VINF_SUCCESS;
}


/**
 * Reads the Device Specific Control Extension (DSCX) Register.
 *
 * @returns VINF_SUCCESS.
 * @param   pu64Value   Where to store the register value.
 */
static VBOXSTRICTRC iommuAmdDevSpecificCtrl_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);
    *pu64Value = pThis->DevSpecificCtrl.u64;
    return VINF_SUCCESS;
}


/**
 * Reads the Device Specific Status Extension (DSSX) Register.
 *
 * @returns VINF_SUCCESS.
 * @param   pu64Value   Where to store the register value.
 */
static VBOXSTRICTRC iommuAmdDevSpecificStatus_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);
    *pu64Value = pThis->DevSpecificStatus.u64;
    return VINF_SUCCESS;
}


/**
 * Reads the MSI Vector Register 0 (32-bit) and the MSI Vector Register 1 (32-bit).
*/
static VBOXSTRICTRC iommuAmdDevMsiVector_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);

    /* The two 32-bit halves are combined into one 64-bit value, first dword in the low half. */
    uint32_t const u32Low  = pThis->MiscInfo.au32[0];
    uint32_t const u32High = pThis->MiscInfo.au32[1];
    *pu64Value = RT_MAKE_U64(u32Low, u32High);
    return VINF_SUCCESS;
}


/**
 * Reads the MSI Capability Header Register (32-bit) and the MSI Address (Lo)
 * Register (32-bit).
 *
 * Both values are read from the PCI configuration space of the IOMMU's first
 * PCI device; the capability header ends up in the low half of the result.
 *
 * @returns VINF_SUCCESS.
 * @param   pDevIns     The IOMMU device instance.
 * @param   pu64Value   Where to store the combined register value.
 */
static VBOXSTRICTRC iommuAmdMsiCapHdrAndAddrLo_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pThis);
    RT_NOREF(offReg);

    PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
    PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);

    uint32_t const u32CapHdr = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_CAP_HDR);
    uint32_t const u32AddrLo = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_LO);
    *pu64Value = RT_MAKE_U64(u32CapHdr, u32AddrLo);
    return VINF_SUCCESS;
}


/**
 * Reads the MSI Address (Hi) Register (32-bit) and the MSI data register (32-bit).
 *
 * Both values are read from the PCI configuration space of the IOMMU's first
 * PCI device; the address (Hi) ends up in the low half of the result.
 *
 * @returns VINF_SUCCESS.
 * @param   pDevIns     The IOMMU device instance.
 * @param   pu64Value   Where to store the combined register value.
 */
static VBOXSTRICTRC iommuAmdMsiAddrHiAndData_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pThis);
    RT_NOREF(offReg);

    PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
    PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);

    uint32_t const u32AddrHi = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_HI);
    uint32_t const u32Data   = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_DATA);
    *pu64Value = RT_MAKE_U64(u32AddrHi, u32Data);
    return VINF_SUCCESS;
}


/**
 * Reads the Command Buffer Head Pointer Register.
 *
 * @returns VINF_SUCCESS.
 * @param   pu64Value   Where to store the register value.
 */
static VBOXSTRICTRC iommuAmdCmdBufHeadPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);
    *pu64Value = pThis->CmdBufHeadPtr.u64;
    return VINF_SUCCESS;
}


/**
 * Reads the Command Buffer Tail Pointer Register.
 *
 * @returns VINF_SUCCESS.
 * @param   pu64Value   Where to store the register value.
 */
static VBOXSTRICTRC iommuAmdCmdBufTailPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);
    *pu64Value = pThis->CmdBufTailPtr.u64;
    return VINF_SUCCESS;
}


/**
 * Reads the Event Log Head Pointer Register.
*/
static VBOXSTRICTRC iommuAmdEvtLogHeadPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);
    *pu64Value = pThis->EvtLogHeadPtr.u64;
    return VINF_SUCCESS;
}


/**
 * Reads the Event Log Tail Pointer Register.
 *
 * @returns VINF_SUCCESS.
 * @param   pu64Value   Where to store the register value.
 */
static VBOXSTRICTRC iommuAmdEvtLogTailPtr_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);
    *pu64Value = pThis->EvtLogTailPtr.u64;
    return VINF_SUCCESS;
}


/**
 * Reads the Status Register.
 *
 * @returns VINF_SUCCESS.
 * @param   pu64Value   Where to store the register value.
 */
static VBOXSTRICTRC iommuAmdStatus_r(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t *pu64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);
    *pu64Value = pThis->Status.u64;
    return VINF_SUCCESS;
}


/**
 * Writes the Device Table Base Address Register.
 *
 * Bits outside IOMMU_DEV_TAB_BAR_VALID_MASK are dropped before the value is
 * stored.
 *
 * @returns VINF_SUCCESS.
 * @param   pThis       The shared IOMMU device state.
 * @param   u64Value    The value being written.
 */
static VBOXSTRICTRC iommuAmdDevTabBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
{
    RT_NOREF(pDevIns);
    RT_NOREF(offReg);

    /* Store only the recognized bits. */
    pThis->aDevTabBaseAddrs[0].u64 = u64Value & IOMMU_DEV_TAB_BAR_VALID_MASK;

    /* Paranoia. */
    Assert(pThis->aDevTabBaseAddrs[0].n.u9Size <= g_auDevTabSegMaxSizes[0]);
    return VINF_SUCCESS;
}


/**
 * Writes the Command Buffer Base Address Register.
 */
static VBOXSTRICTRC iommuAmdCmdBufBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
{
    RT_NOREF(pDevIns, offReg);

    /*
     * While this is not explicitly specified like the event log base address register,
     * the AMD IOMMU spec. does specify "CmdBufRun must be 0b to modify the command buffer registers properly".
     * Inconsistent specs :/
     */
    if (pThis->Status.n.u1CmdBufRunning)
    {
        LogFunc(("Setting CmdBufBar (%#RX64) when command buffer is running -> Ignored\n", u64Value));
        return VINF_SUCCESS;
    }

    /* Mask out all unrecognized bits. */
    CMD_BUF_BAR_T CmdBufBaseAddr;
    CmdBufBaseAddr.u64 = u64Value & IOMMU_CMD_BUF_BAR_VALID_MASK;

    /* Validate the length. */
    if (CmdBufBaseAddr.n.u4Len >= 8)
    {
        /* Update the register.
*/ pThis->CmdBufBaseAddr.u64 = CmdBufBaseAddr.u64; /* * Writing the command buffer base address, clears the command buffer head and tail pointers. * See AMD IOMMU spec. 2.4 "Commands". */ pThis->CmdBufHeadPtr.u64 = 0; pThis->CmdBufTailPtr.u64 = 0; } else LogFunc(("Command buffer length (%#x) invalid -> Ignored\n", CmdBufBaseAddr.n.u4Len)); return VINF_SUCCESS; } /** * Writes the Event Log Base Address Register. */ static VBOXSTRICTRC iommuAmdEvtLogBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) { RT_NOREF(pDevIns, offReg); /* * IOMMU behavior is undefined when software writes this register when event logging is running. * In our emulation, we ignore the write entirely. * See AMD IOMMU spec. "Event Log Base Address Register". */ if (pThis->Status.n.u1EvtLogRunning) { LogFunc(("Setting EvtLogBar (%#RX64) when event logging is running -> Ignored\n", u64Value)); return VINF_SUCCESS; } /* Mask out all unrecognized bits. */ u64Value &= IOMMU_EVT_LOG_BAR_VALID_MASK; EVT_LOG_BAR_T EvtLogBaseAddr; EvtLogBaseAddr.u64 = u64Value; /* Validate the length. */ if (EvtLogBaseAddr.n.u4Len >= 8) { /* Update the register. */ pThis->EvtLogBaseAddr.u64 = EvtLogBaseAddr.u64; /* * Writing the event log base address, clears the event log head and tail pointers. * See AMD IOMMU spec. 2.5 "Event Logging". */ pThis->EvtLogHeadPtr.u64 = 0; pThis->EvtLogTailPtr.u64 = 0; } else LogFunc(("Event log length (%#x) invalid -> Ignored\n", EvtLogBaseAddr.n.u4Len)); return VINF_SUCCESS; } /** * Writes the Control Register. */ static VBOXSTRICTRC iommuAmdCtrl_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) { RT_NOREF(pDevIns, offReg); /* Mask out all unrecognized bits. */ u64Value &= IOMMU_CTRL_VALID_MASK; IOMMU_CTRL_T NewCtrl; NewCtrl.u64 = u64Value; /* Ensure the device table segments are within limits. */ if (NewCtrl.n.u3DevTabSegEn <= pThis->ExtFeat.n.u2DevTabSegSup) { IOMMU_CTRL_T const OldCtrl = pThis->Ctrl; /* Update the register. 
*/ ASMAtomicWriteU64(&pThis->Ctrl.u64, NewCtrl.u64); bool const fNewIommuEn = NewCtrl.n.u1IommuEn; bool const fOldIommuEn = OldCtrl.n.u1IommuEn; /* Enable or disable event logging when the bit transitions. */ bool const fOldEvtLogEn = OldCtrl.n.u1EvtLogEn; bool const fNewEvtLogEn = NewCtrl.n.u1EvtLogEn; if ( fOldEvtLogEn != fNewEvtLogEn || fOldIommuEn != fNewIommuEn) { if ( fNewIommuEn && fNewEvtLogEn) { ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_EVT_LOG_OVERFLOW); ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_EVT_LOG_RUNNING); } else ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_EVT_LOG_RUNNING); } /* Enable or disable command buffer processing when the bit transitions. */ bool const fOldCmdBufEn = OldCtrl.n.u1CmdBufEn; bool const fNewCmdBufEn = NewCtrl.n.u1CmdBufEn; if ( fOldCmdBufEn != fNewCmdBufEn || fOldIommuEn != fNewIommuEn) { if ( fNewCmdBufEn && fNewIommuEn) { ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_CMD_BUF_RUNNING); LogFunc(("Command buffer enabled\n")); /* Wake up the command thread to start processing commands if any. */ iommuAmdCmdThreadWakeUpIfNeeded(pDevIns); } else { ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_CMD_BUF_RUNNING); LogFunc(("Command buffer disabled\n")); } } } else { LogFunc(("Invalid number of device table segments enabled, exceeds %#x (%#RX64) -> Ignored!\n", pThis->ExtFeat.n.u2DevTabSegSup, NewCtrl.u64)); } return VINF_SUCCESS; } /** * Writes to the Exclusion Range Base Address Register. */ static VBOXSTRICTRC iommuAmdExclRangeBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) { RT_NOREF(pDevIns, offReg); pThis->ExclRangeBaseAddr.u64 = u64Value & IOMMU_EXCL_RANGE_BAR_VALID_MASK; return VINF_SUCCESS; } /** * Writes to the Exclusion Range Limit Register. 
*/
static VBOXSTRICTRC iommuAmdExclRangeLimit_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
{
    RT_NOREF(pDevIns, offReg);

    /* Drop unrecognized bits and force the low 12 bits to one. */
    pThis->ExclRangeLimit.u64 = (u64Value & IOMMU_EXCL_RANGE_LIMIT_VALID_MASK) | UINT64_C(0xfff);
    return VINF_SUCCESS;
}


/**
 * Writes the Hardware Event Register (Hi).
 *
 * The value is stored unmodified.
 */
static VBOXSTRICTRC iommuAmdHwEvtHi_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
{
    /** @todo IOMMU: Why the heck is this marked read/write by the AMD IOMMU spec? */
    RT_NOREF(pDevIns, offReg);
    LogFlowFunc(("Writing %#RX64 to hardware event (Hi) register!\n", u64Value));
    pThis->HwEvtHi.u64 = u64Value;
    return VINF_SUCCESS;
}


/**
 * Writes the Hardware Event Register (Lo).
 *
 * The value is stored unmodified.
 */
static VBOXSTRICTRC iommuAmdHwEvtLo_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
{
    /** @todo IOMMU: Why the heck is this marked read/write by the AMD IOMMU spec? */
    RT_NOREF(pDevIns, offReg);
    LogFlowFunc(("Writing %#RX64 to hardware event (Lo) register!\n", u64Value));
    pThis->HwEvtLo = u64Value;
    return VINF_SUCCESS;
}


/**
 * Writes the Hardware Event Status Register.
 *
 * Bits 0 and 1 (HEV and HEO) are RW1C (Read/Write 1-to-Clear): writing 1
 * clears a bit that is currently set, writing 0 has no effect. Nothing is
 * changed while the Valid bit (bit 0) is clear, because the Overflow bit
 * (bit 1) is only meaningful when the Valid bit is set.
 */
static VBOXSTRICTRC iommuAmdHwEvtStatus_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
{
    RT_NOREF(pDevIns, offReg);

    /* Keep only the recognized bits of the written value. */
    uint64_t const fWritten   = u64Value & IOMMU_HW_EVT_STATUS_VALID_MASK;
    uint64_t const uCurStatus = pThis->HwEvtStatus.u64;

    if (uCurStatus & RT_BIT_64(0))
    {
        /* Clear every RW1C bit that software wrote as 1; clearing an already clear bit is a no-op. */
        uint64_t const fRw1cBits = RT_BIT_64(0) | RT_BIT_64(1);
        pThis->HwEvtStatus.u64 = uCurStatus & ~(fWritten & fRw1cBits);
    }
    return VINF_SUCCESS;
}


/**
 * Writes the Device Table Segment Base Address Register.
*/
static VBOXSTRICTRC iommuAmdDevTabSegBar_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
{
    RT_NOREF(pDevIns);

    /* Figure out which segment is being written (segment 0 is the device table base address register). */
    uint8_t const offSegment = (offReg - IOMMU_MMIO_OFF_DEV_TAB_SEG_FIRST) >> 3;
    uint8_t const idxSegment = offSegment + 1;
    Assert(idxSegment < RT_ELEMENTS(pThis->aDevTabBaseAddrs));

    /* Mask out all unrecognized bits. */
    u64Value &= IOMMU_DEV_TAB_SEG_BAR_VALID_MASK;
    DEV_TAB_BAR_T DevTabSegBar;
    DevTabSegBar.u64 = u64Value;

    /* Validate the size against the per-segment maximum. */
    uint16_t const uSegSize    = DevTabSegBar.n.u9Size;
    uint16_t const uMaxSegSize = g_auDevTabSegMaxSizes[idxSegment];
    if (uSegSize <= uMaxSegSize)
    {
        /* Update the register. */
        pThis->aDevTabBaseAddrs[idxSegment].u64 = u64Value;
    }
    else
        LogFunc(("Device table segment (%u) size invalid (%#RX32) -> Ignored\n", idxSegment, uSegSize));

    return VINF_SUCCESS;
}


/**
 * Writes the MSI Vector Register 0 (32-bit) and the MSI Vector Register 1 (32-bit).
 *
 * Only the upper dword (MSI Vector Register 1) is stored, masked with
 * IOMMU_MSI_VECTOR_1_VALID_MASK; the lower dword is ignored.
 */
static VBOXSTRICTRC iommuAmdDevMsiVector_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
{
    RT_NOREF(pDevIns, offReg);

    /* MSI Vector Register 0 is read-only. */
    /* MSI Vector Register 1. */
    uint32_t const uReg = u64Value >> 32;
    pThis->MiscInfo.au32[1] = uReg & IOMMU_MSI_VECTOR_1_VALID_MASK;
    return VINF_SUCCESS;
}


/**
 * Writes the MSI Capability Header Register (32-bit) or the MSI Address (Lo)
 * Register (32-bit).
 *
 * The lower dword updates only the MSI enable bit of the capability header in
 * PCI config space; the upper dword is written as the MSI address (low) after
 * masking with VBOX_MSI_ADDR_VALID_MASK.
 */
static VBOXSTRICTRC iommuAmdMsiCapHdrAndAddrLo_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
{
    RT_NOREF(pThis, offReg);
    PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
    PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);

    /* MSI capability header. */
    {
        uint32_t const uReg = u64Value;
        MSI_CAP_HDR_T MsiCapHdr;
        MsiCapHdr.u32 = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_CAP_HDR);
        MsiCapHdr.n.u1MsiEnable = RT_BOOL(uReg & IOMMU_MSI_CAP_HDR_MSI_EN_MASK);
        PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_CAP_HDR, MsiCapHdr.u32);
    }

    /* MSI Address Lo. */
    {
        uint32_t const uReg       = u64Value >> 32;
        uint32_t const uMsiAddrLo = uReg & VBOX_MSI_ADDR_VALID_MASK;
        PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_LO, uMsiAddrLo);
    }

    return VINF_SUCCESS;
}


/**
 * Writes the MSI Address (Hi) Register (32-bit) or the MSI data register (32-bit).
 *
 * The lower dword is written as the MSI address (high); the upper dword is
 * written as the MSI data after masking with VBOX_MSI_DATA_VALID_MASK.
 */
static VBOXSTRICTRC iommuAmdMsiAddrHiAndData_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
{
    RT_NOREF(pThis, offReg);
    PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
    PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);

    /* MSI Address Hi. */
    {
        uint32_t const uReg = u64Value;
        PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_HI, uReg);
    }

    /* MSI Data. */
    {
        uint32_t const uReg     = u64Value >> 32;
        uint32_t const uMsiData = uReg & VBOX_MSI_DATA_VALID_MASK;
        PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_DATA, uMsiData);
    }

    return VINF_SUCCESS;
}


/**
 * Writes the Command Buffer Head Pointer Register.
 *
 * The write is ignored while the command buffer is running or when the offset
 * lies outside the command buffer. An accepted write pokes the command thread.
 */
static VBOXSTRICTRC iommuAmdCmdBufHeadPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
{
    RT_NOREF(offReg);

    /*
     * IOMMU behavior is undefined when software writes this register when the command buffer is running.
     * In our emulation, we ignore the write entirely.
     * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers".
     */
    if (pThis->Status.n.u1CmdBufRunning)
    {
        LogFunc(("Setting CmdBufHeadPtr (%#RX64) when command buffer is running -> Ignored\n", u64Value));
        return VINF_SUCCESS;
    }

    /*
     * IOMMU behavior is undefined when software writes a value outside the buffer length.
     * In our emulation, we ignore the write entirely.
     */
    uint32_t const offBuf = u64Value & IOMMU_CMD_BUF_HEAD_PTR_VALID_MASK;
    uint32_t const cbBuf  = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len);
    Assert(cbBuf <= _512K);
    if (offBuf >= cbBuf)
    {
        /* Both arguments are 32-bit, so both use %#RX32. */
        LogFunc(("Setting CmdBufHeadPtr (%#RX32) to a value that exceeds buffer length (%#RX32) -> Ignored\n", offBuf, cbBuf));
        return VINF_SUCCESS;
    }

    /* Update the register. */
    pThis->CmdBufHeadPtr.au32[0] = offBuf;

    iommuAmdCmdThreadWakeUpIfNeeded(pDevIns);

    Log4Func(("Set CmdBufHeadPtr to %#RX32\n", offBuf));
    return VINF_SUCCESS;
}


/**
 * Writes the Command Buffer Tail Pointer Register.
 *
 * The write is ignored when the offset lies outside the command buffer. An
 * accepted write pokes the command thread.
 */
static VBOXSTRICTRC iommuAmdCmdBufTailPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value)
{
    RT_NOREF(offReg);

    /*
     * IOMMU behavior is undefined when software writes a value outside the buffer length.
     * In our emulation, we ignore the write entirely.
     * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers".
     */
    uint32_t const offBuf = u64Value & IOMMU_CMD_BUF_TAIL_PTR_VALID_MASK;
    uint32_t const cbBuf  = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len);
    Assert(cbBuf <= _512K);
    if (offBuf >= cbBuf)
    {
        LogFunc(("Setting CmdBufTailPtr (%#RX32) to a value that exceeds buffer length (%#RX32) -> Ignored\n", offBuf, cbBuf));
        return VINF_SUCCESS;
    }

    /*
     * IOMMU behavior is undefined if software advances the tail pointer equal to or beyond the
     * head pointer after adding one or more commands to the buffer.
     *
     * However, we cannot enforce this strictly because it's legal for software to shrink the
     * command queue (by reducing the offset) as well as wrap around the pointer (when head isn't
     * at 0). Software might even make the queue empty by making head and tail equal which is
     * allowed. I don't think we can or should try too hard to prevent software shooting itself
     * in the foot here. As long as we make sure the offset value is within the circular buffer
     * bounds (which we do by masking bits above) it should be sufficient.
     */
    pThis->CmdBufTailPtr.au32[0] = offBuf;

    iommuAmdCmdThreadWakeUpIfNeeded(pDevIns);

    Log4Func(("Set CmdBufTailPtr to %#RX32\n", offBuf));
    return VINF_SUCCESS;
}


/**
 * Writes the Event Log Head Pointer Register.
*/ static VBOXSTRICTRC iommuAmdEvtLogHeadPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) { RT_NOREF(pDevIns, offReg); /* * IOMMU behavior is undefined when software writes a value outside the buffer length. * In our emulation, we ignore the write entirely. * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers". */ uint32_t const offBuf = u64Value & IOMMU_EVT_LOG_HEAD_PTR_VALID_MASK; uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len); Assert(cbBuf <= _512K); if (offBuf >= cbBuf) { LogFunc(("Setting EvtLogHeadPtr (%#RX32) to a value that exceeds buffer length (%#RX32) -> Ignored\n", offBuf, cbBuf)); return VINF_SUCCESS; } /* Update the register. */ pThis->EvtLogHeadPtr.au32[0] = offBuf; LogFlowFunc(("Set EvtLogHeadPtr to %#RX32\n", offBuf)); return VINF_SUCCESS; } /** * Writes the Event Log Tail Pointer Register. */ static VBOXSTRICTRC iommuAmdEvtLogTailPtr_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) { RT_NOREF(pDevIns, offReg); NOREF(pThis); /* * IOMMU behavior is undefined when software writes this register when the event log is running. * In our emulation, we ignore the write entirely. * See AMD IOMMU spec. 3.3.13 "Command and Event Log Pointer Registers". */ if (pThis->Status.n.u1EvtLogRunning) { LogFunc(("Setting EvtLogTailPtr (%#RX64) when event log is running -> Ignored\n", u64Value)); return VINF_SUCCESS; } /* * IOMMU behavior is undefined when software writes a value outside the buffer length. * In our emulation, we ignore the write entirely. */ uint32_t const offBuf = u64Value & IOMMU_EVT_LOG_TAIL_PTR_VALID_MASK; uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len); Assert(cbBuf <= _512K); if (offBuf >= cbBuf) { LogFunc(("Setting EvtLogTailPtr (%#RX32) to a value that exceeds buffer length (%#RX32) -> Ignored\n", offBuf, cbBuf)); return VINF_SUCCESS; } /* Update the register. 
*/ pThis->EvtLogTailPtr.au32[0] = offBuf; LogFlowFunc(("Set EvtLogTailPtr to %#RX32\n", offBuf)); return VINF_SUCCESS; } /** * Writes the Status Register. */ static VBOXSTRICTRC iommuAmdStatus_w(PPDMDEVINS pDevIns, PIOMMU pThis, uint32_t offReg, uint64_t u64Value) { RT_NOREF(pDevIns, offReg); /* Mask out all unrecognized bits. */ u64Value &= IOMMU_STATUS_VALID_MASK; /* * Compute RW1C (read-only, write-1-to-clear) bits and preserve the rest (which are read-only). * Writing 0 to an RW1C bit has no effect. Writing 1 to an RW1C bit, clears the bit if it's already 1. */ IOMMU_STATUS_T const OldStatus = pThis->Status; uint64_t const fOldRw1cBits = (OldStatus.u64 & IOMMU_STATUS_RW1C_MASK); uint64_t const fOldRoBits = (OldStatus.u64 & ~IOMMU_STATUS_RW1C_MASK); uint64_t const fNewRw1cBits = (u64Value & IOMMU_STATUS_RW1C_MASK); uint64_t const uNewStatus = (fOldRw1cBits & ~fNewRw1cBits) | fOldRoBits; /* Update the register. */ ASMAtomicWriteU64(&pThis->Status.u64, uNewStatus); return VINF_SUCCESS; } /** * Register access table 0. * The MMIO offset of each entry must be a multiple of 8! */ static const IOMMUREGACC g_aRegAccess0[] = { /* MMIO off. 
Register name Read function Write function */ { /* 0x00 */ "DEV_TAB_BAR", iommuAmdDevTabBar_r, iommuAmdDevTabBar_w }, { /* 0x08 */ "CMD_BUF_BAR", iommuAmdCmdBufBar_r, iommuAmdCmdBufBar_w }, { /* 0x10 */ "EVT_LOG_BAR", iommuAmdEvtLogBar_r, iommuAmdEvtLogBar_w }, { /* 0x18 */ "CTRL", iommuAmdCtrl_r, iommuAmdCtrl_w }, { /* 0x20 */ "EXCL_BAR", iommuAmdExclRangeBar_r, iommuAmdExclRangeBar_w }, { /* 0x28 */ "EXCL_RANGE_LIMIT", iommuAmdExclRangeLimit_r, iommuAmdExclRangeLimit_w }, { /* 0x30 */ "EXT_FEAT", iommuAmdExtFeat_r, NULL }, { /* 0x38 */ "PPR_LOG_BAR", iommuAmdPprLogBar_r, NULL }, { /* 0x40 */ "HW_EVT_HI", iommuAmdHwEvtHi_r, iommuAmdHwEvtHi_w }, { /* 0x48 */ "HW_EVT_LO", iommuAmdHwEvtLo_r, iommuAmdHwEvtLo_w }, { /* 0x50 */ "HW_EVT_STATUS", iommuAmdHwEvtStatus_r, iommuAmdHwEvtStatus_w }, { /* 0x58 */ NULL, NULL, NULL }, { /* 0x60 */ "SMI_FLT_0", NULL, NULL }, { /* 0x68 */ "SMI_FLT_1", NULL, NULL }, { /* 0x70 */ "SMI_FLT_2", NULL, NULL }, { /* 0x78 */ "SMI_FLT_3", NULL, NULL }, { /* 0x80 */ "SMI_FLT_4", NULL, NULL }, { /* 0x88 */ "SMI_FLT_5", NULL, NULL }, { /* 0x90 */ "SMI_FLT_6", NULL, NULL }, { /* 0x98 */ "SMI_FLT_7", NULL, NULL }, { /* 0xa0 */ "SMI_FLT_8", NULL, NULL }, { /* 0xa8 */ "SMI_FLT_9", NULL, NULL }, { /* 0xb0 */ "SMI_FLT_10", NULL, NULL }, { /* 0xb8 */ "SMI_FLT_11", NULL, NULL }, { /* 0xc0 */ "SMI_FLT_12", NULL, NULL }, { /* 0xc8 */ "SMI_FLT_13", NULL, NULL }, { /* 0xd0 */ "SMI_FLT_14", NULL, NULL }, { /* 0xd8 */ "SMI_FLT_15", NULL, NULL }, { /* 0xe0 */ "GALOG_BAR", iommuAmdGALogBar_r, NULL }, { /* 0xe8 */ "GALOG_TAIL_ADDR", NULL, NULL }, { /* 0xf0 */ "PPR_LOG_B_BAR", iommuAmdPprLogBBaseAddr_r, NULL }, { /* 0xf8 */ "PPR_EVT_B_BAR", iommuAmdEvtLogBBaseAddr_r, NULL }, { /* 0x100 */ "DEV_TAB_SEG_1", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w }, { /* 0x108 */ "DEV_TAB_SEG_2", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w }, { /* 0x110 */ "DEV_TAB_SEG_3", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w }, { /* 0x118 */ "DEV_TAB_SEG_4", 
iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w }, { /* 0x120 */ "DEV_TAB_SEG_5", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w }, { /* 0x128 */ "DEV_TAB_SEG_6", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w }, { /* 0x130 */ "DEV_TAB_SEG_7", iommuAmdDevTabSegBar_r, iommuAmdDevTabSegBar_w }, { /* 0x138 */ "DEV_SPECIFIC_FEAT", iommuAmdDevSpecificFeat_r, NULL }, { /* 0x140 */ "DEV_SPECIFIC_CTRL", iommuAmdDevSpecificCtrl_r, NULL }, { /* 0x148 */ "DEV_SPECIFIC_STATUS", iommuAmdDevSpecificStatus_r, NULL }, { /* 0x150 */ "MSI_VECTOR_0 or MSI_VECTOR_1", iommuAmdDevMsiVector_r, iommuAmdDevMsiVector_w }, { /* 0x158 */ "MSI_CAP_HDR or MSI_ADDR_LO", iommuAmdMsiCapHdrAndAddrLo_r, iommuAmdMsiCapHdrAndAddrLo_w }, { /* 0x160 */ "MSI_ADDR_HI or MSI_DATA", iommuAmdMsiAddrHiAndData_r, iommuAmdMsiAddrHiAndData_w }, { /* 0x168 */ "MSI_MAPPING_CAP_HDR or PERF_OPT_CTRL", NULL, NULL }, { /* 0x170 */ "XT_GEN_INTR_CTRL", NULL, NULL }, { /* 0x178 */ "XT_PPR_INTR_CTRL", NULL, NULL }, { /* 0x180 */ "XT_GALOG_INT_CTRL", NULL, NULL }, }; AssertCompile(RT_ELEMENTS(g_aRegAccess0) == (IOMMU_MMIO_OFF_QWORD_TABLE_0_END - IOMMU_MMIO_OFF_QWORD_TABLE_0_START) / 8); /** * Register access table 1. * The MMIO offset of each entry must be a multiple of 8! 
*/ static const IOMMUREGACC g_aRegAccess1[] = { /* MMIO offset Register name Read function Write function */ { /* 0x200 */ "MARC_APER_BAR_0", NULL, NULL }, { /* 0x208 */ "MARC_APER_RELOC_0", NULL, NULL }, { /* 0x210 */ "MARC_APER_LEN_0", NULL, NULL }, { /* 0x218 */ "MARC_APER_BAR_1", NULL, NULL }, { /* 0x220 */ "MARC_APER_RELOC_1", NULL, NULL }, { /* 0x228 */ "MARC_APER_LEN_1", NULL, NULL }, { /* 0x230 */ "MARC_APER_BAR_2", NULL, NULL }, { /* 0x238 */ "MARC_APER_RELOC_2", NULL, NULL }, { /* 0x240 */ "MARC_APER_LEN_2", NULL, NULL }, { /* 0x248 */ "MARC_APER_BAR_3", NULL, NULL }, { /* 0x250 */ "MARC_APER_RELOC_3", NULL, NULL }, { /* 0x258 */ "MARC_APER_LEN_3", NULL, NULL } }; AssertCompile(RT_ELEMENTS(g_aRegAccess1) == (IOMMU_MMIO_OFF_QWORD_TABLE_1_END - IOMMU_MMIO_OFF_QWORD_TABLE_1_START) / 8); /** * Register access table 2. * The MMIO offset of each entry must be a multiple of 8! */ static const IOMMUREGACC g_aRegAccess2[] = { /* MMIO offset Register name Read Function Write function */ { /* 0x1ff8 */ "RSVD_REG", NULL, NULL }, { /* 0x2000 */ "CMD_BUF_HEAD_PTR", iommuAmdCmdBufHeadPtr_r, iommuAmdCmdBufHeadPtr_w }, { /* 0x2008 */ "CMD_BUF_TAIL_PTR", iommuAmdCmdBufTailPtr_r , iommuAmdCmdBufTailPtr_w }, { /* 0x2010 */ "EVT_LOG_HEAD_PTR", iommuAmdEvtLogHeadPtr_r, iommuAmdEvtLogHeadPtr_w }, { /* 0x2018 */ "EVT_LOG_TAIL_PTR", iommuAmdEvtLogTailPtr_r, iommuAmdEvtLogTailPtr_w }, { /* 0x2020 */ "STATUS", iommuAmdStatus_r, iommuAmdStatus_w }, { /* 0x2028 */ NULL, NULL, NULL }, { /* 0x2030 */ "PPR_LOG_HEAD_PTR", NULL, NULL }, { /* 0x2038 */ "PPR_LOG_TAIL_PTR", NULL, NULL }, { /* 0x2040 */ "GALOG_HEAD_PTR", NULL, NULL }, { /* 0x2048 */ "GALOG_TAIL_PTR", NULL, NULL }, { /* 0x2050 */ "PPR_LOG_B_HEAD_PTR", NULL, NULL }, { /* 0x2058 */ "PPR_LOG_B_TAIL_PTR", NULL, NULL }, { /* 0x2060 */ NULL, NULL, NULL }, { /* 0x2068 */ NULL, NULL, NULL }, { /* 0x2070 */ "EVT_LOG_B_HEAD_PTR", NULL, NULL }, { /* 0x2078 */ "EVT_LOG_B_TAIL_PTR", NULL, NULL }, { /* 0x2080 */ "PPR_LOG_AUTO_RESP", NULL, 
NULL }, { /* 0x2088 */ "PPR_LOG_OVERFLOW_EARLY", NULL, NULL }, { /* 0x2090 */ "PPR_LOG_B_OVERFLOW_EARLY", NULL, NULL } }; AssertCompile(RT_ELEMENTS(g_aRegAccess2) == (IOMMU_MMIO_OFF_QWORD_TABLE_2_END - IOMMU_MMIO_OFF_QWORD_TABLE_2_START) / 8); /** * Gets the register access structure given its MMIO offset. * * @returns The register access structure, or NULL if the offset is invalid. * @param off The MMIO offset of the register being accessed. */ static PCIOMMUREGACC iommuAmdGetRegAccess(uint32_t off) { /* Figure out which table the register belongs to and validate its index. */ PCIOMMUREGACC pReg; if (off < IOMMU_MMIO_OFF_QWORD_TABLE_0_END) { uint32_t const idxReg = off >> 3; Assert(idxReg < RT_ELEMENTS(g_aRegAccess0)); pReg = &g_aRegAccess0[idxReg]; } else if ( off < IOMMU_MMIO_OFF_QWORD_TABLE_1_END && off >= IOMMU_MMIO_OFF_QWORD_TABLE_1_START) { uint32_t const idxReg = (off - IOMMU_MMIO_OFF_QWORD_TABLE_1_START) >> 3; Assert(idxReg < RT_ELEMENTS(g_aRegAccess1)); pReg = &g_aRegAccess1[idxReg]; } else if ( off < IOMMU_MMIO_OFF_QWORD_TABLE_2_END && off >= IOMMU_MMIO_OFF_QWORD_TABLE_2_START) { uint32_t const idxReg = (off - IOMMU_MMIO_OFF_QWORD_TABLE_2_START) >> 3; Assert(idxReg < RT_ELEMENTS(g_aRegAccess2)); pReg = &g_aRegAccess2[idxReg]; } else pReg = NULL; return pReg; } /** * Writes an IOMMU register (32-bit and 64-bit). * * @returns Strict VBox status code. * @param pDevIns The IOMMU device instance. * @param off MMIO byte offset to the register. * @param cb The size of the write access. * @param uValue The value being written. * * @thread EMT. */ static VBOXSTRICTRC iommuAmdRegisterWrite(PPDMDEVINS pDevIns, uint32_t off, uint8_t cb, uint64_t uValue) { /* * Validate the access in case of IOM bug or incorrect assumption. 
*/ Assert(off < IOMMU_MMIO_REGION_SIZE); AssertMsgReturn(cb == 4 || cb == 8, ("Invalid access size %u\n", cb), VINF_SUCCESS); AssertMsgReturn(!(off & 3), ("Invalid offset %#x\n", off), VINF_SUCCESS); Log4Func(("off=%#x cb=%u uValue=%#RX64\n", off, cb, uValue)); PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC); PCIOMMUREGACC pReg = iommuAmdGetRegAccess(off); if (pReg) { /* likely */ } else { LogFunc(("Writing unknown register %#x with %#RX64 -> Ignored\n", off, uValue)); return VINF_SUCCESS; } /* If a write handler doesn't exist, it's either a reserved or read-only register. */ if (pReg->pfnWrite) { /* likely */ } else { LogFunc(("Writing reserved or read-only register off=%#x (cb=%u) with %#RX64 -> Ignored\n", off, cb, uValue)); return VINF_SUCCESS; } /* * If the write access is 64-bits and aligned on a 64-bit boundary, dispatch right away. * This handles writes to 64-bit registers as well as aligned, 64-bit writes to two * consecutive 32-bit registers. */ if (cb == 8) { if (!(off & 7)) { IOMMU_LOCK_RET(pDevIns, pThisCC, VINF_IOM_R3_MMIO_WRITE); VBOXSTRICTRC rcStrict = pReg->pfnWrite(pDevIns, pThis, off, uValue); IOMMU_UNLOCK(pDevIns, pThisCC); return rcStrict; } LogFunc(("Misaligned access while writing register at off=%#x (cb=%u) with %#RX64 -> Ignored\n", off, cb, uValue)); return VINF_SUCCESS; } /* We shouldn't get sizes other than 32 bits here as we've specified so with IOM. */ Assert(cb == 4); if (!(off & 7)) { VBOXSTRICTRC rcStrict; IOMMU_LOCK_RET(pDevIns, pThisCC, VINF_IOM_R3_MMIO_WRITE); /* * Lower 32 bits of a 64-bit register or a 32-bit register is being written. * Merge with higher 32 bits (after reading the full 64-bits) and perform a 64-bit write. 
*/ uint64_t u64Read; if (pReg->pfnRead) rcStrict = pReg->pfnRead(pDevIns, pThis, off, &u64Read); else { rcStrict = VINF_SUCCESS; u64Read = 0; } if (RT_SUCCESS(rcStrict)) { uValue = (u64Read & UINT64_C(0xffffffff00000000)) | uValue; rcStrict = pReg->pfnWrite(pDevIns, pThis, off, uValue); } else LogFunc(("Reading off %#x during split write failed! rc=%Rrc\n -> Ignored", off, VBOXSTRICTRC_VAL(rcStrict))); IOMMU_UNLOCK(pDevIns, pThisCC); return rcStrict; } /* * Higher 32 bits of a 64-bit register or a 32-bit register at a 32-bit boundary is being written. * Merge with lower 32 bits (after reading the full 64-bits) and perform a 64-bit write. */ VBOXSTRICTRC rcStrict; Assert(!(off & 3)); Assert(off & 7); Assert(off >= 4); uint64_t u64Read; if (pReg->pfnRead) rcStrict = pReg->pfnRead(pDevIns, pThis, off - 4, &u64Read); else { rcStrict = VINF_SUCCESS; u64Read = 0; } if (RT_SUCCESS(rcStrict)) { uValue = (uValue << 32) | (u64Read & UINT64_C(0xffffffff)); rcStrict = pReg->pfnWrite(pDevIns, pThis, off - 4, uValue); } else LogFunc(("Reading off %#x during split write failed! rc=%Rrc\n -> Ignored", off, VBOXSTRICTRC_VAL(rcStrict))); IOMMU_UNLOCK(pDevIns, pThisCC); return rcStrict; } /** * Reads an IOMMU register (64-bit) given its MMIO offset. * * All reads are 64-bit but reads to 32-bit registers that are aligned on an 8-byte * boundary include the lower half of the subsequent register. * * This is because most registers are 64-bit and aligned on 8-byte boundaries but * some are really 32-bit registers aligned on an 8-byte boundary. We cannot assume * software will only perform 32-bit reads on those 32-bit registers that are * aligned on 8-byte boundaries. * * @returns Strict VBox status code. * @param pDevIns The IOMMU device instance. * @param off The MMIO offset of the register in bytes. * @param puResult Where to store the value being read. * * @thread EMT. 
*/
static VBOXSTRICTRC iommuAmdRegisterRead(PPDMDEVINS pDevIns, uint32_t off, uint64_t *puResult)
{
    Assert(off < IOMMU_MMIO_REGION_SIZE);
    Assert(!(off & 3));     /* Dword alignment is the weakest alignment accepted (qword aligned implies it). */

    Log4Func(("off=%#x\n", off));

    PIOMMU      pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
    PIOMMUCC    pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);
    PCPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
    PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev); NOREF(pPciDev);

    PCIOMMUREGACC pReg = iommuAmdGetRegAccess(off);
    if (pReg)
    { /* likely */ }
    else
    {
        LogFunc(("Reading unknown register %#x -> Ignored\n", off));
        return VINF_IOM_MMIO_UNUSED_FF;
    }

    /* If a read handler doesn't exist, it's a reserved or unknown register. */
    if (pReg->pfnRead)
    { /* likely */ }
    else
    {
        LogFunc(("Reading reserved or unknown register off=%#x -> returning 0s\n", off));
        return VINF_IOM_MMIO_UNUSED_00;
    }

    /*
     * If the read access is aligned on a 64-bit boundary, read the full 64-bits and return.
     * The caller takes care of truncating upper 32 bits for 32-bit reads.
     */
    if (!(off & 7))
    {
        IOMMU_LOCK_RET(pDevIns, pThisCC, VINF_IOM_R3_MMIO_READ);
        VBOXSTRICTRC rcStrict = pReg->pfnRead(pDevIns, pThis, off, puResult);
        IOMMU_UNLOCK(pDevIns, pThisCC);
        return rcStrict;
    }

    /*
     * High 32 bits of a 64-bit register or a 32-bit register at a non 64-bit boundary is being read.
     * Read full 64 bits at the previous 64-bit boundary but return only the high 32 bits.
     */
    Assert(!(off & 3));
    Assert(off & 7);
    Assert(off >= 4);
    IOMMU_LOCK_RET(pDevIns, pThisCC, VINF_IOM_R3_MMIO_READ);
    VBOXSTRICTRC rcStrict = pReg->pfnRead(pDevIns, pThis, off - 4, puResult);
    IOMMU_UNLOCK(pDevIns, pThisCC);
    if (RT_SUCCESS(rcStrict))
        *puResult >>= 32;
    else
    {
        /* Do not hand a partially read value back to the caller. */
        *puResult = 0;
        LogFunc(("Reading off %#x during split read failed! rc=%Rrc -> Ignored\n", off, VBOXSTRICTRC_VAL(rcStrict)));
    }

    return rcStrict;
}


/**
 * Raises the MSI interrupt for the IOMMU device.
 *
 * @param   pDevIns     The IOMMU device instance.
 *
 * @thread  Any.
 * @remarks The IOMMU lock may or may not be held.
*/ static void iommuAmdMsiInterruptRaise(PPDMDEVINS pDevIns) { LogFlowFunc(("\n")); if (iommuAmdIsMsiEnabled(pDevIns)) { LogFunc(("Raising MSI\n")); PDMDevHlpPCISetIrq(pDevIns, 0, PDM_IRQ_LEVEL_HIGH); } } #if 0 /** * Clears the MSI interrupt for the IOMMU device. * * @param pDevIns The IOMMU device instance. * * @thread Any. * @remarks The IOMMU lock may or may not be held. */ static void iommuAmdMsiInterruptClear(PPDMDEVINS pDevIns) { if (iommuAmdIsMsiEnabled(pDevIns)) PDMDevHlpPCISetIrq(pDevIns, 0, PDM_IRQ_LEVEL_LOW); } #endif /** * Writes an entry to the event log in memory. * * @returns VBox status code. * @param pDevIns The IOMMU device instance. * @param pEvent The event to log. * * @thread Any. * @remarks The IOMMU lock must be held while calling this function. */ static int iommuAmdEvtLogEntryWrite(PPDMDEVINS pDevIns, PCEVT_GENERIC_T pEvent) { PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC); IOMMU_LOCK(pDevIns, pThisCC); /* Check if event logging is active and the log has not overflowed. */ IOMMU_STATUS_T const Status = pThis->Status; if ( Status.n.u1EvtLogRunning && !Status.n.u1EvtOverflow) { uint32_t const cbEvt = sizeof(*pEvent); /* Get the offset we need to write the event to in memory (circular buffer offset). */ uint32_t const offEvt = pThis->EvtLogTailPtr.n.off; Assert(!(offEvt & ~IOMMU_EVT_LOG_TAIL_PTR_VALID_MASK)); /* Ensure we have space in the event log. */ uint32_t const cMaxEvts = iommuAmdGetBufMaxEntries(pThis->EvtLogBaseAddr.n.u4Len); uint32_t const cEvts = iommuAmdGetEvtLogEntryCount(pThis); if (cEvts + 1 < cMaxEvts) { /* Write the event log entry to memory. */ RTGCPHYS const GCPhysEvtLog = pThis->EvtLogBaseAddr.n.u40Base << X86_PAGE_4K_SHIFT; RTGCPHYS const GCPhysEvtLogEntry = GCPhysEvtLog + offEvt; int rc = PDMDevHlpPCIPhysWrite(pDevIns, GCPhysEvtLogEntry, pEvent, cbEvt); if (RT_FAILURE(rc)) LogFunc(("Failed to write event log entry at %#RGp. 
rc=%Rrc\n", GCPhysEvtLogEntry, rc)); /* Increment the event log tail pointer. */ uint32_t const cbEvtLog = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len); pThis->EvtLogTailPtr.n.off = (offEvt + cbEvt) % cbEvtLog; /* Indicate that an event log entry was written. */ ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_EVT_LOG_INTR); /* Check and signal an interrupt if software wants to receive one when an event log entry is written. */ if (pThis->Ctrl.n.u1EvtIntrEn) iommuAmdMsiInterruptRaise(pDevIns); } else { /* Indicate that the event log has overflowed. */ ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_EVT_LOG_OVERFLOW); /* Check and signal an interrupt if software wants to receive one when the event log has overflowed. */ if (pThis->Ctrl.n.u1EvtIntrEn) iommuAmdMsiInterruptRaise(pDevIns); } } IOMMU_UNLOCK(pDevIns, pThisCC); return VINF_SUCCESS; } /** * Sets an event in the hardware error registers. * * @param pDevIns The IOMMU device instance. * @param pEvent The event. * * @thread Any. */ static void iommuAmdHwErrorSet(PPDMDEVINS pDevIns, PCEVT_GENERIC_T pEvent) { PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); if (pThis->ExtFeat.n.u1HwErrorSup) { if (pThis->HwEvtStatus.n.u1Valid) pThis->HwEvtStatus.n.u1Overflow = 1; pThis->HwEvtStatus.n.u1Valid = 1; pThis->HwEvtHi.u64 = RT_MAKE_U64(pEvent->au32[0], pEvent->au32[1]); pThis->HwEvtLo = RT_MAKE_U64(pEvent->au32[2], pEvent->au32[3]); Assert( pThis->HwEvtHi.n.u4EvtCode == IOMMU_EVT_DEV_TAB_HW_ERROR || pThis->HwEvtHi.n.u4EvtCode == IOMMU_EVT_PAGE_TAB_HW_ERROR || pThis->HwEvtHi.n.u4EvtCode == IOMMU_EVT_COMMAND_HW_ERROR); } } /** * Initializes a PAGE_TAB_HARDWARE_ERROR event. * * @param idDevice The device ID (bus, device, function). * @param idDomain The domain ID. * @param GCPhysPtEntity The system physical address of the page table * entity. * @param enmOp The IOMMU operation being performed. * @param pEvtPageTabHwErr Where to store the initialized event. 
*/ static void iommuAmdPageTabHwErrorEventInit(uint16_t idDevice, uint16_t idDomain, RTGCPHYS GCPhysPtEntity, IOMMUOP enmOp, PEVT_PAGE_TAB_HW_ERR_T pEvtPageTabHwErr) { memset(pEvtPageTabHwErr, 0, sizeof(*pEvtPageTabHwErr)); pEvtPageTabHwErr->n.u16DevId = idDevice; pEvtPageTabHwErr->n.u16DomainOrPasidLo = idDomain; pEvtPageTabHwErr->n.u1GuestOrNested = 0; pEvtPageTabHwErr->n.u1Interrupt = RT_BOOL(enmOp == IOMMUOP_INTR_REQ); pEvtPageTabHwErr->n.u1ReadWrite = RT_BOOL(enmOp == IOMMUOP_MEM_WRITE); pEvtPageTabHwErr->n.u1Translation = RT_BOOL(enmOp == IOMMUOP_TRANSLATE_REQ); pEvtPageTabHwErr->n.u2Type = enmOp == IOMMUOP_CMD ? HWEVTTYPE_DATA_ERROR : HWEVTTYPE_TARGET_ABORT; pEvtPageTabHwErr->n.u4EvtCode = IOMMU_EVT_PAGE_TAB_HW_ERROR; pEvtPageTabHwErr->n.u64Addr = GCPhysPtEntity; } /** * Raises a PAGE_TAB_HARDWARE_ERROR event. * * @param pDevIns The IOMMU device instance. * @param enmOp The IOMMU operation being performed. * @param pEvtPageTabHwErr The page table hardware error event. * * @thread Any. */ static void iommuAmdPageTabHwErrorEventRaise(PPDMDEVINS pDevIns, IOMMUOP enmOp, PEVT_PAGE_TAB_HW_ERR_T pEvtPageTabHwErr) { AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_PAGE_TAB_HW_ERR_T)); PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtPageTabHwErr; PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC); IOMMU_LOCK(pDevIns, pThisCC); iommuAmdHwErrorSet(pDevIns, (PCEVT_GENERIC_T)pEvent); iommuAmdEvtLogEntryWrite(pDevIns, (PCEVT_GENERIC_T)pEvent); if (enmOp != IOMMUOP_CMD) iommuAmdSetPciTargetAbort(pDevIns); IOMMU_UNLOCK(pDevIns, pThisCC); LogFunc(("Raised PAGE_TAB_HARDWARE_ERROR. idDevice=%#x idDomain=%#x GCPhysPtEntity=%#RGp enmOp=%u u2Type=%u\n", pEvtPageTabHwErr->n.u16DevId, pEvtPageTabHwErr->n.u16DomainOrPasidLo, pEvtPageTabHwErr->n.u64Addr, enmOp, pEvtPageTabHwErr->n.u2Type)); } #ifdef IN_RING3 /** * Initializes a COMMAND_HARDWARE_ERROR event. * * @param GCPhysAddr The system physical address the IOMMU attempted to access. 
* @param pEvtCmdHwErr Where to store the initialized event. */ static void iommuAmdCmdHwErrorEventInit(RTGCPHYS GCPhysAddr, PEVT_CMD_HW_ERR_T pEvtCmdHwErr) { memset(pEvtCmdHwErr, 0, sizeof(*pEvtCmdHwErr)); pEvtCmdHwErr->n.u2Type = HWEVTTYPE_DATA_ERROR; pEvtCmdHwErr->n.u4EvtCode = IOMMU_EVT_COMMAND_HW_ERROR; pEvtCmdHwErr->n.u64Addr = GCPhysAddr; } /** * Raises a COMMAND_HARDWARE_ERROR event. * * @param pDevIns The IOMMU device instance. * @param pEvtCmdHwErr The command hardware error event. * * @thread Any. */ static void iommuAmdCmdHwErrorEventRaise(PPDMDEVINS pDevIns, PCEVT_CMD_HW_ERR_T pEvtCmdHwErr) { AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_CMD_HW_ERR_T)); PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtCmdHwErr; PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC); IOMMU_LOCK(pDevIns, pThisCC); iommuAmdHwErrorSet(pDevIns, (PCEVT_GENERIC_T)pEvent); iommuAmdEvtLogEntryWrite(pDevIns, (PCEVT_GENERIC_T)pEvent); ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_CMD_BUF_RUNNING); IOMMU_UNLOCK(pDevIns, pThisCC); LogFunc(("Raised COMMAND_HARDWARE_ERROR. GCPhysCmd=%#RGp u2Type=%u\n", pEvtCmdHwErr->n.u64Addr, pEvtCmdHwErr->n.u2Type)); } #endif /* IN_RING3 */ /** * Initializes a DEV_TAB_HARDWARE_ERROR event. * * @param idDevice The device ID (bus, device, function). * @param GCPhysDte The system physical address of the failed device table * access. * @param enmOp The IOMMU operation being performed. * @param pEvtDevTabHwErr Where to store the initialized event. */ static void iommuAmdDevTabHwErrorEventInit(uint16_t idDevice, RTGCPHYS GCPhysDte, IOMMUOP enmOp, PEVT_DEV_TAB_HW_ERROR_T pEvtDevTabHwErr) { memset(pEvtDevTabHwErr, 0, sizeof(*pEvtDevTabHwErr)); pEvtDevTabHwErr->n.u16DevId = idDevice; pEvtDevTabHwErr->n.u1Intr = RT_BOOL(enmOp == IOMMUOP_INTR_REQ); /** @todo IOMMU: Any other transaction type that can set read/write bit? 
*/ pEvtDevTabHwErr->n.u1ReadWrite = RT_BOOL(enmOp == IOMMUOP_MEM_WRITE); pEvtDevTabHwErr->n.u1Translation = RT_BOOL(enmOp == IOMMUOP_TRANSLATE_REQ); pEvtDevTabHwErr->n.u2Type = enmOp == IOMMUOP_CMD ? HWEVTTYPE_DATA_ERROR : HWEVTTYPE_TARGET_ABORT; pEvtDevTabHwErr->n.u4EvtCode = IOMMU_EVT_DEV_TAB_HW_ERROR; pEvtDevTabHwErr->n.u64Addr = GCPhysDte; } /** * Raises a DEV_TAB_HARDWARE_ERROR event. * * @param pDevIns The IOMMU device instance. * @param enmOp The IOMMU operation being performed. * @param pEvtDevTabHwErr The device table hardware error event. * * @thread Any. */ static void iommuAmdDevTabHwErrorEventRaise(PPDMDEVINS pDevIns, IOMMUOP enmOp, PEVT_DEV_TAB_HW_ERROR_T pEvtDevTabHwErr) { AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_DEV_TAB_HW_ERROR_T)); PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtDevTabHwErr; PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC); IOMMU_LOCK(pDevIns, pThisCC); iommuAmdHwErrorSet(pDevIns, (PCEVT_GENERIC_T)pEvent); iommuAmdEvtLogEntryWrite(pDevIns, (PCEVT_GENERIC_T)pEvent); if (enmOp != IOMMUOP_CMD) iommuAmdSetPciTargetAbort(pDevIns); IOMMU_UNLOCK(pDevIns, pThisCC); LogFunc(("Raised DEV_TAB_HARDWARE_ERROR. idDevice=%#x GCPhysDte=%#RGp enmOp=%u u2Type=%u\n", pEvtDevTabHwErr->n.u16DevId, pEvtDevTabHwErr->n.u64Addr, enmOp, pEvtDevTabHwErr->n.u2Type)); } #ifdef IN_RING3 /** * Initializes an ILLEGAL_COMMAND_ERROR event. * * @param GCPhysCmd The system physical address of the failed command * access. * @param pEvtIllegalCmd Where to store the initialized event. */ static void iommuAmdIllegalCmdEventInit(RTGCPHYS GCPhysCmd, PEVT_ILLEGAL_CMD_ERR_T pEvtIllegalCmd) { Assert(!(GCPhysCmd & UINT64_C(0xf))); memset(pEvtIllegalCmd, 0, sizeof(*pEvtIllegalCmd)); pEvtIllegalCmd->n.u4EvtCode = IOMMU_EVT_ILLEGAL_CMD_ERROR; pEvtIllegalCmd->n.u64Addr = GCPhysCmd; } /** * Raises an ILLEGAL_COMMAND_ERROR event. * * @param pDevIns The IOMMU device instance. * @param pEvtIllegalCmd The illegal command error event. 
*/ static void iommuAmdIllegalCmdEventRaise(PPDMDEVINS pDevIns, PCEVT_ILLEGAL_CMD_ERR_T pEvtIllegalCmd) { AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_ILLEGAL_DTE_T)); PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtIllegalCmd; PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); iommuAmdEvtLogEntryWrite(pDevIns, pEvent); ASMAtomicAndU64(&pThis->Status.u64, ~IOMMU_STATUS_CMD_BUF_RUNNING); LogFunc(("Raised ILLEGAL_COMMAND_ERROR. Addr=%#RGp\n", pEvtIllegalCmd->n.u64Addr)); } #endif /* IN_RING3 */ /** * Initializes an ILLEGAL_DEV_TABLE_ENTRY event. * * @param idDevice The device ID (bus, device, function). * @param uIova The I/O virtual address. * @param fRsvdNotZero Whether reserved bits are not zero. Pass @c false if the * event was caused by an invalid level encoding in the * DTE. * @param enmOp The IOMMU operation being performed. * @param pEvtIllegalDte Where to store the initialized event. */ static void iommuAmdIllegalDteEventInit(uint16_t idDevice, uint64_t uIova, bool fRsvdNotZero, IOMMUOP enmOp, PEVT_ILLEGAL_DTE_T pEvtIllegalDte) { memset(pEvtIllegalDte, 0, sizeof(*pEvtIllegalDte)); pEvtIllegalDte->n.u16DevId = idDevice; pEvtIllegalDte->n.u1Interrupt = RT_BOOL(enmOp == IOMMUOP_INTR_REQ); pEvtIllegalDte->n.u1ReadWrite = RT_BOOL(enmOp == IOMMUOP_MEM_WRITE); pEvtIllegalDte->n.u1RsvdNotZero = fRsvdNotZero; pEvtIllegalDte->n.u1Translation = RT_BOOL(enmOp == IOMMUOP_TRANSLATE_REQ); pEvtIllegalDte->n.u4EvtCode = IOMMU_EVT_ILLEGAL_DEV_TAB_ENTRY; pEvtIllegalDte->n.u64Addr = uIova & ~UINT64_C(0x3); /** @todo r=ramshankar: Not sure why the last 2 bits are marked as reserved by the * IOMMU spec here but not for this field for I/O page fault event. */ Assert(!(uIova & UINT64_C(0x3))); } /** * Raises an ILLEGAL_DEV_TABLE_ENTRY event. * * @param pDevIns The IOMMU instance data. * @param enmOp The IOMMU operation being performed. * @param pEvtIllegalDte The illegal device table entry event. * @param enmEvtType The illegal device table entry event type. * * @thread Any. 
*/
static void iommuAmdIllegalDteEventRaise(PPDMDEVINS pDevIns, IOMMUOP enmOp, PCEVT_ILLEGAL_DTE_T pEvtIllegalDte,
                                         EVT_ILLEGAL_DTE_TYPE_T enmEvtType)
{
    /* The event is handed to the generic log writer as-is, so both layouts must have the same size. */
    AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_ILLEGAL_DTE_T));

    /* The event type is only consumed by the log statement below. */
    NOREF(enmEvtType);

    iommuAmdEvtLogEntryWrite(pDevIns, (PCEVT_GENERIC_T)pEvtIllegalDte);

    /* Everything except a command additionally signals a PCI target abort. */
    bool const fTargetAbort = enmOp != IOMMUOP_CMD;
    if (fTargetAbort)
        iommuAmdSetPciTargetAbort(pDevIns);

    LogFunc(("Raised ILLEGAL_DTE_EVENT. idDevice=%#x uIova=%#RX64 enmOp=%u enmEvtType=%u\n", pEvtIllegalDte->n.u16DevId,
             pEvtIllegalDte->n.u64Addr, enmOp, enmEvtType));
}


/**
 * Initializes an IO_PAGE_FAULT event.
 *
 * @param   idDevice            The device ID (bus, device, function).
 * @param   idDomain            The domain ID.
 * @param   uIova               The I/O virtual address being accessed.
 * @param   fPresent            Transaction to a page marked as present (including
 *                              DTE.V=1) or interrupt marked as remapped
 *                              (IRTE.RemapEn=1).
 * @param   fRsvdNotZero        Whether reserved bits are not zero. Pass @c false if
 *                              the I/O page fault was caused by invalid level
 *                              encoding.
 * @param   fPermDenied         Permission denied for the address being accessed.
 * @param   enmOp               The IOMMU operation being performed.
 * @param   pEvtIoPageFault     Where to store the initialized event.
*/ static void iommuAmdIoPageFaultEventInit(uint16_t idDevice, uint16_t idDomain, uint64_t uIova, bool fPresent, bool fRsvdNotZero, bool fPermDenied, IOMMUOP enmOp, PEVT_IO_PAGE_FAULT_T pEvtIoPageFault) { Assert(!fPermDenied || fPresent); memset(pEvtIoPageFault, 0, sizeof(*pEvtIoPageFault)); pEvtIoPageFault->n.u16DevId = idDevice; //pEvtIoPageFault->n.u4PasidHi = 0; pEvtIoPageFault->n.u16DomainOrPasidLo = idDomain; //pEvtIoPageFault->n.u1GuestOrNested = 0; //pEvtIoPageFault->n.u1NoExecute = 0; //pEvtIoPageFault->n.u1User = 0; pEvtIoPageFault->n.u1Interrupt = RT_BOOL(enmOp == IOMMUOP_INTR_REQ); pEvtIoPageFault->n.u1Present = fPresent; pEvtIoPageFault->n.u1ReadWrite = RT_BOOL(enmOp == IOMMUOP_MEM_WRITE); pEvtIoPageFault->n.u1PermDenied = fPermDenied; pEvtIoPageFault->n.u1RsvdNotZero = fRsvdNotZero; pEvtIoPageFault->n.u1Translation = RT_BOOL(enmOp == IOMMUOP_TRANSLATE_REQ); pEvtIoPageFault->n.u4EvtCode = IOMMU_EVT_IO_PAGE_FAULT; pEvtIoPageFault->n.u64Addr = uIova; } /** * Raises an IO_PAGE_FAULT event. * * @param pDevIns The IOMMU instance data. * @param fIoDevFlags The I/O device flags, see IOMMU_DTE_CACHE_F_XXX. * @param pIrte The interrupt remapping table entry, can be NULL. * @param enmOp The IOMMU operation being performed. * @param pEvtIoPageFault The I/O page fault event. * @param enmEvtType The I/O page fault event type. * * @thread Any. 
*/
static void iommuAmdIoPageFaultEventRaise(PPDMDEVINS pDevIns, uint16_t fIoDevFlags, PCIRTE_T pIrte, IOMMUOP enmOp,
                                          PCEVT_IO_PAGE_FAULT_T pEvtIoPageFault, EVT_IO_PAGE_FAULT_TYPE_T enmEvtType)
{
    /* The event is handed to the generic log writer as-is, so both layouts must have the same size. */
    AssertCompile(sizeof(EVT_GENERIC_T) == sizeof(EVT_IO_PAGE_FAULT_T));
    PCEVT_GENERIC_T pEvent = (PCEVT_GENERIC_T)pEvtIoPageFault;

    /*
     * Local helper: adds the "I/O page fault raised" flag for the device in the DTE
     * cache. Expands to nothing when the DTE cache is compiled out.
     */
#ifdef IOMMU_WITH_DTE_CACHE
# define IOMMU_DTE_CACHE_SET_PF_RAISED(a_pDevIns, a_DevId)  iommuAmdDteCacheAddFlags((a_pDevIns), (a_DevId), \
                                                                                     IOMMU_DTE_CACHE_F_IO_PAGE_FAULT_RAISED)
#else
# define IOMMU_DTE_CACHE_SET_PF_RAISED(a_pDevIns, a_DevId)  do { } while (0)
#endif

    /*
     * Decide whether writing the event log entry is suppressed for this fault.
     */
    bool fSuppressEvtLogging = false;
    if (   enmOp == IOMMUOP_MEM_READ
        || enmOp == IOMMUOP_MEM_WRITE)
    {
        /* Memory accesses: suppressed when every flag of either mask below is set in fIoDevFlags. */
        uint16_t const fSuppressIopf    = IOMMU_DTE_CACHE_F_VALID
                                        | IOMMU_DTE_CACHE_F_SUPPRESS_IOPF | IOMMU_DTE_CACHE_F_IO_PAGE_FAULT_RAISED;
        uint16_t const fSuppressAllIopf = IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_SUPPRESS_ALL_IOPF;
        if (   (fIoDevFlags & fSuppressAllIopf) == fSuppressAllIopf
            || (fIoDevFlags & fSuppressIopf) == fSuppressIopf)
        {
            fSuppressEvtLogging = true;
        }
    }
    else if (enmOp == IOMMUOP_INTR_REQ)
    {
        /* Interrupt requests: suppressed by the device flags, otherwise by the IRTE when one is supplied. */
        uint16_t const fSuppressIopf = IOMMU_DTE_CACHE_F_INTR_MAP_VALID | IOMMU_DTE_CACHE_F_IGNORE_UNMAPPED_INTR;
        if ((fIoDevFlags & fSuppressIopf) == fSuppressIopf)
            fSuppressEvtLogging = true;
        else if (pIrte) /** @todo Make this compulsory and assert if it isn't provided. */
            fSuppressEvtLogging = pIrte->n.u1SuppressIoPf;
    }
    /* else: Events are never suppressed for commands. */

    switch (enmEvtType)
    {
        case kIoPageFaultType_PermDenied:
        {
            /* Cannot be triggered by a command. */
            Assert(enmOp != IOMMUOP_CMD);
            RT_FALL_THRU();
        }
        case kIoPageFaultType_DteRsvdPagingMode:
        case kIoPageFaultType_PteInvalidPageSize:
        case kIoPageFaultType_PteInvalidLvlEncoding:
        case kIoPageFaultType_SkippedLevelIovaNotZero:
        case kIoPageFaultType_PteRsvdNotZero:
        case kIoPageFaultType_PteValidNotSet:
        case kIoPageFaultType_DteTranslationDisabled:
        case kIoPageFaultType_PasidInvalidRange:
        {
            /*
             * For a translation request, the IOMMU doesn't signal an I/O page fault nor does it
             * create an event log entry. See AMD IOMMU spec. 2.1.3.2 "I/O Page Faults".
             */
            if (enmOp != IOMMUOP_TRANSLATE_REQ)
            {
                if (!fSuppressEvtLogging)
                {
                    iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
                    IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId);
                }
                /* The target abort is signalled even when logging is suppressed. */
                if (enmOp != IOMMUOP_CMD)
                    iommuAmdSetPciTargetAbort(pDevIns);
            }
            break;
        }

        case kIoPageFaultType_UserSupervisor:
        {
            /* Access is blocked and only creates an event log entry. */
            if (!fSuppressEvtLogging)
            {
                iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
                IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId);
            }
            break;
        }

        case kIoPageFaultType_IrteAddrInvalid:
        case kIoPageFaultType_IrteRsvdNotZero:
        case kIoPageFaultType_IrteRemapEn:
        case kIoPageFaultType_IrteRsvdIntType:
        case kIoPageFaultType_IntrReqAborted:
        case kIoPageFaultType_IntrWithPasid:
        {
            /* Only triggered by interrupt requests. */
            Assert(enmOp == IOMMUOP_INTR_REQ);
            if (!fSuppressEvtLogging)
            {
                iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
                IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId);
            }
            /* The target abort is signalled even when logging is suppressed. */
            iommuAmdSetPciTargetAbort(pDevIns);
            break;
        }

        case kIoPageFaultType_SmiFilterMismatch:
        {
            /* Not supported and probably will never be, assert. */
            AssertMsgFailed(("kIoPageFaultType_SmiFilterMismatch - Upstream SMI requests not supported/implemented."));
            break;
        }

        case kIoPageFaultType_DevId_Invalid:
        {
            /* Cannot be triggered by a command. */
            Assert(enmOp != IOMMUOP_CMD);
            Assert(enmOp != IOMMUOP_TRANSLATE_REQ); /** @todo IOMMU: We don't support translation requests yet. */
            if (!fSuppressEvtLogging)
            {
                iommuAmdEvtLogEntryWrite(pDevIns, pEvent);
                IOMMU_DTE_CACHE_SET_PF_RAISED(pDevIns, pEvtIoPageFault->n.u16DevId);
            }
            /* Only memory reads and writes signal a target abort here. */
            if (   enmOp == IOMMUOP_MEM_READ
                || enmOp == IOMMUOP_MEM_WRITE)
                iommuAmdSetPciTargetAbort(pDevIns);
            break;
        }
    }

#undef IOMMU_DTE_CACHE_SET_PF_RAISED
}


/**
 * Raises an IO_PAGE_FAULT event given the DTE.
 *
 * Derives the I/O device flags from the DTE and forwards everything to
 * iommuAmdIoPageFaultEventRaise().
 *
 * @param   pDevIns             The IOMMU instance data.
 * @param   pDte                The device table entry.
 * @param   pIrte               The interrupt remapping table entry, can be NULL.
 * @param   enmOp               The IOMMU operation being performed.
 * @param   pEvtIoPageFault     The I/O page fault event.
 * @param   enmEvtType          The I/O page fault event type.
 *
 * @thread  Any.
 */
static void iommuAmdIoPageFaultEventRaiseWithDte(PPDMDEVINS pDevIns, PCDTE_T pDte, PCIRTE_T pIrte, IOMMUOP enmOp,
                                                 PCEVT_IO_PAGE_FAULT_T pEvtIoPageFault, EVT_IO_PAGE_FAULT_TYPE_T enmEvtType)
{
    Assert(pDte);
    uint16_t const fIoDevFlags = iommuAmdGetBasicDevFlags(pDte);
    return iommuAmdIoPageFaultEventRaise(pDevIns, fIoDevFlags, pIrte, enmOp, pEvtIoPageFault, enmEvtType);
}


/**
 * Reads a device table entry for the given device ID.
 *
 * On a failed guest-memory read a DEV_TAB_HARDWARE_ERROR event is raised; when
 * the entry would lie outside the device table segment an IO_PAGE_FAULT event
 * is raised instead.
 *
 * @returns VBox status code.
 * @retval  VERR_IOMMU_DTE_READ_FAILED if reading the entry from guest memory failed.
 * @retval  VERR_IOMMU_DTE_BAD_OFFSET if the entry lies outside the device table segment.
 *
 * @param   pDevIns     The IOMMU device instance.
 * @param   idDevice    The device ID (bus, device, function).
 * @param   enmOp       The IOMMU operation being performed.
 * @param   pDte        Where to store the device table entry.
 *
 * @thread  Any.
 */
static int iommuAmdDteRead(PPDMDEVINS pDevIns, uint16_t idDevice, IOMMUOP enmOp, PDTE_T pDte)
{
    PCIOMMU  pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
    PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC);

    /* The control and device table base registers are sampled while holding the IOMMU lock. */
    IOMMU_LOCK(pDevIns, pThisCC);

    /* Figure out which device table segment is being accessed. */
    uint8_t const idxSegsEn = pThis->Ctrl.n.u3DevTabSegEn;
    Assert(idxSegsEn < RT_ELEMENTS(g_auDevTabSegShifts));

    uint8_t const idxSeg = (idDevice & g_auDevTabSegMasks[idxSegsEn]) >> g_auDevTabSegShifts[idxSegsEn];
    Assert(idxSeg < RT_ELEMENTS(pThis->aDevTabBaseAddrs));
    AssertCompile(RT_ELEMENTS(g_auDevTabSegShifts) == RT_ELEMENTS(g_auDevTabSegMasks));

    /* The device ID bits not used for selecting the segment index the entry within that segment. */
    RTGCPHYS const GCPhysDevTab = pThis->aDevTabBaseAddrs[idxSeg].n.u40Base << X86_PAGE_4K_SHIFT;
    uint32_t const offDte       = (idDevice & ~g_auDevTabSegMasks[idxSegsEn]) * sizeof(DTE_T);
    RTGCPHYS const GCPhysDte    = GCPhysDevTab + offDte;

    /* Ensure the DTE falls completely within the device table segment. */
    uint32_t const cbDevTabSeg  = (pThis->aDevTabBaseAddrs[idxSeg].n.u9Size + 1) << X86_PAGE_4K_SHIFT;

    /* The lock is released before guest memory is read and before any event is raised. */
    IOMMU_UNLOCK(pDevIns, pThisCC);

    if (offDte + sizeof(DTE_T) <= cbDevTabSeg)
    {
        /* Read the device table entry from guest memory. */
        Assert(!(GCPhysDevTab & X86_PAGE_4K_OFFSET_MASK));
        int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysDte, pDte, sizeof(*pDte));
        if (RT_SUCCESS(rc))
            return rc;

        /* Raise a device table hardware error. */
        LogFunc(("Failed to read device table entry at %#RGp. rc=%Rrc -> DevTabHwError\n", GCPhysDte, rc));

        EVT_DEV_TAB_HW_ERROR_T EvtDevTabHwErr;
        iommuAmdDevTabHwErrorEventInit(idDevice, GCPhysDte, enmOp, &EvtDevTabHwErr);
        iommuAmdDevTabHwErrorEventRaise(pDevIns, enmOp, &EvtDevTabHwErr);
        return VERR_IOMMU_DTE_READ_FAILED;
    }

    /* Raise an I/O page fault for out-of-bounds access. */
    EVT_IO_PAGE_FAULT_T EvtIoPageFault;
    iommuAmdIoPageFaultEventInit(idDevice, 0 /* idDomain */, 0 /* uIova */, false /* fPresent */, false /* fRsvdNotZero */,
                                 false /* fPermDenied */, enmOp, &EvtIoPageFault);
    iommuAmdIoPageFaultEventRaise(pDevIns, 0 /* fIoDevFlags */, NULL /* pIrte */, enmOp, &EvtIoPageFault,
                                  kIoPageFaultType_DevId_Invalid);
    return VERR_IOMMU_DTE_BAD_OFFSET;
}


/**
 * Performs pre-translation checks for the given device table entry.
 *
 * @returns VBox status code.
* @retval VINF_SUCCESS if the DTE is valid and supports address translation. * @retval VINF_IOMMU_ADDR_TRANSLATION_DISABLED if the DTE is valid but address * translation is disabled. * @retval VERR_IOMMU_ADDR_TRANSLATION_FAILED if an error occurred and any * corresponding event was raised. * @retval VERR_IOMMU_ADDR_ACCESS_DENIED if the DTE denies the requested * permissions. * * @param pDevIns The IOMMU device instance. * @param uIova The I/O virtual address to translate. * @param idDevice The device ID (bus, device, function). * @param fPerm The I/O permissions for this access, see * IOMMU_IO_PERM_XXX. * @param pDte The device table entry. * @param enmOp The IOMMU operation being performed. * * @thread Any. */ static int iommuAmdPreTranslateChecks(PPDMDEVINS pDevIns, uint16_t idDevice, uint64_t uIova, uint8_t fPerm, PCDTE_T pDte, IOMMUOP enmOp) { /* * Check if the translation is valid, otherwise raise an I/O page fault. */ if (pDte->n.u1TranslationValid) { /* likely */ } else { /** @todo r=ramshankar: The AMD IOMMU spec. says page walk is terminated but * doesn't explicitly say whether an I/O page fault is raised. From other * places in the spec. it seems early page walk terminations (starting with * the DTE) return the state computed so far and raises an I/O page fault. So * returning an invalid translation rather than skipping translation. */ LogFunc(("Translation valid bit not set -> IOPF\n")); EVT_IO_PAGE_FAULT_T EvtIoPageFault; iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, false /* fPresent */, false /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault); iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault, kIoPageFaultType_DteTranslationDisabled); return VERR_IOMMU_ADDR_TRANSLATION_FAILED; } /* * Check permissions bits in the DTE. * Note: This MUST be checked prior to checking the root page table level below! 
*/ uint8_t const fDtePerm = (pDte->au64[0] >> IOMMU_IO_PERM_SHIFT) & IOMMU_IO_PERM_MASK; if ((fPerm & fDtePerm) == fPerm) { /* likely */ } else { LogFunc(("Permission denied by DTE (fPerm=%#x fDtePerm=%#x) -> IOPF\n", fPerm, fDtePerm)); EVT_IO_PAGE_FAULT_T EvtIoPageFault; iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */, true /* fPermDenied */, enmOp, &EvtIoPageFault); iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault, kIoPageFaultType_PermDenied); return VERR_IOMMU_ADDR_ACCESS_DENIED; } /* * If the root page table level is 0, translation is disabled and GPA=SPA and * the DTE.IR and DTE.IW bits control permissions (verified above). */ uint8_t const uMaxLevel = pDte->n.u3Mode; if (uMaxLevel != 0) { /* likely */ } else { Assert((fPerm & fDtePerm) == fPerm); /* Verify we've checked permissions. */ return VINF_IOMMU_ADDR_TRANSLATION_DISABLED; } /* * If the root page table level exceeds the allowed host-address translation level, * page walk is terminated and translation fails. */ if (uMaxLevel <= IOMMU_MAX_HOST_PT_LEVEL) { /* likely */ } else { /** @todo r=ramshankar: I cannot make out from the AMD IOMMU spec. if I should be * raising an ILLEGAL_DEV_TABLE_ENTRY event or an IO_PAGE_FAULT event here. * I'm just going with I/O page fault. */ LogFunc(("Invalid root page table level %#x (idDevice=%#x) -> IOPF\n", uMaxLevel, idDevice)); EVT_IO_PAGE_FAULT_T EvtIoPageFault; iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault); iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault, kIoPageFaultType_PteInvalidLvlEncoding); return VERR_IOMMU_ADDR_TRANSLATION_FAILED; } /* The DTE allows translations for this device. 
*/ return VINF_SUCCESS; } /** * Walks the I/O page table to translate the I/O virtual address to a system * physical address. * * @returns VBox status code. * @param pDevIns The IOMMU device instance. * @param uIova The I/O virtual address to translate. Must be 4K aligned. * @param fPerm The I/O permissions for this access, see * IOMMU_IO_PERM_XXX. * @param idDevice The device ID (bus, device, function). * @param pDte The device table entry. * @param enmOp The IOMMU operation being performed. * @param pPageLookup Where to store the results of the I/O page lookup. This * is only updated when VINF_SUCCESS is returned. * * @thread Any. */ static int iommuAmdIoPageTableWalk(PPDMDEVINS pDevIns, uint64_t uIova, uint8_t fPerm, uint16_t idDevice, PCDTE_T pDte, IOMMUOP enmOp, PIOPAGELOOKUP pPageLookup) { Assert(pDte->n.u1Valid); Assert(!(uIova & X86_PAGE_4K_OFFSET_MASK)); /* The virtual address bits indexing table. */ static uint8_t const s_acIovaLevelShifts[] = { 0, 12, 21, 30, 39, 48, 57, 0 }; static uint64_t const s_auIovaLevelMasks[] = { UINT64_C(0x0000000000000000), UINT64_C(0x00000000001ff000), UINT64_C(0x000000003fe00000), UINT64_C(0x0000007fc0000000), UINT64_C(0x0000ff8000000000), UINT64_C(0x01ff000000000000), UINT64_C(0xfe00000000000000), UINT64_C(0x0000000000000000) }; AssertCompile(RT_ELEMENTS(s_acIovaLevelShifts) == RT_ELEMENTS(s_auIovaLevelMasks)); AssertCompile(RT_ELEMENTS(s_acIovaLevelShifts) > IOMMU_MAX_HOST_PT_LEVEL); /* Traverse the I/O page table starting with the page directory in the DTE. */ IOPTENTITY_T PtEntity; PtEntity.u64 = pDte->au64[0]; for (;;) { /* Figure out the system physical address of the page table at the current level. */ uint8_t const uLevel = PtEntity.n.u3NextLevel; /* Read the page table entity at the current level. 
*/ { Assert(uLevel > 0 && uLevel < RT_ELEMENTS(s_acIovaLevelShifts)); Assert(uLevel <= IOMMU_MAX_HOST_PT_LEVEL); uint16_t const idxPte = (uIova >> s_acIovaLevelShifts[uLevel]) & UINT64_C(0x1ff); uint64_t const offPte = idxPte << 3; RTGCPHYS const GCPhysPtEntity = (PtEntity.u64 & IOMMU_PTENTITY_ADDR_MASK) + offPte; int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysPtEntity, &PtEntity.u64, sizeof(PtEntity)); if (RT_FAILURE(rc)) { LogFunc(("Failed to read page table entry at %#RGp. rc=%Rrc -> PageTabHwError\n", GCPhysPtEntity, rc)); EVT_PAGE_TAB_HW_ERR_T EvtPageTabHwErr; iommuAmdPageTabHwErrorEventInit(idDevice, pDte->n.u16DomainId, GCPhysPtEntity, enmOp, &EvtPageTabHwErr); iommuAmdPageTabHwErrorEventRaise(pDevIns, enmOp, &EvtPageTabHwErr); return VERR_IOMMU_IPE_2; } } /* Check present bit. */ if (PtEntity.n.u1Present) { /* likely */ } else { LogFunc(("Page table entry not present (idDevice=%#x) -> IOPF\n", idDevice)); EVT_IO_PAGE_FAULT_T EvtIoPageFault; iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, false /* fPresent */, false /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault); iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault, kIoPageFaultType_PermDenied); return VERR_IOMMU_ADDR_TRANSLATION_FAILED; } /* Check permission bits. 
*/ uint8_t const fPtePerm = (PtEntity.u64 >> IOMMU_IO_PERM_SHIFT) & IOMMU_IO_PERM_MASK; if ((fPerm & fPtePerm) == fPerm) { /* likely */ } else { LogFunc(("Page table entry access denied (idDevice=%#x fPerm=%#x fPtePerm=%#x) -> IOPF\n", idDevice, fPerm, fPtePerm)); EVT_IO_PAGE_FAULT_T EvtIoPageFault; iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */, true /* fPermDenied */, enmOp, &EvtIoPageFault); iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault, kIoPageFaultType_PermDenied); return VERR_IOMMU_ADDR_ACCESS_DENIED; } /* If this is a PTE, we're at the final level and we're done. */ uint8_t const uNextLevel = PtEntity.n.u3NextLevel; if (uNextLevel == 0) { /* The page size of the translation is the default (4K). */ pPageLookup->GCPhysSpa = PtEntity.u64 & IOMMU_PTENTITY_ADDR_MASK; pPageLookup->cShift = X86_PAGE_4K_SHIFT; pPageLookup->fPerm = fPtePerm; return VINF_SUCCESS; } if (uNextLevel == 7) { /* The default page size of the translation is overridden. */ RTGCPHYS const GCPhysPte = PtEntity.u64 & IOMMU_PTENTITY_ADDR_MASK; uint8_t cShift = X86_PAGE_4K_SHIFT; while (GCPhysPte & RT_BIT_64(cShift++)) ; /* The page size must be larger than the default size and lower than the default size of the higher level. */ Assert(uLevel < IOMMU_MAX_HOST_PT_LEVEL); /* PTE at level 6 handled outside the loop, uLevel should be <= 5. 
*/ if ( cShift > s_acIovaLevelShifts[uLevel] && cShift < s_acIovaLevelShifts[uLevel + 1]) { pPageLookup->GCPhysSpa = GCPhysPte; pPageLookup->cShift = cShift; pPageLookup->fPerm = fPtePerm; return VINF_SUCCESS; } LogFunc(("Page size invalid cShift=%#x -> IOPF\n", cShift)); EVT_IO_PAGE_FAULT_T EvtIoPageFault; iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault); iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault, kIoPageFaultType_PteInvalidPageSize); return VERR_IOMMU_ADDR_TRANSLATION_FAILED; } /* Validate the next level encoding of the PDE. */ #if IOMMU_MAX_HOST_PT_LEVEL < 6 if (uNextLevel <= IOMMU_MAX_HOST_PT_LEVEL) { /* likely */ } else { LogFunc(("Next level of PDE invalid uNextLevel=%#x -> IOPF\n", uNextLevel)); EVT_IO_PAGE_FAULT_T EvtIoPageFault; iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault); iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault, kIoPageFaultType_PteInvalidLvlEncoding); return VERR_IOMMU_ADDR_TRANSLATION_FAILED; } #else Assert(uNextLevel <= IOMMU_MAX_HOST_PT_LEVEL); #endif /* Validate level transition. */ if (uNextLevel < uLevel) { /* likely */ } else { LogFunc(("Next level (%#x) must be less than the current level (%#x) -> IOPF\n", uNextLevel, uLevel)); EVT_IO_PAGE_FAULT_T EvtIoPageFault; iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault); iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault, kIoPageFaultType_PteInvalidLvlEncoding); return VERR_IOMMU_ADDR_TRANSLATION_FAILED; } /* Ensure IOVA bits of skipped levels are zero. 
*/ Assert(uLevel > 0); uint64_t uIovaSkipMask = 0; for (unsigned idxLevel = uLevel - 1; idxLevel > uNextLevel; idxLevel--) uIovaSkipMask |= s_auIovaLevelMasks[idxLevel]; if (!(uIova & uIovaSkipMask)) { /* likely */ } else { LogFunc(("IOVA of skipped levels are not zero %#RX64 (SkipMask=%#RX64) -> IOPF\n", uIova, uIovaSkipMask)); EVT_IO_PAGE_FAULT_T EvtIoPageFault; iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, uIova, true /* fPresent */, false /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault); iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault, kIoPageFaultType_SkippedLevelIovaNotZero); return VERR_IOMMU_ADDR_TRANSLATION_FAILED; } /* Continue with traversing the page directory at this level. */ } } /** * Page lookup callback for finding an I/O page from guest memory. * * @returns VBox status code. * @retval VINF_SUCCESS when the page is found and has the right permissions. * @retval VERR_IOMMU_ADDR_TRANSLATION_FAILED when address translation fails. * @retval VERR_IOMMU_ADDR_ACCESS_DENIED when the page is found but permissions are * insufficient to what is requested. * * @param pDevIns The IOMMU instance data. * @param uIovaPage The I/O virtual address to lookup in the cache (must be * 4K aligned). * @param fPerm The I/O permissions for this access, see * IOMMU_IO_PERM_XXX. * @param pAux The auxiliary information required during lookup. * @param pPageLookup Where to store the looked up I/O page. 
*/ static DECLCALLBACK(int) iommuAmdDteLookupPage(PPDMDEVINS pDevIns, uint64_t uIovaPage, uint8_t fPerm, PCIOMMUOPAUX pAux, PIOPAGELOOKUP pPageLookup) { AssertPtr(pAux); AssertPtr(pPageLookup); Assert(!(uIovaPage & X86_PAGE_4K_OFFSET_MASK)); PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); STAM_PROFILE_ADV_START(&pThis->StatProfDteLookup, a); int rc = iommuAmdIoPageTableWalk(pDevIns, uIovaPage, fPerm, pAux->idDevice, pAux->pDte, pAux->enmOp, pPageLookup); STAM_PROFILE_ADV_STOP(&pThis->StatProfDteLookup, a); NOREF(pThis); return rc; } /** * Looks up a range of I/O virtual addresses. * * @returns VBox status code. * @param pDevIns The IOMMU instance data. * @param pfnIoPageLookup The lookup function to use. * @param pAddrIn The I/O address range to lookup. * @param pAux The auxiliary information required by the lookup * function. * @param pAddrOut Where to store the translated I/O address range. * @param pcbPages Where to store the size of the access (round up to * the page size). Optional, can be NULL. */ static int iommuAmdLookupIoAddrRange(PPDMDEVINS pDevIns, PFNIOPAGELOOKUP pfnIoPageLookup, PCIOADDRRANGE pAddrIn, PCIOMMUOPAUX pAux, PIOADDRRANGE pAddrOut, size_t *pcbPages) { AssertPtr(pfnIoPageLookup); AssertPtr(pAddrIn); AssertPtr(pAddrOut); int rc; size_t const cbIova = pAddrIn->cb; uint8_t const fPerm = pAddrIn->fPerm; uint64_t const uIova = pAddrIn->uAddr; RTGCPHYS GCPhysSpa = NIL_RTGCPHYS; size_t cbRemaining = cbIova; uint64_t uIovaPage = pAddrIn->uAddr & X86_PAGE_4K_BASE_MASK; uint64_t offIova = pAddrIn->uAddr & X86_PAGE_4K_OFFSET_MASK; uint64_t cbPages = 0; IOPAGELOOKUP PageLookupPrev; RT_ZERO(PageLookupPrev); for (;;) { IOPAGELOOKUP PageLookup; rc = pfnIoPageLookup(pDevIns, uIovaPage, fPerm, pAux, &PageLookup); if (RT_SUCCESS(rc)) { Assert(PageLookup.cShift >= X86_PAGE_4K_SHIFT); /* Store the translated address before continuing to access more pages. 
*/ if (cbRemaining == cbIova) { uint64_t const offMask = IOMMU_GET_PAGE_OFF_MASK(PageLookup.cShift); uint64_t const offSpa = uIova & offMask; Assert(!(PageLookup.GCPhysSpa & offMask)); GCPhysSpa = PageLookup.GCPhysSpa | offSpa; } /* Check if addresses translated so far result in a physically contiguous region. */ else if (!iommuAmdLookupIsAccessContig(&PageLookupPrev, &PageLookup)) { rc = VERR_OUT_OF_RANGE; break; } /* Store the page lookup result from the first/previous page. */ PageLookupPrev = PageLookup; /* Update size of all pages read thus far. */ uint64_t const cbPage = RT_BIT_64(PageLookup.cShift); cbPages += cbPage; /* Check if we need to access more pages. */ if (cbRemaining > cbPage - offIova) { cbRemaining -= (cbPage - offIova); /* Calculate how much more we need to access. */ uIovaPage += cbPage; /* Update address of the next access. */ offIova = 0; /* After first page, all pages are accessed from off 0. */ } else { cbRemaining = 0; break; } } else break; } pAddrOut->uAddr = GCPhysSpa; /* Update the translated address. */ pAddrOut->cb = cbIova - cbRemaining; /* Update the size of the contiguous memory region. */ pAddrOut->fPerm = PageLookupPrev.fPerm; /* Update the allowed permissions for this access. */ if (pcbPages) *pcbPages = cbPages; /* Update the size of the pages accessed. */ return rc; } /** * Looks up an I/O virtual address from the device table. * * @returns VBox status code. * @param pDevIns The IOMMU instance data. * @param idDevice The device ID (bus, device, function). * @param uIova The I/O virtual address to lookup. * @param cbIova The size of the access. * @param fPerm The I/O permissions for this access, see * IOMMU_IO_PERM_XXX. * @param enmOp The IOMMU operation being performed. * @param pGCPhysSpa Where to store the translated system physical address. * @param pcbContiguous Where to store the number of contiguous bytes translated * and permission-checked. * * @thread Any. 
*/ static int iommuAmdDteLookup(PPDMDEVINS pDevIns, uint16_t idDevice, uint64_t uIova, size_t cbIova, uint8_t fPerm, IOMMUOP enmOp, PRTGCPHYS pGCPhysSpa, size_t *pcbContiguous) { PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); RTGCPHYS GCPhysSpa = NIL_RTGCPHYS; size_t cbContiguous = 0; /* Read the device table entry from memory. */ DTE_T Dte; int rc = iommuAmdDteRead(pDevIns, idDevice, enmOp, &Dte); if (RT_SUCCESS(rc)) { if (Dte.n.u1Valid) { /* Validate bits 127:0 of the device table entry when DTE.V is 1. */ uint64_t const fRsvd0 = Dte.au64[0] & ~(IOMMU_DTE_QWORD_0_VALID_MASK & ~IOMMU_DTE_QWORD_0_FEAT_MASK); uint64_t const fRsvd1 = Dte.au64[1] & ~(IOMMU_DTE_QWORD_1_VALID_MASK & ~IOMMU_DTE_QWORD_1_FEAT_MASK); if (RT_LIKELY(!fRsvd0 && !fRsvd1)) { /* * Check if the DTE is configured for translating addresses. * Note: Addresses cannot be subject to exclusion as we do -not- support remote IOTLBs, * so there's no need to check the address exclusion base/limit here. */ rc = iommuAmdPreTranslateChecks(pDevIns, idDevice, uIova, fPerm, &Dte, enmOp); if (rc == VINF_SUCCESS) { IOADDRRANGE AddrIn; AddrIn.uAddr = uIova; AddrIn.cb = cbIova; AddrIn.fPerm = fPerm; IOMMUOPAUX Aux; Aux.enmOp = enmOp; Aux.pDte = &Dte; Aux.idDevice = idDevice; Aux.idDomain = Dte.n.u16DomainId; IOADDRRANGE AddrOut; /* Lookup the address from the DTE and I/O page tables.*/ size_t cbPages = 0; rc = iommuAmdLookupIoAddrRange(pDevIns, iommuAmdDteLookupPage, &AddrIn, &Aux, &AddrOut, &cbPages); GCPhysSpa = AddrOut.uAddr; cbContiguous = AddrOut.cb; /* If we stopped since translation resulted in non-contiguous physical addresses, what we translated so far is still valid. 
*/ if (rc == VERR_OUT_OF_RANGE) { Assert(cbContiguous > 0 && cbContiguous < cbIova); rc = VINF_SUCCESS; STAM_COUNTER_INC(&pThis->StatAccessDteNonContig); NOREF(pThis); } if (rc == VERR_IOMMU_ADDR_ACCESS_DENIED) STAM_COUNTER_INC(&pThis->StatAccessDtePermDenied); #ifdef IOMMU_WITH_IOTLBE_CACHE if (RT_SUCCESS(rc)) { /* Update that addresses requires translation (cumulative permissions of DTE and I/O page tables). */ iommuAmdDteCacheAdd(pDevIns, idDevice, &Dte, IOMMU_DTE_CACHE_F_ADDR_TRANSLATE); /* Update IOTLB for the contiguous range of I/O virtual addresses. */ iommuAmdIotlbAddRange(pDevIns, Dte.n.u16DomainId, uIova & X86_PAGE_4K_BASE_MASK, cbPages, GCPhysSpa & X86_PAGE_4K_BASE_MASK, AddrOut.fPerm); } #endif } else if (rc == VINF_IOMMU_ADDR_TRANSLATION_DISABLED) { /* * Translation is disabled for this device (root paging mode is 0). * GPA=SPA, but the permission bits are important and controls accesses. */ GCPhysSpa = uIova; cbContiguous = cbIova; rc = VINF_SUCCESS; #ifdef IOMMU_WITH_IOTLBE_CACHE /* Update that addresses permissions of DTE apply (but omit address translation). */ iommuAmdDteCacheAdd(pDevIns, idDevice, &Dte, IOMMU_DTE_CACHE_F_IO_PERM); #endif } else { /* Address translation failed or access is denied. */ Assert(rc == VERR_IOMMU_ADDR_ACCESS_DENIED || rc == VERR_IOMMU_ADDR_TRANSLATION_FAILED); GCPhysSpa = NIL_RTGCPHYS; cbContiguous = 0; STAM_COUNTER_INC(&pThis->StatAccessDtePermDenied); } } else { /* Invalid reserved bits in the DTE, raise an error event. */ LogFunc(("Invalid DTE reserved bits (u64[0]=%#RX64 u64[1]=%#RX64) -> Illegal DTE\n", fRsvd0, fRsvd1)); EVT_ILLEGAL_DTE_T Event; iommuAmdIllegalDteEventInit(idDevice, uIova, true /* fRsvdNotZero */, enmOp, &Event); iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdNotZero); rc = VERR_IOMMU_ADDR_TRANSLATION_FAILED; } } else { /* * The DTE is not valid, forward addresses untranslated. * See AMD IOMMU spec. "Table 5: Feature Enablement for Address Translation". 
*/ GCPhysSpa = uIova; cbContiguous = cbIova; #ifdef IOMMU_WITH_IOTLBE_CACHE /* Update that addresses don't require translation (nor permission checks) but a DTE is present. */ iommuAmdDteCacheAdd(pDevIns, idDevice, &Dte, 0 /* fFlags */); #endif } } else { LogFunc(("Failed to read device table entry. idDevice=%#x rc=%Rrc\n", idDevice, rc)); rc = VERR_IOMMU_ADDR_TRANSLATION_FAILED; } *pGCPhysSpa = GCPhysSpa; *pcbContiguous = cbContiguous; AssertMsg(rc != VINF_SUCCESS || cbContiguous > 0, ("cbContiguous=%zu\n", cbContiguous)); return rc; } #ifdef IOMMU_WITH_IOTLBE_CACHE /** * I/O page lookup callback for finding an I/O page from the IOTLB. * * @returns VBox status code. * @retval VINF_SUCCESS when the page is found and has the right permissions. * @retval VERR_NOT_FOUND when the page is not found. * @retval VERR_IOMMU_ADDR_ACCESS_DENIED when the page is found but permissions are * insufficient to what is requested. * * @param pDevIns The IOMMU instance data. * @param uIovaPage The I/O virtual address to lookup in the cache (must be * 4K aligned). * @param fPerm The I/O permissions for this access, see * IOMMU_IO_PERM_XXX. * @param pAux The auxiliary information required during lookup. * @param pPageLookup Where to store the looked up I/O page. 
*/
static DECLCALLBACK(int) iommuAmdCacheLookupPage(PPDMDEVINS pDevIns, uint64_t uIovaPage, uint8_t fPerm, PCIOMMUOPAUX pAux,
                                                 PIOPAGELOOKUP pPageLookup)
{
    Assert(pAux);
    Assert(pPageLookup);
    Assert(!(uIovaPage & X86_PAGE_4K_OFFSET_MASK));

    PIOMMU   pThis   = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
    PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);

    /* Profile only the IOTLB probe itself, keyed by the domain ID and the page address. */
    STAM_PROFILE_ADV_START(&pThis->StatProfIotlbeLookup, a);
    PCIOTLBE pIotlbe = iommuAmdIotlbLookup(pThis, pThisR3, pAux->idDomain, uIovaPage);
    STAM_PROFILE_ADV_STOP(&pThis->StatProfIotlbeLookup, a);

    /* No entry for this page. */
    if (!pIotlbe)
        return VERR_NOT_FOUND;

    /* The cached lookup result is handed back even when the permission check below fails. */
    *pPageLookup = pIotlbe->PageLookup;
    if ((pPageLookup->fPerm & fPerm) != fPerm)
        return VERR_IOMMU_ADDR_ACCESS_DENIED;

    STAM_COUNTER_INC(&pThis->StatAccessCacheHit);
    return VINF_SUCCESS;
}


/**
 * Looks up a memory access in the IOTLB cache.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS if the access was cached and permissions are verified.
 * @retval  VERR_OUT_OF_RANGE if the access resulted in a non-contiguous physical
 *          address region.
 * @retval  VERR_NOT_FOUND if the access was not cached.
 * @retval  VERR_IOMMU_ADDR_ACCESS_DENIED if the access was cached but permissions
 *          are insufficient.
 *
 * @param   pDevIns         The IOMMU instance data.
 * @param   idDevice        The device ID (bus, device, function).
 * @param   uIova           The I/O virtual address to lookup.
 * @param   cbIova          The size of the access.
 * @param   fPerm           The I/O permissions for this access, see
 *                          IOMMU_IO_PERM_XXX.
 * @param   enmOp           The IOMMU operation being performed.
 * @param   pGCPhysSpa      Where to store the translated system physical address.
 * @param   pcbContiguous   Where to store the number of contiguous bytes translated
 *                          and permission-checked.
*/ static int iommuAmdIotlbCacheLookup(PPDMDEVINS pDevIns, uint16_t idDevice, uint64_t uIova, size_t cbIova, uint8_t fPerm, IOMMUOP enmOp, PRTGCPHYS pGCPhysSpa, size_t *pcbContiguous) { int rc; PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); #define IOMMU_IOTLB_LOOKUP_FAILED(a_rc) \ do { \ *pGCPhysSpa = NIL_RTGCPHYS; \ *pcbContiguous = 0; \ rc = (a_rc); \ } while (0) /* * We hold the cache lock across both the DTE and the IOTLB lookups (if any) because * we don't want the DTE cache to be invalidate while we perform IOTBL lookups. */ IOMMU_CACHE_LOCK(pDevIns, pThis); /* Lookup the DTE cache entry. */ uint16_t const idxDteCache = iommuAmdDteCacheEntryLookup(pThis, idDevice); if (idxDteCache < RT_ELEMENTS(pThis->aDteCache)) { PCDTECACHE pDteCache = &pThis->aDteCache[idxDteCache]; if ((pDteCache->fFlags & (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_ADDR_TRANSLATE)) == (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_ADDR_TRANSLATE)) { /* Lookup IOTLB entries. */ IOADDRRANGE AddrIn; AddrIn.uAddr = uIova; AddrIn.cb = cbIova; AddrIn.fPerm = fPerm; IOMMUOPAUX Aux; Aux.enmOp = enmOp; Aux.pDte = NULL; Aux.idDevice = idDevice; Aux.idDomain = pDteCache->idDomain; IOADDRRANGE AddrOut; rc = iommuAmdLookupIoAddrRange(pDevIns, iommuAmdCacheLookupPage, &AddrIn, &Aux, &AddrOut, NULL /* pcbPages */); Assert(AddrOut.cb <= cbIova); *pGCPhysSpa = AddrOut.uAddr; *pcbContiguous = AddrOut.cb; } else if ((pDteCache->fFlags & (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_IO_PERM)) == (IOMMU_DTE_CACHE_F_PRESENT | IOMMU_DTE_CACHE_F_VALID | IOMMU_DTE_CACHE_F_IO_PERM)) { /* Address translation is disabled, but DTE permissions apply. 
*/ Assert(!(pDteCache->fFlags & IOMMU_DTE_CACHE_F_ADDR_TRANSLATE)); uint8_t const fDtePerm = (pDteCache->fFlags >> IOMMU_DTE_CACHE_F_IO_PERM_SHIFT) & IOMMU_DTE_CACHE_F_IO_PERM_MASK; if ((fDtePerm & fPerm) == fPerm) { *pGCPhysSpa = uIova; *pcbContiguous = cbIova; rc = VINF_SUCCESS; } else IOMMU_IOTLB_LOOKUP_FAILED(VERR_IOMMU_ADDR_ACCESS_DENIED); } else if (pDteCache->fFlags & IOMMU_DTE_CACHE_F_PRESENT) { /* Forward addresses untranslated, without checking permissions. */ *pGCPhysSpa = uIova; *pcbContiguous = cbIova; rc = VINF_SUCCESS; } else IOMMU_IOTLB_LOOKUP_FAILED(VERR_NOT_FOUND); } else IOMMU_IOTLB_LOOKUP_FAILED(VERR_NOT_FOUND); IOMMU_CACHE_UNLOCK(pDevIns, pThis); return rc; #undef IOMMU_IOTLB_LOOKUP_FAILED } #endif /* IOMMU_WITH_IOTLBE_CACHE */ /** * Gets the I/O permission and IOMMU operation type for the given access flags. * * @param pThis The shared IOMMU device state. * @param fFlags The PDM IOMMU flags, PDMIOMMU_MEM_F_XXX. * @param penmOp Where to store the IOMMU operation. * @param pfPerm Where to store the IOMMU I/O permission. * @param fBulk Whether this is a bulk read or write. */ DECLINLINE(void) iommuAmdMemAccessGetPermAndOp(PIOMMU pThis, uint32_t fFlags, PIOMMUOP penmOp, uint8_t *pfPerm, bool fBulk) { if (fFlags & PDMIOMMU_MEM_F_WRITE) { *penmOp = IOMMUOP_MEM_WRITE; *pfPerm = IOMMU_IO_PERM_WRITE; #ifdef VBOX_WITH_STATISTICS if (!fBulk) STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMemWrite)); else STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMemBulkWrite)); #else RT_NOREF2(pThis, fBulk); #endif } else { Assert(fFlags & PDMIOMMU_MEM_F_READ); *penmOp = IOMMUOP_MEM_READ; *pfPerm = IOMMU_IO_PERM_READ; #ifdef VBOX_WITH_STATISTICS if (!fBulk) STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMemRead)); else STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMemBulkRead)); #else RT_NOREF2(pThis, fBulk); #endif } } /** * Memory access transaction from a device. * * @returns VBox status code. * @param pDevIns The IOMMU device instance. 
* @param idDevice The device ID (bus, device, function). * @param uIova The I/O virtual address being accessed. * @param cbIova The size of the access. * @param fFlags The access flags, see PDMIOMMU_MEM_F_XXX. * @param pGCPhysSpa Where to store the translated system physical address. * @param pcbContiguous Where to store the number of contiguous bytes translated * and permission-checked. * * @thread Any. */ static DECLCALLBACK(int) iommuAmdMemAccess(PPDMDEVINS pDevIns, uint16_t idDevice, uint64_t uIova, size_t cbIova, uint32_t fFlags, PRTGCPHYS pGCPhysSpa, size_t *pcbContiguous) { /* Validate. */ AssertPtr(pDevIns); AssertPtr(pGCPhysSpa); Assert(cbIova > 0); Assert(!(fFlags & ~PDMIOMMU_MEM_F_VALID_MASK)); PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); IOMMU_CTRL_T const Ctrl = iommuAmdGetCtrlUnlocked(pThis); if (Ctrl.n.u1IommuEn) { IOMMUOP enmOp; uint8_t fPerm; iommuAmdMemAccessGetPermAndOp(pThis, fFlags, &enmOp, &fPerm, false /* fBulk */); LogFlowFunc(("%s: idDevice=%#x uIova=%#RX64 cb=%zu\n", iommuAmdMemAccessGetPermName(fPerm), idDevice, uIova, cbIova)); int rc; #ifdef IOMMU_WITH_IOTLBE_CACHE /* Lookup the IOVA from the cache. */ rc = iommuAmdIotlbCacheLookup(pDevIns, idDevice, uIova, cbIova, fPerm, enmOp, pGCPhysSpa, pcbContiguous); if (rc == VINF_SUCCESS) { /* All pages in the access were found in the cache with sufficient permissions. */ Assert(*pcbContiguous == cbIova); Assert(*pGCPhysSpa != NIL_RTGCPHYS); STAM_COUNTER_INC(&pThis->StatAccessCacheHitFull); return VINF_SUCCESS; } if (rc != VERR_OUT_OF_RANGE) { /* likely */ } else { /* Access stopped since translations resulted in non-contiguous memory, let caller resume access. */ Assert(*pcbContiguous > 0 && *pcbContiguous < cbIova); STAM_COUNTER_INC(&pThis->StatAccessCacheNonContig); return VINF_SUCCESS; } /* * Access incomplete as not all pages were in the cache. * Or permissions were denied for the access (which typically doesn't happen) * so go through the slower path and raise the required event. 
*/ AssertMsg(*pcbContiguous < cbIova, ("Invalid size: cbContiguous=%zu cbIova=%zu\n", *pcbContiguous, cbIova)); uIova += *pcbContiguous; cbIova -= *pcbContiguous; /* We currently are including any permission denied pages as cache misses too.*/ STAM_COUNTER_INC(&pThis->StatAccessCacheMiss); #endif /* Lookup the IOVA from the device table. */ rc = iommuAmdDteLookup(pDevIns, idDevice, uIova, cbIova, fPerm, enmOp, pGCPhysSpa, pcbContiguous); if (RT_SUCCESS(rc)) { /* likely */ } else { Assert(rc != VERR_OUT_OF_RANGE); LogFunc(("DTE lookup failed! idDevice=%#x uIova=%#RX64 fPerm=%u cbIova=%zu rc=%#Rrc\n", idDevice, uIova, fPerm, cbIova, rc)); } return rc; } /* Addresses are forwarded without translation when the IOMMU is disabled. */ *pGCPhysSpa = uIova; *pcbContiguous = cbIova; return VINF_SUCCESS; } /** * Memory access bulk (one or more 4K pages) request from a device. * * @returns VBox status code. * @param pDevIns The IOMMU device instance. * @param idDevice The device ID (bus, device, function). * @param cIovas The number of addresses being accessed. * @param pauIovas The I/O virtual addresses for each page being accessed. * @param fFlags The access flags, see PDMIOMMU_MEM_F_XXX. * @param paGCPhysSpa Where to store the translated physical addresses. * * @thread Any. */ static DECLCALLBACK(int) iommuAmdMemBulkAccess(PPDMDEVINS pDevIns, uint16_t idDevice, size_t cIovas, uint64_t const *pauIovas, uint32_t fFlags, PRTGCPHYS paGCPhysSpa) { /* Validate. 
*/ AssertPtr(pDevIns); Assert(cIovas > 0); AssertPtr(pauIovas); AssertPtr(paGCPhysSpa); Assert(!(fFlags & ~PDMIOMMU_MEM_F_VALID_MASK)); PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); IOMMU_CTRL_T const Ctrl = iommuAmdGetCtrlUnlocked(pThis); if (Ctrl.n.u1IommuEn) { IOMMUOP enmOp; uint8_t fPerm; iommuAmdMemAccessGetPermAndOp(pThis, fFlags, &enmOp, &fPerm, true /* fBulk */); LogFlowFunc(("%s: idDevice=%#x cIovas=%zu\n", iommuAmdMemAccessGetPermName(fPerm), idDevice, cIovas)); for (size_t i = 0; i < cIovas; i++) { int rc; size_t cbContig; #ifdef IOMMU_WITH_IOTLBE_CACHE /* Lookup the IOVA from the IOTLB cache. */ rc = iommuAmdIotlbCacheLookup(pDevIns, idDevice, pauIovas[i], X86_PAGE_SIZE, fPerm, enmOp, &paGCPhysSpa[i], &cbContig); if (rc == VINF_SUCCESS) { Assert(cbContig == X86_PAGE_SIZE); Assert(paGCPhysSpa[i] != NIL_RTGCPHYS); STAM_COUNTER_INC(&pThis->StatAccessCacheHitFull); continue; } Assert(rc == VERR_NOT_FOUND || rc == VERR_IOMMU_ADDR_ACCESS_DENIED); STAM_COUNTER_INC(&pThis->StatAccessCacheMiss); #endif /* Lookup the IOVA from the device table. */ rc = iommuAmdDteLookup(pDevIns, idDevice, pauIovas[i], X86_PAGE_SIZE, fPerm, enmOp, &paGCPhysSpa[i], &cbContig); if (RT_SUCCESS(rc)) { /* likely */ } else { LogFunc(("Failed! idDevice=%#x uIova=%#RX64 fPerm=%u rc=%Rrc\n", idDevice, pauIovas[i], fPerm, rc)); return rc; } Assert(cbContig == X86_PAGE_SIZE); } } else { /* Addresses are forwarded without translation when the IOMMU is disabled. */ for (size_t i = 0; i < cIovas; i++) paGCPhysSpa[i] = pauIovas[i]; } return VINF_SUCCESS; } /** * Reads an interrupt remapping table entry from guest memory given its DTE. * * @returns VBox status code. * @param pDevIns The IOMMU device instance. * @param idDevice The device ID (bus, device, function). * @param pDte The device table entry. * @param GCPhysIn The source MSI address (used for reporting errors). * @param uDataIn The source MSI data. * @param enmOp The IOMMU operation being performed. 
* @param pIrte Where to store the interrupt remapping table entry. * * @thread Any. */ static int iommuAmdIrteRead(PPDMDEVINS pDevIns, uint16_t idDevice, PCDTE_T pDte, RTGCPHYS GCPhysIn, uint32_t uDataIn, IOMMUOP enmOp, PIRTE_T pIrte) { /* Ensure the IRTE length is valid. */ Assert(pDte->n.u4IntrTableLength < IOMMU_DTE_INTR_TAB_LEN_MAX); RTGCPHYS const GCPhysIntrTable = pDte->au64[2] & IOMMU_DTE_IRTE_ROOT_PTR_MASK; uint16_t const cbIntrTable = IOMMU_DTE_GET_INTR_TAB_LEN(pDte); uint16_t const offIrte = IOMMU_GET_IRTE_OFF(uDataIn); RTGCPHYS const GCPhysIrte = GCPhysIntrTable + offIrte; /* Ensure the IRTE falls completely within the interrupt table. */ if (offIrte + sizeof(IRTE_T) <= cbIntrTable) { /* likely */ } else { LogFunc(("IRTE exceeds table length (GCPhysIntrTable=%#RGp cbIntrTable=%u offIrte=%#x uDataIn=%#x) -> IOPF\n", GCPhysIntrTable, cbIntrTable, offIrte, uDataIn)); EVT_IO_PAGE_FAULT_T EvtIoPageFault; iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, GCPhysIn, false /* fPresent */, false /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault); iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, NULL /* pIrte */, enmOp, &EvtIoPageFault, kIoPageFaultType_IrteAddrInvalid); return VERR_IOMMU_ADDR_TRANSLATION_FAILED; } /* Read the IRTE from memory. */ Assert(!(GCPhysIrte & 3)); int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysIrte, pIrte, sizeof(*pIrte)); if (RT_SUCCESS(rc)) return VINF_SUCCESS; /** @todo The IOMMU spec. does not tell what kind of error is reported in this * situation. Is it an I/O page fault or a device table hardware error? * There's no interrupt table hardware error event, but it's unclear what * we should do here. */ LogFunc(("Failed to read interrupt table entry at %#RGp. rc=%Rrc -> ???\n", GCPhysIrte, rc)); return VERR_IOMMU_IPE_4; } /** * Remaps the interrupt using the interrupt remapping table. * * @returns VBox status code. * @param pDevIns The IOMMU instance data. 
* @param idDevice The device ID (bus, device, function). * @param pDte The device table entry. * @param enmOp The IOMMU operation being performed. * @param pMsiIn The source MSI. * @param pMsiOut Where to store the remapped MSI. * * @thread Any. */ static int iommuAmdIntrRemap(PPDMDEVINS pDevIns, uint16_t idDevice, PCDTE_T pDte, IOMMUOP enmOp, PCMSIMSG pMsiIn, PMSIMSG pMsiOut) { Assert(pDte->n.u2IntrCtrl == IOMMU_INTR_CTRL_REMAP); IRTE_T Irte; uint32_t const uMsiInData = pMsiIn->Data.u32; int rc = iommuAmdIrteRead(pDevIns, idDevice, pDte, pMsiIn->Addr.u64, uMsiInData, enmOp, &Irte); if (RT_SUCCESS(rc)) { if (Irte.n.u1RemapEnable) { if (!Irte.n.u1GuestMode) { if (Irte.n.u3IntrType <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO) { iommuAmdIrteRemapMsi(pMsiIn, pMsiOut, &Irte); #ifdef IOMMU_WITH_IRTE_CACHE iommuAmdIrteCacheAdd(pDevIns, idDevice, IOMMU_GET_IRTE_OFF(uMsiInData), &Irte); #endif return VINF_SUCCESS; } LogFunc(("Interrupt type (%#x) invalid -> IOPF\n", Irte.n.u3IntrType)); EVT_IO_PAGE_FAULT_T EvtIoPageFault; iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, pMsiIn->Addr.u64, Irte.n.u1RemapEnable, true /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault); iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, &Irte, enmOp, &EvtIoPageFault, kIoPageFaultType_IrteRsvdIntType); return VERR_IOMMU_ADDR_TRANSLATION_FAILED; } LogFunc(("Guest mode not supported -> IOPF\n")); EVT_IO_PAGE_FAULT_T EvtIoPageFault; iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, pMsiIn->Addr.u64, Irte.n.u1RemapEnable, true /* fRsvdNotZero */, false /* fPermDenied */, enmOp, &EvtIoPageFault); iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, &Irte, enmOp, &EvtIoPageFault, kIoPageFaultType_IrteRsvdNotZero); return VERR_IOMMU_ADDR_TRANSLATION_FAILED; } LogFunc(("Remapping disabled -> IOPF\n")); EVT_IO_PAGE_FAULT_T EvtIoPageFault; iommuAmdIoPageFaultEventInit(idDevice, pDte->n.u16DomainId, pMsiIn->Addr.u64, Irte.n.u1RemapEnable, false /* fRsvdNotZero */, false /* 
fPermDenied */, enmOp, &EvtIoPageFault); iommuAmdIoPageFaultEventRaiseWithDte(pDevIns, pDte, &Irte, enmOp, &EvtIoPageFault, kIoPageFaultType_IrteRemapEn); return VERR_IOMMU_ADDR_TRANSLATION_FAILED; } return rc; } /** * Looks up an MSI interrupt from the interrupt remapping table. * * @returns VBox status code. * @param pDevIns The IOMMU instance data. * @param idDevice The device ID (bus, device, function). * @param enmOp The IOMMU operation being performed. * @param pMsiIn The source MSI. * @param pMsiOut Where to store the remapped MSI. * * @thread Any. */ static int iommuAmdIntrTableLookup(PPDMDEVINS pDevIns, uint16_t idDevice, IOMMUOP enmOp, PCMSIMSG pMsiIn, PMSIMSG pMsiOut) { LogFlowFunc(("idDevice=%#x (%#x:%#x:%#x) enmOp=%u\n", idDevice, ((idDevice >> VBOX_PCI_BUS_SHIFT) & VBOX_PCI_BUS_MASK), ((idDevice >> VBOX_PCI_DEVFN_DEV_SHIFT) & VBOX_PCI_DEVFN_DEV_MASK), (idDevice & VBOX_PCI_DEVFN_FUN_MASK), enmOp)); /* Read the device table entry from memory. */ DTE_T Dte; int rc = iommuAmdDteRead(pDevIns, idDevice, enmOp, &Dte); if (RT_SUCCESS(rc)) { #ifdef IOMMU_WITH_IRTE_CACHE iommuAmdDteCacheAdd(pDevIns, idDevice, &Dte, 0 /* fFlags */); #endif /* If the DTE is not valid, all interrupts are forwarded without remapping. */ if (Dte.n.u1IntrMapValid) { /* Validate bits 255:128 of the device table entry when DTE.IV is 1. */ uint64_t const fRsvd0 = Dte.au64[2] & ~IOMMU_DTE_QWORD_2_VALID_MASK; uint64_t const fRsvd1 = Dte.au64[3] & ~IOMMU_DTE_QWORD_3_VALID_MASK; if (RT_LIKELY(!fRsvd0 && !fRsvd1)) { /* likely */ } else { LogFunc(("Invalid reserved bits in DTE (u64[2]=%#RX64 u64[3]=%#RX64) -> Illegal DTE\n", fRsvd0, fRsvd1)); EVT_ILLEGAL_DTE_T Event; iommuAmdIllegalDteEventInit(idDevice, pMsiIn->Addr.u64, true /* fRsvdNotZero */, enmOp, &Event); iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdNotZero); return VERR_IOMMU_INTR_REMAP_FAILED; } /* * LINT0/LINT1 pins cannot be driven by PCI(e) devices. 
Perhaps for a Southbridge * that's connected through HyperTransport it might be possible; but for us, it * doesn't seem we need to specially handle these pins. */ /* * Validate the MSI source address. * * 64-bit MSIs are supported by the PCI and AMD IOMMU spec. However as far as the * CPU is concerned, the MSI region is fixed and we must ensure no other device * claims the region as I/O space. * * See PCI spec. 6.1.4. "Message Signaled Interrupt (MSI) Support". * See AMD IOMMU spec. 2.8 "IOMMU Interrupt Support". * See Intel spec. 10.11.1 "Message Address Register Format". */ if ((pMsiIn->Addr.u64 & VBOX_MSI_ADDR_ADDR_MASK) == VBOX_MSI_ADDR_BASE) { /* * The IOMMU remaps fixed and arbitrated interrupts using the IRTE. * See AMD IOMMU spec. "2.2.5.1 Interrupt Remapping Tables, Guest Virtual APIC Not Enabled". */ uint8_t const u8DeliveryMode = pMsiIn->Data.n.u3DeliveryMode; bool fPassThru = false; switch (u8DeliveryMode) { case VBOX_MSI_DELIVERY_MODE_FIXED: case VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO: { uint8_t const uIntrCtrl = Dte.n.u2IntrCtrl; if (uIntrCtrl == IOMMU_INTR_CTRL_REMAP) { /* Validate the encoded interrupt table length when IntCtl specifies remapping. */ uint8_t const uIntrTabLen = Dte.n.u4IntrTableLength; if (uIntrTabLen < IOMMU_DTE_INTR_TAB_LEN_MAX) { /* * We don't support guest interrupt remapping yet. When we do, we'll need to * check Ctrl.u1GstVirtApicEn and use the guest Virtual APIC Table Root Pointer * in the DTE rather than the Interrupt Root Table Pointer. Since the caller * already reads the control register, add that as a parameter when we eventually * support guest interrupt remapping. For now, just assert. 
*/ PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); Assert(!pThis->ExtFeat.n.u1GstVirtApicSup); NOREF(pThis); return iommuAmdIntrRemap(pDevIns, idDevice, &Dte, enmOp, pMsiIn, pMsiOut); } LogFunc(("Invalid interrupt table length %#x -> Illegal DTE\n", uIntrTabLen)); EVT_ILLEGAL_DTE_T Event; iommuAmdIllegalDteEventInit(idDevice, pMsiIn->Addr.u64, false /* fRsvdNotZero */, enmOp, &Event); iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdIntTabLen); return VERR_IOMMU_INTR_REMAP_FAILED; } if (uIntrCtrl == IOMMU_INTR_CTRL_FWD_UNMAPPED) { fPassThru = true; break; } if (uIntrCtrl == IOMMU_INTR_CTRL_TARGET_ABORT) { LogRelMax(10, ("%s: Remapping disallowed for fixed/arbitrated interrupt %#x -> Target abort\n", IOMMU_LOG_PFX, pMsiIn->Data.n.u8Vector)); iommuAmdSetPciTargetAbort(pDevIns); return VERR_IOMMU_INTR_REMAP_DENIED; } Assert(uIntrCtrl == IOMMU_INTR_CTRL_RSVD); /* Paranoia. */ LogRelMax(10, ("%s: IntCtl mode invalid %#x -> Illegal DTE\n", IOMMU_LOG_PFX, uIntrCtrl)); EVT_ILLEGAL_DTE_T Event; iommuAmdIllegalDteEventInit(idDevice, pMsiIn->Addr.u64, true /* fRsvdNotZero */, enmOp, &Event); iommuAmdIllegalDteEventRaise(pDevIns, enmOp, &Event, kIllegalDteType_RsvdIntCtl); return VERR_IOMMU_INTR_REMAP_FAILED; } /* SMIs are passed through unmapped. We don't implement SMI filters. */ case VBOX_MSI_DELIVERY_MODE_SMI: fPassThru = true; break; case VBOX_MSI_DELIVERY_MODE_NMI: fPassThru = Dte.n.u1NmiPassthru; break; case VBOX_MSI_DELIVERY_MODE_INIT: fPassThru = Dte.n.u1InitPassthru; break; case VBOX_MSI_DELIVERY_MODE_EXT_INT: fPassThru = Dte.n.u1ExtIntPassthru; break; default: { LogRelMax(10, ("%s: MSI data delivery mode invalid %#x -> Target abort\n", IOMMU_LOG_PFX, u8DeliveryMode)); iommuAmdSetPciTargetAbort(pDevIns); return VERR_IOMMU_INTR_REMAP_FAILED; } } /* * For those other than fixed and arbitrated interrupts, destination mode must be 0 (physical). * See AMD IOMMU spec. The note below Table 19: "IOMMU Controls and Actions for Upstream Interrupts". 
*/ if ( u8DeliveryMode <= VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO || !pMsiIn->Addr.n.u1DestMode) { if (fPassThru) { *pMsiOut = *pMsiIn; return VINF_SUCCESS; } LogRelMax(10, ("%s: Remapping/passthru disallowed for interrupt %#x -> Target abort\n", IOMMU_LOG_PFX, pMsiIn->Data.n.u8Vector)); } else LogRelMax(10, ("%s: Logical destination mode invalid for delivery mode %#x\n -> Target abort\n", IOMMU_LOG_PFX, u8DeliveryMode)); iommuAmdSetPciTargetAbort(pDevIns); return VERR_IOMMU_INTR_REMAP_DENIED; } else { /** @todo should be cause a PCI target abort here? */ LogRelMax(10, ("%s: MSI address region invalid %#RX64\n", IOMMU_LOG_PFX, pMsiIn->Addr.u64)); return VERR_IOMMU_INTR_REMAP_FAILED; } } else { LogFlowFunc(("DTE interrupt map not valid\n")); *pMsiOut = *pMsiIn; return VINF_SUCCESS; } } LogFunc(("Failed to read device table entry. idDevice=%#x rc=%Rrc\n", idDevice, rc)); return VERR_IOMMU_INTR_REMAP_FAILED; } /** * Interrupt remap request from a device. * * @returns VBox status code. * @param pDevIns The IOMMU device instance. * @param idDevice The device ID (bus, device, function). * @param pMsiIn The source MSI. * @param pMsiOut Where to store the remapped MSI. */ static DECLCALLBACK(int) iommuAmdMsiRemap(PPDMDEVINS pDevIns, uint16_t idDevice, PCMSIMSG pMsiIn, PMSIMSG pMsiOut) { /* Validate. */ Assert(pDevIns); Assert(pMsiIn); Assert(pMsiOut); PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); /* Interrupts are forwarded with remapping when the IOMMU is disabled. 
*/ IOMMU_CTRL_T const Ctrl = iommuAmdGetCtrlUnlocked(pThis); if (Ctrl.n.u1IommuEn) { STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMsiRemap)); int rc; #ifdef IOMMU_WITH_IRTE_CACHE STAM_PROFILE_ADV_START(&pThis->StatProfIrteCacheLookup, a); rc = iommuAmdIrteCacheLookup(pDevIns, idDevice, IOMMUOP_INTR_REQ, pMsiIn, pMsiOut); STAM_PROFILE_ADV_STOP(&pThis->StatProfIrteCacheLookup, a); if (RT_SUCCESS(rc)) { STAM_COUNTER_INC(&pThis->StatIntrCacheHit); return VINF_SUCCESS; } STAM_COUNTER_INC(&pThis->StatIntrCacheMiss); #endif STAM_PROFILE_ADV_START(&pThis->StatProfIrteLookup, a); rc = iommuAmdIntrTableLookup(pDevIns, idDevice, IOMMUOP_INTR_REQ, pMsiIn, pMsiOut); STAM_PROFILE_ADV_STOP(&pThis->StatProfIrteLookup, a); return rc; } *pMsiOut = *pMsiIn; return VINF_SUCCESS; } /** * @callback_method_impl{FNIOMMMIONEWWRITE} */ static DECLCALLBACK(VBOXSTRICTRC) iommuAmdMmioWrite(PPDMDEVINS pDevIns, void *pvUser, RTGCPHYS off, void const *pv, unsigned cb) { NOREF(pvUser); Assert(cb == 4 || cb == 8); Assert(!(off & (cb - 1))); PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMmioWrite)); NOREF(pThis); uint64_t const uValue = cb == 8 ? *(uint64_t const *)pv : *(uint32_t const *)pv; return iommuAmdRegisterWrite(pDevIns, off, cb, uValue); } /** * @callback_method_impl{FNIOMMMIONEWREAD} */ static DECLCALLBACK(VBOXSTRICTRC) iommuAmdMmioRead(PPDMDEVINS pDevIns, void *pvUser, RTGCPHYS off, void *pv, unsigned cb) { NOREF(pvUser); Assert(cb == 4 || cb == 8); Assert(!(off & (cb - 1))); PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); STAM_COUNTER_INC(&pThis->CTX_SUFF_Z(StatMmioRead)); NOREF(pThis); uint64_t uResult; VBOXSTRICTRC rcStrict = iommuAmdRegisterRead(pDevIns, off, &uResult); if (rcStrict == VINF_SUCCESS) { if (cb == 8) *(uint64_t *)pv = uResult; else *(uint32_t *)pv = (uint32_t)uResult; } return rcStrict; } #ifdef IN_RING3 /** * Processes an IOMMU command. * * @returns VBox status code. * @param pDevIns The IOMMU device instance. 
* @param pCmd The command to process. * @param GCPhysCmd The system physical address of the command. * @param pEvtError Where to store the error event in case of failures. * * @thread Command thread. */ static int iommuAmdR3CmdProcess(PPDMDEVINS pDevIns, PCCMD_GENERIC_T pCmd, RTGCPHYS GCPhysCmd, PEVT_GENERIC_T pEvtError) { PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3); STAM_COUNTER_INC(&pThis->StatCmd); uint8_t const bCmd = pCmd->n.u4Opcode; switch (bCmd) { case IOMMU_CMD_COMPLETION_WAIT: { STAM_COUNTER_INC(&pThis->StatCmdCompWait); PCCMD_COMWAIT_T pCmdComWait = (PCCMD_COMWAIT_T)pCmd; AssertCompile(sizeof(*pCmdComWait) == sizeof(*pCmd)); /* Validate reserved bits in the command. */ if (!(pCmdComWait->au64[0] & ~IOMMU_CMD_COM_WAIT_QWORD_0_VALID_MASK)) { /* If Completion Store is requested, write the StoreData to the specified address. */ if (pCmdComWait->n.u1Store) { RTGCPHYS const GCPhysStore = RT_MAKE_U64(pCmdComWait->n.u29StoreAddrLo << 3, pCmdComWait->n.u20StoreAddrHi); uint64_t const u64Data = pCmdComWait->n.u64StoreData; int rc = PDMDevHlpPCIPhysWrite(pDevIns, GCPhysStore, &u64Data, sizeof(u64Data)); if (RT_FAILURE(rc)) { LogFunc(("Cmd(%#x): Failed to write StoreData (%#RX64) to %#RGp, rc=%Rrc\n", bCmd, u64Data, GCPhysStore, rc)); iommuAmdCmdHwErrorEventInit(GCPhysStore, (PEVT_CMD_HW_ERR_T)pEvtError); return VERR_IOMMU_CMD_HW_ERROR; } } /* If the command requests an interrupt and completion wait interrupts are enabled, raise it. 
*/ if (pCmdComWait->n.u1Interrupt) { IOMMU_LOCK(pDevIns, pThisR3); ASMAtomicOrU64(&pThis->Status.u64, IOMMU_STATUS_COMPLETION_WAIT_INTR); bool const fRaiseInt = pThis->Ctrl.n.u1CompWaitIntrEn; IOMMU_UNLOCK(pDevIns, pThisR3); if (fRaiseInt) iommuAmdMsiInterruptRaise(pDevIns); } return VINF_SUCCESS; } iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError); return VERR_IOMMU_CMD_INVALID_FORMAT; } case IOMMU_CMD_INV_DEV_TAB_ENTRY: { STAM_COUNTER_INC(&pThis->StatCmdInvDte); PCCMD_INV_DTE_T pCmdInvDte = (PCCMD_INV_DTE_T)pCmd; AssertCompile(sizeof(*pCmdInvDte) == sizeof(*pCmd)); /* Validate reserved bits in the command. */ if ( !(pCmdInvDte->au64[0] & ~IOMMU_CMD_INV_DTE_QWORD_0_VALID_MASK) && !(pCmdInvDte->au64[1] & ~IOMMU_CMD_INV_DTE_QWORD_1_VALID_MASK)) { #ifdef IOMMU_WITH_DTE_CACHE iommuAmdDteCacheRemove(pDevIns, pCmdInvDte->n.u16DevId); #endif return VINF_SUCCESS; } iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError); return VERR_IOMMU_CMD_INVALID_FORMAT; } case IOMMU_CMD_INV_IOMMU_PAGES: { STAM_COUNTER_INC(&pThis->StatCmdInvIommuPages); PCCMD_INV_IOMMU_PAGES_T pCmdInvPages = (PCCMD_INV_IOMMU_PAGES_T)pCmd; AssertCompile(sizeof(*pCmdInvPages) == sizeof(*pCmd)); /* Validate reserved bits in the command. */ if ( !(pCmdInvPages->au64[0] & ~IOMMU_CMD_INV_IOMMU_PAGES_QWORD_0_VALID_MASK) && !(pCmdInvPages->au64[1] & ~IOMMU_CMD_INV_IOMMU_PAGES_QWORD_1_VALID_MASK)) { #ifdef IOMMU_WITH_IOTLBE_CACHE uint64_t const uIova = RT_MAKE_U64(pCmdInvPages->n.u20AddrLo << X86_PAGE_4K_SHIFT, pCmdInvPages->n.u32AddrHi); uint16_t const idDomain = pCmdInvPages->n.u16DomainId; uint8_t cShift; if (!pCmdInvPages->n.u1Size) cShift = X86_PAGE_4K_SHIFT; else { /* Find the first clear bit starting from bit 12 to 64 of the I/O virtual address. 
*/ unsigned const uFirstZeroBit = ASMBitLastSetU64(~(uIova >> X86_PAGE_4K_SHIFT)); cShift = X86_PAGE_4K_SHIFT + uFirstZeroBit; /* * For the address 0x7ffffffffffff000, cShift would be 76 (12+64) and the code below * would do the right thing by clearing the entire cache for the specified domain ID. * * However, for the address 0xfffffffffffff000, cShift would be computed as 12. * IOMMU behavior is undefined in this case, so it's safe to invalidate just one page. * A debug-time assert is in place here to let us know if any software tries this. * * See AMD IOMMU spec. 2.4.3 "INVALIDATE_IOMMU_PAGES". * See AMD IOMMU spec. Table 14: "Example Page Size Encodings". */ Assert(uIova != UINT64_C(0xfffffffffffff000)); } /* * Validate invalidation size. * See AMD IOMMU spec. 2.2.3 "I/O Page Tables for Host Translations". */ if ( cShift == 12 /* 4K */ || cShift == 13 /* 8K */ || cShift == 14 /* 16K */ || cShift == 20 /* 1M */ || cShift == 22 /* 4M */ || cShift == 32 /* 4G */) { /* Remove the range of I/O virtual addresses requesting to be invalidated. */ size_t const cbIova = RT_BIT_64(cShift); iommuAmdIotlbRemoveRange(pDevIns, idDomain, uIova, cbIova); } else { /* * The guest provided size is invalid or exceeds the largest, meaningful page size. * In such situations we must remove all ranges for the specified domain ID. */ iommuAmdIotlbRemoveDomainId(pDevIns, idDomain); } #endif return VINF_SUCCESS; } iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError); return VERR_IOMMU_CMD_INVALID_FORMAT; } case IOMMU_CMD_INV_IOTLB_PAGES: { STAM_COUNTER_INC(&pThis->StatCmdInvIotlbPages); uint32_t const uCapHdr = PDMPciDevGetDWord(pDevIns->apPciDevs[0], IOMMU_PCI_OFF_CAP_HDR); if (RT_BF_GET(uCapHdr, IOMMU_BF_CAPHDR_IOTLB_SUP)) { /** @todo IOMMU: Implement remote IOTLB invalidation. 
*/ return VERR_NOT_IMPLEMENTED; } iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError); return VERR_IOMMU_CMD_NOT_SUPPORTED; } case IOMMU_CMD_INV_INTR_TABLE: { STAM_COUNTER_INC(&pThis->StatCmdInvIntrTable); PCCMD_INV_INTR_TABLE_T pCmdInvIntrTable = (PCCMD_INV_INTR_TABLE_T)pCmd; AssertCompile(sizeof(*pCmdInvIntrTable) == sizeof(*pCmd)); /* Validate reserved bits in the command. */ if ( !(pCmdInvIntrTable->au64[0] & ~IOMMU_CMD_INV_INTR_TABLE_QWORD_0_VALID_MASK) && !(pCmdInvIntrTable->au64[1] & ~IOMMU_CMD_INV_INTR_TABLE_QWORD_1_VALID_MASK)) { #ifdef IOMMU_WITH_IRTE_CACHE iommuAmdIrteCacheRemove(pDevIns, pCmdInvIntrTable->u.u16DevId); #endif return VINF_SUCCESS; } iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError); return VERR_IOMMU_CMD_INVALID_FORMAT; } case IOMMU_CMD_PREFETCH_IOMMU_PAGES: { /* Linux doesn't use prefetching of IOMMU pages, so we don't bother for now. */ STAM_COUNTER_INC(&pThis->StatCmdPrefIommuPages); Assert(!pThis->ExtFeat.n.u1PrefetchSup); iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError); return VERR_IOMMU_CMD_NOT_SUPPORTED; } case IOMMU_CMD_COMPLETE_PPR_REQ: { STAM_COUNTER_INC(&pThis->StatCmdCompletePprReq); /* We don't support PPR requests yet. */ Assert(!pThis->ExtFeat.n.u1PprSup); iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError); return VERR_IOMMU_CMD_NOT_SUPPORTED; } case IOMMU_CMD_INV_IOMMU_ALL: { STAM_COUNTER_INC(&pThis->StatCmdInvIommuAll); if (pThis->ExtFeat.n.u1InvAllSup) { PCCMD_INV_IOMMU_ALL_T pCmdInvAll = (PCCMD_INV_IOMMU_ALL_T)pCmd; AssertCompile(sizeof(*pCmdInvAll) == sizeof(*pCmd)); /* Validate reserved bits in the command. 
*/ if ( !(pCmdInvAll->au64[0] & ~IOMMU_CMD_INV_IOMMU_ALL_QWORD_0_VALID_MASK) && !(pCmdInvAll->au64[1] & ~IOMMU_CMD_INV_IOMMU_ALL_QWORD_1_VALID_MASK)) { #ifdef IOMMU_WITH_DTE_CACHE iommuAmdDteCacheRemoveAll(pDevIns); #endif #ifdef IOMMU_WITH_IOTLBE_CACHE iommuAmdIotlbRemoveAll(pDevIns); #endif return VINF_SUCCESS; } iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError); return VERR_IOMMU_CMD_INVALID_FORMAT; } iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError); return VERR_IOMMU_CMD_NOT_SUPPORTED; } } STAM_COUNTER_DEC(&pThis->StatCmd); LogFunc(("Cmd(%#x): Unrecognized\n", bCmd)); iommuAmdIllegalCmdEventInit(GCPhysCmd, (PEVT_ILLEGAL_CMD_ERR_T)pEvtError); return VERR_IOMMU_CMD_NOT_SUPPORTED; } /** * The IOMMU command thread. * * @returns VBox status code. * @param pDevIns The IOMMU device instance. * @param pThread The command thread. */ static DECLCALLBACK(int) iommuAmdR3CmdThread(PPDMDEVINS pDevIns, PPDMTHREAD pThread) { PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3); if (pThread->enmState == PDMTHREADSTATE_INITIALIZING) return VINF_SUCCESS; /* * Pre-allocate the maximum command buffer size supported by the IOMMU. * This avoid trashing the heap as well as not wasting time allocating * and freeing buffers while processing commands. */ size_t const cbMaxCmdBuf = sizeof(CMD_GENERIC_T) * iommuAmdGetBufMaxEntries(15); void *pvCmds = RTMemAllocZ(cbMaxCmdBuf); AssertPtrReturn(pvCmds, VERR_NO_MEMORY); while (pThread->enmState == PDMTHREADSTATE_RUNNING) { /* * Sleep perpetually until we are woken up to process commands. 
*/ bool const fSignaled = ASMAtomicXchgBool(&pThis->fCmdThreadSignaled, false); if (!fSignaled) { int rc = PDMDevHlpSUPSemEventWaitNoResume(pDevIns, pThis->hEvtCmdThread, RT_INDEFINITE_WAIT); AssertLogRelMsgReturn(RT_SUCCESS(rc) || rc == VERR_INTERRUPTED, ("%Rrc\n", rc), rc); if (RT_UNLIKELY(pThread->enmState != PDMTHREADSTATE_RUNNING)) break; Log4Func(("Woken up with rc=%Rrc\n", rc)); ASMAtomicWriteBool(&pThis->fCmdThreadSignaled, false); } /* * Fetch and process IOMMU commands. */ /** @todo r=ramshankar: We currently copy all commands from guest memory into a * temporary host buffer before processing them as a batch. If we want to * save on host memory a bit, we could (once PGM has the necessary APIs) * lock the page mappings page mappings and access them directly. */ IOMMU_LOCK(pDevIns, pThisR3); if (pThis->Status.n.u1CmdBufRunning) { /* Get the offsets we need to read commands from memory (circular buffer offset). */ uint32_t const cbCmdBuf = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len); uint32_t const offTail = pThis->CmdBufTailPtr.n.off; uint32_t offHead = pThis->CmdBufHeadPtr.n.off; /* Validate. */ Assert(!(offHead & ~IOMMU_CMD_BUF_HEAD_PTR_VALID_MASK)); Assert(offHead < cbCmdBuf); Assert(cbCmdBuf <= cbMaxCmdBuf); if (offHead != offTail) { /* Read the entire command buffer from memory (avoids multiple PGM calls). */ RTGCPHYS const GCPhysCmdBufBase = pThis->CmdBufBaseAddr.n.u40Base << X86_PAGE_4K_SHIFT; IOMMU_UNLOCK(pDevIns, pThisR3); int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysCmdBufBase, pvCmds, cbCmdBuf); IOMMU_LOCK(pDevIns, pThisR3); if (RT_SUCCESS(rc)) { /* Indicate to software we've fetched all commands from the buffer. */ pThis->CmdBufHeadPtr.n.off = offTail; /* Allow IOMMU to do other work while we process commands. */ IOMMU_UNLOCK(pDevIns, pThisR3); /* Process the fetched commands. 
*/ EVT_GENERIC_T EvtError; do { PCCMD_GENERIC_T pCmd = (PCCMD_GENERIC_T)((uintptr_t)pvCmds + offHead); rc = iommuAmdR3CmdProcess(pDevIns, pCmd, GCPhysCmdBufBase + offHead, &EvtError); if (RT_FAILURE(rc)) { if ( rc == VERR_IOMMU_CMD_NOT_SUPPORTED || rc == VERR_IOMMU_CMD_INVALID_FORMAT) { Assert(EvtError.n.u4EvtCode == IOMMU_EVT_ILLEGAL_CMD_ERROR); iommuAmdIllegalCmdEventRaise(pDevIns, (PCEVT_ILLEGAL_CMD_ERR_T)&EvtError); } else if (rc == VERR_IOMMU_CMD_HW_ERROR) { Assert(EvtError.n.u4EvtCode == IOMMU_EVT_COMMAND_HW_ERROR); LogFunc(("Raising command hardware error. Cmd=%#x -> COMMAND_HW_ERROR\n", pCmd->n.u4Opcode)); iommuAmdCmdHwErrorEventRaise(pDevIns, (PCEVT_CMD_HW_ERR_T)&EvtError); } break; } /* Move to the next command in the circular buffer. */ offHead = (offHead + sizeof(CMD_GENERIC_T)) % cbCmdBuf; } while (offHead != offTail); } else { LogFunc(("Failed to read command at %#RGp. rc=%Rrc -> COMMAND_HW_ERROR\n", GCPhysCmdBufBase, rc)); EVT_CMD_HW_ERR_T EvtCmdHwErr; iommuAmdCmdHwErrorEventInit(GCPhysCmdBufBase, &EvtCmdHwErr); iommuAmdCmdHwErrorEventRaise(pDevIns, &EvtCmdHwErr); IOMMU_UNLOCK(pDevIns, pThisR3); } } else IOMMU_UNLOCK(pDevIns, pThisR3); } else IOMMU_UNLOCK(pDevIns, pThisR3); } RTMemFree(pvCmds); LogFlowFunc(("Command thread terminating\n")); return VINF_SUCCESS; } /** * Wakes up the command thread so it can respond to a state change. * * @returns VBox status code. * @param pDevIns The IOMMU device instance. * @param pThread The command thread. */ static DECLCALLBACK(int) iommuAmdR3CmdThreadWakeUp(PPDMDEVINS pDevIns, PPDMTHREAD pThread) { RT_NOREF(pThread); LogFlowFunc(("\n")); PCIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); return PDMDevHlpSUPSemEventSignal(pDevIns, pThis->hEvtCmdThread); } /** * @callback_method_impl{FNPCICONFIGREAD} */ static DECLCALLBACK(VBOXSTRICTRC) iommuAmdR3PciConfigRead(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t uAddress, unsigned cb, uint32_t *pu32Value) { /** @todo IOMMU: PCI config read stat counter. 
*/ VBOXSTRICTRC rcStrict = PDMDevHlpPCIConfigRead(pDevIns, pPciDev, uAddress, cb, pu32Value); Log3Func(("uAddress=%#x (cb=%u) -> %#x. rc=%Rrc\n", uAddress, cb, *pu32Value, VBOXSTRICTRC_VAL(rcStrict))); return rcStrict; } /** * Sets up the IOMMU MMIO region (usually in response to an IOMMU base address * register write). * * @returns VBox status code. * @param pDevIns The IOMMU instance data. * * @remarks Call this function only when the IOMMU BAR is enabled. */ static int iommuAmdR3MmioSetup(PPDMDEVINS pDevIns) { PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); Assert(pThis->IommuBar.n.u1Enable); Assert(pThis->hMmio != NIL_IOMMMIOHANDLE); /* Paranoia. Ensure we have a valid IOM MMIO handle. */ Assert(!pThis->ExtFeat.n.u1PerfCounterSup); /* Base is 16K aligned when performance counters aren't supported. */ RTGCPHYS const GCPhysMmioBase = RT_MAKE_U64(pThis->IommuBar.au32[0] & 0xffffc000, pThis->IommuBar.au32[1]); RTGCPHYS const GCPhysMmioBasePrev = PDMDevHlpMmioGetMappingAddress(pDevIns, pThis->hMmio); /* If the MMIO region is already mapped at the specified address, we're done. */ Assert(GCPhysMmioBase != NIL_RTGCPHYS); if (GCPhysMmioBasePrev == GCPhysMmioBase) return VINF_SUCCESS; /* Unmap the previous MMIO region (which is at a different address). */ if (GCPhysMmioBasePrev != NIL_RTGCPHYS) { LogFlowFunc(("Unmapping previous MMIO region at %#RGp\n", GCPhysMmioBasePrev)); int rc = PDMDevHlpMmioUnmap(pDevIns, pThis->hMmio); if (RT_FAILURE(rc)) { LogFunc(("Failed to unmap MMIO region at %#RGp. rc=%Rrc\n", GCPhysMmioBasePrev, rc)); return rc; } } /* Map the newly specified MMIO region. */ LogFlowFunc(("Mapping MMIO region at %#RGp\n", GCPhysMmioBase)); int rc = PDMDevHlpMmioMap(pDevIns, pThis->hMmio, GCPhysMmioBase); if (RT_FAILURE(rc)) { LogFunc(("Failed to unmap MMIO region at %#RGp. 
rc=%Rrc\n", GCPhysMmioBase, rc)); return rc; } return VINF_SUCCESS; } /** * @callback_method_impl{FNPCICONFIGWRITE} */ static DECLCALLBACK(VBOXSTRICTRC) iommuAmdR3PciConfigWrite(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, uint32_t uAddress, unsigned cb, uint32_t u32Value) { PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); /* * Discard writes to read-only registers that are specific to the IOMMU. * Other common PCI registers are handled by the generic code, see devpciR3IsConfigByteWritable(). * See PCI spec. 6.1. "Configuration Space Organization". */ switch (uAddress) { case IOMMU_PCI_OFF_CAP_HDR: /* All bits are read-only. */ case IOMMU_PCI_OFF_RANGE_REG: /* We don't have any devices integrated with the IOMMU. */ case IOMMU_PCI_OFF_MISCINFO_REG_0: /* We don't support MSI-X. */ case IOMMU_PCI_OFF_MISCINFO_REG_1: /* We don't support guest-address translation. */ { LogFunc(("PCI config write (%#RX32) to read-only register %#x -> Ignored\n", u32Value, uAddress)); return VINF_SUCCESS; } } PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3); IOMMU_LOCK(pDevIns, pThisR3); VBOXSTRICTRC rcStrict; switch (uAddress) { case IOMMU_PCI_OFF_BASE_ADDR_REG_LO: { if (!pThis->IommuBar.n.u1Enable) { pThis->IommuBar.au32[0] = u32Value & IOMMU_BAR_VALID_MASK; if (pThis->IommuBar.n.u1Enable) rcStrict = iommuAmdR3MmioSetup(pDevIns); else rcStrict = VINF_SUCCESS; } else { LogFunc(("Writing Base Address (Lo) when it's already enabled -> Ignored\n")); rcStrict = VINF_SUCCESS; } break; } case IOMMU_PCI_OFF_BASE_ADDR_REG_HI: { if (!pThis->IommuBar.n.u1Enable) { AssertCompile((IOMMU_BAR_VALID_MASK >> 32) == 0xffffffff); pThis->IommuBar.au32[1] = u32Value; } else LogFunc(("Writing Base Address (Hi) when it's already enabled -> Ignored\n")); rcStrict = VINF_SUCCESS; break; } case IOMMU_PCI_OFF_MSI_CAP_HDR: { u32Value |= RT_BIT(23); /* 64-bit MSI addressess must always be enabled for IOMMU. 
*/ RT_FALL_THRU(); } default: { rcStrict = PDMDevHlpPCIConfigWrite(pDevIns, pPciDev, uAddress, cb, u32Value); break; } } IOMMU_UNLOCK(pDevIns, pThisR3); Log3Func(("uAddress=%#x (cb=%u) with %#x. rc=%Rrc\n", uAddress, cb, u32Value, VBOXSTRICTRC_VAL(rcStrict))); return rcStrict; } /** * @callback_method_impl{FNDBGFHANDLERDEV} */ static DECLCALLBACK(void) iommuAmdR3DbgInfo(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs) { PCIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); PCPDMPCIDEV pPciDev = pDevIns->apPciDevs[0]; PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev); bool fVerbose; if ( pszArgs && !strncmp(pszArgs, RT_STR_TUPLE("verbose"))) fVerbose = true; else fVerbose = false; pHlp->pfnPrintf(pHlp, "AMD-IOMMU:\n"); /* Device Table Base Addresses (all segments). */ for (unsigned i = 0; i < RT_ELEMENTS(pThis->aDevTabBaseAddrs); i++) { DEV_TAB_BAR_T const DevTabBar = pThis->aDevTabBaseAddrs[i]; pHlp->pfnPrintf(pHlp, " Device Table BAR %u = %#RX64\n", i, DevTabBar.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Size = %#x (%u bytes)\n", DevTabBar.n.u9Size, IOMMU_GET_DEV_TAB_LEN(&DevTabBar)); pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", DevTabBar.n.u40Base << X86_PAGE_4K_SHIFT); } } /* Command Buffer Base Address Register. */ { CMD_BUF_BAR_T const CmdBufBar = pThis->CmdBufBaseAddr; uint8_t const uEncodedLen = CmdBufBar.n.u4Len; uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen); uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen); pHlp->pfnPrintf(pHlp, " Command Buffer BAR = %#RX64\n", CmdBufBar.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", CmdBufBar.n.u40Base << X86_PAGE_4K_SHIFT); pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen, cEntries, cbBuffer); } } /* Event Log Base Address Register. 
*/ { EVT_LOG_BAR_T const EvtLogBar = pThis->EvtLogBaseAddr; uint8_t const uEncodedLen = EvtLogBar.n.u4Len; uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen); uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen); pHlp->pfnPrintf(pHlp, " Event Log BAR = %#RX64\n", EvtLogBar.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", EvtLogBar.n.u40Base << X86_PAGE_4K_SHIFT); pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen, cEntries, cbBuffer); } } /* IOMMU Control Register. */ { IOMMU_CTRL_T const Ctrl = pThis->Ctrl; pHlp->pfnPrintf(pHlp, " Control = %#RX64\n", Ctrl.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " IOMMU enable = %RTbool\n", Ctrl.n.u1IommuEn); pHlp->pfnPrintf(pHlp, " HT Tunnel translation enable = %RTbool\n", Ctrl.n.u1HtTunEn); pHlp->pfnPrintf(pHlp, " Event log enable = %RTbool\n", Ctrl.n.u1EvtLogEn); pHlp->pfnPrintf(pHlp, " Event log interrupt enable = %RTbool\n", Ctrl.n.u1EvtIntrEn); pHlp->pfnPrintf(pHlp, " Completion wait interrupt enable = %RTbool\n", Ctrl.n.u1EvtIntrEn); pHlp->pfnPrintf(pHlp, " Invalidation timeout = %u\n", Ctrl.n.u3InvTimeOut); pHlp->pfnPrintf(pHlp, " Pass posted write = %RTbool\n", Ctrl.n.u1PassPW); pHlp->pfnPrintf(pHlp, " Respose Pass posted write = %RTbool\n", Ctrl.n.u1ResPassPW); pHlp->pfnPrintf(pHlp, " Coherent = %RTbool\n", Ctrl.n.u1Coherent); pHlp->pfnPrintf(pHlp, " Isochronous = %RTbool\n", Ctrl.n.u1Isoc); pHlp->pfnPrintf(pHlp, " Command buffer enable = %RTbool\n", Ctrl.n.u1CmdBufEn); pHlp->pfnPrintf(pHlp, " PPR log enable = %RTbool\n", Ctrl.n.u1PprLogEn); pHlp->pfnPrintf(pHlp, " PPR interrupt enable = %RTbool\n", Ctrl.n.u1PprIntrEn); pHlp->pfnPrintf(pHlp, " PPR enable = %RTbool\n", Ctrl.n.u1PprEn); pHlp->pfnPrintf(pHlp, " Guest translation eanble = %RTbool\n", Ctrl.n.u1GstTranslateEn); pHlp->pfnPrintf(pHlp, " Guest virtual-APIC enable = %RTbool\n", Ctrl.n.u1GstVirtApicEn); pHlp->pfnPrintf(pHlp, " CRW = %#x\n", Ctrl.n.u4Crw); pHlp->pfnPrintf(pHlp, " SMI filter 
enable = %RTbool\n", Ctrl.n.u1SmiFilterEn); pHlp->pfnPrintf(pHlp, " Self-writeback disable = %RTbool\n", Ctrl.n.u1SelfWriteBackDis); pHlp->pfnPrintf(pHlp, " SMI filter log enable = %RTbool\n", Ctrl.n.u1SmiFilterLogEn); pHlp->pfnPrintf(pHlp, " Guest virtual-APIC mode enable = %#x\n", Ctrl.n.u3GstVirtApicModeEn); pHlp->pfnPrintf(pHlp, " Guest virtual-APIC GA log enable = %RTbool\n", Ctrl.n.u1GstLogEn); pHlp->pfnPrintf(pHlp, " Guest virtual-APIC interrupt enable = %RTbool\n", Ctrl.n.u1GstIntrEn); pHlp->pfnPrintf(pHlp, " Dual PPR log enable = %#x\n", Ctrl.n.u2DualPprLogEn); pHlp->pfnPrintf(pHlp, " Dual event log enable = %#x\n", Ctrl.n.u2DualEvtLogEn); pHlp->pfnPrintf(pHlp, " Device table segmentation enable = %#x\n", Ctrl.n.u3DevTabSegEn); pHlp->pfnPrintf(pHlp, " Privilege abort enable = %#x\n", Ctrl.n.u2PrivAbortEn); pHlp->pfnPrintf(pHlp, " PPR auto response enable = %RTbool\n", Ctrl.n.u1PprAutoRespEn); pHlp->pfnPrintf(pHlp, " MARC enable = %RTbool\n", Ctrl.n.u1MarcEn); pHlp->pfnPrintf(pHlp, " Block StopMark enable = %RTbool\n", Ctrl.n.u1BlockStopMarkEn); pHlp->pfnPrintf(pHlp, " PPR auto response always-on enable = %RTbool\n", Ctrl.n.u1PprAutoRespAlwaysOnEn); pHlp->pfnPrintf(pHlp, " Domain IDPNE = %RTbool\n", Ctrl.n.u1DomainIDPNE); pHlp->pfnPrintf(pHlp, " Enhanced PPR handling = %RTbool\n", Ctrl.n.u1EnhancedPpr); pHlp->pfnPrintf(pHlp, " Host page table access/dirty bit update = %#x\n", Ctrl.n.u2HstAccDirtyBitUpdate); pHlp->pfnPrintf(pHlp, " Guest page table dirty bit disable = %RTbool\n", Ctrl.n.u1GstDirtyUpdateDis); pHlp->pfnPrintf(pHlp, " x2APIC enable = %RTbool\n", Ctrl.n.u1X2ApicEn); pHlp->pfnPrintf(pHlp, " x2APIC interrupt enable = %RTbool\n", Ctrl.n.u1X2ApicIntrGenEn); pHlp->pfnPrintf(pHlp, " Guest page table access bit update = %RTbool\n", Ctrl.n.u1GstAccessUpdateDis); } } /* Exclusion Base Address Register. 
*/ { IOMMU_EXCL_RANGE_BAR_T const ExclRangeBar = pThis->ExclRangeBaseAddr; pHlp->pfnPrintf(pHlp, " Exclusion BAR = %#RX64\n", ExclRangeBar.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Exclusion enable = %RTbool\n", ExclRangeBar.n.u1ExclEnable); pHlp->pfnPrintf(pHlp, " Allow all devices = %RTbool\n", ExclRangeBar.n.u1AllowAll); pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", ExclRangeBar.n.u40ExclRangeBase << X86_PAGE_4K_SHIFT); } } /* Exclusion Range Limit Register. */ { IOMMU_EXCL_RANGE_LIMIT_T const ExclRangeLimit = pThis->ExclRangeLimit; pHlp->pfnPrintf(pHlp, " Exclusion Range Limit = %#RX64\n", ExclRangeLimit.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Range limit = %#RX64\n", (ExclRangeLimit.n.u40ExclRangeLimit << X86_PAGE_4K_SHIFT) | X86_PAGE_4K_OFFSET_MASK); } } /* Extended Feature Register. */ { IOMMU_EXT_FEAT_T ExtFeat = pThis->ExtFeat; pHlp->pfnPrintf(pHlp, " Extended Feature Register = %#RX64\n", ExtFeat.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Prefetch support = %RTbool\n", ExtFeat.n.u1PrefetchSup); pHlp->pfnPrintf(pHlp, " PPR support = %RTbool\n", ExtFeat.n.u1PprSup); pHlp->pfnPrintf(pHlp, " x2APIC support = %RTbool\n", ExtFeat.n.u1X2ApicSup); pHlp->pfnPrintf(pHlp, " NX and privilege level support = %RTbool\n", ExtFeat.n.u1NoExecuteSup); pHlp->pfnPrintf(pHlp, " Guest translation support = %RTbool\n", ExtFeat.n.u1GstTranslateSup); pHlp->pfnPrintf(pHlp, " Invalidate-All command support = %RTbool\n", ExtFeat.n.u1InvAllSup); pHlp->pfnPrintf(pHlp, " Guest virtual-APIC support = %RTbool\n", ExtFeat.n.u1GstVirtApicSup); pHlp->pfnPrintf(pHlp, " Hardware error register support = %RTbool\n", ExtFeat.n.u1HwErrorSup); pHlp->pfnPrintf(pHlp, " Performance counters support = %RTbool\n", ExtFeat.n.u1PerfCounterSup); pHlp->pfnPrintf(pHlp, " Host address translation size = %#x\n", ExtFeat.n.u2HostAddrTranslateSize); pHlp->pfnPrintf(pHlp, " Guest address translation size = %#x\n", ExtFeat.n.u2GstAddrTranslateSize); pHlp->pfnPrintf(pHlp, " Guest CR3 root table 
level support = %#x\n", ExtFeat.n.u2GstCr3RootTblLevel); pHlp->pfnPrintf(pHlp, " SMI filter register support = %#x\n", ExtFeat.n.u2SmiFilterSup); pHlp->pfnPrintf(pHlp, " SMI filter register count = %#x\n", ExtFeat.n.u3SmiFilterCount); pHlp->pfnPrintf(pHlp, " Guest virtual-APIC modes support = %#x\n", ExtFeat.n.u3GstVirtApicModeSup); pHlp->pfnPrintf(pHlp, " Dual PPR log support = %#x\n", ExtFeat.n.u2DualPprLogSup); pHlp->pfnPrintf(pHlp, " Dual event log support = %#x\n", ExtFeat.n.u2DualEvtLogSup); pHlp->pfnPrintf(pHlp, " Maximum PASID = %#x\n", ExtFeat.n.u5MaxPasidSup); pHlp->pfnPrintf(pHlp, " User/supervisor page protection support = %RTbool\n", ExtFeat.n.u1UserSupervisorSup); pHlp->pfnPrintf(pHlp, " Device table segments supported = %#x (%u)\n", ExtFeat.n.u2DevTabSegSup, g_acDevTabSegs[ExtFeat.n.u2DevTabSegSup]); pHlp->pfnPrintf(pHlp, " PPR log overflow early warning support = %RTbool\n", ExtFeat.n.u1PprLogOverflowWarn); pHlp->pfnPrintf(pHlp, " PPR auto response support = %RTbool\n", ExtFeat.n.u1PprAutoRespSup); pHlp->pfnPrintf(pHlp, " MARC support = %#x\n", ExtFeat.n.u2MarcSup); pHlp->pfnPrintf(pHlp, " Block StopMark message support = %RTbool\n", ExtFeat.n.u1BlockStopMarkSup); pHlp->pfnPrintf(pHlp, " Performance optimization support = %RTbool\n", ExtFeat.n.u1PerfOptSup); pHlp->pfnPrintf(pHlp, " MSI capability MMIO access support = %RTbool\n", ExtFeat.n.u1MsiCapMmioSup); pHlp->pfnPrintf(pHlp, " Guest I/O protection support = %RTbool\n", ExtFeat.n.u1GstIoSup); pHlp->pfnPrintf(pHlp, " Host access support = %RTbool\n", ExtFeat.n.u1HostAccessSup); pHlp->pfnPrintf(pHlp, " Enhanced PPR handling support = %RTbool\n", ExtFeat.n.u1EnhancedPprSup); pHlp->pfnPrintf(pHlp, " Attribute forward supported = %RTbool\n", ExtFeat.n.u1AttrForwardSup); pHlp->pfnPrintf(pHlp, " Host dirty support = %RTbool\n", ExtFeat.n.u1HostDirtySup); pHlp->pfnPrintf(pHlp, " Invalidate IOTLB type support = %RTbool\n", ExtFeat.n.u1InvIoTlbTypeSup); pHlp->pfnPrintf(pHlp, " Guest page table access bit 
hw disable = %RTbool\n", ExtFeat.n.u1GstUpdateDisSup); pHlp->pfnPrintf(pHlp, " Force physical dest for remapped intr. = %RTbool\n", ExtFeat.n.u1ForcePhysDstSup); } } /* PPR Log Base Address Register. */ { PPR_LOG_BAR_T PprLogBar = pThis->PprLogBaseAddr; uint8_t const uEncodedLen = PprLogBar.n.u4Len; uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen); uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen); pHlp->pfnPrintf(pHlp, " PPR Log BAR = %#RX64\n", PprLogBar.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", PprLogBar.n.u40Base << X86_PAGE_4K_SHIFT); pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen, cEntries, cbBuffer); } } /* Hardware Event (Hi) Register. */ { IOMMU_HW_EVT_HI_T HwEvtHi = pThis->HwEvtHi; pHlp->pfnPrintf(pHlp, " Hardware Event (Hi) = %#RX64\n", HwEvtHi.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " First operand = %#RX64\n", HwEvtHi.n.u60FirstOperand); pHlp->pfnPrintf(pHlp, " Event code = %#RX8\n", HwEvtHi.n.u4EvtCode); } } /* Hardware Event (Lo) Register. */ pHlp->pfnPrintf(pHlp, " Hardware Event (Lo) = %#RX64\n", pThis->HwEvtLo); /* Hardware Event Status. */ { IOMMU_HW_EVT_STATUS_T HwEvtStatus = pThis->HwEvtStatus; pHlp->pfnPrintf(pHlp, " Hardware Event Status = %#RX64\n", HwEvtStatus.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Valid = %RTbool\n", HwEvtStatus.n.u1Valid); pHlp->pfnPrintf(pHlp, " Overflow = %RTbool\n", HwEvtStatus.n.u1Overflow); } } /* Guest Virtual-APIC Log Base Address Register. 
*/ { GALOG_BAR_T const GALogBar = pThis->GALogBaseAddr; uint8_t const uEncodedLen = GALogBar.n.u4Len; uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen); uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen); pHlp->pfnPrintf(pHlp, " Guest Log BAR = %#RX64\n", GALogBar.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", GALogBar.n.u40Base << X86_PAGE_4K_SHIFT); pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen, cEntries, cbBuffer); } } /* Guest Virtual-APIC Log Tail Address Register. */ { GALOG_TAIL_ADDR_T GALogTail = pThis->GALogTailAddr; pHlp->pfnPrintf(pHlp, " Guest Log Tail Address = %#RX64\n", GALogTail.u64); if (fVerbose) pHlp->pfnPrintf(pHlp, " Tail address = %#RX64\n", GALogTail.n.u40GALogTailAddr); } /* PPR Log B Base Address Register. */ { PPR_LOG_B_BAR_T PprLogBBar = pThis->PprLogBBaseAddr; uint8_t const uEncodedLen = PprLogBBar.n.u4Len; uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen); uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen); pHlp->pfnPrintf(pHlp, " PPR Log B BAR = %#RX64\n", PprLogBBar.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", PprLogBBar.n.u40Base << X86_PAGE_4K_SHIFT); pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen, cEntries, cbBuffer); } } /* Event Log B Base Address Register. */ { EVT_LOG_B_BAR_T EvtLogBBar = pThis->EvtLogBBaseAddr; uint8_t const uEncodedLen = EvtLogBBar.n.u4Len; uint32_t const cEntries = iommuAmdGetBufMaxEntries(uEncodedLen); uint32_t const cbBuffer = iommuAmdGetTotalBufLength(uEncodedLen); pHlp->pfnPrintf(pHlp, " Event Log B BAR = %#RX64\n", EvtLogBBar.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Base address = %#RX64\n", EvtLogBBar.n.u40Base << X86_PAGE_4K_SHIFT); pHlp->pfnPrintf(pHlp, " Length = %u (%u entries, %u bytes)\n", uEncodedLen, cEntries, cbBuffer); } } /* Device-Specific Feature Extension Register. 
*/ { DEV_SPECIFIC_FEAT_T const DevSpecificFeat = pThis->DevSpecificFeat; pHlp->pfnPrintf(pHlp, " Device-specific Feature = %#RX64\n", DevSpecificFeat.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Feature = %#RX32\n", DevSpecificFeat.n.u24DevSpecFeat); pHlp->pfnPrintf(pHlp, " Minor revision ID = %#x\n", DevSpecificFeat.n.u4RevMinor); pHlp->pfnPrintf(pHlp, " Major revision ID = %#x\n", DevSpecificFeat.n.u4RevMajor); } } /* Device-Specific Control Extension Register. */ { DEV_SPECIFIC_CTRL_T const DevSpecificCtrl = pThis->DevSpecificCtrl; pHlp->pfnPrintf(pHlp, " Device-specific Control = %#RX64\n", DevSpecificCtrl.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Control = %#RX32\n", DevSpecificCtrl.n.u24DevSpecCtrl); pHlp->pfnPrintf(pHlp, " Minor revision ID = %#x\n", DevSpecificCtrl.n.u4RevMinor); pHlp->pfnPrintf(pHlp, " Major revision ID = %#x\n", DevSpecificCtrl.n.u4RevMajor); } } /* Device-Specific Status Extension Register. */ { DEV_SPECIFIC_STATUS_T const DevSpecificStatus = pThis->DevSpecificStatus; pHlp->pfnPrintf(pHlp, " Device-specific Status = %#RX64\n", DevSpecificStatus.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Status = %#RX32\n", DevSpecificStatus.n.u24DevSpecStatus); pHlp->pfnPrintf(pHlp, " Minor revision ID = %#x\n", DevSpecificStatus.n.u4RevMinor); pHlp->pfnPrintf(pHlp, " Major revision ID = %#x\n", DevSpecificStatus.n.u4RevMajor); } } /* Miscellaneous Information Register (Lo and Hi). */ { MSI_MISC_INFO_T const MiscInfo = pThis->MiscInfo; pHlp->pfnPrintf(pHlp, " Misc. Info. 
Register = %#RX64\n", MiscInfo.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Event Log MSI number = %#x\n", MiscInfo.n.u5MsiNumEvtLog); pHlp->pfnPrintf(pHlp, " Guest Virtual-Address Size = %#x\n", MiscInfo.n.u3GstVirtAddrSize); pHlp->pfnPrintf(pHlp, " Physical Address Size = %#x\n", MiscInfo.n.u7PhysAddrSize); pHlp->pfnPrintf(pHlp, " Virtual-Address Size = %#x\n", MiscInfo.n.u7VirtAddrSize); pHlp->pfnPrintf(pHlp, " HT Transport ATS Range Reserved = %RTbool\n", MiscInfo.n.u1HtAtsResv); pHlp->pfnPrintf(pHlp, " PPR MSI number = %#x\n", MiscInfo.n.u5MsiNumPpr); pHlp->pfnPrintf(pHlp, " GA Log MSI number = %#x\n", MiscInfo.n.u5MsiNumGa); } } /* MSI Capability Header. */ { MSI_CAP_HDR_T MsiCapHdr; MsiCapHdr.u32 = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_CAP_HDR); pHlp->pfnPrintf(pHlp, " MSI Capability Header = %#RX32\n", MsiCapHdr.u32); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Capability ID = %#x\n", MsiCapHdr.n.u8MsiCapId); pHlp->pfnPrintf(pHlp, " Capability Ptr (PCI config offset) = %#x\n", MsiCapHdr.n.u8MsiCapPtr); pHlp->pfnPrintf(pHlp, " Enable = %RTbool\n", MsiCapHdr.n.u1MsiEnable); pHlp->pfnPrintf(pHlp, " Multi-message capability = %#x\n", MsiCapHdr.n.u3MsiMultiMessCap); pHlp->pfnPrintf(pHlp, " Multi-message enable = %#x\n", MsiCapHdr.n.u3MsiMultiMessEn); } } /* MSI Address Register (Lo and Hi). */ { uint32_t const uMsiAddrLo = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_LO); uint32_t const uMsiAddrHi = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_HI); MSIADDR MsiAddr; MsiAddr.u64 = RT_MAKE_U64(uMsiAddrLo, uMsiAddrHi); pHlp->pfnPrintf(pHlp, " MSI Address = %#RX64\n", MsiAddr.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Destination mode = %#x\n", MsiAddr.n.u1DestMode); pHlp->pfnPrintf(pHlp, " Redirection hint = %#x\n", MsiAddr.n.u1RedirHint); pHlp->pfnPrintf(pHlp, " Destination Id = %#x\n", MsiAddr.n.u8DestId); pHlp->pfnPrintf(pHlp, " Address = %#RX32\n", MsiAddr.n.u12Addr); pHlp->pfnPrintf(pHlp, " Address (Hi) / Rsvd? 
= %#RX32\n", MsiAddr.n.u32Rsvd0); } } /* MSI Data. */ { MSIDATA MsiData; MsiData.u32 = PDMPciDevGetDWord(pPciDev, IOMMU_PCI_OFF_MSI_DATA); pHlp->pfnPrintf(pHlp, " MSI Data = %#RX32\n", MsiData.u32); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Vector = %#x (%u)\n", MsiData.n.u8Vector, MsiData.n.u8Vector); pHlp->pfnPrintf(pHlp, " Delivery mode = %#x\n", MsiData.n.u3DeliveryMode); pHlp->pfnPrintf(pHlp, " Level = %#x\n", MsiData.n.u1Level); pHlp->pfnPrintf(pHlp, " Trigger mode = %s\n", MsiData.n.u1TriggerMode ? "level" : "edge"); } } /* MSI Mapping Capability Header (HyperTransport, reporting all 0s currently). */ { MSI_MAP_CAP_HDR_T MsiMapCapHdr; MsiMapCapHdr.u32 = 0; pHlp->pfnPrintf(pHlp, " MSI Mapping Capability Header = %#RX32\n", MsiMapCapHdr.u32); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Capability ID = %#x\n", MsiMapCapHdr.n.u8MsiMapCapId); pHlp->pfnPrintf(pHlp, " Map enable = %RTbool\n", MsiMapCapHdr.n.u1MsiMapEn); pHlp->pfnPrintf(pHlp, " Map fixed = %RTbool\n", MsiMapCapHdr.n.u1MsiMapFixed); pHlp->pfnPrintf(pHlp, " Map capability type = %#x\n", MsiMapCapHdr.n.u5MapCapType); } } /* Performance Optimization Control Register. */ { IOMMU_PERF_OPT_CTRL_T const PerfOptCtrl = pThis->PerfOptCtrl; pHlp->pfnPrintf(pHlp, " Performance Optimization Control = %#RX32\n", PerfOptCtrl.u32); if (fVerbose) pHlp->pfnPrintf(pHlp, " Enable = %RTbool\n", PerfOptCtrl.n.u1PerfOptEn); } /* XT (x2APIC) General Interrupt Control Register. */ { IOMMU_XT_GEN_INTR_CTRL_T const XtGenIntrCtrl = pThis->XtGenIntrCtrl; pHlp->pfnPrintf(pHlp, " XT General Interrupt Control = %#RX64\n", XtGenIntrCtrl.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Interrupt destination mode = %s\n", !XtGenIntrCtrl.n.u1X2ApicIntrDstMode ? 
"physical" : "logical"); pHlp->pfnPrintf(pHlp, " Interrupt destination = %#RX64\n", RT_MAKE_U64(XtGenIntrCtrl.n.u24X2ApicIntrDstLo, XtGenIntrCtrl.n.u7X2ApicIntrDstHi)); pHlp->pfnPrintf(pHlp, " Interrupt vector = %#x\n", XtGenIntrCtrl.n.u8X2ApicIntrVector); pHlp->pfnPrintf(pHlp, " Interrupt delivery mode = %s\n", !XtGenIntrCtrl.n.u8X2ApicIntrVector ? "fixed" : "arbitrated"); } } /* XT (x2APIC) PPR Interrupt Control Register. */ { IOMMU_XT_PPR_INTR_CTRL_T const XtPprIntrCtrl = pThis->XtPprIntrCtrl; pHlp->pfnPrintf(pHlp, " XT PPR Interrupt Control = %#RX64\n", XtPprIntrCtrl.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Interrupt destination mode = %s\n", !XtPprIntrCtrl.n.u1X2ApicIntrDstMode ? "physical" : "logical"); pHlp->pfnPrintf(pHlp, " Interrupt destination = %#RX64\n", RT_MAKE_U64(XtPprIntrCtrl.n.u24X2ApicIntrDstLo, XtPprIntrCtrl.n.u7X2ApicIntrDstHi)); pHlp->pfnPrintf(pHlp, " Interrupt vector = %#x\n", XtPprIntrCtrl.n.u8X2ApicIntrVector); pHlp->pfnPrintf(pHlp, " Interrupt delivery mode = %s\n", !XtPprIntrCtrl.n.u8X2ApicIntrVector ? "fixed" : "arbitrated"); } } /* XT (X2APIC) GA Log Interrupt Control Register. */ { IOMMU_XT_GALOG_INTR_CTRL_T const XtGALogIntrCtrl = pThis->XtGALogIntrCtrl; pHlp->pfnPrintf(pHlp, " XT PPR Interrupt Control = %#RX64\n", XtGALogIntrCtrl.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Interrupt destination mode = %s\n", !XtGALogIntrCtrl.n.u1X2ApicIntrDstMode ? "physical" : "logical"); pHlp->pfnPrintf(pHlp, " Interrupt destination = %#RX64\n", RT_MAKE_U64(XtGALogIntrCtrl.n.u24X2ApicIntrDstLo, XtGALogIntrCtrl.n.u7X2ApicIntrDstHi)); pHlp->pfnPrintf(pHlp, " Interrupt vector = %#x\n", XtGALogIntrCtrl.n.u8X2ApicIntrVector); pHlp->pfnPrintf(pHlp, " Interrupt delivery mode = %s\n", !XtGALogIntrCtrl.n.u8X2ApicIntrVector ? "fixed" : "arbitrated"); } } /* MARC Registers. 
*/ { for (unsigned i = 0; i < RT_ELEMENTS(pThis->aMarcApers); i++) { pHlp->pfnPrintf(pHlp, " MARC Aperature %u:\n", i); MARC_APER_BAR_T const MarcAperBar = pThis->aMarcApers[i].Base; pHlp->pfnPrintf(pHlp, " Base = %#RX64\n", MarcAperBar.n.u40MarcBaseAddr << X86_PAGE_4K_SHIFT); MARC_APER_RELOC_T const MarcAperReloc = pThis->aMarcApers[i].Reloc; pHlp->pfnPrintf(pHlp, " Reloc = %#RX64 (addr: %#RX64, read-only: %RTbool, enable: %RTbool)\n", MarcAperReloc.u64, MarcAperReloc.n.u40MarcRelocAddr << X86_PAGE_4K_SHIFT, MarcAperReloc.n.u1ReadOnly, MarcAperReloc.n.u1RelocEn); MARC_APER_LEN_T const MarcAperLen = pThis->aMarcApers[i].Length; pHlp->pfnPrintf(pHlp, " Length = %u pages\n", MarcAperLen.n.u40MarcLength); } } /* Reserved Register. */ pHlp->pfnPrintf(pHlp, " Reserved Register = %#RX64\n", pThis->RsvdReg); /* Command Buffer Head Pointer Register. */ { CMD_BUF_HEAD_PTR_T const CmdBufHeadPtr = pThis->CmdBufHeadPtr; pHlp->pfnPrintf(pHlp, " Command Buffer Head Pointer = %#RX64 (off: %#x)\n", CmdBufHeadPtr.u64, CmdBufHeadPtr.n.off); } /* Command Buffer Tail Pointer Register. */ { CMD_BUF_HEAD_PTR_T const CmdBufTailPtr = pThis->CmdBufTailPtr; pHlp->pfnPrintf(pHlp, " Command Buffer Tail Pointer = %#RX64 (off: %#x)\n", CmdBufTailPtr.u64, CmdBufTailPtr.n.off); } /* Event Log Head Pointer Register. */ { EVT_LOG_HEAD_PTR_T const EvtLogHeadPtr = pThis->EvtLogHeadPtr; pHlp->pfnPrintf(pHlp, " Event Log Head Pointer = %#RX64 (off: %#x)\n", EvtLogHeadPtr.u64, EvtLogHeadPtr.n.off); } /* Event Log Tail Pointer Register. */ { EVT_LOG_TAIL_PTR_T const EvtLogTailPtr = pThis->EvtLogTailPtr; pHlp->pfnPrintf(pHlp, " Event Log Head Pointer = %#RX64 (off: %#x)\n", EvtLogTailPtr.u64, EvtLogTailPtr.n.off); } /* Status Register. 
*/ { IOMMU_STATUS_T const Status = pThis->Status; pHlp->pfnPrintf(pHlp, " Status Register = %#RX64\n", Status.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Event log overflow = %RTbool\n", Status.n.u1EvtOverflow); pHlp->pfnPrintf(pHlp, " Event log interrupt = %RTbool\n", Status.n.u1EvtLogIntr); pHlp->pfnPrintf(pHlp, " Completion wait interrupt = %RTbool\n", Status.n.u1CompWaitIntr); pHlp->pfnPrintf(pHlp, " Event log running = %RTbool\n", Status.n.u1EvtLogRunning); pHlp->pfnPrintf(pHlp, " Command buffer running = %RTbool\n", Status.n.u1CmdBufRunning); pHlp->pfnPrintf(pHlp, " PPR overflow = %RTbool\n", Status.n.u1PprOverflow); pHlp->pfnPrintf(pHlp, " PPR interrupt = %RTbool\n", Status.n.u1PprIntr); pHlp->pfnPrintf(pHlp, " PPR log running = %RTbool\n", Status.n.u1PprLogRunning); pHlp->pfnPrintf(pHlp, " Guest log running = %RTbool\n", Status.n.u1GstLogRunning); pHlp->pfnPrintf(pHlp, " Guest log interrupt = %RTbool\n", Status.n.u1GstLogIntr); pHlp->pfnPrintf(pHlp, " PPR log B overflow = %RTbool\n", Status.n.u1PprOverflowB); pHlp->pfnPrintf(pHlp, " PPR log active = %RTbool\n", Status.n.u1PprLogActive); pHlp->pfnPrintf(pHlp, " Event log B overflow = %RTbool\n", Status.n.u1EvtOverflowB); pHlp->pfnPrintf(pHlp, " Event log active = %RTbool\n", Status.n.u1EvtLogActive); pHlp->pfnPrintf(pHlp, " PPR log B overflow early warning = %RTbool\n", Status.n.u1PprOverflowEarlyB); pHlp->pfnPrintf(pHlp, " PPR log overflow early warning = %RTbool\n", Status.n.u1PprOverflowEarly); } } /* PPR Log Head Pointer. */ { PPR_LOG_HEAD_PTR_T const PprLogHeadPtr = pThis->PprLogHeadPtr; pHlp->pfnPrintf(pHlp, " PPR Log Head Pointer = %#RX64 (off: %#x)\n", PprLogHeadPtr.u64, PprLogHeadPtr.n.off); } /* PPR Log Tail Pointer. */ { PPR_LOG_TAIL_PTR_T const PprLogTailPtr = pThis->PprLogTailPtr; pHlp->pfnPrintf(pHlp, " PPR Log Tail Pointer = %#RX64 (off: %#x)\n", PprLogTailPtr.u64, PprLogTailPtr.n.off); } /* Guest Virtual-APIC Log Head Pointer. 
*/ { GALOG_HEAD_PTR_T const GALogHeadPtr = pThis->GALogHeadPtr; pHlp->pfnPrintf(pHlp, " Guest Virtual-APIC Log Head Pointer = %#RX64 (off: %#x)\n", GALogHeadPtr.u64, GALogHeadPtr.n.u12GALogPtr); } /* Guest Virtual-APIC Log Tail Pointer. */ { GALOG_HEAD_PTR_T const GALogTailPtr = pThis->GALogTailPtr; pHlp->pfnPrintf(pHlp, " Guest Virtual-APIC Log Tail Pointer = %#RX64 (off: %#x)\n", GALogTailPtr.u64, GALogTailPtr.n.u12GALogPtr); } /* PPR Log B Head Pointer. */ { PPR_LOG_B_HEAD_PTR_T const PprLogBHeadPtr = pThis->PprLogBHeadPtr; pHlp->pfnPrintf(pHlp, " PPR Log B Head Pointer = %#RX64 (off: %#x)\n", PprLogBHeadPtr.u64, PprLogBHeadPtr.n.off); } /* PPR Log B Tail Pointer. */ { PPR_LOG_B_TAIL_PTR_T const PprLogBTailPtr = pThis->PprLogBTailPtr; pHlp->pfnPrintf(pHlp, " PPR Log B Tail Pointer = %#RX64 (off: %#x)\n", PprLogBTailPtr.u64, PprLogBTailPtr.n.off); } /* Event Log B Head Pointer. */ { EVT_LOG_B_HEAD_PTR_T const EvtLogBHeadPtr = pThis->EvtLogBHeadPtr; pHlp->pfnPrintf(pHlp, " Event Log B Head Pointer = %#RX64 (off: %#x)\n", EvtLogBHeadPtr.u64, EvtLogBHeadPtr.n.off); } /* Event Log B Tail Pointer. */ { EVT_LOG_B_TAIL_PTR_T const EvtLogBTailPtr = pThis->EvtLogBTailPtr; pHlp->pfnPrintf(pHlp, " Event Log B Tail Pointer = %#RX64 (off: %#x)\n", EvtLogBTailPtr.u64, EvtLogBTailPtr.n.off); } /* PPR Log Auto Response Register. */ { PPR_LOG_AUTO_RESP_T const PprLogAutoResp = pThis->PprLogAutoResp; pHlp->pfnPrintf(pHlp, " PPR Log Auto Response Register = %#RX64\n", PprLogAutoResp.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Code = %#x\n", PprLogAutoResp.n.u4AutoRespCode); pHlp->pfnPrintf(pHlp, " Mask Gen. = %RTbool\n", PprLogAutoResp.n.u1AutoRespMaskGen); } } /* PPR Log Overflow Early Warning Indicator Register. 
*/ { PPR_LOG_OVERFLOW_EARLY_T const PprLogOverflowEarly = pThis->PprLogOverflowEarly; pHlp->pfnPrintf(pHlp, " PPR Log overflow early warning = %#RX64\n", PprLogOverflowEarly.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Threshold = %#x\n", PprLogOverflowEarly.n.u15Threshold); pHlp->pfnPrintf(pHlp, " Interrupt enable = %RTbool\n", PprLogOverflowEarly.n.u1IntrEn); pHlp->pfnPrintf(pHlp, " Enable = %RTbool\n", PprLogOverflowEarly.n.u1Enable); } } /* PPR Log Overflow Early Warning Indicator Register. */ { PPR_LOG_OVERFLOW_EARLY_T const PprLogBOverflowEarly = pThis->PprLogBOverflowEarly; pHlp->pfnPrintf(pHlp, " PPR Log B overflow early warning = %#RX64\n", PprLogBOverflowEarly.u64); if (fVerbose) { pHlp->pfnPrintf(pHlp, " Threshold = %#x\n", PprLogBOverflowEarly.n.u15Threshold); pHlp->pfnPrintf(pHlp, " Interrupt enable = %RTbool\n", PprLogBOverflowEarly.n.u1IntrEn); pHlp->pfnPrintf(pHlp, " Enable = %RTbool\n", PprLogBOverflowEarly.n.u1Enable); } } } /** * Dumps the DTE via the info callback helper. * * @param pHlp The info helper. * @param pDte The device table entry. * @param pszPrefix The string prefix. 
*/ static void iommuAmdR3DbgInfoDteWorker(PCDBGFINFOHLP pHlp, PCDTE_T pDte, const char *pszPrefix) { AssertReturnVoid(pHlp); AssertReturnVoid(pDte); AssertReturnVoid(pszPrefix); pHlp->pfnPrintf(pHlp, "%sValid = %RTbool\n", pszPrefix, pDte->n.u1Valid); pHlp->pfnPrintf(pHlp, "%sTranslation Valid = %RTbool\n", pszPrefix, pDte->n.u1TranslationValid); pHlp->pfnPrintf(pHlp, "%sHost Access Dirty = %#x\n", pszPrefix, pDte->n.u2Had); pHlp->pfnPrintf(pHlp, "%sPaging Mode = %u\n", pszPrefix, pDte->n.u3Mode); pHlp->pfnPrintf(pHlp, "%sPage Table Root Ptr = %#RX64 (addr=%#RGp)\n", pszPrefix, pDte->n.u40PageTableRootPtrLo, pDte->n.u40PageTableRootPtrLo << 12); pHlp->pfnPrintf(pHlp, "%sPPR enable = %RTbool\n", pszPrefix, pDte->n.u1Ppr); pHlp->pfnPrintf(pHlp, "%sGuest PPR Resp w/ PASID = %RTbool\n", pszPrefix, pDte->n.u1GstPprRespPasid); pHlp->pfnPrintf(pHlp, "%sGuest I/O Prot Valid = %RTbool\n", pszPrefix, pDte->n.u1GstIoValid); pHlp->pfnPrintf(pHlp, "%sGuest Translation Valid = %RTbool\n", pszPrefix, pDte->n.u1GstTranslateValid); pHlp->pfnPrintf(pHlp, "%sGuest Levels Translated = %#x\n", pszPrefix, pDte->n.u2GstMode); pHlp->pfnPrintf(pHlp, "%sGuest Root Page Table Ptr = %#x %#x %#x (addr=%#RGp)\n", pszPrefix, pDte->n.u3GstCr3TableRootPtrLo, pDte->n.u16GstCr3TableRootPtrMid, pDte->n.u21GstCr3TableRootPtrHi, (pDte->n.u21GstCr3TableRootPtrHi << 31) | (pDte->n.u16GstCr3TableRootPtrMid << 15) | (pDte->n.u3GstCr3TableRootPtrLo << 12)); pHlp->pfnPrintf(pHlp, "%sI/O Read = %s\n", pszPrefix, pDte->n.u1IoRead ? "allowed" : "denied"); pHlp->pfnPrintf(pHlp, "%sI/O Write = %s\n", pszPrefix, pDte->n.u1IoWrite ? 
"allowed" : "denied"); pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u1Rsvd0); pHlp->pfnPrintf(pHlp, "%sDomain ID = %u (%#x)\n", pszPrefix, pDte->n.u16DomainId, pDte->n.u16DomainId); pHlp->pfnPrintf(pHlp, "%sIOTLB Enable = %RTbool\n", pszPrefix, pDte->n.u1IoTlbEnable); pHlp->pfnPrintf(pHlp, "%sSuppress I/O PFs = %RTbool\n", pszPrefix, pDte->n.u1SuppressPfEvents); pHlp->pfnPrintf(pHlp, "%sSuppress all I/O PFs = %RTbool\n", pszPrefix, pDte->n.u1SuppressAllPfEvents); pHlp->pfnPrintf(pHlp, "%sPort I/O Control = %#x\n", pszPrefix, pDte->n.u2IoCtl); pHlp->pfnPrintf(pHlp, "%sIOTLB Cache Hint = %s\n", pszPrefix, pDte->n.u1Cache ? "no caching" : "cache"); pHlp->pfnPrintf(pHlp, "%sSnoop Disable = %RTbool\n", pszPrefix, pDte->n.u1SnoopDisable); pHlp->pfnPrintf(pHlp, "%sAllow Exclusion = %RTbool\n", pszPrefix, pDte->n.u1AllowExclusion); pHlp->pfnPrintf(pHlp, "%sSysMgt Message Enable = %RTbool\n", pszPrefix, pDte->n.u2SysMgt); pHlp->pfnPrintf(pHlp, "%sInterrupt Map Valid = %RTbool\n", pszPrefix, pDte->n.u1IntrMapValid); uint8_t const uIntrTabLen = pDte->n.u4IntrTableLength; if (uIntrTabLen < IOMMU_DTE_INTR_TAB_LEN_MAX) { uint16_t const cEntries = IOMMU_DTE_GET_INTR_TAB_ENTRIES(pDte); uint16_t const cbIntrTable = IOMMU_DTE_GET_INTR_TAB_LEN(pDte); pHlp->pfnPrintf(pHlp, "%sInterrupt Table Length = %#x (%u entries, %u bytes)\n", pszPrefix, uIntrTabLen, cEntries, cbIntrTable); } else pHlp->pfnPrintf(pHlp, "%sInterrupt Table Length = %#x (invalid!)\n", pszPrefix, uIntrTabLen); pHlp->pfnPrintf(pHlp, "%sIgnore Unmapped Interrupts = %RTbool\n", pszPrefix, pDte->n.u1IgnoreUnmappedIntrs); pHlp->pfnPrintf(pHlp, "%sInterrupt Table Root Ptr = %#RX64 (addr=%#RGp)\n", pszPrefix, pDte->n.u46IntrTableRootPtr, pDte->au64[2] & IOMMU_DTE_IRTE_ROOT_PTR_MASK); pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u4Rsvd0); pHlp->pfnPrintf(pHlp, "%sINIT passthru = %RTbool\n", pszPrefix, pDte->n.u1InitPassthru); pHlp->pfnPrintf(pHlp, "%sExtInt passthru = 
%RTbool\n", pszPrefix, pDte->n.u1ExtIntPassthru); pHlp->pfnPrintf(pHlp, "%sNMI passthru = %RTbool\n", pszPrefix, pDte->n.u1NmiPassthru); pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u1Rsvd2); pHlp->pfnPrintf(pHlp, "%sInterrupt Control = %#x\n", pszPrefix, pDte->n.u2IntrCtrl); pHlp->pfnPrintf(pHlp, "%sLINT0 passthru = %RTbool\n", pszPrefix, pDte->n.u1Lint0Passthru); pHlp->pfnPrintf(pHlp, "%sLINT1 passthru = %RTbool\n", pszPrefix, pDte->n.u1Lint1Passthru); pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u32Rsvd0); pHlp->pfnPrintf(pHlp, "%sReserved (MBZ) = %#x\n", pszPrefix, pDte->n.u22Rsvd0); pHlp->pfnPrintf(pHlp, "%sAttribute Override Valid = %RTbool\n", pszPrefix, pDte->n.u1AttrOverride); pHlp->pfnPrintf(pHlp, "%sMode0FC = %#x\n", pszPrefix, pDte->n.u1Mode0FC); pHlp->pfnPrintf(pHlp, "%sSnoop Attribute = %#x\n", pszPrefix, pDte->n.u8SnoopAttr); pHlp->pfnPrintf(pHlp, "\n"); } /** * @callback_method_impl{FNDBGFHANDLERDEV} */ static DECLCALLBACK(void) iommuAmdR3DbgInfoDte(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs) { if (pszArgs) { uint16_t idDevice = 0; int rc = RTStrToUInt16Full(pszArgs, 0 /* uBase */, &idDevice); if (RT_SUCCESS(rc)) { DTE_T Dte; rc = iommuAmdDteRead(pDevIns, idDevice, IOMMUOP_TRANSLATE_REQ, &Dte); if (RT_SUCCESS(rc)) { pHlp->pfnPrintf(pHlp, "DTE for device %#x\n", idDevice); iommuAmdR3DbgInfoDteWorker(pHlp, &Dte, " "); return; } pHlp->pfnPrintf(pHlp, "Failed to read DTE for device ID %u (%#x). rc=%Rrc\n", idDevice, idDevice, rc); } else pHlp->pfnPrintf(pHlp, "Failed to parse a valid 16-bit device ID. 
rc=%Rrc\n", rc); } else pHlp->pfnPrintf(pHlp, "Missing device ID.\n"); } # ifdef IOMMU_WITH_DTE_CACHE /** * @callback_method_impl{FNDBGFHANDLERDEV} */ static DECLCALLBACK(void) iommuAmdR3DbgInfoDteCache(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs) { RT_NOREF(pszArgs); PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); IOMMU_CACHE_LOCK(pDevIns, pThis); uint16_t const cDteCache = RT_ELEMENTS(pThis->aDeviceIds); pHlp->pfnPrintf(pHlp, "DTE Cache: Capacity=%u entries\n", cDteCache); for (uint16_t i = 0; i < cDteCache; i++) { uint16_t const idDevice = pThis->aDeviceIds[i]; if (idDevice) { pHlp->pfnPrintf(pHlp, " Entry[%u]: Device=%#x (BDF %02x:%02x.%d)\n", i, idDevice, (idDevice >> VBOX_PCI_BUS_SHIFT) & VBOX_PCI_BUS_MASK, (idDevice >> VBOX_PCI_DEVFN_DEV_SHIFT) & VBOX_PCI_DEVFN_DEV_MASK, idDevice & VBOX_PCI_DEVFN_FUN_MASK); PCDTECACHE pDteCache = &pThis->aDteCache[i]; pHlp->pfnPrintf(pHlp, " Flags = %#x\n", pDteCache->fFlags); pHlp->pfnPrintf(pHlp, " Domain Id = %u\n", pDteCache->idDomain); pHlp->pfnPrintf(pHlp, "\n"); } } IOMMU_CACHE_UNLOCK(pDevIns, pThis); } # endif /* IOMMU_WITH_DTE_CACHE */ # ifdef IOMMU_WITH_IOTLBE_CACHE /** * @callback_method_impl{FNDBGFHANDLERDEV} */ static DECLCALLBACK(void) iommuAmdR3DbgInfoIotlb(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs) { if (pszArgs) { uint16_t idDomain = 0; int rc = RTStrToUInt16Full(pszArgs, 0 /* uBase */, &idDomain); if (RT_SUCCESS(rc)) { pHlp->pfnPrintf(pHlp, "IOTLBEs for domain %u (%#x):\n", idDomain, idDomain); PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3); IOTLBEINFOARG Args; Args.pIommuR3 = pThisR3; Args.pHlp = pHlp; Args.idDomain = idDomain; IOMMU_CACHE_LOCK(pDevIns, pThis); RTAvlU64DoWithAll(&pThisR3->TreeIotlbe, true /* fFromLeft */, iommuAmdR3IotlbEntryInfo, &Args); IOMMU_CACHE_UNLOCK(pDevIns, pThis); } else pHlp->pfnPrintf(pHlp, "Failed to parse a valid 16-bit domain ID. 
rc=%Rrc\n", rc); } else pHlp->pfnPrintf(pHlp, "Missing domain ID.\n"); } # endif /* IOMMU_WITH_IOTLBE_CACHE */ # ifdef IOMMU_WITH_IRTE_CACHE /** * Gets the interrupt type name for an interrupt type in the IRTE. * * @returns The interrupt type name. * @param uIntrType The interrupt type (as specified in the IRTE). */ static const char *iommuAmdIrteGetIntrTypeName(uint8_t uIntrType) { switch (uIntrType) { case VBOX_MSI_DELIVERY_MODE_FIXED: return "Fixed"; case VBOX_MSI_DELIVERY_MODE_LOWEST_PRIO: return "Arbitrated"; default: return ""; } } /** * @callback_method_impl{FNDBGFHANDLERDEV} */ static DECLCALLBACK(void) iommuAmdR3DbgInfoIrteCache(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs) { RT_NOREF(pszArgs); PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); IOMMU_CACHE_LOCK(pDevIns, pThis); uint16_t const cIrteCache = RT_ELEMENTS(pThis->aIrteCache); pHlp->pfnPrintf(pHlp, "IRTE Cache: Capacity=%u entries\n", cIrteCache); for (uint16_t idxIrte = 0; idxIrte < cIrteCache; idxIrte++) { PCIRTECACHE pIrteCache = &pThis->aIrteCache[idxIrte]; uint32_t const uKey = pIrteCache->uKey; if (uKey != IOMMU_IRTE_CACHE_KEY_NIL) { uint16_t const idDevice = IOMMU_IRTE_CACHE_KEY_GET_DEVICE_ID(uKey); uint16_t const offIrte = IOMMU_IRTE_CACHE_KEY_GET_OFF(uKey); pHlp->pfnPrintf(pHlp, " Entry[%u]: Offset=%#x Device=%#x (BDF %02x:%02x.%d)\n", idxIrte, offIrte, idDevice, (idDevice >> VBOX_PCI_BUS_SHIFT) & VBOX_PCI_BUS_MASK, (idDevice >> VBOX_PCI_DEVFN_DEV_SHIFT) & VBOX_PCI_DEVFN_DEV_MASK, idDevice & VBOX_PCI_DEVFN_FUN_MASK); PCIRTE_T pIrte = &pIrteCache->Irte; pHlp->pfnPrintf(pHlp, " Remap Enable = %RTbool\n", pIrte->n.u1RemapEnable); pHlp->pfnPrintf(pHlp, " Suppress IOPF = %RTbool\n", pIrte->n.u1SuppressIoPf); pHlp->pfnPrintf(pHlp, " Interrupt Type = %#x (%s)\n", pIrte->n.u3IntrType, iommuAmdIrteGetIntrTypeName(pIrte->n.u3IntrType)); pHlp->pfnPrintf(pHlp, " Request EOI = %RTbool\n", pIrte->n.u1ReqEoi); pHlp->pfnPrintf(pHlp, " Destination mode = %s\n", pIrte->n.u1DestMode ? 
"Logical" : "Physical"); pHlp->pfnPrintf(pHlp, " Destination Id = %u\n", pIrte->n.u8Dest); pHlp->pfnPrintf(pHlp, " Vector = %#x (%u)\n", pIrte->n.u8Vector, pIrte->n.u8Vector); pHlp->pfnPrintf(pHlp, "\n"); } } IOMMU_CACHE_UNLOCK(pDevIns, pThis); } # endif /* IOMMU_WITH_IRTE_CACHE */ /** * @callback_method_impl{FNDBGFHANDLERDEV} */ static DECLCALLBACK(void) iommuAmdR3DbgInfoDevTabs(PPDMDEVINS pDevIns, PCDBGFINFOHLP pHlp, const char *pszArgs) { RT_NOREF(pszArgs); PCIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); PCPDMPCIDEV pPciDev = pDevIns->apPciDevs[0]; PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev); NOREF(pPciDev); uint8_t cSegments = 0; for (uint8_t i = 0; i < RT_ELEMENTS(pThis->aDevTabBaseAddrs); i++) { DEV_TAB_BAR_T const DevTabBar = pThis->aDevTabBaseAddrs[i]; RTGCPHYS const GCPhysDevTab = DevTabBar.n.u40Base << X86_PAGE_4K_SHIFT; if (GCPhysDevTab) ++cSegments; } pHlp->pfnPrintf(pHlp, "AMD-IOMMU device tables with address translations enabled:\n"); pHlp->pfnPrintf(pHlp, " DTE Segments=%u\n", cSegments); if (!cSegments) return; for (uint8_t i = 0; i < RT_ELEMENTS(pThis->aDevTabBaseAddrs); i++) { DEV_TAB_BAR_T const DevTabBar = pThis->aDevTabBaseAddrs[i]; RTGCPHYS const GCPhysDevTab = DevTabBar.n.u40Base << X86_PAGE_4K_SHIFT; if (GCPhysDevTab) { uint32_t const cbDevTab = IOMMU_GET_DEV_TAB_LEN(&DevTabBar); uint32_t const cDtes = cbDevTab / sizeof(DTE_T); void *pvDevTab = RTMemAllocZ(cbDevTab); if (RT_LIKELY(pvDevTab)) { int rc = PDMDevHlpPCIPhysRead(pDevIns, GCPhysDevTab, pvDevTab, cbDevTab); if (RT_SUCCESS(rc)) { for (uint32_t idxDte = 0; idxDte < cDtes; idxDte++) { PCDTE_T pDte = (PCDTE_T)((uintptr_t)pvDevTab + idxDte * sizeof(DTE_T)); if ( pDte->n.u1Valid && pDte->n.u1TranslationValid && pDte->n.u3Mode != 0) { pHlp->pfnPrintf(pHlp, " DTE %u (BDF %02x:%02x.%d)\n", idxDte, (idxDte >> VBOX_PCI_BUS_SHIFT) & VBOX_PCI_BUS_MASK, (idxDte >> VBOX_PCI_DEVFN_DEV_SHIFT) & VBOX_PCI_DEVFN_DEV_MASK, idxDte & VBOX_PCI_DEVFN_FUN_MASK); iommuAmdR3DbgInfoDteWorker(pHlp, pDte, " "); 
pHlp->pfnPrintf(pHlp, "\n"); } } pHlp->pfnPrintf(pHlp, "\n"); } else { pHlp->pfnPrintf(pHlp, " Failed to read table at %#RGp of size %zu bytes. rc=%Rrc!\n", GCPhysDevTab, cbDevTab, rc); } RTMemFree(pvDevTab); } else { pHlp->pfnPrintf(pHlp, " Allocating %zu bytes for reading the device table failed!\n", cbDevTab); return; } } } } /** * @callback_method_impl{FNSSMDEVSAVEEXEC} */ static DECLCALLBACK(int) iommuAmdR3SaveExec(PPDMDEVINS pDevIns, PSSMHANDLE pSSM) { PCIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); PCPDMDEVHLPR3 pHlp = pDevIns->pHlpR3; LogFlowFunc(("\n")); /* First, save ExtFeat and other registers that cannot be modified by the guest. */ pHlp->pfnSSMPutU64(pSSM, pThis->ExtFeat.u64); pHlp->pfnSSMPutU64(pSSM, pThis->DevSpecificFeat.u64); pHlp->pfnSSMPutU64(pSSM, pThis->DevSpecificCtrl.u64); pHlp->pfnSSMPutU64(pSSM, pThis->DevSpecificStatus.u64); pHlp->pfnSSMPutU64(pSSM, pThis->MiscInfo.u64); pHlp->pfnSSMPutU64(pSSM, pThis->RsvdReg); /* Next, save all registers that can be modified by the guest. */ pHlp->pfnSSMPutU64(pSSM, pThis->IommuBar.u64); uint8_t const cDevTabBaseAddrs = RT_ELEMENTS(pThis->aDevTabBaseAddrs); pHlp->pfnSSMPutU8(pSSM, cDevTabBaseAddrs); for (uint8_t i = 0; i < cDevTabBaseAddrs; i++) pHlp->pfnSSMPutU64(pSSM, pThis->aDevTabBaseAddrs[i].u64); AssertReturn(pThis->CmdBufBaseAddr.n.u4Len >= 8, VERR_IOMMU_IPE_4); pHlp->pfnSSMPutU64(pSSM, pThis->CmdBufBaseAddr.u64); pHlp->pfnSSMPutU64(pSSM, pThis->EvtLogBaseAddr.u64); pHlp->pfnSSMPutU64(pSSM, pThis->Ctrl.u64); pHlp->pfnSSMPutU64(pSSM, pThis->ExclRangeBaseAddr.u64); pHlp->pfnSSMPutU64(pSSM, pThis->ExclRangeLimit.u64); #if 0 pHlp->pfnSSMPutU64(pSSM, pThis->ExtFeat.u64); /* read-only, done already (above). 
*/ #endif pHlp->pfnSSMPutU64(pSSM, pThis->PprLogBaseAddr.u64); pHlp->pfnSSMPutU64(pSSM, pThis->HwEvtHi.u64); pHlp->pfnSSMPutU64(pSSM, pThis->HwEvtLo); pHlp->pfnSSMPutU64(pSSM, pThis->HwEvtStatus.u64); pHlp->pfnSSMPutU64(pSSM, pThis->GALogBaseAddr.u64); pHlp->pfnSSMPutU64(pSSM, pThis->GALogTailAddr.u64); pHlp->pfnSSMPutU64(pSSM, pThis->PprLogBBaseAddr.u64); pHlp->pfnSSMPutU64(pSSM, pThis->EvtLogBBaseAddr.u64); #if 0 pHlp->pfnSSMPutU64(pSSM, pThis->DevSpecificFeat.u64); /* read-only, done already (above). */ pHlp->pfnSSMPutU64(pSSM, pThis->DevSpecificCtrl.u64); /* read-only, done already (above). */ pHlp->pfnSSMPutU64(pSSM, pThis->DevSpecificStatus.u64); /* read-only, done already (above). */ pHlp->pfnSSMPutU64(pSSM, pThis->MiscInfo.u64); /* read-only, done already (above). */ #endif pHlp->pfnSSMPutU32(pSSM, pThis->PerfOptCtrl.u32); pHlp->pfnSSMPutU64(pSSM, pThis->XtGenIntrCtrl.u64); pHlp->pfnSSMPutU64(pSSM, pThis->XtPprIntrCtrl.u64); pHlp->pfnSSMPutU64(pSSM, pThis->XtGALogIntrCtrl.u64); size_t const cMarcApers = RT_ELEMENTS(pThis->aMarcApers); pHlp->pfnSSMPutU8(pSSM, cMarcApers); for (size_t i = 0; i < cMarcApers; i++) { pHlp->pfnSSMPutU64(pSSM, pThis->aMarcApers[i].Base.u64); pHlp->pfnSSMPutU64(pSSM, pThis->aMarcApers[i].Reloc.u64); pHlp->pfnSSMPutU64(pSSM, pThis->aMarcApers[i].Length.u64); } #if 0 pHlp->pfnSSMPutU64(pSSM, pThis->RsvdReg); /* read-only, done already (above). 
*/ #endif pHlp->pfnSSMPutU64(pSSM, pThis->CmdBufHeadPtr.u64); pHlp->pfnSSMPutU64(pSSM, pThis->CmdBufTailPtr.u64); pHlp->pfnSSMPutU64(pSSM, pThis->EvtLogHeadPtr.u64); pHlp->pfnSSMPutU64(pSSM, pThis->EvtLogTailPtr.u64); pHlp->pfnSSMPutU64(pSSM, pThis->Status.u64); pHlp->pfnSSMPutU64(pSSM, pThis->PprLogHeadPtr.u64); pHlp->pfnSSMPutU64(pSSM, pThis->PprLogTailPtr.u64); pHlp->pfnSSMPutU64(pSSM, pThis->GALogHeadPtr.u64); pHlp->pfnSSMPutU64(pSSM, pThis->GALogTailPtr.u64); pHlp->pfnSSMPutU64(pSSM, pThis->PprLogBHeadPtr.u64); pHlp->pfnSSMPutU64(pSSM, pThis->PprLogBTailPtr.u64); pHlp->pfnSSMPutU64(pSSM, pThis->EvtLogBHeadPtr.u64); pHlp->pfnSSMPutU64(pSSM, pThis->EvtLogBTailPtr.u64); pHlp->pfnSSMPutU64(pSSM, pThis->PprLogAutoResp.u64); pHlp->pfnSSMPutU64(pSSM, pThis->PprLogOverflowEarly.u64); pHlp->pfnSSMPutU64(pSSM, pThis->PprLogBOverflowEarly.u64); return pHlp->pfnSSMPutU32(pSSM, UINT32_MAX); } /** * @callback_method_impl{FNSSMDEVLOADEXEC} */ static DECLCALLBACK(int) iommuAmdR3LoadExec(PPDMDEVINS pDevIns, PSSMHANDLE pSSM, uint32_t uVersion, uint32_t uPass) { PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); PCPDMDEVHLPR3 pHlp = pDevIns->pHlpR3; int const rcErr = VERR_SSM_UNEXPECTED_DATA; LogFlowFunc(("\n")); /* Validate. */ AssertReturn(uPass == SSM_PASS_FINAL, VERR_WRONG_ORDER); if (uVersion != IOMMU_SAVED_STATE_VERSION) { LogRel(("%s: Invalid saved-state version %#x\n", IOMMU_LOG_PFX, uVersion)); return VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION; } /* Load ExtFeat and other read-only registers first. 
*/ int rc = pHlp->pfnSSMGetU64(pSSM, &pThis->ExtFeat.u64); AssertRCReturn(rc, rc); AssertLogRelMsgReturn(pThis->ExtFeat.n.u2HostAddrTranslateSize < 0x3, ("ExtFeat.HATS register invalid %#RX64\n", pThis->ExtFeat.u64), rcErr); pHlp->pfnSSMGetU64(pSSM, &pThis->DevSpecificFeat.u64); pHlp->pfnSSMGetU64(pSSM, &pThis->DevSpecificCtrl.u64); pHlp->pfnSSMGetU64(pSSM, &pThis->DevSpecificStatus.u64); pHlp->pfnSSMGetU64(pSSM, &pThis->MiscInfo.u64); pHlp->pfnSSMGetU64(pSSM, &pThis->RsvdReg); /* IOMMU base address register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->IommuBar.u64); AssertRCReturn(rc, rc); pThis->IommuBar.u64 &= IOMMU_BAR_VALID_MASK; /* Device table base address registers. */ uint8_t cDevTabBaseAddrs; rc = pHlp->pfnSSMGetU8(pSSM, &cDevTabBaseAddrs); AssertRCReturn(rc, rc); AssertLogRelMsgReturn(cDevTabBaseAddrs > 0 && cDevTabBaseAddrs <= RT_ELEMENTS(pThis->aDevTabBaseAddrs), ("Device table segment count invalid %#x\n", cDevTabBaseAddrs), rcErr); AssertCompile(RT_ELEMENTS(pThis->aDevTabBaseAddrs) == RT_ELEMENTS(g_auDevTabSegMaxSizes)); for (uint8_t i = 0; i < cDevTabBaseAddrs; i++) { rc = pHlp->pfnSSMGetU64(pSSM, &pThis->aDevTabBaseAddrs[i].u64); AssertRCReturn(rc, rc); pThis->aDevTabBaseAddrs[i].u64 &= IOMMU_DEV_TAB_BAR_VALID_MASK; uint16_t const uSegSize = pThis->aDevTabBaseAddrs[i].n.u9Size; uint16_t const uMaxSegSize = g_auDevTabSegMaxSizes[i]; AssertLogRelMsgReturn(uSegSize <= uMaxSegSize, ("Device table [%u] segment size invalid %u (max %u)\n", i, uSegSize, uMaxSegSize), rcErr); } /* Command buffer base address register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->CmdBufBaseAddr.u64); AssertRCReturn(rc, rc); pThis->CmdBufBaseAddr.u64 &= IOMMU_CMD_BUF_BAR_VALID_MASK; AssertLogRelMsgReturn(pThis->CmdBufBaseAddr.n.u4Len >= 8, ("Command buffer base address invalid %#RX64\n", pThis->CmdBufBaseAddr.u64), rcErr); /* Event log base address register. 
*/ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->EvtLogBaseAddr.u64); AssertRCReturn(rc, rc); pThis->EvtLogBaseAddr.u64 &= IOMMU_EVT_LOG_BAR_VALID_MASK; AssertLogRelMsgReturn(pThis->EvtLogBaseAddr.n.u4Len >= 8, ("Event log base address invalid %#RX64\n", pThis->EvtLogBaseAddr.u64), rcErr); /* Control register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->Ctrl.u64); AssertRCReturn(rc, rc); pThis->Ctrl.u64 &= IOMMU_CTRL_VALID_MASK; AssertLogRelMsgReturn(pThis->Ctrl.n.u3DevTabSegEn <= pThis->ExtFeat.n.u2DevTabSegSup, ("Control register invalid %#RX64\n", pThis->Ctrl.u64), rcErr); /* Exclusion range base address register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->ExclRangeBaseAddr.u64); AssertRCReturn(rc, rc); pThis->ExclRangeBaseAddr.u64 &= IOMMU_EXCL_RANGE_BAR_VALID_MASK; /* Exclusion range limit register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->ExclRangeLimit.u64); AssertRCReturn(rc, rc); pThis->ExclRangeLimit.u64 &= IOMMU_EXCL_RANGE_LIMIT_VALID_MASK; pThis->ExclRangeLimit.u64 |= UINT64_C(0xfff); #if 0 pHlp->pfnSSMGetU64(pSSM, &pThis->ExtFeat.u64); /* read-only, done already (above). */ #endif /* PPR log base address register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogBaseAddr.u64); AssertRCReturn(rc, rc); Assert(!pThis->ExtFeat.n.u1PprSup); /* Hardware event (Hi) register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->HwEvtHi.u64); AssertRCReturn(rc, rc); /* Hardware event (Lo) register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->HwEvtLo); AssertRCReturn(rc, rc); /* Hardware event status register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->HwEvtStatus.u64); AssertRCReturn(rc, rc); pThis->HwEvtStatus.u64 &= IOMMU_HW_EVT_STATUS_VALID_MASK; /* Guest Virtual-APIC log base address register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->GALogBaseAddr.u64); AssertRCReturn(rc, rc); Assert(!pThis->ExtFeat.n.u1GstVirtApicSup); /* Guest Virtual-APIC log tail address register. 
*/ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->GALogTailAddr.u64); AssertRCReturn(rc, rc); Assert(!pThis->ExtFeat.n.u1GstVirtApicSup); /* PPR log-B base address register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogBBaseAddr.u64); AssertRCReturn(rc, rc); Assert(!pThis->ExtFeat.n.u1PprSup); /* Event log-B base address register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->EvtLogBBaseAddr.u64); AssertRCReturn(rc, rc); Assert(!pThis->ExtFeat.n.u2DualPprLogSup); #if 0 pHlp->pfnSSMGetU64(pSSM, &pThis->DevSpecificFeat.u64); /* read-only, done already (above). */ pHlp->pfnSSMGetU64(pSSM, &pThis->DevSpecificCtrl.u64); /* read-only, done already (above). */ pHlp->pfnSSMGetU64(pSSM, &pThis->DevSpecificStatus.u64); /* read-only, done already (above). */ pHlp->pfnSSMGetU64(pSSM, &pThis->MiscInfo.u64); /* read-only, done already (above). */ #endif /* Performance optimization control register. */ rc = pHlp->pfnSSMGetU32(pSSM, &pThis->PerfOptCtrl.u32); AssertRCReturn(rc, rc); Assert(!pThis->ExtFeat.n.u1PerfOptSup); /* x2APIC registers. */ { Assert(!pThis->ExtFeat.n.u1X2ApicSup); /* x2APIC general interrupt control register. */ pHlp->pfnSSMGetU64(pSSM, &pThis->XtGenIntrCtrl.u64); AssertRCReturn(rc, rc); /* x2APIC PPR interrupt control register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->XtPprIntrCtrl.u64); AssertRCReturn(rc, rc); /* x2APIC GA log interrupt control register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->XtGALogIntrCtrl.u64); AssertRCReturn(rc, rc); } /* MARC (Memory Access and Routing) registers. 
*/ { uint8_t cMarcApers; rc = pHlp->pfnSSMGetU8(pSSM, &cMarcApers); AssertRCReturn(rc, rc); AssertLogRelMsgReturn(cMarcApers > 0 && cMarcApers <= RT_ELEMENTS(pThis->aMarcApers), ("MARC register count invalid %#x\n", cMarcApers), rcErr); for (uint8_t i = 0; i < cMarcApers; i++) { rc = pHlp->pfnSSMGetU64(pSSM, &pThis->aMarcApers[i].Base.u64); AssertRCReturn(rc, rc); rc = pHlp->pfnSSMGetU64(pSSM, &pThis->aMarcApers[i].Reloc.u64); AssertRCReturn(rc, rc); rc = pHlp->pfnSSMGetU64(pSSM, &pThis->aMarcApers[i].Length.u64); AssertRCReturn(rc, rc); } Assert(!pThis->ExtFeat.n.u2MarcSup); } #if 0 pHlp->pfnSSMGetU64(pSSM, &pThis->RsvdReg); /* read-only, done already (above). */ #endif /* Command buffer head pointer register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->CmdBufHeadPtr.u64); AssertRCReturn(rc, rc); { /* * IOMMU behavior is undefined when software writes a value outside the buffer length. * In our emulation, since we ignore the write entirely (see iommuAmdCmdBufHeadPtr_w) * we shouldn't see such values in the saved state. */ uint32_t const offBuf = pThis->CmdBufHeadPtr.u64 & IOMMU_CMD_BUF_HEAD_PTR_VALID_MASK; uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len); Assert(cbBuf <= _512K); AssertLogRelMsgReturn(offBuf < cbBuf, ("Command buffer head pointer invalid %#x\n", pThis->CmdBufHeadPtr.u64), rcErr); } /* Command buffer tail pointer register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->CmdBufTailPtr.u64); AssertRCReturn(rc, rc); { uint32_t const offBuf = pThis->CmdBufTailPtr.u64 & IOMMU_CMD_BUF_TAIL_PTR_VALID_MASK; uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->CmdBufBaseAddr.n.u4Len); Assert(cbBuf <= _512K); AssertLogRelMsgReturn(offBuf < cbBuf, ("Command buffer tail pointer invalid %#x\n", pThis->CmdBufTailPtr.u64), rcErr); } /* Event log head pointer register. 
*/ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->EvtLogHeadPtr.u64); AssertRCReturn(rc, rc); { uint32_t const offBuf = pThis->EvtLogHeadPtr.u64 & IOMMU_EVT_LOG_HEAD_PTR_VALID_MASK; uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len); Assert(cbBuf <= _512K); AssertLogRelMsgReturn(offBuf < cbBuf, ("Event log head pointer invalid %#x\n", pThis->EvtLogHeadPtr.u64), rcErr); } /* Event log tail pointer register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->EvtLogTailPtr.u64); AssertRCReturn(rc, rc); { uint32_t const offBuf = pThis->EvtLogTailPtr.u64 & IOMMU_EVT_LOG_TAIL_PTR_VALID_MASK; uint32_t const cbBuf = iommuAmdGetTotalBufLength(pThis->EvtLogBaseAddr.n.u4Len); Assert(cbBuf <= _512K); AssertLogRelMsgReturn(offBuf < cbBuf, ("Event log tail pointer invalid %#x\n", pThis->EvtLogTailPtr.u64), rcErr); } /* Status register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->Status.u64); AssertRCReturn(rc, rc); pThis->Status.u64 &= IOMMU_STATUS_VALID_MASK; /* PPR log head pointer register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogHeadPtr.u64); AssertRCReturn(rc, rc); Assert(!pThis->ExtFeat.n.u1PprSup); /* PPR log tail pointer register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogTailPtr.u64); AssertRCReturn(rc, rc); Assert(!pThis->ExtFeat.n.u1PprSup); /* Guest Virtual-APIC log head pointer register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->GALogHeadPtr.u64); AssertRCReturn(rc, rc); Assert(!pThis->ExtFeat.n.u1GstVirtApicSup); /* Guest Virtual-APIC log tail pointer register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->GALogTailPtr.u64); AssertRCReturn(rc, rc); Assert(!pThis->ExtFeat.n.u1GstVirtApicSup); /* PPR log-B head pointer register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogBHeadPtr.u64); AssertRCReturn(rc, rc); Assert(!pThis->ExtFeat.n.u1PprSup); /* PPR log-B head pointer register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogBTailPtr.u64); AssertRCReturn(rc, rc); Assert(!pThis->ExtFeat.n.u1PprSup); /* Event log-B head pointer register. 
*/ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->EvtLogBHeadPtr.u64); AssertRCReturn(rc, rc); Assert(!pThis->ExtFeat.n.u2DualEvtLogSup); /* Event log-B tail pointer register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->EvtLogBTailPtr.u64); AssertRCReturn(rc, rc); Assert(!pThis->ExtFeat.n.u2DualEvtLogSup); /* PPR log auto response register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogAutoResp.u64); AssertRCReturn(rc, rc); Assert(!pThis->ExtFeat.n.u1PprAutoRespSup); /* PPR log overflow early indicator register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogOverflowEarly.u64); AssertRCReturn(rc, rc); Assert(!pThis->ExtFeat.n.u1PprLogOverflowWarn); /* PPR log-B overflow early indicator register. */ rc = pHlp->pfnSSMGetU64(pSSM, &pThis->PprLogBOverflowEarly.u64); AssertRCReturn(rc, rc); Assert(!pThis->ExtFeat.n.u1PprLogOverflowWarn); /* End marker. */ { uint32_t uEndMarker; rc = pHlp->pfnSSMGetU32(pSSM, &uEndMarker); AssertLogRelMsgRCReturn(rc, ("Failed to read end marker. rc=%Rrc\n", rc), VERR_SSM_DATA_UNIT_FORMAT_CHANGED); AssertLogRelMsgReturn(uEndMarker == UINT32_MAX, ("End marker invalid (%#x expected %#x)\n", uEndMarker, UINT32_MAX), rcErr); } return rc; } /** * @callback_method_impl{FNSSMDEVLOADDONE} */ static DECLCALLBACK(int) iommuAmdR3LoadDone(PPDMDEVINS pDevIns, PSSMHANDLE pSSM) { PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3); RT_NOREF(pSSM); LogFlowFunc(("\n")); /* Sanity. */ AssertPtrReturn(pThis, VERR_INVALID_POINTER); AssertPtrReturn(pThisR3, VERR_INVALID_POINTER); int rc; IOMMU_LOCK(pDevIns, pThisR3); /* Map MMIO regions if the IOMMU BAR is enabled. */ if (pThis->IommuBar.n.u1Enable) rc = iommuAmdR3MmioSetup(pDevIns); else rc = VINF_SUCCESS; /* Wake up the command thread if commands need processing. 
*/
    iommuAmdCmdThreadWakeUpIfNeeded(pDevIns);

    IOMMU_UNLOCK(pDevIns, pThisR3);

    /* Log the revision fields of the device-specific registers and the restored ExtFeat value. */
    LogRel(("%s: Restored: DSFX=%u.%u DSCX=%u.%u DSSX=%u.%u ExtFeat=%#RX64\n", IOMMU_LOG_PFX,
            pThis->DevSpecificFeat.n.u4RevMajor, pThis->DevSpecificFeat.n.u4RevMinor,
            pThis->DevSpecificCtrl.n.u4RevMajor, pThis->DevSpecificCtrl.n.u4RevMinor,
            pThis->DevSpecificStatus.n.u4RevMajor, pThis->DevSpecificStatus.n.u4RevMinor,
            pThis->ExtFeat.u64));
    return rc;
}


/**
 * @interface_method_impl{PDMDEVREG,pfnReset}
 */
static DECLCALLBACK(void) iommuAmdR3Reset(PPDMDEVINS pDevIns)
{
    /*
     * Resets read-write portion of the IOMMU state.
     *
     * NOTE! State not initialized here is expected to be initialized during
     * device construction and remain read-only through the lifetime of the VM.
     */
    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
    PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
    PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0];
    PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev);
    LogFlowFunc(("\n"));

    /* The register state below is rewritten while holding the IOMMU lock. */
    IOMMU_LOCK(pDevIns, pThisR3);

    RT_ZERO(pThis->aDevTabBaseAddrs);

    /* Base address registers are zeroed first; their length field is then set to 8. */
    pThis->CmdBufBaseAddr.u64 = 0;
    pThis->CmdBufBaseAddr.n.u4Len = 8;

    pThis->EvtLogBaseAddr.u64 = 0;
    pThis->EvtLogBaseAddr.n.u4Len = 8;

    /* The control register comes up with only the coherent bit set. */
    pThis->Ctrl.u64 = 0;
    pThis->Ctrl.n.u1Coherent = 1;
    Assert(!pThis->ExtFeat.n.u1BlockStopMarkSup);

    pThis->ExclRangeBaseAddr.u64 = 0;
    pThis->ExclRangeLimit.u64 = 0;

    pThis->PprLogBaseAddr.u64 = 0;
    pThis->PprLogBaseAddr.n.u4Len = 8;

    pThis->HwEvtHi.u64 = 0;
    pThis->HwEvtLo = 0;
    pThis->HwEvtStatus.u64 = 0;

    pThis->GALogBaseAddr.u64 = 0;
    pThis->GALogBaseAddr.n.u4Len = 8;
    pThis->GALogTailAddr.u64 = 0;

    pThis->PprLogBBaseAddr.u64 = 0;
    pThis->PprLogBBaseAddr.n.u4Len = 8;

    pThis->EvtLogBBaseAddr.u64 = 0;
    pThis->EvtLogBBaseAddr.n.u4Len = 8;

    pThis->PerfOptCtrl.u32 = 0;

    pThis->XtGenIntrCtrl.u64 = 0;
    pThis->XtPprIntrCtrl.u64 = 0;
    pThis->XtGALogIntrCtrl.u64 = 0;

    RT_ZERO(pThis->aMarcApers);

    /* Head/tail pointers, status and the remaining PPR log registers all return to zero. */
    pThis->CmdBufHeadPtr.u64 = 0;
    pThis->CmdBufTailPtr.u64 = 0;
    pThis->EvtLogHeadPtr.u64 = 0;
    pThis->EvtLogTailPtr.u64 = 0;

    pThis->Status.u64 = 0;

    pThis->PprLogHeadPtr.u64 = 0;
    pThis->PprLogTailPtr.u64 = 0;

    pThis->GALogHeadPtr.u64 = 0;
    pThis->GALogTailPtr.u64 = 0;

    pThis->PprLogBHeadPtr.u64 = 0;
    pThis->PprLogBTailPtr.u64 = 0;

    pThis->EvtLogBHeadPtr.u64 = 0;
    pThis->EvtLogBTailPtr.u64 = 0;

    pThis->PprLogAutoResp.u64 = 0;
    pThis->PprLogOverflowEarly.u64 = 0;
    pThis->PprLogBOverflowEarly.u64 = 0;

    /* Clear the IOMMU BAR both in the device state and in PCI config space, and
       set the PCI command register to bus-master only. */
    pThis->IommuBar.u64 = 0;
    PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_BASE_ADDR_REG_LO, 0);
    PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_BASE_ADDR_REG_HI, 0);

    PDMPciDevSetCommand(pPciDev, VBOX_PCI_COMMAND_MASTER);

    IOMMU_UNLOCK(pDevIns, pThisR3);

    /* Empty whichever caches are compiled in; these calls are made after the IOMMU lock is released. */
#ifdef IOMMU_WITH_DTE_CACHE
    iommuAmdDteCacheRemoveAll(pDevIns);
#endif
#ifdef IOMMU_WITH_IOTLBE_CACHE
    iommuAmdIotlbRemoveAll(pDevIns);
#endif
#ifdef IOMMU_WITH_IRTE_CACHE
    iommuAmdIrteCacheRemoveAll(pDevIns);
#endif
}


/**
 * @interface_method_impl{PDMDEVREG,pfnDestruct}
 *
 * Closes the command thread event semaphore if one is open and, when the IOTLBE
 * cache is compiled in, frees the IOTLB entry array.  Always returns VINF_SUCCESS.
 */
static DECLCALLBACK(int) iommuAmdR3Destruct(PPDMDEVINS pDevIns)
{
    PDMDEV_CHECK_VERSIONS_RETURN_QUIET(pDevIns);
    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
    PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
    LogFlowFunc(("\n"));

    IOMMU_LOCK(pDevIns, pThisR3);

    /* Reset the handle to NIL after closing so it is not closed twice. */
    if (pThis->hEvtCmdThread != NIL_SUPSEMEVENT)
    {
        PDMDevHlpSUPSemEventClose(pDevIns, pThis->hEvtCmdThread);
        pThis->hEvtCmdThread = NIL_SUPSEMEVENT;
    }

#ifdef IOMMU_WITH_IOTLBE_CACHE
    /* Release the IOTLB entry array and reset the unused-entry index along with it. */
    if (pThisR3->paIotlbes)
    {
        PDMDevHlpMMHeapFree(pDevIns, pThisR3->paIotlbes);
        pThisR3->paIotlbes = NULL;
        pThisR3->idxUnusedIotlbe = 0;
    }
#endif

    IOMMU_UNLOCK(pDevIns, pThisR3);
    return VINF_SUCCESS;
}


/**
 * @interface_method_impl{PDMDEVREG,pfnConstruct}
 */
static DECLCALLBACK(int) iommuAmdR3Construct(PPDMDEVINS pDevIns, int iInstance, PCFGMNODE pCfg)
{
    PDMDEV_CHECK_VERSIONS_RETURN(pDevIns);
    RT_NOREF(pCfg);

    PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU);
    PIOMMUR3 pThisR3 = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUR3);
    pThis->u32Magic = IOMMU_MAGIC;
    pThisR3->pDevInsR3 = pDevIns;

    LogFlowFunc(("iInstance=%d\n", iInstance));

    /*
     * Register the IOMMU with PDM. 
*/ PDMIOMMUREGR3 IommuReg; RT_ZERO(IommuReg); IommuReg.u32Version = PDM_IOMMUREGCC_VERSION; IommuReg.pfnMemAccess = iommuAmdMemAccess; IommuReg.pfnMemBulkAccess = iommuAmdMemBulkAccess; IommuReg.pfnMsiRemap = iommuAmdMsiRemap; IommuReg.u32TheEnd = PDM_IOMMUREGCC_VERSION; int rc = PDMDevHlpIommuRegister(pDevIns, &IommuReg, &pThisR3->CTX_SUFF(pIommuHlp), &pThis->idxIommu); if (RT_FAILURE(rc)) return PDMDEV_SET_ERROR(pDevIns, rc, N_("Failed to register ourselves as an IOMMU device")); if (pThisR3->CTX_SUFF(pIommuHlp)->u32Version != PDM_IOMMUHLPR3_VERSION) return PDMDevHlpVMSetError(pDevIns, VERR_VERSION_MISMATCH, RT_SRC_POS, N_("IOMMU helper version mismatch; got %#x expected %#x"), pThisR3->CTX_SUFF(pIommuHlp)->u32Version, PDM_IOMMUHLPR3_VERSION); if (pThisR3->CTX_SUFF(pIommuHlp)->u32TheEnd != PDM_IOMMUHLPR3_VERSION) return PDMDevHlpVMSetError(pDevIns, VERR_VERSION_MISMATCH, RT_SRC_POS, N_("IOMMU helper end-version mismatch; got %#x expected %#x"), pThisR3->CTX_SUFF(pIommuHlp)->u32TheEnd, PDM_IOMMUHLPR3_VERSION); /* * We will use PDM's critical section (via helpers) for the IOMMU device. */ rc = PDMDevHlpSetDeviceCritSect(pDevIns, PDMDevHlpCritSectGetNop(pDevIns)); AssertRCReturn(rc, rc); /* * Initialize read-only PCI configuration space. */ PPDMPCIDEV pPciDev = pDevIns->apPciDevs[0]; PDMPCIDEV_ASSERT_VALID(pDevIns, pPciDev); /* Header. 
*/ PDMPciDevSetVendorId(pPciDev, IOMMU_PCI_VENDOR_ID); /* AMD */ PDMPciDevSetDeviceId(pPciDev, IOMMU_PCI_DEVICE_ID); /* VirtualBox IOMMU device */ PDMPciDevSetCommand(pPciDev, VBOX_PCI_COMMAND_MASTER); /* Enable bus master (as we directly access main memory) */ PDMPciDevSetStatus(pPciDev, VBOX_PCI_STATUS_CAP_LIST); /* Capability list supported */ PDMPciDevSetRevisionId(pPciDev, IOMMU_PCI_REVISION_ID); /* VirtualBox specific device implementation revision */ PDMPciDevSetClassBase(pPciDev, VBOX_PCI_CLASS_SYSTEM); /* System Base Peripheral */ PDMPciDevSetClassSub(pPciDev, VBOX_PCI_SUB_SYSTEM_IOMMU); /* IOMMU */ PDMPciDevSetClassProg(pPciDev, 0x0); /* IOMMU Programming interface */ PDMPciDevSetHeaderType(pPciDev, 0x0); /* Single function, type 0 */ PDMPciDevSetSubSystemId(pPciDev, IOMMU_PCI_DEVICE_ID); /* AMD */ PDMPciDevSetSubSystemVendorId(pPciDev, IOMMU_PCI_VENDOR_ID); /* VirtualBox IOMMU device */ PDMPciDevSetCapabilityList(pPciDev, IOMMU_PCI_OFF_CAP_HDR); /* Offset into capability registers */ PDMPciDevSetInterruptPin(pPciDev, 0x1); /* INTA#. */ PDMPciDevSetInterruptLine(pPciDev, 0x0); /* For software compatibility; no effect on hardware */ /* Capability Header. */ /* NOTE! Fields (e.g, EFR) must match what we expose in the ACPI tables. 
*/ PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_CAP_HDR, RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_ID, 0xf) /* RO - Secure Device capability block */ | RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_PTR, IOMMU_PCI_OFF_MSI_CAP_HDR) /* RO - Next capability offset */ | RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_TYPE, 0x3) /* RO - IOMMU capability block */ | RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_REV, 0x1) /* RO - IOMMU interface revision */ | RT_BF_MAKE(IOMMU_BF_CAPHDR_IOTLB_SUP, 0x0) /* RO - Remote IOTLB support */ | RT_BF_MAKE(IOMMU_BF_CAPHDR_HT_TUNNEL, 0x0) /* RO - HyperTransport Tunnel support */ | RT_BF_MAKE(IOMMU_BF_CAPHDR_NP_CACHE, 0x0) /* RO - Cache NP page table entries */ | RT_BF_MAKE(IOMMU_BF_CAPHDR_EFR_SUP, 0x1) /* RO - Extended Feature Register support */ | RT_BF_MAKE(IOMMU_BF_CAPHDR_CAP_EXT, 0x1)); /* RO - Misc. Information Register support */ /* Base Address Register. */ PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_BASE_ADDR_REG_LO, 0x0); /* RW - Base address (Lo) and enable bit */ PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_BASE_ADDR_REG_HI, 0x0); /* RW - Base address (Hi) */ /* IOMMU Range Register. */ PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_RANGE_REG, 0x0); /* RW - Range register (implemented as RO by us) */ /* Misc. Information Register. */ /* NOTE! Fields (e.g, GVA size) must match what we expose in the ACPI tables. */ uint32_t const uMiscInfoReg0 = RT_BF_MAKE(IOMMU_BF_MISCINFO_0_MSI_NUM, 0) /* RO - MSI number */ | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_GVA_SIZE, 2) /* RO - Guest Virt. Addr size (2=48 bits) */ | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_PA_SIZE, 48) /* RO - Physical Addr size (48 bits) */ | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_VA_SIZE, 64) /* RO - Virt. 
Addr size (64 bits) */ | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_HT_ATS_RESV, 0) /* RW - HT ATS reserved */ | RT_BF_MAKE(IOMMU_BF_MISCINFO_0_MSI_NUM_PPR, 0); /* RW - PPR interrupt number */ uint32_t const uMiscInfoReg1 = 0; PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MISCINFO_REG_0, uMiscInfoReg0); PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MISCINFO_REG_1, uMiscInfoReg1); /* MSI Capability Header register. */ PDMMSIREG MsiReg; RT_ZERO(MsiReg); MsiReg.cMsiVectors = 1; MsiReg.iMsiCapOffset = IOMMU_PCI_OFF_MSI_CAP_HDR; MsiReg.iMsiNextOffset = 0; /* IOMMU_PCI_OFF_MSI_MAP_CAP_HDR */ MsiReg.fMsi64bit = 1; /* 64-bit addressing support is mandatory; See AMD IOMMU spec. 2.8 "IOMMU Interrupt Support". */ /* MSI Address (Lo, Hi) and MSI data are read-write PCI config registers handled by our generic PCI config space code. */ #if 0 /* MSI Address Lo. */ PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_LO, 0); /* RW - MSI message address (Lo) */ /* MSI Address Hi. */ PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_ADDR_HI, 0); /* RW - MSI message address (Hi) */ /* MSI Data. */ PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_DATA, 0); /* RW - MSI data */ #endif #if 0 /** @todo IOMMU: I don't know if we need to support this, enable later if * required. */ /* MSI Mapping Capability Header register. */ PDMPciDevSetDWord(pPciDev, IOMMU_PCI_OFF_MSI_MAP_CAP_HDR, RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_CAP_ID, 0x8) /* RO - Capability ID */ | RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_CAP_PTR, 0x0) /* RO - Offset to next capability (NULL) */ | RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_EN, 0x1) /* RO - MSI mapping capability enable */ | RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_FIXED, 0x1) /* RO - MSI mapping range is fixed */ | RT_BF_MAKE(IOMMU_BF_MSI_MAP_CAPHDR_CAP_TYPE, 0x15)); /* RO - MSI mapping capability */ /* When implementing don't forget to copy this to its MMIO shadow register (MsiMapCapHdr) in iommuAmdR3Init. */ #endif /* * Register the PCI function with PDM. 
*/ rc = PDMDevHlpPCIRegister(pDevIns, pPciDev); AssertLogRelRCReturn(rc, rc); /* * Register MSI support for the PCI device. * This must be done -after- registering it as a PCI device! */ rc = PDMDevHlpPCIRegisterMsi(pDevIns, &MsiReg); AssertRCReturn(rc, rc); /* * Intercept PCI config. space accesses. */ rc = PDMDevHlpPCIInterceptConfigAccesses(pDevIns, pPciDev, iommuAmdR3PciConfigRead, iommuAmdR3PciConfigWrite); AssertLogRelRCReturn(rc, rc); /* * Create the MMIO region. * Mapping of the region is done when software configures it via PCI config space. */ rc = PDMDevHlpMmioCreate(pDevIns, IOMMU_MMIO_REGION_SIZE, pPciDev, 0 /* iPciRegion */, iommuAmdMmioWrite, iommuAmdMmioRead, NULL /* pvUser */, IOMMMIO_FLAGS_READ_DWORD_QWORD | IOMMMIO_FLAGS_WRITE_DWORD_QWORD_READ_MISSING | IOMMMIO_FLAGS_DBGSTOP_ON_COMPLICATED_READ | IOMMMIO_FLAGS_DBGSTOP_ON_COMPLICATED_WRITE, "AMD-IOMMU", &pThis->hMmio); AssertLogRelRCReturn(rc, rc); /* * Register saved state handlers. */ rc = PDMDevHlpSSMRegisterEx(pDevIns, IOMMU_SAVED_STATE_VERSION, sizeof(IOMMU), NULL /* pszBefore */, NULL /* pfnLivePrep */, NULL /* pfnLiveExec */, NULL /* pfnLiveVote */, NULL /* pfnSavePrep */, iommuAmdR3SaveExec, NULL /* pfnSaveDone */, NULL /* pfnLoadPrep */, iommuAmdR3LoadExec, iommuAmdR3LoadDone); AssertLogRelRCReturn(rc, rc); /* * Register debugger info items. */ PDMDevHlpDBGFInfoRegister(pDevIns, "iommu", "Display IOMMU state.", iommuAmdR3DbgInfo); PDMDevHlpDBGFInfoRegister(pDevIns, "iommudte", "Display the DTE for a device (from memory). Arguments: DeviceID.", iommuAmdR3DbgInfoDte); PDMDevHlpDBGFInfoRegister(pDevIns, "iommudevtabs", "Display I/O device tables with translation enabled.", iommuAmdR3DbgInfoDevTabs); #ifdef IOMMU_WITH_IOTLBE_CACHE PDMDevHlpDBGFInfoRegister(pDevIns, "iommutlb", "Display IOTLBs for a domain. 
Arguments: DomainID.", iommuAmdR3DbgInfoIotlb); #endif #ifdef IOMMU_WITH_DTE_CACHE PDMDevHlpDBGFInfoRegister(pDevIns, "iommudtecache", "Display the DTE cache.", iommuAmdR3DbgInfoDteCache); #endif #ifdef IOMMU_WITH_IRTE_CACHE PDMDevHlpDBGFInfoRegister(pDevIns, "iommuirtecache", "Display the IRTE cache.", iommuAmdR3DbgInfoIrteCache); #endif # ifdef VBOX_WITH_STATISTICS /* * Statistics. */ PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioReadR3, STAMTYPE_COUNTER, "R3/MmioRead", STAMUNIT_OCCURENCES, "Number of MMIO reads in R3"); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioReadRZ, STAMTYPE_COUNTER, "RZ/MmioRead", STAMUNIT_OCCURENCES, "Number of MMIO reads in RZ."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioWriteR3, STAMTYPE_COUNTER, "R3/MmioWrite", STAMUNIT_OCCURENCES, "Number of MMIO writes in R3."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMmioWriteRZ, STAMTYPE_COUNTER, "RZ/MmioWrite", STAMUNIT_OCCURENCES, "Number of MMIO writes in RZ."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMsiRemapR3, STAMTYPE_COUNTER, "R3/MsiRemap", STAMUNIT_OCCURENCES, "Number of interrupt remap requests in R3."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMsiRemapRZ, STAMTYPE_COUNTER, "RZ/MsiRemap", STAMUNIT_OCCURENCES, "Number of interrupt remap requests in RZ."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemReadR3, STAMTYPE_COUNTER, "R3/MemRead", STAMUNIT_OCCURENCES, "Number of memory read translation requests in R3."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemReadRZ, STAMTYPE_COUNTER, "RZ/MemRead", STAMUNIT_OCCURENCES, "Number of memory read translation requests in RZ."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemWriteR3, STAMTYPE_COUNTER, "R3/MemWrite", STAMUNIT_OCCURENCES, "Number of memory write translation requests in R3."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemWriteRZ, STAMTYPE_COUNTER, "RZ/MemWrite", STAMUNIT_OCCURENCES, "Number of memory write translation requests in RZ."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkReadR3, STAMTYPE_COUNTER, 
"R3/MemBulkRead", STAMUNIT_OCCURENCES, "Number of memory bulk read translation requests in R3."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkReadRZ, STAMTYPE_COUNTER, "RZ/MemBulkRead", STAMUNIT_OCCURENCES, "Number of memory bulk read translation requests in RZ."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkWriteR3, STAMTYPE_COUNTER, "R3/MemBulkWrite", STAMUNIT_OCCURENCES, "Number of memory bulk write translation requests in R3."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatMemBulkWriteRZ, STAMTYPE_COUNTER, "RZ/MemBulkWrite", STAMUNIT_OCCURENCES, "Number of memory bulk write translation requests in RZ."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmd, STAMTYPE_COUNTER, "R3/Commands", STAMUNIT_OCCURENCES, "Number of commands processed (total)."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdCompWait, STAMTYPE_COUNTER, "R3/Commands/CompWait", STAMUNIT_OCCURENCES, "Number of Completion Wait commands processed."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvDte, STAMTYPE_COUNTER, "R3/Commands/InvDte", STAMUNIT_OCCURENCES, "Number of Invalidate DTE commands processed."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvIommuPages, STAMTYPE_COUNTER, "R3/Commands/InvIommuPages", STAMUNIT_OCCURENCES, "Number of Invalidate IOMMU Pages commands processed."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvIotlbPages, STAMTYPE_COUNTER, "R3/Commands/InvIotlbPages", STAMUNIT_OCCURENCES, "Number of Invalidate IOTLB Pages commands processed."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvIntrTable, STAMTYPE_COUNTER, "R3/Commands/InvIntrTable", STAMUNIT_OCCURENCES, "Number of Invalidate Interrupt Table commands processed."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdPrefIommuPages, STAMTYPE_COUNTER, "R3/Commands/PrefIommuPages", STAMUNIT_OCCURENCES, "Number of Prefetch IOMMU Pages commands processed."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdCompletePprReq, STAMTYPE_COUNTER, "R3/Commands/CompletePprReq", STAMUNIT_OCCURENCES, "Number of Complete 
PPR Requests commands processed."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatCmdInvIommuAll, STAMTYPE_COUNTER, "R3/Commands/InvIommuAll", STAMUNIT_OCCURENCES, "Number of Invalidate IOMMU All commands processed."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIotlbeCached, STAMTYPE_COUNTER, "IOTLB/Cached", STAMUNIT_OCCURENCES, "Number of IOTLB entries in the cache."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIotlbeLazyEvictReuse, STAMTYPE_COUNTER, "IOTLB/LazyEvictReuse", STAMUNIT_OCCURENCES, "Number of IOTLB entries reused after lazy eviction."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatProfDteLookup, STAMTYPE_PROFILE, "Profile/DteLookup", STAMUNIT_TICKS_PER_CALL, "Profiling DTE lookup."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatProfIotlbeLookup, STAMTYPE_PROFILE, "Profile/IotlbeLookup", STAMUNIT_TICKS_PER_CALL, "Profiling IOTLBE lookup."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatProfIrteLookup, STAMTYPE_PROFILE, "Profile/IrteLookup", STAMUNIT_TICKS_PER_CALL, "Profiling IRTE lookup."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatProfIrteCacheLookup, STAMTYPE_PROFILE, "Profile/IrteCacheLookup", STAMUNIT_TICKS_PER_CALL, "Profiling IRTE cache lookup."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheHit, STAMTYPE_COUNTER, "MemAccess/CacheHit", STAMUNIT_OCCURENCES, "Number of cache hits."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheMiss, STAMTYPE_COUNTER, "MemAccess/CacheMiss", STAMUNIT_OCCURENCES, "Number of cache misses."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheHitFull, STAMTYPE_COUNTER, "MemAccess/CacheHitFull", STAMUNIT_OCCURENCES, "Number of accesses that was entirely in the cache."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCacheNonContig, STAMTYPE_COUNTER, "MemAccess/CacheNonContig", STAMUNIT_OCCURENCES, "Number of cache accesses that resulted in non-contiguous translated regions."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessCachePermDenied, STAMTYPE_COUNTER, "MemAccess/CacheAddrDenied", 
STAMUNIT_OCCURENCES, "Number of cache accesses that resulted in denied permissions."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessDteNonContig, STAMTYPE_COUNTER, "MemAccess/DteNonContig", STAMUNIT_OCCURENCES, "Number of DTE accesses that resulted in non-contiguous translated regions."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatAccessDtePermDenied, STAMTYPE_COUNTER, "MemAccess/DtePermDenied", STAMUNIT_OCCURENCES, "Number of DTE accesses that resulted in denied permissions."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIntrCacheHit, STAMTYPE_COUNTER, "Interrupt/CacheHit", STAMUNIT_OCCURENCES, "Number of cache hits."); PDMDevHlpSTAMRegister(pDevIns, &pThis->StatIntrCacheMiss, STAMTYPE_COUNTER, "Interrupt/CacheMiss", STAMUNIT_OCCURENCES, "Number of cache misses."); # endif /* * Create the command thread and its event semaphore. */ char szDevIommu[64]; RT_ZERO(szDevIommu); RTStrPrintf(szDevIommu, sizeof(szDevIommu), "IOMMU-%u", iInstance); rc = PDMDevHlpThreadCreate(pDevIns, &pThisR3->pCmdThread, pThis, iommuAmdR3CmdThread, iommuAmdR3CmdThreadWakeUp, 0 /* cbStack */, RTTHREADTYPE_IO, szDevIommu); AssertLogRelRCReturn(rc, rc); rc = PDMDevHlpSUPSemEventCreate(pDevIns, &pThis->hEvtCmdThread); AssertLogRelRCReturn(rc, rc); #ifdef IOMMU_WITH_DTE_CACHE /* * Initialize the critsect of the cache. */ rc = PDMDevHlpCritSectInit(pDevIns, &pThis->CritSectCache, RT_SRC_POS, "IOMMUCache-#%u", pDevIns->iInstance); AssertLogRelRCReturn(rc, rc); /* Several places in this code relies on this basic assumption - assert it! */ AssertCompile(RT_ELEMENTS(pThis->aDeviceIds) == RT_ELEMENTS(pThis->aDteCache)); #endif #ifdef IOMMU_WITH_IOTLBE_CACHE /* * Allocate IOTLB entries. * This is allocated upfront since we expect a relatively small number of entries, * is more cache-line efficient and easier to track least recently used entries for * eviction when the cache is full. This also avoids unpredictable behavior during * the lifetime of the VM if the hyperheap gets full. 
*/ size_t const cbIotlbes = sizeof(IOTLBE) * IOMMU_IOTLBE_MAX; pThisR3->paIotlbes = (PIOTLBE)PDMDevHlpMMHeapAllocZ(pDevIns, cbIotlbes); if (!pThisR3->paIotlbes) return PDMDevHlpVMSetError(pDevIns, VERR_NO_MEMORY, RT_SRC_POS, N_("Failed to allocate %zu bytes from the hyperheap for the IOTLB cache."), cbIotlbes); RTListInit(&pThisR3->LstLruIotlbe); LogRel(("%s: Allocated %zu bytes from the hyperheap for the IOTLB cache\n", IOMMU_LOG_PFX, cbIotlbes)); #endif /* * Initialize read-only registers. * NOTE! Fields here must match their corresponding field in the ACPI tables. */ /* Don't remove the commented lines below as it lets us see all features at a glance. */ pThis->ExtFeat.u64 = 0; //pThis->ExtFeat.n.u1PrefetchSup = 0; //pThis->ExtFeat.n.u1PprSup = 0; //pThis->ExtFeat.n.u1X2ApicSup = 0; //pThis->ExtFeat.n.u1NoExecuteSup = 0; //pThis->ExtFeat.n.u1GstTranslateSup = 0; pThis->ExtFeat.n.u1InvAllSup = 1; //pThis->ExtFeat.n.u1GstVirtApicSup = 0; pThis->ExtFeat.n.u1HwErrorSup = 1; //pThis->ExtFeat.n.u1PerfCounterSup = 0; AssertCompile((IOMMU_MAX_HOST_PT_LEVEL & 0x3) < 3); pThis->ExtFeat.n.u2HostAddrTranslateSize = (IOMMU_MAX_HOST_PT_LEVEL & 0x3); //pThis->ExtFeat.n.u2GstAddrTranslateSize = 0; /* Requires GstTranslateSup */ //pThis->ExtFeat.n.u2GstCr3RootTblLevel = 0; /* Requires GstTranslateSup */ //pThis->ExtFeat.n.u2SmiFilterSup = 0; //pThis->ExtFeat.n.u3SmiFilterCount = 0; //pThis->ExtFeat.n.u3GstVirtApicModeSup = 0; /* Requires GstVirtApicSup */ //pThis->ExtFeat.n.u2DualPprLogSup = 0; //pThis->ExtFeat.n.u2DualEvtLogSup = 0; //pThis->ExtFeat.n.u5MaxPasidSup = 0; /* Requires GstTranslateSup */ //pThis->ExtFeat.n.u1UserSupervisorSup = 0; AssertCompile(IOMMU_MAX_DEV_TAB_SEGMENTS <= 3); pThis->ExtFeat.n.u2DevTabSegSup = IOMMU_MAX_DEV_TAB_SEGMENTS; //pThis->ExtFeat.n.u1PprLogOverflowWarn = 0; //pThis->ExtFeat.n.u1PprAutoRespSup = 0; //pThis->ExtFeat.n.u2MarcSup = 0; //pThis->ExtFeat.n.u1BlockStopMarkSup = 0; //pThis->ExtFeat.n.u1PerfOptSup = 0; 
pThis->ExtFeat.n.u1MsiCapMmioSup = 1; //pThis->ExtFeat.n.u1GstIoSup = 0; //pThis->ExtFeat.n.u1HostAccessSup = 0; //pThis->ExtFeat.n.u1EnhancedPprSup = 0; //pThis->ExtFeat.n.u1AttrForwardSup = 0; //pThis->ExtFeat.n.u1HostDirtySup = 0; //pThis->ExtFeat.n.u1InvIoTlbTypeSup = 0; //pThis->ExtFeat.n.u1GstUpdateDisSup = 0; //pThis->ExtFeat.n.u1ForcePhysDstSup = 0; pThis->DevSpecificFeat.u64 = 0; pThis->DevSpecificFeat.n.u4RevMajor = IOMMU_DEVSPEC_FEAT_MAJOR_VERSION; pThis->DevSpecificFeat.n.u4RevMinor = IOMMU_DEVSPEC_FEAT_MINOR_VERSION; pThis->DevSpecificCtrl.u64 = 0; pThis->DevSpecificCtrl.n.u4RevMajor = IOMMU_DEVSPEC_CTRL_MAJOR_VERSION; pThis->DevSpecificCtrl.n.u4RevMinor = IOMMU_DEVSPEC_CTRL_MINOR_VERSION; pThis->DevSpecificStatus.u64 = 0; pThis->DevSpecificStatus.n.u4RevMajor = IOMMU_DEVSPEC_STATUS_MAJOR_VERSION; pThis->DevSpecificStatus.n.u4RevMinor = IOMMU_DEVSPEC_STATUS_MINOR_VERSION; pThis->MiscInfo.u64 = RT_MAKE_U64(uMiscInfoReg0, uMiscInfoReg1); pThis->RsvdReg = 0; /* * Initialize parts of the IOMMU state as it would during reset. * Also initializes non-zero initial values like IRTE cache keys. * Must be called -after- initializing PCI config. space registers. */ iommuAmdR3Reset(pDevIns); LogRel(("%s: DSFX=%u.%u DSCX=%u.%u DSSX=%u.%u ExtFeat=%#RX64\n", IOMMU_LOG_PFX, pThis->DevSpecificFeat.n.u4RevMajor, pThis->DevSpecificFeat.n.u4RevMinor, pThis->DevSpecificCtrl.n.u4RevMajor, pThis->DevSpecificCtrl.n.u4RevMinor, pThis->DevSpecificStatus.n.u4RevMajor, pThis->DevSpecificStatus.n.u4RevMinor, pThis->ExtFeat.u64)); return VINF_SUCCESS; } #else /** * @callback_method_impl{PDMDEVREGR0,pfnConstruct} */ static DECLCALLBACK(int) iommuAmdRZConstruct(PPDMDEVINS pDevIns) { PDMDEV_CHECK_VERSIONS_RETURN(pDevIns); PIOMMU pThis = PDMDEVINS_2_DATA(pDevIns, PIOMMU); PIOMMUCC pThisCC = PDMDEVINS_2_DATA_CC(pDevIns, PIOMMUCC); pThisCC->CTX_SUFF(pDevIns) = pDevIns; /* We will use PDM's critical section (via helpers) for the IOMMU device. 
*/ int rc = PDMDevHlpSetDeviceCritSect(pDevIns, PDMDevHlpCritSectGetNop(pDevIns)); AssertRCReturn(rc, rc); /* Set up the MMIO RZ handlers. */ rc = PDMDevHlpMmioSetUpContext(pDevIns, pThis->hMmio, iommuAmdMmioWrite, iommuAmdMmioRead, NULL /* pvUser */); AssertRCReturn(rc, rc); /* Set up the IOMMU RZ callbacks. */ PDMIOMMUREGCC IommuReg; RT_ZERO(IommuReg); IommuReg.u32Version = PDM_IOMMUREGCC_VERSION; IommuReg.idxIommu = pThis->idxIommu; IommuReg.pfnMemAccess = iommuAmdMemAccess; IommuReg.pfnMemBulkAccess = iommuAmdMemBulkAccess; IommuReg.pfnMsiRemap = iommuAmdMsiRemap; IommuReg.u32TheEnd = PDM_IOMMUREGCC_VERSION; rc = PDMDevHlpIommuSetUpContext(pDevIns, &IommuReg, &pThisCC->CTX_SUFF(pIommuHlp)); AssertRCReturn(rc, rc); AssertPtrReturn(pThisCC->CTX_SUFF(pIommuHlp), VERR_IOMMU_IPE_1); AssertReturn(pThisCC->CTX_SUFF(pIommuHlp)->u32Version == CTX_MID(PDM_IOMMUHLP,_VERSION), VERR_VERSION_MISMATCH); AssertReturn(pThisCC->CTX_SUFF(pIommuHlp)->u32TheEnd == CTX_MID(PDM_IOMMUHLP,_VERSION), VERR_VERSION_MISMATCH); AssertPtrReturn(pThisCC->CTX_SUFF(pIommuHlp)->pfnLock, VERR_INVALID_POINTER); AssertPtrReturn(pThisCC->CTX_SUFF(pIommuHlp)->pfnUnlock, VERR_INVALID_POINTER); AssertPtrReturn(pThisCC->CTX_SUFF(pIommuHlp)->pfnLockIsOwner, VERR_INVALID_POINTER); AssertPtrReturn(pThisCC->CTX_SUFF(pIommuHlp)->pfnSendMsi, VERR_INVALID_POINTER); return VINF_SUCCESS; } #endif /** * The device registration structure. 
*/ const PDMDEVREG g_DeviceIommuAmd = { /* .u32Version = */ PDM_DEVREG_VERSION, /* .uReserved0 = */ 0, /* .szName = */ "iommu-amd", /* .fFlags = */ PDM_DEVREG_FLAGS_DEFAULT_BITS | PDM_DEVREG_FLAGS_RZ | PDM_DEVREG_FLAGS_NEW_STYLE, /* .fClass = */ PDM_DEVREG_CLASS_PCI_BUILTIN, /* .cMaxInstances = */ 1, /* .uSharedVersion = */ 42, /* .cbInstanceShared = */ sizeof(IOMMU), /* .cbInstanceCC = */ sizeof(IOMMUCC), /* .cbInstanceRC = */ sizeof(IOMMURC), /* .cMaxPciDevices = */ 1, /* .cMaxMsixVectors = */ 0, /* .pszDescription = */ "IOMMU (AMD)", #if defined(IN_RING3) /* .pszRCMod = */ "VBoxDDRC.rc", /* .pszR0Mod = */ "VBoxDDR0.r0", /* .pfnConstruct = */ iommuAmdR3Construct, /* .pfnDestruct = */ iommuAmdR3Destruct, /* .pfnRelocate = */ NULL, /* .pfnMemSetup = */ NULL, /* .pfnPowerOn = */ NULL, /* .pfnReset = */ iommuAmdR3Reset, /* .pfnSuspend = */ NULL, /* .pfnResume = */ NULL, /* .pfnAttach = */ NULL, /* .pfnDetach = */ NULL, /* .pfnQueryInterface = */ NULL, /* .pfnInitComplete = */ NULL, /* .pfnPowerOff = */ NULL, /* .pfnSoftReset = */ NULL, /* .pfnReserved0 = */ NULL, /* .pfnReserved1 = */ NULL, /* .pfnReserved2 = */ NULL, /* .pfnReserved3 = */ NULL, /* .pfnReserved4 = */ NULL, /* .pfnReserved5 = */ NULL, /* .pfnReserved6 = */ NULL, /* .pfnReserved7 = */ NULL, #elif defined(IN_RING0) /* .pfnEarlyConstruct = */ NULL, /* .pfnConstruct = */ iommuAmdRZConstruct, /* .pfnDestruct = */ NULL, /* .pfnFinalDestruct = */ NULL, /* .pfnRequest = */ NULL, /* .pfnReserved0 = */ NULL, /* .pfnReserved1 = */ NULL, /* .pfnReserved2 = */ NULL, /* .pfnReserved3 = */ NULL, /* .pfnReserved4 = */ NULL, /* .pfnReserved5 = */ NULL, /* .pfnReserved6 = */ NULL, /* .pfnReserved7 = */ NULL, #elif defined(IN_RC) /* .pfnConstruct = */ iommuAmdRZConstruct, /* .pfnReserved0 = */ NULL, /* .pfnReserved1 = */ NULL, /* .pfnReserved2 = */ NULL, /* .pfnReserved3 = */ NULL, /* .pfnReserved4 = */ NULL, /* .pfnReserved5 = */ NULL, /* .pfnReserved6 = */ NULL, /* .pfnReserved7 = */ NULL, #else # error "Not in 
IN_RING3, IN_RING0 or IN_RC!" #endif /* .u32VersionEnd = */ PDM_DEVREG_VERSION }; #endif /* !VBOX_DEVICE_STRUCT_TESTCASE */