AMD IOMMU driver

This driver is functionally equivalent to the in-tree Intel DMAR code:
it handles busdma and interrupt remapping for the host OS.  There is no
integration with bhyve yet, and the stub iommu drivers in the bhyve code
cannot currently coexist with this driver (integration is planned).

The biggest architectural problem with the code is that the AMD IOMMU
units are enumerated as PCIe-attached security devices, which happens
much later than the HPET and IOAPIC drivers attach and activate their
interrupts.  Because of this, HPET FSB interrupts and IOAPIC interrupts
are always identity-mapped.

The code is of late alpha quality.  The driver is disabled by default.
To enable it for testing, set in loader.conf:
hw.amdiommu.enable=1
hw.iommu.dma=1		# enable iommu busdma
hw.iommu.ir=1		# enable interrupt remapping

Discussed with:	emaste
Sponsored by:	Advanced Micro Devices (AMD)
Sponsored by:	The FreeBSD Foundation
MFC after:	1 week
Differential revision:	https://reviews.freebsd.org/D47256
Commit 0f5116d7ef (parent 0a9bec1744)
Konstantin Belousov, 2024-05-12 13:20:11 +03:00
8 changed files with 3563 additions and 0 deletions

@@ -344,6 +344,12 @@ x86/cpufreq/hwpstate_amd.c optional cpufreq
x86/cpufreq/hwpstate_intel.c optional cpufreq
x86/cpufreq/p4tcc.c optional cpufreq
x86/cpufreq/powernow.c optional cpufreq
x86/iommu/amd_cmd.c optional acpi iommu pci
x86/iommu/amd_ctx.c optional acpi iommu pci
x86/iommu/amd_drv.c optional acpi iommu pci
x86/iommu/amd_event.c optional acpi iommu pci
x86/iommu/amd_idpgtbl.c optional acpi iommu pci
x86/iommu/amd_intrmap.c optional acpi iommu pci
x86/iommu/intel_ctx.c optional acpi iommu pci
x86/iommu/intel_drv.c optional acpi iommu pci
x86/iommu/intel_fault.c optional acpi iommu pci

sys/x86/iommu/amd_cmd.c (new file, 360 lines)

@@ -0,0 +1,360 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 The FreeBSD Foundation
*
* This software was developed by Konstantin Belousov <kib@FreeBSD.org>
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "opt_acpi.h"
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/memdesc.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/tree.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <contrib/dev/acpica/include/acpi.h>
#include <contrib/dev/acpica/include/accommon.h>
#include <dev/acpica/acpivar.h>
#include <dev/pci/pcireg.h>
#include <machine/bus.h>
#include <machine/cpu.h>
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <x86/iommu/amd_reg.h>
#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/amd_iommu.h>
static void
amdiommu_enable_cmdbuf(struct amdiommu_unit *unit)
{
AMDIOMMU_ASSERT_LOCKED(unit);
unit->hw_ctrl |= AMDIOMMU_CTRL_CMDBUF_EN;
amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
}
static void
amdiommu_disable_cmdbuf(struct amdiommu_unit *unit)
{
AMDIOMMU_ASSERT_LOCKED(unit);
unit->hw_ctrl &= ~AMDIOMMU_CTRL_CMDBUF_EN;
amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
}
static void
amdiommu_enable_qi_intr(struct iommu_unit *iommu)
{
struct amdiommu_unit *unit;
unit = IOMMU2AMD(iommu);
AMDIOMMU_ASSERT_LOCKED(unit);
unit->hw_ctrl |= AMDIOMMU_CTRL_COMWINT_EN;
amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
amdiommu_write8(unit, AMDIOMMU_CMDEV_STATUS,
AMDIOMMU_CMDEVS_COMWAITINT);
}
static void
amdiommu_disable_qi_intr(struct iommu_unit *iommu)
{
struct amdiommu_unit *unit;
unit = IOMMU2AMD(iommu);
AMDIOMMU_ASSERT_LOCKED(unit);
unit->hw_ctrl &= ~AMDIOMMU_CTRL_COMWINT_EN;
amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
}
static void
amdiommu_cmd_advance_tail(struct iommu_unit *iommu)
{
struct amdiommu_unit *unit;
unit = IOMMU2AMD(iommu);
AMDIOMMU_ASSERT_LOCKED(unit);
amdiommu_write8(unit, AMDIOMMU_CMDBUF_TAIL, unit->x86c.inv_queue_tail);
}
static void
amdiommu_cmd_ensure(struct iommu_unit *iommu, int descr_count)
{
struct amdiommu_unit *unit;
uint64_t head;
int bytes;
unit = IOMMU2AMD(iommu);
AMDIOMMU_ASSERT_LOCKED(unit);
bytes = descr_count << AMDIOMMU_CMD_SZ_SHIFT;
for (;;) {
if (bytes <= unit->x86c.inv_queue_avail)
break;
/* refill */
head = amdiommu_read8(unit, AMDIOMMU_CMDBUF_HEAD);
head &= AMDIOMMU_CMDPTR_MASK;
unit->x86c.inv_queue_avail = head - unit->x86c.inv_queue_tail -
AMDIOMMU_CMD_SZ;
if (head <= unit->x86c.inv_queue_tail)
unit->x86c.inv_queue_avail += unit->x86c.inv_queue_size;
if (bytes <= unit->x86c.inv_queue_avail)
break;
/*
* No space in the queue, do a busy wait.  Hardware must
* make progress.  But first advance the tail to
* inform the descriptor streamer about entries we
* might have already filled, otherwise they could
* clog the whole queue.
*
* See dmar_qi_invalidate_locked() for a discussion
* about data race prevention.
*/
amdiommu_cmd_advance_tail(iommu);
unit->x86c.inv_queue_full++;
cpu_spinwait();
}
unit->x86c.inv_queue_avail -= bytes;
}
static void
amdiommu_cmd_emit(struct amdiommu_unit *unit, const struct
amdiommu_cmd_generic *cmd)
{
AMDIOMMU_ASSERT_LOCKED(unit);
memcpy(unit->x86c.inv_queue + unit->x86c.inv_queue_tail, cmd,
sizeof(*cmd));
unit->x86c.inv_queue_tail += AMDIOMMU_CMD_SZ;
KASSERT(unit->x86c.inv_queue_tail <= unit->x86c.inv_queue_size,
("tail overflow 0x%x 0x%jx", unit->x86c.inv_queue_tail,
(uintmax_t)unit->x86c.inv_queue_size));
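/* inv_queue_size is a power of two, so masking wraps the ring tail. */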
unit->x86c.inv_queue_tail &= unit->x86c.inv_queue_size - 1;
}
static void
amdiommu_cmd_emit_wait_descr(struct iommu_unit *iommu, uint32_t seq,
bool intr, bool memw, bool fence)
{
struct amdiommu_unit *unit;
struct amdiommu_cmd_completion_wait c;
unit = IOMMU2AMD(iommu);
AMDIOMMU_ASSERT_LOCKED(unit);
bzero(&c, sizeof(c));
c.op = AMDIOMMU_CMD_COMPLETION_WAIT;
if (memw) {
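/*
* Completion-wait with store (s = 1): the low 3 bits of the
* sequence word's physical address are dropped, and the rest
* is split across address0/address1.  Hardware stores data0
* (the sequence number) there when the wait completes.
*/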
uint32_t x;
c.s = 1;
x = unit->x86c.inv_waitd_seq_hw_phys;
x >>= 3;
c.address0 = x;
x = unit->x86c.inv_waitd_seq_hw_phys >> 32;
c.address1 = x;
c.data0 = seq;
}
if (fence)
c.f = 1;
if (intr)
c.i = 1;
amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c);
}
static void
amdiommu_qi_invalidate_emit(struct iommu_domain *adomain, iommu_gaddr_t base,
iommu_gaddr_t size, struct iommu_qi_genseq *pseq, bool emit_wait)
{
struct amdiommu_domain *domain;
struct amdiommu_unit *unit;
struct amdiommu_cmd_invalidate_iommu_pages c;
u_int isize;
domain = IODOM2DOM(adomain);
unit = domain->unit;
AMDIOMMU_ASSERT_LOCKED(unit);
bzero(&c, sizeof(c));
c.op = AMDIOMMU_CMD_INVALIDATE_IOMMU_PAGES;
c.domainid = domain->domain;
isize = IOMMU_PAGE_SIZE; /* XXXKIB handle superpages */
for (; size > 0; base += isize, size -= isize) {
amdiommu_cmd_ensure(AMD2IOMMU(unit), 1);
c.s = 0;
c.pde = 1;
c.address = base >> IOMMU_PAGE_SHIFT;
amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c);
}
iommu_qi_emit_wait_seq(AMD2IOMMU(unit), pseq, emit_wait);
}
void
amdiommu_qi_invalidate_all_pages_locked_nowait(struct amdiommu_domain *domain)
{
struct amdiommu_unit *unit;
struct amdiommu_cmd_invalidate_iommu_pages c;
unit = domain->unit;
AMDIOMMU_ASSERT_LOCKED(unit);
bzero(&c, sizeof(c));
c.op = AMDIOMMU_CMD_INVALIDATE_IOMMU_PAGES;
c.domainid = domain->domain;
/*
* The magic values are specified in the note for the
* INVALIDATE_IOMMU_PAGES command description: s = 1 together
* with this address means "invalidate all pages for the
* domain".
*/
c.s = 1;
c.pde = 1;
c.address = 0x7ffffffffffff;
amdiommu_cmd_ensure(AMD2IOMMU(unit), 1);
amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c);
}
void
amdiommu_qi_invalidate_wait_sync(struct iommu_unit *iommu)
{
struct iommu_qi_genseq gseq;
amdiommu_cmd_ensure(iommu, 1);
iommu_qi_emit_wait_seq(iommu, &gseq, true);
IOMMU2AMD(iommu)->x86c.inv_seq_waiters++;
amdiommu_cmd_advance_tail(iommu);
iommu_qi_wait_for_seq(iommu, &gseq, true);
}
void
amdiommu_qi_invalidate_ctx_locked_nowait(struct amdiommu_ctx *ctx)
{
struct amdiommu_cmd_invalidate_devtab_entry c;
amdiommu_cmd_ensure(AMD2IOMMU(CTX2AMD(ctx)), 1);
bzero(&c, sizeof(c));
c.op = AMDIOMMU_CMD_INVALIDATE_DEVTAB_ENTRY;
c.devid = ctx->context.rid;
amdiommu_cmd_emit(CTX2AMD(ctx), (struct amdiommu_cmd_generic *)&c);
}
void
amdiommu_qi_invalidate_ctx_locked(struct amdiommu_ctx *ctx)
{
amdiommu_qi_invalidate_ctx_locked_nowait(ctx);
amdiommu_qi_invalidate_wait_sync(AMD2IOMMU(CTX2AMD(ctx)));
}
void
amdiommu_qi_invalidate_ir_locked_nowait(struct amdiommu_unit *unit,
uint16_t devid)
{
struct amdiommu_cmd_invalidate_interrupt_table c;
AMDIOMMU_ASSERT_LOCKED(unit);
amdiommu_cmd_ensure(AMD2IOMMU(unit), 1);
bzero(&c, sizeof(c));
c.op = AMDIOMMU_CMD_INVALIDATE_INTERRUPT_TABLE;
c.devid = devid;
amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c);
}
void
amdiommu_qi_invalidate_ir_locked(struct amdiommu_unit *unit, uint16_t devid)
{
amdiommu_qi_invalidate_ir_locked_nowait(unit, devid);
amdiommu_qi_invalidate_wait_sync(AMD2IOMMU(unit));
}
static void
amdiommu_qi_task(void *arg, int pending __unused)
{
struct amdiommu_unit *unit;
unit = IOMMU2AMD(arg);
iommu_qi_drain_tlb_flush(AMD2IOMMU(unit));
AMDIOMMU_LOCK(unit);
if (unit->x86c.inv_seq_waiters > 0)
wakeup(&unit->x86c.inv_seq_waiters);
AMDIOMMU_UNLOCK(unit);
}
int
amdiommu_init_cmd(struct amdiommu_unit *unit)
{
uint64_t qi_sz, rv;
unit->x86c.qi_buf_maxsz = ilog2(AMDIOMMU_CMDBUF_MAX / PAGE_SIZE);
unit->x86c.qi_cmd_sz = AMDIOMMU_CMD_SZ;
iommu_qi_common_init(AMD2IOMMU(unit), amdiommu_qi_task);
get_x86_iommu()->qi_ensure = amdiommu_cmd_ensure;
get_x86_iommu()->qi_emit_wait_descr = amdiommu_cmd_emit_wait_descr;
get_x86_iommu()->qi_advance_tail = amdiommu_cmd_advance_tail;
get_x86_iommu()->qi_invalidate_emit = amdiommu_qi_invalidate_emit;
rv = pmap_kextract((uintptr_t)unit->x86c.inv_queue);
/*
* See the description of the ComLen encoding for Command
* buffer Base Address Register.
*/
qi_sz = ilog2(unit->x86c.inv_queue_size / PAGE_SIZE) + 8;
rv |= qi_sz << AMDIOMMU_CMDBUF_BASE_SZSHIFT;
AMDIOMMU_LOCK(unit);
amdiommu_write8(unit, AMDIOMMU_CMDBUF_BASE, rv);
amdiommu_enable_cmdbuf(unit);
amdiommu_enable_qi_intr(AMD2IOMMU(unit));
AMDIOMMU_UNLOCK(unit);
return (0);
}
static void
amdiommu_fini_cmd_helper(struct iommu_unit *iommu)
{
amdiommu_disable_cmdbuf(IOMMU2AMD(iommu));
amdiommu_disable_qi_intr(iommu);
}
void
amdiommu_fini_cmd(struct amdiommu_unit *unit)
{
iommu_qi_common_fini(AMD2IOMMU(unit), amdiommu_fini_cmd_helper);
}

sys/x86/iommu/amd_ctx.c (new file, 639 lines)

@@ -0,0 +1,639 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 The FreeBSD Foundation
*
* This software was developed by Konstantin Belousov <kib@FreeBSD.org>
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/rman.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_map.h>
#include <contrib/dev/acpica/include/acpi.h>
#include <contrib/dev/acpica/include/accommon.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <x86/iommu/amd_reg.h>
#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/amd_iommu.h>
static MALLOC_DEFINE(M_AMDIOMMU_CTX, "amdiommu_ctx", "AMD IOMMU Context");
static MALLOC_DEFINE(M_AMDIOMMU_DOMAIN, "amdiommu_dom", "AMD IOMMU Domain");
static void amdiommu_unref_domain_locked(struct amdiommu_unit *unit,
struct amdiommu_domain *domain);
static struct amdiommu_dte *
amdiommu_get_dtep(struct amdiommu_ctx *ctx)
{
return (&CTX2AMD(ctx)->dev_tbl[ctx->context.rid]);
}
void
amdiommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
bool cansleep)
{
struct amdiommu_domain *domain;
struct amdiommu_unit *unit;
domain = IODOM2DOM(entry->domain);
unit = DOM2AMD(domain);
/*
* If "free" is false, then the IOTLB invalidation must be performed
* synchronously.  Otherwise, the caller might free the entry before
* amdiommu_qi_task() is finished processing it.
*/
if (free) {
AMDIOMMU_LOCK(unit);
iommu_qi_invalidate_locked(&domain->iodom, entry, true);
AMDIOMMU_UNLOCK(unit);
} else {
iommu_qi_invalidate_sync(&domain->iodom, entry->start,
entry->end - entry->start, cansleep);
iommu_domain_free_entry(entry, false);
}
}
static bool
amdiommu_domain_unload_emit_wait(struct amdiommu_domain *domain,
struct iommu_map_entry *entry)
{
return (true); /* XXXKIB */
}
void
amdiommu_domain_unload(struct iommu_domain *iodom,
struct iommu_map_entries_tailq *entries, bool cansleep)
{
struct amdiommu_domain *domain;
struct amdiommu_unit *unit;
struct iommu_map_entry *entry, *entry1;
int error __diagused;
domain = IODOM2DOM(iodom);
unit = DOM2AMD(domain);
TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
("not mapped entry %p %p", domain, entry));
error = iodom->ops->unmap(iodom, entry,
cansleep ? IOMMU_PGF_WAITOK : 0);
KASSERT(error == 0, ("unmap %p error %d", domain, error));
}
if (TAILQ_EMPTY(entries))
return;
AMDIOMMU_LOCK(unit);
while ((entry = TAILQ_FIRST(entries)) != NULL) {
TAILQ_REMOVE(entries, entry, dmamap_link);
iommu_qi_invalidate_locked(&domain->iodom, entry,
amdiommu_domain_unload_emit_wait(domain, entry));
}
AMDIOMMU_UNLOCK(unit);
}
static void
amdiommu_domain_destroy(struct amdiommu_domain *domain)
{
struct iommu_domain *iodom;
struct amdiommu_unit *unit;
iodom = DOM2IODOM(domain);
KASSERT(TAILQ_EMPTY(&domain->iodom.unload_entries),
("unfinished unloads %p", domain));
KASSERT(LIST_EMPTY(&iodom->contexts),
("destroying dom %p with contexts", domain));
KASSERT(domain->ctx_cnt == 0,
("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt));
KASSERT(domain->refs == 0,
("destroying dom %p with refs %d", domain, domain->refs));
if ((domain->iodom.flags & IOMMU_DOMAIN_GAS_INITED) != 0) {
AMDIOMMU_DOMAIN_LOCK(domain);
iommu_gas_fini_domain(iodom);
AMDIOMMU_DOMAIN_UNLOCK(domain);
}
if ((domain->iodom.flags & IOMMU_DOMAIN_PGTBL_INITED) != 0) {
if (domain->pgtbl_obj != NULL)
AMDIOMMU_DOMAIN_PGLOCK(domain);
amdiommu_domain_free_pgtbl(domain);
}
iommu_domain_fini(iodom);
unit = DOM2AMD(domain);
free_unr(unit->domids, domain->domain);
free(domain, M_AMDIOMMU_DOMAIN);
}
static iommu_gaddr_t
lvl2addr(int lvl)
{
int x;
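/* Each level adds IOMMU_NPTEPGSHIFT bits of index above the page offset. */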
x = IOMMU_PAGE_SHIFT + IOMMU_NPTEPGSHIFT * lvl;
/* Level 6 has only 8 bits for page table index */
if (x >= NBBY * sizeof(uint64_t))
return (-1ull);
return (1ull << x);
}
static void
amdiommu_domain_init_pglvl(struct amdiommu_unit *unit,
struct amdiommu_domain *domain)
{
iommu_gaddr_t end;
int hats, i;
uint64_t efr_hats;
end = DOM2IODOM(domain)->end;
for (i = AMDIOMMU_PGTBL_MAXLVL; i > 1; i--) {
if (lvl2addr(i) >= end && lvl2addr(i - 1) < end)
break;
}
domain->pglvl = i;
efr_hats = unit->efr & AMDIOMMU_EFR_HATS_MASK;
switch (efr_hats) {
case AMDIOMMU_EFR_HATS_6LVL:
hats = 6;
break;
case AMDIOMMU_EFR_HATS_5LVL:
hats = 5;
break;
case AMDIOMMU_EFR_HATS_4LVL:
hats = 4;
break;
default:
printf("amdiommu%d: HATS %#jx (reserved) ignoring\n",
unit->iommu.unit, (uintmax_t)efr_hats);
return;
}
if (hats >= domain->pglvl)
return;
printf("amdiommu%d: domain %d HATS %d pglvl %d reducing to HATS\n",
unit->iommu.unit, domain->domain, hats, domain->pglvl);
domain->pglvl = hats;
domain->iodom.end = lvl2addr(hats);
}
static struct amdiommu_domain *
amdiommu_domain_alloc(struct amdiommu_unit *unit, bool id_mapped)
{
struct amdiommu_domain *domain;
struct iommu_domain *iodom;
int error, id;
id = alloc_unr(unit->domids);
if (id == -1)
return (NULL);
domain = malloc(sizeof(*domain), M_AMDIOMMU_DOMAIN, M_WAITOK | M_ZERO);
iodom = DOM2IODOM(domain);
domain->domain = id;
LIST_INIT(&iodom->contexts);
iommu_domain_init(AMD2IOMMU(unit), iodom, &amdiommu_domain_map_ops);
domain->unit = unit;
domain->iodom.end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR;
amdiommu_domain_init_pglvl(unit, domain);
iommu_gas_init_domain(DOM2IODOM(domain));
if (id_mapped) {
domain->iodom.flags |= IOMMU_DOMAIN_IDMAP;
} else {
error = amdiommu_domain_alloc_pgtbl(domain);
if (error != 0)
goto fail;
/* Disable local apic region access */
error = iommu_gas_reserve_region(iodom, 0xfee00000,
0xfeefffff + 1, &iodom->msi_entry);
if (error != 0)
goto fail;
}
return (domain);
fail:
amdiommu_domain_destroy(domain);
return (NULL);
}
static struct amdiommu_ctx *
amdiommu_ctx_alloc(struct amdiommu_domain *domain, uint16_t rid)
{
struct amdiommu_ctx *ctx;
ctx = malloc(sizeof(*ctx), M_AMDIOMMU_CTX, M_WAITOK | M_ZERO);
ctx->context.domain = DOM2IODOM(domain);
ctx->context.tag = malloc(sizeof(struct bus_dma_tag_iommu),
M_AMDIOMMU_CTX, M_WAITOK | M_ZERO);
ctx->context.rid = rid;
ctx->context.refs = 1;
return (ctx);
}
static void
amdiommu_ctx_link(struct amdiommu_ctx *ctx)
{
struct amdiommu_domain *domain;
domain = CTX2DOM(ctx);
IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
KASSERT(domain->refs >= domain->ctx_cnt,
("dom %p ref underflow %d %d", domain, domain->refs,
domain->ctx_cnt));
domain->refs++;
domain->ctx_cnt++;
LIST_INSERT_HEAD(&domain->iodom.contexts, &ctx->context, link);
}
static void
amdiommu_ctx_unlink(struct amdiommu_ctx *ctx)
{
struct amdiommu_domain *domain;
domain = CTX2DOM(ctx);
IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
KASSERT(domain->refs > 0,
("domain %p ctx dtr refs %d", domain, domain->refs));
KASSERT(domain->ctx_cnt >= domain->refs,
("domain %p ctx dtr refs %d ctx_cnt %d", domain,
domain->refs, domain->ctx_cnt));
domain->refs--;
domain->ctx_cnt--;
LIST_REMOVE(&ctx->context, link);
}
struct amdiommu_ctx *
amdiommu_find_ctx_locked(struct amdiommu_unit *unit, uint16_t rid)
{
struct amdiommu_domain *domain;
struct iommu_ctx *ctx;
AMDIOMMU_ASSERT_LOCKED(unit);
LIST_FOREACH(domain, &unit->domains, link) {
LIST_FOREACH(ctx, &domain->iodom.contexts, link) {
if (ctx->rid == rid)
return (IOCTX2CTX(ctx));
}
}
return (NULL);
}
struct amdiommu_domain *
amdiommu_find_domain(struct amdiommu_unit *unit, uint16_t rid)
{
struct amdiommu_domain *domain;
struct iommu_ctx *ctx;
AMDIOMMU_LOCK(unit);
LIST_FOREACH(domain, &unit->domains, link) {
LIST_FOREACH(ctx, &domain->iodom.contexts, link) {
if (ctx->rid == rid)
goto found;
}
}
found:	/* Loop exhaustion leaves domain == NULL. */
AMDIOMMU_UNLOCK(unit);
return (domain);
}
static void
amdiommu_free_ctx_locked(struct amdiommu_unit *unit, struct amdiommu_ctx *ctx)
{
struct amdiommu_dte *dtep;
struct amdiommu_domain *domain;
AMDIOMMU_ASSERT_LOCKED(unit);
KASSERT(ctx->context.refs >= 1,
("amdiommu %p ctx %p refs %u", unit, ctx, ctx->context.refs));
/*
* If our reference is not last, only the dereference should
* be performed.
*/
if (ctx->context.refs > 1) {
ctx->context.refs--;
AMDIOMMU_UNLOCK(unit);
return;
}
KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
("lost ref on disabled ctx %p", ctx));
/*
* Otherwise, the device table entry must be cleared before
* the page table is destroyed.
*/
dtep = amdiommu_get_dtep(ctx);
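/*
* Clear the valid bit first and fence before zeroing the rest,
* so the IOMMU never observes a valid entry with stale fields.
*/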
dtep->v = 0;
atomic_thread_fence_rel();
memset(dtep, 0, sizeof(*dtep));
domain = CTX2DOM(ctx);
amdiommu_qi_invalidate_ctx_locked_nowait(ctx);
amdiommu_qi_invalidate_ir_locked_nowait(unit, ctx->context.rid);
amdiommu_qi_invalidate_all_pages_locked_nowait(domain);
amdiommu_qi_invalidate_wait_sync(AMD2IOMMU(CTX2AMD(ctx)));
if (unit->irte_enabled)
amdiommu_ctx_fini_irte(ctx);
amdiommu_ctx_unlink(ctx);
free(ctx->context.tag, M_AMDIOMMU_CTX);
free(ctx, M_AMDIOMMU_CTX);
amdiommu_unref_domain_locked(unit, domain);
}
static void
amdiommu_free_ctx(struct amdiommu_ctx *ctx)
{
struct amdiommu_unit *unit;
unit = CTX2AMD(ctx);
AMDIOMMU_LOCK(unit);
amdiommu_free_ctx_locked(unit, ctx);
}
static void
amdiommu_unref_domain_locked(struct amdiommu_unit *unit,
struct amdiommu_domain *domain)
{
AMDIOMMU_ASSERT_LOCKED(unit);
KASSERT(domain->refs >= 1,
("amdiommu%d domain %p refs %u", unit->iommu.unit, domain,
domain->refs));
KASSERT(domain->refs > domain->ctx_cnt,
("amdiommu%d domain %p refs %d ctx_cnt %d", unit->iommu.unit,
domain, domain->refs, domain->ctx_cnt));
if (domain->refs > 1) {
domain->refs--;
AMDIOMMU_UNLOCK(unit);
return;
}
LIST_REMOVE(domain, link);
AMDIOMMU_UNLOCK(unit);
taskqueue_drain(unit->iommu.delayed_taskqueue,
&domain->iodom.unload_task);
amdiommu_domain_destroy(domain);
}
static void
dte_entry_init_one(struct amdiommu_dte *dtep, struct amdiommu_ctx *ctx,
vm_page_t pgtblr, uint8_t dte, uint32_t edte)
{
struct amdiommu_domain *domain;
struct amdiommu_unit *unit;
domain = CTX2DOM(ctx);
unit = DOM2AMD(domain);
dtep->tv = 1;
/* dtep->had not used for now */
dtep->ir = 1;
dtep->iw = 1;
dtep->domainid = domain->domain;
dtep->pioctl = AMDIOMMU_DTE_PIOCTL_DIS;
/* fill device interrupt passing hints from IVHD. */
dtep->initpass = (dte & ACPI_IVHD_INIT_PASS) != 0;
dtep->eintpass = (dte & ACPI_IVHD_EINT_PASS) != 0;
dtep->nmipass = (dte & ACPI_IVHD_NMI_PASS) != 0;
dtep->sysmgt = (dte & ACPI_IVHD_SYSTEM_MGMT) >> 4;
dtep->lint0pass = (dte & ACPI_IVHD_LINT0_PASS) != 0;
dtep->lint1pass = (dte & ACPI_IVHD_LINT1_PASS) != 0;
if (unit->irte_enabled) {
dtep->iv = 1;
dtep->i = 0;
dtep->inttablen = ilog2(unit->irte_nentries);
dtep->intrroot = pmap_kextract(unit->irte_x2apic ?
(vm_offset_t)ctx->irtx2 :
(vm_offset_t)ctx->irtb) >> 6;
dtep->intctl = AMDIOMMU_DTE_INTCTL_MAP;
}
if ((DOM2IODOM(domain)->flags & IOMMU_DOMAIN_IDMAP) != 0) {
dtep->pgmode = AMDIOMMU_DTE_PGMODE_1T1;
} else {
MPASS(domain->pglvl > 0 && domain->pglvl <=
AMDIOMMU_PGTBL_MAXLVL);
dtep->pgmode = domain->pglvl;
dtep->ptroot = VM_PAGE_TO_PHYS(pgtblr) >> 12;
}
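/*
* Make all DTE fields visible before setting the valid bit, so
* a concurrent IOMMU table walk never sees a partially
* initialized entry.
*/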
atomic_thread_fence_rel();
dtep->v = 1;
}
static void
dte_entry_init(struct amdiommu_ctx *ctx, bool move, uint8_t dte, uint32_t edte)
{
struct amdiommu_dte *dtep;
struct amdiommu_unit *unit;
struct amdiommu_domain *domain;
int i;
domain = CTX2DOM(ctx);
unit = DOM2AMD(domain);
dtep = amdiommu_get_dtep(ctx);
KASSERT(dtep->v == 0,
("amdiommu%d initializing valid dte @%p %#jx",
CTX2AMD(ctx)->iommu.unit, dtep, (uintmax_t)(*(uint64_t *)dtep)));
if (iommu_is_buswide_ctx(AMD2IOMMU(unit),
PCI_RID2BUS(ctx->context.rid))) {
MPASS(!move);
for (i = 0; i <= PCI_BUSMAX; i++) {
dte_entry_init_one(&dtep[i], ctx, domain->pgtblr,
dte, edte);
}
} else {
dte_entry_init_one(dtep, ctx, domain->pgtblr, dte, edte);
}
}
struct amdiommu_ctx *
amdiommu_get_ctx_for_dev(struct amdiommu_unit *unit, device_t dev, uint16_t rid,
int dev_domain, bool id_mapped, bool rmrr_init, uint8_t dte, uint32_t edte)
{
struct amdiommu_domain *domain, *domain1;
struct amdiommu_ctx *ctx, *ctx1;
int bus, slot, func;
if (dev != NULL) {
bus = pci_get_bus(dev);
slot = pci_get_slot(dev);
func = pci_get_function(dev);
} else {
bus = PCI_RID2BUS(rid);
slot = PCI_RID2SLOT(rid);
func = PCI_RID2FUNC(rid);
}
AMDIOMMU_LOCK(unit);
KASSERT(!iommu_is_buswide_ctx(AMD2IOMMU(unit), bus) ||
(slot == 0 && func == 0),
("iommu%d pci%d:%d:%d get_ctx for buswide", AMD2IOMMU(unit)->unit,
bus, slot, func));
ctx = amdiommu_find_ctx_locked(unit, rid);
if (ctx == NULL) {
/*
* Perform the allocations which require sleep or have
* higher chance to succeed if the sleep is allowed.
*/
AMDIOMMU_UNLOCK(unit);
domain1 = amdiommu_domain_alloc(unit, id_mapped);
if (domain1 == NULL)
return (NULL);
if (!id_mapped) {
/*
* XXXKIB IVMD seems to be less significant
* and less used on AMD than RMRR on Intel.
* Not implemented for now.
*/
}
ctx1 = amdiommu_ctx_alloc(domain1, rid);
amdiommu_ctx_init_irte(ctx1);
AMDIOMMU_LOCK(unit);
/*
* Recheck the contexts, other thread might have
* already allocated needed one.
*/
ctx = amdiommu_find_ctx_locked(unit, rid);
if (ctx == NULL) {
domain = domain1;
ctx = ctx1;
amdiommu_ctx_link(ctx);
ctx->context.tag->owner = dev;
iommu_device_tag_init(CTX2IOCTX(ctx), dev);
LIST_INSERT_HEAD(&unit->domains, domain, link);
dte_entry_init(ctx, false, dte, edte);
amdiommu_qi_invalidate_ctx_locked(ctx);
if (dev != NULL) {
device_printf(dev,
"amdiommu%d pci%d:%d:%d:%d rid %x domain %d "
"%s-mapped\n",
AMD2IOMMU(unit)->unit, unit->unit_dom,
bus, slot, func, rid, domain->domain,
id_mapped ? "id" : "re");
}
} else {
amdiommu_domain_destroy(domain1);
/* Nothing needs to be done to destroy ctx1. */
free(ctx1, M_AMDIOMMU_CTX);
domain = CTX2DOM(ctx);
ctx->context.refs++; /* tag referenced us */
}
} else {
domain = CTX2DOM(ctx);
if (ctx->context.tag->owner == NULL)
ctx->context.tag->owner = dev;
ctx->context.refs++; /* tag referenced us */
}
AMDIOMMU_UNLOCK(unit);
return (ctx);
}
struct iommu_ctx *
amdiommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
bool id_mapped, bool rmrr_init)
{
struct amdiommu_unit *unit;
struct amdiommu_ctx *ret;
int error;
uint32_t edte;
uint16_t rid1;
uint8_t dte;
error = amdiommu_find_unit(dev, &unit, &rid1, &dte, &edte,
bootverbose);
if (error != 0)
return (NULL);
if (AMD2IOMMU(unit) != iommu) /* XXX complain loudly */
return (NULL);
ret = amdiommu_get_ctx_for_dev(unit, dev, rid1, pci_get_domain(dev),
id_mapped, rmrr_init, dte, edte);
return (CTX2IOCTX(ret));
}
void
amdiommu_free_ctx_locked_method(struct iommu_unit *iommu,
struct iommu_ctx *context)
{
struct amdiommu_unit *unit;
struct amdiommu_ctx *ctx;
unit = IOMMU2AMD(iommu);
ctx = IOCTX2CTX(context);
amdiommu_free_ctx_locked(unit, ctx);
}
void
amdiommu_free_ctx_method(struct iommu_ctx *context)
{
struct amdiommu_ctx *ctx;
ctx = IOCTX2CTX(context);
amdiommu_free_ctx(ctx);
}

sys/x86/iommu/amd_drv.c (new file, 1205 lines)

File diff suppressed because it is too large.

sys/x86/iommu/amd_event.c (new file, 323 lines)

@@ -0,0 +1,323 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 The FreeBSD Foundation
*
* This software was developed by Konstantin Belousov <kib@FreeBSD.org>
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "opt_acpi.h"
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/memdesc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/rman.h>
#include <sys/rwlock.h>
#include <sys/smp.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <contrib/dev/acpica/include/acpi.h>
#include <contrib/dev/acpica/include/accommon.h>
#include <dev/acpica/acpivar.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <machine/bus.h>
#include <machine/pci_cfgreg.h>
#include "pcib_if.h"
#include <machine/intr_machdep.h>
#include <machine/md_var.h>
#include <machine/cputypes.h>
#include <x86/apicreg.h>
#include <x86/apicvar.h>
#include <dev/iommu/iommu.h>
#include <dev/iommu/busdma_iommu.h>
#include <x86/iommu/amd_reg.h>
#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/amd_iommu.h>
static void
amdiommu_event_rearm_intr(struct amdiommu_unit *unit)
{
amdiommu_write8(unit, AMDIOMMU_CMDEV_STATUS,
AMDIOMMU_CMDEVS_EVLOGINT);
}
static void
amdiommu_event_log_inc_head(struct amdiommu_unit *unit)
{
unit->event_log_head++;
if (unit->event_log_head >= unit->event_log_size)
unit->event_log_head = 0;
}
static void
amdiommu_event_log_print(struct amdiommu_unit *unit,
const struct amdiommu_event_generic *evp, bool fancy)
{
printf("amdiommu%d: event type 0x%x 0x%08x 0x%08x 0x%08x 0x%08x\n",
unit->iommu.unit, evp->code, evp->w0, evp->ww1, evp->w2, evp->w3);
if (!fancy)
return;
AMDIOMMU_ASSERT_LOCKED(unit);
if (evp->code == AMDIOMMU_EV_ILL_DEV_TABLE_ENTRY) {
const struct amdiommu_event_ill_dev_table_entry *ev_dte_p;
const struct amdiommu_dte *dte;
const uint32_t *x;
int i;
ev_dte_p = (const struct
amdiommu_event_ill_dev_table_entry *)evp;
dte = &unit->dev_tbl[ev_dte_p->devid];
printf("\tIllegal Dev Tab Entry dte@%p:", dte);
for (i = 0, x = (const uint32_t *)dte; i < sizeof(*dte) /
sizeof(uint32_t); i++, x++)
printf(" 0x%08x", *x);
printf("\n");
} else if (evp->code == AMDIOMMU_EV_IO_PAGE_FAULT) {
const struct amdiommu_event_io_page_fault_entry *ev_iopf_p;
struct amdiommu_ctx *ctx;
device_t dev;
ev_iopf_p = (const struct
amdiommu_event_io_page_fault_entry *)evp;
printf("\tPage Fault rid %#x dom %d",
ev_iopf_p->devid, ev_iopf_p->pasid);
ctx = amdiommu_find_ctx_locked(unit, ev_iopf_p->devid);
if (ctx != NULL) {
dev = ctx->context.tag->owner;
if (dev != NULL)
printf(" %s", device_get_nameunit(dev));
}
printf("\n\t"
"gn %d nx %d us %d i %d pr %d rw %d pe %d rz %d tr %d"
"\n\tgaddr %#jx\n",
ev_iopf_p->gn, ev_iopf_p->nx, ev_iopf_p->us, ev_iopf_p->i,
ev_iopf_p->pr, ev_iopf_p->rw, ev_iopf_p->pe, ev_iopf_p->rz,
ev_iopf_p->tr,
(((uintmax_t)(ev_iopf_p->addr2)) << 32) |
ev_iopf_p->addr1);
}
}
static u_int
amdiommu_event_log_tail(struct amdiommu_unit *unit)
{
return (amdiommu_read8(unit, AMDIOMMU_EVNTLOG_TAIL) >>
AMDIOMMU_EV_SZ_SHIFT);
}
static u_int
amdiommu_event_copy_log_inc(u_int idx)
{
idx++;
if (idx == nitems(((struct amdiommu_unit *)NULL)->event_copy_log))
idx = 0;
return (idx);
}
static bool
amdiommu_event_copy_log_hasspace(struct amdiommu_unit *unit)
{
return (unit->event_copy_tail != amdiommu_event_copy_log_inc(
unit->event_copy_head));
}
void
amdiommu_event_intr(struct amdiommu_unit *unit, uint64_t status)
{
struct amdiommu_event_generic *evp;
u_int hw_tail, hw_tail1;
bool enqueue;
enqueue = (status & AMDIOMMU_CMDEVS_EVOVRFLW) != 0;
hw_tail1 = amdiommu_event_log_tail(unit);
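/*
* Drain the event log, re-reading the hardware tail after each
* pass: new events may have been logged while we were copying.
*/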
do {
hw_tail = hw_tail1;
for (; hw_tail != unit->event_log_head;
amdiommu_event_log_inc_head(unit)) {
evp = &unit->event_log[unit->event_log_head];
mtx_lock_spin(&unit->event_lock);
if (amdiommu_event_copy_log_hasspace(unit)) {
unit->event_copy_log[unit->event_copy_head] =
*evp;
unit->event_copy_head =
amdiommu_event_copy_log_inc(unit->
event_copy_head);
enqueue = true;
} else {
amdiommu_event_log_print(unit, evp, false);
}
mtx_unlock_spin(&unit->event_lock);
}
amdiommu_write8(unit, AMDIOMMU_EVNTLOG_HEAD,
unit->event_log_head << AMDIOMMU_EV_SZ_SHIFT);
hw_tail1 = amdiommu_event_log_tail(unit);
} while (hw_tail1 != hw_tail);
amdiommu_event_rearm_intr(unit);
if (enqueue)
taskqueue_enqueue(unit->event_taskqueue, &unit->event_task);
}
static void
amdiommu_event_task(void *arg, int pending __unused)
{
struct amdiommu_unit *unit;
uint64_t hwev_status, status;
struct amdiommu_event_generic hwev;
unit = arg;
AMDIOMMU_LOCK(unit);
if ((unit->efr & AMDIOMMU_EFR_HWEV_SUP) != 0) {
hwev_status = amdiommu_read8(unit, AMDIOMMU_HWEV_STATUS);
if ((hwev_status & AMDIOMMU_HWEVS_HEV) != 0) {
*(uint64_t *)&hwev = amdiommu_read8(unit,
AMDIOMMU_HWEV_LOWER);
*((uint64_t *)&hwev + 1) = amdiommu_read8(unit,
AMDIOMMU_HWEV_UPPER);
printf("amdiommu%d: hw event%s\n", unit->iommu.unit,
(hwev_status & AMDIOMMU_HWEVS_HEO) != 0 ?
" (overflown)" : "");
amdiommu_event_log_print(unit, &hwev, true);
amdiommu_write8(unit, AMDIOMMU_HWEV_STATUS,
hwev_status);
}
}
status = amdiommu_read8(unit, AMDIOMMU_CMDEV_STATUS);
if ((status & AMDIOMMU_CMDEVS_EVOVRFLW) != 0) {
printf("amdiommu%d: event log overflow\n", unit->iommu.unit);
while ((status & AMDIOMMU_CMDEVS_EVLOGRUN) != 0) {
DELAY(1);
status = amdiommu_read8(unit, AMDIOMMU_CMDEV_STATUS);
}
unit->hw_ctrl &= ~AMDIOMMU_CTRL_EVNTLOG_EN;
amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
unit->event_log_head = 0;
amdiommu_write8(unit, AMDIOMMU_EVNTLOG_HEAD, 0);
amdiommu_write8(unit, AMDIOMMU_CMDEV_STATUS,
AMDIOMMU_CMDEVS_EVOVRFLW); /* RW1C */
unit->hw_ctrl |= AMDIOMMU_CTRL_EVNTLOG_EN;
amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
amdiommu_event_rearm_intr(unit);
}
mtx_lock_spin(&unit->event_lock);
while (unit->event_copy_head != unit->event_copy_tail) {
mtx_unlock_spin(&unit->event_lock);
amdiommu_event_log_print(unit, &unit->event_copy_log[
unit->event_copy_tail], true);
mtx_lock_spin(&unit->event_lock);
unit->event_copy_tail = amdiommu_event_copy_log_inc(unit->
event_copy_tail);
}
mtx_unlock_spin(&unit->event_lock);
AMDIOMMU_UNLOCK(unit);
}
int
amdiommu_init_event(struct amdiommu_unit *unit)
{
uint64_t base_reg;
mtx_init(&unit->event_lock, "amdevl", NULL, MTX_SPIN);
/* event log entries */
unit->event_log_size = AMDIOMMU_EVNTLOG_MIN;
TUNABLE_INT_FETCH("hw.amdiommu.event_log_size", &unit->event_log_size);
if (unit->event_log_size < AMDIOMMU_EVNTLOG_MIN ||
unit->event_log_size > AMDIOMMU_EVNTLOG_MAX ||
!powerof2(unit->event_log_size))
panic("invalid hw.amdiommu.event_log_size");
unit->event_log = kmem_alloc_contig(AMDIOMMU_EV_SZ *
unit->event_log_size, M_WAITOK | M_ZERO, 0, ~0ull, PAGE_SIZE,
0, VM_MEMATTR_DEFAULT);
TASK_INIT(&unit->event_task, 0, amdiommu_event_task, unit);
unit->event_taskqueue = taskqueue_create_fast("amdiommuff", M_WAITOK,
taskqueue_thread_enqueue, &unit->event_taskqueue);
taskqueue_start_threads(&unit->event_taskqueue, 1, PI_AV,
"amdiommu%d event taskq", unit->iommu.unit);
base_reg = pmap_kextract((vm_offset_t)unit->event_log) |
(((uint64_t)0x8 + ilog2(unit->event_log_size /
AMDIOMMU_EVNTLOG_MIN)) << AMDIOMMU_EVNTLOG_BASE_SZSHIFT);
AMDIOMMU_LOCK(unit);
/*
* Re-arm before enabling the interrupt, to not lose it when
* re-arming in the interrupt handler.
*/
amdiommu_event_rearm_intr(unit);
amdiommu_write8(unit, AMDIOMMU_EVNTLOG_BASE, base_reg);
unit->hw_ctrl |= AMDIOMMU_CTRL_EVNTLOG_EN | AMDIOMMU_CTRL_EVENTINT_EN;
amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
AMDIOMMU_UNLOCK(unit);
return (0);
}
void
amdiommu_fini_event(struct amdiommu_unit *unit)
{
AMDIOMMU_LOCK(unit);
unit->hw_ctrl &= ~(AMDIOMMU_CTRL_EVNTLOG_EN |
AMDIOMMU_CTRL_EVENTINT_EN);
amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
amdiommu_write8(unit, AMDIOMMU_EVNTLOG_BASE, 0);
AMDIOMMU_UNLOCK(unit);
taskqueue_drain(unit->event_taskqueue, &unit->event_task);
taskqueue_free(unit->event_taskqueue);
unit->event_taskqueue = NULL;
kmem_free(unit->event_log, unit->event_log_size * AMDIOMMU_EV_SZ);
unit->event_log = NULL;
unit->event_log_head = unit->event_log_tail = 0;
mtx_destroy(&unit->event_lock);
}

sys/x86/iommu/amd_idpgtbl.c (new file, 396 lines)

@@ -0,0 +1,396 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 The FreeBSD Foundation
*
* This software was developed by Konstantin Belousov <kib@FreeBSD.org>
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/domainset.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/rman.h>
#include <sys/sf_buf.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_map.h>
#include <dev/pci/pcireg.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <x86/iommu/amd_reg.h>
#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/amd_iommu.h>
static void amdiommu_unmap_clear_pte(struct amdiommu_domain *domain,
iommu_gaddr_t base, int lvl, int flags, iommu_pte_t *pte,
struct sf_buf **sf, struct iommu_map_entry *entry, bool free_sf);
static int amdiommu_unmap_buf_locked(struct amdiommu_domain *domain,
iommu_gaddr_t base, iommu_gaddr_t size, int flags,
struct iommu_map_entry *entry);
int
amdiommu_domain_alloc_pgtbl(struct amdiommu_domain *domain)
{
vm_page_t m;
int dom;
KASSERT(domain->pgtbl_obj == NULL,
("already initialized %p", domain));
domain->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL,
IDX_TO_OFF(pglvl_max_pages(domain->pglvl)), 0, 0, NULL);
if (bus_get_domain(domain->iodom.iommu->dev, &dom) == 0)
domain->pgtbl_obj->domain.dr_policy = DOMAINSET_PREF(dom);
AMDIOMMU_DOMAIN_PGLOCK(domain);
m = iommu_pgalloc(domain->pgtbl_obj, 0, IOMMU_PGF_WAITOK |
IOMMU_PGF_ZERO | IOMMU_PGF_OBJL);
/* No implicit free of the top level page table page. */
vm_page_wire(m);
domain->pgtblr = m;
AMDIOMMU_DOMAIN_PGUNLOCK(domain);
AMDIOMMU_LOCK(domain->unit);
domain->iodom.flags |= IOMMU_DOMAIN_PGTBL_INITED;
AMDIOMMU_UNLOCK(domain->unit);
return (0);
}
void
amdiommu_domain_free_pgtbl(struct amdiommu_domain *domain)
{
vm_object_t obj;
vm_page_t m;
obj = domain->pgtbl_obj;
if (obj == NULL) {
KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0,
("lost pagetable object domain %p", domain));
return;
}
AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);
domain->pgtbl_obj = NULL;
domain->pgtblr = NULL;
/* Obliterate ref_counts */
VM_OBJECT_ASSERT_WLOCKED(obj);
for (m = vm_page_lookup(obj, 0); m != NULL; m = vm_page_next(m))
vm_page_clearref(m);
VM_OBJECT_WUNLOCK(obj);
vm_object_deallocate(obj);
}
static iommu_pte_t *
amdiommu_pgtbl_map_pte(struct amdiommu_domain *domain, iommu_gaddr_t base,
int lvl, int flags, vm_pindex_t *idxp, struct sf_buf **sf)
{
iommu_pte_t *pte, *ptep;
struct sf_buf *sfp;
vm_page_t m;
vm_pindex_t idx, idx1;
idx = pglvl_pgtbl_get_pindex(domain->pglvl, base, lvl);
if (*sf != NULL && idx == *idxp) {
pte = (iommu_pte_t *)sf_buf_kva(*sf);
} else {
if (*sf != NULL)
iommu_unmap_pgtbl(*sf);
*idxp = idx;
retry:
pte = iommu_map_pgtbl(domain->pgtbl_obj, idx, flags, sf);
if (pte == NULL) {
KASSERT(lvl > 0,
("lost root page table page %p", domain));
/*
* Page table page does not exist, allocate
* it and create a pte in the preceding page level
* to reference the allocated page table page.
*/
m = iommu_pgalloc(domain->pgtbl_obj, idx, flags |
IOMMU_PGF_ZERO);
if (m == NULL)
return (NULL);
vm_page_wire(m);
sfp = NULL;
ptep = amdiommu_pgtbl_map_pte(domain, base, lvl - 1,
flags, &idx1, &sfp);
if (ptep == NULL) {
KASSERT(m->pindex != 0,
("loosing root page %p", domain));
vm_page_unwire_noq(m);
iommu_pgfree(domain->pgtbl_obj, m->pindex,
flags, NULL);
return (NULL);
}
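/*
* Link the new page table page into the parent level; the
* next-level field encodes the remaining walk depth below this
* entry.
*/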
ptep->pte = VM_PAGE_TO_PHYS(m) | AMDIOMMU_PTE_IR |
AMDIOMMU_PTE_IW | AMDIOMMU_PTE_PR |
((domain->pglvl - lvl) << AMDIOMMU_PTE_NLVL_SHIFT);
vm_page_wire(sf_buf_page(sfp));
vm_page_unwire_noq(m);
iommu_unmap_pgtbl(sfp);
/* Only executed once. */
goto retry;
}
}
pte += pglvl_pgtbl_pte_off(domain->pglvl, base, lvl);
return (pte);
}
static int
amdiommu_map_buf_locked(struct amdiommu_domain *domain, iommu_gaddr_t base,
iommu_gaddr_t size, vm_page_t *ma, uint64_t pflags, int flags,
struct iommu_map_entry *entry)
{
iommu_pte_t *pte;
struct sf_buf *sf;
iommu_gaddr_t base1;
vm_pindex_t pi, idx;
AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);
base1 = base;
flags |= IOMMU_PGF_OBJL;
idx = -1;
pte = NULL;
sf = NULL;
for (pi = 0; size > 0; base += IOMMU_PAGE_SIZE, size -= IOMMU_PAGE_SIZE,
pi++) {
KASSERT(size >= IOMMU_PAGE_SIZE,
("mapping loop overflow %p %jx %jx %jx", domain,
(uintmax_t)base, (uintmax_t)size, (uintmax_t)IOMMU_PAGE_SIZE));
pte = amdiommu_pgtbl_map_pte(domain, base, domain->pglvl - 1,
flags, &idx, &sf);
if (pte == NULL) {
KASSERT((flags & IOMMU_PGF_WAITOK) == 0,
("failed waitable pte alloc %p", domain));
if (sf != NULL)
iommu_unmap_pgtbl(sf);
amdiommu_unmap_buf_locked(domain, base1, base - base1,
flags, entry);
return (ENOMEM);
}
/* next level 0, no superpages */
pte->pte = VM_PAGE_TO_PHYS(ma[pi]) | pflags | AMDIOMMU_PTE_PR;
vm_page_wire(sf_buf_page(sf));
}
if (sf != NULL)
iommu_unmap_pgtbl(sf);
return (0);
}
static int
amdiommu_map_buf(struct iommu_domain *iodom, struct iommu_map_entry *entry,
vm_page_t *ma, uint64_t eflags, int flags)
{
struct amdiommu_domain *domain;
uint64_t pflags;
iommu_gaddr_t base, size;
int error;
base = entry->start;
size = entry->end - entry->start;
pflags = ((eflags & IOMMU_MAP_ENTRY_READ) != 0 ? AMDIOMMU_PTE_IR : 0) |
((eflags & IOMMU_MAP_ENTRY_WRITE) != 0 ? AMDIOMMU_PTE_IW : 0) |
((eflags & IOMMU_MAP_ENTRY_SNOOP) != 0 ? AMDIOMMU_PTE_FC : 0);
/* IOMMU_MAP_ENTRY_TM ignored */
domain = IODOM2DOM(iodom);
KASSERT((iodom->flags & IOMMU_DOMAIN_IDMAP) == 0,
("modifying idmap pagetable domain %p", domain));
KASSERT((base & IOMMU_PAGE_MASK) == 0,
("non-aligned base %p %jx %jx", domain, (uintmax_t)base,
(uintmax_t)size));
KASSERT((size & IOMMU_PAGE_MASK) == 0,
("non-aligned size %p %jx %jx", domain, (uintmax_t)base,
(uintmax_t)size));
KASSERT(size > 0, ("zero size %p %jx %jx", domain, (uintmax_t)base,
(uintmax_t)size));
KASSERT(base < iodom->end,
("base too high %p %jx %jx end %jx", domain, (uintmax_t)base,
(uintmax_t)size, (uintmax_t)iodom->end));
KASSERT(base + size < iodom->end,
("end too high %p %jx %jx end %jx", domain, (uintmax_t)base,
(uintmax_t)size, (uintmax_t)iodom->end));
KASSERT(base + size > base,
("size overflow %p %jx %jx", domain, (uintmax_t)base,
(uintmax_t)size));
KASSERT((pflags & (AMDIOMMU_PTE_IR | AMDIOMMU_PTE_IW)) != 0,
("neither read nor write %jx", (uintmax_t)pflags));
KASSERT((pflags & ~(AMDIOMMU_PTE_IR | AMDIOMMU_PTE_IW | AMDIOMMU_PTE_FC
)) == 0,
("invalid pte flags %jx", (uintmax_t)pflags));
KASSERT((flags & ~IOMMU_PGF_WAITOK) == 0, ("invalid flags %x", flags));
AMDIOMMU_DOMAIN_PGLOCK(domain);
error = amdiommu_map_buf_locked(domain, base, size, ma, pflags,
flags, entry);
AMDIOMMU_DOMAIN_PGUNLOCK(domain);
/*
* XXXKIB invalidation seems to be needed even for non-valid->valid
* updates. Recheck.
*/
iommu_qi_invalidate_sync(iodom, base, size,
(flags & IOMMU_PGF_WAITOK) != 0);
return (error);
}
static void
amdiommu_free_pgtbl_pde(struct amdiommu_domain *domain, iommu_gaddr_t base,
int lvl, int flags, struct iommu_map_entry *entry)
{
struct sf_buf *sf;
iommu_pte_t *pde;
vm_pindex_t idx;
sf = NULL;
pde = amdiommu_pgtbl_map_pte(domain, base, lvl, flags, &idx, &sf);
amdiommu_unmap_clear_pte(domain, base, lvl, flags, pde, &sf, entry,
true);
}
static void
amdiommu_unmap_clear_pte(struct amdiommu_domain *domain, iommu_gaddr_t base,
int lvl, int flags, iommu_pte_t *pte, struct sf_buf **sf,
struct iommu_map_entry *entry, bool free_sf)
{
vm_page_t m;
pte->pte = 0;
m = sf_buf_page(*sf);
if (free_sf) {
iommu_unmap_pgtbl(*sf);
*sf = NULL;
}
if (!vm_page_unwire_noq(m))
return;
KASSERT(lvl != 0,
("lost reference (lvl) on root pg domain %p base %jx lvl %d",
domain, (uintmax_t)base, lvl));
KASSERT(m->pindex != 0,
("lost reference (idx) on root pg domain %p base %jx lvl %d",
domain, (uintmax_t)base, lvl));
iommu_pgfree(domain->pgtbl_obj, m->pindex, flags, entry);
amdiommu_free_pgtbl_pde(domain, base, lvl - 1, flags, entry);
}
static int
amdiommu_unmap_buf_locked(struct amdiommu_domain *domain, iommu_gaddr_t base,
iommu_gaddr_t size, int flags, struct iommu_map_entry *entry)
{
iommu_pte_t *pte;
struct sf_buf *sf;
vm_pindex_t idx;
iommu_gaddr_t pg_sz;
AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);
if (size == 0)
return (0);
KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) == 0,
("modifying idmap pagetable domain %p", domain));
KASSERT((base & IOMMU_PAGE_MASK) == 0,
("non-aligned base %p %jx %jx", domain, (uintmax_t)base,
(uintmax_t)size));
KASSERT((size & IOMMU_PAGE_MASK) == 0,
("non-aligned size %p %jx %jx", domain, (uintmax_t)base,
(uintmax_t)size));
KASSERT(base < DOM2IODOM(domain)->end,
("base too high %p %jx %jx end %jx", domain, (uintmax_t)base,
(uintmax_t)size, (uintmax_t)DOM2IODOM(domain)->end));
KASSERT(base + size < DOM2IODOM(domain)->end,
("end too high %p %jx %jx end %jx", domain, (uintmax_t)base,
(uintmax_t)size, (uintmax_t)DOM2IODOM(domain)->end));
KASSERT(base + size > base,
("size overflow %p %jx %jx", domain, (uintmax_t)base,
(uintmax_t)size));
KASSERT((flags & ~IOMMU_PGF_WAITOK) == 0, ("invalid flags %x", flags));
pg_sz = IOMMU_PAGE_SIZE;
flags |= IOMMU_PGF_OBJL;
for (sf = NULL; size > 0; base += pg_sz, size -= pg_sz) {
pte = amdiommu_pgtbl_map_pte(domain, base,
domain->pglvl - 1, flags, &idx, &sf);
KASSERT(pte != NULL,
("sleeping or page missed %p %jx %d 0x%x",
domain, (uintmax_t)base, domain->pglvl - 1, flags));
amdiommu_unmap_clear_pte(domain, base, domain->pglvl - 1,
flags, pte, &sf, entry, false);
KASSERT(size >= pg_sz,
("unmapping loop overflow %p %jx %jx %jx", domain,
(uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz));
}
if (sf != NULL)
iommu_unmap_pgtbl(sf);
return (0);
}
static int
amdiommu_unmap_buf(struct iommu_domain *iodom, struct iommu_map_entry *entry,
int flags)
{
struct amdiommu_domain *domain;
int error;
domain = IODOM2DOM(iodom);
AMDIOMMU_DOMAIN_PGLOCK(domain);
error = amdiommu_unmap_buf_locked(domain, entry->start,
entry->end - entry->start, flags, entry);
AMDIOMMU_DOMAIN_PGUNLOCK(domain);
return (error);
}
const struct iommu_domain_map_ops amdiommu_domain_map_ops = {
.map = amdiommu_map_buf,
.unmap = amdiommu_unmap_buf,
};

sys/x86/iommu/amd_intrmap.c (new file, 391 lines)

@@ -0,0 +1,391 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 The FreeBSD Foundation
*
* This software was developed by Konstantin Belousov <kib@FreeBSD.org>
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/domainset.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/rman.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_map.h>
#include <contrib/dev/acpica/include/acpi.h>
#include <contrib/dev/acpica/include/accommon.h>
#include <dev/acpica/acpivar.h>
#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <machine/atomic.h>
#include <machine/bus.h>
#include <machine/md_var.h>
#include <machine/intr_machdep.h>
#include <x86/include/apicreg.h>
#include <x86/include/apicvar.h>
#include <machine/specialreg.h>
#include <x86/include/busdma_impl.h>
#include <dev/iommu/busdma_iommu.h>
#include <x86/iommu/amd_reg.h>
#include <x86/iommu/x86_iommu.h>
#include <x86/iommu/amd_iommu.h>
static struct amdiommu_ctx *amdiommu_ir_find(device_t src, uint16_t *rid,
bool *is_iommu);
static void amdiommu_ir_free_irte(struct amdiommu_ctx *ctx, device_t src,
u_int cookie);
int
amdiommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
{
struct amdiommu_ctx *ctx;
vmem_addr_t vmem_res;
u_int idx, i;
int error;
ctx = amdiommu_ir_find(src, NULL, NULL);
if (ctx == NULL || !CTX2AMD(ctx)->irte_enabled) {
for (i = 0; i < count; i++)
cookies[i] = -1;
return (EOPNOTSUPP);
}
error = vmem_alloc(ctx->irtids, count, M_FIRSTFIT | M_NOWAIT,
&vmem_res);
if (error != 0) {
KASSERT(error != EOPNOTSUPP,
("impossible EOPNOTSUPP from vmem"));
return (error);
}
idx = vmem_res;
for (i = 0; i < count; i++)
cookies[i] = idx + i;
return (0);
}
int
amdiommu_map_msi_intr(device_t src, u_int cpu, u_int vector,
u_int cookie, uint64_t *addr, uint32_t *data)
{
struct amdiommu_ctx *ctx;
struct amdiommu_unit *unit;
uint16_t rid;
bool is_iommu;
ctx = amdiommu_ir_find(src, &rid, &is_iommu);
if (is_iommu) {
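/*
* Interrupts generated by the IOMMU itself are not remapped;
* compose the MSI address/data directly in the APIC format.
*/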
if (addr != NULL) {
*data = vector;
*addr = MSI_INTEL_ADDR_BASE | ((cpu & 0xff) << 12);
if (x2apic_mode)
*addr |= ((uint64_t)cpu & 0xffffff00) << 32;
else
KASSERT(cpu <= 0xff,
("cpu id too big %d", cpu));
}
return (0);
}
if (ctx == NULL)
return (EOPNOTSUPP);
unit = CTX2AMD(ctx);
if (!unit->irte_enabled || cookie == -1)
return (EOPNOTSUPP);
if (cookie >= unit->irte_nentries) {
device_printf(src, "amdiommu%d: cookie %u irte max %u\n",
unit->iommu.unit, cookie, unit->irte_nentries);
return (EINVAL);
}
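/*
* Fill the IRTE fields first, then set remapen after a release
* fence, so the IOMMU never uses a half-written entry.
*/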
if (unit->irte_x2apic) {
struct amdiommu_irte_basic_vapic_x2 *irte;
irte = &ctx->irtx2[cookie];
irte->supiopf = 0;
irte->inttype = 0;
irte->rqeoi = 0;
irte->dm = 0;
irte->guestmode = 0;
irte->dest0 = cpu;
irte->rsrv0 = 0;
irte->vector = vector;
irte->rsrv1 = 0;
irte->rsrv2 = 0;
irte->dest1 = cpu >> 24;
atomic_thread_fence_rel();
irte->remapen = 1;
} else {
struct amdiommu_irte_basic_novapic *irte;
irte = &ctx->irtb[cookie];
irte->supiopf = 0;
irte->inttype = 0; /* fixed */
irte->rqeoi = 0;
irte->dm = 0; /* phys */
irte->guestmode = 0;
irte->dest = cpu;
irte->vector = vector;
irte->rsrv = 0;
atomic_thread_fence_rel();
irte->remapen = 1;
}
if (addr != NULL) {
*data = cookie;
*addr = MSI_INTEL_ADDR_BASE | ((cpu & 0xff) << 12);
if (unit->irte_x2apic)
*addr |= ((uint64_t)cpu & 0xffffff00) << 32;
}
iommu_get_requester(src, &rid);
AMDIOMMU_LOCK(unit);
amdiommu_qi_invalidate_ir_locked(unit, rid);
AMDIOMMU_UNLOCK(unit);
return (0);
}
int
amdiommu_unmap_msi_intr(device_t src, u_int cookie)
{
struct amdiommu_ctx *ctx;
if (cookie == -1)
return (0);
ctx = amdiommu_ir_find(src, NULL, NULL);
amdiommu_ir_free_irte(ctx, src, cookie);
return (0);
}
int
amdiommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector,
bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi,
uint32_t *lo)
{
/* XXXKIB for early call from ioapic_create() */
return (EOPNOTSUPP);
}
int
amdiommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
{
/* XXXKIB */
return (0);
}
static struct amdiommu_ctx *
amdiommu_ir_find(device_t src, uint16_t *ridp, bool *is_iommu)
{
devclass_t src_class;
struct amdiommu_unit *unit;
struct amdiommu_ctx *ctx;
uint32_t edte;
uint16_t rid;
uint8_t dte;
int error;
/*
* We need to determine if the interrupt source generates FSB
* interrupts.  If it does, the source is either the IOMMU
* itself, in which case interrupts are not remapped, or the
* HPET, whose interrupts are remapped.  For HPET, the source
* id is reported by the HPET record in the IVHD ACPI table.
*/
if (is_iommu != NULL)
*is_iommu = false;
ctx = NULL;
src_class = device_get_devclass(src);
if (src_class == devclass_find("amdiommu")) {
if (is_iommu != NULL)
*is_iommu = true;
} else if (src_class == devclass_find("hpet")) {
error = amdiommu_find_unit_for_hpet(src, &unit, &rid, &dte,
&edte, bootverbose);
ctx = NULL;	/* XXXKIB allocate ctx */
} else {
error = amdiommu_find_unit(src, &unit, &rid, &dte, &edte,
bootverbose);
if (error == 0) {
iommu_get_requester(src, &rid);
ctx = amdiommu_get_ctx_for_dev(unit, src,
rid, 0, false /* XXXKIB */, false, dte, edte);
}
}
if (ridp != NULL)
*ridp = rid;
return (ctx);
}
static void
amdiommu_ir_free_irte(struct amdiommu_ctx *ctx, device_t src,
u_int cookie)
{
struct amdiommu_unit *unit;
uint16_t rid;
MPASS(ctx != NULL);
unit = CTX2AMD(ctx);
KASSERT(unit->irte_enabled,
("unmap: cookie %d ctx %p unit %p", cookie, ctx, unit));
KASSERT(cookie < unit->irte_nentries,
("bad cookie %u %u", cookie, unit->irte_nentries));
if (unit->irte_x2apic) {
struct amdiommu_irte_basic_vapic_x2 *irte;
irte = &ctx->irtx2[cookie];
irte->remapen = 0;
atomic_thread_fence_rel();
bzero(irte, sizeof(*irte));
} else {
struct amdiommu_irte_basic_novapic *irte;
irte = &ctx->irtb[cookie];
irte->remapen = 0;
atomic_thread_fence_rel();
bzero(irte, sizeof(*irte));
}
iommu_get_requester(src, &rid);
AMDIOMMU_LOCK(unit);
amdiommu_qi_invalidate_ir_locked(unit, rid);
AMDIOMMU_UNLOCK(unit);
}
int
amdiommu_ctx_init_irte(struct amdiommu_ctx *ctx)
{
struct amdiommu_unit *unit;
void *ptr;
unsigned long sz;
int dom;
unit = CTX2AMD(ctx);
if (!unit->irte_enabled)
return (0);
KASSERT(unit->irte_nentries > 0 &&
unit->irte_nentries <= 2048 &&
powerof2(unit->irte_nentries),
("amdiommu%d: unit %p irte_nentries %u", unit->iommu.unit,
unit, unit->irte_nentries));
if (bus_get_domain(unit->iommu.dev, &dom) != 0)
dom = -1;
sz = unit->irte_nentries;
sz *= unit->irte_x2apic ? sizeof(struct amdiommu_irte_basic_vapic_x2) :
sizeof(struct amdiommu_irte_basic_novapic);
if (dom != -1) {
ptr = contigmalloc_domainset(sz, M_DEVBUF, DOMAINSET_PREF(dom),
M_WAITOK | M_ZERO, 0, ~0ull, 128, 0);
} else {
ptr = contigmalloc(sz, M_DEVBUF, M_WAITOK | M_ZERO,
0, ~0ull, 128, 0);
}
if (unit->irte_x2apic)
ctx->irtx2 = ptr;
else
ctx->irtb = ptr;
ctx->irtids = vmem_create("amdirt", 0, unit->irte_nentries, 1, 0,
M_FIRSTFIT | M_NOWAIT);
intr_reprogram(); /* XXXKIB */
return (0);
}
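/*
 * Editorial sketch, an assumption rather than committed code: IRTE
 * indices ("cookies") would be carved out of the ctx->irtids arena
 * created above; the in-tree allocator is amdiommu_alloc_msi_intr().
 * example_alloc_cookie() is hypothetical.
 */
static int
example_alloc_cookie(struct amdiommu_ctx *ctx, u_int *cookiep)
{
	vmem_addr_t start;
	int error;

	error = vmem_alloc(ctx->irtids, 1, M_FIRSTFIT | M_NOWAIT, &start);
	if (error != 0)
		return (error);
	*cookiep = start;	/* released later with vmem_free() */
	return (0);
}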
void
amdiommu_ctx_fini_irte(struct amdiommu_ctx *ctx)
{
struct amdiommu_unit *unit;
unit = CTX2AMD(ctx);
if (!unit->irte_enabled)
return;
if (unit->irte_x2apic)
free(ctx->irtx2, M_DEVBUF);
else
free(ctx->irtb, M_DEVBUF);
vmem_destroy(ctx->irtids);
}
int
amdiommu_init_irt(struct amdiommu_unit *unit)
{
int enabled, nentries;
SYSCTL_ADD_INT(&unit->iommu.sysctl_ctx,
SYSCTL_CHILDREN(device_get_sysctl_tree(unit->iommu.dev)),
OID_AUTO, "ir", CTLFLAG_RD, &unit->irte_enabled, 0,
"Interrupt remapping ops enabled");
enabled = 1;
TUNABLE_INT_FETCH("hw.iommu.ir", &enabled);
unit->irte_enabled = enabled != 0;
if (!unit->irte_enabled)
return (0);
nentries = 32;
TUNABLE_INT_FETCH("hw.iommu.amd.ir_num", &nentries);
nentries = roundup_pow_of_two(nentries);
if (nentries < 1)
nentries = 1;
if (nentries > 2048)
nentries = 2048;
unit->irte_nentries = nentries;
unit->irte_x2apic = x2apic_mode;
return (0);
}
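/*
 * Editorial note: the tunables consumed above can be set from
 * loader.conf for testing, e.g.:
 *
 *	hw.iommu.ir=1
 *	hw.iommu.amd.ir_num=64
 *
 * ir_num is rounded up to a power of two and clamped to [1, 2048].
 */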
void
amdiommu_fini_irt(struct amdiommu_unit *unit)
{
}

243
sys/x86/iommu/amd_iommu.h Normal file
View File

@ -0,0 +1,243 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 The FreeBSD Foundation
*
* This software was developed by Konstantin Belousov <kib@FreeBSD.org>
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef __X86_IOMMU_AMD_IOMMU_H
#define __X86_IOMMU_AMD_IOMMU_H
#include <dev/iommu/iommu.h>
#define AMDIOMMU_DEV_REPORTED 0x00000001
struct amdiommu_unit;
struct amdiommu_domain {
struct iommu_domain iodom;
int domain; /* (c) DID, written in context entry */
struct amdiommu_unit *unit; /* (c) */
u_int ctx_cnt; /* (u) Number of contexts owned */
u_int refs; /* (u) Refs, including ctx */
LIST_ENTRY(amdiommu_domain) link;/* (u) Member in the iommu list */
vm_object_t pgtbl_obj; /* (c) Page table pages */
vm_page_t pgtblr; /* (c) Page table root page */
u_int pglvl; /* (c) Page table levels */
};
struct amdiommu_ctx {
	struct iommu_ctx context;
	struct amdiommu_irte_basic_novapic *irtb;	/* xAPIC-format IRT */
	struct amdiommu_irte_basic_vapic_x2 *irtx2;	/* x2APIC-format IRT */
	vmem_t *irtids;		/* arena for IRTE indices (cookies) */
};
struct amdiommu_unit {
struct iommu_unit iommu;
struct x86_unit_common x86c;
u_int unit_dom; /* Served PCI domain, from IVRS */
u_int device_id; /* basically PCI RID */
u_int unit_id; /* HyperTransport unit ID, deprecated */
TAILQ_ENTRY(amdiommu_unit) unit_next;
int seccap_reg;
uint64_t efr;
vm_paddr_t mmio_base;
vm_size_t mmio_sz;
struct resource *mmio_res;
int mmio_rid;
uint64_t hw_ctrl;
u_int numirqs;
struct resource *msix_table;
int msix_table_rid;
int irq_cmdev_rid;
struct resource *irq_cmdev;
void *irq_cmdev_cookie;
struct amdiommu_dte *dev_tbl;
vm_object_t devtbl_obj;
LIST_HEAD(, amdiommu_domain) domains;
struct unrhdr *domids;
struct mtx event_lock;
struct amdiommu_event_generic *event_log;
u_int event_log_size;
u_int event_log_head;
u_int event_log_tail;
struct task event_task;
struct taskqueue *event_taskqueue;
struct amdiommu_event_generic event_copy_log[16];
u_int event_copy_head;
u_int event_copy_tail;
int irte_enabled; /* int for sysctl type */
bool irte_x2apic;
u_int irte_nentries;
};
#define AMD2IOMMU(unit) (&((unit)->iommu))
#define IOMMU2AMD(unit) \
__containerof((unit), struct amdiommu_unit, iommu)
#define AMDIOMMU_LOCK(unit) mtx_lock(&AMD2IOMMU(unit)->lock)
#define AMDIOMMU_UNLOCK(unit) mtx_unlock(&AMD2IOMMU(unit)->lock)
#define AMDIOMMU_ASSERT_LOCKED(unit) mtx_assert(&AMD2IOMMU(unit)->lock, \
MA_OWNED)
#define AMDIOMMU_EVENT_LOCK(unit) mtx_lock_spin(&(unit)->event_lock)
#define AMDIOMMU_EVENT_UNLOCK(unit) mtx_unlock_spin(&(unit)->event_lock)
#define AMDIOMMU_EVENT_ASSERT_LOCKED(unit) \
mtx_assert(&(unit)->event_lock, MA_OWNED)
#define DOM2IODOM(domain) (&((domain)->iodom))
#define IODOM2DOM(domain) \
__containerof((domain), struct amdiommu_domain, iodom)
#define CTX2IOCTX(ctx) (&((ctx)->context))
#define IOCTX2CTX(ctx) \
__containerof((ctx), struct amdiommu_ctx, context)
#define CTX2DOM(ctx) IODOM2DOM((ctx)->context.domain)
#define CTX2AMD(ctx) (CTX2DOM(ctx)->unit)
#define DOM2AMD(domain) ((domain)->unit)
#define AMDIOMMU_DOMAIN_LOCK(dom) mtx_lock(&(dom)->iodom.lock)
#define AMDIOMMU_DOMAIN_UNLOCK(dom) mtx_unlock(&(dom)->iodom.lock)
#define AMDIOMMU_DOMAIN_ASSERT_LOCKED(dom) \
mtx_assert(&(dom)->iodom.lock, MA_OWNED)
#define AMDIOMMU_DOMAIN_PGLOCK(dom) VM_OBJECT_WLOCK((dom)->pgtbl_obj)
#define AMDIOMMU_DOMAIN_PGTRYLOCK(dom) VM_OBJECT_TRYWLOCK((dom)->pgtbl_obj)
#define AMDIOMMU_DOMAIN_PGUNLOCK(dom) VM_OBJECT_WUNLOCK((dom)->pgtbl_obj)
#define AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(dom) \
VM_OBJECT_ASSERT_WLOCKED((dom)->pgtbl_obj)
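/*
 * Editorial note: functions suffixed _locked expect the corresponding
 * lock defined above to be held; e.g. from amd_intrmap.c:
 *
 *	AMDIOMMU_LOCK(unit);
 *	amdiommu_qi_invalidate_ir_locked(unit, rid);
 *	AMDIOMMU_UNLOCK(unit);
 */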
#define AMDIOMMU_RID 1001
static inline uint32_t
amdiommu_read4(const struct amdiommu_unit *unit, int reg)
{
return (bus_read_4(unit->mmio_res, reg));
}
static inline uint64_t
amdiommu_read8(const struct amdiommu_unit *unit, int reg)
{
#ifdef __i386__
uint32_t high, low;
low = bus_read_4(unit->mmio_res, reg);
high = bus_read_4(unit->mmio_res, reg + 4);
return (low | ((uint64_t)high << 32));
#else
return (bus_read_8(unit->mmio_res, reg));
#endif
}
static inline void
amdiommu_write4(const struct amdiommu_unit *unit, int reg, uint32_t val)
{
bus_write_4(unit->mmio_res, reg, val);
}
static inline void
amdiommu_write8(const struct amdiommu_unit *unit, int reg, uint64_t val)
{
#ifdef __i386__
uint32_t high, low;
low = val;
high = val >> 32;
bus_write_4(unit->mmio_res, reg, low);
bus_write_4(unit->mmio_res, reg + 4, high);
#else
bus_write_8(unit->mmio_res, reg, val);
#endif
}
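/*
 * Editorial note: on i386 the 64-bit accessors above are split into two
 * 4-byte MMIO operations and are not atomic; callers are assumed to
 * serialize register access, e.g. by holding the unit lock.
 */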
int amdiommu_find_unit(device_t dev, struct amdiommu_unit **unitp,
uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose);
int amdiommu_find_unit_for_ioapic(int apic_id, struct amdiommu_unit **unitp,
uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose);
int amdiommu_find_unit_for_hpet(device_t hpet, struct amdiommu_unit **unitp,
uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose);
int amdiommu_init_cmd(struct amdiommu_unit *unit);
void amdiommu_fini_cmd(struct amdiommu_unit *unit);
void amdiommu_event_intr(struct amdiommu_unit *unit, uint64_t status);
int amdiommu_init_event(struct amdiommu_unit *unit);
void amdiommu_fini_event(struct amdiommu_unit *unit);
int amdiommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count);
int amdiommu_map_msi_intr(device_t src, u_int cpu, u_int vector,
u_int cookie, uint64_t *addr, uint32_t *data);
int amdiommu_unmap_msi_intr(device_t src, u_int cookie);
int amdiommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector,
bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi,
uint32_t *lo);
int amdiommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie);
int amdiommu_init_irt(struct amdiommu_unit *unit);
void amdiommu_fini_irt(struct amdiommu_unit *unit);
int amdiommu_ctx_init_irte(struct amdiommu_ctx *ctx);
void amdiommu_ctx_fini_irte(struct amdiommu_ctx *ctx);
void amdiommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
bool cansleep);
void amdiommu_domain_unload(struct iommu_domain *iodom,
struct iommu_map_entries_tailq *entries, bool cansleep);
struct amdiommu_ctx *amdiommu_get_ctx_for_dev(struct amdiommu_unit *unit,
device_t dev, uint16_t rid, int dev_domain, bool id_mapped,
bool rmrr_init, uint8_t dte, uint32_t edte);
struct iommu_ctx *amdiommu_get_ctx(struct iommu_unit *iommu, device_t dev,
uint16_t rid, bool id_mapped, bool rmrr_init);
struct amdiommu_ctx *amdiommu_find_ctx_locked(struct amdiommu_unit *unit,
uint16_t rid);
void amdiommu_free_ctx_locked_method(struct iommu_unit *iommu,
struct iommu_ctx *context);
void amdiommu_free_ctx_method(struct iommu_ctx *context);
struct amdiommu_domain *amdiommu_find_domain(struct amdiommu_unit *unit,
uint16_t rid);
void amdiommu_qi_invalidate_ctx_locked(struct amdiommu_ctx *ctx);
void amdiommu_qi_invalidate_ctx_locked_nowait(struct amdiommu_ctx *ctx);
void amdiommu_qi_invalidate_ir_locked(struct amdiommu_unit *unit,
uint16_t devid);
void amdiommu_qi_invalidate_ir_locked_nowait(struct amdiommu_unit *unit,
uint16_t devid);
void amdiommu_qi_invalidate_all_pages_locked_nowait(
struct amdiommu_domain *domain);
void amdiommu_qi_invalidate_wait_sync(struct iommu_unit *iommu);
int amdiommu_domain_alloc_pgtbl(struct amdiommu_domain *domain);
void amdiommu_domain_free_pgtbl(struct amdiommu_domain *domain);
extern const struct iommu_domain_map_ops amdiommu_domain_map_ops;
#endif