Commit 725c2581 authored by Ingo Molnar

Merge branches 'core/iommu', 'x86/amd-iommu' and 'x86/iommu' into x86-v28-for-linus-phase3-B

Conflicts:
	arch/x86/kernel/pci-gart_64.c
	include/asm-x86/dma-mapping.h
@@ -284,6 +284,11 @@ and is between 256 and 4096 characters. It is defined in the file
 			isolate - enable device isolation (each device, as far
 				  as possible, will get its own protection
 				  domain)
+			fullflush - enable flushing of IO/TLB entries when
+				    they are unmapped. Otherwise they are
+				    flushed before they are reused, which
+				    is a lot faster
+
 	amd_iommu_size=	[HW,X86-64]
 			Define the size of the aperture for the AMD IOMMU
 			driver. Possible values are:
......
@@ -387,6 +387,7 @@ AMD IOMMU (AMD-VI)
 P:	Joerg Roedel
 M:	joerg.roedel@amd.com
 L:	iommu@lists.linux-foundation.org
+T:	git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux-2.6-iommu.git
 S:	Supported
 
 AMS (Apple Motion Sensor) DRIVER
......
@@ -8,7 +8,9 @@
 #include <asm/machvec.h>
 #include <linux/scatterlist.h>
 
-#define dma_alloc_coherent	platform_dma_alloc_coherent
+#define dma_alloc_coherent(dev, size, handle, gfp)	\
+	platform_dma_alloc_coherent(dev, size, handle, (gfp) | GFP_DMA)
+
 /* coherent mem. is cheap */
 static inline void *
 dma_alloc_noncoherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
......
@@ -554,6 +554,7 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
 config AMD_IOMMU
 	bool "AMD IOMMU support"
 	select SWIOTLB
+	select PCI_MSI
 	depends on X86_64 && PCI && ACPI
 	help
 	  With this option you can enable support for AMD IOMMU hardware in
......
@@ -22,6 +22,8 @@
 #include <linux/gfp.h>
 #include <linux/list.h>
 #include <linux/sysdev.h>
+#include <linux/interrupt.h>
+#include <linux/msi.h>
 #include <asm/pci-direct.h>
 #include <asm/amd_iommu_types.h>
 #include <asm/amd_iommu.h>
@@ -30,7 +32,6 @@
 /*
  * definitions for the ACPI scanning code
  */
-#define PCI_BUS(x) (((x) >> 8) & 0xff)
 #define IVRS_HEADER_LENGTH 48
 
 #define ACPI_IVHD_TYPE                  0x10
@@ -121,6 +122,7 @@ LIST_HEAD(amd_iommu_unity_map);	/* a list of required unity mappings
					   we find in ACPI */
 unsigned amd_iommu_aperture_order = 26;	/* size of aperture in power of 2 */
 int amd_iommu_isolate;			/* if 1, device isolation is enabled */
+bool amd_iommu_unmap_flush;		/* if true, flush on every unmap */
 
 LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
					   system */
@@ -234,7 +236,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
 {
 	u32 ctrl;
 
-	ctrl = (u64)readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
+	ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
 	ctrl &= ~(1 << bit);
 	writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
 }
@@ -242,13 +244,23 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
 /* Function to enable the hardware */
 void __init iommu_enable(struct amd_iommu *iommu)
 {
-	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at ");
-	print_devid(iommu->devid, 0);
-	printk(" cap 0x%hx\n", iommu->cap_ptr);
+	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU "
+	       "at %02x:%02x.%x cap 0x%hx\n",
+	       iommu->dev->bus->number,
+	       PCI_SLOT(iommu->dev->devfn),
+	       PCI_FUNC(iommu->dev->devfn),
+	       iommu->cap_ptr);
 
 	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 }
 
+/* Function to enable IOMMU event logging and event interrupts */
+void __init iommu_enable_event_logging(struct amd_iommu *iommu)
+{
+	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
+	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
+}
+
 /*
  * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
  * the system has one.
@@ -285,6 +297,14 @@ static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
  *
  ****************************************************************************/
 
+/*
+ * This function calculates the length of a given IVHD entry
+ */
+static inline int ivhd_entry_length(u8 *ivhd)
+{
+	return 0x04 << (*ivhd >> 6);
+}
+
 /*
  * This function reads the last device id the IOMMU has to handle from the PCI
  * capability header for this IOMMU
@@ -329,7 +349,7 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
 		default:
 			break;
 		}
-		p += 0x04 << (*p >> 6);
+		p += ivhd_entry_length(p);
 	}
 
 	WARN_ON(p != end);
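
For reference, the encoding that ivhd_entry_length() decodes: the top two bits of an IVHD entry's type byte select a 4-, 8-, 16- or 32-byte entry. A minimal standalone sketch of the same computation (the sample type values are illustrative only, not taken from this patch):

	#include <stdio.h>

	/* same computation as ivhd_entry_length(): 0x04 << (type >> 6) */
	static int entry_length(unsigned char type)
	{
		return 0x04 << (type >> 6);
	}

	int main(void)
	{
		/* 0x00-0x3f -> 4 bytes, 0x40-0x7f -> 8, 0x80-0xbf -> 16, 0xc0-0xff -> 32 */
		unsigned char samples[] = { 0x02, 0x42, 0x80, 0xc0 };
		for (unsigned int i = 0; i < sizeof(samples); i++)
			printf("type 0x%02x -> %d bytes\n",
			       samples[i], entry_length(samples[i]));
		return 0;
	}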
@@ -414,7 +434,32 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
 
 static void __init free_command_buffer(struct amd_iommu *iommu)
 {
-	free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
+	free_pages((unsigned long)iommu->cmd_buf,
+		   get_order(iommu->cmd_buf_size));
+}
+
+/* allocates the memory where the IOMMU will log its events to */
+static u8 * __init alloc_event_buffer(struct amd_iommu *iommu)
+{
+	u64 entry;
+
+	iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+						get_order(EVT_BUFFER_SIZE));
+
+	if (iommu->evt_buf == NULL)
+		return NULL;
+
+	entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
+	memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
+		    &entry, sizeof(entry));
+
+	iommu->evt_buf_size = EVT_BUFFER_SIZE;
+
+	return iommu->evt_buf;
+}
+
+static void __init free_event_buffer(struct amd_iommu *iommu)
+{
+	free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
 }
 
 /* sets a specific bit in the device table entry. */
@@ -487,19 +532,21 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
  */
 static void __init init_iommu_from_pci(struct amd_iommu *iommu)
 {
-	int bus = PCI_BUS(iommu->devid);
-	int dev = PCI_SLOT(iommu->devid);
-	int fn  = PCI_FUNC(iommu->devid);
 	int cap_ptr = iommu->cap_ptr;
-	u32 range;
+	u32 range, misc;
 
-	iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET);
+	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
+			      &iommu->cap);
+	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET,
+			      &range);
+	pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET,
+			      &misc);
 
-	range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
 	iommu->first_device = calc_devid(MMIO_GET_BUS(range),
 					 MMIO_GET_FD(range));
 	iommu->last_device = calc_devid(MMIO_GET_BUS(range),
					 MMIO_GET_LD(range));
+	iommu->evt_msi_num = MMIO_MSI_NUM(misc);
 }
 
 /*
@@ -604,7 +651,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
			break;
		}
 
-		p += 0x04 << (e->type >> 6);
+		p += ivhd_entry_length(p);
	}
 }
@@ -622,6 +669,7 @@ static int __init init_iommu_devices(struct amd_iommu *iommu)
 static void __init free_iommu_one(struct amd_iommu *iommu)
 {
 	free_command_buffer(iommu);
+	free_event_buffer(iommu);
 	iommu_unmap_mmio_space(iommu);
 }
@@ -649,8 +697,12 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
 	/*
	 * Copy data from ACPI table entry to the iommu struct
	 */
-	iommu->devid = h->devid;
+	iommu->dev = pci_get_bus_and_slot(PCI_BUS(h->devid), h->devid & 0xff);
+	if (!iommu->dev)
+		return 1;
+
 	iommu->cap_ptr = h->cap_ptr;
+	iommu->pci_seg = h->pci_seg;
 	iommu->mmio_phys = h->mmio_phys;
 	iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys);
 	if (!iommu->mmio_base)
@@ -661,10 +713,18 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
 	if (!iommu->cmd_buf)
		return -ENOMEM;
 
+	iommu->evt_buf = alloc_event_buffer(iommu);
+	if (!iommu->evt_buf)
+		return -ENOMEM;
+
+	iommu->int_enabled = false;
+
 	init_iommu_from_pci(iommu);
 	init_iommu_from_acpi(iommu, h);
 	init_iommu_devices(iommu);
 
+	pci_enable_device(iommu->dev);
+
 	return 0;
 }
@@ -704,6 +764,95 @@ static int __init init_iommu_all(struct acpi_table_header *table)
 	return 0;
 }
 
+/****************************************************************************
+ *
+ * The following functions initialize the MSI interrupts for all IOMMUs
+ * in the system. Its a bit challenging because there could be multiple
+ * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per
+ * pci_dev.
+ *
+ ****************************************************************************/
+
+static int __init iommu_setup_msix(struct amd_iommu *iommu)
+{
+	struct amd_iommu *curr;
+	struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */
+	int nvec = 0, i;
+
+	list_for_each_entry(curr, &amd_iommu_list, list) {
+		if (curr->dev == iommu->dev) {
+			entries[nvec].entry = curr->evt_msi_num;
+			entries[nvec].vector = 0;
+			curr->int_enabled = true;
+			nvec++;
+		}
+	}
+
+	if (pci_enable_msix(iommu->dev, entries, nvec)) {
+		pci_disable_msix(iommu->dev);
+		return 1;
+	}
+
+	for (i = 0; i < nvec; ++i) {
+		int r = request_irq(entries->vector, amd_iommu_int_handler,
+				    IRQF_SAMPLE_RANDOM,
+				    "AMD IOMMU",
+				    NULL);
+		if (r)
+			goto out_free;
+	}
+
+	return 0;
+
+out_free:
+	for (i -= 1; i >= 0; --i)
+		free_irq(entries->vector, NULL);
+
+	pci_disable_msix(iommu->dev);
+
+	return 1;
+}
+
+static int __init iommu_setup_msi(struct amd_iommu *iommu)
+{
+	int r;
+	struct amd_iommu *curr;
+
+	list_for_each_entry(curr, &amd_iommu_list, list) {
+		if (curr->dev == iommu->dev)
+			curr->int_enabled = true;
+	}
+
+	if (pci_enable_msi(iommu->dev))
+		return 1;
+
+	r = request_irq(iommu->dev->irq, amd_iommu_int_handler,
+			IRQF_SAMPLE_RANDOM,
+			"AMD IOMMU",
+			NULL);
+
+	if (r) {
+		pci_disable_msi(iommu->dev);
+		return 1;
+	}
+
+	return 0;
+}
+
+static int __init iommu_init_msi(struct amd_iommu *iommu)
+{
+	if (iommu->int_enabled)
+		return 0;
+
+	if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSIX))
+		return iommu_setup_msix(iommu);
+	else if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
+		return iommu_setup_msi(iommu);
+
+	return 1;
+}
+
 /****************************************************************************
  *
  * The next functions belong to the third pass of parsing the ACPI
@@ -811,7 +960,6 @@ static void init_device_table(void)
 	for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
		set_dev_entry_bit(devid, DEV_ENTRY_VALID);
		set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
-		set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT);
	}
 }
@@ -825,6 +973,8 @@ static void __init enable_iommus(void)
 	list_for_each_entry(iommu, &amd_iommu_list, list) {
		iommu_set_exclusion_range(iommu);
+		iommu_init_msi(iommu);
+		iommu_enable_event_logging(iommu);
		iommu_enable(iommu);
	}
 }
@@ -995,11 +1145,17 @@ int __init amd_iommu_init(void)
 	else
		printk("disabled\n");
 
+	if (amd_iommu_unmap_flush)
+		printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n");
+	else
+		printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n");
+
 out:
 	return ret;
 
 free:
-	free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1);
+	free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
+		   get_order(MAX_DOMAIN_ID/8));
 
 	free_pages((unsigned long)amd_iommu_pd_table,
		   get_order(rlookup_table_size));
@@ -1057,8 +1213,10 @@ void __init amd_iommu_detect(void)
 static int __init parse_amd_iommu_options(char *str)
 {
 	for (; *str; ++str) {
-		if (strcmp(str, "isolate") == 0)
+		if (strncmp(str, "isolate", 7) == 0)
			amd_iommu_isolate = 1;
+		if (strncmp(str, "fullflush", 11) == 0)
+			amd_iommu_unmap_flush = true;
	}
 
	return 1;
......
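As a usage note (not part of the patch itself): the two options parsed above are passed through the amd_iommu= kernel parameter documented earlier, for example

	amd_iommu=fullflush
	amd_iommu=isolate

while the aperture size remains a separate amd_iommu_size= parameter.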
@@ -95,6 +95,20 @@ static void __init nvidia_bugs(int num, int slot, int func)
 }
 
+#ifdef CONFIG_DMAR
+static void __init intel_g33_dmar(int num, int slot, int func)
+{
+	struct acpi_table_header *dmar_tbl;
+	acpi_status status;
+
+	status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
+	if (ACPI_SUCCESS(status)) {
+		printk(KERN_INFO "BIOS BUG: DMAR advertised on Intel G31/G33 chipset -- ignoring\n");
+		dmar_disabled = 1;
+	}
+}
+#endif
+
 #define QFLAG_APPLY_ONCE	0x1
 #define QFLAG_APPLIED		0x2
 #define QFLAG_DONE		(QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -114,6 +128,10 @@ static struct chipset early_qrk[] __initdata = {
	  PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs },
	{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,
	  PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config },
+#ifdef CONFIG_DMAR
+	{ PCI_VENDOR_ID_INTEL, 0x29c0,
+	  PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar },
+#endif
	{}
 };
......
@@ -16,8 +16,9 @@ EXPORT_SYMBOL(num_k8_northbridges);
 static u32 *flush_words;
 
 struct pci_device_id k8_nb_ids[] = {
-	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
-	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_11H_NB_MISC) },
	{}
 };
 EXPORT_SYMBOL(k8_nb_ids);
......
@@ -261,7 +261,7 @@ static void iommu_range_reserve(struct iommu_table *tbl,
			       badbit, tbl, start_addr, npages);
	}
 
-	set_bit_string(tbl->it_map, index, npages);
+	iommu_area_reserve(tbl->it_map, index, npages);
 
	spin_unlock_irqrestore(&tbl->it_lock, flags);
 }
@@ -491,6 +491,8 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,
 	npages = size >> PAGE_SHIFT;
	order = get_order(size);
 
+	flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
+
	/* alloc enough pages (and possibly more) */
	ret = (void *)__get_free_pages(flag, order);
	if (!ret)
@@ -510,8 +512,22 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,
 	return ret;
 }
 
+static void calgary_free_coherent(struct device *dev, size_t size,
+				  void *vaddr, dma_addr_t dma_handle)
+{
+	unsigned int npages;
+	struct iommu_table *tbl = find_iommu_table(dev);
+
+	size = PAGE_ALIGN(size);
+	npages = size >> PAGE_SHIFT;
+
+	iommu_free(tbl, dma_handle, npages);
+	free_pages((unsigned long)vaddr, get_order(size));
+}
+
 static struct dma_mapping_ops calgary_dma_ops = {
	.alloc_coherent = calgary_alloc_coherent,
+	.free_coherent = calgary_free_coherent,
	.map_single = calgary_map_single,
	.unmap_single = calgary_unmap_single,
	.map_sg = calgary_map_sg,
......
@@ -41,11 +41,12 @@ EXPORT_SYMBOL(bad_dma_address);
 /* Dummy device used for NULL arguments (normally ISA). Better would
    be probably a smaller DMA mask, but this is bug-to-bug compatible
    to older i386. */
-struct device fallback_dev = {
+struct device x86_dma_fallback_dev = {
	.bus_id = "fallback device",
	.coherent_dma_mask = DMA_32BIT_MASK,
-	.dma_mask = &fallback_dev.coherent_dma_mask,
+	.dma_mask = &x86_dma_fallback_dev.coherent_dma_mask,
 };
+EXPORT_SYMBOL(x86_dma_fallback_dev);
 
 int dma_set_mask(struct device *dev, u64 mask)
 {
@@ -133,6 +134,37 @@ unsigned long iommu_num_pages(unsigned long addr, unsigned long len)
 EXPORT_SYMBOL(iommu_num_pages);
 #endif
 
+void *dma_generic_alloc_coherent(struct device *dev, size_t size,
+				 dma_addr_t *dma_addr, gfp_t flag)
+{
+	unsigned long dma_mask;
+	struct page *page;
+	dma_addr_t addr;
+
+	dma_mask = dma_alloc_coherent_mask(dev, flag);
+
+	flag |= __GFP_ZERO;
+again:
+	page = alloc_pages_node(dev_to_node(dev), flag, get_order(size));
+	if (!page)
+		return NULL;
+
+	addr = page_to_phys(page);
+	if (!is_buffer_dma_capable(dma_mask, addr, size)) {
+		__free_pages(page, get_order(size));
+
+		if (dma_mask < DMA_32BIT_MASK && !(flag & GFP_DMA)) {
+			flag = (flag & ~GFP_DMA32) | GFP_DMA;
+			goto again;
+		}
+
+		return NULL;
+	}
+
+	*dma_addr = addr;
+	return page_address(page);
+}
+
 /*
  * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
  * documentation.
@@ -241,147 +273,6 @@ int dma_supported(struct device *dev, u64 mask)
 }
 EXPORT_SYMBOL(dma_supported);
 
-/* Allocate DMA memory on node near device */
-static noinline struct page *
-dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
-{
-	int node;
-
-	node = dev_to_node(dev);
-
-	return alloc_pages_node(node, gfp, order);
-}
-
-/*
- * Allocate memory for a coherent mapping.
- */
-void *
-dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
-		   gfp_t gfp)
-{
-	struct dma_mapping_ops *ops = get_dma_ops(dev);
-	void *memory = NULL;
-	struct page *page;
-	unsigned long dma_mask = 0;
-	dma_addr_t bus;
-	int noretry = 0;
-
-	/* ignore region specifiers */
-	gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
-
-	if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
-		return memory;
-
-	if (!dev) {
-		dev = &fallback_dev;
-		gfp |= GFP_DMA;
-	}
-	dma_mask = dev->coherent_dma_mask;
-	if (dma_mask == 0)
-		dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK;
-
-	/* Device not DMA able */
-	if (dev->dma_mask == NULL)
-		return NULL;
-
-	/* Don't invoke OOM killer or retry in lower 16MB DMA zone */
-	if (gfp & __GFP_DMA)
-		noretry = 1;
-
-#ifdef CONFIG_X86_64
-	/* Why <=? Even when the mask is smaller than 4GB it is often
-	   larger than 16MB and in this case we have a chance of
-	   finding fitting memory in the next higher zone first. If
-	   not retry with true GFP_DMA. -AK */
-	if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
-		gfp |= GFP_DMA32;
-		if (dma_mask < DMA_32BIT_MASK)
-			noretry = 1;
-	}
-#endif
-
- again:
-	page = dma_alloc_pages(dev,
-		noretry ? gfp | __GFP_NORETRY : gfp, get_order(size));
-	if (page == NULL)
-		return NULL;
-
-	{
-		int high, mmu;
-		bus = page_to_phys(page);
-		memory = page_address(page);
-		high = (bus + size) >= dma_mask;
-		mmu = high;
-		if (force_iommu && !(gfp & GFP_DMA))
-			mmu = 1;
-		else if (high) {
-			free_pages((unsigned long)memory,
-				   get_order(size));
-
-			/* Don't use the 16MB ZONE_DMA unless absolutely
-			   needed. It's better to use remapping first. */
-			if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
-				gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
-				goto again;
-			}
-
-			/* Let low level make its own zone decisions */
-			gfp &= ~(GFP_DMA32|GFP_DMA);
-
-			if (ops->alloc_coherent)
-				return ops->alloc_coherent(dev, size,
-							   dma_handle, gfp);
-			return NULL;
-		}
-
-		memset(memory, 0, size);
-		if (!mmu) {
-			*dma_handle = bus;
-			return memory;
-		}
-	}
-
-	if (ops->alloc_coherent) {
-		free_pages((unsigned long)memory, get_order(size));
-		gfp &= ~(GFP_DMA|GFP_DMA32);
-		return ops->alloc_coherent(dev, size, dma_handle, gfp);
-	}
-
-	if (ops->map_simple) {
-		*dma_handle = ops->map_simple(dev, virt_to_phys(memory),
-					      size,
-					      PCI_DMA_BIDIRECTIONAL);
-		if (*dma_handle != bad_dma_address)
-			return memory;
-	}
-
-	if (panic_on_overflow)
-		panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",
-		      (unsigned long)size);
-	free_pages((unsigned long)memory, get_order(size));
-	return NULL;
-}
-EXPORT_SYMBOL(dma_alloc_coherent);
-
-/*
- * Unmap coherent memory.
- * The caller must ensure that the device has finished accessing the mapping.
- */
-void dma_free_coherent(struct device *dev, size_t size,
-			 void *vaddr, dma_addr_t bus)
-{
-	struct dma_mapping_ops *ops = get_dma_ops(dev);
-
-	int order = get_order(size);
-	WARN_ON(irqs_disabled());	/* for portability */
-	if (dma_release_from_coherent(dev, order, vaddr))
-		return;
-	if (ops->unmap_single)
-		ops->unmap_single(dev, bus, size, 0);
-	free_pages((unsigned long)vaddr, order);
-}
-EXPORT_SYMBOL(dma_free_coherent);
-
 static int __init pci_iommu_init(void)
 {
 	calgary_iommu_init();
......
@@ -27,8 +27,8 @@
 #include <linux/scatterlist.h>
 #include <linux/iommu-helper.h>
 #include <linux/sysdev.h>
+#include <linux/io.h>
 #include <asm/atomic.h>
-#include <asm/io.h>
 #include <asm/mtrr.h>
 #include <asm/pgtable.h>
 #include <asm/proto.h>
@@ -80,7 +80,7 @@ AGPEXTERN int agp_memory_reserved;
 AGPEXTERN __u32 *agp_gatt_table;
 
 static unsigned long next_bit;	/* protected by iommu_bitmap_lock */
-static int need_flush;		/* global flush state. set for each gart wrap */
+static bool need_flush;		/* global flush state. set for each gart wrap */
 
 static unsigned long alloc_iommu(struct device *dev, int size,
				 unsigned long align_mask)
@@ -98,7 +98,7 @@ static unsigned long alloc_iommu(struct device *dev, int size,
 	offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit,
				  size, base_index, boundary_size, align_mask);
	if (offset == -1) {
-		need_flush = 1;
+		need_flush = true;
		offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0,
					  size, base_index, boundary_size,
					  align_mask);
@@ -107,11 +107,11 @@ static unsigned long alloc_iommu(struct device *dev, int size,
		next_bit = offset+size;
		if (next_bit >= iommu_pages) {
			next_bit = 0;
-			need_flush = 1;
+			need_flush = true;
		}
	}
	if (iommu_fullflush)
-		need_flush = 1;
+		need_flush = true;
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
 
	return offset;
@@ -136,7 +136,7 @@ static void flush_gart(void)
 	spin_lock_irqsave(&iommu_bitmap_lock, flags);
	if (need_flush) {
		k8_flush_garts();
-		need_flush = 0;
+		need_flush = false;
	}
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
 }
@@ -175,7 +175,8 @@ static void dump_leak(void)
	       iommu_leak_pages);
	for (i = 0; i < iommu_leak_pages; i += 2) {
		printk(KERN_DEBUG "%lu: ", iommu_pages-i);
-		printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], 0);
+		printk_address((unsigned long) iommu_leak_tab[iommu_pages-i],
+			       0);
		printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' ');
	}
	printk(KERN_DEBUG "\n");
@@ -214,24 +215,14 @@ static void iommu_full(struct device *dev, size_t size, int dir)
 static inline int
 need_iommu(struct device *dev, unsigned long addr, size_t size)
 {
-	u64 mask = *dev->dma_mask;
-	int high = addr + size > mask;
-	int mmu = high;
-
-	if (force_iommu)
-		mmu = 1;
-
-	return mmu;
+	return force_iommu ||
+		!is_buffer_dma_capable(*dev->dma_mask, addr, size);
 }
 
 static inline int
 nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
 {
-	u64 mask = *dev->dma_mask;
-	int high = addr + size > mask;
-	int mmu = high;
-
-	return mmu;
+	return !is_buffer_dma_capable(*dev->dma_mask, addr, size);
 }
 
 /* Map a single continuous physical area into the IOMMU.
@@ -261,20 +252,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
 	return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
 }
 
-static dma_addr_t
-gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir)
-{
-	dma_addr_t map;
-	unsigned long align_mask;
-
-	align_mask = (1UL << get_order(size)) - 1;
-	map = dma_map_area(dev, paddr, size, dir, align_mask);
-	flush_gart();
-
-	return map;
-}
-
 /* Map a single area into the IOMMU */
 static dma_addr_t
 gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir)
@@ -282,7 +259,7 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir)
 	unsigned long bus;
 
	if (!dev)
-		dev = &fallback_dev;
+		dev = &x86_dma_fallback_dev;
 
	if (!need_iommu(dev, paddr, size))
		return paddr;
@@ -434,7 +411,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
		return 0;
 
	if (!dev)
-		dev = &fallback_dev;
+		dev = &x86_dma_fallback_dev;
 
	out		= 0;
	start		= 0;
@@ -506,6 +483,46 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
	return 0;
 }
 
+/* allocate and map a coherent mapping */
+static void *
+gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
+		    gfp_t flag)
+{
+	dma_addr_t paddr;
+	unsigned long align_mask;
+	struct page *page;
+
+	if (force_iommu && !(flag & GFP_DMA)) {
+		flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
+		page = alloc_pages(flag | __GFP_ZERO, get_order(size));
+		if (!page)
+			return NULL;
+
+		align_mask = (1UL << get_order(size)) - 1;
+		paddr = dma_map_area(dev, page_to_phys(page), size,
+				     DMA_BIDIRECTIONAL, align_mask);
+
+		flush_gart();
+		if (paddr != bad_dma_address) {
+			*dma_addr = paddr;
+			return page_address(page);
+		}
+		__free_pages(page, get_order(size));
+	} else
+		return dma_generic_alloc_coherent(dev, size, dma_addr, flag);
+
+	return NULL;
+}
+
+/* free a coherent mapping */
+static void
+gart_free_coherent(struct device *dev, size_t size, void *vaddr,
+		   dma_addr_t dma_addr)
+{
+	gart_unmap_single(dev, dma_addr, size, DMA_BIDIRECTIONAL);
+	free_pages((unsigned long)vaddr, get_order(size));
+}
+
 static int no_agp;
 
 static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
@@ -656,13 +673,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 	info->aper_size = aper_size >> 20;
 
	gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
-	gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size));
+	gatt = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+					get_order(gatt_size));
	if (!gatt)
		panic("Cannot allocate GATT table");
	if (set_memory_uc((unsigned long)gatt, gatt_size >> PAGE_SHIFT))
		panic("Could not set GART PTEs to uncacheable pages");
-	memset(gatt, 0, gatt_size);
	agp_gatt_table = gatt;
 
	enable_gart_translations();
@@ -671,7 +688,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 	if (!error)
		error = sysdev_register(&device_gart);
	if (error)
-		panic("Could not register gart_sysdev -- would corrupt data on next suspend");
+		panic("Could not register gart_sysdev -- "
+		      "would corrupt data on next suspend");
 
	flush_gart();
@@ -687,20 +705,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 	return -1;
 }
 
-extern int agp_amd64_init(void);
-
 static struct dma_mapping_ops gart_dma_ops = {
	.map_single			= gart_map_single,
-	.map_simple			= gart_map_simple,
	.unmap_single			= gart_unmap_single,
-	.sync_single_for_cpu		= NULL,
-	.sync_single_for_device		= NULL,
-	.sync_single_range_for_cpu	= NULL,
-	.sync_single_range_for_device	= NULL,
-	.sync_sg_for_cpu		= NULL,
-	.sync_sg_for_device		= NULL,
	.map_sg				= gart_map_sg,
	.unmap_sg			= gart_unmap_sg,
+	.alloc_coherent			= gart_alloc_coherent,
+	.free_coherent			= gart_free_coherent,
 };
 
 void gart_iommu_shutdown(void)
@@ -760,8 +771,8 @@ void __init gart_iommu_init(void)
	    (no_agp && init_k8_gatt(&info) < 0)) {
		if (max_pfn > MAX_DMA32_PFN) {
			printk(KERN_WARNING "More than 4GB of memory "
-			       "but GART IOMMU not available.\n"
-			       KERN_WARNING "falling back to iommu=soft.\n");
+			       "but GART IOMMU not available.\n");
+			printk(KERN_WARNING "falling back to iommu=soft.\n");
		}
		return;
	}
@@ -779,19 +790,16 @@ void __init gart_iommu_init(void)
 	iommu_size = check_iommu_size(info.aper_base, aper_size);
	iommu_pages = iommu_size >> PAGE_SHIFT;
 
-	iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL,
+	iommu_gart_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
						      get_order(iommu_pages/8));
	if (!iommu_gart_bitmap)
		panic("Cannot allocate iommu bitmap\n");
-	memset(iommu_gart_bitmap, 0, iommu_pages/8);
 
 #ifdef CONFIG_IOMMU_LEAK
	if (leak_trace) {
-		iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL,
+		iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
				  get_order(iommu_pages*sizeof(void *)));
-		if (iommu_leak_tab)
-			memset(iommu_leak_tab, 0, iommu_pages * 8);
-		else
+		if (!iommu_leak_tab)
			printk(KERN_DEBUG
			       "PCI-DMA: Cannot allocate leak trace area\n");
	}
@@ -801,7 +809,7 @@ void __init gart_iommu_init(void)
	 * Out of IOMMU space handling.
	 * Reserve some invalid pages at the beginning of the GART.
	 */
-	set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
+	iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
 
	agp_memory_reserved = iommu_size;
	printk(KERN_INFO
@@ -859,7 +867,8 @@ void __init gart_parse_options(char *p)
 	if (!strncmp(p, "leak", 4)) {
		leak_trace = 1;
		p += 4;
-		if (*p == '=') ++p;
+		if (*p == '=')
+			++p;
		if (isdigit(*p) && get_option(&p, &arg))
			iommu_leak_pages = arg;
	}
......
@@ -14,7 +14,7 @@
 static int
 check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
 {
-	if (hwdev && bus + size > *hwdev->dma_mask) {
+	if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) {
		if (*hwdev->dma_mask >= DMA_32BIT_MASK)
			printk(KERN_ERR
			    "nommu_%s: overflow %Lx+%zu of device mask %Lx\n",
@@ -72,7 +72,15 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
 	return nents;
 }
 
+static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
+				dma_addr_t dma_addr)
+{
+	free_pages((unsigned long)vaddr, get_order(size));
+}
+
 struct dma_mapping_ops nommu_dma_ops = {
+	.alloc_coherent = dma_generic_alloc_coherent,
+	.free_coherent = nommu_free_coherent,
	.map_single = nommu_map_single,
	.map_sg = nommu_map_sg,
	.is_phys = 1,
......
@@ -80,7 +80,7 @@ static long list_size;
 
 static void domain_remove_dev_info(struct dmar_domain *domain);
 
-static int dmar_disabled;
+int dmar_disabled;
 static int __initdata dmar_map_gfx = 1;
 static int dmar_forcedac;
 static int intel_iommu_strict;
......
@@ -20,10 +20,13 @@
 #ifndef ASM_X86__AMD_IOMMU_H
 #define ASM_X86__AMD_IOMMU_H
 
+#include <linux/irqreturn.h>
+
 #ifdef CONFIG_AMD_IOMMU
 extern int amd_iommu_init(void);
 extern int amd_iommu_init_dma_ops(void);
 extern void amd_iommu_detect(void);
+extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
 #else
 static inline int amd_iommu_init(void) { return -ENODEV; }
 static inline void amd_iommu_detect(void) { }
......
@@ -37,6 +37,7 @@
 /* Capability offsets used by the driver */
 #define MMIO_CAP_HDR_OFFSET	0x00
 #define MMIO_RANGE_OFFSET	0x0c
+#define MMIO_MISC_OFFSET	0x10
 
 /* Masks, shifts and macros to parse the device range capability */
 #define MMIO_RANGE_LD_MASK	0xff000000
@@ -48,6 +49,7 @@
 #define MMIO_GET_LD(x)  (((x) & MMIO_RANGE_LD_MASK) >> MMIO_RANGE_LD_SHIFT)
 #define MMIO_GET_FD(x)  (((x) & MMIO_RANGE_FD_MASK) >> MMIO_RANGE_FD_SHIFT)
 #define MMIO_GET_BUS(x) (((x) & MMIO_RANGE_BUS_MASK) >> MMIO_RANGE_BUS_SHIFT)
+#define MMIO_MSI_NUM(x)	((x) & 0x1f)
 
 /* Flag masks for the AMD IOMMU exclusion range */
 #define MMIO_EXCL_ENABLE_MASK 0x01ULL
@@ -69,6 +71,25 @@
 /* MMIO status bits */
 #define MMIO_STATUS_COM_WAIT_INT_MASK	0x04
 
+/* event logging constants */
+#define EVENT_ENTRY_SIZE	0x10
+#define EVENT_TYPE_SHIFT	28
+#define EVENT_TYPE_MASK		0xf
+#define EVENT_TYPE_ILL_DEV	0x1
+#define EVENT_TYPE_IO_FAULT	0x2
+#define EVENT_TYPE_DEV_TAB_ERR	0x3
+#define EVENT_TYPE_PAGE_TAB_ERR	0x4
+#define EVENT_TYPE_ILL_CMD	0x5
+#define EVENT_TYPE_CMD_HARD_ERR	0x6
+#define EVENT_TYPE_IOTLB_INV_TO	0x7
+#define EVENT_TYPE_INV_DEV_REQ	0x8
+#define EVENT_DEVID_MASK	0xffff
+#define EVENT_DEVID_SHIFT	0
+#define EVENT_DOMID_MASK	0xffff
+#define EVENT_DOMID_SHIFT	0
+#define EVENT_FLAGS_MASK	0xfff
+#define EVENT_FLAGS_SHIFT	0x10
+
 /* feature control bits */
 #define CONTROL_IOMMU_EN        0x00ULL
 #define CONTROL_HT_TUN_EN       0x01ULL
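
To show how the event-log constants above fit together, here is a hedged sketch of decoding one 16-byte event entry (EVENT_ENTRY_SIZE), treated as four 32-bit words. The exact word layout is defined by the AMD IOMMU specification; the helper name is hypothetical and this is not the driver's actual handler (that file is collapsed above):

	/* Illustration only: field placement assumed per the masks/shifts above */
	static void decode_event(u32 *event)
	{
		int type  = (event[1] >> EVENT_TYPE_SHIFT)  & EVENT_TYPE_MASK;
		u16 devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
		u16 domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK;
		u16 flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
		u64 addr  = ((u64)event[3] << 32) | event[2];

		if (type == EVENT_TYPE_IO_FAULT)
			printk(KERN_ERR "IO_PAGE_FAULT device=%04x domain=%04x "
			       "address=0x%016llx flags=0x%04x\n",
			       devid, domid, addr, flags);
	}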
@@ -109,6 +130,8 @@
 #define DEV_ENTRY_NMI_PASS      0xba
 #define DEV_ENTRY_LINT0_PASS    0xbe
 #define DEV_ENTRY_LINT1_PASS    0xbf
+#define DEV_ENTRY_MODE_MASK	0x07
+#define DEV_ENTRY_MODE_SHIFT	0x09
 
 /* constants to configure the command buffer */
 #define CMD_BUFFER_SIZE 8192
@@ -116,6 +139,10 @@
 #define MMIO_CMD_SIZE_SHIFT 56
 #define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT)
 
+/* constants for event buffer handling */
+#define EVT_BUFFER_SIZE		8192 /* 512 entries */
+#define EVT_LEN_MASK		(0x9ULL << 56)
+
 #define PAGE_MODE_1_LEVEL 0x01
 #define PAGE_MODE_2_LEVEL 0x02
 #define PAGE_MODE_3_LEVEL 0x03
@@ -134,6 +161,7 @@
 #define IOMMU_MAP_SIZE_L3 (1ULL << 39)
 
 #define IOMMU_PTE_P  (1ULL << 0)
+#define IOMMU_PTE_TV (1ULL << 1)
 #define IOMMU_PTE_U  (1ULL << 59)
 #define IOMMU_PTE_FC (1ULL << 60)
 #define IOMMU_PTE_IR (1ULL << 61)
@@ -159,6 +187,9 @@
 
 #define MAX_DOMAIN_ID 65536
 
+/* FIXME: move this macro to <linux/pci.h> */
+#define PCI_BUS(x) (((x) >> 8) & 0xff)
+
 /*
  * This structure contains generic data for IOMMU protection domains
  * independent of their use.
@@ -196,6 +227,15 @@ struct dma_ops_domain {
	 * just calculate its address in constant time.
	 */
	u64 **pte_pages;
+
+	/* This will be set to true when TLB needs to be flushed */
+	bool need_flush;
+
+	/*
+	 * if this is a preallocated domain, keep the device for which it was
+	 * preallocated in this variable
+	 */
+	u16 target_dev;
 };
 
 /*
@@ -208,8 +248,9 @@ struct amd_iommu {
	/* locks the accesses to the hardware */
	spinlock_t lock;
 
-	/* device id of this IOMMU */
-	u16 devid;
+	/* Pointer to PCI device of this IOMMU */
+	struct pci_dev *dev;
+
	/*
	 * Capability pointer. There could be more than one IOMMU per PCI
	 * device function if there are more than one AMD IOMMU capability
@@ -225,6 +266,9 @@ struct amd_iommu {
	/* capabilities of that IOMMU read from ACPI */
	u32 cap;
 
+	/* pci domain of this IOMMU */
+	u16 pci_seg;
+
	/* first device this IOMMU handles. read from PCI */
	u16 first_device;
	/* last device this IOMMU handles. read from PCI */
@@ -240,9 +284,19 @@ struct amd_iommu {
	/* size of command buffer */
	u32 cmd_buf_size;
 
+	/* event buffer virtual address */
+	u8 *evt_buf;
+	/* size of event buffer */
+	u32 evt_buf_size;
+	/* MSI number for event interrupt */
+	u16 evt_msi_num;
+
	/* if one, we need to send a completion wait command */
	int need_sync;
 
+	/* true if interrupts for this IOMMU are already enabled */
+	bool int_enabled;
+
	/* default dma_ops domain for that IOMMU */
	struct dma_ops_domain *default_dom;
 };
@@ -322,6 +376,12 @@ extern unsigned long *amd_iommu_pd_alloc_bitmap;
 /* will be 1 if device isolation is enabled */
 extern int amd_iommu_isolate;
 
+/*
+ * If true, the addresses will be flushed on unmap time, not when
+ * they are reused
+ */
+extern bool amd_iommu_unmap_flush;
+
 /* takes a PCI device id and prints it out in a readable form */
 static inline void print_devid(u16 devid, int nl)
 {
......
@@ -424,16 +424,6 @@ static inline int fls(int x)
 
 #undef ADDR
 
-static inline void set_bit_string(unsigned long *bitmap,
-		unsigned long i, int len)
-{
-	unsigned long end = i + len;
-	while (i < end) {
-		__set_bit(i, bitmap);
-		i++;
-	}
-}
-
 #ifdef __KERNEL__
 
 #include <asm-generic/bitops/sched.h>
......
@@ -9,12 +9,12 @@
 #include <linux/scatterlist.h>
 #include <asm/io.h>
 #include <asm/swiotlb.h>
+#include <asm-generic/dma-coherent.h>
 
 extern dma_addr_t bad_dma_address;
 extern int iommu_merge;
-extern struct device fallback_dev;
+extern struct device x86_dma_fallback_dev;
 extern int panic_on_overflow;
-extern int force_iommu;
 
 struct dma_mapping_ops {
	int             (*mapping_error)(struct device *dev,
@@ -25,9 +25,6 @@ struct dma_mapping_ops {
				void *vaddr, dma_addr_t dma_handle);
	dma_addr_t      (*map_single)(struct device *hwdev, phys_addr_t ptr,
				size_t size, int direction);
-	/* like map_single, but doesn't check the device mask */
-	dma_addr_t      (*map_simple)(struct device *hwdev, phys_addr_t ptr,
-				size_t size, int direction);
	void            (*unmap_single)(struct device *dev, dma_addr_t addr,
				size_t size, int direction);
	void            (*sync_single_for_cpu)(struct device *hwdev,
@@ -68,7 +65,7 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
		return dma_ops;
	else
		return dev->archdata.dma_ops;
-#endif
+#endif /* ASM_X86__DMA_MAPPING_H */
 }
 
 /* Make sure we keep the same behaviour */
@@ -87,17 +84,14 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
 #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
 
-#define dma_is_consistent(d, h)	(1)
-
-void *dma_alloc_coherent(struct device *dev, size_t size,
-			 dma_addr_t *dma_handle, gfp_t flag);
-void dma_free_coherent(struct device *dev, size_t size,
-		       void *vaddr, dma_addr_t dma_handle);
-
 extern int dma_supported(struct device *hwdev, u64 mask);
 extern int dma_set_mask(struct device *dev, u64 mask);
 
+extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
+					dma_addr_t *dma_addr, gfp_t flag);
+
 static inline dma_addr_t
 dma_map_single(struct device *hwdev, void *ptr, size_t size,
	       int direction)
@@ -247,7 +241,68 @@ static inline int dma_get_cache_alignment(void)
	return boot_cpu_data.x86_clflush_size;
 }
 
-#define dma_is_consistent(d, h)	(1)
-
-#include <asm-generic/dma-coherent.h>
-#endif /* ASM_X86__DMA_MAPPING_H */
+static inline unsigned long dma_alloc_coherent_mask(struct device *dev,
+						    gfp_t gfp)
+{
+	unsigned long dma_mask = 0;
+
+	dma_mask = dev->coherent_dma_mask;
+	if (!dma_mask)
+		dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK;
+
+	return dma_mask;
+}
+
+static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
+{
+#ifdef CONFIG_X86_64
+	unsigned long dma_mask = dma_alloc_coherent_mask(dev, gfp);
+
+	if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA))
+		gfp |= GFP_DMA32;
+#endif
+	return gfp;
+}
+
+static inline void *
+dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		gfp_t gfp)
+{
+	struct dma_mapping_ops *ops = get_dma_ops(dev);
+	void *memory;
+
+	gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
+
+	if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
+		return memory;
+
+	if (!dev) {
+		dev = &x86_dma_fallback_dev;
+		gfp |= GFP_DMA;
+	}
+
+	if (!is_device_dma_capable(dev))
+		return NULL;
+
+	if (!ops->alloc_coherent)
+		return NULL;
+
+	return ops->alloc_coherent(dev, size, dma_handle,
+				   dma_alloc_coherent_gfp_flags(dev, gfp));
+}
+
+static inline void dma_free_coherent(struct device *dev, size_t size,
+				     void *vaddr, dma_addr_t bus)
+{
+	struct dma_mapping_ops *ops = get_dma_ops(dev);
+
+	WARN_ON(irqs_disabled());       /* for portability */
+
+	if (dma_release_from_coherent(dev, get_order(size), vaddr))
+		return;
+
+	if (ops->free_coherent)
+		ops->free_coherent(dev, size, vaddr, bus);
+}
+
+#endif
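
For orientation, a minimal sketch of how a driver would consume the inline dma_alloc_coherent()/dma_free_coherent() wrappers above. The function name, the pci_dev pointer and the 4 KiB size are placeholders, not anything defined by this patch:

	/* Hypothetical driver fragment: allocate, use and free a coherent buffer */
	static int example_setup_ring(struct pci_dev *pdev)
	{
		dma_addr_t ring_dma;
		void *ring;

		ring = dma_alloc_coherent(&pdev->dev, 4096, &ring_dma, GFP_KERNEL);
		if (!ring)
			return -ENOMEM;

		/* ... program ring_dma into the device, touch 'ring' from the CPU ... */

		dma_free_coherent(&pdev->dev, 4096, ring, ring_dma);
		return 0;
	}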
@@ -29,6 +29,8 @@ extern int fix_aperture;
 #define AMD64_GARTCACHECTL	0x9c
 #define AMD64_GARTEN		(1<<0)
 
+extern int agp_amd64_init(void);
+
 static inline void enable_gart_translation(struct pci_dev *dev, u64 addr)
 {
	u32 tmp, ctl;
......
@@ -6,6 +6,7 @@ extern void no_iommu_init(void);
 extern struct dma_mapping_ops nommu_dma_ops;
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
+extern int dmar_disabled;
 
 extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len);
......
@@ -48,6 +48,11 @@ static inline int is_device_dma_capable(struct device *dev)
	return dev->dma_mask != NULL && *dev->dma_mask != DMA_MASK_NONE;
 }
 
+static inline int is_buffer_dma_capable(u64 mask, dma_addr_t addr, size_t size)
+{
+	return addr + size <= mask;
+}
+
 #ifdef CONFIG_HAS_DMA
 #include <asm/dma-mapping.h>
 #else
@@ -58,6 +63,13 @@ static inline int is_device_dma_capable(struct device *dev)
 #define dma_sync_single		dma_sync_single_for_cpu
 #define dma_sync_sg		dma_sync_sg_for_cpu
 
+static inline u64 dma_get_mask(struct device *dev)
+{
+	if (dev && dev->dma_mask && *dev->dma_mask)
+		return *dev->dma_mask;
+	return DMA_32BIT_MASK;
+}
+
 extern u64 dma_get_required_mask(struct device *dev);
 
 static inline unsigned int dma_get_max_seg_size(struct device *dev)
......
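A small sketch of what the two helpers added above express together (the wrapper name is made up for illustration): a buffer at bus address addr of size bytes is reachable by a device when addr + size <= its DMA mask, with dma_get_mask() falling back to DMA_32BIT_MASK when no mask is set.

	/* Illustration only */
	static int buffer_reachable(struct device *dev, dma_addr_t addr, size_t size)
	{
		u64 mask = dma_get_mask(dev);	/* DMA_32BIT_MASK if unset */

		return is_buffer_dma_capable(mask, addr, size);
	}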
+#ifndef _LINUX_IOMMU_HELPER_H
+#define _LINUX_IOMMU_HELPER_H
+
+static inline unsigned long iommu_device_max_index(unsigned long size,
+						   unsigned long offset,
+						   u64 dma_mask)
+{
+	if (size + offset > dma_mask)
+		return dma_mask - offset + 1;
+	else
+		return size;
+}
+
 extern int iommu_is_span_boundary(unsigned int index, unsigned int nr,
				  unsigned long shift,
				  unsigned long boundary_size);
+extern void iommu_area_reserve(unsigned long *map, unsigned long i, int len);
 extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
				      unsigned long start, unsigned int nr,
				      unsigned long shift,
@@ -8,3 +22,5 @@ extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
				      unsigned long align_mask);
 extern void iommu_area_free(unsigned long *map, unsigned long start,
			    unsigned int nr);
+
+#endif
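
To show how the reworked bitmap helpers are used together (mirroring the GART and Calgary call sites elsewhere in this series), a hedged sketch with made-up sizes and a hypothetical function name:

	/* Illustration: manage a 1024-page aperture bitmap with the helpers above.
	 * 'bitmap' holds one bit per page and starts zeroed. */
	static unsigned long example_alloc(unsigned long *bitmap, int npages)
	{
		unsigned long boundary_size = 1UL << (32 - PAGE_SHIFT); /* 4 GiB */
		unsigned long off;

		/* keep the first 32 pages permanently reserved, as the GART
		 * does with its EMERGENCY_PAGES */
		iommu_area_reserve(bitmap, 0, 32);

		off = iommu_area_alloc(bitmap, 1024, 0, npages, 0,
				       boundary_size, 0);
		if (off == -1)
			return -1;	/* aperture exhausted */

		/* ... later: iommu_area_free(bitmap, off, npages); */
		return off;
	}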
@@ -497,6 +497,16 @@
 #define PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP	0x1101
 #define PCI_DEVICE_ID_AMD_K8_NB_MEMCTL	0x1102
 #define PCI_DEVICE_ID_AMD_K8_NB_MISC	0x1103
+#define PCI_DEVICE_ID_AMD_10H_NB_HT	0x1200
+#define PCI_DEVICE_ID_AMD_10H_NB_MAP	0x1201
+#define PCI_DEVICE_ID_AMD_10H_NB_DRAM	0x1202
+#define PCI_DEVICE_ID_AMD_10H_NB_MISC	0x1203
+#define PCI_DEVICE_ID_AMD_10H_NB_LINK	0x1204
+#define PCI_DEVICE_ID_AMD_11H_NB_HT	0x1300
+#define PCI_DEVICE_ID_AMD_11H_NB_MAP	0x1301
+#define PCI_DEVICE_ID_AMD_11H_NB_DRAM	0x1302
+#define PCI_DEVICE_ID_AMD_11H_NB_MISC	0x1303
+#define PCI_DEVICE_ID_AMD_11H_NB_LINK	0x1304
 #define PCI_DEVICE_ID_AMD_LANCE		0x2000
 #define PCI_DEVICE_ID_AMD_LANCE_HOME	0x2001
 #define PCI_DEVICE_ID_AMD_SCSI		0x2020
......
@@ -124,6 +124,7 @@ int dma_alloc_from_coherent(struct device *dev, ssize_t size,
	}
	return (mem != NULL);
 }
+EXPORT_SYMBOL(dma_alloc_from_coherent);
 
 /**
  * dma_release_from_coherent() - try to free the memory allocated from per-device coherent memory pool
@@ -151,3 +152,4 @@ int dma_release_from_coherent(struct device *dev, int order, void *vaddr)
	}
	return 0;
 }
+EXPORT_SYMBOL(dma_release_from_coherent);
@@ -30,8 +30,7 @@ static unsigned long find_next_zero_area(unsigned long *map,
	return index;
 }
 
-static inline void set_bit_area(unsigned long *map, unsigned long i,
-				int len)
+void iommu_area_reserve(unsigned long *map, unsigned long i, int len)
 {
	unsigned long end = i + len;
	while (i < end) {
@@ -64,7 +63,7 @@ unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
			start = index + 1;
			goto again;
		}
-		set_bit_area(map, index, nr);
+		iommu_area_reserve(map, index, nr);
	}
	return index;
 }
......
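The rename above simply exports the old set_bit_area() logic as iommu_area_reserve(), so IOMMU drivers can pre-mark regions (e.g. firmware-mandated unity mappings) as used in the same bitmap that iommu_area_alloc() searches. A small self-contained illustration with made-up sizes:

#include <linux/bitmap.h>
#include <linux/iommu-helper.h>
#include <asm/page.h>

#define MY_APERTURE_PAGES	1024		/* example: 4 MB, 4 KB pages */

static DECLARE_BITMAP(my_aperture_map, MY_APERTURE_PAGES);

static unsigned long my_setup_and_alloc(unsigned int npages)
{
	/* Example: the first 64 pages are a unity mapping, never hand out. */
	iommu_area_reserve(my_aperture_map, 0, 64);

	/* Allocate npages free entries, not crossing a 4 GB boundary. */
	return iommu_area_alloc(my_aperture_map, MY_APERTURE_PAGES, 0, npages,
				0, 1UL << (32 - PAGE_SHIFT), 0);
}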
@@ -274,13 +274,14 @@ swiotlb_late_init_with_default_size(size_t default_size)
 }
 
 static int
-address_needs_mapping(struct device *hwdev, dma_addr_t addr)
+address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size)
 {
-	dma_addr_t mask = 0xffffffff;
-	/* If the device has a mask, use it, otherwise default to 32 bits */
-	if (hwdev && hwdev->dma_mask)
-		mask = *hwdev->dma_mask;
-
-	return (addr & ~mask) != 0;
+	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
+}
+
+static int is_swiotlb_buffer(char *addr)
+{
+	return addr >= io_tlb_start && addr < io_tlb_end;
 }
 
 /*
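is_buffer_dma_capable() itself is not shown in this hunk; at this point in the series it is a small helper in include/linux/dma-mapping.h. The open-coded mask test it replaces boils down to an end-of-buffer check, roughly the following (an illustrative re-statement, not a copy of that header):

/* Illustrative re-statement: the whole buffer must sit below the mask. */
static inline int my_buffer_dma_capable(u64 mask, dma_addr_t addr, size_t size)
{
	return addr + size <= mask;
}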
@@ -467,15 +468,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	void *ret;
 	int order = get_order(size);
 
-	/*
-	 * XXX fix me: the DMA API should pass us an explicit DMA mask
-	 * instead, or use ZONE_DMA32 (ia64 overloads ZONE_DMA to be a ~32
-	 * bit range instead of a 16MB one).
-	 */
-	flags |= GFP_DMA;
-
 	ret = (void *)__get_free_pages(flags, order);
-	if (ret && address_needs_mapping(hwdev, virt_to_bus(ret))) {
+	if (ret && address_needs_mapping(hwdev, virt_to_bus(ret), size)) {
 		/*
 		 * The allocated memory isn't reachable by the device.
 		 * Fall back on swiotlb_map_single().
@@ -490,19 +484,16 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		 * swiotlb_map_single(), which will grab memory from
 		 * the lowest available address range.
 		 */
-		dma_addr_t handle;
-		handle = swiotlb_map_single(hwdev, NULL, size, DMA_FROM_DEVICE);
-		if (swiotlb_dma_mapping_error(hwdev, handle))
+		ret = map_single(hwdev, NULL, size, DMA_FROM_DEVICE);
+		if (!ret)
 			return NULL;
-
-		ret = bus_to_virt(handle);
 	}
 
 	memset(ret, 0, size);
 	dev_addr = virt_to_bus(ret);
 
 	/* Confirm address can be DMA'd by device */
-	if (address_needs_mapping(hwdev, dev_addr)) {
+	if (address_needs_mapping(hwdev, dev_addr, size)) {
 		printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
 		       (unsigned long long)*hwdev->dma_mask,
 		       (unsigned long long)dev_addr);
@@ -518,12 +509,11 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 		      dma_addr_t dma_handle)
 {
 	WARN_ON(irqs_disabled());
-	if (!(vaddr >= (void *)io_tlb_start
-	    && vaddr < (void *)io_tlb_end))
+	if (!is_swiotlb_buffer(vaddr))
 		free_pages((unsigned long) vaddr, get_order(size));
 	else
 		/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
-		swiotlb_unmap_single (hwdev, dma_handle, size, DMA_TO_DEVICE);
+		unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
 }
 
 static void
@@ -567,7 +557,7 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
 	 * we can safely return the device addr and not worry about bounce
 	 * buffering it.
 	 */
-	if (!address_needs_mapping(hwdev, dev_addr) && !swiotlb_force)
+	if (!address_needs_mapping(hwdev, dev_addr, size) && !swiotlb_force)
 		return dev_addr;
 
 	/*
@@ -584,7 +574,7 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
 	/*
 	 * Ensure that the address returned is DMA'ble
 	 */
-	if (address_needs_mapping(hwdev, dev_addr))
+	if (address_needs_mapping(hwdev, dev_addr, size))
 		panic("map_single: bounce buffer is not DMA'ble");
 
 	return dev_addr;
@@ -612,7 +602,7 @@ swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr,
 	char *dma_addr = bus_to_virt(dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
-	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
+	if (is_swiotlb_buffer(dma_addr))
 		unmap_single(hwdev, dma_addr, size, dir);
 	else if (dir == DMA_FROM_DEVICE)
 		dma_mark_clean(dma_addr, size);
@@ -642,7 +632,7 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
 	char *dma_addr = bus_to_virt(dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
-	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
+	if (is_swiotlb_buffer(dma_addr))
 		sync_single(hwdev, dma_addr, size, dir, target);
 	else if (dir == DMA_FROM_DEVICE)
 		dma_mark_clean(dma_addr, size);
@@ -673,7 +663,7 @@ swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
 	char *dma_addr = bus_to_virt(dev_addr) + offset;
 
 	BUG_ON(dir == DMA_NONE);
-	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
+	if (is_swiotlb_buffer(dma_addr))
 		sync_single(hwdev, dma_addr, size, dir, target);
 	else if (dir == DMA_FROM_DEVICE)
 		dma_mark_clean(dma_addr, size);
@@ -727,7 +717,8 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 	for_each_sg(sgl, sg, nelems, i) {
 		addr = SG_ENT_VIRT_ADDRESS(sg);
 		dev_addr = virt_to_bus(addr);
-		if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) {
+		if (swiotlb_force ||
+		    address_needs_mapping(hwdev, dev_addr, sg->length)) {
 			void *map = map_single(hwdev, addr, sg->length, dir);
 			if (!map) {
 				/* Don't panic here, we expect map_sg users
......
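Taken together, these swiotlb changes route every reachability decision through address_needs_mapping(), now mask- and size-aware, and every bounce-pool membership test through is_swiotlb_buffer(). A condensed sketch of the resulting single-buffer map path, simplified from the control flow above (the overflow-buffer handling of the real code is omitted):

/* Condensed sketch of swiotlb_map_single_attrs() after this change. */
static dma_addr_t sketch_map_single(struct device *hwdev, void *ptr,
				    size_t size, int dir)
{
	dma_addr_t dev_addr = virt_to_bus(ptr);

	/* Device can reach the buffer and bouncing is not forced. */
	if (!address_needs_mapping(hwdev, dev_addr, size) && !swiotlb_force)
		return dev_addr;

	/* Otherwise copy through a bounce buffer from the swiotlb pool. */
	ptr = map_single(hwdev, ptr, size, dir);
	if (!ptr)
		return 0;	/* real code falls back to an overflow buffer */

	dev_addr = virt_to_bus(ptr);

	/* The bounce buffer itself must also be reachable. */
	if (address_needs_mapping(hwdev, dev_addr, size))
		panic("map_single: bounce buffer is not DMA'ble");

	return dev_addr;
}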