From 426b3b8d535e3e141331dc19c40f457b997c4d6d Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Wed, 11 Jun 2008 16:35:16 +0200 Subject: pci: add quirk to disable boot interrupt generation on intel 6300ESB Add a quirk to disable legacy boot interrupt generation on intel devices that support disabling it. This patch benefited from discussions with Alexander Graf, Torsten Duwe, Ihno Krumreich, Daniel Gollub, Hannes Reinecke. The conclusions we drew and the patch itself are the authors' responsibility alone. Signed-off-by: Stefan Assmann Signed-off-by: Olaf Dabrunz Signed-off-by: Ingo Molnar --- drivers/pci/quirks.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 338a3f94b4d4..eb97564316d0 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1363,6 +1363,36 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x2609, quirk_intel_pcie_pm); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x260a, quirk_intel_pcie_pm); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x260b, quirk_intel_pcie_pm); +#ifdef CONFIG_X86_IO_APIC +/* + * On some chipsets we can disable the generation of legacy INTx boot + * interrupts. + */ + +/* + * IO-APIC1 on 6300ESB generates boot interrupts, see intel order no + * 300641-004US, section 5.7.3. 
+ */ +#define INTEL_6300_IOAPIC_ABAR 0x40 +#define INTEL_6300_DISABLE_BOOT_IRQ (1<<14) + +static void quirk_disable_intel_boot_interrupt(struct pci_dev *dev) +{ + u16 pci_config_word; + + if (noioapicquirk) + return; + + pci_read_config_word(dev, INTEL_6300_IOAPIC_ABAR, &pci_config_word); + pci_config_word |= INTEL_6300_DISABLE_BOOT_IRQ; + pci_write_config_word(dev, INTEL_6300_IOAPIC_ABAR, pci_config_word); + + printk(KERN_INFO "disabled boot interrupt on device 0x%04x:0x%04x\n", + dev->vendor, dev->device); +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_10, quirk_disable_intel_boot_interrupt); +#endif /* CONFIG_X86_IO_APIC */ + /* * Toshiba TC86C001 IDE controller reports the standard 8-byte BAR0 size * but the PIO transfers won't work if BAR0 falls at the odd 8 bytes. -- cgit 1.4.1 From e1d3a90846b40ad3160bf4b648d36c6badad39ac Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Wed, 11 Jun 2008 16:35:17 +0200 Subject: pci, acpi: reroute PCI interrupt to legacy boot interrupt equivalent Some chipsets (e.g. intel 6700PXH) generate a legacy INTx when the IRQ entry in the chipset's IO-APIC is masked (as, e.g. the RT kernel does during interrupt handling). On chipsets where this INTx generation cannot be disabled, we reroute the valid interrupts to their legacy equivalent to get rid of spurious interrupts that might otherwise bring down (vital) interrupt lines through spurious interrupt detection in note_interrupt(). This patch benefited from discussions with Alexander Graf, Torsten Duwe, Ihno Krumreich, Daniel Gollub, Hannes Reinecke. The conclusions we drew and the patch itself are the authors' responsibility alone. 
Signed-off-by: Stefan Assmann Signed-off-by: Olaf Dabrunz Signed-off-by: Ingo Molnar --- drivers/acpi/pci_irq.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++ drivers/pci/quirks.c | 28 +++++++++++++++++++++++++ include/linux/pci.h | 6 ++++++ 3 files changed, 90 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index 89022a74faee..b37cb0a9826e 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -384,6 +384,27 @@ acpi_pci_free_irq(struct acpi_prt_entry *entry, return irq; } +#ifdef CONFIG_X86_IO_APIC +extern int noioapicquirk; + +static int bridge_has_boot_interrupt_variant(struct pci_bus *bus) +{ + struct pci_bus *bus_it; + + for (bus_it = bus ; bus_it ; bus_it = bus_it->parent) { + if (!bus_it->self) + return 0; + + printk(KERN_INFO "vendor=%04x device=%04x\n", bus_it->self->vendor, + bus_it->self->device); + + if (bus_it->self->irq_reroute_variant) + return bus_it->self->irq_reroute_variant; + } + return 0; +} +#endif /* CONFIG_X86_IO_APIC */ + /* * acpi_pci_irq_lookup * success: return IRQ >= 0 @@ -413,6 +434,41 @@ acpi_pci_irq_lookup(struct pci_bus *bus, } ret = func(entry, triggering, polarity, link); + +#ifdef CONFIG_X86_IO_APIC + /* + * Some chipsets (e.g. intel 6700PXH) generate a legacy INTx when the + * IRQ entry in the chipset's IO-APIC is masked (as, e.g. the RT kernel + * does during interrupt handling). When this INTx generation cannot be + * disabled, we reroute these interrupts to their legacy equivalent to + * get rid of spurious interrupts. + */ + if (!noioapicquirk) { + switch (bridge_has_boot_interrupt_variant(bus)) { + case 0: + /* no rerouting necessary */ + break; + + case INTEL_IRQ_REROUTE_VARIANT: + /* + * Remap according to INTx routing table in 6700PXH + * specs, intel order number 302628-002, section + * 2.15.2. Other chipsets (80332, ...) have the same + * mapping and are handled here as well. 
+ */ + printk(KERN_INFO "pci irq %d -> rerouted to legacy " + "irq %d\n", ret, (ret % 4) + 16); + ret = (ret % 4) + 16; + break; + + default: + printk(KERN_INFO "not rerouting irq %d to legacy irq: " + "unknown mapping\n", ret); + break; + } + } +#endif /* CONFIG_X86_IO_APIC */ + return ret; } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index eb97564316d0..ac634ae2eb08 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1364,6 +1364,34 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x260a, quirk_intel_pcie_pm); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x260b, quirk_intel_pcie_pm); #ifdef CONFIG_X86_IO_APIC +/* + * Boot interrupts on some chipsets cannot be turned off. For these chipsets, + * remap the original interrupt in the linux kernel to the boot interrupt, so + * that a PCI device's interrupt handler is installed on the boot interrupt + * line instead. + */ +static void quirk_reroute_to_boot_interrupts_intel(struct pci_dev *dev) +{ + int i; + + if (noioapicquirk) + return; + + dev->irq_reroute_variant = INTEL_IRQ_REROUTE_VARIANT; + + printk(KERN_INFO "PCI quirk: reroute interrupts for 0x%04x:0x%04x\n", + dev->vendor, dev->device); + return; +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80333_0, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80333_1, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXH_0, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXH_1, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXHV, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80332_0, 
quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80332_1, quirk_reroute_to_boot_interrupts_intel); + /* * On some chipsets we can disable the generation of legacy INTx boot * interrupts. diff --git a/include/linux/pci.h b/include/linux/pci.h index d18b1dd49fab..6755cf5ac109 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -117,6 +117,11 @@ enum pci_dev_flags { PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) 1, }; +enum pci_irq_reroute_variant { + INTEL_IRQ_REROUTE_VARIANT = 1, + MAX_IRQ_REROUTE_VARIANTS = 3 +}; + typedef unsigned short __bitwise pci_bus_flags_t; enum pci_bus_flags { PCI_BUS_FLAGS_NO_MSI = (__force pci_bus_flags_t) 1, @@ -194,6 +199,7 @@ struct pci_dev { unsigned int no_d1d2:1; /* only allow d0 or d3 */ unsigned int block_ucfg_access:1; /* userspace config space access is blocked */ unsigned int broken_parity_status:1; /* Device generates false positive parity */ + unsigned int irq_reroute_variant:2; /* device needs IRQ rerouting variant */ unsigned int msi_enabled:1; unsigned int msix_enabled:1; unsigned int is_managed:1; -- cgit 1.4.1 From b0356cd0e7497252a2c45ecb07b79d931390c8b2 Mon Sep 17 00:00:00 2001 From: Bernhard Walle Date: Fri, 20 Jun 2008 17:05:33 +0200 Subject: pci, acpi: reroute PCI interrupt to legacy boot interrupt equivalent, warning fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch just fixes the compiler warning: drivers/pci/quirks.c: In function ‘quirk_reroute_to_boot_interrupts_intel’: drivers/pci/quirks.c:1375: warning: unused variable ‘i’ Signed-off-by: Bernhard Walle Cc: sassmann@suse.de Signed-off-by: Ingo Molnar --- drivers/pci/quirks.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index ac634ae2eb08..9871a3cca4d4 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1372,8 +1372,6 @@ 
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x260b, quirk_intel_pcie_pm); */ static void quirk_reroute_to_boot_interrupts_intel(struct pci_dev *dev) { - int i; - if (noioapicquirk) return; -- cgit 1.4.1 From 7725118815537bd481a81828fa125ee154ee74fc Mon Sep 17 00:00:00 2001 From: Olaf Dabrunz Date: Tue, 8 Jul 2008 15:59:47 +0200 Subject: PCI quirks: add quirk to disable boot interrupt generation on broadcom HT1000 Signed-off-by: Olaf Dabrunz Signed-off-by: Stefan Assmann Cc: Jon Masters Cc: Olaf Dabrunz Cc: Stefan Assmann Cc: Ihno Krumreich Cc: Sven Dietrich Cc: Daniel Gollub Cc: Felix Foerster Signed-off-by: Ingo Molnar --- drivers/pci/quirks.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 9871a3cca4d4..d1f0281279c3 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1417,6 +1417,38 @@ static void quirk_disable_intel_boot_interrupt(struct pci_dev *dev) dev->vendor, dev->device); } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_10, quirk_disable_intel_boot_interrupt); + +/* + * disable boot interrupts on HT-1000 + */ +#define BC_HT1000_FEATURE_REG 0x64 +#define BC_HT1000_PIC_REGS_ENABLE (1<<0) +#define BC_HT1000_MAP_IDX 0xC00 +#define BC_HT1000_MAP_DATA 0xC01 + +static void quirk_disable_broadcom_boot_interrupt(struct pci_dev *dev) +{ + u32 pci_config_dword; + u8 irq; + + if (noioapicquirk) + return; + + pci_read_config_dword(dev, BC_HT1000_FEATURE_REG, &pci_config_dword); + pci_write_config_dword(dev, BC_HT1000_FEATURE_REG, pci_config_dword | + BC_HT1000_PIC_REGS_ENABLE); + + for (irq = 0x10; irq < 0x10 + 32; irq++) { + outb(irq, BC_HT1000_MAP_IDX); + outb(0x00, BC_HT1000_MAP_DATA); + } + + pci_write_config_dword(dev, BC_HT1000_FEATURE_REG, pci_config_dword); + + printk(KERN_INFO "disabled boot interrupts on PCI device" + "0x%04x:0x%04x\n", dev->vendor, dev->device); +} 
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT1000SB, quirk_disable_broadcom_boot_interrupt); #endif /* CONFIG_X86_IO_APIC */ /* -- cgit 1.4.1 From 542622da899be17250b8cae2ef5203025b91230a Mon Sep 17 00:00:00 2001 From: Olaf Dabrunz Date: Tue, 8 Jul 2008 15:59:48 +0200 Subject: PCI quirks: disable AMD/ATI legacy boot interrupt generation Add quirks for several AMD/ATI chipsets to prevent generation of legacy boot interrupts. Integrates a separate older quirk to make IO-APIC mode work on AMD 8131 rev. A0 and B0, which was due to an AMD erratum. Signed-off-by: Olaf Dabrunz Signed-off-by: Stefan Assmann Cc: Jon Masters Cc: Olaf Dabrunz Cc: Stefan Assmann Cc: Ihno Krumreich Cc: Sven Dietrich Cc: Daniel Gollub Cc: Felix Foerster Signed-off-by: Ingo Molnar --- drivers/pci/quirks.c | 71 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 21 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index d1f0281279c3..aed7c5e55dce 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -602,27 +602,6 @@ static void __init quirk_ioapic_rmw(struct pci_dev *dev) sis_apic_bug = 1; } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SI, PCI_ANY_ID, quirk_ioapic_rmw); - -#define AMD8131_revA0 0x01 -#define AMD8131_revB0 0x11 -#define AMD8131_MISC 0x40 -#define AMD8131_NIOAMODE_BIT 0 -static void quirk_amd_8131_ioapic(struct pci_dev *dev) -{ - unsigned char tmp; - - if (nr_ioapics == 0) - return; - - if (dev->revision == AMD8131_revA0 || dev->revision == AMD8131_revB0) { - dev_info(&dev->dev, "Fixing up AMD8131 IOAPIC mode\n"); - pci_read_config_byte( dev, AMD8131_MISC, &tmp); - tmp &= ~(1 << AMD8131_NIOAMODE_BIT); - pci_write_config_byte( dev, AMD8131_MISC, tmp); - } -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_amd_8131_ioapic); -DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_amd_8131_ioapic); #endif /* 
CONFIG_X86_IO_APIC */ /* @@ -1449,6 +1428,56 @@ static void quirk_disable_broadcom_boot_interrupt(struct pci_dev *dev) "0x%04x:0x%04x\n", dev->vendor, dev->device); } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT1000SB, quirk_disable_broadcom_boot_interrupt); + +/* + * disable boot interrupts on AMD and ATI chipsets + */ +/* + * NOIOAMODE needs to be disabled to disable "boot interrupts". For AMD 8131 + * rev. A0 and B0, NOIOAMODE needs to be disabled anyway to fix IO-APIC mode + * (due to an erratum). + */ +#define AMD_813X_MISC 0x40 +#define AMD_813X_NOIOAMODE (1<<0) + +static void quirk_disable_amd_813x_boot_interrupt(struct pci_dev *dev) +{ + u32 pci_config_dword; + + if (noioapicquirk) + return; + + pci_read_config_dword(dev, AMD_813X_MISC, &pci_config_dword); + pci_config_dword &= ~AMD_813X_NOIOAMODE; + pci_write_config_dword(dev, AMD_813X_MISC, pci_config_dword); + + printk(KERN_INFO "disabled boot interrupts on PCI device " + "0x%04x:0x%04x\n", dev->vendor, dev->device); +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_disable_amd_813x_boot_interrupt); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8132_BRIDGE, quirk_disable_amd_813x_boot_interrupt); + +#define AMD_8111_PCI_IRQ_ROUTING 0x56 + +static void quirk_disable_amd_8111_boot_interrupt(struct pci_dev *dev) +{ + u16 pci_config_word; + + if (noioapicquirk) + return; + + pci_read_config_word(dev, AMD_8111_PCI_IRQ_ROUTING, &pci_config_word); + if (!pci_config_word) { + printk(KERN_INFO "boot interrupts on PCI device 0x%04x:0x%04x " + "already disabled\n", + dev->vendor, dev->device); + return; + } + pci_write_config_word(dev, AMD_8111_PCI_IRQ_ROUTING, 0); + printk(KERN_INFO "disabled boot interrupts on PCI device " + "0x%04x:0x%04x\n", dev->vendor, dev->device); +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_SMBUS, quirk_disable_amd_8111_boot_interrupt); #endif /* CONFIG_X86_IO_APIC */ /* -- cgit 
1.4.1 From 88d1dce3a74367291f65a757fbdcaf17f042f30c Mon Sep 17 00:00:00 2001 From: Olaf Dabrunz Date: Tue, 8 Jul 2008 15:59:48 +0200 Subject: PCI quirks: call boot IRQ quirks at end of device init and during resume It is not necessary to call boot IRQ quirks before the BARs of the bridges are probed. The normal case is to use DECLARE_PCI_FIXUP_FINAL, so we use this instead now. After a resume, we need to call the quirks again. Signed-off-by: Olaf Dabrunz Signed-off-by: Stefan Assmann Cc: Jon Masters Cc: Olaf Dabrunz Cc: Stefan Assmann Cc: Ihno Krumreich Cc: Sven Dietrich Cc: Daniel Gollub Cc: Felix Foerster Signed-off-by: Ingo Molnar --- drivers/pci/quirks.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index aed7c5e55dce..6f77675d6131 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1360,14 +1360,22 @@ static void quirk_reroute_to_boot_interrupts_intel(struct pci_dev *dev) dev->vendor, dev->device); return; } -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80333_0, quirk_reroute_to_boot_interrupts_intel); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80333_1, quirk_reroute_to_boot_interrupts_intel); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, quirk_reroute_to_boot_interrupts_intel); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXH_0, quirk_reroute_to_boot_interrupts_intel); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXH_1, quirk_reroute_to_boot_interrupts_intel); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXHV, quirk_reroute_to_boot_interrupts_intel); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80332_0, quirk_reroute_to_boot_interrupts_intel); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80332_1, quirk_reroute_to_boot_interrupts_intel); 
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80333_0, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80333_1, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXH_0, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXH_1, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXHV, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80332_0, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80332_1, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80333_0, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80333_1, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXH_0, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXH_1, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXHV, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80332_0, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80332_1, quirk_reroute_to_boot_interrupts_intel); /* * On some chipsets we can disable the generation of legacy INTx boot @@ -1395,7 +1403,8 @@ static void quirk_disable_intel_boot_interrupt(struct 
pci_dev *dev) printk(KERN_INFO "disabled boot interrupt on device 0x%04x:0x%04x\n", dev->vendor, dev->device); } -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_10, quirk_disable_intel_boot_interrupt); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_10, quirk_disable_intel_boot_interrupt); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_10, quirk_disable_intel_boot_interrupt); /* * disable boot interrupts on HT-1000 @@ -1427,7 +1436,8 @@ static void quirk_disable_broadcom_boot_interrupt(struct pci_dev *dev) printk(KERN_INFO "disabled boot interrupts on PCI device" "0x%04x:0x%04x\n", dev->vendor, dev->device); } -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT1000SB, quirk_disable_broadcom_boot_interrupt); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT1000SB, quirk_disable_broadcom_boot_interrupt); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT1000SB, quirk_disable_broadcom_boot_interrupt); /* * disable boot interrupts on AMD and ATI chipsets @@ -1454,8 +1464,8 @@ static void quirk_disable_amd_813x_boot_interrupt(struct pci_dev *dev) printk(KERN_INFO "disabled boot interrupts on PCI device " "0x%04x:0x%04x\n", dev->vendor, dev->device); } -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_disable_amd_813x_boot_interrupt); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8132_BRIDGE, quirk_disable_amd_813x_boot_interrupt); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_disable_amd_813x_boot_interrupt); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8132_BRIDGE, quirk_disable_amd_813x_boot_interrupt); #define AMD_8111_PCI_IRQ_ROUTING 0x56 @@ -1477,7 +1487,8 @@ static void quirk_disable_amd_8111_boot_interrupt(struct pci_dev *dev) printk(KERN_INFO "disabled boot interrupts on PCI device " "0x%04x:0x%04x\n", 
dev->vendor, dev->device); } -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_SMBUS, quirk_disable_amd_8111_boot_interrupt); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_SMBUS, quirk_disable_amd_8111_boot_interrupt); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8111_SMBUS, quirk_disable_amd_8111_boot_interrupt); #endif /* CONFIG_X86_IO_APIC */ /* -- cgit 1.4.1 From 41b9eb264c8407655db57b60b4457fe1b2ec9977 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Tue, 15 Jul 2008 13:48:55 +0200 Subject: x86, pci: introduce config option for pci reroute quirks (was: [PATCH 0/3] Boot IRQ quirks for Broadcom and AMD/ATI) This is against linux-2.6-tip, branch pci-ioapic-boot-irq-quirks. From: Stefan Assmann Subject: Introduce config option for pci reroute quirks The config option X86_REROUTE_FOR_BROKEN_BOOT_IRQS is introduced to enable (or disable) the redirection of the interrupt handler to the boot interrupt line by default. Depending on the existence of interrupt masking / threaded interrupt handling in the kernel (vanilla, rt, ...) and the maturity of the rerouting patch, users can enable or disable the redirection by default. This means that the reroute quirk can be applied to any kernel without changing it. Interrupt sharing could be increased if this option is enabled. However this option is vital for threaded interrupt handling, as done by the RT kernel. It should simplify the consolidation with the RT kernel. The option can be overridden by either pci=ioapicreroute or pci=noioapicreroute. 
Signed-off-by: Stefan Assmann Signed-off-by: Olaf Dabrunz Cc: Jesse Barnes Cc: Jon Masters Cc: Ihno Krumreich Cc: Sven Dietrich Cc: Daniel Gollub Cc: Felix Foerster Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 4 ++++ arch/x86/Kconfig | 24 ++++++++++++++++++++++++ arch/x86/pci/common.c | 8 ++++++++ drivers/pci/quirks.c | 2 +- include/asm-x86/pci.h | 2 +- 5 files changed, 38 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index f5662b7a34d1..62b6e8067a5b 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1536,6 +1536,10 @@ and is between 256 and 4096 characters. It is defined in the file primary IO-APIC for bridges that cannot disable boot IRQs. This fixes a source of spurious IRQs when the system masks IRQs. + noioapicreroute [APIC] Disable workaround that uses the + boot IRQ equivalent of an IRQ that connects to + a chipset where boot IRQs cannot be disabled. + The opposite of ioapicreroute. biosirq [X86-32] Use PCI BIOS calls to get the interrupt routing table. These calls are known to be buggy on several machines and they hang the machine diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 96e0c2ebc388..09521332636b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -665,6 +665,30 @@ config X86_VISWS_APIC def_bool y depends on X86_32 && X86_VISWS +config X86_REROUTE_FOR_BROKEN_BOOT_IRQS + bool "Reroute for broken boot IRQs" + default n + depends on X86_IO_APIC + help + This option enables a workaround that fixes a source of + spurious interrupts. This is recommended when threaded + interrupt handling is used on systems where the generation of + superfluous "boot interrupts" cannot be disabled. + + Some chipsets generate a legacy INTx "boot IRQ" when the IRQ + entry in the chipset's IO-APIC is masked (as, e.g. the RT + kernel does during interrupt handling). 
On chipsets where this + boot IRQ generation cannot be disabled, this workaround keeps + the original IRQ line masked so that only the equivalent "boot + IRQ" is delivered to the CPUs. The workaround also tells the + kernel to set up the IRQ handler on the boot IRQ line. In this + way only one interrupt is delivered to the kernel. Otherwise + the spurious second interrupt may cause the kernel to bring + down (vital) interrupt lines. + + Only affects "broken" chipsets. Interrupt sharing may be + increased on these systems. + config X86_MCE bool "Machine Check Exception" depends on !X86_VOYAGER diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 1485a26ddcef..bb1a01f089e2 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -24,7 +24,11 @@ unsigned int pci_early_dump_regs; static int pci_bf_sort; int pci_routeirq; int noioapicquirk; +#ifdef CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS +int noioapicreroute = 0; +#else int noioapicreroute = 1; +#endif int pcibios_last_bus = -1; unsigned long pirq_table_addr; struct pci_bus *pci_root_bus; @@ -528,6 +532,10 @@ char * __devinit pcibios_setup(char *str) if (noioapicreroute != -1) noioapicreroute = 0; return NULL; + } else if (!strcmp(str, "noioapicreroute")) { + if (noioapicreroute != -1) + noioapicreroute = 1; + return NULL; } return str; } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 0911b0c60b64..c880dd0bbfb5 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1397,7 +1397,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x260b, quirk_intel_pcie_pm); */ static void quirk_reroute_to_boot_interrupts_intel(struct pci_dev *dev) { - if (noioapicquirk) + if (noioapicquirk || noioapicreroute) return; dev->irq_reroute_variant = INTEL_IRQ_REROUTE_VARIANT; diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h index 52a29f7668ef..9584d6d5eb93 100644 --- a/include/asm-x86/pci.h +++ b/include/asm-x86/pci.h @@ -20,7 +20,7 @@ struct pci_sysdata { extern int pci_routeirq; extern 
int noioapicquirk; -extern int ioapicreroute; +extern int noioapicreroute; /* scan a bus after allocating a pci_sysdata for it */ extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, -- cgit 1.4.1 From fd6852c8fa060bd45c82a2593e18f933f6c6204f Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 27 Oct 2008 19:48:52 +0000 Subject: powerpc/pci: Fix various pseries PCI hotplug issues The pseries PCI hotplug code has a number of issues, ranging from incorrect resource setup to crashes, depending on what is added, when, whether it contains a bridge, etc etc.... This fixes a whole bunch of these, while actually simplifying the code a bit, using more generic code in the process and factoring out common code between adding of a PHB, a slot or a device. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/pci-bridge.h | 3 - arch/powerpc/include/asm/pci.h | 5 +- arch/powerpc/kernel/pci-common.c | 41 +++++++- arch/powerpc/kernel/rtas_pci.c | 48 --------- arch/powerpc/platforms/pseries/pci_dlpar.c | 163 ++++++++++++++--------------- drivers/pci/hotplug/rpadlpar_core.c | 69 ++++++------ 6 files changed, 150 insertions(+), 179 deletions(-) (limited to 'drivers/pci') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 2dfa8a3d8c76..fa8b3b724438 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -241,9 +241,6 @@ extern void pcibios_remove_pci_devices(struct pci_bus *bus); /** Discover new pci devices under this bus, and add them */ extern void pcibios_add_pci_devices(struct pci_bus *bus); -extern void pcibios_fixup_new_pci_devices(struct pci_bus *bus); - -extern int pcibios_remove_root_bus(struct pci_controller *phb); static inline struct pci_controller *pci_bus_to_host(const struct pci_bus *bus) { diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 32e03e6d25c5..1c721a632d8e 100644 
--- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -204,15 +204,14 @@ static inline struct resource *pcibios_select_root(struct pci_dev *pdev, return root; } -extern void pcibios_setup_new_device(struct pci_dev *dev); - extern void pcibios_claim_one_bus(struct pci_bus *b); -extern void pcibios_allocate_bus_resources(struct pci_bus *bus); +extern void pcibios_finish_adding_to_bus(struct pci_bus *bus); extern void pcibios_resource_survey(void); extern struct pci_controller *init_phb_dynamic(struct device_node *dn); +extern int remove_phb_dynamic(struct pci_controller *phb); extern struct pci_dev *of_create_pci_dev(struct device_node *node, struct pci_bus *bus, int devfn); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index f965397a6105..f3fd7eb90a7b 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -203,7 +203,7 @@ char __devinit *pcibios_setup(char *str) return str; } -void __devinit pcibios_setup_new_device(struct pci_dev *dev) +static void __devinit pcibios_setup_new_device(struct pci_dev *dev) { struct dev_archdata *sd = &dev->dev.archdata; @@ -221,7 +221,6 @@ void __devinit pcibios_setup_new_device(struct pci_dev *dev) if (ppc_md.pci_dma_dev_setup) ppc_md.pci_dma_dev_setup(dev); } -EXPORT_SYMBOL(pcibios_setup_new_device); /* * Reads the interrupt pin to determine if interrupt is use by card. @@ -1397,9 +1396,10 @@ void __init pcibios_resource_survey(void) #ifdef CONFIG_HOTPLUG -/* This is used by the pSeries hotplug driver to allocate resource +/* This is used by the PCI hotplug driver to allocate resource * of newly plugged busses. We can try to consolidate with the - * rest of the code later, for now, keep it as-is + * rest of the code later, for now, keep it as-is as our main + * resource allocation function doesn't deal with sub-trees yet. 
*/ void __devinit pcibios_claim_one_bus(struct pci_bus *bus) { @@ -1414,6 +1414,14 @@ void __devinit pcibios_claim_one_bus(struct pci_bus *bus) if (r->parent || !r->start || !r->flags) continue; + + pr_debug("PCI: Claiming %s: " + "Resource %d: %016llx..%016llx [%x]\n", + pci_name(dev), i, + (unsigned long long)r->start, + (unsigned long long)r->end, + (unsigned int)r->flags); + pci_claim_resource(dev, i); } } @@ -1422,6 +1430,31 @@ void __devinit pcibios_claim_one_bus(struct pci_bus *bus) pcibios_claim_one_bus(child_bus); } EXPORT_SYMBOL_GPL(pcibios_claim_one_bus); + + +/* pcibios_finish_adding_to_bus + * + * This is to be called by the hotplug code after devices have been + * added to a bus, this includes calling it for a PHB that is just + * being added + */ +void pcibios_finish_adding_to_bus(struct pci_bus *bus) +{ + pr_debug("PCI: Finishing adding to hotplug bus %04x:%02x\n", + pci_domain_nr(bus), bus->number); + + /* Allocate bus and devices resources */ + pcibios_allocate_bus_resources(bus); + pcibios_claim_one_bus(bus); + + /* Add new devices to global lists. Register in proc, sysfs. 
*/ + pci_bus_add_devices(bus); + + /* Fixup EEH */ + eeh_add_device_tree_late(bus); +} +EXPORT_SYMBOL_GPL(pcibios_finish_adding_to_bus); + #endif /* CONFIG_HOTPLUG */ int pcibios_enable_device(struct pci_dev *dev, int mask) diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 589a2797eac2..8869001ab5d7 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -301,51 +301,3 @@ void __init find_and_init_phbs(void) #endif /* CONFIG_PPC32 */ } } - -/* RPA-specific bits for removing PHBs */ -int pcibios_remove_root_bus(struct pci_controller *phb) -{ - struct pci_bus *b = phb->bus; - struct resource *res; - int rc, i; - - res = b->resource[0]; - if (!res->flags) { - printk(KERN_ERR "%s: no IO resource for PHB %s\n", __func__, - b->name); - return 1; - } - - rc = pcibios_unmap_io_space(b); - if (rc) { - printk(KERN_ERR "%s: failed to unmap IO on bus %s\n", - __func__, b->name); - return 1; - } - - if (release_resource(res)) { - printk(KERN_ERR "%s: failed to release IO on bus %s\n", - __func__, b->name); - return 1; - } - - for (i = 1; i < 3; ++i) { - res = b->resource[i]; - if (!res->flags && i == 0) { - printk(KERN_ERR "%s: no MEM resource for PHB %s\n", - __func__, b->name); - return 1; - } - if (res->flags && release_resource(res)) { - printk(KERN_ERR - "%s: failed to release IO %d on bus %s\n", - __func__, i, b->name); - return 1; - } - } - - pcibios_free_controller(phb); - - return 0; -} -EXPORT_SYMBOL(pcibios_remove_root_bus); diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 7190493e9bdc..5e1ed3d60ee5 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -25,6 +25,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ +#undef DEBUG + #include #include #include @@ -69,74 +71,25 @@ EXPORT_SYMBOL_GPL(pcibios_find_pci_bus); * Remove all of the PCI devices under this bus both from the * linux pci device tree, and from the powerpc EEH address cache. */ -void -pcibios_remove_pci_devices(struct pci_bus *bus) +void pcibios_remove_pci_devices(struct pci_bus *bus) { - struct pci_dev *dev, *tmp; + struct pci_dev *dev, *tmp; + struct pci_bus *child_bus; + + /* First go down child busses */ + list_for_each_entry(child_bus, &bus->children, node) + pcibios_remove_pci_devices(child_bus); + pr_debug("PCI: Removing devices on bus %04x:%02x\n", + pci_domain_nr(bus), bus->number); list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) { + pr_debug(" * Removing %s...\n", pci_name(dev)); eeh_remove_bus_device(dev); - pci_remove_bus_device(dev); - } + pci_remove_bus_device(dev); + } } EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices); -/* Must be called before pci_bus_add_devices */ -void -pcibios_fixup_new_pci_devices(struct pci_bus *bus) -{ - struct pci_dev *dev; - - list_for_each_entry(dev, &bus->devices, bus_list) { - /* Skip already-added devices */ - if (!dev->is_added) { - int i; - - /* Fill device archdata and setup iommu table */ - pcibios_setup_new_device(dev); - - pci_read_irq_line(dev); - for (i = 0; i < PCI_NUM_RESOURCES; i++) { - struct resource *r = &dev->resource[i]; - - if (r->parent || !r->start || !r->flags) - continue; - pci_claim_resource(dev, i); - } - } - } -} -EXPORT_SYMBOL_GPL(pcibios_fixup_new_pci_devices); - -static int -pcibios_pci_config_bridge(struct pci_dev *dev) -{ - u8 sec_busno; - struct pci_bus *child_bus; - - /* Get busno of downstream bus */ - pci_read_config_byte(dev, PCI_SECONDARY_BUS, &sec_busno); - - /* Add to children of PCI bridge dev->bus */ - child_bus = pci_add_new_bus(dev->bus, dev, sec_busno); - if (!child_bus) { - printk (KERN_ERR "%s: could not add second bus\n", __func__); - return -EIO; - } - sprintf(child_bus->name, "PCI Bus #%02x", 
child_bus->number); - - pci_scan_child_bus(child_bus); - - /* Fixup new pci devices */ - pcibios_fixup_new_pci_devices(child_bus); - - /* Make the discovered devices available */ - pci_bus_add_devices(child_bus); - - eeh_add_device_tree_late(child_bus); - return 0; -} - /** * pcibios_add_pci_devices - adds new pci devices to bus * @@ -147,10 +100,9 @@ pcibios_pci_config_bridge(struct pci_dev *dev) * is how this routine differs from other, similar pcibios * routines.) */ -void -pcibios_add_pci_devices(struct pci_bus * bus) +void pcibios_add_pci_devices(struct pci_bus * bus) { - int slotno, num, mode; + int slotno, num, mode, pass, max; struct pci_dev *dev; struct device_node *dn = pci_bus_to_OF_node(bus); @@ -162,26 +114,23 @@ pcibios_add_pci_devices(struct pci_bus * bus) if (mode == PCI_PROBE_DEVTREE) { /* use ofdt-based probe */ - of_scan_bus(dn, bus); - if (!list_empty(&bus->devices)) { - pcibios_fixup_new_pci_devices(bus); - pci_bus_add_devices(bus); - eeh_add_device_tree_late(bus); - } + of_rescan_bus(dn, bus); } else if (mode == PCI_PROBE_NORMAL) { /* use legacy probe */ slotno = PCI_SLOT(PCI_DN(dn->child)->devfn); num = pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); - if (num) { - pcibios_fixup_new_pci_devices(bus); - pci_bus_add_devices(bus); - eeh_add_device_tree_late(bus); + if (!num) + return; + pcibios_setup_bus_devices(bus); + max = bus->secondary; + for (pass=0; pass < 2; pass++) + list_for_each_entry(dev, &bus->devices, bus_list) { + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || + dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) + max = pci_scan_bridge(bus, dev, max, pass); } - - list_for_each_entry(dev, &bus->devices, bus_list) - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) - pcibios_pci_config_bridge(dev); } + pcibios_finish_adding_to_bus(bus); } EXPORT_SYMBOL_GPL(pcibios_add_pci_devices); @@ -190,6 +139,8 @@ struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn) struct pci_controller *phb; int primary; + pr_debug("PCI: Initializing new 
hotplug PHB %s\n", dn->full_name); + primary = list_empty(&hose_list); phb = pcibios_alloc_controller(dn); if (!phb) @@ -203,11 +154,59 @@ struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn) eeh_add_device_tree_early(dn); scan_phb(phb); - pcibios_allocate_bus_resources(phb->bus); - pcibios_fixup_new_pci_devices(phb->bus); - pci_bus_add_devices(phb->bus); - eeh_add_device_tree_late(phb->bus); + pcibios_finish_adding_to_bus(phb->bus); return phb; } EXPORT_SYMBOL_GPL(init_phb_dynamic); + +/* RPA-specific bits for removing PHBs */ +int remove_phb_dynamic(struct pci_controller *phb) +{ + struct pci_bus *b = phb->bus; + struct resource *res; + int rc, i; + + pr_debug("PCI: Removing PHB %04x:%02x... \n", + pci_domain_nr(b), b->number); + + /* We cannot to remove a root bus that has children */ + if (!(list_empty(&b->children) && list_empty(&b->devices))) + return -EBUSY; + + /* We -know- there aren't any child devices anymore at this stage + * and thus, we can safely unmap the IO space as it's not in use + */ + res = &phb->io_resource; + if (res->flags & IORESOURCE_IO) { + rc = pcibios_unmap_io_space(b); + if (rc) { + printk(KERN_ERR "%s: failed to unmap IO on bus %s\n", + __func__, b->name); + return 1; + } + } + + /* Unregister the bridge device from sysfs and remove the PCI bus */ + device_unregister(b->bridge); + phb->bus = NULL; + pci_remove_bus(b); + + /* Now release the IO resource */ + if (res->flags & IORESOURCE_IO) + release_resource(res); + + /* Release memory resources */ + for (i = 0; i < 3; ++i) { + res = &phb->mem_resources[i]; + if (!(res->flags & IORESOURCE_MEM)) + continue; + release_resource(res); + } + + /* Free pci_controller data structure */ + pcibios_free_controller(phb); + + return 0; +} +EXPORT_SYMBOL_GPL(remove_phb_dynamic); diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c index 9c2a22fed18b..4e3e0382c16e 100644 --- a/drivers/pci/hotplug/rpadlpar_core.c +++ 
b/drivers/pci/hotplug/rpadlpar_core.c @@ -14,6 +14,9 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ + +#undef DEBUG + #include #include #include @@ -151,20 +154,20 @@ static void dlpar_pci_add_bus(struct device_node *dn) return; } + /* Scan below the new bridge */ if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) of_scan_pci_bridge(dn, dev); - pcibios_fixup_new_pci_devices(dev->subordinate); - - /* Claim new bus resources */ - pcibios_claim_one_bus(dev->bus); - /* Map IO space for child bus, which may or may not succeed */ pcibios_map_io_space(dev->subordinate); - /* Add new devices to global lists. Register in proc, sysfs. */ - pci_bus_add_devices(phb->bus); + /* Finish adding it : resource allocation, adding devices, etc... + * Note that we need to perform the finish pass on the -parent- + * bus of the EADS bridge so the bridge device itself gets + * properly added + */ + pcibios_finish_adding_to_bus(phb->bus); } static int dlpar_add_pci_slot(char *drc_name, struct device_node *dn) @@ -203,27 +206,6 @@ static int dlpar_add_pci_slot(char *drc_name, struct device_node *dn) return 0; } -static int dlpar_remove_root_bus(struct pci_controller *phb) -{ - struct pci_bus *phb_bus; - int rc; - - phb_bus = phb->bus; - if (!(list_empty(&phb_bus->children) && - list_empty(&phb_bus->devices))) { - return -EBUSY; - } - - rc = pcibios_remove_root_bus(phb); - if (rc) - return -EIO; - - device_unregister(phb_bus->bridge); - pci_remove_bus(phb_bus); - - return 0; -} - static int dlpar_remove_phb(char *drc_name, struct device_node *dn) { struct slot *slot; @@ -235,18 +217,15 @@ static int dlpar_remove_phb(char *drc_name, struct device_node *dn) /* If pci slot is hotplugable, use hotplug to remove it */ slot = find_php_slot(dn); - if (slot) { - if (rpaphp_deregister_slot(slot)) { - printk(KERN_ERR - "%s: unable to remove hotplug slot %s\n", - __func__, drc_name); - 
return -EIO; - } + if (slot && rpaphp_deregister_slot(slot)) { + printk(KERN_ERR "%s: unable to remove hotplug slot %s\n", + __func__, drc_name); + return -EIO; } pdn = dn->data; BUG_ON(!pdn || !pdn->phb); - rc = dlpar_remove_root_bus(pdn->phb); + rc = remove_phb_dynamic(pdn->phb); if (rc < 0) return rc; @@ -378,26 +357,38 @@ int dlpar_remove_pci_slot(char *drc_name, struct device_node *dn) if (!bus) return -EINVAL; - /* If pci slot is hotplugable, use hotplug to remove it */ + pr_debug("PCI: Removing PCI slot below EADS bridge %s\n", + bus->self ? pci_name(bus->self) : ""); + slot = find_php_slot(dn); if (slot) { + pr_debug("PCI: Removing hotplug slot for %04x:%02x...\n", + pci_domain_nr(bus), bus->number); + if (rpaphp_deregister_slot(slot)) { printk(KERN_ERR "%s: unable to remove hotplug slot %s\n", __func__, drc_name); return -EIO; } - } else - pcibios_remove_pci_devices(bus); + } + + /* Remove all devices below slot */ + pcibios_remove_pci_devices(bus); + /* Unmap PCI IO space */ if (pcibios_unmap_io_space(bus)) { printk(KERN_ERR "%s: failed to unmap bus range\n", __func__); return -ERANGE; } + /* Remove the EADS bridge device itself */ BUG_ON(!bus->self); + pr_debug("PCI: Now removing bridge device %s\n", pci_name(bus->self)); + eeh_remove_bus_device(bus->self); pci_remove_bus_device(bus->self); + return 0; } -- cgit 1.4.1 From 0b8f1efad30bd58f89961b82dfe68b9edf8fd2ac Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 5 Dec 2008 18:58:31 -0800 Subject: sparse irq_desc[] array: core kernel and x86 changes Impact: new feature Problem on distro kernels: irq_desc[NR_IRQS] takes megabytes of RAM with NR_CPUS set to large values. The goal is to be able to scale up to much larger NR_IRQS value without impacting the (important) common case. To solve this, we generalize irq_desc[NR_IRQS] to an (optional) array of irq_desc pointers. 
When CONFIG_SPARSE_IRQ=y is used, we use kzalloc_node to get irq_desc, this also makes the IRQ descriptors NUMA-local (to the site that calls request_irq()). This gets rid of the irq_cfg[] static array on x86 as well: irq_cfg now uses desc->chip_data for x86 to store irq_cfg. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 10 ++ arch/x86/include/asm/irq_vectors.h | 9 ++ arch/x86/kernel/io_apic.c | 301 +++++++++++++++++++++++-------------- arch/x86/kernel/irq.c | 3 + arch/x86/kernel/irq_32.c | 2 + arch/x86/kernel/irq_64.c | 2 + arch/x86/kernel/irqinit_32.c | 1 - arch/x86/kernel/irqinit_64.c | 1 - drivers/char/random.c | 22 +-- drivers/pci/intr_remapping.c | 76 +++++++++- drivers/xen/events.c | 12 +- fs/proc/stat.c | 17 ++- include/linux/interrupt.h | 2 + include/linux/irq.h | 54 ++++++- include/linux/irqnr.h | 14 +- include/linux/kernel_stat.h | 14 +- include/linux/random.h | 51 +++++++ init/main.c | 11 ++ kernel/irq/autoprobe.c | 15 ++ kernel/irq/chip.c | 3 +- kernel/irq/handle.c | 181 +++++++++++++++++++++- kernel/irq/proc.c | 6 +- kernel/irq/spurious.c | 5 + 23 files changed, 649 insertions(+), 163 deletions(-) (limited to 'drivers/pci') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ac22bb7719f7..48ac688de3cd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -238,6 +238,16 @@ config X86_HAS_BOOT_CPU_ID def_bool y depends on X86_VOYAGER +config SPARSE_IRQ + bool "Support sparse irq numbering" + depends on PCI_MSI || HT_IRQ + default y + help + This enables support for sparse irq, esp for msi/msi-x. You may need + if you have lots of cards supports msi-x installed. + + If you don't know what to do here, say Y. 
+ config X86_FIND_SMP_CONFIG def_bool y depends on X86_MPPARSE || X86_VOYAGER diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 0005adb0f941..bb6b69a6b125 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -102,11 +102,20 @@ #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) + +#ifndef CONFIG_SPARSE_IRQ # if NR_CPUS < MAX_IO_APICS # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) # else # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif +#else +# if (8 * NR_CPUS) > (32 * MAX_IO_APICS) +# define NR_IRQS (NR_VECTORS + (8 * NR_CPUS)) +# else +# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) +# endif +#endif #elif defined(CONFIG_X86_VOYAGER) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 9043251210fb..9de17f5c1125 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -108,8 +108,33 @@ static int __init parse_noapic(char *str) early_param("noapic", parse_noapic); struct irq_pin_list; + +/* + * This is performance-critical, we want to do it O(1) + * + * the indexing order of this array favors 1:1 mappings + * between pins and IRQs. + */ + +struct irq_pin_list { + int apic, pin; + struct irq_pin_list *next; +}; + +static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) +{ + struct irq_pin_list *pin; + int node; + + node = cpu_to_node(cpu); + + pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node); + printk(KERN_DEBUG " alloc irq_2_pin on cpu %d node %d\n", cpu, node); + + return pin; +} + struct irq_cfg { - unsigned int irq; struct irq_pin_list *irq_2_pin; cpumask_t domain; cpumask_t old_domain; @@ -119,83 +144,93 @@ struct irq_cfg { }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. 
*/ +#ifdef CONFIG_SPARSE_IRQ +static struct irq_cfg irq_cfgx[] = { +#else static struct irq_cfg irq_cfgx[NR_IRQS] = { - [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, - [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, - [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, - [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, - [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, - [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, - [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, - [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, - [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, - [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, - [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, - [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, - [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, - [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, - [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, - [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, +#endif + [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, + [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, + [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, + [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, + [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, + [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, + [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, + [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, + [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, + [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, + [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, + [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, + [12] = { 
.domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, + [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, + [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, + [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, }; -#define for_each_irq_cfg(irq, cfg) \ - for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++) - -static struct irq_cfg *irq_cfg(unsigned int irq) +void __init arch_early_irq_init(void) { - return irq < nr_irqs ? irq_cfgx + irq : NULL; -} + struct irq_cfg *cfg; + struct irq_desc *desc; + int count; + int i; -static struct irq_cfg *irq_cfg_alloc(unsigned int irq) -{ - return irq_cfg(irq); -} + cfg = irq_cfgx; + count = ARRAY_SIZE(irq_cfgx); -/* - * Rough estimation of how many shared IRQs there are, can be changed - * anytime. - */ -#define MAX_PLUS_SHARED_IRQS NR_IRQS -#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) + for (i = 0; i < count; i++) { + desc = irq_to_desc(i); + desc->chip_data = &cfg[i]; + } +} -/* - * This is performance-critical, we want to do it O(1) - * - * the indexing order of this array favors 1:1 mappings - * between pins and IRQs. 
- */ +#ifdef CONFIG_SPARSE_IRQ +static struct irq_cfg *irq_cfg(unsigned int irq) +{ + struct irq_cfg *cfg = NULL; + struct irq_desc *desc; -struct irq_pin_list { - int apic, pin; - struct irq_pin_list *next; -}; + desc = irq_to_desc(irq); + if (desc) + cfg = desc->chip_data; -static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE]; -static struct irq_pin_list *irq_2_pin_ptr; + return cfg; +} -static void __init irq_2_pin_init(void) +static struct irq_cfg *get_one_free_irq_cfg(int cpu) { - struct irq_pin_list *pin = irq_2_pin_head; - int i; + struct irq_cfg *cfg; + int node; + + node = cpu_to_node(cpu); - for (i = 1; i < PIN_MAP_SIZE; i++) - pin[i-1].next = &pin[i]; + cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); + printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node); - irq_2_pin_ptr = &pin[0]; + return cfg; } -static struct irq_pin_list *get_one_free_irq_2_pin(void) +void arch_init_chip_data(struct irq_desc *desc, int cpu) { - struct irq_pin_list *pin = irq_2_pin_ptr; + struct irq_cfg *cfg; - if (!pin) - panic("can not get more irq_2_pin\n"); + cfg = desc->chip_data; + if (!cfg) { + desc->chip_data = get_one_free_irq_cfg(cpu); + if (!desc->chip_data) { + printk(KERN_ERR "can not alloc irq_cfg\n"); + BUG_ON(1); + } + } +} - irq_2_pin_ptr = pin->next; - pin->next = NULL; - return pin; +#else +static struct irq_cfg *irq_cfg(unsigned int irq) +{ + return irq < nr_irqs ? irq_cfgx + irq : NULL; } +#endif + struct io_apic { unsigned int index; unsigned int unused[3]; @@ -397,16 +432,19 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. 
*/ -static void add_pin_to_irq(unsigned int irq, int apic, int pin) +static void add_pin_to_irq_cpu(unsigned int irq, int cpu, int apic, int pin) { - struct irq_cfg *cfg; struct irq_pin_list *entry; + struct irq_cfg *cfg = irq_cfg(irq); - /* first time to refer irq_cfg, so with new */ - cfg = irq_cfg_alloc(irq); entry = cfg->irq_2_pin; if (!entry) { - entry = get_one_free_irq_2_pin(); + entry = get_one_free_irq_2_pin(cpu); + if (!entry) { + printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", + apic, pin); + return; + } cfg->irq_2_pin = entry; entry->apic = apic; entry->pin = pin; @@ -421,7 +459,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) entry = entry->next; } - entry->next = get_one_free_irq_2_pin(); + entry->next = get_one_free_irq_2_pin(cpu); entry = entry->next; entry->apic = apic; entry->pin = pin; @@ -430,7 +468,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) /* * Reroute an IRQ to a different pin. */ -static void __init replace_pin_at_irq(unsigned int irq, +static void __init replace_pin_at_irq(unsigned int irq, int cpu, int oldapic, int oldpin, int newapic, int newpin) { @@ -451,7 +489,7 @@ static void __init replace_pin_at_irq(unsigned int irq, /* why? call replace before add? 
*/ if (!replaced) - add_pin_to_irq(irq, newapic, newpin); + add_pin_to_irq_cpu(irq, cpu, newapic, newpin); } static inline void io_apic_modify_irq(unsigned int irq, @@ -1162,9 +1200,13 @@ void __setup_vector_irq(int cpu) /* This function must be called with vector_lock held */ int irq, vector; struct irq_cfg *cfg; + struct irq_desc *desc; /* Mark the inuse vectors */ - for_each_irq_cfg(irq, cfg) { + for_each_irq_desc(irq, desc) { + if (!desc) + continue; + cfg = desc->chip_data; if (!cpu_isset(cpu, cfg->domain)) continue; vector = cfg->vector; @@ -1356,6 +1398,8 @@ static void __init setup_IO_APIC_irqs(void) { int apic, pin, idx, irq; int notcon = 0; + struct irq_desc *desc; + int cpu = boot_cpu_id; apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); @@ -1387,7 +1431,12 @@ static void __init setup_IO_APIC_irqs(void) if (multi_timer_check(apic, irq)) continue; #endif - add_pin_to_irq(irq, apic, pin); + desc = irq_to_desc_alloc_cpu(irq, cpu); + if (!desc) { + printk(KERN_INFO "can not get irq_desc for %d\n", irq); + continue; + } + add_pin_to_irq_cpu(irq, cpu, apic, pin); setup_IO_APIC_irq(apic, pin, irq, irq_trigger(idx), irq_polarity(idx)); @@ -1448,6 +1497,7 @@ __apicdebuginit(void) print_IO_APIC(void) union IO_APIC_reg_03 reg_03; unsigned long flags; struct irq_cfg *cfg; + struct irq_desc *desc; unsigned int irq; if (apic_verbosity == APIC_QUIET) @@ -1537,8 +1587,13 @@ __apicdebuginit(void) print_IO_APIC(void) } } printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for_each_irq_cfg(irq, cfg) { - struct irq_pin_list *entry = cfg->irq_2_pin; + for_each_irq_desc(irq, desc) { + struct irq_pin_list *entry; + + if (!desc) + continue; + cfg = desc->chip_data; + entry = cfg->irq_2_pin; if (!entry) continue; printk(KERN_DEBUG "IRQ%d ", irq); @@ -2022,6 +2077,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq) { int was_pending = 0; unsigned long flags; + struct irq_cfg *cfg; spin_lock_irqsave(&ioapic_lock, flags); if (irq < 16) { @@ -2029,6 +2085,7 @@ static 
unsigned int startup_ioapic_irq(unsigned int irq) if (i8259A_irq_pending(irq)) was_pending = 1; } + cfg = irq_cfg(irq); __unmask_IO_APIC_irq(irq); spin_unlock_irqrestore(&ioapic_lock, flags); @@ -2178,6 +2235,9 @@ static void ir_irq_migration(struct work_struct *work) struct irq_desc *desc; for_each_irq_desc(irq, desc) { + if (!desc) + continue; + if (desc->status & IRQ_MOVE_PENDING) { unsigned long flags; @@ -2229,6 +2289,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) struct irq_cfg *cfg; irq = __get_cpu_var(vector_irq)[vector]; + if (irq == -1) + continue; + desc = irq_to_desc(irq); if (!desc) continue; @@ -2430,8 +2493,12 @@ static inline void init_IO_APIC_traps(void) * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - for_each_irq_cfg(irq, cfg) { - if (IO_APIC_IRQ(irq) && !cfg->vector) { + for_each_irq_desc(irq, desc) { + if (!desc) + continue; + + cfg = desc->chip_data; + if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { /* * Hmm.. We don't have an entry for this, * so default to an old-fashioned 8259 @@ -2439,11 +2506,9 @@ static inline void init_IO_APIC_traps(void) */ if (irq < 16) make_8259A_irq(irq); - else { - desc = irq_to_desc(irq); + else /* Strange. Oh, well.. */ desc->chip = &no_irq_chip; - } } } } @@ -2654,7 +2719,7 @@ static inline void __init check_timer(void) * Ok, does IRQ0 through the IOAPIC work? 
*/ if (no_pin1) { - add_pin_to_irq(0, apic1, pin1); + add_pin_to_irq_cpu(0, boot_cpu_id, apic1, pin1); setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); } unmask_IO_APIC_irq(0); @@ -2683,7 +2748,7 @@ static inline void __init check_timer(void) /* * legacy devices should be connected to IO APIC #0 */ - replace_pin_at_irq(0, apic1, pin1, apic2, pin2); + replace_pin_at_irq(0, boot_cpu_id, apic1, pin1, apic2, pin2); setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); unmask_IO_APIC_irq(0); enable_8259A_irq(0); @@ -2902,21 +2967,25 @@ unsigned int create_irq_nr(unsigned int irq_want) unsigned int irq; unsigned int new; unsigned long flags; - struct irq_cfg *cfg_new; - - irq_want = nr_irqs - 1; + struct irq_cfg *cfg_new = NULL; + int cpu = boot_cpu_id; + struct irq_desc *desc_new = NULL; irq = 0; spin_lock_irqsave(&vector_lock, flags); for (new = irq_want; new > 0; new--) { if (platform_legacy_irq(new)) continue; - cfg_new = irq_cfg(new); - if (cfg_new && cfg_new->vector != 0) + + desc_new = irq_to_desc_alloc_cpu(new, cpu); + if (!desc_new) { + printk(KERN_INFO "can not get irq_desc for %d\n", new); + continue; + } + cfg_new = desc_new->chip_data; + + if (cfg_new->vector != 0) continue; - /* check if need to create one */ - if (!cfg_new) - cfg_new = irq_cfg_alloc(new); if (__assign_irq_vector(new, TARGET_CPUS) == 0) irq = new; break; @@ -2925,6 +2994,9 @@ unsigned int create_irq_nr(unsigned int irq_want) if (irq > 0) { dynamic_irq_init(irq); + /* restore it, in case dynamic_irq_init clear it */ + if (desc_new) + desc_new->chip_data = cfg_new; } return irq; } @@ -2944,8 +3016,16 @@ int create_irq(void) void destroy_irq(unsigned int irq) { unsigned long flags; + struct irq_cfg *cfg; + struct irq_desc *desc; + /* store it, in case dynamic_irq_cleanup clear it */ + desc = irq_to_desc(irq); + cfg = desc->chip_data; dynamic_irq_cleanup(irq); + /* connect back irq_cfg */ + if (desc) + desc->chip_data = cfg; #ifdef CONFIG_INTR_REMAP free_irte(irq); @@ -3195,26 +3275,13 @@ static int 
setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) return 0; } -static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) -{ - unsigned int irq; - - irq = dev->bus->number; - irq <<= 8; - irq |= dev->devfn; - irq <<= 12; - - return irq; -} - -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc) { unsigned int irq; int ret; unsigned int irq_want; - irq_want = build_irq_for_pci_dev(dev) + 0x100; - + irq_want = nr_irqs - 1; irq = create_irq_nr(irq_want); if (irq == 0) return -1; @@ -3228,7 +3295,7 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) goto error; no_ir: #endif - ret = setup_msi_irq(dev, desc, irq); + ret = setup_msi_irq(dev, msidesc, irq); if (ret < 0) { destroy_irq(irq); return ret; @@ -3246,7 +3313,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { unsigned int irq; int ret, sub_handle; - struct msi_desc *desc; + struct msi_desc *msidesc; unsigned int irq_want; #ifdef CONFIG_INTR_REMAP @@ -3254,10 +3321,11 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) int index = 0; #endif - irq_want = build_irq_for_pci_dev(dev) + 0x100; + irq_want = nr_irqs - 1; sub_handle = 0; - list_for_each_entry(desc, &dev->msi_list, list) { - irq = create_irq_nr(irq_want--); + list_for_each_entry(msidesc, &dev->msi_list, list) { + irq = create_irq_nr(irq_want); + irq_want--; if (irq == 0) return -1; #ifdef CONFIG_INTR_REMAP @@ -3289,7 +3357,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) } no_ir: #endif - ret = setup_msi_irq(dev, desc, irq); + ret = setup_msi_irq(dev, msidesc, irq); if (ret < 0) goto error; sub_handle++; @@ -3707,17 +3775,29 @@ int __init io_apic_get_version(int ioapic) int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) { + struct irq_desc *desc; + struct irq_cfg *cfg; + int cpu = boot_cpu_id; + if (!IO_APIC_IRQ(irq)) { 
apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", ioapic); return -EINVAL; } + desc = irq_to_desc_alloc_cpu(irq, cpu); + if (!desc) { + printk(KERN_INFO "can not get irq_desc %d\n", irq); + return 0; + } + /* * IRQs < 16 are already in the irq_2_pin[] map */ - if (irq >= 16) - add_pin_to_irq(irq, ioapic, pin); + if (irq >= 16) { + cfg = desc->chip_data; + add_pin_to_irq_cpu(irq, cpu, ioapic, pin); + } setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); @@ -3773,7 +3853,8 @@ void __init setup_ioapic_dest(void) * when you have too many devices, because at that time only boot * cpu is online. */ - cfg = irq_cfg(irq); + desc = irq_to_desc(irq); + cfg = desc->chip_data; if (!cfg->vector) { setup_IO_APIC_irq(ioapic, pin, irq, irq_trigger(irq_entry), @@ -3785,7 +3866,6 @@ void __init setup_ioapic_dest(void) /* * Honour affinities which have been set in early boot */ - desc = irq_to_desc(irq); if (desc->status & (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) mask = desc->affinity; @@ -3846,7 +3926,6 @@ void __init ioapic_init_mappings(void) struct resource *ioapic_res; int i; - irq_2_pin_init(); ioapic_res = ioapic_setup_resources(); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index d1d4dc52f649..3f1d9d18df67 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -118,6 +118,9 @@ int show_interrupts(struct seq_file *p, void *v) } desc = irq_to_desc(i); + if (!desc) + return 0; + spin_lock_irqsave(&desc->lock, flags); #ifndef CONFIG_SMP any_count = kstat_irqs(i); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index a51382672de0..119fc9c8ff7f 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -242,6 +242,8 @@ void fixup_irqs(cpumask_t map) for_each_irq_desc(irq, desc) { cpumask_t mask; + if (!desc) + continue; if (irq == 2) continue; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 
60eb84eb77a0..900009c70591 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -94,6 +94,8 @@ void fixup_irqs(cpumask_t map) int break_affinity = 0; int set_affinity = 1; + if (!desc) + continue; if (irq == 2) continue; diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 845aa9803e80..5a5651b7f9e6 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -69,7 +69,6 @@ void __init init_ISA_irqs (void) * 16 old-style INTA-cycle interrupts: */ for (i = 0; i < 16; i++) { - /* first time call this irq_desc */ struct irq_desc *desc = irq_to_desc(i); desc->status = IRQ_DISABLED; diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index ff0235391285..cd9f42d028d9 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -143,7 +143,6 @@ void __init init_ISA_irqs(void) init_8259A(0); for (i = 0; i < 16; i++) { - /* first time call this irq_desc */ struct irq_desc *desc = irq_to_desc(i); desc->status = IRQ_DISABLED; diff --git a/drivers/char/random.c b/drivers/char/random.c index 675076f5fca8..d26891bfcd41 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -558,23 +558,9 @@ struct timer_rand_state { unsigned dont_count_entropy:1; }; -static struct timer_rand_state *irq_timer_state[NR_IRQS]; - -static struct timer_rand_state *get_timer_rand_state(unsigned int irq) -{ - if (irq >= nr_irqs) - return NULL; - - return irq_timer_state[irq]; -} - -static void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state) -{ - if (irq >= nr_irqs) - return; - - irq_timer_state[irq] = state; -} +#ifndef CONFIG_SPARSE_IRQ +struct timer_rand_state *irq_timer_state[NR_IRQS]; +#endif static struct timer_rand_state input_timer_state; @@ -933,8 +919,10 @@ void rand_initialize_irq(int irq) { struct timer_rand_state *state; +#ifndef CONFIG_SPARSE_IRQ if (irq >= nr_irqs) return; +#endif state = get_timer_rand_state(irq); diff --git 
a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 2de5a3238c94..c9958ec5e25e 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -19,17 +19,75 @@ struct irq_2_iommu { u8 irte_mask; }; -static struct irq_2_iommu irq_2_iommuX[NR_IRQS]; +#ifdef CONFIG_SPARSE_IRQ +static struct irq_2_iommu *get_one_free_irq_2_iommu(int cpu) +{ + struct irq_2_iommu *iommu; + int node; + + node = cpu_to_node(cpu); + + iommu = kzalloc_node(sizeof(*iommu), GFP_ATOMIC, node); + printk(KERN_DEBUG "alloc irq_2_iommu on cpu %d node %d\n", cpu, node); + + return iommu; +} static struct irq_2_iommu *irq_2_iommu(unsigned int irq) { - return (irq < nr_irqs) ? irq_2_iommuX + irq : NULL; + struct irq_desc *desc; + + desc = irq_to_desc(irq); + + if (WARN_ON_ONCE(!desc)) + return NULL; + + return desc->irq_2_iommu; +} + +static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu) +{ + struct irq_desc *desc; + struct irq_2_iommu *irq_iommu; + + /* + * alloc irq desc if not allocated already. 
+ */ + desc = irq_to_desc_alloc_cpu(irq, cpu); + if (!desc) { + printk(KERN_INFO "can not get irq_desc for %d\n", irq); + return NULL; + } + + irq_iommu = desc->irq_2_iommu; + + if (!irq_iommu) + desc->irq_2_iommu = get_one_free_irq_2_iommu(cpu); + + return desc->irq_2_iommu; } +static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq) +{ + return irq_2_iommu_alloc_cpu(irq, boot_cpu_id); +} + +#else /* !CONFIG_SPARSE_IRQ */ + +static struct irq_2_iommu irq_2_iommuX[NR_IRQS]; + +static struct irq_2_iommu *irq_2_iommu(unsigned int irq) +{ + if (irq < nr_irqs) + return &irq_2_iommuX[irq]; + + return NULL; +} static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq) { return irq_2_iommu(irq); } +#endif static DEFINE_SPINLOCK(irq_2_ir_lock); @@ -86,9 +144,11 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) if (!count) return -1; +#ifndef CONFIG_SPARSE_IRQ /* protect irq_2_iommu_alloc later */ if (irq >= nr_irqs) return -1; +#endif /* * start the IRTE search from index 0. @@ -130,6 +190,12 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) table->base[i].present = 1; irq_iommu = irq_2_iommu_alloc(irq); + if (!irq_iommu) { + spin_unlock(&irq_2_ir_lock); + printk(KERN_ERR "can't allocate irq_2_iommu\n"); + return -1; + } + irq_iommu->iommu = iommu; irq_iommu->irte_index = index; irq_iommu->sub_handle = 0; @@ -177,6 +243,12 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) irq_iommu = irq_2_iommu_alloc(irq); + if (!irq_iommu) { + spin_unlock(&irq_2_ir_lock); + printk(KERN_ERR "can't allocate irq_2_iommu\n"); + return -1; + } + irq_iommu->iommu = iommu; irq_iommu->irte_index = index; irq_iommu->sub_handle = subhandle; diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 1e3b934a4cf7..2924faa7f6c4 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -141,8 +141,12 @@ static void init_evtchn_cpu_bindings(void) int i; /* By default all event channels notify CPU#0. 
*/ - for_each_irq_desc(i, desc) + for_each_irq_desc(i, desc) { + if (!desc) + continue; + desc->affinity = cpumask_of_cpu(0); + } #endif memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); @@ -231,7 +235,7 @@ static int find_unbound_irq(void) int irq; /* Only allocate from dynirq range */ - for_each_irq_nr(irq) + for (irq = 0; irq < nr_irqs; irq++) if (irq_bindcount[irq] == 0) break; @@ -792,7 +796,7 @@ void xen_irq_resume(void) mask_evtchn(evtchn); /* No IRQ <-> event-channel mappings. */ - for_each_irq_nr(irq) + for (irq = 0; irq < nr_irqs; irq++) irq_info[irq].evtchn = 0; /* zap event-channel binding */ for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) @@ -824,7 +828,7 @@ void __init xen_init_IRQ(void) mask_evtchn(i); /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ - for_each_irq_nr(i) + for (i = 0; i < nr_irqs; i++) irq_bindcount[i] = 0; irq_ctx_init(smp_processor_id()); diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 81904f07679d..a13431ab7c65 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -27,6 +27,7 @@ static int show_stat(struct seq_file *p, void *v) u64 sum = 0; struct timespec boottime; unsigned int per_irq_sum; + struct irq_desc *desc; user = nice = system = idle = iowait = irq = softirq = steal = cputime64_zero; @@ -44,10 +45,11 @@ static int show_stat(struct seq_file *p, void *v) softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); - - for_each_irq_nr(j) + for_each_irq_desc(j, desc) { + if (!desc) + continue; sum += kstat_irqs_cpu(j, i); - + } sum += arch_irq_stat_cpu(i); } sum += arch_irq_stat(); @@ -90,11 +92,14 @@ static int show_stat(struct seq_file *p, void *v) seq_printf(p, "intr %llu", (unsigned long long)sum); /* sum again ? it could be updated? 
*/ - for_each_irq_nr(j) { + for (j = 0; j < NR_IRQS; j++) { + desc = irq_to_desc(j); per_irq_sum = 0; - for_each_possible_cpu(i) - per_irq_sum += kstat_irqs_cpu(j, i); + if (desc) { + for_each_possible_cpu(i) + per_irq_sum += kstat_irqs_cpu(j, i); + } seq_printf(p, " %u", per_irq_sum); } diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index f58a0cf8929a..79e915e7e8a5 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -18,6 +18,8 @@ #include #include +extern int nr_irqs; + /* * These correspond to the IORESOURCE_IRQ_* defines in * linux/ioport.h to select the interrupt line behaviour. When diff --git a/include/linux/irq.h b/include/linux/irq.h index 3dddfa703ebd..63b00439d4d2 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -129,6 +129,8 @@ struct irq_chip { const char *typename; }; +struct timer_rand_state; +struct irq_2_iommu; /** * struct irq_desc - interrupt descriptor * @irq: interrupt number for this descriptor @@ -154,6 +156,13 @@ struct irq_chip { */ struct irq_desc { unsigned int irq; +#ifdef CONFIG_SPARSE_IRQ + struct timer_rand_state *timer_rand_state; + unsigned int *kstat_irqs; +# ifdef CONFIG_INTR_REMAP + struct irq_2_iommu *irq_2_iommu; +# endif +#endif irq_flow_handler_t handle_irq; struct irq_chip *chip; struct msi_desc *msi_desc; @@ -181,14 +190,52 @@ struct irq_desc { const char *name; } ____cacheline_internodealigned_in_smp; +extern void early_irq_init(void); +extern void arch_early_irq_init(void); +extern void arch_init_chip_data(struct irq_desc *desc, int cpu); +extern void arch_init_copy_chip_data(struct irq_desc *old_desc, + struct irq_desc *desc, int cpu); +extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc); + +#ifndef CONFIG_SPARSE_IRQ extern struct irq_desc irq_desc[NR_IRQS]; static inline struct irq_desc *irq_to_desc(unsigned int irq) { - return (irq < nr_irqs) ? irq_desc + irq : NULL; + return (irq < NR_IRQS) ? 
irq_desc + irq : NULL; +} +static inline struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) +{ + return irq_to_desc(irq); } +#ifdef CONFIG_GENERIC_HARDIRQS +# define for_each_irq_desc(irq, desc) \ + for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++) +# define for_each_irq_desc_reverse(irq, desc) \ + for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1); \ + irq >= 0; irq--, desc--) +#endif + +#else + +extern struct irq_desc *irq_to_desc(unsigned int irq); +extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu); +extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu); + +# define for_each_irq_desc(irq, desc) \ + for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; irq++, desc = irq_to_desc(irq)) +# define for_each_irq_desc_reverse(irq, desc) \ + for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; irq--, desc = irq_to_desc(irq)) + +#define kstat_irqs_this_cpu(DESC) \ + ((DESC)->kstat_irqs[smp_processor_id()]) +#define kstat_incr_irqs_this_cpu(irqno, DESC) \ + ((DESC)->kstat_irqs[smp_processor_id()]++) + +#endif + /* * Migration helpers for obsolete names, they will go away: */ @@ -380,6 +427,11 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); #define get_irq_data(irq) (irq_to_desc(irq)->handler_data) #define get_irq_msi(irq) (irq_to_desc(irq)->msi_desc) +#define get_irq_desc_chip(desc) ((desc)->chip) +#define get_irq_desc_chip_data(desc) ((desc)->chip_data) +#define get_irq_desc_data(desc) ((desc)->handler_data) +#define get_irq_desc_msi(desc) ((desc)->msi_desc) + #endif /* CONFIG_GENERIC_HARDIRQS */ #endif /* !CONFIG_S390 */ diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 452c280c8115..7a299e989f8b 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -7,18 +7,10 @@ # define for_each_irq_desc(irq, desc) \ for (irq = 0; irq < nr_irqs; irq++) -#else -extern int nr_irqs; -# define for_each_irq_desc(irq, desc) \ - for (irq = 0, desc = irq_desc; irq < 
nr_irqs; irq++, desc++) - -# define for_each_irq_desc_reverse(irq, desc) \ - for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1); \ - irq >= 0; irq--, desc--) +static inline early_sparse_irq_init(void) +{ +} #endif -#define for_each_irq_nr(irq) \ - for (irq = 0; irq < nr_irqs; irq++) - #endif diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 4a145caeee07..4ee4b3d2316f 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -28,7 +28,9 @@ struct cpu_usage_stat { struct kernel_stat { struct cpu_usage_stat cpustat; - unsigned int irqs[NR_IRQS]; +#ifndef CONFIG_SPARSE_IRQ + unsigned int irqs[NR_IRQS]; +#endif }; DECLARE_PER_CPU(struct kernel_stat, kstat); @@ -39,6 +41,10 @@ DECLARE_PER_CPU(struct kernel_stat, kstat); extern unsigned long long nr_context_switches(void); +#ifndef CONFIG_SPARSE_IRQ +#define kstat_irqs_this_cpu(irq) \ + (kstat_this_cpu.irqs[irq]) + struct irq_desc; static inline void kstat_incr_irqs_this_cpu(unsigned int irq, @@ -46,11 +52,17 @@ static inline void kstat_incr_irqs_this_cpu(unsigned int irq, { kstat_this_cpu.irqs[irq]++; } +#endif + +#ifndef CONFIG_SPARSE_IRQ static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) { return kstat_cpu(cpu).irqs[irq]; } +#else +extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu); +#endif /* * Number of interrupts per specific IRQ source, since bootup diff --git a/include/linux/random.h b/include/linux/random.h index 36f125c0c603..ad9daa2374d5 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -44,6 +44,57 @@ struct rand_pool_info { extern void rand_initialize_irq(int irq); +struct timer_rand_state; +#ifndef CONFIG_SPARSE_IRQ + +extern struct timer_rand_state *irq_timer_state[]; + +extern int nr_irqs; +static inline struct timer_rand_state *get_timer_rand_state(unsigned int irq) +{ + if (irq >= nr_irqs) + return NULL; + + return irq_timer_state[irq]; +} + +static inline void set_timer_rand_state(unsigned int irq, struct 
timer_rand_state *state) +{ + if (irq >= nr_irqs) + return; + + irq_timer_state[irq] = state; +} + +#else + +#include +static inline struct timer_rand_state *get_timer_rand_state(unsigned int irq) +{ + struct irq_desc *desc; + + desc = irq_to_desc(irq); + + if (!desc) + return NULL; + + return desc->timer_rand_state; +} + +static inline void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state) +{ + struct irq_desc *desc; + + desc = irq_to_desc(irq); + + if (!desc) + return; + + desc->timer_rand_state = state; +} +#endif + + extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value); extern void add_interrupt_randomness(int irq); diff --git a/init/main.c b/init/main.c index 7e117a231af1..c1f999a3cf31 100644 --- a/init/main.c +++ b/init/main.c @@ -539,6 +539,15 @@ void __init __weak thread_info_cache_init(void) { } +void __init __weak arch_early_irq_init(void) +{ +} + +void __init __weak early_irq_init(void) +{ + arch_early_irq_init(); +} + asmlinkage void __init start_kernel(void) { char * command_line; @@ -603,6 +612,8 @@ asmlinkage void __init start_kernel(void) sort_main_extable(); trap_init(); rcu_init(); + /* init some links before init_ISA_irqs() */ + early_irq_init(); init_IRQ(); pidhash_init(); init_timers(); diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c index cc0f7321b8ce..650ce4102a63 100644 --- a/kernel/irq/autoprobe.c +++ b/kernel/irq/autoprobe.c @@ -40,6 +40,9 @@ unsigned long probe_irq_on(void) * flush such a longstanding irq before considering it as spurious. 
*/ for_each_irq_desc_reverse(i, desc) { + if (!desc) + continue; + spin_lock_irq(&desc->lock); if (!desc->action && !(desc->status & IRQ_NOPROBE)) { /* @@ -68,6 +71,9 @@ unsigned long probe_irq_on(void) * happened in the previous stage, it may have masked itself) */ for_each_irq_desc_reverse(i, desc) { + if (!desc) + continue; + spin_lock_irq(&desc->lock); if (!desc->action && !(desc->status & IRQ_NOPROBE)) { desc->status |= IRQ_AUTODETECT | IRQ_WAITING; @@ -86,6 +92,9 @@ unsigned long probe_irq_on(void) * Now filter out any obviously spurious interrupts */ for_each_irq_desc(i, desc) { + if (!desc) + continue; + spin_lock_irq(&desc->lock); status = desc->status; @@ -124,6 +133,9 @@ unsigned int probe_irq_mask(unsigned long val) int i; for_each_irq_desc(i, desc) { + if (!desc) + continue; + spin_lock_irq(&desc->lock); status = desc->status; @@ -166,6 +178,9 @@ int probe_irq_off(unsigned long val) unsigned int status; for_each_irq_desc(i, desc) { + if (!desc) + continue; + spin_lock_irq(&desc->lock); status = desc->status; diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 10b5092e9bfe..8e4fce4a1b1f 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -24,9 +24,10 @@ */ void dynamic_irq_init(unsigned int irq) { - struct irq_desc *desc = irq_to_desc(irq); + struct irq_desc *desc; unsigned long flags; + desc = irq_to_desc(irq); if (!desc) { WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq); return; diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index c815b42d0f5b..96ca203eb51b 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -15,9 +15,16 @@ #include #include #include +#include +#include #include "internals.h" +/* + * lockdep: we want to handle all irq_desc locks as a single lock-class: + */ +static struct lock_class_key irq_desc_lock_class; + /** * handle_bad_irq - handle spurious and unhandled irqs * @irq: the interrupt number @@ -49,6 +56,155 @@ void handle_bad_irq(unsigned int irq, struct irq_desc *desc) int nr_irqs = 
NR_IRQS; EXPORT_SYMBOL_GPL(nr_irqs); +void __init __attribute__((weak)) arch_early_irq_init(void) +{ +} + +#ifdef CONFIG_SPARSE_IRQ +static struct irq_desc irq_desc_init = { + .irq = -1, + .status = IRQ_DISABLED, + .chip = &no_irq_chip, + .handle_irq = handle_bad_irq, + .depth = 1, + .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), +#ifdef CONFIG_SMP + .affinity = CPU_MASK_ALL +#endif +}; + +static void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) +{ + unsigned long bytes; + char *ptr; + int node; + + /* Compute how many bytes we need per irq and allocate them */ + bytes = nr * sizeof(unsigned int); + + node = cpu_to_node(cpu); + ptr = kzalloc_node(bytes, GFP_ATOMIC, node); + printk(KERN_DEBUG " alloc kstat_irqs on cpu %d node %d\n", cpu, node); + + if (ptr) + desc->kstat_irqs = (unsigned int *)ptr; +} + +void __attribute__((weak)) arch_init_chip_data(struct irq_desc *desc, int cpu) +{ +} + +static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) +{ + memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); + desc->irq = irq; +#ifdef CONFIG_SMP + desc->cpu = cpu; +#endif + lockdep_set_class(&desc->lock, &irq_desc_lock_class); + init_kstat_irqs(desc, cpu, nr_cpu_ids); + if (!desc->kstat_irqs) { + printk(KERN_ERR "can not alloc kstat_irqs\n"); + BUG_ON(1); + } + arch_init_chip_data(desc, cpu); +} + +/* + * Protect the sparse_irqs: + */ +static DEFINE_SPINLOCK(sparse_irq_lock); + +struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly; + +static struct irq_desc irq_desc_legacy[16] __cacheline_aligned_in_smp = { + [0 ... 
15] = { + .irq = -1, + .status = IRQ_DISABLED, + .chip = &no_irq_chip, + .handle_irq = handle_bad_irq, + .depth = 1, + .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), +#ifdef CONFIG_SMP + .affinity = CPU_MASK_ALL +#endif + } +}; + +/* FIXME: use bootmem alloc ...*/ +static unsigned int kstat_irqs_legacy[16][NR_CPUS]; + +void __init early_irq_init(void) +{ + struct irq_desc *desc; + int legacy_count; + int i; + + desc = irq_desc_legacy; + legacy_count = ARRAY_SIZE(irq_desc_legacy); + + for (i = 0; i < legacy_count; i++) { + desc[i].irq = i; + desc[i].kstat_irqs = kstat_irqs_legacy[i]; + + irq_desc_ptrs[i] = desc + i; + } + + for (i = legacy_count; i < NR_IRQS; i++) + irq_desc_ptrs[i] = NULL; + + arch_early_irq_init(); +} + +struct irq_desc *irq_to_desc(unsigned int irq) +{ + return (irq < NR_IRQS) ? irq_desc_ptrs[irq] : NULL; +} + +struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) +{ + struct irq_desc *desc; + unsigned long flags; + int node; + + if (irq >= NR_IRQS) { + printk(KERN_WARNING "irq >= NR_IRQS in irq_to_desc_alloc: %d %d\n", + irq, NR_IRQS); + WARN_ON(1); + return NULL; + } + + desc = irq_desc_ptrs[irq]; + if (desc) + return desc; + + spin_lock_irqsave(&sparse_irq_lock, flags); + + /* We have to check it to avoid races with another CPU */ + desc = irq_desc_ptrs[irq]; + if (desc) + goto out_unlock; + + node = cpu_to_node(cpu); + desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); + printk(KERN_DEBUG " alloc irq_desc for %d on cpu %d node %d\n", + irq, cpu, node); + if (!desc) { + printk(KERN_ERR "can not alloc irq_desc\n"); + BUG_ON(1); + } + init_one_irq_desc(irq, desc, cpu); + + irq_desc_ptrs[irq] = desc; + +out_unlock: + spin_unlock_irqrestore(&sparse_irq_lock, flags); + + return desc; +} + +#else + struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { [0 ... 
NR_IRQS-1] = { .status = IRQ_DISABLED, @@ -62,6 +218,8 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { } }; +#endif + /* * What should we do if we get a hw irq event on an illegal vector? * Each architecture has to answer this themself. @@ -261,17 +419,28 @@ out: #ifdef CONFIG_TRACE_IRQFLAGS -/* - * lockdep: we want to handle all irq_desc locks as a single lock-class: - */ -static struct lock_class_key irq_desc_lock_class; - void early_init_irq_lock_class(void) { +#ifndef CONFIG_SPARSE_IRQ struct irq_desc *desc; int i; - for_each_irq_desc(i, desc) + for_each_irq_desc(i, desc) { + if (!desc) + continue; + lockdep_set_class(&desc->lock, &irq_desc_lock_class); + } +#endif +} +#endif + +#ifdef CONFIG_SPARSE_IRQ +unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) +{ + struct irq_desc *desc = irq_to_desc(irq); + return desc->kstat_irqs[cpu]; } #endif +EXPORT_SYMBOL(kstat_irqs_cpu); + diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index d257e7d6a8a4..f6b3440f05bc 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -243,7 +243,11 @@ void init_irq_proc(void) /* * Create entries for all existing IRQs. 
*/ - for_each_irq_desc(irq, desc) + for_each_irq_desc(irq, desc) { + if (!desc) + continue; + register_irq_proc(irq, desc); + } } diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index dd364c11e56e..3738107531fd 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -91,6 +91,9 @@ static int misrouted_irq(int irq) int i, ok = 0; for_each_irq_desc(i, desc) { + if (!desc) + continue; + if (!i) continue; @@ -112,6 +115,8 @@ static void poll_spurious_irqs(unsigned long dummy) for_each_irq_desc(i, desc) { unsigned int status; + if (!desc) + continue; if (!i) continue; -- cgit 1.4.1 From 3145e941fcfe2548fa2270afb1a05bab3a6bc418 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 5 Dec 2008 18:58:34 -0800 Subject: x86, MSI: pass irq_cfg and irq_desc Impact: simplify code Pass irq_desc and cfg around, instead of raw IRQ numbers - this way we dont have to look it up again and again. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 318 ++++++++++++++++++++++++++-------------------- drivers/pci/msi.c | 55 +++++--- include/linux/msi.h | 3 + 3 files changed, 222 insertions(+), 154 deletions(-) (limited to 'drivers/pci') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 0dcde74abd1d..a1a2e070f31a 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -231,6 +231,10 @@ static struct irq_cfg *irq_cfg(unsigned int irq) #endif +static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) +{ +} + struct io_apic { unsigned int index; unsigned int unused[3]; @@ -272,11 +276,10 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned writel(value, &io_apic->data); } -static bool io_apic_level_ack_pending(unsigned int irq) +static bool io_apic_level_ack_pending(struct irq_cfg *cfg) { struct irq_pin_list *entry; unsigned long flags; - struct irq_cfg *cfg = irq_cfg(irq); spin_lock_irqsave(&ioapic_lock, flags); entry = cfg->irq_2_pin; @@ -358,13 
+361,12 @@ static void ioapic_mask_entry(int apic, int pin) } #ifdef CONFIG_SMP -static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) +static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) { int apic, pin; - struct irq_cfg *cfg; struct irq_pin_list *entry; + u8 vector = cfg->vector; - cfg = irq_cfg(irq); entry = cfg->irq_2_pin; for (;;) { unsigned int reg; @@ -394,24 +396,27 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) } } -static int assign_irq_vector(int irq, cpumask_t mask); +static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask); -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) { struct irq_cfg *cfg; unsigned long flags; unsigned int dest; cpumask_t tmp; - struct irq_desc *desc; + unsigned int irq; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) return; - cfg = irq_cfg(irq); - if (assign_irq_vector(irq, mask)) + irq = desc->irq; + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; + set_extra_move_desc(desc, mask); + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); /* @@ -419,12 +424,20 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) */ dest = SET_APIC_LOGICAL_ID(dest); - desc = irq_to_desc(irq); spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg->vector); + __target_IO_APIC_irq(irq, dest, cfg); desc->affinity = mask; spin_unlock_irqrestore(&ioapic_lock, flags); } + +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + struct irq_desc *desc; + + desc = irq_to_desc(irq); + + set_ioapic_affinity_irq_desc(desc, mask); +} #endif /* CONFIG_SMP */ /* @@ -432,10 +445,9 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) * shared ISA-space IRQs, so we have to support them. 
We are super * fast in the common case, and fast for shared ISA-space IRQs. */ -static void add_pin_to_irq_cpu(unsigned int irq, int cpu, int apic, int pin) +static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) { struct irq_pin_list *entry; - struct irq_cfg *cfg = irq_cfg(irq); entry = cfg->irq_2_pin; if (!entry) { @@ -468,11 +480,10 @@ static void add_pin_to_irq_cpu(unsigned int irq, int cpu, int apic, int pin) /* * Reroute an IRQ to a different pin. */ -static void __init replace_pin_at_irq(unsigned int irq, int cpu, +static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, int oldapic, int oldpin, int newapic, int newpin) { - struct irq_cfg *cfg = irq_cfg(irq); struct irq_pin_list *entry = cfg->irq_2_pin; int replaced = 0; @@ -489,18 +500,16 @@ static void __init replace_pin_at_irq(unsigned int irq, int cpu, /* why? call replace before add? */ if (!replaced) - add_pin_to_irq_cpu(irq, cpu, newapic, newpin); + add_pin_to_irq_cpu(cfg, cpu, newapic, newpin); } -static inline void io_apic_modify_irq(unsigned int irq, +static inline void io_apic_modify_irq(struct irq_cfg *cfg, int mask_and, int mask_or, void (*final)(struct irq_pin_list *entry)) { int pin; - struct irq_cfg *cfg; struct irq_pin_list *entry; - cfg = irq_cfg(irq); for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { unsigned int reg; pin = entry->pin; @@ -513,9 +522,9 @@ static inline void io_apic_modify_irq(unsigned int irq, } } -static void __unmask_IO_APIC_irq(unsigned int irq) +static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL); + io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); } #ifdef CONFIG_X86_64 @@ -530,47 +539,64 @@ void io_apic_sync(struct irq_pin_list *entry) readl(&io_apic->data); } -static void __mask_IO_APIC_irq(unsigned int irq) +static void __mask_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); + 
io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); } #else /* CONFIG_X86_32 */ -static void __mask_IO_APIC_irq(unsigned int irq) +static void __mask_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL); + io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL); } -static void __mask_and_edge_IO_APIC_irq(unsigned int irq) +static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER, + io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, IO_APIC_REDIR_MASKED, NULL); } -static void __unmask_and_level_IO_APIC_irq(unsigned int irq) +static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, + io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, IO_APIC_REDIR_LEVEL_TRIGGER, NULL); } #endif /* CONFIG_X86_32 */ -static void mask_IO_APIC_irq (unsigned int irq) +static void mask_IO_APIC_irq_desc(struct irq_desc *desc) { + struct irq_cfg *cfg = desc->chip_data; unsigned long flags; + BUG_ON(!cfg); + spin_lock_irqsave(&ioapic_lock, flags); - __mask_IO_APIC_irq(irq); + __mask_IO_APIC_irq(cfg); spin_unlock_irqrestore(&ioapic_lock, flags); } -static void unmask_IO_APIC_irq (unsigned int irq) +static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) { + struct irq_cfg *cfg = desc->chip_data; unsigned long flags; spin_lock_irqsave(&ioapic_lock, flags); - __unmask_IO_APIC_irq(irq); + __unmask_IO_APIC_irq(cfg); spin_unlock_irqrestore(&ioapic_lock, flags); } +static void mask_IO_APIC_irq(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + mask_IO_APIC_irq_desc(desc); +} +static void unmask_IO_APIC_irq(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + unmask_IO_APIC_irq_desc(desc); +} + static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) { struct IO_APIC_route_entry entry; @@ -1072,7 +1098,7 @@ void unlock_vector_lock(void) spin_unlock(&vector_lock); } -static int 
__assign_irq_vector(int irq, cpumask_t mask) +static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) { /* * NOTE! The local APIC isn't very good at handling @@ -1088,16 +1114,13 @@ static int __assign_irq_vector(int irq, cpumask_t mask) static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; unsigned int old_vector; int cpu; - struct irq_cfg *cfg; - cfg = irq_cfg(irq); + if ((cfg->move_in_progress) || cfg->move_cleanup_count) + return -EBUSY; /* Only try and allocate irqs on cpus that are present */ cpus_and(mask, mask, cpu_online_map); - if ((cfg->move_in_progress) || cfg->move_cleanup_count) - return -EBUSY; - old_vector = cfg->vector; if (old_vector) { cpumask_t tmp; @@ -1151,24 +1174,22 @@ next: return -ENOSPC; } -static int assign_irq_vector(int irq, cpumask_t mask) +static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) { int err; unsigned long flags; spin_lock_irqsave(&vector_lock, flags); - err = __assign_irq_vector(irq, mask); + err = __assign_irq_vector(irq, cfg, mask); spin_unlock_irqrestore(&vector_lock, flags); return err; } -static void __clear_irq_vector(int irq) +static void __clear_irq_vector(int irq, struct irq_cfg *cfg) { - struct irq_cfg *cfg; cpumask_t mask; int cpu, vector; - cfg = irq_cfg(irq); BUG_ON(!cfg->vector); vector = cfg->vector; @@ -1257,11 +1278,8 @@ static inline int IO_APIC_irq_trigger(int irq) } #endif -static void ioapic_register_intr(int irq, unsigned long trigger) +static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger) { - struct irq_desc *desc; - - desc = irq_to_desc(irq); if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || trigger == IOAPIC_LEVEL) @@ -1353,7 +1371,7 @@ static int setup_ioapic_entry(int apic, int irq, return 0; } -static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, +static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc, int trigger, int polarity) { struct irq_cfg 
*cfg; @@ -1363,10 +1381,10 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, if (!IO_APIC_IRQ(irq)) return; - cfg = irq_cfg(irq); + cfg = desc->chip_data; mask = TARGET_CPUS; - if (assign_irq_vector(irq, mask)) + if (assign_irq_vector(irq, cfg, mask)) return; cpus_and(mask, cfg->domain, mask); @@ -1383,11 +1401,11 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, cfg->vector)) { printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", mp_ioapics[apic].mp_apicid, pin); - __clear_irq_vector(irq); + __clear_irq_vector(irq, cfg); return; } - ioapic_register_intr(irq, trigger); + ioapic_register_intr(irq, desc, trigger); if (irq < NR_IRQS_LEGACY) disable_8259A_irq(irq); @@ -1399,6 +1417,7 @@ static void __init setup_IO_APIC_irqs(void) int apic, pin, idx, irq; int notcon = 0; struct irq_desc *desc; + struct irq_cfg *cfg; int cpu = boot_cpu_id; apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); @@ -1436,9 +1455,10 @@ static void __init setup_IO_APIC_irqs(void) printk(KERN_INFO "can not get irq_desc for %d\n", irq); continue; } - add_pin_to_irq_cpu(irq, cpu, apic, pin); + cfg = desc->chip_data; + add_pin_to_irq_cpu(cfg, cpu, apic, pin); - setup_IO_APIC_irq(apic, pin, irq, + setup_IO_APIC_irq(apic, pin, irq, desc, irq_trigger(idx), irq_polarity(idx)); } } @@ -2086,7 +2106,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq) was_pending = 1; } cfg = irq_cfg(irq); - __unmask_IO_APIC_irq(irq); + __unmask_IO_APIC_irq(cfg); spin_unlock_irqrestore(&ioapic_lock, flags); return was_pending; @@ -2149,35 +2169,37 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); * as simple as edge triggered migration and we can do the irq migration * with a simple atomic update to IO-APIC RTE. 
*/ -static void migrate_ioapic_irq(int irq, cpumask_t mask) +static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask) { struct irq_cfg *cfg; - struct irq_desc *desc; cpumask_t tmp, cleanup_mask; struct irte irte; int modify_ioapic_rte; unsigned int dest; unsigned long flags; + unsigned int irq; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) return; + irq = desc->irq; if (get_irte(irq, &irte)) return; - if (assign_irq_vector(irq, mask)) + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; - cfg = irq_cfg(irq); + set_extra_move_desc(desc, mask); + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); - desc = irq_to_desc(irq); modify_ioapic_rte = desc->status & IRQ_LEVEL; if (modify_ioapic_rte) { spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg->vector); + __target_IO_APIC_irq(irq, dest, cfg); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -2199,14 +2221,14 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask) desc->affinity = mask; } -static int migrate_irq_remapped_level(int irq) +static int migrate_irq_remapped_level_desc(struct irq_desc *desc) { int ret = -1; - struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg = desc->chip_data; - mask_IO_APIC_irq(irq); + mask_IO_APIC_irq_desc(desc); - if (io_apic_level_ack_pending(irq)) { + if (io_apic_level_ack_pending(cfg)) { /* * Interrupt in progress. Migrating irq now will change the * vector information in the IO-APIC RTE and that will confuse @@ -2218,14 +2240,15 @@ static int migrate_irq_remapped_level(int irq) } /* everthing is clear. 
we have right of way */ - migrate_ioapic_irq(irq, desc->pending_mask); + migrate_ioapic_irq_desc(desc, desc->pending_mask); ret = 0; desc->status &= ~IRQ_MOVE_PENDING; cpus_clear(desc->pending_mask); unmask: - unmask_IO_APIC_irq(irq); + unmask_IO_APIC_irq_desc(desc); + return ret; } @@ -2258,18 +2281,22 @@ static void ir_irq_migration(struct work_struct *work) /* * Migrates the IRQ destination in the process context. */ -static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) { - struct irq_desc *desc = irq_to_desc(irq); - if (desc->status & IRQ_LEVEL) { desc->status |= IRQ_MOVE_PENDING; desc->pending_mask = mask; - migrate_irq_remapped_level(irq); + migrate_irq_remapped_level_desc(desc); return; } - migrate_ioapic_irq(irq, mask); + migrate_ioapic_irq_desc(desc, mask); +} +static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + struct irq_desc *desc = irq_to_desc(irq); + + set_ir_ioapic_affinity_irq_desc(desc, mask); } #endif @@ -2313,9 +2340,10 @@ unlock: irq_exit(); } -static void irq_complete_move(unsigned int irq) +static void irq_complete_move(struct irq_desc **descp) { - struct irq_cfg *cfg = irq_cfg(irq); + struct irq_desc *desc = *descp; + struct irq_cfg *cfg = desc->chip_data; unsigned vector, me; if (likely(!cfg->move_in_progress)) @@ -2333,8 +2361,9 @@ static void irq_complete_move(unsigned int irq) } } #else -static inline void irq_complete_move(unsigned int irq) {} +static inline void irq_complete_move(struct irq_desc **descp) {} #endif + #ifdef CONFIG_INTR_REMAP static void ack_x2apic_level(unsigned int irq) { @@ -2345,11 +2374,14 @@ static void ack_x2apic_edge(unsigned int irq) { ack_x2APIC_irq(); } + #endif static void ack_apic_edge(unsigned int irq) { - irq_complete_move(irq); + struct irq_desc *desc = irq_to_desc(irq); + + irq_complete_move(&desc); move_native_irq(irq); ack_APIC_irq(); } @@ -2358,18 +2390,21 @@ atomic_t 
irq_mis_count; static void ack_apic_level(unsigned int irq) { + struct irq_desc *desc = irq_to_desc(irq); + #ifdef CONFIG_X86_32 unsigned long v; int i; #endif + struct irq_cfg *cfg; int do_unmask_irq = 0; - irq_complete_move(irq); + irq_complete_move(&desc); #ifdef CONFIG_GENERIC_PENDING_IRQ /* If we are moving the irq we need to mask it */ - if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { + if (unlikely(desc->status & IRQ_MOVE_PENDING)) { do_unmask_irq = 1; - mask_IO_APIC_irq(irq); + mask_IO_APIC_irq_desc(desc); } #endif @@ -2393,7 +2428,8 @@ static void ack_apic_level(unsigned int irq) * operation to prevent an edge-triggered interrupt escaping meanwhile. * The idea is from Manfred Spraul. --macro */ - i = irq_cfg(irq)->vector; + cfg = desc->chip_data; + i = cfg->vector; v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); #endif @@ -2432,17 +2468,18 @@ static void ack_apic_level(unsigned int irq) * accurate and is causing problems then it is a hardware bug * and you can go talk to the chipset vendor about it. 
*/ - if (!io_apic_level_ack_pending(irq)) + cfg = desc->chip_data; + if (!io_apic_level_ack_pending(cfg)) move_masked_irq(irq); - unmask_IO_APIC_irq(irq); + unmask_IO_APIC_irq_desc(desc); } #ifdef CONFIG_X86_32 if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(irq); - __unmask_and_level_IO_APIC_irq(irq); + __mask_and_edge_IO_APIC_irq(cfg); + __unmask_and_level_IO_APIC_irq(cfg); spin_unlock(&ioapic_lock); } #endif @@ -2533,7 +2570,7 @@ static void unmask_lapic_irq(unsigned int irq) apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); } -static void ack_lapic_irq (unsigned int irq) +static void ack_lapic_irq(unsigned int irq) { ack_APIC_irq(); } @@ -2545,11 +2582,8 @@ static struct irq_chip lapic_chip __read_mostly = { .ack = ack_lapic_irq, }; -static void lapic_register_intr(int irq) +static void lapic_register_intr(int irq, struct irq_desc *desc) { - struct irq_desc *desc; - - desc = irq_to_desc(irq); desc->status &= ~IRQ_LEVEL; set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, "edge"); @@ -2653,7 +2687,9 @@ int timer_through_8259 __initdata; */ static inline void __init check_timer(void) { - struct irq_cfg *cfg = irq_cfg(0); + struct irq_desc *desc = irq_to_desc(0); + struct irq_cfg *cfg = desc->chip_data; + int cpu = boot_cpu_id; int apic1, pin1, apic2, pin2; unsigned long flags; unsigned int ver; @@ -2668,7 +2704,7 @@ static inline void __init check_timer(void) * get/set the timer IRQ vector: */ disable_8259A_irq(0); - assign_irq_vector(0, TARGET_CPUS); + assign_irq_vector(0, cfg, TARGET_CPUS); /* * As IRQ0 is to be enabled in the 8259A, the virtual @@ -2719,10 +2755,10 @@ static inline void __init check_timer(void) * Ok, does IRQ0 through the IOAPIC work? 
*/ if (no_pin1) { - add_pin_to_irq_cpu(0, boot_cpu_id, apic1, pin1); + add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); } - unmask_IO_APIC_irq(0); + unmask_IO_APIC_irq_desc(desc); if (timer_irq_works()) { if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); @@ -2748,9 +2784,9 @@ static inline void __init check_timer(void) /* * legacy devices should be connected to IO APIC #0 */ - replace_pin_at_irq(0, boot_cpu_id, apic1, pin1, apic2, pin2); + replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); - unmask_IO_APIC_irq(0); + unmask_IO_APIC_irq_desc(desc); enable_8259A_irq(0); if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); @@ -2782,7 +2818,7 @@ static inline void __init check_timer(void) apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...\n"); - lapic_register_intr(0); + lapic_register_intr(0, desc); apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ enable_8259A_irq(0); @@ -2986,7 +3022,7 @@ unsigned int create_irq_nr(unsigned int irq_want) if (cfg_new->vector != 0) continue; - if (__assign_irq_vector(new, TARGET_CPUS) == 0) + if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0) irq = new; break; } @@ -3034,7 +3070,7 @@ void destroy_irq(unsigned int irq) free_irte(irq); #endif spin_lock_irqsave(&vector_lock, flags); - __clear_irq_vector(irq); + __clear_irq_vector(irq, cfg); spin_unlock_irqrestore(&vector_lock, flags); } @@ -3049,12 +3085,12 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms unsigned dest; cpumask_t tmp; + cfg = irq_cfg(irq); tmp = TARGET_CPUS; - err = assign_irq_vector(irq, tmp); + err = assign_irq_vector(irq, cfg, tmp); if (err) return err; - cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, tmp); dest = cpu_mask_to_apicid(tmp); @@ -3112,35 +3148,35 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms #ifdef 
CONFIG_SMP static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; cpumask_t tmp; - struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) return; - if (assign_irq_vector(irq, mask)) + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; - cfg = irq_cfg(irq); + set_extra_move_desc(desc, mask); + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); - read_msi_msg(irq, &msg); + read_msi_msg_desc(desc, &msg); msg.data &= ~MSI_DATA_VECTOR_MASK; msg.data |= MSI_DATA_VECTOR(cfg->vector); msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; msg.address_lo |= MSI_ADDR_DEST_ID(dest); - write_msi_msg(irq, &msg); - desc = irq_to_desc(irq); + write_msi_msg_desc(desc, &msg); desc->affinity = mask; } - #ifdef CONFIG_INTR_REMAP /* * Migrate the MSI irq to another cpumask. This migration is @@ -3148,11 +3184,11 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) */ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; unsigned int dest; cpumask_t tmp, cleanup_mask; struct irte irte; - struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -3161,10 +3197,12 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) if (get_irte(irq, &irte)) return; - if (assign_irq_vector(irq, mask)) + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; - cfg = irq_cfg(irq); + set_extra_move_desc(desc, mask); + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); @@ -3188,9 +3226,9 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) cfg->move_in_progress = 0; } - desc = irq_to_desc(irq); desc->affinity = mask; } + #endif #endif /* CONFIG_SMP */ @@ -3249,7 +3287,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) } #endif 
-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) +static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) { int ret; struct msi_msg msg; @@ -3258,7 +3296,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) if (ret < 0) return ret; - set_irq_msi(irq, desc); + set_irq_msi(irq, msidesc); write_msi_msg(irq, &msg); #ifdef CONFIG_INTR_REMAP @@ -3381,20 +3419,22 @@ void arch_teardown_msi_irq(unsigned int irq) #ifdef CONFIG_SMP static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; cpumask_t tmp; - struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) return; - if (assign_irq_vector(irq, mask)) + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; - cfg = irq_cfg(irq); + set_extra_move_desc(desc, mask); + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); @@ -3406,9 +3446,9 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); dmar_msi_write(irq, &msg); - desc = irq_to_desc(irq); desc->affinity = mask; } + #endif /* CONFIG_SMP */ struct irq_chip dmar_msi_type = { @@ -3442,8 +3482,8 @@ int arch_setup_dmar_msi(unsigned int irq) #ifdef CONFIG_SMP static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; - struct irq_desc *desc; struct msi_msg msg; unsigned int dest; cpumask_t tmp; @@ -3452,10 +3492,12 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) if (cpus_empty(tmp)) return; - if (assign_irq_vector(irq, mask)) + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; - cfg = irq_cfg(irq); + set_extra_move_desc(desc, mask); + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); @@ -3467,9 +3509,9 @@ static void 
hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); hpet_msi_write(irq, &msg); - desc = irq_to_desc(irq); desc->affinity = mask; } + #endif /* CONFIG_SMP */ struct irq_chip hpet_msi_type = { @@ -3524,26 +3566,28 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; unsigned int dest; cpumask_t tmp; - struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) return; - if (assign_irq_vector(irq, mask)) + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; - cfg = irq_cfg(irq); + set_extra_move_desc(desc, mask); + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); target_ht_irq(irq, dest, cfg->vector); - desc = irq_to_desc(irq); desc->affinity = mask; } + #endif static struct irq_chip ht_irq_chip = { @@ -3563,13 +3607,13 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) int err; cpumask_t tmp; + cfg = irq_cfg(irq); tmp = TARGET_CPUS; - err = assign_irq_vector(irq, tmp); + err = assign_irq_vector(irq, cfg, tmp); if (!err) { struct ht_irq_msg msg; unsigned dest; - cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, tmp); dest = cpu_mask_to_apicid(tmp); @@ -3615,7 +3659,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, unsigned long flags; int err; - err = assign_irq_vector(irq, *eligible_cpu); + cfg = irq_cfg(irq); + + err = assign_irq_vector(irq, cfg, *eligible_cpu); if (err != 0) return err; @@ -3624,8 +3670,6 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, irq_name); spin_unlock_irqrestore(&vector_lock, flags); - cfg = irq_cfg(irq); - mmr_value = 0; entry = (struct uv_IO_APIC_route_entry *)&mmr_value; BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); @@ -3806,10 +3850,10 @@ int io_apic_set_pci_routing (int 
ioapic, int pin, int irq, int triggering, int p */ if (irq >= NR_IRQS_LEGACY) { cfg = desc->chip_data; - add_pin_to_irq_cpu(irq, cpu, ioapic, pin); + add_pin_to_irq_cpu(cfg, cpu, ioapic, pin); } - setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); + setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity); return 0; } @@ -3866,7 +3910,7 @@ void __init setup_ioapic_dest(void) desc = irq_to_desc(irq); cfg = desc->chip_data; if (!cfg->vector) { - setup_IO_APIC_irq(ioapic, pin, irq, + setup_IO_APIC_irq(ioapic, pin, irq, desc, irq_trigger(irq_entry), irq_polarity(irq_entry)); continue; @@ -3884,10 +3928,10 @@ void __init setup_ioapic_dest(void) #ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) - set_ir_ioapic_affinity_irq(irq, mask); + set_ir_ioapic_affinity_irq_desc(desc, mask); else #endif - set_ioapic_affinity_irq(irq, mask); + set_ioapic_affinity_irq_desc(desc, mask); } } diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 74801f7df9c9..11a51f8ed3b3 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -103,11 +103,11 @@ static void msix_set_enable(struct pci_dev *dev, int enable) } } -static void msix_flush_writes(unsigned int irq) +static void msix_flush_writes(struct irq_desc *desc) { struct msi_desc *entry; - entry = get_irq_msi(irq); + entry = get_irq_desc_msi(desc); BUG_ON(!entry || !entry->dev); switch (entry->msi_attrib.type) { case PCI_CAP_ID_MSI: @@ -135,11 +135,11 @@ static void msix_flush_writes(unsigned int irq) * Returns 1 if it succeeded in masking the interrupt and 0 if the device * doesn't support MSI masking. 
*/ -static int msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag) +static int msi_set_mask_bits(struct irq_desc *desc, u32 mask, u32 flag) { struct msi_desc *entry; - entry = get_irq_msi(irq); + entry = get_irq_desc_msi(desc); BUG_ON(!entry || !entry->dev); switch (entry->msi_attrib.type) { case PCI_CAP_ID_MSI: @@ -172,9 +172,9 @@ static int msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag) return 1; } -void read_msi_msg(unsigned int irq, struct msi_msg *msg) +void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) { - struct msi_desc *entry = get_irq_msi(irq); + struct msi_desc *entry = get_irq_desc_msi(desc); switch(entry->msi_attrib.type) { case PCI_CAP_ID_MSI: { @@ -211,9 +211,16 @@ void read_msi_msg(unsigned int irq, struct msi_msg *msg) } } -void write_msi_msg(unsigned int irq, struct msi_msg *msg) +void read_msi_msg(unsigned int irq, struct msi_msg *msg) { - struct msi_desc *entry = get_irq_msi(irq); + struct irq_desc *desc = irq_to_desc(irq); + + read_msi_msg_desc(desc, msg); +} + +void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) +{ + struct msi_desc *entry = get_irq_desc_msi(desc); switch (entry->msi_attrib.type) { case PCI_CAP_ID_MSI: { @@ -252,21 +259,31 @@ void write_msi_msg(unsigned int irq, struct msi_msg *msg) entry->msg = *msg; } +void write_msi_msg(unsigned int irq, struct msi_msg *msg) +{ + struct irq_desc *desc = irq_to_desc(irq); + + write_msi_msg_desc(desc, msg); +} + void mask_msi_irq(unsigned int irq) { - msi_set_mask_bits(irq, 1, 1); - msix_flush_writes(irq); + struct irq_desc *desc = irq_to_desc(irq); + + msi_set_mask_bits(desc, 1, 1); + msix_flush_writes(desc); } void unmask_msi_irq(unsigned int irq) { - msi_set_mask_bits(irq, 1, 0); - msix_flush_writes(irq); + struct irq_desc *desc = irq_to_desc(irq); + + msi_set_mask_bits(desc, 1, 0); + msix_flush_writes(desc); } static int msi_free_irqs(struct pci_dev* dev); - static struct msi_desc* alloc_msi_entry(void) { struct msi_desc *entry; @@ -303,9 
+320,11 @@ static void __pci_restore_msi_state(struct pci_dev *dev) pci_intx_for_msi(dev, 0); msi_set_enable(dev, 0); write_msi_msg(dev->irq, &entry->msg); - if (entry->msi_attrib.maskbit) - msi_set_mask_bits(dev->irq, entry->msi_attrib.maskbits_mask, + if (entry->msi_attrib.maskbit) { + struct irq_desc *desc = irq_to_desc(dev->irq); + msi_set_mask_bits(desc, entry->msi_attrib.maskbits_mask, entry->msi_attrib.masked); + } pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control); control &= ~PCI_MSI_FLAGS_QSIZE; @@ -327,8 +346,9 @@ static void __pci_restore_msix_state(struct pci_dev *dev) msix_set_enable(dev, 0); list_for_each_entry(entry, &dev->msi_list, list) { + struct irq_desc *desc = irq_to_desc(entry->irq); write_msi_msg(entry->irq, &entry->msg); - msi_set_mask_bits(entry->irq, 1, entry->msi_attrib.masked); + msi_set_mask_bits(desc, 1, entry->msi_attrib.masked); } BUG_ON(list_empty(&dev->msi_list)); @@ -596,7 +616,8 @@ void pci_msi_shutdown(struct pci_dev* dev) /* Return the the pci reset with msi irqs unmasked */ if (entry->msi_attrib.maskbit) { u32 mask = entry->msi_attrib.maskbits_mask; - msi_set_mask_bits(dev->irq, mask, ~mask); + struct irq_desc *desc = irq_to_desc(dev->irq); + msi_set_mask_bits(desc, mask, ~mask); } if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) return; diff --git a/include/linux/msi.h b/include/linux/msi.h index 8f2939227207..d2b8a1e8ca11 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -10,8 +10,11 @@ struct msi_msg { }; /* Helper functions */ +struct irq_desc; extern void mask_msi_irq(unsigned int irq); extern void unmask_msi_irq(unsigned int irq); +extern void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); +extern void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); extern void read_msi_msg(unsigned int irq, struct msi_msg *msg); extern void write_msi_msg(unsigned int irq, struct msi_msg *msg); -- cgit 1.4.1 From 29c0177e6a4ac094302bed54a1d4bbb6b740a9ef Mon Sep 17 00:00:00 
2001 From: Rusty Russell Date: Sat, 13 Dec 2008 21:20:25 +1030 Subject: cpumask: change cpumask_scnprintf, cpumask_parse_user, cpulist_parse, and cpulist_scnprintf to take pointers. Impact: change calling convention of existing cpumask APIs Most cpumask functions started with cpus_: these have been replaced by cpumask_ ones which take struct cpumask pointers as expected. These four functions don't have good replacement names; fortunately they're rarely used, so we just change them over. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Acked-by: Ingo Molnar Cc: paulus@samba.org Cc: mingo@redhat.com Cc: tony.luck@intel.com Cc: ralf@linux-mips.org Cc: Greg Kroah-Hartman Cc: cl@linux-foundation.org Cc: srostedt@redhat.com --- arch/ia64/kernel/topology.c | 2 +- arch/mips/kernel/smp-cmp.c | 4 +- arch/powerpc/platforms/pseries/xics.c | 2 +- arch/x86/kernel/cpu/intel_cacheinfo.c | 4 +- arch/x86/kernel/setup_percpu.c | 2 +- drivers/base/cpu.c | 2 +- drivers/base/node.c | 4 +- drivers/base/topology.c | 4 +- drivers/pci/pci-sysfs.c | 4 +- drivers/pci/probe.c | 4 +- include/linux/cpumask.h | 87 +++++++++++++++++++++++------------ kernel/cpuset.c | 4 +- kernel/irq/proc.c | 4 +- kernel/profile.c | 4 +- kernel/sched.c | 4 +- kernel/sched_stats.h | 2 +- kernel/taskstats.c | 2 +- kernel/trace/trace.c | 4 +- mm/slub.c | 2 +- 19 files changed, 86 insertions(+), 59 deletions(-) (limited to 'drivers/pci') diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index c75b914f2d6b..a8d61a3e9a94 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -219,7 +219,7 @@ static ssize_t show_shared_cpu_map(struct cache_info *this_leaf, char *buf) cpumask_t shared_cpu_map; cpus_and(shared_cpu_map, this_leaf->shared_cpu_map, cpu_online_map); - len = cpumask_scnprintf(buf, NR_CPUS+1, shared_cpu_map); + len = cpumask_scnprintf(buf, NR_CPUS+1, &shared_cpu_map); len += sprintf(buf+len, "\n"); return len; } diff --git a/arch/mips/kernel/smp-cmp.c 
b/arch/mips/kernel/smp-cmp.c index 6789c1a12120..f27beca4b26d 100644 --- a/arch/mips/kernel/smp-cmp.c +++ b/arch/mips/kernel/smp-cmp.c @@ -51,10 +51,10 @@ static int __init allowcpus(char *str) int len; cpus_clear(cpu_allow_map); - if (cpulist_parse(str, cpu_allow_map) == 0) { + if (cpulist_parse(str, &cpu_allow_map) == 0) { cpu_set(0, cpu_allow_map); cpus_and(cpu_possible_map, cpu_possible_map, cpu_allow_map); - len = cpulist_scnprintf(buf, sizeof(buf)-1, cpu_possible_map); + len = cpulist_scnprintf(buf, sizeof(buf)-1, &cpu_possible_map); buf[len] = '\0'; pr_debug("Allowable CPUs: %s\n", buf); return 1; diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index e1904774a70f..64d24310ce7e 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -358,7 +358,7 @@ static void xics_set_affinity(unsigned int virq, cpumask_t cpumask) irq_server = get_irq_server(virq, 1); if (irq_server == -1) { char cpulist[128]; - cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask); + cpumask_scnprintf(cpulist, sizeof(cpulist), &cpumask); printk(KERN_WARNING "%s: No online cpus in the mask %s for irq %d\n", __func__, cpulist, virq); diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 3f46afbb1cf1..43ea612d3e9d 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -626,8 +626,8 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, cpumask_t *mask = &this_leaf->shared_cpu_map; n = type? 
- cpulist_scnprintf(buf, len-2, *mask): - cpumask_scnprintf(buf, len-2, *mask); + cpulist_scnprintf(buf, len-2, mask) : + cpumask_scnprintf(buf, len-2, mask); buf[n++] = '\n'; buf[n] = '\0'; } diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index ae0c0d3bb770..1c2084291f97 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -282,7 +282,7 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable) else cpu_clear(cpu, *mask); - cpulist_scnprintf(buf, sizeof(buf), *mask); + cpulist_scnprintf(buf, sizeof(buf), mask); printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf); } diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 64f5d54f7edc..4259072f5bd0 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -109,7 +109,7 @@ static SYSDEV_ATTR(crash_notes, 0400, show_crash_notes, NULL); */ static ssize_t print_cpus_map(char *buf, cpumask_t *map) { - int n = cpulist_scnprintf(buf, PAGE_SIZE-2, *map); + int n = cpulist_scnprintf(buf, PAGE_SIZE-2, map); buf[n++] = '\n'; buf[n] = '\0'; diff --git a/drivers/base/node.c b/drivers/base/node.c index f5207090885a..91636cd8b6c9 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -30,8 +30,8 @@ static ssize_t node_read_cpumap(struct sys_device *dev, int type, char *buf) BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1)); len = type? - cpulist_scnprintf(buf, PAGE_SIZE-2, *mask): - cpumask_scnprintf(buf, PAGE_SIZE-2, *mask); + cpulist_scnprintf(buf, PAGE_SIZE-2, mask) : + cpumask_scnprintf(buf, PAGE_SIZE-2, mask); buf[len++] = '\n'; buf[len] = '\0'; return len; diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 199cd97e32e6..a8bc1cbcfa7c 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -49,8 +49,8 @@ static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf) if (len > 1) { n = type? 
- cpulist_scnprintf(buf, len-2, *mask): - cpumask_scnprintf(buf, len-2, *mask); + cpulist_scnprintf(buf, len-2, mask) : + cpumask_scnprintf(buf, len-2, mask); buf[n++] = '\n'; buf[n] = '\0'; } diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 5d72866897a8..c88485860a0a 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -74,7 +74,7 @@ static ssize_t local_cpus_show(struct device *dev, int len; mask = pcibus_to_cpumask(to_pci_dev(dev)->bus); - len = cpumask_scnprintf(buf, PAGE_SIZE-2, mask); + len = cpumask_scnprintf(buf, PAGE_SIZE-2, &mask); buf[len++] = '\n'; buf[len] = '\0'; return len; @@ -88,7 +88,7 @@ static ssize_t local_cpulist_show(struct device *dev, int len; mask = pcibus_to_cpumask(to_pci_dev(dev)->bus); - len = cpulist_scnprintf(buf, PAGE_SIZE-2, mask); + len = cpulist_scnprintf(buf, PAGE_SIZE-2, &mask); buf[len++] = '\n'; buf[len] = '\0'; return len; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 003a9b3c293f..5b3f5937ecf5 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -55,8 +55,8 @@ static ssize_t pci_bus_show_cpuaffinity(struct device *dev, cpumask = pcibus_to_cpumask(to_pci_bus(dev)); ret = type? 
- cpulist_scnprintf(buf, PAGE_SIZE-2, cpumask): - cpumask_scnprintf(buf, PAGE_SIZE-2, cpumask); + cpulist_scnprintf(buf, PAGE_SIZE-2, &cpumask) : + cpumask_scnprintf(buf, PAGE_SIZE-2, &cpumask); buf[ret++] = '\n'; buf[ret] = '\0'; return ret; diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 21e1dd43e52a..94a2ab88ae85 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -339,36 +339,6 @@ extern cpumask_t cpu_mask_all; #endif #define CPUMASK_PTR(v, m) cpumask_t *v = &(m->v) -#define cpumask_scnprintf(buf, len, src) \ - __cpumask_scnprintf((buf), (len), &(src), NR_CPUS) -static inline int __cpumask_scnprintf(char *buf, int len, - const cpumask_t *srcp, int nbits) -{ - return bitmap_scnprintf(buf, len, srcp->bits, nbits); -} - -#define cpumask_parse_user(ubuf, ulen, dst) \ - __cpumask_parse_user((ubuf), (ulen), &(dst), NR_CPUS) -static inline int __cpumask_parse_user(const char __user *buf, int len, - cpumask_t *dstp, int nbits) -{ - return bitmap_parse_user(buf, len, dstp->bits, nbits); -} - -#define cpulist_scnprintf(buf, len, src) \ - __cpulist_scnprintf((buf), (len), &(src), NR_CPUS) -static inline int __cpulist_scnprintf(char *buf, int len, - const cpumask_t *srcp, int nbits) -{ - return bitmap_scnlistprintf(buf, len, srcp->bits, nbits); -} - -#define cpulist_parse(buf, dst) __cpulist_parse((buf), &(dst), NR_CPUS) -static inline int __cpulist_parse(const char *buf, cpumask_t *dstp, int nbits) -{ - return bitmap_parselist(buf, dstp->bits, nbits); -} - #define cpu_remap(oldbit, old, new) \ __cpu_remap((oldbit), &(old), &(new), NR_CPUS) static inline int __cpu_remap(int oldbit, @@ -945,6 +915,63 @@ static inline void cpumask_copy(struct cpumask *dstp, */ #define cpumask_of(cpu) (get_cpu_mask(cpu)) +/** + * cpumask_scnprintf - print a cpumask into a string as comma-separated hex + * @buf: the buffer to sprintf into + * @len: the length of the buffer + * @srcp: the cpumask to print + * + * If len is zero, returns zero. 
Otherwise returns the length of the + * (nul-terminated) @buf string. + */ +static inline int cpumask_scnprintf(char *buf, int len, + const struct cpumask *srcp) +{ + return bitmap_scnprintf(buf, len, srcp->bits, nr_cpumask_bits); +} + +/** + * cpumask_parse_user - extract a cpumask from a user string + * @buf: the buffer to extract from + * @len: the length of the buffer + * @dstp: the cpumask to set. + * + * Returns -errno, or 0 for success. + */ +static inline int cpumask_parse_user(const char __user *buf, int len, + struct cpumask *dstp) +{ + return bitmap_parse_user(buf, len, dstp->bits, nr_cpumask_bits); +} + +/** + * cpulist_scnprintf - print a cpumask into a string as comma-separated list + * @buf: the buffer to sprintf into + * @len: the length of the buffer + * @srcp: the cpumask to print + * + * If len is zero, returns zero. Otherwise returns the length of the + * (nul-terminated) @buf string. + */ +static inline int cpulist_scnprintf(char *buf, int len, + const struct cpumask *srcp) +{ + return bitmap_scnlistprintf(buf, len, srcp->bits, nr_cpumask_bits); +} + +/** + * cpulist_parse - extract a cpumask from a user string of ranges + * @buf: the buffer to extract from + * @dstp: the cpumask to set. + * + * Returns -errno, or 0 for success.
+ */ +static inline int cpulist_parse(const char *buf, struct cpumask *dstp) +{ + return bitmap_parselist(buf, dstp->bits, nr_cpumask_bits); +} + /** * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * * @bitmap: the bitmap diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 96c0ba13b8cd..39c1a4c1c5a9 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -896,7 +896,7 @@ static int update_cpumask(struct cpuset *cs, const char *buf) if (!*buf) { cpus_clear(trialcs.cpus_allowed); } else { - retval = cpulist_parse(buf, trialcs.cpus_allowed); + retval = cpulist_parse(buf, &trialcs.cpus_allowed); if (retval < 0) return retval; @@ -1482,7 +1482,7 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs) mask = cs->cpus_allowed; mutex_unlock(&callback_mutex); - return cpulist_scnprintf(page, PAGE_SIZE, mask); + return cpulist_scnprintf(page, PAGE_SIZE, &mask); } static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index d257e7d6a8a4..f293349d49d0 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -47,7 +47,7 @@ static ssize_t irq_affinity_proc_write(struct file *file, irq_balancing_disabled(irq)) return -EIO; - err = cpumask_parse_user(buffer, count, new_value); + err = cpumask_parse_user(buffer, count, &new_value); if (err) return err; @@ -95,7 +95,7 @@ static ssize_t default_affinity_write(struct file *file, cpumask_t new_value; int err; - err = cpumask_parse_user(buffer, count, new_value); + err = cpumask_parse_user(buffer, count, &new_value); if (err) return err; diff --git a/kernel/profile.c b/kernel/profile.c index dc41827fbfee..7d620dfdde59 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -442,7 +442,7 @@ void profile_tick(int type) static int prof_cpu_mask_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_scnprintf(page, count, *(cpumask_t *)data); + int len = cpumask_scnprintf(page, count, (cpumask_t *)data); if 
(count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); @@ -456,7 +456,7 @@ static int prof_cpu_mask_write_proc(struct file *file, unsigned long full_count = count, err; cpumask_t new_value; - err = cpumask_parse_user(buffer, count, new_value); + err = cpumask_parse_user(buffer, count, &new_value); if (err) return err; diff --git a/kernel/sched.c b/kernel/sched.c index e4bb1dd7b308..d2d16d1273b1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6666,7 +6666,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, struct sched_group *group = sd->groups; char str[256]; - cpulist_scnprintf(str, sizeof(str), sd->span); + cpulist_scnprintf(str, sizeof(str), &sd->span); cpus_clear(*groupmask); printk(KERN_DEBUG "%*s domain %d: ", level, "", level); @@ -6720,7 +6720,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, cpus_or(*groupmask, *groupmask, group->cpumask); - cpulist_scnprintf(str, sizeof(str), group->cpumask); + cpulist_scnprintf(str, sizeof(str), &group->cpumask); printk(KERN_CONT " %s", str); group = group->next; diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 7dbf72a2b02c..6beff1e4eeae 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h @@ -42,7 +42,7 @@ static int show_schedstat(struct seq_file *seq, void *v) for_each_domain(cpu, sd) { enum cpu_idle_type itype; - cpumask_scnprintf(mask_str, mask_len, sd->span); + cpumask_scnprintf(mask_str, mask_len, &sd->span); seq_printf(seq, "domain%d %s", dcount++, mask_str); for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES; itype++) { diff --git a/kernel/taskstats.c b/kernel/taskstats.c index bd6be76303cf..6d7dc4ec4aa5 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -352,7 +352,7 @@ static int parse(struct nlattr *na, cpumask_t *mask) if (!data) return -ENOMEM; nla_strlcpy(data, na, len); - ret = cpulist_parse(data, *mask); + ret = cpulist_parse(data, mask); kfree(data); return ret; } diff --git 
a/kernel/trace/trace.c b/kernel/trace/trace.c index d86e3252f300..d2e75479dc50 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2126,7 +2126,7 @@ tracing_cpumask_read(struct file *filp, char __user *ubuf, mutex_lock(&tracing_cpumask_update_lock); - len = cpumask_scnprintf(mask_str, count, tracing_cpumask); + len = cpumask_scnprintf(mask_str, count, &tracing_cpumask); if (count - len < 2) { count = -EINVAL; goto out_err; @@ -2147,7 +2147,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, int err, cpu; mutex_lock(&tracing_cpumask_update_lock); - err = cpumask_parse_user(ubuf, count, tracing_cpumask_new); + err = cpumask_parse_user(ubuf, count, &tracing_cpumask_new); if (err) goto err_unlock; diff --git a/mm/slub.c b/mm/slub.c index a2cd47d89e0a..8e516e29f989 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3626,7 +3626,7 @@ static int list_locations(struct kmem_cache *s, char *buf, len < PAGE_SIZE - 60) { len += sprintf(buf + len, " cpus="); len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50, - l->cpus); + &l->cpus); } if (num_online_nodes() > 1 && !nodes_empty(l->nodes) && -- cgit 1.4.1 From 17483a1f34c970e6c2cb8c082d4441bfabbe88a9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 12 Dec 2008 13:14:18 -0800 Subject: sparseirq: fix !SMP building, #2 Impact: build fix. Make intr_remapping.c include smp.h so that boot_cpu_id can be used there. Also remove the old change that disabled sparseirq with !SMP. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- drivers/pci/intr_remapping.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 29073532f94c..60a008857a38 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -240,7 +240,7 @@ config X86_HAS_BOOT_CPU_ID config SPARSE_IRQ bool "Support sparse irq numbering" - depends on (PCI_MSI || HT_IRQ) && SMP + depends on PCI_MSI || HT_IRQ default y help This enables support for sparse
irq, esp for msi/msi-x. You may need diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index c9958ec5e25e..f78371b22529 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "intr_remapping.h" -- cgit 1.4.1 From 824877111cd7f2b4fd2fe6947c5c5cbbb3ac5bd8 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 27 Dec 2008 18:32:28 +0530 Subject: x86, pci: move arch/x86/pci/pci.h to arch/x86/include/asm/pci_x86.h Impact: cleanup Now that arch/x86/pci/pci.h is used in a number of other places as well, move the lowlevel x86 pci definitions into the architecture include files. (not to be confused with the existing arch/x86/include/asm/pci.h file, which provides public details about x86 PCI) Tested on: X86_32_UP, X86_32_SMP and X86_64_SMP Signed-off-by: Jaswinder Singh Rajput Acked-by: Jesse Barnes Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pci_x86.h | 165 +++++++++++++++++++++++++++++++++++++ arch/x86/kernel/mmconf-fam10h_64.c | 3 +- arch/x86/kernel/reboot.c | 3 +- arch/x86/pci/acpi.c | 2 +- arch/x86/pci/amd_bus.c | 2 +- arch/x86/pci/common.c | 3 +- arch/x86/pci/direct.c | 2 +- arch/x86/pci/early.c | 2 +- arch/x86/pci/fixup.c | 3 +- arch/x86/pci/i386.c | 2 +- arch/x86/pci/init.c | 2 +- arch/x86/pci/irq.c | 3 +- arch/x86/pci/legacy.c | 2 +- arch/x86/pci/mmconfig-shared.c | 3 +- arch/x86/pci/mmconfig_32.c | 2 +- arch/x86/pci/mmconfig_64.c | 3 +- arch/x86/pci/numaq_32.c | 2 +- arch/x86/pci/olpc.c | 2 +- arch/x86/pci/pcbios.c | 5 +- arch/x86/pci/pci.h | 162 ------------------------------------ arch/x86/pci/visws.c | 3 +- drivers/pci/hotplug/cpqphp_core.c | 2 +- drivers/pci/hotplug/cpqphp_pci.c | 2 +- drivers/pci/hotplug/ibmphp_core.c | 2 +- 24 files changed, 188 insertions(+), 194 deletions(-) create mode 100644 arch/x86/include/asm/pci_x86.h delete mode 100644 arch/x86/pci/pci.h (limited to 'drivers/pci') diff --git 
a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h new file mode 100644 index 000000000000..e60fd3e14bdf --- /dev/null +++ b/arch/x86/include/asm/pci_x86.h @@ -0,0 +1,165 @@ +/* + * Low-Level PCI Access for i386 machines. + * + * (c) 1999 Martin Mares + */ + +#undef DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +#define PCI_PROBE_BIOS 0x0001 +#define PCI_PROBE_CONF1 0x0002 +#define PCI_PROBE_CONF2 0x0004 +#define PCI_PROBE_MMCONF 0x0008 +#define PCI_PROBE_MASK 0x000f +#define PCI_PROBE_NOEARLY 0x0010 + +#define PCI_NO_CHECKS 0x0400 +#define PCI_USE_PIRQ_MASK 0x0800 +#define PCI_ASSIGN_ROMS 0x1000 +#define PCI_BIOS_IRQ_SCAN 0x2000 +#define PCI_ASSIGN_ALL_BUSSES 0x4000 +#define PCI_CAN_SKIP_ISA_ALIGN 0x8000 +#define PCI_USE__CRS 0x10000 +#define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 +#define PCI_HAS_IO_ECS 0x40000 +#define PCI_NOASSIGN_ROMS 0x80000 + +extern unsigned int pci_probe; +extern unsigned long pirq_table_addr; + +enum pci_bf_sort_state { + pci_bf_sort_default, + pci_force_nobf, + pci_force_bf, + pci_dmi_bf, +}; + +/* pci-i386.c */ + +extern unsigned int pcibios_max_latency; + +void pcibios_resource_survey(void); + +/* pci-pc.c */ + +extern int pcibios_last_bus; +extern struct pci_bus *pci_root_bus; +extern struct pci_ops pci_root_ops; + +/* pci-irq.c */ + +struct irq_info { + u8 bus, devfn; /* Bus, device and function */ + struct { + u8 link; /* IRQ line ID, chipset dependent, + 0 = not routed */ + u16 bitmap; /* Available IRQs */ + } __attribute__((packed)) irq[4]; + u8 slot; /* Slot number, 0=onboard */ + u8 rfu; +} __attribute__((packed)); + +struct irq_routing_table { + u32 signature; /* PIRQ_SIGNATURE should be here */ + u16 version; /* PIRQ_VERSION */ + u16 size; /* Table size in bytes */ + u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */ + u16 exclusive_irqs; /* IRQs devoted exclusively to + PCI usage */ + u16 rtr_vendor, rtr_device; /* Vendor and device ID of + interrupt router */ + 
u32 miniport_data; /* Crap */ + u8 rfu[11]; + u8 checksum; /* Modulo 256 checksum must give 0 */ + struct irq_info slots[0]; +} __attribute__((packed)); + +extern unsigned int pcibios_irq_mask; + +extern int pcibios_scanned; +extern spinlock_t pci_config_lock; + +extern int (*pcibios_enable_irq)(struct pci_dev *dev); +extern void (*pcibios_disable_irq)(struct pci_dev *dev); + +struct pci_raw_ops { + int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn, + int reg, int len, u32 *val); + int (*write)(unsigned int domain, unsigned int bus, unsigned int devfn, + int reg, int len, u32 val); +}; + +extern struct pci_raw_ops *raw_pci_ops; +extern struct pci_raw_ops *raw_pci_ext_ops; + +extern struct pci_raw_ops pci_direct_conf1; +extern bool port_cf9_safe; + +/* arch_initcall level */ +extern int pci_direct_probe(void); +extern void pci_direct_init(int type); +extern void pci_pcbios_init(void); +extern int pci_olpc_init(void); +extern void __init dmi_check_pciprobe(void); +extern void __init dmi_check_skip_isa_align(void); + +/* some common used subsys_initcalls */ +extern int __init pci_acpi_init(void); +extern int __init pcibios_irq_init(void); +extern int __init pci_visws_init(void); +extern int __init pci_numaq_init(void); +extern int __init pcibios_init(void); + +/* pci-mmconfig.c */ + +extern int __init pci_mmcfg_arch_init(void); +extern void __init pci_mmcfg_arch_free(void); + +/* + * AMD Fam10h CPUs are buggy, and cannot access MMIO config space + * on their northbrige except through the * %eax register. As such, you MUST + * NOT use normal IOMEM accesses, you need to only use the magic mmio-config + * accessor functions. + * In fact just use pci_config_*, nothing else please. 
+ */ +static inline unsigned char mmio_config_readb(void __iomem *pos) +{ + u8 val; + asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos)); + return val; +} + +static inline unsigned short mmio_config_readw(void __iomem *pos) +{ + u16 val; + asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos)); + return val; +} + +static inline unsigned int mmio_config_readl(void __iomem *pos) +{ + u32 val; + asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos)); + return val; +} + +static inline void mmio_config_writeb(void __iomem *pos, u8 val) +{ + asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory"); +} + +static inline void mmio_config_writew(void __iomem *pos, u16 val) +{ + asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory"); +} + +static inline void mmio_config_writel(void __iomem *pos, u32 val) +{ + asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory"); +} diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index efc2f361fe85..666e43df51f9 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -13,8 +13,7 @@ #include #include #include - -#include "../pci/pci.h" +#include struct pci_hostbridge_probe { u32 bus; diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index b165eb0884ed..a90913cccfb7 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -12,6 +12,7 @@ #include #include #include +#include #ifdef CONFIG_X86_32 # include @@ -22,8 +23,6 @@ #endif #include -#include "../pci/pci.h" - /* * Power off function, if any diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 1d88d2b39771..9e5752fe4d15 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -4,7 +4,7 @@ #include #include #include -#include "pci.h" +#include struct pci_root_info { char *name; diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 22e057665e55..9bb09823b362 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c 
@@ -2,7 +2,7 @@ #include #include #include -#include "pci.h" +#include #ifdef CONFIG_X86_64 #include diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index bb1a01f089e2..62ddb73e09ed 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -14,8 +14,7 @@ #include #include #include - -#include "pci.h" +#include unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | PCI_PROBE_MMCONF; diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index 9a5af6c8fbe9..bd13c3e4c6db 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c @@ -5,7 +5,7 @@ #include #include #include -#include "pci.h" +#include /* * Functions for accessing PCI base (first 256 bytes) and extended diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c index 86631ccbc25a..f6adf2c6d751 100644 --- a/arch/x86/pci/early.c +++ b/arch/x86/pci/early.c @@ -2,7 +2,7 @@ #include #include #include -#include "pci.h" +#include /* Direct PCI access. This is used for PCI accesses in early boot before the PCI subsystem works. */ diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 2051dc96b8e9..7d388d5cf548 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -6,8 +6,7 @@ #include #include #include -#include "pci.h" - +#include static void __devinit pci_fixup_i450nx(struct pci_dev *d) { diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 844df0cbbd3e..e51bf2cda4b0 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -34,8 +34,8 @@ #include #include +#include -#include "pci.h" static int skip_isa_ioresource_align(struct pci_dev *dev) { diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c index d6c950f81858..bec3b048e72b 100644 --- a/arch/x86/pci/init.c +++ b/arch/x86/pci/init.c @@ -1,6 +1,6 @@ #include #include -#include "pci.h" +#include /* arch_initcall has too random ordering, so call the initializers in the right sequence from here. 
*/ diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index bf69dbe08bff..373b9afe6d44 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -16,8 +16,7 @@ #include #include #include - -#include "pci.h" +#include #define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24)) #define PIRQ_VERSION 0x0100 diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index b722dd481b39..f1065b129e9c 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -3,7 +3,7 @@ */ #include #include -#include "pci.h" +#include /* * Discover remaining PCI buses in case there are peer host bridges. diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 654a2234f8f3..89bf9242c80a 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -15,8 +15,7 @@ #include #include #include - -#include "pci.h" +#include /* aperture is up to 256MB but BIOS may reserve less */ #define MMCONFIG_APER_MIN (2 * 1024*1024) diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index f3c761dce695..8b2d561046a3 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -13,7 +13,7 @@ #include #include #include -#include "pci.h" +#include /* Assume systems with more busses have correct MCFG */ #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index a1994163c99d..30007ffc8e11 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -10,8 +10,7 @@ #include #include #include - -#include "pci.h" +#include /* Static virtual mapping of the MMCONFIG aperture */ struct mmcfg_virt { diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 1177845d3186..2089354968a2 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c @@ -7,7 +7,7 @@ #include #include #include -#include "pci.h" +#include #define XQUAD_PORTIO_BASE 0xfe400000 #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per 
quad. */ diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c index e11e9e803d5f..b889d824f7c6 100644 --- a/arch/x86/pci/olpc.c +++ b/arch/x86/pci/olpc.c @@ -29,7 +29,7 @@ #include #include #include -#include "pci.h" +#include /* * In the tables below, the first two line (8 longwords) are the diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 37472fc6f729..b82cae970dfd 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c @@ -6,9 +6,8 @@ #include #include #include -#include "pci.h" -#include "pci-functions.h" - +#include +#include /* BIOS32 signature: "_32_" */ #define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h deleted file mode 100644 index 1959018aac02..000000000000 --- a/arch/x86/pci/pci.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Low-Level PCI Access for i386 machines. - * - * (c) 1999 Martin Mares - */ - -#undef DEBUG - -#ifdef DEBUG -#define DBG(x...) printk(x) -#else -#define DBG(x...) 
-#endif - -#define PCI_PROBE_BIOS 0x0001 -#define PCI_PROBE_CONF1 0x0002 -#define PCI_PROBE_CONF2 0x0004 -#define PCI_PROBE_MMCONF 0x0008 -#define PCI_PROBE_MASK 0x000f -#define PCI_PROBE_NOEARLY 0x0010 - -#define PCI_NO_CHECKS 0x0400 -#define PCI_USE_PIRQ_MASK 0x0800 -#define PCI_ASSIGN_ROMS 0x1000 -#define PCI_BIOS_IRQ_SCAN 0x2000 -#define PCI_ASSIGN_ALL_BUSSES 0x4000 -#define PCI_CAN_SKIP_ISA_ALIGN 0x8000 -#define PCI_USE__CRS 0x10000 -#define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 -#define PCI_HAS_IO_ECS 0x40000 -#define PCI_NOASSIGN_ROMS 0x80000 - -extern unsigned int pci_probe; -extern unsigned long pirq_table_addr; - -enum pci_bf_sort_state { - pci_bf_sort_default, - pci_force_nobf, - pci_force_bf, - pci_dmi_bf, -}; - -/* pci-i386.c */ - -extern unsigned int pcibios_max_latency; - -void pcibios_resource_survey(void); - -/* pci-pc.c */ - -extern int pcibios_last_bus; -extern struct pci_bus *pci_root_bus; -extern struct pci_ops pci_root_ops; - -/* pci-irq.c */ - -struct irq_info { - u8 bus, devfn; /* Bus, device and function */ - struct { - u8 link; /* IRQ line ID, chipset dependent, 0=not routed */ - u16 bitmap; /* Available IRQs */ - } __attribute__((packed)) irq[4]; - u8 slot; /* Slot number, 0=onboard */ - u8 rfu; -} __attribute__((packed)); - -struct irq_routing_table { - u32 signature; /* PIRQ_SIGNATURE should be here */ - u16 version; /* PIRQ_VERSION */ - u16 size; /* Table size in bytes */ - u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */ - u16 exclusive_irqs; /* IRQs devoted exclusively to PCI usage */ - u16 rtr_vendor, rtr_device; /* Vendor and device ID of interrupt router */ - u32 miniport_data; /* Crap */ - u8 rfu[11]; - u8 checksum; /* Modulo 256 checksum must give zero */ - struct irq_info slots[0]; -} __attribute__((packed)); - -extern unsigned int pcibios_irq_mask; - -extern int pcibios_scanned; -extern spinlock_t pci_config_lock; - -extern int (*pcibios_enable_irq)(struct pci_dev *dev); -extern void (*pcibios_disable_irq)(struct 
pci_dev *dev); - -struct pci_raw_ops { - int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn, - int reg, int len, u32 *val); - int (*write)(unsigned int domain, unsigned int bus, unsigned int devfn, - int reg, int len, u32 val); -}; - -extern struct pci_raw_ops *raw_pci_ops; -extern struct pci_raw_ops *raw_pci_ext_ops; - -extern struct pci_raw_ops pci_direct_conf1; -extern bool port_cf9_safe; - -/* arch_initcall level */ -extern int pci_direct_probe(void); -extern void pci_direct_init(int type); -extern void pci_pcbios_init(void); -extern int pci_olpc_init(void); -extern void __init dmi_check_pciprobe(void); -extern void __init dmi_check_skip_isa_align(void); - -/* some common used subsys_initcalls */ -extern int __init pci_acpi_init(void); -extern int __init pcibios_irq_init(void); -extern int __init pci_visws_init(void); -extern int __init pci_numaq_init(void); -extern int __init pcibios_init(void); - -/* pci-mmconfig.c */ - -extern int __init pci_mmcfg_arch_init(void); -extern void __init pci_mmcfg_arch_free(void); - -/* - * AMD Fam10h CPUs are buggy, and cannot access MMIO config space - * on their northbrige except through the * %eax register. As such, you MUST - * NOT use normal IOMEM accesses, you need to only use the magic mmio-config - * accessor functions. - * In fact just use pci_config_*, nothing else please. 
- */ -static inline unsigned char mmio_config_readb(void __iomem *pos) -{ - u8 val; - asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos)); - return val; -} - -static inline unsigned short mmio_config_readw(void __iomem *pos) -{ - u16 val; - asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos)); - return val; -} - -static inline unsigned int mmio_config_readl(void __iomem *pos) -{ - u32 val; - asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos)); - return val; -} - -static inline void mmio_config_writeb(void __iomem *pos, u8 val) -{ - asm volatile("movb %%al,(%1)" :: "a" (val), "r" (pos) : "memory"); -} - -static inline void mmio_config_writew(void __iomem *pos, u16 val) -{ - asm volatile("movw %%ax,(%1)" :: "a" (val), "r" (pos) : "memory"); -} - -static inline void mmio_config_writel(void __iomem *pos, u32 val) -{ - asm volatile("movl %%eax,(%1)" :: "a" (val), "r" (pos) : "memory"); -} diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c index 42f4cb19faca..16d0c0eb0d19 100644 --- a/arch/x86/pci/visws.c +++ b/arch/x86/pci/visws.c @@ -9,11 +9,10 @@ #include #include +#include #include #include -#include "pci.h" - static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; } static void pci_visws_disable_irq(struct pci_dev *dev) { } diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c index 8514c3a1746a..c2e1bcbb28a7 100644 --- a/drivers/pci/hotplug/cpqphp_core.c +++ b/drivers/pci/hotplug/cpqphp_core.c @@ -45,7 +45,7 @@ #include "cpqphp.h" #include "cpqphp_nvram.h" -#include "../../../arch/x86/pci/pci.h" /* horrible hack showing how processor dependent we are... 
*/ +#include /* Global variables */ diff --git a/drivers/pci/hotplug/cpqphp_pci.c b/drivers/pci/hotplug/cpqphp_pci.c index 09021930589f..df146be9d2e9 100644 --- a/drivers/pci/hotplug/cpqphp_pci.c +++ b/drivers/pci/hotplug/cpqphp_pci.c @@ -37,7 +37,7 @@ #include "../pci.h" #include "cpqphp.h" #include "cpqphp_nvram.h" -#include "../../../arch/x86/pci/pci.h" /* horrible hack showing how processor dependent we are... */ +#include u8 cpqhp_nic_irq; diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c index 633e743442ac..dd18f857dfb0 100644 --- a/drivers/pci/hotplug/ibmphp_core.c +++ b/drivers/pci/hotplug/ibmphp_core.c @@ -35,7 +35,7 @@ #include #include #include "../pci.h" -#include "../../../arch/x86/pci/pci.h" /* for struct irq_routing_table */ +#include /* for struct irq_routing_table */ #include "ibmphp.h" #define attn_on(sl) ibmphp_hpc_writeslot (sl, HPC_SLOT_ATTNON) -- cgit 1.4.1 From 015ab17dc2e9de805c26e74f498b12ee5e8de07e Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Thu, 20 Nov 2008 14:04:20 +0000 Subject: intel-iommu: remove some unused struct intel_iommu fields The seg, saved_msg and sysdev fields appear to be unused since before the code was first merged. linux/msi.h is not needed in linux/intel-iommu.h anymore since there is no longer a reference to struct msi_msg. The MSI code in drivers/pci/intel-iommu.c still has linux/msi.h included via linux/dmar.h. linux/sysdev.h isn't needed because there is no reference to struct sys_device. 
Signed-off-by: Mark McLoughlin Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 1 - include/linux/intel-iommu.h | 5 ----- 2 files changed, 6 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 5c8baa43ac9c..8e5a445ba93a 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 3d017cfd245b..1bff7bf1bc2c 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -23,8 +23,6 @@ #define _INTEL_IOMMU_H_ #include -#include -#include #include #include #include @@ -289,7 +287,6 @@ struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 cap; u64 ecap; - int seg; u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ spinlock_t register_lock; /* protect register handling */ int seq_id; /* sequence id of the iommu */ @@ -302,8 +299,6 @@ struct intel_iommu { unsigned int irq; unsigned char name[7]; /* Device Name */ - struct msi_msg saved_msg; - struct sys_device sysdev; struct iommu_flush flush; #endif struct q_inval *qi; /* Queued invalidation info */ -- cgit 1.4.1 From 519a05491586dad04e687660e54c57882315b22b Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Thu, 20 Nov 2008 14:21:13 +0000 Subject: intel-iommu: make init_dmars() static init_dmars() is not used outside of drivers/pci/intel-iommu.c Signed-off-by: Mark McLoughlin Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 2 +- include/linux/dma_remapping.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 8e5a445ba93a..95ae3a9aea8a 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1589,7 +1589,7 @@ static inline void iommu_prepare_isa(void) } #endif /* !CONFIG_DMAR_FLPY_WA */ -int 
__init init_dmars(void) +static int __init init_dmars(void) { struct dmar_drhd_unit *drhd; struct dmar_rmrr_unit *rmrr; diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 952df39c989d..cf92c4924b8c 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -141,7 +141,6 @@ struct device_domain_info { struct dmar_domain *domain; /* pointer to domain */ }; -extern int init_dmars(void); extern void free_dmar_iommu(struct intel_iommu *iommu); extern int dmar_disabled; -- cgit 1.4.1 From f27be03b271851fd54529f292c0f25b4c1f1a553 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Thu, 20 Nov 2008 15:49:43 +0000 Subject: intel-iommu: move DMA_32/64BIT_PFN into intel-iommu.c Signed-off-by: Mark McLoughlin Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 3 +++ include/linux/dma_remapping.h | 5 ----- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 95ae3a9aea8a..6fadbb9bc180 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -53,6 +53,9 @@ #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) +#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) +#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) static void flush_unmaps_timeout(unsigned long data); diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index cf92c4924b8c..2e5a5c0b6acd 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -9,11 +9,6 @@ #define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) #define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) -#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) -#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) -#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) - - /* * 0: Present * 1-11: Reserved -- cgit 1.4.1 From 46b08e1a76b758193b0e7b889c6486a16eb1e9e2 Mon Sep 17 00:00:00 2001 From: Mark 
McLoughlin Date: Thu, 20 Nov 2008 15:49:44 +0000 Subject: intel-iommu: move root entry defs from dma_remapping.h We keep the struct root_entry forward declaration for the pointer in struct intel_iommu. Signed-off-by: Mark McLoughlin Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 33 +++++++++++++++++++++++++++++++++ include/linux/dma_remapping.h | 34 +--------------------------------- 2 files changed, 34 insertions(+), 33 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 6fadbb9bc180..29bf2d8176e2 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -57,6 +57,39 @@ #define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) #define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) +/* + * 0: Present + * 1-11: Reserved + * 12-63: Context Ptr (12 - (haw-1)) + * 64-127: Reserved + */ +struct root_entry { + u64 val; + u64 rsvd1; +}; +#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) +static inline bool root_present(struct root_entry *root) +{ + return (root->val & 1); +} +static inline void set_root_present(struct root_entry *root) +{ + root->val |= 1; +} +static inline void set_root_value(struct root_entry *root, unsigned long value) +{ + root->val |= value & VTD_PAGE_MASK; +} + +static inline struct context_entry * +get_context_addr_from_root(struct root_entry *root) +{ + return (struct context_entry *) + (root_present(root)?phys_to_virt( + root->val & VTD_PAGE_MASK) : + NULL); +} + static void flush_unmaps_timeout(unsigned long data); DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 2e5a5c0b6acd..d8521662a495 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -9,39 +9,7 @@ #define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) #define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) -/* - * 0: Present - * 1-11: Reserved - * 12-63: Context Ptr (12 
- (haw-1)) - * 64-127: Reserved - */ -struct root_entry { - u64 val; - u64 rsvd1; -}; -#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) -static inline bool root_present(struct root_entry *root) -{ - return (root->val & 1); -} -static inline void set_root_present(struct root_entry *root) -{ - root->val |= 1; -} -static inline void set_root_value(struct root_entry *root, unsigned long value) -{ - root->val |= value & VTD_PAGE_MASK; -} - -struct context_entry; -static inline struct context_entry * -get_context_addr_from_root(struct root_entry *root) -{ - return (struct context_entry *) - (root_present(root)?phys_to_virt( - root->val & VTD_PAGE_MASK) : - NULL); -} +struct root_entry; /* * low 64 bits: -- cgit 1.4.1 From 7a8fc25e0cc6e75fa6fdb0a856490e324218550b Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Thu, 20 Nov 2008 15:49:45 +0000 Subject: intel-iommu: move context entry defs out from dma_remapping.h Signed-off-by: Mark McLoughlin Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 38 ++++++++++++++++++++++++++++++++++++++ include/linux/dma_remapping.h | 38 -------------------------------------- 2 files changed, 38 insertions(+), 38 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 29bf2d8176e2..9d06f4bb6b5e 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -90,6 +90,44 @@ get_context_addr_from_root(struct root_entry *root) NULL); } +/* + * low 64 bits: + * 0: present + * 1: fault processing disable + * 2-3: translation type + * 12-63: address space root + * high 64 bits: + * 0-2: address width + * 3-6: aval + * 8-23: domain id + */ +struct context_entry { + u64 lo; + u64 hi; +}; +#define context_present(c) ((c).lo & 1) +#define context_fault_disable(c) (((c).lo >> 1) & 1) +#define context_translation_type(c) (((c).lo >> 2) & 3) +#define context_address_root(c) ((c).lo & VTD_PAGE_MASK) +#define context_address_width(c) ((c).hi & 7) +#define 
context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) + +#define context_set_present(c) do {(c).lo |= 1;} while (0) +#define context_set_fault_enable(c) \ + do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) +#define context_set_translation_type(c, val) \ + do { \ + (c).lo &= (((u64)-1) << 4) | 3; \ + (c).lo |= ((val) & 3) << 2; \ + } while (0) +#define CONTEXT_TT_MULTI_LEVEL 0 +#define context_set_address_root(c, val) \ + do {(c).lo |= (val) & VTD_PAGE_MASK; } while (0) +#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) +#define context_set_domain_id(c, val) \ + do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) +#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) + static void flush_unmaps_timeout(unsigned long data); DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index d8521662a495..9a88f7d0262f 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -11,44 +11,6 @@ struct root_entry; -/* - * low 64 bits: - * 0: present - * 1: fault processing disable - * 2-3: translation type - * 12-63: address space root - * high 64 bits: - * 0-2: address width - * 3-6: aval - * 8-23: domain id - */ -struct context_entry { - u64 lo; - u64 hi; -}; -#define context_present(c) ((c).lo & 1) -#define context_fault_disable(c) (((c).lo >> 1) & 1) -#define context_translation_type(c) (((c).lo >> 2) & 3) -#define context_address_root(c) ((c).lo & VTD_PAGE_MASK) -#define context_address_width(c) ((c).hi & 7) -#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) - -#define context_set_present(c) do {(c).lo |= 1;} while (0) -#define context_set_fault_enable(c) \ - do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) -#define context_set_translation_type(c, val) \ - do { \ - (c).lo &= (((u64)-1) << 4) | 3; \ - (c).lo |= ((val) & 3) << 2; \ - } while (0) -#define CONTEXT_TT_MULTI_LEVEL 0 -#define context_set_address_root(c, val) \ 
- do {(c).lo |= (val) & VTD_PAGE_MASK; } while (0) -#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) -#define context_set_domain_id(c, val) \ - do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) -#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) - /* * 0: readable * 1: writable -- cgit 1.4.1 From 622ba12a4c2148999bda9b891bfd0c6ddcb6c57e Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Thu, 20 Nov 2008 15:49:46 +0000 Subject: intel-iommu: move DMA PTE defs out of dma_remapping.h DMA_PTE_READ/WRITE are needed by kvm. Signed-off-by: Mark McLoughlin Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 22 ++++++++++++++++++++++ include/linux/dma_remapping.h | 22 ---------------------- 2 files changed, 22 insertions(+), 22 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 9d06f4bb6b5e..26c5402b6f7c 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -128,6 +128,28 @@ struct context_entry { do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) #define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) +/* + * 0: readable + * 1: writable + * 2-6: reserved + * 7: super page + * 8-11: available + * 12-63: Host physcial address + */ +struct dma_pte { + u64 val; +}; +#define dma_clear_pte(p) do {(p).val = 0;} while (0) + +#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) +#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) +#define dma_set_pte_prot(p, prot) \ + do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) +#define dma_pte_addr(p) ((p).val & VTD_PAGE_MASK) +#define dma_set_pte_addr(p, addr) do {\ + (p).val |= ((addr) & VTD_PAGE_MASK); } while (0) +#define dma_pte_present(p) (((p).val & 3) != 0) + static void flush_unmaps_timeout(unsigned long data); DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); diff --git a/include/linux/dma_remapping.h 
b/include/linux/dma_remapping.h index 9a88f7d0262f..9d5874e3bec9 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -11,31 +11,9 @@ struct root_entry; -/* - * 0: readable - * 1: writable - * 2-6: reserved - * 7: super page - * 8-11: available - * 12-63: Host physcial address - */ -struct dma_pte { - u64 val; -}; -#define dma_clear_pte(p) do {(p).val = 0;} while (0) - #define DMA_PTE_READ (1) #define DMA_PTE_WRITE (2) -#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) -#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) -#define dma_set_pte_prot(p, prot) \ - do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) -#define dma_pte_addr(p) ((p).val & VTD_PAGE_MASK) -#define dma_set_pte_addr(p, addr) do {\ - (p).val |= ((addr) & VTD_PAGE_MASK); } while (0) -#define dma_pte_present(p) (((p).val & 3) != 0) - struct intel_iommu; struct dmar_domain { -- cgit 1.4.1 From 99126f7ce14aff5f9371b2fa81fddb82be815794 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Thu, 20 Nov 2008 15:49:47 +0000 Subject: intel-iommu: move struct dmar_domain def out dma_remapping.h Signed-off-by: Mark McLoughlin Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 18 ++++++++++++++++++ include/linux/dma_remapping.h | 22 ++-------------------- 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 26c5402b6f7c..97c36b2ee611 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -150,6 +150,24 @@ struct dma_pte { (p).val |= ((addr) & VTD_PAGE_MASK); } while (0) #define dma_pte_present(p) (((p).val & 3) != 0) +struct dmar_domain { + int id; /* domain id */ + struct intel_iommu *iommu; /* back pointer to owning iommu */ + + struct list_head devices; /* all devices' list */ + struct iova_domain iovad; /* iova's that belong to this domain */ + + struct dma_pte *pgd; /* virtual address */ + spinlock_t 
mapping_lock; /* page table lock */ + int gaw; /* max guest address width */ + + /* adjusted guest address width, 0 is level 2 30-bit */ + int agaw; + +#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 + int flags; +}; + static void flush_unmaps_timeout(unsigned long data); DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 9d5874e3bec9..333014468f17 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -9,30 +9,12 @@ #define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) #define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) -struct root_entry; - #define DMA_PTE_READ (1) #define DMA_PTE_WRITE (2) struct intel_iommu; - -struct dmar_domain { - int id; /* domain id */ - struct intel_iommu *iommu; /* back pointer to owning iommu */ - - struct list_head devices; /* all devices' list */ - struct iova_domain iovad; /* iova's that belong to this domain */ - - struct dma_pte *pgd; /* virtual address */ - spinlock_t mapping_lock; /* page table lock */ - int gaw; /* max guest address width */ - - /* adjusted guest address width, 0 is level 2 30-bit */ - int agaw; - -#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 - int flags; -}; +struct dmar_domain; +struct root_entry; /* PCI domain-device relationship */ struct device_domain_info { -- cgit 1.4.1 From a647dacbb1389aa6a5fa631766c1eaea35905890 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Thu, 20 Nov 2008 15:49:48 +0000 Subject: intel-iommu: move struct device_domain_info out of dma_remapping.h Signed-off-by: Mark McLoughlin Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 10 ++++++++++ include/linux/dma_remapping.h | 10 ---------- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 97c36b2ee611..f23a02054bf7 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -168,6 +168,16 @@ 
struct dmar_domain { int flags; }; +/* PCI domain-device relationship */ +struct device_domain_info { + struct list_head link; /* link to domain siblings */ + struct list_head global; /* link to global list */ + u8 bus; /* PCI bus numer */ + u8 devfn; /* PCI devfn number */ + struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ + struct dmar_domain *domain; /* pointer to domain */ +}; + static void flush_unmaps_timeout(unsigned long data); DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 333014468f17..4ef5f6bc0d68 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -16,16 +16,6 @@ struct intel_iommu; struct dmar_domain; struct root_entry; -/* PCI domain-device relationship */ -struct device_domain_info { - struct list_head link; /* link to domain siblings */ - struct list_head global; /* link to global list */ - u8 bus; /* PCI bus numer */ - u8 devfn; /* PCI devfn number */ - struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ - struct dmar_domain *domain; /* pointer to domain */ -}; - extern void free_dmar_iommu(struct intel_iommu *iommu); extern int dmar_disabled; -- cgit 1.4.1 From 2abd7e167c1b281f99bb58d302225872bfae9123 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Thu, 20 Nov 2008 15:49:50 +0000 Subject: intel-iommu: move iommu_prepare_gfx_mapping() out of dma_remapping.h Signed-off-by: Mark McLoughlin Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 5 +++++ include/linux/dma_remapping.h | 7 ------- 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index f23a02054bf7..c1c59a619650 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1686,6 +1686,11 @@ static void __init iommu_prepare_gfx_mapping(void) printk(KERN_ERR "IOMMU: mapping reserved region failed\n"); } } +#else /* !CONFIG_DMAR_GFX_WA */ 
+static inline void iommu_prepare_gfx_mapping(void) +{ + return; +} #endif #ifdef CONFIG_DMAR_FLOPPY_WA diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 4ef5f6bc0d68..7799a85614c1 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -20,11 +20,4 @@ extern void free_dmar_iommu(struct intel_iommu *iommu); extern int dmar_disabled; -#ifndef CONFIG_DMAR_GFX_WA -static inline void iommu_prepare_gfx_mapping(void) -{ - return; -} -#endif /* !CONFIG_DMAR_GFX_WA */ - #endif -- cgit 1.4.1 From c07e7d217bef198422b7eface456ecfd4bb1ab87 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Fri, 21 Nov 2008 16:54:46 +0000 Subject: intel-iommu: trivially inline context entry macros Some macros were unused, so I just dropped them: context_fault_disable context_translation_type context_address_root context_address_width context_domain_id Signed-off-by: Mark McLoughlin Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 85 ++++++++++++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 30 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index c1c59a619650..3be931b3bf98 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -105,28 +105,53 @@ struct context_entry { u64 lo; u64 hi; }; -#define context_present(c) ((c).lo & 1) -#define context_fault_disable(c) (((c).lo >> 1) & 1) -#define context_translation_type(c) (((c).lo >> 2) & 3) -#define context_address_root(c) ((c).lo & VTD_PAGE_MASK) -#define context_address_width(c) ((c).hi & 7) -#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) - -#define context_set_present(c) do {(c).lo |= 1;} while (0) -#define context_set_fault_enable(c) \ - do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) -#define context_set_translation_type(c, val) \ - do { \ - (c).lo &= (((u64)-1) << 4) | 3; \ - (c).lo |= ((val) & 3) << 2; \ - } while (0) + +static inline bool 
context_present(struct context_entry *context) +{ + return (context->lo & 1); +} +static inline void context_set_present(struct context_entry *context) +{ + context->lo |= 1; +} + +static inline void context_set_fault_enable(struct context_entry *context) +{ + context->lo &= (((u64)-1) << 2) | 1; +} + #define CONTEXT_TT_MULTI_LEVEL 0 -#define context_set_address_root(c, val) \ - do {(c).lo |= (val) & VTD_PAGE_MASK; } while (0) -#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) -#define context_set_domain_id(c, val) \ - do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) -#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) + +static inline void context_set_translation_type(struct context_entry *context, + unsigned long value) +{ + context->lo &= (((u64)-1) << 4) | 3; + context->lo |= (value & 3) << 2; +} + +static inline void context_set_address_root(struct context_entry *context, + unsigned long value) +{ + context->lo |= value & VTD_PAGE_MASK; +} + +static inline void context_set_address_width(struct context_entry *context, + unsigned long value) +{ + context->hi |= value & 7; +} + +static inline void context_set_domain_id(struct context_entry *context, + unsigned long value) +{ + context->hi |= (value & ((1 << 16) - 1)) << 8; +} + +static inline void context_clear_entry(struct context_entry *context) +{ + context->lo = 0; + context->hi = 0; +} /* * 0: readable @@ -349,7 +374,7 @@ static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn) ret = 0; goto out; } - ret = context_present(context[devfn]); + ret = context_present(&context[devfn]); out: spin_unlock_irqrestore(&iommu->lock, flags); return ret; @@ -365,7 +390,7 @@ static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn) root = &iommu->root_entry[bus]; context = get_context_addr_from_root(root); if (context) { - context_clear_entry(context[devfn]); + context_clear_entry(&context[devfn]); __iommu_flush_cache(iommu, 
&context[devfn], \ sizeof(*context)); } @@ -1284,17 +1309,17 @@ static int domain_context_mapping_one(struct dmar_domain *domain, if (!context) return -ENOMEM; spin_lock_irqsave(&iommu->lock, flags); - if (context_present(*context)) { + if (context_present(context)) { spin_unlock_irqrestore(&iommu->lock, flags); return 0; } - context_set_domain_id(*context, domain->id); - context_set_address_width(*context, domain->agaw); - context_set_address_root(*context, virt_to_phys(domain->pgd)); - context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL); - context_set_fault_enable(*context); - context_set_present(*context); + context_set_domain_id(context, domain->id); + context_set_address_width(context, domain->agaw); + context_set_address_root(context, virt_to_phys(domain->pgd)); + context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL); + context_set_fault_enable(context); + context_set_present(context); __iommu_flush_cache(iommu, context, sizeof(*context)); /* it's a non-present to present mapping */ -- cgit 1.4.1 From 19c239ce3d089fee339d1ab7e97b43d6f0557ce5 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Fri, 21 Nov 2008 16:56:53 +0000 Subject: intel-iommu: trivially inline DMA PTE macros Signed-off-by: Mark McLoughlin Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 71 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 23 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 3be931b3bf98..213a5c87fde2 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -164,16 +164,41 @@ static inline void context_clear_entry(struct context_entry *context) struct dma_pte { u64 val; }; -#define dma_clear_pte(p) do {(p).val = 0;} while (0) -#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) -#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) -#define dma_set_pte_prot(p, prot) \ - do {(p).val = ((p).val 
& ~3) | ((prot) & 3); } while (0) -#define dma_pte_addr(p) ((p).val & VTD_PAGE_MASK) -#define dma_set_pte_addr(p, addr) do {\ - (p).val |= ((addr) & VTD_PAGE_MASK); } while (0) -#define dma_pte_present(p) (((p).val & 3) != 0) +static inline void dma_clear_pte(struct dma_pte *pte) +{ + pte->val = 0; +} + +static inline void dma_set_pte_readable(struct dma_pte *pte) +{ + pte->val |= DMA_PTE_READ; +} + +static inline void dma_set_pte_writable(struct dma_pte *pte) +{ + pte->val |= DMA_PTE_WRITE; +} + +static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot) +{ + pte->val = (pte->val & ~3) | (prot & 3); +} + +static inline u64 dma_pte_addr(struct dma_pte *pte) +{ + return (pte->val & VTD_PAGE_MASK); +} + +static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr) +{ + pte->val |= (addr & VTD_PAGE_MASK); +} + +static inline bool dma_pte_present(struct dma_pte *pte) +{ + return (pte->val & 3) != 0; +} struct dmar_domain { int id; /* domain id */ @@ -487,7 +512,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) if (level == 1) break; - if (!dma_pte_present(*pte)) { + if (!dma_pte_present(pte)) { tmp_page = alloc_pgtable_page(); if (!tmp_page) { @@ -497,16 +522,16 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) } __iommu_flush_cache(domain->iommu, tmp_page, PAGE_SIZE); - dma_set_pte_addr(*pte, virt_to_phys(tmp_page)); + dma_set_pte_addr(pte, virt_to_phys(tmp_page)); /* * high level table always sets r/w, last level page * table control read/write */ - dma_set_pte_readable(*pte); - dma_set_pte_writable(*pte); + dma_set_pte_readable(pte); + dma_set_pte_writable(pte); __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); } - parent = phys_to_virt(dma_pte_addr(*pte)); + parent = phys_to_virt(dma_pte_addr(pte)); level--; } @@ -529,9 +554,9 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr, if (level == total) return pte; - if (!dma_pte_present(*pte)) + if 
(!dma_pte_present(pte)) break; - parent = phys_to_virt(dma_pte_addr(*pte)); + parent = phys_to_virt(dma_pte_addr(pte)); total--; } return NULL; @@ -546,7 +571,7 @@ static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr) pte = dma_addr_level_pte(domain, addr, 1); if (pte) { - dma_clear_pte(*pte); + dma_clear_pte(pte); __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); } } @@ -593,8 +618,8 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, pte = dma_addr_level_pte(domain, tmp, level); if (pte) { free_pgtable_page( - phys_to_virt(dma_pte_addr(*pte))); - dma_clear_pte(*pte); + phys_to_virt(dma_pte_addr(pte))); + dma_clear_pte(pte); __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); } @@ -1421,9 +1446,9 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, /* We don't need lock here, nobody else * touches the iova range */ - BUG_ON(dma_pte_addr(*pte)); - dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT); - dma_set_pte_prot(*pte, prot); + BUG_ON(dma_pte_addr(pte)); + dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT); + dma_set_pte_prot(pte, prot); __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); start_pfn++; index++; @@ -2584,7 +2609,7 @@ u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova) pte = addr_to_dma_pte(domain, iova); if (pte) - pfn = dma_pte_addr(*pte); + pfn = dma_pte_addr(pte); return pfn >> VTD_PAGE_SHIFT; } -- cgit 1.4.1 From 2e824f79240476d57a8589f46232cabf151efe90 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Mon, 22 Dec 2008 16:54:58 +0800 Subject: VT-d: fix segment number being ignored when searching DRHD On platforms with multiple PCI segments, any of the segments can have a DRHD with INCLUDE_PCI_ALL flag. So need to check the DRHD's segment number against the PCI device's when searching its DRHD. 
Signed-off-by: Yu Zhao Signed-off-by: David Woodhouse Signed-off-by: Joerg Roedel --- drivers/pci/dmar.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 691b3adeb870..5f164ff3026e 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -191,26 +191,17 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header) static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru) { struct acpi_dmar_hardware_unit *drhd; - static int include_all; int ret = 0; drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr; - if (!dmaru->include_all) - ret = dmar_parse_dev_scope((void *)(drhd + 1), + if (dmaru->include_all) + return 0; + + ret = dmar_parse_dev_scope((void *)(drhd + 1), ((void *)drhd) + drhd->header.length, &dmaru->devices_cnt, &dmaru->devices, drhd->segment); - else { - /* Only allow one INCLUDE_ALL */ - if (include_all) { - printk(KERN_WARNING PREFIX "Only one INCLUDE_ALL " - "device scope is allowed\n"); - ret = -EINVAL; - } - include_all = 1; - } - if (ret) { list_del(&dmaru->list); kfree(dmaru); @@ -384,12 +375,21 @@ int dmar_pci_device_match(struct pci_dev *devices[], int cnt, struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev) { - struct dmar_drhd_unit *drhd = NULL; + struct dmar_drhd_unit *dmaru = NULL; + struct acpi_dmar_hardware_unit *drhd; + + list_for_each_entry(dmaru, &dmar_drhd_units, list) { + drhd = container_of(dmaru->hdr, + struct acpi_dmar_hardware_unit, + header); + + if (dmaru->include_all && + drhd->segment == pci_domain_nr(dev->bus)) + return dmaru; - list_for_each_entry(drhd, &dmar_drhd_units, list) { - if (drhd->include_all || dmar_pci_device_match(drhd->devices, - drhd->devices_cnt, dev)) - return drhd; + if (dmar_pci_device_match(dmaru->devices, + dmaru->devices_cnt, dev)) + return dmaru; } return NULL; -- cgit 1.4.1 From d71a2f33ac466a437f316e7bb024d0175a7f3cd9 Mon Sep 17 00:00:00 2001 From: 
Weidong Han Date: Sun, 7 Dec 2008 21:13:41 +0800 Subject: Initialize domain flags to 0 It's random number after the domain is allocated by kmem_cache_alloc Signed-off-by: Weidong Han Signed-off-by: Joerg Roedel --- drivers/pci/intel-iommu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 213a5c87fde2..65aa1d427f43 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1180,6 +1180,7 @@ static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) set_bit(num, iommu->domain_ids); domain->id = num; domain->iommu = iommu; + domain->flags = 0; iommu->domains[num] = domain; spin_unlock_irqrestore(&iommu->lock, flags); -- cgit 1.4.1 From 3b5410e735b093060b96664230c6f9f4fe80b251 Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Mon, 8 Dec 2008 09:17:15 +0800 Subject: change P2P domain flags Signed-off-by: Weidong Han Signed-off-by: Joerg Roedel --- drivers/pci/intel-iommu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 65aa1d427f43..22ad8851b3e0 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -200,6 +200,9 @@ static inline bool dma_pte_present(struct dma_pte *pte) return (pte->val & 3) != 0; } +/* devices under the same p2p bridge are owned in one domain */ +#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 < 0) + struct dmar_domain { int id; /* domain id */ struct intel_iommu *iommu; /* back pointer to owning iommu */ @@ -214,8 +217,7 @@ struct dmar_domain { /* adjusted guest address width, 0 is level 2 30-bit */ int agaw; -#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 - int flags; + int flags; /* flags to find out type of domain */ }; /* PCI domain-device relationship */ @@ -1574,7 +1576,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) info->dev = NULL; info->domain = domain; /* This domain is 
shared by devices under p2p bridge */ - domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES; + domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES; /* pcie-to-pci bridge already has a domain, uses it */ found = NULL; -- cgit 1.4.1 From d9630fe941769dd050fbc38fbbac20a708ab9461 Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Mon, 8 Dec 2008 11:06:32 +0800 Subject: Add global iommu list Signed-off-by: Weidong Han Signed-off-by: Joerg Roedel --- drivers/pci/intel-iommu.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 22ad8851b3e0..d2ffa7a6d723 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -57,6 +57,9 @@ #define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) #define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) +/* global iommu list, set NULL for ignored DMAR units */ +static struct intel_iommu **g_iommus; + /* * 0: Present * 1-11: Reserved @@ -1153,6 +1156,17 @@ void free_dmar_iommu(struct intel_iommu *iommu) kfree(iommu->domains); kfree(iommu->domain_ids); + g_iommus[iommu->seq_id] = NULL; + + /* if all iommus are freed, free g_iommus */ + for (i = 0; i < g_num_of_iommus; i++) { + if (g_iommus[i]) + break; + } + + if (i == g_num_of_iommus) + kfree(g_iommus); + /* free context mapping */ free_context_table(iommu); } @@ -1794,9 +1808,18 @@ static int __init init_dmars(void) */ } + g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *), + GFP_KERNEL); + if (!g_iommus) { + printk(KERN_ERR "Allocating global iommu array failed\n"); + ret = -ENOMEM; + goto error; + } + deferred_flush = kzalloc(g_num_of_iommus * sizeof(struct deferred_flush_tables), GFP_KERNEL); if (!deferred_flush) { + kfree(g_iommus); ret = -ENOMEM; goto error; } @@ -1806,6 +1829,7 @@ static int __init init_dmars(void) continue; iommu = drhd->iommu; + g_iommus[iommu->seq_id] = iommu; ret = iommu_init_domains(iommu); if (ret) @@ -1918,6 +1942,7 @@ error: iommu = drhd->iommu; 
free_iommu(iommu); } + kfree(g_iommus); return ret; } -- cgit 1.4.1 From a2bb8459fe46e5aaad6637b31b5593d740097cba Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Mon, 8 Dec 2008 11:24:12 +0800 Subject: Get iommu from g_iommus for deferred flush deferred_flush[] is indexed by the iommu seq_id, so the iommu for each entry is fixed and can be obtained from g_iommus. Signed-off-by: Weidong Han Signed-off-by: Joerg Roedel --- drivers/pci/intel-iommu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index d2ffa7a6d723..86b9f58a645e 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2101,10 +2101,11 @@ static void flush_unmaps(void) /* just flush them all */ for (i = 0; i < g_num_of_iommus; i++) { - if (deferred_flush[i].next) { - struct intel_iommu *iommu = - deferred_flush[i].domain[0]->iommu; + struct intel_iommu *iommu = g_iommus[i]; + if (!iommu) + continue; + if (deferred_flush[i].next) { iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, 0); for (j = 0; j < deferred_flush[i].next; j++) { -- cgit 1.4.1 From 8c11e798eee2ce4475134eaf61302b28ea4f205d Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Mon, 8 Dec 2008 15:29:22 +0800 Subject: iommu bitmap instead of iommu pointer in dmar_domain In order to support assigning multiple devices from different iommus to a domain, an iommu bitmap is used to record all iommus the domain is related to. 
Signed-off-by: Weidong Han Signed-off-by: Joerg Roedel --- drivers/pci/intel-iommu.c | 97 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 67 insertions(+), 30 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 86b9f58a645e..9dca689215eb 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -208,7 +208,7 @@ static inline bool dma_pte_present(struct dma_pte *pte) struct dmar_domain { int id; /* domain id */ - struct intel_iommu *iommu; /* back pointer to owning iommu */ + unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/ struct list_head devices; /* all devices' list */ struct iova_domain iovad; /* iova's that belong to this domain */ @@ -362,6 +362,18 @@ void free_iova_mem(struct iova *iova) kmem_cache_free(iommu_iova_cache, iova); } +/* in native case, each domain is related to only one iommu */ +static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) +{ + int iommu_id; + + iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus); + if (iommu_id < 0 || iommu_id >= g_num_of_iommus) + return NULL; + + return g_iommus[iommu_id]; +} + /* Gets context entry for a given bus and devfn */ static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, u8 bus, u8 devfn) @@ -502,6 +514,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) int level = agaw_to_level(domain->agaw); int offset; unsigned long flags; + struct intel_iommu *iommu = domain_get_iommu(domain); BUG_ON(!domain->pgd); @@ -525,7 +538,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) flags); return NULL; } - __iommu_flush_cache(domain->iommu, tmp_page, + __iommu_flush_cache(iommu, tmp_page, PAGE_SIZE); dma_set_pte_addr(pte, virt_to_phys(tmp_page)); /* @@ -534,7 +547,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) */ dma_set_pte_readable(pte); 
dma_set_pte_writable(pte); - __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); + __iommu_flush_cache(iommu, pte, sizeof(*pte)); } parent = phys_to_virt(dma_pte_addr(pte)); level--; @@ -571,13 +584,14 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr, static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr) { struct dma_pte *pte = NULL; + struct intel_iommu *iommu = domain_get_iommu(domain); /* get last level pte */ pte = dma_addr_level_pte(domain, addr, 1); if (pte) { dma_clear_pte(pte); - __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); + __iommu_flush_cache(iommu, pte, sizeof(*pte)); } } @@ -608,6 +622,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, int total = agaw_to_level(domain->agaw); int level; u64 tmp; + struct intel_iommu *iommu = domain_get_iommu(domain); start &= (((u64)1) << addr_width) - 1; end &= (((u64)1) << addr_width) - 1; @@ -625,7 +640,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, free_pgtable_page( phys_to_virt(dma_pte_addr(pte))); dma_clear_pte(pte); - __iommu_flush_cache(domain->iommu, + __iommu_flush_cache(iommu, pte, sizeof(*pte)); } tmp += level_size(level); @@ -1195,7 +1210,8 @@ static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) set_bit(num, iommu->domain_ids); domain->id = num; - domain->iommu = iommu; + memset(&domain->iommu_bmp, 0, sizeof(unsigned long)); + set_bit(iommu->seq_id, &domain->iommu_bmp); domain->flags = 0; iommu->domains[num] = domain; spin_unlock_irqrestore(&iommu->lock, flags); @@ -1206,10 +1222,13 @@ static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) static void iommu_free_domain(struct dmar_domain *domain) { unsigned long flags; + struct intel_iommu *iommu; + + iommu = domain_get_iommu(domain); - spin_lock_irqsave(&domain->iommu->lock, flags); - clear_bit(domain->id, domain->iommu->domain_ids); - spin_unlock_irqrestore(&domain->iommu->lock, flags); + 
spin_lock_irqsave(&iommu->lock, flags); + clear_bit(domain->id, iommu->domain_ids); + spin_unlock_irqrestore(&iommu->lock, flags); } static struct iova_domain reserved_iova_list; @@ -1288,7 +1307,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width) domain_reserve_special_ranges(domain); /* calculate AGAW */ - iommu = domain->iommu; + iommu = domain_get_iommu(domain); if (guest_width > cap_mgaw(iommu->cap)) guest_width = cap_mgaw(iommu->cap); domain->gaw = guest_width; @@ -1341,7 +1360,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, u8 bus, u8 devfn) { struct context_entry *context; - struct intel_iommu *iommu = domain->iommu; + struct intel_iommu *iommu = domain_get_iommu(domain); unsigned long flags; pr_debug("Set context mapping for %02x:%02x.%d\n", @@ -1413,8 +1432,9 @@ static int domain_context_mapped(struct dmar_domain *domain, { int ret; struct pci_dev *tmp, *parent; + struct intel_iommu *iommu = domain_get_iommu(domain); - ret = device_context_mapped(domain->iommu, + ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn); if (!ret) return ret; @@ -1425,17 +1445,17 @@ static int domain_context_mapped(struct dmar_domain *domain, /* Secondary interface's bus number and devfn 0 */ parent = pdev->bus->self; while (parent != tmp) { - ret = device_context_mapped(domain->iommu, parent->bus->number, + ret = device_context_mapped(iommu, parent->bus->number, parent->devfn); if (!ret) return ret; parent = parent->bus->self; } if (tmp->is_pcie) - return device_context_mapped(domain->iommu, + return device_context_mapped(iommu, tmp->subordinate->number, 0); else - return device_context_mapped(domain->iommu, + return device_context_mapped(iommu, tmp->bus->number, tmp->devfn); } @@ -1447,6 +1467,7 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, struct dma_pte *pte; int index; int addr_width = agaw_to_width(domain->agaw); + struct intel_iommu *iommu = domain_get_iommu(domain); hpa &= (((u64)1) << 
addr_width) - 1; @@ -1466,7 +1487,7 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, BUG_ON(dma_pte_addr(pte)); dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT); dma_set_pte_prot(pte, prot); - __iommu_flush_cache(domain->iommu, pte, sizeof(*pte)); + __iommu_flush_cache(iommu, pte, sizeof(*pte)); start_pfn++; index++; } @@ -1475,10 +1496,12 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn) { - clear_context_table(domain->iommu, bus, devfn); - domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0, + struct intel_iommu *iommu = domain_get_iommu(domain); + + clear_context_table(iommu, bus, devfn); + iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL, 0); - domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0, + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, 0); } @@ -2033,6 +2056,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, struct iova *iova; int prot = 0; int ret; + struct intel_iommu *iommu; BUG_ON(dir == DMA_NONE); if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) @@ -2042,6 +2066,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, if (!domain) return 0; + iommu = domain_get_iommu(domain); size = aligned_size((u64)paddr, size); iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); @@ -2055,7 +2080,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, * mappings.. 
*/ if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ - !cap_zlr(domain->iommu->cap)) + !cap_zlr(iommu->cap)) prot |= DMA_PTE_READ; if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) prot |= DMA_PTE_WRITE; @@ -2071,10 +2096,10 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, goto error; /* it's a non-present to present mapping */ - ret = iommu_flush_iotlb_psi(domain->iommu, domain->id, + ret = iommu_flush_iotlb_psi(iommu, domain->id, start_paddr, size >> VTD_PAGE_SHIFT, 1); if (ret) - iommu_flush_write_buffer(domain->iommu); + iommu_flush_write_buffer(iommu); return start_paddr + ((u64)paddr & (~PAGE_MASK)); @@ -2132,12 +2157,14 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova) { unsigned long flags; int next, iommu_id; + struct intel_iommu *iommu; spin_lock_irqsave(&async_umap_flush_lock, flags); if (list_size == HIGH_WATER_MARK) flush_unmaps(); - iommu_id = dom->iommu->seq_id; + iommu = domain_get_iommu(dom); + iommu_id = iommu->seq_id; next = deferred_flush[iommu_id].next; deferred_flush[iommu_id].domain[next] = dom; @@ -2159,12 +2186,15 @@ void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size, struct dmar_domain *domain; unsigned long start_addr; struct iova *iova; + struct intel_iommu *iommu; if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) return; domain = find_domain(pdev); BUG_ON(!domain); + iommu = domain_get_iommu(domain); + iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); if (!iova) return; @@ -2180,9 +2210,9 @@ void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size, /* free page tables */ dma_pte_free_pagetable(domain, start_addr, start_addr + size); if (intel_iommu_strict) { - if (iommu_flush_iotlb_psi(domain->iommu, + if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0)) - iommu_flush_write_buffer(domain->iommu); + iommu_flush_write_buffer(iommu); /* free iova */ __free_iova(&domain->iovad, iova); } 
else { @@ -2243,11 +2273,15 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, size_t size = 0; void *addr; struct scatterlist *sg; + struct intel_iommu *iommu; if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) return; domain = find_domain(pdev); + BUG_ON(!domain); + + iommu = domain_get_iommu(domain); iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); if (!iova) @@ -2264,9 +2298,9 @@ void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, /* free page tables */ dma_pte_free_pagetable(domain, start_addr, start_addr + size); - if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr, + if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0)) - iommu_flush_write_buffer(domain->iommu); + iommu_flush_write_buffer(iommu); /* free iova */ __free_iova(&domain->iovad, iova); @@ -2300,6 +2334,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, int ret; struct scatterlist *sg; unsigned long start_addr; + struct intel_iommu *iommu; BUG_ON(dir == DMA_NONE); if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) @@ -2309,6 +2344,8 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, if (!domain) return 0; + iommu = domain_get_iommu(domain); + for_each_sg(sglist, sg, nelems, i) { addr = SG_ENT_VIRT_ADDRESS(sg); addr = (void *)virt_to_phys(addr); @@ -2326,7 +2363,7 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, * mappings.. 
*/ if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \ - !cap_zlr(domain->iommu->cap)) + !cap_zlr(iommu->cap)) prot |= DMA_PTE_READ; if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) prot |= DMA_PTE_WRITE; @@ -2358,9 +2395,9 @@ int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, } /* it's a non-present to present mapping */ - if (iommu_flush_iotlb_psi(domain->iommu, domain->id, + if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr, offset >> VTD_PAGE_SHIFT, 1)) - iommu_flush_write_buffer(domain->iommu); + iommu_flush_write_buffer(iommu); return nelems; } -- cgit 1.4.1 From 1b5736839ae13dadc5947940144f95dd0f4a4a8c Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Mon, 8 Dec 2008 15:34:06 +0800 Subject: calculate agaw for each iommu The "SAGAW" capability may differ across iommus. Use a default agaw, but if the default agaw is not supported by an iommu, choose a smaller supported agaw. Signed-off-by: Weidong Han Signed-off-by: Joerg Roedel --- drivers/pci/dmar.c | 10 ++++++++++ drivers/pci/intel-iommu.c | 22 ++++++++++++++++++++++ include/linux/dma_remapping.h | 1 + include/linux/intel-iommu.h | 1 + 4 files changed, 34 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 5f164ff3026e..f5a662a50acb 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -491,6 +491,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) int map_size; u32 ver; static int iommu_allocated = 0; + int agaw; iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); if (!iommu) @@ -506,6 +507,15 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); + agaw = iommu_calculate_agaw(iommu); + if (agaw < 0) { + printk(KERN_ERR + "Cannot get a valid agaw for iommu (seq_id = %d)\n", + iommu->seq_id); + goto error; + } + iommu->agaw = agaw; + /* the registers might be more than one page */ map_size = max_t(int, 
ecap_max_iotlb_offset(iommu->ecap), cap_max_fault_reg_offset(iommu->cap)); diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 9dca689215eb..3ecfa2304c2c 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -362,6 +362,28 @@ void free_iova_mem(struct iova *iova) kmem_cache_free(iommu_iova_cache, iova); } + +static inline int width_to_agaw(int width); + +/* calculate agaw for each iommu. + * "SAGAW" may be different across iommus, use a default agaw, and + * get a supported less agaw for iommus that don't support the default agaw. + */ +int iommu_calculate_agaw(struct intel_iommu *iommu) +{ + unsigned long sagaw; + int agaw = -1; + + sagaw = cap_sagaw(iommu->cap); + for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH); + agaw >= 0; agaw--) { + if (test_bit(agaw, &sagaw)) + break; + } + + return agaw; +} + /* in native case, each domain is related to only one iommu */ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) { diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 7799a85614c1..136f170cecc2 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -17,6 +17,7 @@ struct dmar_domain; struct root_entry; extern void free_dmar_iommu(struct intel_iommu *iommu); +extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int dmar_disabled; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 1bff7bf1bc2c..06349fd5871b 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -290,6 +290,7 @@ struct intel_iommu { u32 gcmd; /* Holds TE, EAFL. 
Don't need SRTP, SFL, WBF */ spinlock_t register_lock; /* protect register handling */ int seq_id; /* sequence id of the iommu */ + int agaw; /* agaw of this iommu */ #ifdef CONFIG_DMAR unsigned long *domain_ids; /* bitmap of domains */ -- cgit 1.4.1 From 8e604097ddc483eb1e6e99564953e4e937fe439a Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Mon, 8 Dec 2008 15:49:06 +0800 Subject: iommu coherency In dmar_domain, more than one iommus may be included in iommu_bmp. Due to "Coherency" capability may be different across iommus, set this variable to indicate iommu access is coherent or not. Only when all related iommus in a dmar_domain are all coherent, iommu access of this domain is coherent. Signed-off-by: Weidong Han Signed-off-by: Joerg Roedel --- drivers/pci/intel-iommu.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 3ecfa2304c2c..104e99df2ade 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -221,6 +221,8 @@ struct dmar_domain { int agaw; int flags; /* flags to find out type of domain */ + + int iommu_coherency;/* indicate coherency of iommu access */ }; /* PCI domain-device relationship */ @@ -396,6 +398,23 @@ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) return g_iommus[iommu_id]; } +/* "Coherency" capability may be different across iommus */ +static void domain_update_iommu_coherency(struct dmar_domain *domain) +{ + int i; + + domain->iommu_coherency = 1; + + i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus); + for (; i < g_num_of_iommus; ) { + if (!ecap_coherent(g_iommus[i]->ecap)) { + domain->iommu_coherency = 0; + break; + } + i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1); + } +} + /* Gets context entry for a given bus and devfn */ static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, u8 bus, u8 devfn) @@ -1346,6 +1365,11 @@ static int 
domain_init(struct dmar_domain *domain, int guest_width) domain->agaw = agaw; INIT_LIST_HEAD(&domain->devices); + if (ecap_coherent(iommu->ecap)) + domain->iommu_coherency = 1; + else + domain->iommu_coherency = 0; + /* always allocate the top pgd */ domain->pgd = (struct dma_pte *)alloc_pgtable_page(); if (!domain->pgd) -- cgit 1.4.1 From 1ce28feb22833645a0f3843cd873a0b56ed19ef0 Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Mon, 8 Dec 2008 16:35:39 +0800 Subject: Add domain flag DOMAIN_FLAG_VIRTUAL_MACHINE Add this flag for VT-d used in virtual machine, like KVM. Signed-off-by: Weidong Han Signed-off-by: Joerg Roedel --- drivers/pci/intel-iommu.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 104e99df2ade..ffbe4c573729 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -206,6 +206,11 @@ static inline bool dma_pte_present(struct dma_pte *pte) /* devices under the same p2p bridge are owned in one domain */ #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 < 0) +/* domain represents a virtual machine, more than one devices + * across iommus may be owned in one domain, e.g. kvm guest. 
+ */ +#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1) + struct dmar_domain { int id; /* domain id */ unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/ @@ -391,6 +396,8 @@ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) { int iommu_id; + BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE); + iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus); if (iommu_id < 0 || iommu_id >= g_num_of_iommus) return NULL; -- cgit 1.4.1 From c7151a8dfefd11108de5b4293af2390962bcff71 Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Mon, 8 Dec 2008 22:51:37 +0800 Subject: Add/remove domain device info for virtual machine domain Add iommu reference count in domain, and add a lock to protect iommu setting including iommu_bmp, iommu_count and iommu_coherency. virtual machine domain may have multiple devices from different iommus, so it needs to do more things when add/remove domain device info. Thus implement separate these functions for virtual machine domain. Signed-off-by: Weidong Han Signed-off-by: Joerg Roedel --- drivers/pci/intel-iommu.c | 171 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 166 insertions(+), 5 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index ffbe4c573729..6ed18faa1198 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -228,6 +228,8 @@ struct dmar_domain { int flags; /* flags to find out type of domain */ int iommu_coherency;/* indicate coherency of iommu access */ + int iommu_count; /* reference count of iommu */ + spinlock_t iommu_lock; /* protect iommu set in domain */ }; /* PCI domain-device relationship */ @@ -422,6 +424,27 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain) } } +static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn) +{ + struct dmar_drhd_unit *drhd = NULL; + int i; + + for_each_drhd_unit(drhd) { + if (drhd->ignored) + continue; + + for (i = 0; i < drhd->devices_cnt; i++) + 
if (drhd->devices[i]->bus->number == bus && + drhd->devices[i]->devfn == devfn) + return drhd->iommu; + + if (drhd->include_all) + return drhd->iommu; + } + + return NULL; +} + /* Gets context entry for a given bus and devfn */ static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, u8 bus, u8 devfn) @@ -1196,12 +1219,18 @@ void free_dmar_iommu(struct intel_iommu *iommu) { struct dmar_domain *domain; int i; + unsigned long flags; i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); for (; i < cap_ndoms(iommu->cap); ) { domain = iommu->domains[i]; clear_bit(i, iommu->domain_ids); - domain_exit(domain); + + spin_lock_irqsave(&domain->iommu_lock, flags); + if (--domain->iommu_count == 0) + domain_exit(domain); + spin_unlock_irqrestore(&domain->iommu_lock, flags); + i = find_next_bit(iommu->domain_ids, cap_ndoms(iommu->cap), i+1); } @@ -1351,6 +1380,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width) init_iova_domain(&domain->iovad, DMA_32BIT_PFN); spin_lock_init(&domain->mapping_lock); + spin_lock_init(&domain->iommu_lock); domain_reserve_special_ranges(domain); @@ -1377,6 +1407,8 @@ static int domain_init(struct dmar_domain *domain, int guest_width) else domain->iommu_coherency = 0; + domain->iommu_count = 1; + /* always allocate the top pgd */ domain->pgd = (struct dma_pte *)alloc_pgtable_page(); if (!domain->pgd) @@ -1445,6 +1477,13 @@ static int domain_context_mapping_one(struct dmar_domain *domain, iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0); spin_unlock_irqrestore(&iommu->lock, flags); + + spin_lock_irqsave(&domain->iommu_lock, flags); + if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) { + domain->iommu_count++; + domain_update_iommu_coherency(domain); + } + spin_unlock_irqrestore(&domain->iommu_lock, flags); return 0; } @@ -1547,9 +1586,10 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, return 0; } -static void detach_domain_for_dev(struct dmar_domain *domain, 
u8 bus, u8 devfn) +static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) { - struct intel_iommu *iommu = domain_get_iommu(domain); + if (!iommu) + return; clear_context_table(iommu, bus, devfn); iommu->flush.flush_context(iommu, 0, 0, 0, @@ -1562,6 +1602,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain) { struct device_domain_info *info; unsigned long flags; + struct intel_iommu *iommu; spin_lock_irqsave(&device_domain_lock, flags); while (!list_empty(&domain->devices)) { @@ -1573,7 +1614,8 @@ static void domain_remove_dev_info(struct dmar_domain *domain) info->dev->dev.archdata.iommu = NULL; spin_unlock_irqrestore(&device_domain_lock, flags); - detach_domain_for_dev(info->domain, info->bus, info->devfn); + iommu = device_to_iommu(info->bus, info->devfn); + iommu_detach_dev(iommu, info->bus, info->devfn); free_devinfo_mem(info); spin_lock_irqsave(&device_domain_lock, flags); @@ -2625,6 +2667,122 @@ int __init intel_iommu_init(void) return 0; } +static int vm_domain_add_dev_info(struct dmar_domain *domain, + struct pci_dev *pdev) +{ + struct device_domain_info *info; + unsigned long flags; + + info = alloc_devinfo_mem(); + if (!info) + return -ENOMEM; + + info->bus = pdev->bus->number; + info->devfn = pdev->devfn; + info->dev = pdev; + info->domain = domain; + + spin_lock_irqsave(&device_domain_lock, flags); + list_add(&info->link, &domain->devices); + list_add(&info->global, &device_domain_list); + pdev->dev.archdata.iommu = info; + spin_unlock_irqrestore(&device_domain_lock, flags); + + return 0; +} + +static void vm_domain_remove_one_dev_info(struct dmar_domain *domain, + struct pci_dev *pdev) +{ + struct device_domain_info *info; + struct intel_iommu *iommu; + unsigned long flags; + int found = 0; + struct list_head *entry, *tmp; + + iommu = device_to_iommu(pdev->bus->number, pdev->devfn); + if (!iommu) + return; + + spin_lock_irqsave(&device_domain_lock, flags); + list_for_each_safe(entry, tmp, &domain->devices) { + info = 
list_entry(entry, struct device_domain_info, link); + if (info->bus == pdev->bus->number && + info->devfn == pdev->devfn) { + list_del(&info->link); + list_del(&info->global); + if (info->dev) + info->dev->dev.archdata.iommu = NULL; + spin_unlock_irqrestore(&device_domain_lock, flags); + + iommu_detach_dev(iommu, info->bus, info->devfn); + free_devinfo_mem(info); + + spin_lock_irqsave(&device_domain_lock, flags); + + if (found) + break; + else + continue; + } + + /* if there is no other devices under the same iommu + * owned by this domain, clear this iommu in iommu_bmp + * update iommu count and coherency + */ + if (device_to_iommu(info->bus, info->devfn) == iommu) + found = 1; + } + + if (found == 0) { + unsigned long tmp_flags; + spin_lock_irqsave(&domain->iommu_lock, tmp_flags); + clear_bit(iommu->seq_id, &domain->iommu_bmp); + domain->iommu_count--; + domain_update_iommu_coherency(domain); + spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags); + } + + spin_unlock_irqrestore(&device_domain_lock, flags); +} + +static void vm_domain_remove_all_dev_info(struct dmar_domain *domain) +{ + struct device_domain_info *info; + struct intel_iommu *iommu; + unsigned long flags1, flags2; + + spin_lock_irqsave(&device_domain_lock, flags1); + while (!list_empty(&domain->devices)) { + info = list_entry(domain->devices.next, + struct device_domain_info, link); + list_del(&info->link); + list_del(&info->global); + if (info->dev) + info->dev->dev.archdata.iommu = NULL; + + spin_unlock_irqrestore(&device_domain_lock, flags1); + + iommu = device_to_iommu(info->bus, info->devfn); + iommu_detach_dev(iommu, info->bus, info->devfn); + + /* clear this iommu in iommu_bmp, update iommu count + * and coherency + */ + spin_lock_irqsave(&domain->iommu_lock, flags2); + if (test_and_clear_bit(iommu->seq_id, + &domain->iommu_bmp)) { + domain->iommu_count--; + domain_update_iommu_coherency(domain); + } + spin_unlock_irqrestore(&domain->iommu_lock, flags2); + + free_devinfo_mem(info); + 
spin_lock_irqsave(&device_domain_lock, flags1); + } + spin_unlock_irqrestore(&device_domain_lock, flags1); +} + void intel_iommu_domain_exit(struct dmar_domain *domain) { u64 end; @@ -2702,7 +2860,10 @@ EXPORT_SYMBOL_GPL(intel_iommu_page_mapping); void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn) { - detach_domain_for_dev(domain, bus, devfn); + struct intel_iommu *iommu; + + iommu = device_to_iommu(bus, devfn); + iommu_detach_dev(iommu, bus, devfn); } EXPORT_SYMBOL_GPL(intel_iommu_detach_dev); -- cgit 1.4.1 From 5331fe6f5627e06eec7d0dc154a0a3a9c27813c5 Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Mon, 8 Dec 2008 23:00:00 +0800 Subject: Add domain_flush_cache Because virtual machine domain may have multiple devices from different iommus, it cannot use __iommu_flush_cache. In some common low level functions, use domain_flush_cache instead of __iommu_flush_cache. On the other hand, in some functions, iommu can is specified or domain cannot be got, still use __iommu_flush_cache Signed-off-by: Weidong Han Signed-off-by: Joerg Roedel --- drivers/pci/intel-iommu.c | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 6ed18faa1198..f0a21995b135 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -445,6 +445,13 @@ static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn) return NULL; } +static void domain_flush_cache(struct dmar_domain *domain, + void *addr, int size) +{ + if (!domain->iommu_coherency) + clflush_cache_range(addr, size); +} + /* Gets context entry for a given bus and devfn */ static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, u8 bus, u8 devfn) @@ -585,7 +592,6 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) int level = agaw_to_level(domain->agaw); int offset; unsigned long flags; - struct intel_iommu 
*iommu = domain_get_iommu(domain); BUG_ON(!domain->pgd); @@ -609,8 +615,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) flags); return NULL; } - __iommu_flush_cache(iommu, tmp_page, - PAGE_SIZE); + domain_flush_cache(domain, tmp_page, PAGE_SIZE); dma_set_pte_addr(pte, virt_to_phys(tmp_page)); /* * high level table always sets r/w, last level page @@ -618,7 +623,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) */ dma_set_pte_readable(pte); dma_set_pte_writable(pte); - __iommu_flush_cache(iommu, pte, sizeof(*pte)); + domain_flush_cache(domain, pte, sizeof(*pte)); } parent = phys_to_virt(dma_pte_addr(pte)); level--; @@ -655,14 +660,13 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr, static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr) { struct dma_pte *pte = NULL; - struct intel_iommu *iommu = domain_get_iommu(domain); /* get last level pte */ pte = dma_addr_level_pte(domain, addr, 1); if (pte) { dma_clear_pte(pte); - __iommu_flush_cache(iommu, pte, sizeof(*pte)); + domain_flush_cache(domain, pte, sizeof(*pte)); } } @@ -693,7 +697,6 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, int total = agaw_to_level(domain->agaw); int level; u64 tmp; - struct intel_iommu *iommu = domain_get_iommu(domain); start &= (((u64)1) << addr_width) - 1; end &= (((u64)1) << addr_width) - 1; @@ -711,8 +714,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, free_pgtable_page( phys_to_virt(dma_pte_addr(pte))); dma_clear_pte(pte); - __iommu_flush_cache(iommu, - pte, sizeof(*pte)); + domain_flush_cache(domain, pte, sizeof(*pte)); } tmp += level_size(level); } @@ -1445,12 +1447,17 @@ static int domain_context_mapping_one(struct dmar_domain *domain, u8 bus, u8 devfn) { struct context_entry *context; - struct intel_iommu *iommu = domain_get_iommu(domain); unsigned long flags; + struct intel_iommu *iommu; pr_debug("Set context mapping for 
%02x:%02x.%d\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); BUG_ON(!domain->pgd); + + iommu = device_to_iommu(bus, devfn); + if (!iommu) + return -ENODEV; + context = device_to_context_entry(iommu, bus, devfn); if (!context) return -ENOMEM; @@ -1466,7 +1473,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL); context_set_fault_enable(context); context_set_present(context); - __iommu_flush_cache(iommu, context, sizeof(*context)); + domain_flush_cache(domain, context, sizeof(*context)); /* it's a non-present to present mapping */ if (iommu->flush.flush_context(iommu, domain->id, @@ -1519,12 +1526,15 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev) tmp->bus->number, tmp->devfn); } -static int domain_context_mapped(struct dmar_domain *domain, - struct pci_dev *pdev) +static int domain_context_mapped(struct pci_dev *pdev) { int ret; struct pci_dev *tmp, *parent; - struct intel_iommu *iommu = domain_get_iommu(domain); + struct intel_iommu *iommu; + + iommu = device_to_iommu(pdev->bus->number, pdev->devfn); + if (!iommu) + return -ENODEV; ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn); @@ -1559,7 +1569,6 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, struct dma_pte *pte; int index; int addr_width = agaw_to_width(domain->agaw); - struct intel_iommu *iommu = domain_get_iommu(domain); hpa &= (((u64)1) << addr_width) - 1; @@ -1579,7 +1588,7 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, BUG_ON(dma_pte_addr(pte)); dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT); dma_set_pte_prot(pte, prot); - __iommu_flush_cache(iommu, pte, sizeof(*pte)); + domain_flush_cache(domain, pte, sizeof(*pte)); start_pfn++; index++; } @@ -2129,7 +2138,7 @@ get_valid_domain_for_dev(struct pci_dev *pdev) } /* make sure context mapping is ok */ - if (unlikely(!domain_context_mapped(domain, pdev))) { + if 
(unlikely(!domain_context_mapped(pdev))) { ret = domain_context_mapping(domain, pdev); if (ret) { printk(KERN_ERR -- cgit 1.4.1 From 5e98c4b1d6e89676193c355e430eddf369bcf195 Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Mon, 8 Dec 2008 23:03:27 +0800 Subject: Allocation and free functions of virtual machine domain virtual machine domain is different from native DMA-API domain, implement separate allocation and free functions for virtual machine domain. Signed-off-by: Weidong Han Signed-off-by: Joerg Roedel --- drivers/pci/intel-iommu.c | 107 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index f0a21995b135..171f6c61fa1d 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1216,6 +1216,7 @@ static int iommu_init_domains(struct intel_iommu *iommu) static void domain_exit(struct dmar_domain *domain); +static void vm_domain_exit(struct dmar_domain *domain); void free_dmar_iommu(struct intel_iommu *iommu) { @@ -1229,8 +1230,12 @@ void free_dmar_iommu(struct intel_iommu *iommu) clear_bit(i, iommu->domain_ids); spin_lock_irqsave(&domain->iommu_lock, flags); - if (--domain->iommu_count == 0) - domain_exit(domain); + if (--domain->iommu_count == 0) { + if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) + vm_domain_exit(domain); + else + domain_exit(domain); + } spin_unlock_irqrestore(&domain->iommu_lock, flags); i = find_next_bit(iommu->domain_ids, @@ -2792,6 +2797,104 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain) spin_unlock_irqrestore(&device_domain_lock, flags1); } +/* domain id for virtual machine, it won't be set in context */ +static unsigned long vm_domid; + +static struct dmar_domain *iommu_alloc_vm_domain(void) +{ + struct dmar_domain *domain; + + domain = alloc_domain_mem(); + if (!domain) + return NULL; + + domain->id = vm_domid++; + memset(&domain->iommu_bmp, 0, 
sizeof(unsigned long)); + domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE; + + return domain; +} + +static int vm_domain_init(struct dmar_domain *domain, int guest_width) +{ + int adjust_width; + + init_iova_domain(&domain->iovad, DMA_32BIT_PFN); + spin_lock_init(&domain->mapping_lock); + spin_lock_init(&domain->iommu_lock); + + domain_reserve_special_ranges(domain); + + /* calculate AGAW */ + domain->gaw = guest_width; + adjust_width = guestwidth_to_adjustwidth(guest_width); + domain->agaw = width_to_agaw(adjust_width); + + INIT_LIST_HEAD(&domain->devices); + + domain->iommu_count = 0; + domain->iommu_coherency = 0; + + /* always allocate the top pgd */ + domain->pgd = (struct dma_pte *)alloc_pgtable_page(); + if (!domain->pgd) + return -ENOMEM; + domain_flush_cache(domain, domain->pgd, PAGE_SIZE); + return 0; +} + +static void iommu_free_vm_domain(struct dmar_domain *domain) +{ + unsigned long flags; + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + unsigned long i; + unsigned long ndomains; + + for_each_drhd_unit(drhd) { + if (drhd->ignored) + continue; + iommu = drhd->iommu; + + ndomains = cap_ndoms(iommu->cap); + i = find_first_bit(iommu->domain_ids, ndomains); + for (; i < ndomains; ) { + if (iommu->domains[i] == domain) { + spin_lock_irqsave(&iommu->lock, flags); + clear_bit(i, iommu->domain_ids); + iommu->domains[i] = NULL; + spin_unlock_irqrestore(&iommu->lock, flags); + break; + } + i = find_next_bit(iommu->domain_ids, ndomains, i+1); + } + } +} + +static void vm_domain_exit(struct dmar_domain *domain) +{ + u64 end; + + /* Domain 0 is reserved, so dont process it */ + if (!domain) + return; + + vm_domain_remove_all_dev_info(domain); + /* destroy iovas */ + put_iova_domain(&domain->iovad); + end = DOMAIN_MAX_ADDR(domain->gaw); + end = end & (~VTD_PAGE_MASK); + + /* clear ptes */ + dma_pte_clear_range(domain, 0, end); + + /* free page tables */ + dma_pte_free_pagetable(domain, 0, end); + + iommu_free_vm_domain(domain); + free_domain_mem(domain); +} 
+ void intel_iommu_domain_exit(struct dmar_domain *domain) { u64 end; -- cgit 1.4.1 From ea6606b02fc3192f2edab2db669fa0b9756b4e67 Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Mon, 8 Dec 2008 23:08:15 +0800 Subject: Change domain_context_mapping_one for virtual machine domain vm_domid won't be set in context, find available domain id for a device from its iommu. For a virtual machine domain, a default agaw will be set, and skip top levels of page tables for iommu which has less agaw than default. Signed-off-by: Weidong Han Signed-off-by: Joerg Roedel --- drivers/pci/intel-iommu.c | 55 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 3 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 171f6c61fa1d..8a204d5bb427 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1454,6 +1454,11 @@ static int domain_context_mapping_one(struct dmar_domain *domain, struct context_entry *context; unsigned long flags; struct intel_iommu *iommu; + struct dma_pte *pgd; + unsigned long num; + unsigned long ndomains; + int id; + int agaw; pr_debug("Set context mapping for %02x:%02x.%d\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); @@ -1472,9 +1477,53 @@ static int domain_context_mapping_one(struct dmar_domain *domain, return 0; } - context_set_domain_id(context, domain->id); - context_set_address_width(context, domain->agaw); - context_set_address_root(context, virt_to_phys(domain->pgd)); + id = domain->id; + pgd = domain->pgd; + + if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) { + int found = 0; + + /* find an available domain id for this device in iommu */ + ndomains = cap_ndoms(iommu->cap); + num = find_first_bit(iommu->domain_ids, ndomains); + for (; num < ndomains; ) { + if (iommu->domains[num] == domain) { + id = num; + found = 1; + break; + } + num = find_next_bit(iommu->domain_ids, + cap_ndoms(iommu->cap), num+1); + } + + if (found == 0) { + num = 
find_first_zero_bit(iommu->domain_ids, ndomains); + if (num >= ndomains) { + spin_unlock_irqrestore(&iommu->lock, flags); + printk(KERN_ERR "IOMMU: no free domain ids\n"); + return -EFAULT; + } + + set_bit(num, iommu->domain_ids); + iommu->domains[num] = domain; + id = num; + } + + /* Skip top levels of page tables for + * iommu which has less agaw than default. + */ + for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) { + pgd = phys_to_virt(dma_pte_addr(pgd)); + if (!dma_pte_present(pgd)) { + spin_unlock_irqrestore(&iommu->lock, flags); + return -ENOMEM; + } + } + } + + context_set_domain_id(context, id); + context_set_address_width(context, iommu->agaw); + context_set_address_root(context, virt_to_phys(pgd)); context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL); context_set_fault_enable(context); context_set_present(context); -- cgit 1.4.1 From faa3d6f5ffe7bf60ebfd0d36513fbcda0eb0ea1a Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Mon, 8 Dec 2008 23:09:29 +0800 Subject: Change intel iommu APIs of virtual machine domain These APIs are used by KVM to use VT-d Signed-off-by: Weidong Han Signed-off-by: Joerg Roedel --- drivers/pci/intel-iommu.c | 129 ++++++++++++++++++++------------------------ include/linux/intel-iommu.h | 20 +++---- 2 files changed, 70 insertions(+), 79 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 8a204d5bb427..f1380269cabd 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2944,96 +2944,87 @@ static void vm_domain_exit(struct dmar_domain *domain) free_domain_mem(domain); } -void intel_iommu_domain_exit(struct dmar_domain *domain) +struct dmar_domain *intel_iommu_alloc_domain(void) { - u64 end; - - /* Domain 0 is reserved, so dont process it */ - if (!domain) - return; - - end = DOMAIN_MAX_ADDR(domain->gaw); - end = end & (~VTD_PAGE_MASK); - - /* clear ptes */ - dma_pte_clear_range(domain, 0, end); - - /* free page tables */ - 
dma_pte_free_pagetable(domain, 0, end); - - iommu_free_domain(domain); - free_domain_mem(domain); -} -EXPORT_SYMBOL_GPL(intel_iommu_domain_exit); - -struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev) -{ - struct dmar_drhd_unit *drhd; struct dmar_domain *domain; - struct intel_iommu *iommu; - drhd = dmar_find_matched_drhd_unit(pdev); - if (!drhd) { - printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n"); - return NULL; - } - - iommu = drhd->iommu; - if (!iommu) { - printk(KERN_ERR - "intel_iommu_domain_alloc: iommu == NULL\n"); - return NULL; - } - domain = iommu_alloc_domain(iommu); + domain = iommu_alloc_vm_domain(); if (!domain) { printk(KERN_ERR "intel_iommu_domain_alloc: domain == NULL\n"); return NULL; } - if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { + if (vm_domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { printk(KERN_ERR "intel_iommu_domain_alloc: domain_init() failed\n"); - intel_iommu_domain_exit(domain); + vm_domain_exit(domain); return NULL; } + return domain; } -EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc); +EXPORT_SYMBOL_GPL(intel_iommu_alloc_domain); -int intel_iommu_context_mapping( - struct dmar_domain *domain, struct pci_dev *pdev) +void intel_iommu_free_domain(struct dmar_domain *domain) { - int rc; - rc = domain_context_mapping(domain, pdev); - return rc; + vm_domain_exit(domain); } -EXPORT_SYMBOL_GPL(intel_iommu_context_mapping); +EXPORT_SYMBOL_GPL(intel_iommu_free_domain); -int intel_iommu_page_mapping( - struct dmar_domain *domain, dma_addr_t iova, - u64 hpa, size_t size, int prot) +int intel_iommu_attach_device(struct dmar_domain *domain, + struct pci_dev *pdev) { - int rc; - rc = domain_page_mapping(domain, iova, hpa, size, prot); - return rc; + int ret; + + /* normally pdev is not mapped */ + if (unlikely(domain_context_mapped(pdev))) { + struct dmar_domain *old_domain; + + old_domain = find_domain(pdev); + if (old_domain) { + if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) + 
vm_domain_remove_one_dev_info(old_domain, pdev); + else + domain_remove_dev_info(old_domain); + } + } + + ret = domain_context_mapping(domain, pdev); + if (ret) + return ret; + + ret = vm_domain_add_dev_info(domain, pdev); + return ret; } -EXPORT_SYMBOL_GPL(intel_iommu_page_mapping); +EXPORT_SYMBOL_GPL(intel_iommu_attach_device); -void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn) +void intel_iommu_detach_device(struct dmar_domain *domain, + struct pci_dev *pdev) { - struct intel_iommu *iommu; + vm_domain_remove_one_dev_info(domain, pdev); +} +EXPORT_SYMBOL_GPL(intel_iommu_detach_device); - iommu = device_to_iommu(bus, devfn); - iommu_detach_dev(iommu, bus, devfn); +int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova, + u64 hpa, size_t size, int prot) +{ + int ret; + ret = domain_page_mapping(domain, iova, hpa, size, prot); + return ret; } -EXPORT_SYMBOL_GPL(intel_iommu_detach_dev); +EXPORT_SYMBOL_GPL(intel_iommu_map_address); -struct dmar_domain * -intel_iommu_find_domain(struct pci_dev *pdev) +void intel_iommu_unmap_address(struct dmar_domain *domain, + dma_addr_t iova, size_t size) { - return find_domain(pdev); + dma_addr_t base; + + /* The address might not be aligned */ + base = iova & VTD_PAGE_MASK; + size = VTD_PAGE_ALIGN(size); + dma_pte_clear_range(domain, base, base + size); } -EXPORT_SYMBOL_GPL(intel_iommu_find_domain); +EXPORT_SYMBOL_GPL(intel_iommu_unmap_address); int intel_iommu_found(void) { @@ -3041,17 +3032,15 @@ int intel_iommu_found(void) } EXPORT_SYMBOL_GPL(intel_iommu_found); -u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova) +u64 intel_iommu_iova_to_phys(struct dmar_domain *domain, u64 iova) { struct dma_pte *pte; - u64 pfn; + u64 phys = 0; - pfn = 0; pte = addr_to_dma_pte(domain, iova); - if (pte) - pfn = dma_pte_addr(pte); + phys = dma_pte_addr(pte); - return pfn >> VTD_PAGE_SHIFT; + return phys; } -EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn); 
+EXPORT_SYMBOL_GPL(intel_iommu_iova_to_phys); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 06349fd5871b..07973c4e4acc 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -330,15 +330,17 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); -void intel_iommu_domain_exit(struct dmar_domain *domain); -struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev); -int intel_iommu_context_mapping(struct dmar_domain *domain, - struct pci_dev *pdev); -int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova, - u64 hpa, size_t size, int prot); -void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn); -struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev); -u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova); +struct dmar_domain *intel_iommu_alloc_domain(void); +void intel_iommu_free_domain(struct dmar_domain *domain); +int intel_iommu_attach_device(struct dmar_domain *domain, + struct pci_dev *pdev); +void intel_iommu_detach_device(struct dmar_domain *domain, + struct pci_dev *pdev); +int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova, + u64 hpa, size_t size, int prot); +void intel_iommu_unmap_address(struct dmar_domain *domain, + dma_addr_t iova, size_t size); +u64 intel_iommu_iova_to_phys(struct dmar_domain *domain, u64 iova); #ifdef CONFIG_DMAR int intel_iommu_found(void); -- cgit 1.4.1 From fe40f1e020d0923f5f35ca15f02a206c75a28053 Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Mon, 8 Dec 2008 23:10:23 +0800 Subject: Check agaw is sufficient for mapped memory When domain is related to multiple iommus, need to check if the minimum agaw is sufficient for the mapped memory Signed-off-by: Weidong Han Signed-off-by: Joerg Roedel --- drivers/pci/intel-iommu.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 
insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index f1380269cabd..772fb22e1be0 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -230,6 +230,7 @@ struct dmar_domain { int iommu_coherency;/* indicate coherency of iommu access */ int iommu_count; /* reference count of iommu */ spinlock_t iommu_lock; /* protect iommu set in domain */ + u64 max_addr; /* maximum mapped address */ }; /* PCI domain-device relationship */ @@ -2849,6 +2850,22 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain) /* domain id for virtual machine, it won't be set in context */ static unsigned long vm_domid; +static int vm_domain_min_agaw(struct dmar_domain *domain) +{ + int i; + int min_agaw = domain->agaw; + + i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus); + for (; i < g_num_of_iommus; ) { + if (min_agaw > g_iommus[i]->agaw) + min_agaw = g_iommus[i]->agaw; + + i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1); + } + + return min_agaw; +} + static struct dmar_domain *iommu_alloc_vm_domain(void) { struct dmar_domain *domain; @@ -2883,6 +2900,7 @@ static int vm_domain_init(struct dmar_domain *domain, int guest_width) domain->iommu_count = 0; domain->iommu_coherency = 0; + domain->max_addr = 0; /* always allocate the top pgd */ domain->pgd = (struct dma_pte *)alloc_pgtable_page(); @@ -2974,6 +2992,9 @@ EXPORT_SYMBOL_GPL(intel_iommu_free_domain); int intel_iommu_attach_device(struct dmar_domain *domain, struct pci_dev *pdev) { + struct intel_iommu *iommu; + int addr_width; + u64 end; int ret; /* normally pdev is not mapped */ @@ -2989,6 +3010,21 @@ int intel_iommu_attach_device(struct dmar_domain *domain, } } + iommu = device_to_iommu(pdev->bus->number, pdev->devfn); + if (!iommu) + return -ENODEV; + + /* check if this iommu agaw is sufficient for max mapped address */ + addr_width = agaw_to_width(iommu->agaw); + end = DOMAIN_MAX_ADDR(addr_width); + end = end & 
VTD_PAGE_MASK; + if (end < domain->max_addr) { + printk(KERN_ERR "%s: iommu agaw (%d) is not " + "sufficient for the mapped address (%llx)\n", + __func__, iommu->agaw, domain->max_addr); + return -EFAULT; + } + ret = domain_context_mapping(domain, pdev); if (ret) return ret; @@ -3008,7 +3044,29 @@ EXPORT_SYMBOL_GPL(intel_iommu_detach_device); int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova, u64 hpa, size_t size, int prot) { + u64 max_addr; + int addr_width; int ret; + + max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size); + if (domain->max_addr < max_addr) { + int min_agaw; + u64 end; + + /* check if minimum agaw is sufficient for mapped address */ + min_agaw = vm_domain_min_agaw(domain); + addr_width = agaw_to_width(min_agaw); + end = DOMAIN_MAX_ADDR(addr_width); + end = end & VTD_PAGE_MASK; + if (end < max_addr) { + printk(KERN_ERR "%s: iommu agaw (%d) is not " + "sufficient for the mapped address (%llx)\n", + __func__, min_agaw, max_addr); + return -EFAULT; + } + domain->max_addr = max_addr; + } + ret = domain_page_mapping(domain, iova, hpa, size, prot); return ret; } @@ -3023,6 +3081,9 @@ void intel_iommu_unmap_address(struct dmar_domain *domain, base = iova & VTD_PAGE_MASK; size = VTD_PAGE_ALIGN(size); dma_pte_clear_range(domain, base, base + size); + + if (domain->max_addr == base + size) + domain->max_addr = base; } EXPORT_SYMBOL_GPL(intel_iommu_unmap_address); -- cgit 1.4.1 From 5d450806eb0e569c5846a5825e7f535980b0da32 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Dec 2008 14:52:32 +0100 Subject: VT-d: adapt domain init and destroy functions for IOMMU API Signed-off-by: Joerg Roedel --- drivers/pci/intel-iommu.c | 33 ++++++++++++++++++--------------- include/linux/intel-iommu.h | 2 -- 2 files changed, 18 insertions(+), 17 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 772fb22e1be0..5c95a5a65440 100644 --- a/drivers/pci/intel-iommu.c +++ 
b/drivers/pci/intel-iommu.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -2962,32 +2963,34 @@ static void vm_domain_exit(struct dmar_domain *domain) free_domain_mem(domain); } -struct dmar_domain *intel_iommu_alloc_domain(void) +static int intel_iommu_domain_init(struct iommu_domain *domain) { - struct dmar_domain *domain; + struct dmar_domain *dmar_domain; - domain = iommu_alloc_vm_domain(); - if (!domain) { + dmar_domain = iommu_alloc_vm_domain(); + if (!dmar_domain) { printk(KERN_ERR - "intel_iommu_domain_alloc: domain == NULL\n"); - return NULL; + "intel_iommu_domain_init: dmar_domain == NULL\n"); + return -ENOMEM; } - if (vm_domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { + if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { printk(KERN_ERR - "intel_iommu_domain_alloc: domain_init() failed\n"); - vm_domain_exit(domain); - return NULL; + "intel_iommu_domain_init() failed\n"); + vm_domain_exit(dmar_domain); + return -ENOMEM; } + domain->priv = dmar_domain; - return domain; + return 0; } -EXPORT_SYMBOL_GPL(intel_iommu_alloc_domain); -void intel_iommu_free_domain(struct dmar_domain *domain) +static void intel_iommu_domain_destroy(struct iommu_domain *domain) { - vm_domain_exit(domain); + struct dmar_domain *dmar_domain = domain->priv; + + domain->priv = NULL; + vm_domain_exit(dmar_domain); } -EXPORT_SYMBOL_GPL(intel_iommu_free_domain); int intel_iommu_attach_device(struct dmar_domain *domain, struct pci_dev *pdev) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 07973c4e4acc..0a7ba0cefc74 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -330,8 +330,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); -struct dmar_domain *intel_iommu_alloc_domain(void); -void intel_iommu_free_domain(struct dmar_domain *domain); int intel_iommu_attach_device(struct 
dmar_domain *domain, struct pci_dev *pdev); void intel_iommu_detach_device(struct dmar_domain *domain, -- cgit 1.4.1 From 4c5478c94eb29e6101f1f13175f7455bc8b5d953 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Dec 2008 14:58:24 +0100 Subject: VT-d: adapt device attach and detach functions for IOMMU API Signed-off-by: Joerg Roedel --- drivers/pci/intel-iommu.c | 27 +++++++++++++++------------ include/linux/intel-iommu.h | 4 ---- 2 files changed, 15 insertions(+), 16 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 5c95a5a65440..db9a26cfeb8f 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2992,9 +2992,11 @@ static void intel_iommu_domain_destroy(struct iommu_domain *domain) vm_domain_exit(dmar_domain); } -int intel_iommu_attach_device(struct dmar_domain *domain, - struct pci_dev *pdev) +static int intel_iommu_attach_device(struct iommu_domain *domain, + struct device *dev) { + struct dmar_domain *dmar_domain = domain->priv; + struct pci_dev *pdev = to_pci_dev(dev); struct intel_iommu *iommu; int addr_width; u64 end; @@ -3006,7 +3008,7 @@ int intel_iommu_attach_device(struct dmar_domain *domain, old_domain = find_domain(pdev); if (old_domain) { - if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) + if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) vm_domain_remove_one_dev_info(old_domain, pdev); else domain_remove_dev_info(old_domain); @@ -3021,28 +3023,29 @@ int intel_iommu_attach_device(struct dmar_domain *domain, addr_width = agaw_to_width(iommu->agaw); end = DOMAIN_MAX_ADDR(addr_width); end = end & VTD_PAGE_MASK; - if (end < domain->max_addr) { + if (end < dmar_domain->max_addr) { printk(KERN_ERR "%s: iommu agaw (%d) is not " "sufficient for the mapped address (%llx)\n", - __func__, iommu->agaw, domain->max_addr); + __func__, iommu->agaw, dmar_domain->max_addr); return -EFAULT; } - ret = domain_context_mapping(domain, pdev); + ret = 
domain_context_mapping(dmar_domain, pdev); if (ret) return ret; - ret = vm_domain_add_dev_info(domain, pdev); + ret = vm_domain_add_dev_info(dmar_domain, pdev); return ret; } -EXPORT_SYMBOL_GPL(intel_iommu_attach_device); -void intel_iommu_detach_device(struct dmar_domain *domain, - struct pci_dev *pdev) +static void intel_iommu_detach_device(struct iommu_domain *domain, + struct device *dev) { - vm_domain_remove_one_dev_info(domain, pdev); + struct dmar_domain *dmar_domain = domain->priv; + struct pci_dev *pdev = to_pci_dev(dev); + + vm_domain_remove_one_dev_info(dmar_domain, pdev); } -EXPORT_SYMBOL_GPL(intel_iommu_detach_device); int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova, u64 hpa, size_t size, int prot) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 0a7ba0cefc74..9909c5a1b20f 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -330,10 +330,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); -int intel_iommu_attach_device(struct dmar_domain *domain, - struct pci_dev *pdev); -void intel_iommu_detach_device(struct dmar_domain *domain, - struct pci_dev *pdev); int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova, u64 hpa, size_t size, int prot); void intel_iommu_unmap_address(struct dmar_domain *domain, -- cgit 1.4.1 From dde57a210dcdce85e2813bab8f88687761d9f6a6 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Dec 2008 15:04:09 +0100 Subject: VT-d: adapt domain map and unmap functions for IOMMU API Signed-off-by: Joerg Roedel --- drivers/pci/intel-iommu.c | 33 ++++++++++++++++++++------------- include/linux/intel-iommu.h | 4 ---- 2 files changed, 20 insertions(+), 17 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index db9a26cfeb8f..8af6c96f31b3 100644 --- a/drivers/pci/intel-iommu.c +++ 
b/drivers/pci/intel-iommu.c @@ -3047,20 +3047,28 @@ static void intel_iommu_detach_device(struct iommu_domain *domain, vm_domain_remove_one_dev_info(dmar_domain, pdev); } -int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova, - u64 hpa, size_t size, int prot) +static int intel_iommu_map_range(struct iommu_domain *domain, + unsigned long iova, phys_addr_t hpa, + size_t size, int iommu_prot) { + struct dmar_domain *dmar_domain = domain->priv; u64 max_addr; int addr_width; + int prot = 0; int ret; + if (iommu_prot & IOMMU_READ) + prot |= DMA_PTE_READ; + if (iommu_prot & IOMMU_WRITE) + prot |= DMA_PTE_WRITE; + max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size); - if (domain->max_addr < max_addr) { + if (dmar_domain->max_addr < max_addr) { int min_agaw; u64 end; /* check if minimum agaw is sufficient for mapped address */ - min_agaw = vm_domain_min_agaw(domain); + min_agaw = vm_domain_min_agaw(dmar_domain); addr_width = agaw_to_width(min_agaw); end = DOMAIN_MAX_ADDR(addr_width); end = end & VTD_PAGE_MASK; @@ -3070,28 +3078,27 @@ int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova, __func__, min_agaw, max_addr); return -EFAULT; } - domain->max_addr = max_addr; + dmar_domain->max_addr = max_addr; } - ret = domain_page_mapping(domain, iova, hpa, size, prot); + ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot); return ret; } -EXPORT_SYMBOL_GPL(intel_iommu_map_address); -void intel_iommu_unmap_address(struct dmar_domain *domain, - dma_addr_t iova, size_t size) +static void intel_iommu_unmap_range(struct iommu_domain *domain, + unsigned long iova, size_t size) { + struct dmar_domain *dmar_domain = domain->priv; dma_addr_t base; /* The address might not be aligned */ base = iova & VTD_PAGE_MASK; size = VTD_PAGE_ALIGN(size); - dma_pte_clear_range(domain, base, base + size); + dma_pte_clear_range(dmar_domain, base, base + size); - if (domain->max_addr == base + size) - domain->max_addr = base; + if (dmar_domain->max_addr 
== base + size) + dmar_domain->max_addr = base; } -EXPORT_SYMBOL_GPL(intel_iommu_unmap_address); int intel_iommu_found(void) { diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 9909c5a1b20f..6bc26e03858c 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -330,10 +330,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); -int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova, - u64 hpa, size_t size, int prot); -void intel_iommu_unmap_address(struct dmar_domain *domain, - dma_addr_t iova, size_t size); u64 intel_iommu_iova_to_phys(struct dmar_domain *domain, u64 iova); #ifdef CONFIG_DMAR -- cgit 1.4.1 From d14d65777c2491dd5baf1e17f444b8f653f3cbb1 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Dec 2008 15:06:57 +0100 Subject: VT-d: adapt domain iova_to_phys function for IOMMU API Signed-off-by: Joerg Roedel --- drivers/pci/intel-iommu.c | 7 ++++--- include/linux/intel-iommu.h | 2 -- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 8af6c96f31b3..712810598a2e 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -3106,15 +3106,16 @@ int intel_iommu_found(void) } EXPORT_SYMBOL_GPL(intel_iommu_found); -u64 intel_iommu_iova_to_phys(struct dmar_domain *domain, u64 iova) +static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, + unsigned long iova) { + struct dmar_domain *dmar_domain = domain->priv; struct dma_pte *pte; u64 phys = 0; - pte = addr_to_dma_pte(domain, iova); + pte = addr_to_dma_pte(dmar_domain, iova); if (pte) phys = dma_pte_addr(pte); return phys; } -EXPORT_SYMBOL_GPL(intel_iommu_iova_to_phys); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 6bc26e03858c..26ccc0294567 100644 --- a/include/linux/intel-iommu.h +++ 
b/include/linux/intel-iommu.h @@ -330,8 +330,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); -u64 intel_iommu_iova_to_phys(struct dmar_domain *domain, u64 iova); - #ifdef CONFIG_DMAR int intel_iommu_found(void); #else /* CONFIG_DMAR */ -- cgit 1.4.1 From a8bcbb0de4a52f07fef7412ddc877348311ebf2a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Dec 2008 15:14:02 +0100 Subject: VT-d: register functions for the IOMMU API Signed-off-by: Joerg Roedel --- drivers/pci/intel-iommu.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 712810598a2e..81e04ec85d97 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -277,6 +277,8 @@ static int intel_iommu_strict; static DEFINE_SPINLOCK(device_domain_lock); static LIST_HEAD(device_domain_list); +static struct iommu_ops intel_iommu_ops; + static int __init intel_iommu_setup(char *str) { if (!str) @@ -2729,6 +2731,9 @@ int __init intel_iommu_init(void) init_timer(&unmap_timer); force_iommu = 1; dma_ops = &intel_dma_ops; + + register_iommu(&intel_iommu_ops); + return 0; } @@ -3119,3 +3124,13 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, return phys; } + +static struct iommu_ops intel_iommu_ops = { + .domain_init = intel_iommu_domain_init, + .domain_destroy = intel_iommu_domain_destroy, + .attach_dev = intel_iommu_attach_device, + .detach_dev = intel_iommu_detach_device, + .map = intel_iommu_map_range, + .unmap = intel_iommu_unmap_range, + .iova_to_phys = intel_iommu_iova_to_phys, +}; -- cgit 1.4.1 From e4754c96cf8b82a754dc5ba791d6c0bf1fbe8e8e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Dec 2008 15:26:42 +0100 Subject: VT-d: remove now unused intel_iommu_found function Signed-off-by: Joerg Roedel --- drivers/pci/intel-iommu.c | 6 ------ 
include/linux/intel-iommu.h | 9 --------- 2 files changed, 15 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 81e04ec85d97..ecb5fd3b71f7 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -3105,12 +3105,6 @@ static void intel_iommu_unmap_range(struct iommu_domain *domain, dmar_domain->max_addr = base; } -int intel_iommu_found(void) -{ - return g_num_of_iommus; -} -EXPORT_SYMBOL_GPL(intel_iommu_found); - static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, unsigned long iova) { diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 26ccc0294567..c4f6c101dbcd 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -330,15 +330,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); -#ifdef CONFIG_DMAR -int intel_iommu_found(void); -#else /* CONFIG_DMAR */ -static inline int intel_iommu_found(void) -{ - return 0; -} -#endif /* CONFIG_DMAR */ - extern void *intel_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t); extern void intel_free_coherent(struct device *, size_t, void *, dma_addr_t); extern dma_addr_t intel_map_single(struct device *, phys_addr_t, size_t, int); -- cgit 1.4.1 From cdc7b83726297b43deed0455d8732163cc59802a Mon Sep 17 00:00:00 2001 From: Mike Day Date: Fri, 12 Dec 2008 17:16:30 +0100 Subject: intel-iommu: fix bit shift at DOMAIN_FLAG_P2P_MULTIPLE_DEVICES Signed-off-by: Mike Day Signed-off-by: Joerg Roedel --- drivers/pci/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index ecb5fd3b71f7..235fb7a5a8a5 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -205,7 +205,7 @@ static inline bool dma_pte_present(struct dma_pte *pte) } /* devices under the same p2p bridge 
are owned in one domain */ -#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 < 0) +#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0) /* domain represents a virtual machine, more than one devices * across iommus may be owned in one domain, e.g. kvm guest. -- cgit 1.4.1 From adf094931ffb25ef4b381559918f1a34181a5273 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 6 Oct 2008 22:46:05 +0200 Subject: PM: Simplify the new suspend/hibernation framework for devices PM: Simplify the new suspend/hibernation framework for devices Following the discussion at the Kernel Summit, simplify the new device PM framework by merging 'struct pm_ops' and 'struct pm_ext_ops' and removing pointers to 'struct pm_ext_ops' from 'struct platform_driver' and 'struct pci_driver'. After this change, the suspend/hibernation callbacks will only reside in 'struct device_driver' as well as at the bus type/ device class/device type level. Accordingly, PCI and platform device drivers are now expected to put their suspend/hibernation callbacks into the 'struct device_driver' embedded in 'struct pci_driver' or 'struct platform_driver', respectively. Signed-off-by: Rafael J. 
Wysocki Acked-by: Pavel Machek Cc: Jesse Barnes Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 115 +++++++++++++++++++++------------------- drivers/base/power/main.c | 19 +++---- drivers/pci/pci-driver.c | 46 +++++++--------- drivers/usb/core/usb.c | 4 +- include/linux/device.h | 8 +-- include/linux/pci.h | 1 - include/linux/platform_device.h | 1 - include/linux/pm.h | 76 +++++++++----------------- 8 files changed, 119 insertions(+), 151 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index dfcbfe504867..6c743b6008d9 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -503,8 +503,6 @@ int platform_driver_register(struct platform_driver *drv) drv->driver.suspend = platform_drv_suspend; if (drv->resume) drv->driver.resume = platform_drv_resume; - if (drv->pm) - drv->driver.pm = &drv->pm->base; return driver_register(&drv->driver); } EXPORT_SYMBOL_GPL(platform_driver_register); @@ -686,7 +684,10 @@ static int platform_pm_suspend(struct device *dev) struct device_driver *drv = dev->driver; int ret = 0; - if (drv && drv->pm) { + if (!drv) + return 0; + + if (drv->pm) { if (drv->pm->suspend) ret = drv->pm->suspend(dev); } else { @@ -698,16 +699,15 @@ static int platform_pm_suspend(struct device *dev) static int platform_pm_suspend_noirq(struct device *dev) { - struct platform_driver *pdrv; + struct device_driver *drv = dev->driver; int ret = 0; - if (!dev->driver) + if (!drv) return 0; - pdrv = to_platform_driver(dev->driver); - if (pdrv->pm) { - if (pdrv->pm->suspend_noirq) - ret = pdrv->pm->suspend_noirq(dev); + if (drv->pm) { + if (drv->pm->suspend_noirq) + ret = drv->pm->suspend_noirq(dev); } else { ret = platform_legacy_suspend_late(dev, PMSG_SUSPEND); } @@ -720,7 +720,10 @@ static int platform_pm_resume(struct device *dev) struct device_driver *drv = dev->driver; int ret = 0; - if (drv && drv->pm) { + if (!drv) + return 0; + + if (drv->pm) { if (drv->pm->resume) ret = 
drv->pm->resume(dev); } else { @@ -732,16 +735,15 @@ static int platform_pm_resume(struct device *dev) static int platform_pm_resume_noirq(struct device *dev) { - struct platform_driver *pdrv; + struct device_driver *drv = dev->driver; int ret = 0; - if (!dev->driver) + if (!drv) return 0; - pdrv = to_platform_driver(dev->driver); - if (pdrv->pm) { - if (pdrv->pm->resume_noirq) - ret = pdrv->pm->resume_noirq(dev); + if (drv->pm) { + if (drv->pm->resume_noirq) + ret = drv->pm->resume_noirq(dev); } else { ret = platform_legacy_resume_early(dev); } @@ -780,16 +782,15 @@ static int platform_pm_freeze(struct device *dev) static int platform_pm_freeze_noirq(struct device *dev) { - struct platform_driver *pdrv; + struct device_driver *drv = dev->driver; int ret = 0; - if (!dev->driver) + if (!drv) return 0; - pdrv = to_platform_driver(dev->driver); - if (pdrv->pm) { - if (pdrv->pm->freeze_noirq) - ret = pdrv->pm->freeze_noirq(dev); + if (drv->pm) { + if (drv->pm->freeze_noirq) + ret = drv->pm->freeze_noirq(dev); } else { ret = platform_legacy_suspend_late(dev, PMSG_FREEZE); } @@ -802,7 +803,10 @@ static int platform_pm_thaw(struct device *dev) struct device_driver *drv = dev->driver; int ret = 0; - if (drv && drv->pm) { + if (!drv) + return 0; + + if (drv->pm) { if (drv->pm->thaw) ret = drv->pm->thaw(dev); } else { @@ -814,16 +818,15 @@ static int platform_pm_thaw(struct device *dev) static int platform_pm_thaw_noirq(struct device *dev) { - struct platform_driver *pdrv; + struct device_driver *drv = dev->driver; int ret = 0; - if (!dev->driver) + if (!drv) return 0; - pdrv = to_platform_driver(dev->driver); - if (pdrv->pm) { - if (pdrv->pm->thaw_noirq) - ret = pdrv->pm->thaw_noirq(dev); + if (drv->pm) { + if (drv->pm->thaw_noirq) + ret = drv->pm->thaw_noirq(dev); } else { ret = platform_legacy_resume_early(dev); } @@ -836,7 +839,10 @@ static int platform_pm_poweroff(struct device *dev) struct device_driver *drv = dev->driver; int ret = 0; - if (drv && drv->pm) { + if 
(!drv) + return 0; + + if (drv->pm) { if (drv->pm->poweroff) ret = drv->pm->poweroff(dev); } else { @@ -848,16 +854,15 @@ static int platform_pm_poweroff(struct device *dev) static int platform_pm_poweroff_noirq(struct device *dev) { - struct platform_driver *pdrv; + struct device_driver *drv = dev->driver; int ret = 0; - if (!dev->driver) + if (!drv) return 0; - pdrv = to_platform_driver(dev->driver); - if (pdrv->pm) { - if (pdrv->pm->poweroff_noirq) - ret = pdrv->pm->poweroff_noirq(dev); + if (drv->pm) { + if (drv->pm->poweroff_noirq) + ret = drv->pm->poweroff_noirq(dev); } else { ret = platform_legacy_suspend_late(dev, PMSG_HIBERNATE); } @@ -870,7 +875,10 @@ static int platform_pm_restore(struct device *dev) struct device_driver *drv = dev->driver; int ret = 0; - if (drv && drv->pm) { + if (!drv) + return 0; + + if (drv->pm) { if (drv->pm->restore) ret = drv->pm->restore(dev); } else { @@ -882,16 +890,15 @@ static int platform_pm_restore(struct device *dev) static int platform_pm_restore_noirq(struct device *dev) { - struct platform_driver *pdrv; + struct device_driver *drv = dev->driver; int ret = 0; - if (!dev->driver) + if (!drv) return 0; - pdrv = to_platform_driver(dev->driver); - if (pdrv->pm) { - if (pdrv->pm->restore_noirq) - ret = pdrv->pm->restore_noirq(dev); + if (drv->pm) { + if (drv->pm->restore_noirq) + ret = drv->pm->restore_noirq(dev); } else { ret = platform_legacy_resume_early(dev); } @@ -912,17 +919,15 @@ static int platform_pm_restore_noirq(struct device *dev) #endif /* !CONFIG_HIBERNATION */ -static struct pm_ext_ops platform_pm_ops = { - .base = { - .prepare = platform_pm_prepare, - .complete = platform_pm_complete, - .suspend = platform_pm_suspend, - .resume = platform_pm_resume, - .freeze = platform_pm_freeze, - .thaw = platform_pm_thaw, - .poweroff = platform_pm_poweroff, - .restore = platform_pm_restore, - }, +static struct dev_pm_ops platform_dev_pm_ops = { + .prepare = platform_pm_prepare, + .complete = platform_pm_complete, + 
.suspend = platform_pm_suspend, + .resume = platform_pm_resume, + .freeze = platform_pm_freeze, + .thaw = platform_pm_thaw, + .poweroff = platform_pm_poweroff, + .restore = platform_pm_restore, .suspend_noirq = platform_pm_suspend_noirq, .resume_noirq = platform_pm_resume_noirq, .freeze_noirq = platform_pm_freeze_noirq, @@ -931,7 +936,7 @@ static struct pm_ext_ops platform_pm_ops = { .restore_noirq = platform_pm_restore_noirq, }; -#define PLATFORM_PM_OPS_PTR &platform_pm_ops +#define PLATFORM_PM_OPS_PTR (&platform_dev_pm_ops) #else /* !CONFIG_PM_SLEEP */ diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 692c20ba5144..a8e4dcbcaf7a 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -112,7 +112,8 @@ void device_pm_remove(struct device *dev) * @ops: PM operations to choose from. * @state: PM transition of the system being carried out. */ -static int pm_op(struct device *dev, struct pm_ops *ops, pm_message_t state) +static int pm_op(struct device *dev, struct dev_pm_ops *ops, + pm_message_t state) { int error = 0; @@ -174,7 +175,7 @@ static int pm_op(struct device *dev, struct pm_ops *ops, pm_message_t state) * The operation is executed with interrupts disabled by the only remaining * functional CPU in the system. 
*/ -static int pm_noirq_op(struct device *dev, struct pm_ext_ops *ops, +static int pm_noirq_op(struct device *dev, struct dev_pm_ops *ops, pm_message_t state) { int error = 0; @@ -354,7 +355,7 @@ static int resume_device(struct device *dev, pm_message_t state) if (dev->bus) { if (dev->bus->pm) { pm_dev_dbg(dev, state, ""); - error = pm_op(dev, &dev->bus->pm->base, state); + error = pm_op(dev, dev->bus->pm, state); } else if (dev->bus->resume) { pm_dev_dbg(dev, state, "legacy "); error = dev->bus->resume(dev); @@ -451,9 +452,9 @@ static void complete_device(struct device *dev, pm_message_t state) dev->type->pm->complete(dev); } - if (dev->bus && dev->bus->pm && dev->bus->pm->base.complete) { + if (dev->bus && dev->bus->pm && dev->bus->pm->complete) { pm_dev_dbg(dev, state, "completing "); - dev->bus->pm->base.complete(dev); + dev->bus->pm->complete(dev); } up(&dev->sem); @@ -624,7 +625,7 @@ static int suspend_device(struct device *dev, pm_message_t state) if (dev->bus) { if (dev->bus->pm) { pm_dev_dbg(dev, state, ""); - error = pm_op(dev, &dev->bus->pm->base, state); + error = pm_op(dev, dev->bus->pm, state); } else if (dev->bus->suspend) { pm_dev_dbg(dev, state, "legacy "); error = dev->bus->suspend(dev, state); @@ -685,10 +686,10 @@ static int prepare_device(struct device *dev, pm_message_t state) down(&dev->sem); - if (dev->bus && dev->bus->pm && dev->bus->pm->base.prepare) { + if (dev->bus && dev->bus->pm && dev->bus->pm->prepare) { pm_dev_dbg(dev, state, "preparing "); - error = dev->bus->pm->base.prepare(dev); - suspend_report_result(dev->bus->pm->base.prepare, error); + error = dev->bus->pm->prepare(dev); + suspend_report_result(dev->bus->pm->prepare, error); if (error) goto End; } diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index b4cdd690ae71..4042d211c3e5 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -433,8 +433,7 @@ static int pci_pm_suspend(struct device *dev) static int pci_pm_suspend_noirq(struct device 
*dev) { - struct pci_dev *pci_dev = to_pci_dev(dev); - struct pci_driver *drv = pci_dev->driver; + struct device_driver *drv = dev->driver; int error = 0; if (drv && drv->pm) { @@ -469,11 +468,10 @@ static int pci_pm_resume(struct device *dev) static int pci_pm_resume_noirq(struct device *dev) { - struct pci_dev *pci_dev = to_pci_dev(dev); - struct pci_driver *drv = pci_dev->driver; + struct device_driver *drv = dev->driver; int error = 0; - pci_fixup_device(pci_fixup_resume_early, pci_dev); + pci_fixup_device(pci_fixup_resume_early, to_pci_dev(dev)); if (drv && drv->pm) { if (drv->pm->resume_noirq) @@ -519,8 +517,7 @@ static int pci_pm_freeze(struct device *dev) static int pci_pm_freeze_noirq(struct device *dev) { - struct pci_dev *pci_dev = to_pci_dev(dev); - struct pci_driver *drv = pci_dev->driver; + struct device_driver *drv = dev->driver; int error = 0; if (drv && drv->pm) { @@ -553,15 +550,14 @@ static int pci_pm_thaw(struct device *dev) static int pci_pm_thaw_noirq(struct device *dev) { - struct pci_dev *pci_dev = to_pci_dev(dev); - struct pci_driver *drv = pci_dev->driver; + struct device_driver *drv = dev->driver; int error = 0; if (drv && drv->pm) { if (drv->pm->thaw_noirq) error = drv->pm->thaw_noirq(dev); } else { - pci_fixup_device(pci_fixup_resume_early, pci_dev); + pci_fixup_device(pci_fixup_resume_early, to_pci_dev(dev)); error = pci_legacy_resume_early(dev); } @@ -589,8 +585,7 @@ static int pci_pm_poweroff(struct device *dev) static int pci_pm_poweroff_noirq(struct device *dev) { - struct pci_dev *pci_dev = to_pci_dev(dev); - struct pci_driver *drv = pci_dev->driver; + struct device_driver *drv = dev->driver; int error = 0; if (drv && drv->pm) { @@ -625,7 +620,7 @@ static int pci_pm_restore(struct device *dev) static int pci_pm_restore_noirq(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct pci_driver *drv = pci_dev->driver; + struct device_driver *drv = dev->driver; int error = 0; pci_fixup_device(pci_fixup_resume, 
pci_dev); @@ -654,17 +649,15 @@ static int pci_pm_restore_noirq(struct device *dev) #endif /* !CONFIG_HIBERNATION */ -struct pm_ext_ops pci_pm_ops = { - .base = { - .prepare = pci_pm_prepare, - .complete = pci_pm_complete, - .suspend = pci_pm_suspend, - .resume = pci_pm_resume, - .freeze = pci_pm_freeze, - .thaw = pci_pm_thaw, - .poweroff = pci_pm_poweroff, - .restore = pci_pm_restore, - }, +struct dev_pm_ops pci_dev_pm_ops = { + .prepare = pci_pm_prepare, + .complete = pci_pm_complete, + .suspend = pci_pm_suspend, + .resume = pci_pm_resume, + .freeze = pci_pm_freeze, + .thaw = pci_pm_thaw, + .poweroff = pci_pm_poweroff, + .restore = pci_pm_restore, .suspend_noirq = pci_pm_suspend_noirq, .resume_noirq = pci_pm_resume_noirq, .freeze_noirq = pci_pm_freeze_noirq, @@ -673,7 +666,7 @@ struct pm_ext_ops pci_pm_ops = { .restore_noirq = pci_pm_restore_noirq, }; -#define PCI_PM_OPS_PTR &pci_pm_ops +#define PCI_PM_OPS_PTR (&pci_dev_pm_ops) #else /* !CONFIG_PM_SLEEP */ @@ -703,9 +696,6 @@ int __pci_register_driver(struct pci_driver *drv, struct module *owner, drv->driver.owner = owner; drv->driver.mod_name = mod_name; - if (drv->pm) - drv->driver.pm = &drv->pm->base; - spin_lock_init(&drv->dynids.lock); INIT_LIST_HEAD(&drv->dynids.list); diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index be1fa0723f2c..399e15fc5052 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -286,7 +286,7 @@ static int usb_dev_restore(struct device *dev) return usb_resume(dev); } -static struct pm_ops usb_device_pm_ops = { +static struct dev_pm_ops usb_device_pm_ops = { .prepare = usb_dev_prepare, .complete = usb_dev_complete, .suspend = usb_dev_suspend, @@ -301,7 +301,7 @@ static struct pm_ops usb_device_pm_ops = { #define ksuspend_usb_init() 0 #define ksuspend_usb_cleanup() do {} while (0) -#define usb_device_pm_ops (*(struct pm_ops *)0) +#define usb_device_pm_ops (*(struct dev_pm_ops *)0) #endif /* CONFIG_PM */ diff --git a/include/linux/device.h 
b/include/linux/device.h index 1a3686d15f98..4a520051c315 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -65,7 +65,7 @@ struct bus_type { int (*resume_early)(struct device *dev); int (*resume)(struct device *dev); - struct pm_ext_ops *pm; + struct dev_pm_ops *pm; struct bus_type_private *p; }; @@ -133,7 +133,7 @@ struct device_driver { int (*resume) (struct device *dev); struct attribute_group **groups; - struct pm_ops *pm; + struct dev_pm_ops *pm; struct driver_private *p; }; @@ -198,7 +198,7 @@ struct class { int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); - struct pm_ops *pm; + struct dev_pm_ops *pm; struct class_private *p; }; @@ -291,7 +291,7 @@ struct device_type { int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); - struct pm_ops *pm; + struct dev_pm_ops *pm; }; /* interface for exporting device attributes */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 03b0b8c3c81b..4bb156ba854a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -421,7 +421,6 @@ struct pci_driver { int (*resume_early) (struct pci_dev *dev); int (*resume) (struct pci_dev *dev); /* Device woken up */ void (*shutdown) (struct pci_dev *dev); - struct pm_ext_ops *pm; struct pci_error_handlers *err_handler; struct device_driver driver; struct pci_dynids dynids; diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 4b8cc6a32479..9a342699c607 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -55,7 +55,6 @@ struct platform_driver { int (*suspend_late)(struct platform_device *, pm_message_t state); int (*resume_early)(struct platform_device *); int (*resume)(struct platform_device *); - struct pm_ext_ops *pm; struct device_driver driver; }; diff --git a/include/linux/pm.h b/include/linux/pm.h index 42de4003c4ee..5785666d0cc4 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -41,7 +41,7 @@ typedef 
struct pm_message { } pm_message_t; /** - * struct pm_ops - device PM callbacks + * struct dev_pm_ops - device PM callbacks * * Several driver power state transitions are externally visible, affecting * the state of pending I/O queues and (for drivers that touch hardware) @@ -126,46 +126,6 @@ typedef struct pm_message { * On most platforms, there are no restrictions on availability of * resources like clocks during @restore(). * - * All of the above callbacks, except for @complete(), return error codes. - * However, the error codes returned by the resume operations, @resume(), - * @thaw(), and @restore(), do not cause the PM core to abort the resume - * transition during which they are returned. The error codes returned in - * that cases are only printed by the PM core to the system logs for debugging - * purposes. Still, it is recommended that drivers only return error codes - * from their resume methods in case of an unrecoverable failure (i.e. when the - * device being handled refuses to resume and becomes unusable) to allow us to - * modify the PM core in the future, so that it can avoid attempting to handle - * devices that failed to resume and their children. - * - * It is allowed to unregister devices while the above callbacks are being - * executed. However, it is not allowed to unregister a device from within any - * of its own callbacks. - */ - -struct pm_ops { - int (*prepare)(struct device *dev); - void (*complete)(struct device *dev); - int (*suspend)(struct device *dev); - int (*resume)(struct device *dev); - int (*freeze)(struct device *dev); - int (*thaw)(struct device *dev); - int (*poweroff)(struct device *dev); - int (*restore)(struct device *dev); -}; - -/** - * struct pm_ext_ops - extended device PM callbacks - * - * Some devices require certain operations related to suspend and hibernation - * to be carried out with interrupts disabled. 
Thus, 'struct pm_ext_ops' below - * is defined, adding callbacks to be executed with interrupts disabled to - * 'struct pm_ops'. - * - * The following callbacks included in 'struct pm_ext_ops' are executed with - * the nonboot CPUs switched off and with interrupts disabled on the only - * functional CPU. They also are executed with the PM core list of devices - * locked, so they must NOT unregister any devices. - * * @suspend_noirq: Complete the operations of ->suspend() by carrying out any * actions required for suspending the device that need interrupts to be * disabled @@ -190,18 +150,32 @@ struct pm_ops { * actions required for restoring the operations of the device that need * interrupts to be disabled * - * All of the above callbacks return error codes, but the error codes returned - * by the resume operations, @resume_noirq(), @thaw_noirq(), and - * @restore_noirq(), do not cause the PM core to abort the resume transition - * during which they are returned. The error codes returned in that cases are - * only printed by the PM core to the system logs for debugging purposes. - * Still, as stated above, it is recommended that drivers only return error - * codes from their resume methods if the device being handled fails to resume - * and is not usable any more. + * All of the above callbacks, except for @complete(), return error codes. + * However, the error codes returned by the resume operations, @resume(), + * @thaw(), @restore(), @resume_noirq(), @thaw_noirq(), and @restore_noirq() do + * not cause the PM core to abort the resume transition during which they are + * returned. The error codes returned in that cases are only printed by the PM + * core to the system logs for debugging purposes. Still, it is recommended + * that drivers only return error codes from their resume methods in case of an + * unrecoverable failure (i.e. 
when the device being handled refuses to resume + * and becomes unusable) to allow us to modify the PM core in the future, so + * that it can avoid attempting to handle devices that failed to resume and + * their children. + * + * It is allowed to unregister devices while the above callbacks are being + * executed. However, it is not allowed to unregister a device from within any + * of its own callbacks. */ -struct pm_ext_ops { - struct pm_ops base; +struct dev_pm_ops { + int (*prepare)(struct device *dev); + void (*complete)(struct device *dev); + int (*suspend)(struct device *dev); + int (*resume)(struct device *dev); + int (*freeze)(struct device *dev); + int (*thaw)(struct device *dev); + int (*poweroff)(struct device *dev); + int (*restore)(struct device *dev); int (*suspend_noirq)(struct device *dev); int (*resume_noirq)(struct device *dev); int (*freeze_noirq)(struct device *dev); -- cgit 1.4.1 From 355a72d75b3b4f4877db4c9070c798238028ecb5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 8 Dec 2008 00:34:57 +0100 Subject: PCI: Rework default handling of suspend and resume Rework the handling of suspend and resume of PCI devices which have no drivers or the drivers of which do not provide any suspend-resume callbacks in such a way that their standard PCI configuration registers will be saved and restored with interrupts disabled. This should prevent such devices, including PCI bridges, from being resumed too late to be able to function correctly during the resume of the other PCI devices that may depend on them. Also, to remove one possible source of future confusion, drop the default handling of suspend and resume for PCI devices with drivers providing the 'pm' object introduced by the new suspend-resume framework (there are no such PCI drivers at the moment). This patch addresses the regression from 2.6.26 tracked as http://bugzilla.kernel.org/show_bug.cgi?id=12121 . Signed-off-by: Rafael J. 
Wysocki Cc: Jesse Barnes Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-driver.c | 94 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 63 insertions(+), 31 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 4042d211c3e5..99d867bcf22a 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -300,6 +300,14 @@ static void pci_device_shutdown(struct device *dev) #ifdef CONFIG_PM_SLEEP +static bool pci_has_legacy_pm_support(struct pci_dev *pci_dev) +{ + struct pci_driver *drv = pci_dev->driver; + + return drv && (drv->suspend || drv->suspend_late || drv->resume + || drv->resume_early); +} + /* * Default "suspend" method for devices that have no driver provided suspend, * or not even a driver at all. @@ -317,14 +325,22 @@ static void pci_default_pm_suspend(struct pci_dev *pci_dev) /* * Default "resume" method for devices that have no driver provided resume, - * or not even a driver at all. + * or not even a driver at all (first part). */ -static int pci_default_pm_resume(struct pci_dev *pci_dev) +static void pci_default_pm_resume_early(struct pci_dev *pci_dev) { - int retval = 0; - /* restore the PCI config space */ pci_restore_state(pci_dev); +} + +/* + * Default "resume" method for devices that have no driver provided resume, + * or not even a driver at all (second part). 
+ */ +static int pci_default_pm_resume_late(struct pci_dev *pci_dev) +{ + int retval; + /* if the device was enabled before suspend, reenable */ retval = pci_reenable_device(pci_dev); /* @@ -371,10 +387,12 @@ static int pci_legacy_resume(struct device *dev) struct pci_dev * pci_dev = to_pci_dev(dev); struct pci_driver * drv = pci_dev->driver; - if (drv && drv->resume) + if (drv && drv->resume) { error = drv->resume(pci_dev); - else - error = pci_default_pm_resume(pci_dev); + } else { + pci_default_pm_resume_early(pci_dev); + error = pci_default_pm_resume_late(pci_dev); + } return error; } @@ -420,10 +438,8 @@ static int pci_pm_suspend(struct device *dev) if (drv->pm->suspend) { error = drv->pm->suspend(dev); suspend_report_result(drv->pm->suspend, error); - } else { - pci_default_pm_suspend(pci_dev); } - } else { + } else if (pci_has_legacy_pm_support(pci_dev)) { error = pci_legacy_suspend(dev, PMSG_SUSPEND); } pci_fixup_device(pci_fixup_suspend, pci_dev); @@ -433,6 +449,7 @@ static int pci_pm_suspend(struct device *dev) static int pci_pm_suspend_noirq(struct device *dev) { + struct pci_dev *pci_dev = to_pci_dev(dev); struct device_driver *drv = dev->driver; int error = 0; @@ -441,8 +458,10 @@ static int pci_pm_suspend_noirq(struct device *dev) error = drv->pm->suspend_noirq(dev); suspend_report_result(drv->pm->suspend_noirq, error); } - } else { + } else if (pci_has_legacy_pm_support(pci_dev)) { error = pci_legacy_suspend_late(dev, PMSG_SUSPEND); + } else { + pci_default_pm_suspend(pci_dev); } return error; @@ -452,15 +471,17 @@ static int pci_pm_resume(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct device_driver *drv = dev->driver; - int error; + int error = 0; pci_fixup_device(pci_fixup_resume, pci_dev); if (drv && drv->pm) { - error = drv->pm->resume ? 
drv->pm->resume(dev) : - pci_default_pm_resume(pci_dev); - } else { + if (drv->pm->resume) + error = drv->pm->resume(dev); + } else if (pci_has_legacy_pm_support(pci_dev)) { error = pci_legacy_resume(dev); + } else { + error = pci_default_pm_resume_late(pci_dev); } return error; @@ -468,6 +489,7 @@ static int pci_pm_resume(struct device *dev) static int pci_pm_resume_noirq(struct device *dev) { + struct pci_dev *pci_dev = to_pci_dev(dev); struct device_driver *drv = dev->driver; int error = 0; @@ -476,8 +498,10 @@ static int pci_pm_resume_noirq(struct device *dev) if (drv && drv->pm) { if (drv->pm->resume_noirq) error = drv->pm->resume_noirq(dev); - } else { + } else if (pci_has_legacy_pm_support(pci_dev)) { error = pci_legacy_resume_early(dev); + } else { + pci_default_pm_resume_early(pci_dev); } return error; @@ -504,10 +528,8 @@ static int pci_pm_freeze(struct device *dev) if (drv->pm->freeze) { error = drv->pm->freeze(dev); suspend_report_result(drv->pm->freeze, error); - } else { - pci_default_pm_suspend(pci_dev); } - } else { + } else if (pci_has_legacy_pm_support(pci_dev)) { error = pci_legacy_suspend(dev, PMSG_FREEZE); pci_fixup_device(pci_fixup_suspend, pci_dev); } @@ -517,6 +539,7 @@ static int pci_pm_freeze(struct device *dev) static int pci_pm_freeze_noirq(struct device *dev) { + struct pci_dev *pci_dev = to_pci_dev(dev); struct device_driver *drv = dev->driver; int error = 0; @@ -525,8 +548,10 @@ static int pci_pm_freeze_noirq(struct device *dev) error = drv->pm->freeze_noirq(dev); suspend_report_result(drv->pm->freeze_noirq, error); } - } else { + } else if (pci_has_legacy_pm_support(pci_dev)) { error = pci_legacy_suspend_late(dev, PMSG_FREEZE); + } else { + pci_default_pm_suspend(pci_dev); } return error; @@ -534,14 +559,15 @@ static int pci_pm_freeze_noirq(struct device *dev) static int pci_pm_thaw(struct device *dev) { + struct pci_dev *pci_dev = to_pci_dev(dev); struct device_driver *drv = dev->driver; int error = 0; if (drv && drv->pm) { if 
(drv->pm->thaw) error = drv->pm->thaw(dev); - } else { - pci_fixup_device(pci_fixup_resume, to_pci_dev(dev)); + } else if (pci_has_legacy_pm_support(pci_dev)) { + pci_fixup_device(pci_fixup_resume, pci_dev); error = pci_legacy_resume(dev); } @@ -550,13 +576,14 @@ static int pci_pm_thaw(struct device *dev) static int pci_pm_thaw_noirq(struct device *dev) { + struct pci_dev *pci_dev = to_pci_dev(dev); struct device_driver *drv = dev->driver; int error = 0; if (drv && drv->pm) { if (drv->pm->thaw_noirq) error = drv->pm->thaw_noirq(dev); - } else { + } else if (pci_has_legacy_pm_support(pci_dev)) { pci_fixup_device(pci_fixup_resume_early, to_pci_dev(dev)); error = pci_legacy_resume_early(dev); } @@ -566,17 +593,18 @@ static int pci_pm_thaw_noirq(struct device *dev) static int pci_pm_poweroff(struct device *dev) { + struct pci_dev *pci_dev = to_pci_dev(dev); struct device_driver *drv = dev->driver; int error = 0; - pci_fixup_device(pci_fixup_suspend, to_pci_dev(dev)); + pci_fixup_device(pci_fixup_suspend, pci_dev); if (drv && drv->pm) { if (drv->pm->poweroff) { error = drv->pm->poweroff(dev); suspend_report_result(drv->pm->poweroff, error); } - } else { + } else if (pci_has_legacy_pm_support(pci_dev)) { error = pci_legacy_suspend(dev, PMSG_HIBERNATE); } @@ -593,7 +621,7 @@ static int pci_pm_poweroff_noirq(struct device *dev) error = drv->pm->poweroff_noirq(dev); suspend_report_result(drv->pm->poweroff_noirq, error); } - } else { + } else if (pci_has_legacy_pm_support(to_pci_dev(dev))) { error = pci_legacy_suspend_late(dev, PMSG_HIBERNATE); } @@ -604,13 +632,15 @@ static int pci_pm_restore(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct device_driver *drv = dev->driver; - int error; + int error = 0; if (drv && drv->pm) { - error = drv->pm->restore ? 
drv->pm->restore(dev) : - pci_default_pm_resume(pci_dev); - } else { + if (drv->pm->restore) + error = drv->pm->restore(dev); + } else if (pci_has_legacy_pm_support(pci_dev)) { error = pci_legacy_resume(dev); + } else { + error = pci_default_pm_resume_late(pci_dev); } pci_fixup_device(pci_fixup_resume, pci_dev); @@ -628,8 +658,10 @@ static int pci_pm_restore_noirq(struct device *dev) if (drv && drv->pm) { if (drv->pm->restore_noirq) error = drv->pm->restore_noirq(dev); - } else { + } else if (pci_has_legacy_pm_support(pci_dev)) { error = pci_legacy_resume_early(dev); + } else { + pci_default_pm_resume_early(pci_dev); } pci_fixup_device(pci_fixup_resume_early, pci_dev); -- cgit 1.4.1 From 9eff02e2042f96fb2aedd02e032eca1c5333d767 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Fri, 24 Oct 2008 10:32:33 -0700 Subject: PCI: check mmap range of /proc/bus/pci files too /proc/bus/pci allows you to mmap resource ranges too, so we should probably be checking to make sure the mapping is somewhat valid. Uses the same code as the recent sysfs mmap range checking patch from Linus. 
Acked-by: David Miller Signed-off-by: Jesse Barnes --- drivers/pci/pci-sysfs.c | 2 +- drivers/pci/pci.h | 4 ++++ drivers/pci/proc.c | 11 ++++++++++- 3 files changed, 15 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index c88485860a0a..388440e0d222 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -569,7 +569,7 @@ void pci_remove_legacy_files(struct pci_bus *b) #ifdef HAVE_PCI_MMAP -static int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vma) +int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vma) { unsigned long nr, start, size; diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 9de87e9f98f5..d3e65e29df51 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -10,6 +10,10 @@ extern int pci_uevent(struct device *dev, struct kobj_uevent_env *env); extern int pci_create_sysfs_dev_files(struct pci_dev *pdev); extern void pci_remove_sysfs_dev_files(struct pci_dev *pdev); extern void pci_cleanup_rom(struct pci_dev *dev); +#ifdef HAVE_PCI_MMAP +extern int pci_mmap_fits(struct pci_dev *pdev, int resno, + struct vm_area_struct *vma); +#endif /** * Firmware PM callbacks diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index e1098c302c45..7fb086d39617 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -252,11 +252,20 @@ static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma) const struct proc_dir_entry *dp = PDE(inode); struct pci_dev *dev = dp->data; struct pci_filp_private *fpriv = file->private_data; - int ret; + int i, ret; if (!capable(CAP_SYS_RAWIO)) return -EPERM; + /* Make sure the caller is mapping a real resource for this device */ + for (i = 0; i < PCI_ROM_RESOURCE; i++) { + if (pci_mmap_fits(dev, i, vma)) + break; + } + + if (i >= PCI_ROM_RESOURCE) + return -ENODEV; + ret = pci_mmap_page_range(dev, vma, fpriv->mmap_state, fpriv->write_combine); -- cgit 1.4.1 From 
c7b4fee3808a061ee0e704ba596ace56bf65a83d Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Fri, 24 Oct 2008 14:26:35 +0900 Subject: PCI hotplug: pciehp: remove unnecessary wait after turning power off The pciehp driver waits for 1000 msec after turning power off to make sure the power has been completely removed. But this 1000 msec wait is not needed if a slot doesn't implement power control because software cannot control the power. Power will be automatically removed at adapter removal time on such a slot. Tested-by: "Phil Endecott" Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pciehp_ctrl.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c index fead63c6b49e..ff4034502d24 100644 --- a/drivers/pci/hotplug/pciehp_ctrl.c +++ b/drivers/pci/hotplug/pciehp_ctrl.c @@ -178,15 +178,14 @@ static void set_slot_off(struct controller *ctrl, struct slot * pslot) "Issue of Slot Power Off command failed\n"); return; } + /* + * After turning power off, we must wait for at least 1 second + * before taking any action that relies on power having been + * removed from the slot/adapter. + */ + msleep(1000); } - /* - * After turning power off, we must wait for at least 1 second - * before taking any action that relies on power having been - * removed from the slot/adapter. - */ - msleep(1000); - if (PWR_LED(ctrl)) pslot->hpc_ops->green_led_off(pslot); @@ -286,15 +285,14 @@ static int remove_board(struct slot *p_slot) "Issue of Slot Disable command failed\n"); return retval; } + /* + * After turning power off, we must wait for at least 1 second + * before taking any action that relies on power having been + * removed from the slot/adapter. 
+ */ + msleep(1000); } - /* - * After turning power off, we must wait for at least 1 second - * before taking any action that relies on power having been - * removed from the slot/adapter. - */ - msleep(1000); - if (PWR_LED(ctrl)) /* turn off Green LED */ p_slot->hpc_ops->green_led_off(p_slot); -- cgit 1.4.1 From bfb0f330a6c833fd12c35b907434256b4211a1dc Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Mon, 27 Oct 2008 17:50:21 -0700 Subject: PCI: fixup whitespace in quirks.c Had a space before tab in do_fixups, prototype wasn't wrapped properly either. Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index ce0985615133..e915a17b36c3 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2074,11 +2074,12 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x4375, #endif /* CONFIG_PCI_MSI */ -static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end) +static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, + struct pci_fixup *end) { while (f < end) { if ((f->vendor == dev->vendor || f->vendor == (u16) PCI_ANY_ID) && - (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) { + (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) { dev_dbg(&dev->dev, "calling %pF\n", f->hook); f->hook(dev); } -- cgit 1.4.1 From 1a9271331ab663f3c7cda78d86b884f2ea86d4d7 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Thu, 30 Oct 2008 02:17:49 +0100 Subject: PCI: struct device - replace bus_id with dev_name(), dev_set_name() This patch is part of a larger patch series which will remove the "char bus_id[20]" name string from struct device. The device name is managed in the kobject anyway, and without any size limitation, and just needlessly copied into "struct device". To set and read the device name dev_name(dev) and dev_set_name(dev) must be used. 
If your code uses static kobjects, which it shouldn't do, "const char *init_name" can be used to statically provide the name the registered device should have. At registration time, the init_name field is cleared, to enforce the use of dev_name(dev) to access the device name at a later time. We need to get rid of all occurrences of bus_id in the entire tree to be able to enable the new interface. Please apply this patch, and possibly convert any remaining occurrences of bus_id. Acked-by: Greg Kroah-Hartman Signed-Off-By: Kay Sievers Signed-off-by: Jesse Barnes --- arch/x86/kernel/pci-dma.c | 2 +- drivers/pci/hotplug/acpiphp_ibm.c | 2 +- drivers/pci/irq.c | 2 +- drivers/pci/pci-acpi.c | 2 +- drivers/pci/pcie/portdrv_core.c | 2 +- drivers/pci/probe.c | 6 +++--- 6 files changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/pci') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 19a1044a0cd9..b25428533141 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -38,7 +38,7 @@ EXPORT_SYMBOL(bad_dma_address); be probably a smaller DMA mask, but this is bug-to-bug compatible to older i386. 
*/ struct device x86_dma_fallback_dev = { - .bus_id = "fallback device", + .init_name = "fallback device", .coherent_dma_mask = DMA_32BIT_MASK, .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, }; diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c index 881fdd2b7313..5befa7e379b7 100644 --- a/drivers/pci/hotplug/acpiphp_ibm.c +++ b/drivers/pci/hotplug/acpiphp_ibm.c @@ -271,7 +271,7 @@ static void ibm_handle_events(acpi_handle handle, u32 event, void *context) dbg("%s: generationg bus event\n", __func__); acpi_bus_generate_proc_event(note->device, note->event, detail); acpi_bus_generate_netlink_event(note->device->pnp.device_class, - note->device->dev.bus_id, + dev_name(&note->device->dev), note->event, detail); } else note->event = event; diff --git a/drivers/pci/irq.c b/drivers/pci/irq.c index 6441dfa969a3..de01174aff06 100644 --- a/drivers/pci/irq.c +++ b/drivers/pci/irq.c @@ -15,7 +15,7 @@ static void pci_note_irq_problem(struct pci_dev *pdev, const char *reason) dev_printk(KERN_ERR, &pdev->dev, "Potentially misrouted IRQ (Bridge %s %04x:%04x)\n", - parent->dev.bus_id, parent->vendor, parent->device); + dev_name(&parent->dev), parent->vendor, parent->device); dev_printk(KERN_ERR, &pdev->dev, "%s\n", reason); dev_printk(KERN_ERR, &pdev->dev, "Please report to linux-kernel@vger.kernel.org\n"); WARN_ON(1); diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index ae5ec76dca77..2ed3f10d0860 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -375,7 +375,7 @@ static int acpi_pci_find_root_bridge(struct device *dev, acpi_handle *handle) * The string should be the same as root bridge's name * Please look at 'pci_scan_bus_parented' */ - num = sscanf(dev->bus_id, "pci%04x:%02x", &seg, &bus); + num = sscanf(dev_name(dev), "pci%04x:%02x", &seg, &bus); if (num != 2) return -ENODEV; *handle = acpi_get_pci_rootbridge_handle(seg, bus); diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 
2e091e014829..75f501ab6468 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -224,7 +224,7 @@ static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev, device->driver = NULL; device->driver_data = NULL; device->release = release_pcie_device; /* callback to free pcie dev */ - snprintf(device->bus_id, sizeof(device->bus_id), "%s:pcie%02x", + dev_set_name(device, "%s:pcie%02x", pci_name(parent), get_descriptor_id(port_type, service_type)); device->parent = &parent->dev; } diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 5b3f5937ecf5..eb2b985beb48 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -412,7 +412,7 @@ static struct pci_bus *pci_alloc_child_bus(struct pci_bus *parent, * registered later in pci_bus_add_devices() */ child->dev.class = &pcibus_class; - sprintf(child->dev.bus_id, "%04x:%02x", pci_domain_nr(child), busnr); + dev_set_name(&child->dev, "%04x:%02x", pci_domain_nr(child), busnr); /* * Set up the primary, secondary and subordinate @@ -1130,7 +1130,7 @@ struct pci_bus * pci_create_bus(struct device *parent, memset(dev, 0, sizeof(*dev)); dev->parent = parent; dev->release = pci_release_bus_bridge_dev; - sprintf(dev->bus_id, "pci%04x:%02x", pci_domain_nr(b), bus); + dev_set_name(dev, "pci%04x:%02x", pci_domain_nr(b), bus); error = device_register(dev); if (error) goto dev_reg_err; @@ -1141,7 +1141,7 @@ struct pci_bus * pci_create_bus(struct device *parent, b->dev.class = &pcibus_class; b->dev.parent = b->bridge; - sprintf(b->dev.bus_id, "%04x:%02x", pci_domain_nr(b), bus); + dev_set_name(&b->dev, "%04x:%02x", pci_domain_nr(b), bus); error = device_register(&b->dev); if (error) goto class_dev_reg_err; -- cgit 1.4.1 From d91cdc745524a1b1ff537712a62803b8413c12d6 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 11 Nov 2008 17:17:47 +0800 Subject: PCI: Refactor pci_reset_function() Separate out function level reset so that pci_reset_function can be more easily extended. 
Signed-off-by: Sheng Yang Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 66 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 26 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 061d1ee0046a..62978f644a92 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1751,24 +1751,7 @@ int pci_set_dma_seg_boundary(struct pci_dev *dev, unsigned long mask) EXPORT_SYMBOL(pci_set_dma_seg_boundary); #endif -/** - * pci_execute_reset_function() - Reset a PCI device function - * @dev: Device function to reset - * - * Some devices allow an individual function to be reset without affecting - * other functions in the same device. The PCI device must be responsive - * to PCI config space in order to use this function. - * - * The device function is presumed to be unused when this function is called. - * Resetting the device will make the contents of PCI configuration space - * random, so any caller of this must be prepared to reinitialise the - * device including MSI, bus mastering, BARs, decoding IO and memory spaces, - * etc. - * - * Returns 0 if the device function was successfully reset or -ENOTTY if the - * device doesn't support resetting a single function. 
- */ -int pci_execute_reset_function(struct pci_dev *dev) +static int __pcie_flr(struct pci_dev *dev, int probe) { u16 status; u32 cap; @@ -1780,6 +1763,9 @@ int pci_execute_reset_function(struct pci_dev *dev) if (!(cap & PCI_EXP_DEVCAP_FLR)) return -ENOTTY; + if (probe) + return 0; + pci_block_user_cfg_access(dev); /* Wait for Transaction Pending bit clean */ @@ -1802,6 +1788,39 @@ int pci_execute_reset_function(struct pci_dev *dev) pci_unblock_user_cfg_access(dev); return 0; } + +static int __pci_reset_function(struct pci_dev *pdev, int probe) +{ + int res; + + res = __pcie_flr(pdev, probe); + if (res != -ENOTTY) + return res; + + return res; +} + +/** + * pci_execute_reset_function() - Reset a PCI device function + * @dev: Device function to reset + * + * Some devices allow an individual function to be reset without affecting + * other functions in the same device. The PCI device must be responsive + * to PCI config space in order to use this function. + * + * The device function is presumed to be unused when this function is called. + * Resetting the device will make the contents of PCI configuration space + * random, so any caller of this must be prepared to reinitialise the + * device including MSI, bus mastering, BARs, decoding IO and memory spaces, + * etc. + * + * Returns 0 if the device function was successfully reset or -ENOTTY if the + * device doesn't support resetting a single function. 
+ */ +int pci_execute_reset_function(struct pci_dev *dev) +{ + return __pci_reset_function(dev, 0); +} EXPORT_SYMBOL_GPL(pci_execute_reset_function); /** @@ -1822,15 +1841,10 @@ EXPORT_SYMBOL_GPL(pci_execute_reset_function); */ int pci_reset_function(struct pci_dev *dev) { - u32 cap; - int exppos = pci_find_capability(dev, PCI_CAP_ID_EXP); - int r; + int r = __pci_reset_function(dev, 1); - if (!exppos) - return -ENOTTY; - pci_read_config_dword(dev, exppos + PCI_EXP_DEVCAP, &cap); - if (!(cap & PCI_EXP_DEVCAP_FLR)) - return -ENOTTY; + if (r < 0) + return r; if (!dev->msi_enabled && !dev->msix_enabled && dev->irq != 0) disable_irq(dev->irq); -- cgit 1.4.1 From 1ca887970a3971a22e4875b7c6ad5ae3ce49f61a Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 11 Nov 2008 17:17:48 +0800 Subject: PCI: Extend pci_reset_function() to support PCI Advanced Features Some PCI devices implement PCI Advanced Features, which means they support Function Level Reset(FLR). Implement support for that in pci_reset_function. 
Signed-off-by: Sheng Yang Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 62978f644a92..3c2fa2fdc9cd 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1789,6 +1789,43 @@ static int __pcie_flr(struct pci_dev *dev, int probe) return 0; } +static int __pci_af_flr(struct pci_dev *dev, int probe) +{ + int cappos = pci_find_capability(dev, PCI_CAP_ID_AF); + u8 status; + u8 cap; + + if (!cappos) + return -ENOTTY; + pci_read_config_byte(dev, cappos + PCI_AF_CAP, &cap); + if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR)) + return -ENOTTY; + + if (probe) + return 0; + + pci_block_user_cfg_access(dev); + + /* Wait for Transaction Pending bit clean */ + msleep(100); + pci_read_config_byte(dev, cappos + PCI_AF_STATUS, &status); + if (status & PCI_AF_STATUS_TP) { + dev_info(&dev->dev, "Busy after 100ms while trying to" + " reset; sleeping for 1 second\n"); + ssleep(1); + pci_read_config_byte(dev, + cappos + PCI_AF_STATUS, &status); + if (status & PCI_AF_STATUS_TP) + dev_info(&dev->dev, "Still busy after 1s; " + "proceeding with reset anyway\n"); + } + pci_write_config_byte(dev, cappos + PCI_AF_CTRL, PCI_AF_CTRL_FLR); + mdelay(100); + + pci_unblock_user_cfg_access(dev); + return 0; +} + static int __pci_reset_function(struct pci_dev *pdev, int probe) { int res; @@ -1797,6 +1834,10 @@ static int __pci_reset_function(struct pci_dev *pdev, int probe) if (res != -ENOTTY) return res; + res = __pci_af_flr(pdev, probe); + if (res != -ENOTTY) + return res; + return res; } -- cgit 1.4.1 From d3a54014e2a94bd37b7dee5e76e03f7bc4fab49a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 12 Nov 2008 14:38:53 +1100 Subject: PCI: Add legacy_io/mem to all busses Currently, only PHBs get the legacy_* files, which makes it tricky for userland to get access to the legacy space. 
This commit exposes them in every bus, since even child buses may forward legacy cycles if configured properly. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Jesse Barnes --- drivers/pci/bus.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 999cc4088b59..3e1c135b174a 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -158,6 +158,10 @@ void pci_bus_add_devices(struct pci_bus *bus) dev_err(&dev->dev, "Error creating cpulistaffinity" " file, continuing...\n"); + + /* Create legacy_io and legacy_mem files for this bus */ + pci_create_legacy_files(child_bus); + } } } -- cgit 1.4.1 From 990a7ac5645883a833a11b900bb6f25b65dea65b Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 10 Nov 2008 15:30:45 -0700 Subject: ACPI/PCI: call _OSC support during root bridge discovery Add pci_acpi_osc_support() and call it when a PCI bridge is added. This allows us to avoid having every individual PCI root bridge driver call _OSC support for every root bridge in their probe functions, a significant savings in boot time. 
Signed-off-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- drivers/acpi/pci_root.c | 9 +++++++++ drivers/pci/pci-acpi.c | 24 +++++++++++++++++++----- include/linux/pci-acpi.h | 1 + 3 files changed, 29 insertions(+), 5 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 642554b1b60c..de4d57114fe4 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -193,6 +194,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) unsigned long long value = 0; acpi_handle handle = NULL; struct acpi_device *child; + u32 flags; if (!device) @@ -210,6 +212,13 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) device->ops.bind = acpi_pci_bind; + /* + * All supported architectures that use ACPI have support for + * PCI domains, so we indicate this in _OSC support capabilities. + */ + flags = OSC_PCI_SEGMENT_GROUPS_SUPPORT; + pci_acpi_osc_support(device->handle, flags); + /* * Segment * ------- diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 2ed3f10d0860..8a1f02c3c915 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -143,28 +143,42 @@ static acpi_status __acpi_query_osc(u32 flags, struct acpi_osc_data *osc_data, return status; } -static acpi_status acpi_query_osc(acpi_handle handle, - u32 level, void *context, void **retval) +/* + * pci_acpi_osc_support: Invoke _OSC indicating support for the given feature + * @flags: Bitmask of flags to support + * + * See the ACPI spec for the definition of the flags + */ +int pci_acpi_osc_support(acpi_handle handle, u32 flags) { + u32 dummy; acpi_status status; - struct acpi_osc_data *osc_data; - u32 flags = (unsigned long)context, dummy; acpi_handle tmp; + struct acpi_osc_data *osc_data; + int rc = 0; status = acpi_get_handle(handle, "_OSC", &tmp); if (ACPI_FAILURE(status)) - return AE_OK; + return -ENOTTY; 
mutex_lock(&pci_acpi_lock); osc_data = acpi_get_osc_data(handle); if (!osc_data) { printk(KERN_ERR "acpi osc data array is full\n"); + rc = -ENOMEM; goto out; } __acpi_query_osc(flags, osc_data, &dummy); out: mutex_unlock(&pci_acpi_lock); + return rc; +} + +static acpi_status acpi_query_osc(acpi_handle handle, u32 level, + void *context, void **retval) +{ + pci_acpi_osc_support(handle, (unsigned long)context); return AE_OK; } diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index a9e4c34e9389..424f06f84cab 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -51,6 +51,7 @@ #ifdef CONFIG_ACPI extern acpi_status pci_osc_control_set(acpi_handle handle, u32 flags); extern acpi_status __pci_osc_support_set(u32 flags, const char *hid); +int pci_acpi_osc_support(acpi_handle handle, u32 flags); static inline acpi_status pci_osc_support_set(u32 flags) { return __pci_osc_support_set(flags, PCI_ROOT_HID_STRING); -- cgit 1.4.1 From 0ef5f8f6159e44b4faa997be08d1a3bcbf44ad08 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 10 Nov 2008 15:30:50 -0700 Subject: ACPI/PCI: PCI extended config _OSC support called when root bridge added The _OSC capability OSC_EXT_PCI_CONFIG_SUPPORT is set when the root bridge is added with pci_acpi_osc_support() if we can access PCI extended config space. This adds the function pci_ext_cfg_avail which returns true if we can access PCI extended config space (offset greater than 0xff). It currently only returns false if arch=x86 and raw_pci_ext_ops is not set (which might happen if pci=nommcfg is set on the kernel command-line). 
Signed-off-by: Andrew Patterson Signed-off-by: Jesse Barnes --- arch/x86/pci/common.c | 8 ++++++++ drivers/acpi/pci_root.c | 10 ++++++++-- drivers/pci/pci.c | 13 +++++++++++++ include/linux/pci.h | 2 ++ 4 files changed, 31 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 62ddb73e09ed..9ab8509f7b15 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -562,6 +562,14 @@ void pcibios_disable_device (struct pci_dev *dev) pcibios_disable_irq(dev); } +int pci_ext_cfg_avail(struct pci_dev *dev) +{ + if (raw_pci_ext_ops) + return 1; + else + return 0; +} + struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) { struct pci_bus *bus = NULL; diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index de4d57114fe4..96e68e841539 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -194,7 +194,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) unsigned long long value = 0; acpi_handle handle = NULL; struct acpi_device *child; - u32 flags; + u32 flags, base_flags; if (!device) @@ -216,7 +216,7 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) * All supported architectures that use ACPI have support for * PCI domains, so we indicate this in _OSC support capabilities. */ - flags = OSC_PCI_SEGMENT_GROUPS_SUPPORT; + flags = base_flags = OSC_PCI_SEGMENT_GROUPS_SUPPORT; pci_acpi_osc_support(device->handle, flags); /* @@ -344,6 +344,12 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) list_for_each_entry(child, &device->children, node) acpi_pci_bridge_scan(child); + /* Indicate support for various _OSC capabilities. 
*/ + if (pci_ext_cfg_avail(root->bus->self)) + flags |= OSC_EXT_PCI_CONFIG_SUPPORT; + if (flags != base_flags) + pci_acpi_osc_support(device->handle, flags); + end: if (result) { if (!list_empty(&root->node)) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 3c2fa2fdc9cd..48fa860276d4 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2084,6 +2084,19 @@ static void __devinit pci_no_domains(void) #endif } +/** + * pci_ext_cfg_avail - can we access extended PCI config space? + * @dev: The PCI device of the root bridge. + * + * Returns 1 if we can access PCI extended config space (offsets + * greater than 0xff). This is the default implementation. Architecture + * implementations can override this. + */ +int __attribute__ ((weak)) pci_ext_cfg_avail(struct pci_dev *dev) +{ + return 1; +} + static int __devinit pci_init(void) { struct pci_dev *dev = NULL; diff --git a/include/linux/pci.h b/include/linux/pci.h index 4bb156ba854a..6fd47654ca4e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1140,6 +1140,8 @@ static inline void pci_mmcfg_early_init(void) { } static inline void pci_mmcfg_late_init(void) { } #endif +int pci_ext_cfg_avail(struct pci_dev *dev); + #ifdef CONFIG_HAS_IOMEM static inline void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar) { -- cgit 1.4.1 From 3e1b16002af29758b6bc9c38939d43838d9335bc Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 10 Nov 2008 15:30:55 -0700 Subject: ACPI/PCI: PCIe ASPM _OSC support capabilities called when root bridge added The _OSC capabilities OSC_ACTIVE_STATE_PWR_SUPPORT and OSC_CLOCK_PWR_CAPABILITY_SUPPORT are set when the root bridge is added with pci_acpi_osc_support(), so we no longer need to do it in the ASPM driver. Also add the function pcie_aspm_enabled, which returns true if pcie_aspm=off is not on the kernel command-line. 
Signed-off-by: Andrew Patterson Signed-off-by: Jesse Barnes --- drivers/acpi/pci_root.c | 3 +++ drivers/pci/pcie/aspm.c | 27 +++++++++------------------ include/linux/pci.h | 9 +++++++++ 3 files changed, 21 insertions(+), 18 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 96e68e841539..9fe026b1c9d0 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -347,6 +347,9 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) /* Indicate support for various _OSC capabilities. */ if (pci_ext_cfg_avail(root->bus->self)) flags |= OSC_EXT_PCI_CONFIG_SUPPORT; + if (pcie_aspm_enabled()) + flags |= OSC_ACTIVE_STATE_PWR_SUPPORT | + OSC_CLOCK_PWR_CAPABILITY_SUPPORT; if (flags != base_flags) pci_acpi_osc_support(device->handle, flags); diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 9aad608bcf3f..e361c7dc726f 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -857,24 +857,15 @@ void pcie_no_aspm(void) aspm_disabled = 1; } -#ifdef CONFIG_ACPI -#include -#include -static void pcie_aspm_platform_init(void) -{ - pcie_osc_support_set(OSC_ACTIVE_STATE_PWR_SUPPORT| - OSC_CLOCK_PWR_CAPABILITY_SUPPORT); -} -#else -static inline void pcie_aspm_platform_init(void) { } -#endif - -static int __init pcie_aspm_init(void) +/** + * pcie_aspm_enabled - is PCIe ASPM enabled? + * + * Returns true if ASPM has not been disabled by the command-line option + * pcie_aspm=off. 
+ **/ +int pcie_aspm_enabled(void) { - if (aspm_disabled) - return 0; - pcie_aspm_platform_init(); - return 0; + return !aspm_disabled; } +EXPORT_SYMBOL(pcie_aspm_enabled); -fs_initcall(pcie_aspm_init); diff --git a/include/linux/pci.h b/include/linux/pci.h index 6fd47654ca4e..eae97a2bf603 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -791,6 +791,15 @@ extern void msi_remove_pci_irq_vectors(struct pci_dev *dev); extern void pci_restore_msi_state(struct pci_dev *dev); #endif +#ifndef CONFIG_PCIEASPM +static inline int pcie_aspm_enabled(void) +{ + return 0; +} +#else +extern int pcie_aspm_enabled(void); +#endif + #ifdef CONFIG_HT_IRQ /* The functions a driver should call */ int ht_create_irq(struct pci_dev *dev, int idx); -- cgit 1.4.1 From eb9188bdb9d65aeead2382ec3dd656a17ec8936d Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 10 Nov 2008 15:31:00 -0700 Subject: ACPI/PCI: PCIe AER _OSC support capabilities called when root bridge added The _OSC capability OSC_EXT_PCI_CONFIG_SUPPORT is set when the root bridge is added with pci_acpi_osc_support(), so we no longer need to do it in the PCIe AER driver. 
Signed-off-by: Andrew Patterson Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv_acpi.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/aer/aerdrv_acpi.c b/drivers/pci/pcie/aer/aerdrv_acpi.c index 6dd7b13e9808..ebce26c37049 100644 --- a/drivers/pci/pcie/aer/aerdrv_acpi.c +++ b/drivers/pci/pcie/aer/aerdrv_acpi.c @@ -38,7 +38,6 @@ int aer_osc_setup(struct pcie_device *pciedev) handle = acpi_find_root_bridge_handle(pdev); if (handle) { - pcie_osc_support_set(OSC_EXT_PCI_CONFIG_SUPPORT); status = pci_osc_control_set(handle, OSC_PCI_EXPRESS_AER_CONTROL | OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); -- cgit 1.4.1 From 07ae95f988a34465bdcb384bfa73c03424fe2312 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 10 Nov 2008 15:31:05 -0700 Subject: ACPI/PCI: PCI MSI _OSC support capabilities called when root bridge added The _OSC capability OSC_MSI_SUPPORT is set when the root bridge is added with pci_acpi_osc_support(), so we no longer need to do it in the PCI MSI driver. Also adds the function pci_msi_enabled, which returns true if pci=nomsi is not on the kernel command-line. 
Signed-off-by: Andrew Patterson Signed-off-by: Jesse Barnes --- drivers/acpi/pci_root.c | 2 ++ drivers/pci/msi.c | 31 +++++++++++-------------------- drivers/pci/pci.c | 2 -- drivers/pci/pci.h | 2 -- include/linux/pci.h | 5 +++++ 5 files changed, 18 insertions(+), 24 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 9fe026b1c9d0..5b38a026d122 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -350,6 +350,8 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) if (pcie_aspm_enabled()) flags |= OSC_ACTIVE_STATE_PWR_SUPPORT | OSC_CLOCK_PWR_CAPABILITY_SUPPORT; + if (pci_msi_enabled()) + flags |= OSC_MSI_SUPPORT; if (flags != base_flags) pci_acpi_osc_support(device->handle, flags); diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 11a51f8ed3b3..b4a90badd0a6 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -776,28 +776,19 @@ void pci_no_msi(void) pci_msi_enable = 0; } -void pci_msi_init_pci_dev(struct pci_dev *dev) -{ - INIT_LIST_HEAD(&dev->msi_list); -} - -#ifdef CONFIG_ACPI -#include -#include -static void __devinit msi_acpi_init(void) +/** + * pci_msi_enabled - is MSI enabled? + * + * Returns true if MSI has not been disabled by the command-line option + * pci=nomsi. 
+ **/ +int pci_msi_enabled(void) { - if (acpi_pci_disabled) - return; - pci_osc_support_set(OSC_MSI_SUPPORT); - pcie_osc_support_set(OSC_MSI_SUPPORT); + return pci_msi_enable; } -#else -static inline void msi_acpi_init(void) { } -#endif /* CONFIG_ACPI */ +EXPORT_SYMBOL(pci_msi_enabled); -void __devinit msi_init(void) +void pci_msi_init_pci_dev(struct pci_dev *dev) { - if (!pci_msi_enable) - return; - msi_acpi_init(); + INIT_LIST_HEAD(&dev->msi_list); } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 48fa860276d4..2cfa41e367a7 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2105,8 +2105,6 @@ static int __devinit pci_init(void) pci_fixup_device(pci_fixup_final, dev); } - msi_init(); - return 0; } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index d3e65e29df51..9162e242b99e 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -102,11 +102,9 @@ extern unsigned int pci_pm_d3_delay; #ifdef CONFIG_PCI_MSI void pci_no_msi(void); extern void pci_msi_init_pci_dev(struct pci_dev *dev); -extern void __devinit msi_init(void); #else static inline void pci_no_msi(void) { } static inline void pci_msi_init_pci_dev(struct pci_dev *dev) { } -static inline void msi_init(void) { } #endif #ifdef CONFIG_PCIEAER diff --git a/include/linux/pci.h b/include/linux/pci.h index eae97a2bf603..59a3dc2059d3 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -779,6 +779,10 @@ static inline void msi_remove_pci_irq_vectors(struct pci_dev *dev) static inline void pci_restore_msi_state(struct pci_dev *dev) { } +static inline int pci_msi_enabled(void) +{ + return 0; +} #else extern int pci_enable_msi(struct pci_dev *dev); extern void pci_msi_shutdown(struct pci_dev *dev); @@ -789,6 +793,7 @@ extern void pci_msix_shutdown(struct pci_dev *dev); extern void pci_disable_msix(struct pci_dev *dev); extern void msi_remove_pci_irq_vectors(struct pci_dev *dev); extern void pci_restore_msi_state(struct pci_dev *dev); +extern int pci_msi_enabled(void); #endif #ifndef 
CONFIG_PCIEASPM -- cgit 1.4.1 From 23616941914917cf25b94789856b5326b68d8ee8 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 10 Nov 2008 15:31:10 -0700 Subject: ACPI/PCI: remove obsolete _OSC capability support functions The acpi_query_osc, __pci_osc_support_set, pci_osc_support_set, and pcie_osc_support_set functions have been obsoleted in favor of setting these capabilities during root bridge discovery with pci_acpi_osc_support. There are no longer any callers of these functions, so remove them. Signed-off-by: Andrew Patterson Signed-off-by: Jesse Barnes --- drivers/pci/pci-acpi.c | 25 ------------------------- include/linux/pci-acpi.h | 11 ----------- 2 files changed, 36 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 8a1f02c3c915..8f923ee5177f 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -175,31 +175,6 @@ out: return rc; } -static acpi_status acpi_query_osc(acpi_handle handle, u32 level, - void *context, void **retval) -{ - pci_acpi_osc_support(handle, (unsigned long)context); - return AE_OK; -} - -/** - * __pci_osc_support_set - register OS support to Firmware - * @flags: OS support bits - * @hid: hardware ID - * - * Update OS support fields and doing a _OSC Query to obtain an update - * from Firmware on supported control bits. 
- **/ -acpi_status __pci_osc_support_set(u32 flags, const char *hid) -{ - if (!(flags & OSC_SUPPORT_MASKS)) - return AE_TYPE; - - acpi_get_devices(hid, acpi_query_osc, - (void *)(unsigned long)flags, NULL); - return AE_OK; -} - /** * pci_osc_control_set - commit requested control to Firmware * @handle: acpi_handle for the target ACPI object diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 424f06f84cab..871e096e0fbc 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -50,16 +50,7 @@ #ifdef CONFIG_ACPI extern acpi_status pci_osc_control_set(acpi_handle handle, u32 flags); -extern acpi_status __pci_osc_support_set(u32 flags, const char *hid); int pci_acpi_osc_support(acpi_handle handle, u32 flags); -static inline acpi_status pci_osc_support_set(u32 flags) -{ - return __pci_osc_support_set(flags, PCI_ROOT_HID_STRING); -} -static inline acpi_status pcie_osc_support_set(u32 flags) -{ - return __pci_osc_support_set(flags, PCI_EXPRESS_ROOT_HID_STRING); -} static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) { /* Find root host bridge */ @@ -76,8 +67,6 @@ typedef u32 acpi_status; #endif static inline acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) {return AE_ERROR;} -static inline acpi_status pci_osc_support_set(u32 flags) {return AE_ERROR;} -static inline acpi_status pcie_osc_support_set(u32 flags) {return AE_ERROR;} static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) { return NULL; } #endif -- cgit 1.4.1 From e8de1481fd7126ee9e93d6889da6f00c05e1e019 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 22 Oct 2008 19:55:31 -0700 Subject: resource: allow MMIO exclusivity for device drivers Device drivers that use pci_request_regions() (and similar APIs) have a reasonable expectation that they are the only ones accessing their device. 
As part of the e1000e hunt, we were afraid that some userland (X or some bootsplash stuff) was mapping the MMIO region that the driver thought it had exclusively via /dev/mem or via various sysfs resource mappings. This patch adds the option for device drivers to add their reserved regions to the "banned from /dev/mem use" list, so now both kernel memory and device-exclusive MMIO regions are banned. NOTE: This is only active when CONFIG_STRICT_DEVMEM is set. In addition to the config option, a kernel parameter iomem=relaxed is provided for the cases where developers want to diagnose, in the field, driver issues from userspace. Reviewed-by: Matthew Wilcox Signed-off-by: Arjan van de Ven Signed-off-by: Jesse Barnes --- Documentation/kernel-parameters.txt | 4 ++ arch/x86/mm/init_32.c | 2 + arch/x86/mm/init_64.c | 2 + drivers/net/e1000e/netdev.c | 2 +- drivers/pci/pci-sysfs.c | 3 + drivers/pci/pci.c | 107 ++++++++++++++++++++++++++++++++---- include/linux/ioport.h | 11 +++- include/linux/pci.h | 3 + kernel/resource.c | 61 +++++++++++++++++++- 9 files changed, 176 insertions(+), 19 deletions(-) (limited to 'drivers/pci') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 0b3f6711d2f1..0072fabb1dd1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -918,6 +918,10 @@ and is between 256 and 4096 characters. It is defined in the file inttest= [IA64] + iomem= Disable strict checking of access to MMIO memory + strict regions from userspace. 
+ relaxed + iommu= [x86] off force diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 544d724caeee..88f1b10de3be 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -328,6 +328,8 @@ int devmem_is_allowed(unsigned long pagenr) { if (pagenr <= 256) return 1; + if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) + return 0; if (!page_is_ram(pagenr)) return 1; return 0; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 54c437e96541..23f68e77ad1f 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -888,6 +888,8 @@ int devmem_is_allowed(unsigned long pagenr) { if (pagenr <= 256) return 1; + if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) + return 0; if (!page_is_ram(pagenr)) return 1; return 0; diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index d4639facd1bd..91817d0afcaf 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -4807,7 +4807,7 @@ static int __devinit e1000_probe(struct pci_dev *pdev, } } - err = pci_request_selected_regions(pdev, + err = pci_request_selected_regions_exclusive(pdev, pci_select_bars(pdev, IORESOURCE_MEM), e1000e_driver_name); if (err) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 388440e0d222..d5cdccf27a69 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -620,6 +620,9 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, vma->vm_pgoff += start >> PAGE_SHIFT; mmap_type = res->flags & IORESOURCE_MEM ? pci_mmap_mem : pci_mmap_io; + if (res->flags & IORESOURCE_MEM && iomem_is_exclusive(start)) + return -EINVAL; + return pci_mmap_page_range(pdev, vma, mmap_type, write_combine); } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 2cfa41e367a7..47663dc0daf7 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1395,7 +1395,8 @@ void pci_release_region(struct pci_dev *pdev, int bar) * Returns 0 on success, or %EBUSY on error. A warning * message is also printed on failure. 
*/ -int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name) +static int __pci_request_region(struct pci_dev *pdev, int bar, const char *res_name, + int exclusive) { struct pci_devres *dr; @@ -1408,8 +1409,9 @@ int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name) goto err_out; } else if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) { - if (!request_mem_region(pci_resource_start(pdev, bar), - pci_resource_len(pdev, bar), res_name)) + if (!__request_mem_region(pci_resource_start(pdev, bar), + pci_resource_len(pdev, bar), res_name, + exclusive)) goto err_out; } @@ -1427,6 +1429,47 @@ err_out: return -EBUSY; } +/** + * pci_request_region - Reserved PCI I/O and memory resource + * @pdev: PCI device whose resources are to be reserved + * @bar: BAR to be reserved + * @res_name: Name to be associated with resource. + * + * Mark the PCI region associated with PCI device @pdev BAR @bar as + * being reserved by owner @res_name. Do not access any + * address inside the PCI regions unless this call returns + * successfully. + * + * Returns 0 on success, or %EBUSY on error. A warning + * message is also printed on failure. + */ +int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name) +{ + return __pci_request_region(pdev, bar, res_name, 0); +} + +/** + * pci_request_region_exclusive - Reserved PCI I/O and memory resource + * @pdev: PCI device whose resources are to be reserved + * @bar: BAR to be reserved + * @res_name: Name to be associated with resource. + * + * Mark the PCI region associated with PCI device @pdev BAR @bar as + * being reserved by owner @res_name. Do not access any + * address inside the PCI regions unless this call returns + * successfully. + * + * Returns 0 on success, or %EBUSY on error. A warning + * message is also printed on failure. + * + * The key difference that _exclusive makes is that userspace is + * explicitly not allowed to map the resource via /dev/mem or + * sysfs. 
+ */ +int pci_request_region_exclusive(struct pci_dev *pdev, int bar, const char *res_name) +{ + return __pci_request_region(pdev, bar, res_name, IORESOURCE_EXCLUSIVE); +} /** * pci_release_selected_regions - Release selected PCI I/O and memory resources * @pdev: PCI device whose resources were previously reserved @@ -1444,20 +1487,14 @@ void pci_release_selected_regions(struct pci_dev *pdev, int bars) pci_release_region(pdev, i); } -/** - * pci_request_selected_regions - Reserve selected PCI I/O and memory resources - * @pdev: PCI device whose resources are to be reserved - * @bars: Bitmask of BARs to be requested - * @res_name: Name to be associated with resource - */ -int pci_request_selected_regions(struct pci_dev *pdev, int bars, - const char *res_name) +int __pci_request_selected_regions(struct pci_dev *pdev, int bars, + const char *res_name, int excl) { int i; for (i = 0; i < 6; i++) if (bars & (1 << i)) - if(pci_request_region(pdev, i, res_name)) + if (__pci_request_region(pdev, i, res_name, excl)) goto err_out; return 0; @@ -1469,6 +1506,26 @@ err_out: return -EBUSY; } + +/** + * pci_request_selected_regions - Reserve selected PCI I/O and memory resources + * @pdev: PCI device whose resources are to be reserved + * @bars: Bitmask of BARs to be requested + * @res_name: Name to be associated with resource + */ +int pci_request_selected_regions(struct pci_dev *pdev, int bars, + const char *res_name) +{ + return __pci_request_selected_regions(pdev, bars, res_name, 0); +} + +int pci_request_selected_regions_exclusive(struct pci_dev *pdev, + int bars, const char *res_name) +{ + return __pci_request_selected_regions(pdev, bars, res_name, + IORESOURCE_EXCLUSIVE); +} + /** * pci_release_regions - Release reserved PCI I/O and memory resources * @pdev: PCI device whose resources were previously reserved by pci_request_regions @@ -1501,6 +1558,29 @@ int pci_request_regions(struct pci_dev *pdev, const char *res_name) return pci_request_selected_regions(pdev, ((1 << 6) 
- 1), res_name); } +/** + * pci_request_regions_exclusive - Reserved PCI I/O and memory resources + * @pdev: PCI device whose resources are to be reserved + * @res_name: Name to be associated with resource. + * + * Mark all PCI regions associated with PCI device @pdev as + * being reserved by owner @res_name. Do not access any + * address inside the PCI regions unless this call returns + * successfully. + * + * pci_request_regions_exclusive() will mark the region so that + * /dev/mem and the sysfs MMIO access will not be allowed. + * + * Returns 0 on success, or %EBUSY on error. A warning + * message is also printed on failure. + */ +int pci_request_regions_exclusive(struct pci_dev *pdev, const char *res_name) +{ + return pci_request_selected_regions_exclusive(pdev, + ((1 << 6) - 1), res_name); +} + + /** * pci_set_master - enables bus-mastering for device dev * @dev: the PCI device to enable @@ -2149,10 +2229,13 @@ EXPORT_SYMBOL(pci_find_capability); EXPORT_SYMBOL(pci_bus_find_capability); EXPORT_SYMBOL(pci_release_regions); EXPORT_SYMBOL(pci_request_regions); +EXPORT_SYMBOL(pci_request_regions_exclusive); EXPORT_SYMBOL(pci_release_region); EXPORT_SYMBOL(pci_request_region); +EXPORT_SYMBOL(pci_request_region_exclusive); EXPORT_SYMBOL(pci_release_selected_regions); EXPORT_SYMBOL(pci_request_selected_regions); +EXPORT_SYMBOL(pci_request_selected_regions_exclusive); EXPORT_SYMBOL(pci_set_master); EXPORT_SYMBOL(pci_set_mwi); EXPORT_SYMBOL(pci_try_set_mwi); diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 041e95aac2bf..f6bb2ca8e3ba 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -49,6 +49,7 @@ struct resource_list { #define IORESOURCE_SIZEALIGN 0x00020000 /* size indicates alignment */ #define IORESOURCE_STARTALIGN 0x00040000 /* start field is alignment */ +#define IORESOURCE_EXCLUSIVE 0x08000000 /* Userland may not map this resource */ #define IORESOURCE_DISABLED 0x10000000 #define IORESOURCE_UNSET 0x20000000 #define 
IORESOURCE_AUTO 0x40000000 @@ -133,13 +134,16 @@ static inline unsigned long resource_type(struct resource *res) } /* Convenience shorthand with allocation */ -#define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name)) -#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name)) +#define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), 0) +#define __request_mem_region(start,n,name, excl) __request_region(&iomem_resource, (start), (n), (name), excl) +#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name), 0) +#define request_mem_region_exclusive(start,n,name) \ + __request_region(&iomem_resource, (start), (n), (name), IORESOURCE_EXCLUSIVE) #define rename_region(region, newname) do { (region)->name = (newname); } while (0) extern struct resource * __request_region(struct resource *, resource_size_t start, - resource_size_t n, const char *name); + resource_size_t n, const char *name, int relaxed); /* Compatibility cruft */ #define release_region(start,n) __release_region(&ioport_resource, (start), (n)) @@ -175,6 +179,7 @@ extern struct resource * __devm_request_region(struct device *dev, extern void __devm_release_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n); extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size); +extern int iomem_is_exclusive(u64 addr); #endif /* __ASSEMBLY__ */ #endif /* _LINUX_IOPORT_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 59a3dc2059d3..bfcb39ca8879 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -686,10 +686,13 @@ void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *), int (*)(struct pci_dev *, u8, u8)); #define HAVE_PCI_REQ_REGIONS 2 int __must_check pci_request_regions(struct pci_dev *, const char *); +int __must_check pci_request_regions_exclusive(struct pci_dev *, const char *); void 
pci_release_regions(struct pci_dev *); int __must_check pci_request_region(struct pci_dev *, int, const char *); +int __must_check pci_request_region_exclusive(struct pci_dev *, int, const char *); void pci_release_region(struct pci_dev *, int); int pci_request_selected_regions(struct pci_dev *, int, const char *); +int pci_request_selected_regions_exclusive(struct pci_dev *, int, const char *); void pci_release_selected_regions(struct pci_dev *, int); /* drivers/pci/bus.c */ diff --git a/kernel/resource.c b/kernel/resource.c index e633106b12f6..ca6a1536b205 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -623,7 +623,7 @@ resource_size_t resource_alignment(struct resource *res) */ struct resource * __request_region(struct resource *parent, resource_size_t start, resource_size_t n, - const char *name) + const char *name, int flags) { struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL); @@ -634,6 +634,7 @@ struct resource * __request_region(struct resource *parent, res->start = start; res->end = start + n - 1; res->flags = IORESOURCE_BUSY; + res->flags |= flags; write_lock(&resource_lock); @@ -679,7 +680,7 @@ int __check_region(struct resource *parent, resource_size_t start, { struct resource * res; - res = __request_region(parent, start, n, "check-region"); + res = __request_region(parent, start, n, "check-region", 0); if (!res) return -EBUSY; @@ -776,7 +777,7 @@ struct resource * __devm_request_region(struct device *dev, dr->start = start; dr->n = n; - res = __request_region(parent, start, n, name); + res = __request_region(parent, start, n, name, 0); if (res) devres_add(dev, dr); else @@ -876,3 +877,57 @@ int iomem_map_sanity_check(resource_size_t addr, unsigned long size) return err; } + +#ifdef CONFIG_STRICT_DEVMEM +static int strict_iomem_checks = 1; +#else +static int strict_iomem_checks; +#endif + +/* + * check if an address is reserved in the iomem resource tree + * returns 1 if reserved, 0 if not reserved. 
+ */ +int iomem_is_exclusive(u64 addr) +{ + struct resource *p = &iomem_resource; + int err = 0; + loff_t l; + int size = PAGE_SIZE; + + if (!strict_iomem_checks) + return 0; + + addr = addr & PAGE_MASK; + + read_lock(&resource_lock); + for (p = p->child; p ; p = r_next(NULL, p, &l)) { + /* + * We can probably skip the resources without + * IORESOURCE_IO attribute? + */ + if (p->start >= addr + size) + break; + if (p->end < addr) + continue; + if (p->flags & IORESOURCE_BUSY && + p->flags & IORESOURCE_EXCLUSIVE) { + err = 1; + break; + } + } + read_unlock(&resource_lock); + + return err; +} + +static int __init strict_iomem(char *str) +{ + if (strstr(str, "relaxed")) + strict_iomem_checks = 0; + if (strstr(str, "strict")) + strict_iomem_checks = 1; + return 1; +} + +__setup("iomem=", strict_iomem); -- cgit 1.4.1 From 4ba7d0f0eb68cf7731ead4ca20e540d0266cfa8e Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 23 Nov 2008 09:08:28 +0100 Subject: drivers/pci/hotplug: Add missing pci_dev_get pci_get_slot does a pci_dev_get, so pci_dev_put needs to be called in an error case. An alternative would be to move the test_and_set_bit before the call to pci_get_slot. The problem was fixed using the following semantic patch. (http://www.emn.fr/x-info/coccinelle/) // @@ local idexpression *n; statement S1,S2; expression E,E1; expression *ptr != NULL; type T,T1; @@ ( if (!(n = pci_get_slot(...))) S1 | n = pci_get_slot(...) ) <... when != pci_dev_put(n) when != if (...) { <+... pci_dev_put(n) ...+> } when != true !n || ... when != n = (T)E when != E = n if (!n || ...) 
S2 ...> ( return \(0\|<+...n...+>\|ptr\); | + pci_dev_put(n); return ...; | pci_dev_put(n); | n = (T1)E1 | E1 = n ) // Signed-off-by: Julia Lawall Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/fakephp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c index 3a2637a00934..b0e7de9e536d 100644 --- a/drivers/pci/hotplug/fakephp.c +++ b/drivers/pci/hotplug/fakephp.c @@ -324,6 +324,7 @@ static int disable_slot(struct hotplug_slot *slot) if (test_and_set_bit(0, &dslot->removed)) { dbg("Slot already scheduled for removal\n"); + pci_dev_put(dev); return -ENODEV; } -- cgit 1.4.1 From 753e3aca735dc446f66d501b625122661738b57d Mon Sep 17 00:00:00 2001 From: Taku Izumi Date: Thu, 20 Nov 2008 15:22:32 +0900 Subject: PCI: revert additional _OSC evaluation Reverts adf411b819adc9fa96e9b3e638c7480d5e71d270. The commit adf411b819adc9fa96e9b3e638c7480d5e71d270 was based on the improper assumption that queried result was not updated when _OSC support field was changed. But, in fact, queried result is updated whenever _OSC support field was changed through __acpi_query_osc(). As a result, the commit adf411b819adc9fa96e9b3e638c7480d5e71d270 only introduced unnecessary additional _OSC evaluation... 
Tested-by: Andrew Patterson Reviewed-by: Andrew Patterson Signed-off-by: Kenji Kaneshige Signed-off-by: Taku Izumi Signed-off-by: Jesse Barnes --- drivers/pci/pci-acpi.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 8f923ee5177f..c580a525d9ce 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -24,13 +24,15 @@ struct acpi_osc_data { acpi_handle handle; u32 support_set; u32 control_set; + int is_queried; + u32 query_result; struct list_head sibiling; }; static LIST_HEAD(acpi_osc_data_list); struct acpi_osc_args { u32 capbuf[3]; - u32 ctrl_result; + u32 query_result; }; static DEFINE_MUTEX(pci_acpi_lock); @@ -112,8 +114,9 @@ static acpi_status acpi_run_osc(acpi_handle handle, goto out_kfree; } out_success: - osc_args->ctrl_result = - *((u32 *)(out_obj->buffer.pointer + 8)); + if (flags & OSC_QUERY_ENABLE) + osc_args->query_result = + *((u32 *)(out_obj->buffer.pointer + 8)); status = AE_OK; out_kfree: @@ -121,8 +124,7 @@ out_kfree: return status; } -static acpi_status __acpi_query_osc(u32 flags, struct acpi_osc_data *osc_data, - u32 *result) +static acpi_status __acpi_query_osc(u32 flags, struct acpi_osc_data *osc_data) { acpi_status status; u32 support_set; @@ -137,7 +139,8 @@ static acpi_status __acpi_query_osc(u32 flags, struct acpi_osc_data *osc_data, status = acpi_run_osc(osc_data->handle, &osc_args); if (ACPI_SUCCESS(status)) { osc_data->support_set = support_set; - *result = osc_args.ctrl_result; + osc_data->query_result = osc_args.query_result; + osc_data->is_queried = 1; } return status; @@ -151,7 +154,6 @@ static acpi_status __acpi_query_osc(u32 flags, struct acpi_osc_data *osc_data, */ int pci_acpi_osc_support(acpi_handle handle, u32 flags) { - u32 dummy; acpi_status status; acpi_handle tmp; struct acpi_osc_data *osc_data; @@ -169,7 +171,7 @@ int pci_acpi_osc_support(acpi_handle handle, u32 flags) goto out; } 
- __acpi_query_osc(flags, osc_data, &dummy); + __acpi_query_osc(flags, osc_data); out: mutex_unlock(&pci_acpi_lock); return rc; @@ -185,7 +187,7 @@ out: acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) { acpi_status status; - u32 ctrlset, control_set, result; + u32 ctrlset, control_set; acpi_handle tmp; struct acpi_osc_data *osc_data; struct acpi_osc_args osc_args; @@ -208,11 +210,13 @@ acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) goto out; } - status = __acpi_query_osc(osc_data->support_set, osc_data, &result); - if (ACPI_FAILURE(status)) - goto out; + if (!osc_data->is_queried) { + status = __acpi_query_osc(osc_data->support_set, osc_data); + if (ACPI_FAILURE(status)) + goto out; + } - if ((result & ctrlset) != ctrlset) { + if ((osc_data->query_result & ctrlset) != ctrlset) { status = AE_SUPPORT; goto out; } -- cgit 1.4.1 From e0fa3b43df0b57967857b2c83bad9a5193fbddf5 Mon Sep 17 00:00:00 2001 From: Taku Izumi Date: Thu, 20 Nov 2008 15:22:37 +0900 Subject: PCI/ACPI: Remove unnecessary _OSC evaluation for control request If a control had already been granted, we don't need to re-evaluate _OSC for it because firmware may not reject control of any feature it has previously granted control to. Reviewed-by: Andrew Patterson Tested-by: Andrew Patterson Signed-off-by: Kenji Kaneshige Signed-off-by: Taku Izumi Signed-off-by: Jesse Barnes --- drivers/pci/pci-acpi.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index c580a525d9ce..736bb248008e 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -210,6 +210,10 @@ acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) goto out; } + /* No need to evaluate _OSC if the control was already granted. 
*/ + if ((osc_data->control_set & ctrlset) == ctrlset) + goto out; + if (!osc_data->is_queried) { status = __acpi_query_osc(osc_data->support_set, osc_data); if (ACPI_FAILURE(status)) -- cgit 1.4.1 From 86d8698027fd30cc067d2aeeb1e3603d43c83df0 Mon Sep 17 00:00:00 2001 From: Taku Izumi Date: Thu, 20 Nov 2008 15:22:39 +0900 Subject: pci-acpi: Cleanup _OSC evaluation code Cleanup _OSC evaluation code. Some whitespace changes and a few other minor cleanups. Reviewed-by: Andrew Patterson Tested-by: Andrew Patterson Signed-off-by: Kenji Kaneshige Signed-off-by: Taku Izumi Signed-off-by: Jesse Barnes --- drivers/pci/pci-acpi.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 736bb248008e..3582512e7226 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -24,15 +24,14 @@ struct acpi_osc_data { acpi_handle handle; u32 support_set; u32 control_set; + u32 control_query; int is_queried; - u32 query_result; struct list_head sibiling; }; static LIST_HEAD(acpi_osc_data_list); struct acpi_osc_args { u32 capbuf[3]; - u32 query_result; }; static DEFINE_MUTEX(pci_acpi_lock); @@ -58,7 +57,7 @@ static u8 OSC_UUID[16] = {0x5B, 0x4D, 0xDB, 0x33, 0xF7, 0x1F, 0x1C, 0x40, 0x96, 0x57, 0x74, 0x41, 0xC0, 0x3D, 0xD7, 0x66}; static acpi_status acpi_run_osc(acpi_handle handle, - struct acpi_osc_args *osc_args) + struct acpi_osc_args *osc_args, u32 *retval) { acpi_status status; struct acpi_object_list input; @@ -114,9 +113,7 @@ static acpi_status acpi_run_osc(acpi_handle handle, goto out_kfree; } out_success: - if (flags & OSC_QUERY_ENABLE) - osc_args->query_result = - *((u32 *)(out_obj->buffer.pointer + 8)); + *retval = *((u32 *)(out_obj->buffer.pointer + 8)); status = AE_OK; out_kfree: @@ -127,7 +124,7 @@ out_kfree: static acpi_status __acpi_query_osc(u32 flags, struct acpi_osc_data *osc_data) { acpi_status status; - u32 support_set; + u32 
support_set, result; struct acpi_osc_args osc_args; /* do _OSC query for all possible controls */ @@ -136,10 +133,10 @@ static acpi_status __acpi_query_osc(u32 flags, struct acpi_osc_data *osc_data) osc_args.capbuf[OSC_SUPPORT_TYPE] = support_set; osc_args.capbuf[OSC_CONTROL_TYPE] = OSC_CONTROL_MASKS; - status = acpi_run_osc(osc_data->handle, &osc_args); + status = acpi_run_osc(osc_data->handle, &osc_args, &result); if (ACPI_SUCCESS(status)) { osc_data->support_set = support_set; - osc_data->query_result = osc_args.query_result; + osc_data->control_query = result; osc_data->is_queried = 1; } @@ -187,7 +184,7 @@ out: acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) { acpi_status status; - u32 ctrlset, control_set; + u32 control_req, control_set, result; acpi_handle tmp; struct acpi_osc_data *osc_data; struct acpi_osc_args osc_args; @@ -204,14 +201,14 @@ acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) goto out; } - ctrlset = (flags & OSC_CONTROL_MASKS); - if (!ctrlset) { + control_req = (flags & OSC_CONTROL_MASKS); + if (!control_req) { status = AE_TYPE; goto out; } /* No need to evaluate _OSC if the control was already granted. 
*/ - if ((osc_data->control_set & ctrlset) == ctrlset) + if ((osc_data->control_set & control_req) == control_req) goto out; if (!osc_data->is_queried) { @@ -220,18 +217,18 @@ acpi_status pci_osc_control_set(acpi_handle handle, u32 flags) goto out; } - if ((osc_data->query_result & ctrlset) != ctrlset) { + if ((osc_data->control_query & control_req) != control_req) { status = AE_SUPPORT; goto out; } - control_set = osc_data->control_set | ctrlset; + control_set = osc_data->control_set | control_req; osc_args.capbuf[OSC_QUERY_TYPE] = 0; osc_args.capbuf[OSC_SUPPORT_TYPE] = osc_data->support_set; osc_args.capbuf[OSC_CONTROL_TYPE] = control_set; - status = acpi_run_osc(handle, &osc_args); + status = acpi_run_osc(handle, &osc_args, &result); if (ACPI_SUCCESS(status)) - osc_data->control_set = control_set; + osc_data->control_set = result; out: mutex_unlock(&pci_acpi_lock); return status; -- cgit 1.4.1 From 56ee325e25a0f76fc3267872867b3d70af179aad Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 25 Nov 2008 21:48:14 +0000 Subject: PCI/ACPI: acpiphp: Identify more removable slots According to section 6.3.6 of the ACPI spec, the presence of an _RMV method that evaluates to 1 is sufficient to indicate that a slot is removable without needing an eject method. This patch refactors the ejectable slot detection code a little in order to flag these slots as ejectable and register them. Acpiphp then binds to the expresscard slot on my HP test machine.
Acked-by: Kristen Carlson Accardi Signed-off-by: Matthew Garrett Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/acpiphp_glue.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 3affc6472e65..1b19b7ec3e8c 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -74,7 +74,7 @@ static void handle_hotplug_event_func(acpi_handle handle, u32 type, void *contex * Ejectable slot should satisfy at least these conditions: * * 1. has _ADR method - * 2. has _EJ0 method + * 2. has _EJ0 method or _RMV method * * optionally * @@ -87,18 +87,25 @@ static int is_ejectable(acpi_handle handle) { acpi_status status; acpi_handle tmp; + unsigned long long removable; status = acpi_get_handle(handle, "_ADR", &tmp); - if (ACPI_FAILURE(status)) { + if (ACPI_FAILURE(status)) return 0; - } status = acpi_get_handle(handle, "_EJ0", &tmp); - if (ACPI_FAILURE(status)) { - return 0; + if (ACPI_SUCCESS(status)) + return 1; + + status = acpi_get_handle(handle, "_RMV", &tmp); + if (ACPI_SUCCESS(status)) { + status = acpi_evaluate_integer(handle, "_RMV", NULL, + &removable); + if (ACPI_SUCCESS(status) && removable) + return 1; } - return 1; + return 0; } @@ -185,16 +192,10 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv) unsigned long long adr, sun; int device, function, retval; - status = acpi_evaluate_integer(handle, "_ADR", NULL, &adr); - - if (ACPI_FAILURE(status)) - return AE_OK; - - status = acpi_get_handle(handle, "_EJ0", &tmp); - - if (ACPI_FAILURE(status) && !(is_dock_device(handle))) + if (!is_ejectable(handle) && !is_dock_device(handle)) return AE_OK; + acpi_evaluate_integer(handle, "_ADR", NULL, &adr); device = (adr >> 16) & 0xffff; function = adr & 0xffff; @@ -205,7 +206,8 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv) 
INIT_LIST_HEAD(&newfunc->sibling); newfunc->handle = handle; newfunc->function = function; - if (ACPI_SUCCESS(status)) + + if (ACPI_SUCCESS(acpi_get_handle(handle, "_EJ0", &tmp))) newfunc->flags = FUNC_HAS_EJ0; if (ACPI_SUCCESS(acpi_get_handle(handle, "_STA", &tmp))) -- cgit 1.4.1 From 2debb4d2019fa05a0896f1591dea0e0dc21bc046 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Tue, 25 Nov 2008 19:36:10 -0800 Subject: PCI: allow pci driver to support only dynids commit b41d6cf38e27 (PCI: Check dynids driver_data value for validity) requires all drivers to include an id table to try and match driver_data. Before validating driver_data check driver has an id table. Acked-by: Jean Delvare Cc: Milton Miller Signed-off-by: Chris Wright Signed-off-by: Jesse Barnes --- drivers/pci/pci-driver.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 99d867bcf22a..888191a3b0d1 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -48,7 +48,7 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count) subdevice=PCI_ANY_ID, class=0, class_mask=0; unsigned long driver_data=0; int fields=0; - int retval; + int retval=0; fields = sscanf(buf, "%x %x %x %x %x %x %lx", &vendor, &device, &subvendor, &subdevice, @@ -58,16 +58,18 @@ store_new_id(struct device_driver *driver, const char *buf, size_t count) /* Only accept driver_data values that match an existing id_table entry */ - retval = -EINVAL; - while (ids->vendor || ids->subvendor || ids->class_mask) { - if (driver_data == ids->driver_data) { - retval = 0; - break; + if (ids) { + retval = -EINVAL; + while (ids->vendor || ids->subvendor || ids->class_mask) { + if (driver_data == ids->driver_data) { + retval = 0; + break; + } + ids++; } - ids++; + if (retval) /* No match */ + return retval; } - if (retval) /* No match */ - return retval; dynid = kzalloc(sizeof(*dynid), GFP_KERNEL); if 
(!dynid) -- cgit 1.4.1 From c70e0d9dfef3d826c8ae4f7544acc53887cb161d Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Tue, 25 Nov 2008 21:17:13 -0800 Subject: PCI: pci-stub module to reserve pci device When doing device assignment with KVM there's currently nothing to protect the device from having a driver in the host as well as the guest. This trivial module just binds the pci device on the host to a stub driver so that a real host driver can't bind to the device. It has no pci id table, it supports only dynamic ids. # echo "8086 10f5" > /sys/bus/pci/drivers/pci-stub/new_id # echo -n 0000:00:19.0 > /sys/bus/pci/drivers/e1000e/unbind # echo -n 0000:00:19.0 > /sys/bus/pci/drivers/pci-stub/bind # ls -l /sys/bus/pci/devices/0000:00:19.0/driver lrwxrwxrwx 1 root root 0 2008-11-25 19:10 /sys/bus/pci/devices/0000:00:19.0/driver -> ../../../bus/pci/drivers/pci-stub Cc: "Kay, Allen M" Cc: "Nakajima, Jun" Signed-off-by: Chris Wright Acked-by: Greg Kroah-Hartman Signed-off-by: Jesse Barnes --- drivers/pci/Kconfig | 9 +++++++++ drivers/pci/Makefile | 2 ++ drivers/pci/pci-stub.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+) create mode 100644 drivers/pci/pci-stub.c (limited to 'drivers/pci') diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index e1ca42591ac4..2a4501dd2515 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -42,6 +42,15 @@ config PCI_DEBUG When in doubt, say N. +config PCI_STUB + tristate "PCI Stub driver" + depends on PCI + help + Say Y or M here if you want be able to reserve a PCI device + when it is going to be assigned to a guest operating system. + + When in doubt, say N. 
+ config HT_IRQ bool "Interrupts on hypertransport devices" default y diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index af3bfe22847b..3d07ce24f6a8 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -53,6 +53,8 @@ obj-$(CONFIG_HOTPLUG) += setup-bus.o obj-$(CONFIG_PCI_SYSCALL) += syscall.o +obj-$(CONFIG_PCI_STUB) += pci-stub.o + ifeq ($(CONFIG_PCI_DEBUG),y) EXTRA_CFLAGS += -DDEBUG endif diff --git a/drivers/pci/pci-stub.c b/drivers/pci/pci-stub.c new file mode 100644 index 000000000000..74fbec0bf6cb --- /dev/null +++ b/drivers/pci/pci-stub.c @@ -0,0 +1,47 @@ +/* pci-stub - simple stub driver to reserve a pci device + * + * Copyright (C) 2008 Red Hat, Inc. + * Author: + * Chris Wright + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Usage is simple, allocate a new id to the stub driver and bind the + * device to it. For example: + * + * # echo "8086 10f5" > /sys/bus/pci/drivers/pci-stub/new_id + * # echo -n 0000:00:19.0 > /sys/bus/pci/drivers/e1000e/unbind + * # echo -n 0000:00:19.0 > /sys/bus/pci/drivers/pci-stub/bind + * # ls -l /sys/bus/pci/devices/0000:00:19.0/driver + * .../0000:00:19.0/driver -> ../../../bus/pci/drivers/pci-stub + */ + +#include <linux/module.h> +#include <linux/pci.h> + +static int pci_stub_probe(struct pci_dev *dev, const struct pci_device_id *id) +{ + return 0; +} + +static struct pci_driver stub_driver = { + .name = "pci-stub", + .id_table = NULL, /* only dynamic id's */ + .probe = pci_stub_probe, +}; + +static int __init pci_stub_init(void) +{ + return pci_register_driver(&stub_driver); +} + +static void __exit pci_stub_exit(void) +{ + pci_unregister_driver(&stub_driver); +} + +module_init(pci_stub_init); +module_exit(pci_stub_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Chris Wright "); -- cgit 1.4.1 From 90d25f246ddefbb743764f8d45ae97e545a6ee86 Mon Sep 17 00:00:00 2001 From: "Rafael J.
Wysocki" Date: Sat, 6 Dec 2008 15:07:59 +0100 Subject: PCI: Suspend and resume PCI Express ports with interrupts disabled I don't see why the suspend and resume of PCI Express ports should be handled with interrupts enabled and it may even lead to problems in some situations. For this reason, move the suspending and resuming of PCI Express ports into ->suspend_late() and ->resume_early() callbacks executed with interrupts disabled. This patch addresses the regression from 2.6.26 tracked as http://bugzilla.kernel.org/show_bug.cgi?id=12121 . Acked-by: Linus Torvalds Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/pcie/portdrv_pci.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 584422da8d8b..0549fe2bdac9 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -50,7 +50,7 @@ static int pcie_portdrv_restore_config(struct pci_dev *dev) } #ifdef CONFIG_PM -static int pcie_portdrv_suspend(struct pci_dev *dev, pm_message_t state) +static int pcie_portdrv_suspend_late(struct pci_dev *dev, pm_message_t state) { int ret = pcie_port_device_suspend(dev, state); @@ -59,14 +59,14 @@ static int pcie_portdrv_suspend(struct pci_dev *dev, pm_message_t state) return ret; } -static int pcie_portdrv_resume(struct pci_dev *dev) +static int pcie_portdrv_resume_early(struct pci_dev *dev) { pcie_portdrv_restore_config(dev); return pcie_port_device_resume(dev); } #else -#define pcie_portdrv_suspend NULL -#define pcie_portdrv_resume NULL +#define pcie_portdrv_suspend_late NULL +#define pcie_portdrv_resume_early NULL #endif /* @@ -282,8 +282,8 @@ static struct pci_driver pcie_portdriver = { .probe = pcie_portdrv_probe, .remove = pcie_portdrv_remove, - .suspend = pcie_portdrv_suspend, - .resume = pcie_portdrv_resume, + .suspend_late = pcie_portdrv_suspend_late, + .resume_early = pcie_portdrv_resume_early, 
.err_handler = &pcie_portdrv_err_handler, }; -- cgit 1.4.1 From 894886e5d3de0bde2eded8a39bf7e76023fbd791 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 6 Dec 2008 10:10:10 -0800 Subject: PCI: extend on the ICH motherboard IO decode quirk list This adds more LPC controller IO range decode quirks for the Intel ICH family of chipsets. They differ a bit between the older ICH6 chipset and the more modern layout of the ICH7-ICH10 chipsets. This patch just prints out the IO decode information found by the quirks, but eventually we may want to add them to the resource tree, in order to know to avoid allocating things over them. That's especially true if it turns out that any firmware ends up putting the magic motherboard resources in an address range that we use for dynamic allocations (ie above PCIBIOS_MIN_IO, which is 0x1000 on x86). Signed-off-by: Linus Torvalds Cc: Rafael J. Wysocki Cc: Ivan Kokshaysky Cc: Greg KH Cc: Frans Pop Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 105 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 90 insertions(+), 15 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index e915a17b36c3..d3a9e0f38682 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -449,7 +449,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12, DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0, quirk_ich4_lpc_acpi); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1, quirk_ich4_lpc_acpi); -static void __devinit quirk_ich6_lpc_acpi(struct pci_dev *dev) +static void __devinit ich6_lpc_acpi_gpio(struct pci_dev *dev) { u32 region; @@ -459,20 +459,95 @@ static void __devinit quirk_ich6_lpc_acpi(struct pci_dev *dev) pci_read_config_dword(dev, 0x48, &region); quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES+1, "ICH6 GPIO"); } -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_0,
quirk_ich6_lpc_acpi); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, quirk_ich6_lpc_acpi); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0, quirk_ich6_lpc_acpi); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_1, quirk_ich6_lpc_acpi); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31, quirk_ich6_lpc_acpi); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_0, quirk_ich6_lpc_acpi); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_2, quirk_ich6_lpc_acpi); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_3, quirk_ich6_lpc_acpi); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1, quirk_ich6_lpc_acpi); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_4, quirk_ich6_lpc_acpi); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_2, quirk_ich6_lpc_acpi); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_4, quirk_ich6_lpc_acpi); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, quirk_ich6_lpc_acpi); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_8, quirk_ich6_lpc_acpi); + +static void __devinit ich6_lpc_generic_decode(struct pci_dev *dev, unsigned reg, const char *name, int dynsize) +{ + u32 val; + u32 size, base; + + pci_read_config_dword(dev, reg, &val); + + /* Enabled? */ + if (!(val & 1)) + return; + base = val & 0xfffc; + if (dynsize) { + /* + * This is not correct. It is 16, 32 or 64 bytes depending on + * register D31:F0:ADh bits 5:4. + * + * But this gets us at least _part_ of it. + */ + size = 16; + } else { + size = 128; + } + base &= ~(size-1); + + /* Just print it out for now. 
We should reserve it after more debugging */ + dev_info(&dev->dev, "%s PIO at %04x-%04x\n", name, base, base+size-1); +} + +static void __devinit quirk_ich6_lpc(struct pci_dev *dev) +{ + /* Shared ACPI/GPIO decode with all ICH6+ */ + ich6_lpc_acpi_gpio(dev); + + /* ICH6-specific generic IO decode */ + ich6_lpc_generic_decode(dev, 0x84, "LPC Generic IO decode 1", 0); + ich6_lpc_generic_decode(dev, 0x88, "LPC Generic IO decode 2", 1); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_0, quirk_ich6_lpc); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1, quirk_ich6_lpc); + +static void __devinit ich7_lpc_generic_decode(struct pci_dev *dev, unsigned reg, const char *name) +{ + u32 val; + u32 mask, base; + + pci_read_config_dword(dev, reg, &val); + + /* Enabled? */ + if (!(val & 1)) + return; + + /* + * IO base in bits 15:2, mask in bits 23:18, both + * are dword-based + */ + base = val & 0xfffc; + mask = (val >> 16) & 0xfc; + mask |= 3; + + /* Just print it out for now. 
We should reserve it after more debugging */ + dev_info(&dev->dev, "%s PIO at %04x (mask %04x)\n", name, base, mask); +} + +/* ICH7-10 has the same common LPC generic IO decode registers */ +static void __devinit quirk_ich7_lpc(struct pci_dev *dev) +{ + /* We share the common ACPI/DPIO decode with ICH6 */ + ich6_lpc_acpi_gpio(dev); + + /* And have 4 ICH7+ generic decodes */ + ich7_lpc_generic_decode(dev, 0x84, "ICH7 LPC Generic IO decode 1"); + ich7_lpc_generic_decode(dev, 0x88, "ICH7 LPC Generic IO decode 2"); + ich7_lpc_generic_decode(dev, 0x8c, "ICH7 LPC Generic IO decode 3"); + ich7_lpc_generic_decode(dev, 0x90, "ICH7 LPC Generic IO decode 4"); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0, quirk_ich7_lpc); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_1, quirk_ich7_lpc); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31, quirk_ich7_lpc); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_0, quirk_ich7_lpc); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_2, quirk_ich7_lpc); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_3, quirk_ich7_lpc); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1, quirk_ich7_lpc); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_4, quirk_ich7_lpc); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_2, quirk_ich7_lpc); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_4, quirk_ich7_lpc); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, quirk_ich7_lpc); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_8, quirk_ich7_lpc); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH10_1, quirk_ich7_lpc); /* * VIA ACPI: One IO region pointed to by longword at -- cgit 1.4.1 From 63f4898ace2788a89ed685672aab092e1c3e50e6 Mon Sep 17 00:00:00 2001 
From: "Rafael J. Wysocki" Date: Sun, 7 Dec 2008 22:02:58 +0100 Subject: PCI: handle PCI state saving with interrupts disabled Since interrupts will soon be disabled at PCI resume time, we need to pre-allocate memory to save/restore PCI config space (or use GFP_ATOMIC, but this is safer). Reported-by: Linus Torvalds Signed-off-by: "Rafael J. Wysocki" Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 73 +++++++++++++++++++++++++++++++++++++---------------- drivers/pci/pci.h | 1 + drivers/pci/probe.c | 3 +++ 3 files changed, 55 insertions(+), 22 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 47663dc0daf7..3222f9022707 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -640,19 +640,14 @@ static int pci_save_pcie_state(struct pci_dev *dev) int pos, i = 0; struct pci_cap_saved_state *save_state; u16 *cap; - int found = 0; pos = pci_find_capability(dev, PCI_CAP_ID_EXP); if (pos <= 0) return 0; save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP); - if (!save_state) - save_state = kzalloc(sizeof(*save_state) + sizeof(u16) * 4, GFP_KERNEL); - else - found = 1; if (!save_state) { - dev_err(&dev->dev, "out of memory in pci_save_pcie_state\n"); + dev_err(&dev->dev, "buffer not found in %s\n", __FUNCTION__); return -ENOMEM; } cap = (u16 *)&save_state->data[0]; @@ -661,9 +656,7 @@ static int pci_save_pcie_state(struct pci_dev *dev) pci_read_config_word(dev, pos + PCI_EXP_LNKCTL, &cap[i++]); pci_read_config_word(dev, pos + PCI_EXP_SLTCTL, &cap[i++]); pci_read_config_word(dev, pos + PCI_EXP_RTCTL, &cap[i++]); - save_state->cap_nr = PCI_CAP_ID_EXP; - if (!found) - pci_add_saved_cap(dev, save_state); + return 0; } @@ -688,30 +681,21 @@ static void pci_restore_pcie_state(struct pci_dev *dev) static int pci_save_pcix_state(struct pci_dev *dev) { - int pos, i = 0; + int pos; struct pci_cap_saved_state *save_state; - u16 *cap; - int found = 0; pos = pci_find_capability(dev, PCI_CAP_ID_PCIX); if (pos <= 0) return 0; save_state = 
pci_find_saved_cap(dev, PCI_CAP_ID_PCIX); - if (!save_state) - save_state = kzalloc(sizeof(*save_state) + sizeof(u16), GFP_KERNEL); - else - found = 1; if (!save_state) { - dev_err(&dev->dev, "out of memory in pci_save_pcie_state\n"); + dev_err(&dev->dev, "buffer not found in %s\n", __FUNCTION__); return -ENOMEM; } - cap = (u16 *)&save_state->data[0]; - pci_read_config_word(dev, pos + PCI_X_CMD, &cap[i++]); - save_state->cap_nr = PCI_CAP_ID_PCIX; - if (!found) - pci_add_saved_cap(dev, save_state); + pci_read_config_word(dev, pos + PCI_X_CMD, (u16 *)save_state->data); + return 0; } @@ -1300,6 +1284,51 @@ void pci_pm_init(struct pci_dev *dev) } } +/** + * pci_add_save_buffer - allocate buffer for saving given capability registers + * @dev: the PCI device + * @cap: the capability to allocate the buffer for + * @size: requested size of the buffer + */ +static int pci_add_cap_save_buffer( + struct pci_dev *dev, char cap, unsigned int size) +{ + int pos; + struct pci_cap_saved_state *save_state; + + pos = pci_find_capability(dev, cap); + if (pos <= 0) + return 0; + + save_state = kzalloc(sizeof(*save_state) + size, GFP_KERNEL); + if (!save_state) + return -ENOMEM; + + save_state->cap_nr = cap; + pci_add_saved_cap(dev, save_state); + + return 0; +} + +/** + * pci_allocate_cap_save_buffers - allocate buffers for saving capabilities + * @dev: the PCI device + */ +void pci_allocate_cap_save_buffers(struct pci_dev *dev) +{ + int error; + + error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_EXP, 4 * sizeof(u16)); + if (error) + dev_err(&dev->dev, + "unable to preallocate PCI Express save buffer\n"); + + error = pci_add_cap_save_buffer(dev, PCI_CAP_ID_PCIX, sizeof(u16)); + if (error) + dev_err(&dev->dev, + "unable to preallocate PCI-X save buffer\n"); +} + /** * pci_enable_ari - enable ARI forwarding if hardware support it * @dev: the PCI device diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 9162e242b99e..7242b511a93f 100644 --- a/drivers/pci/pci.h +++ 
b/drivers/pci/pci.h @@ -45,6 +45,7 @@ struct pci_platform_pm_ops { extern int pci_set_platform_pm(struct pci_platform_pm_ops *ops); extern void pci_pm_init(struct pci_dev *dev); +extern void pci_allocate_cap_save_buffers(struct pci_dev *dev); extern int pci_user_read_config_byte(struct pci_dev *dev, int where, u8 *val); extern int pci_user_read_config_word(struct pci_dev *dev, int where, u16 *val); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index eb2b985beb48..5dcf2b65e3f9 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -958,6 +958,9 @@ static void pci_init_capabilities(struct pci_dev *dev) /* MSI/MSI-X list */ pci_msi_init_pci_dev(dev); + /* Buffers for saving PCIe and PCI-X capabilities */ + pci_allocate_cap_save_buffers(dev); + /* Power Management */ pci_pm_init(dev); -- cgit 1.4.1 From a367f74cb6f9c49850a4ee86e45fd3a8e83065e4 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Fri, 5 Dec 2008 14:05:23 +0900 Subject: PCI hotplug: aerdrv: fix a typo in error message "TLP" is an acronym for "Transaction Layer Packet." 
Signed-off-by: Hidetoshi Seto Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv_errprint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c index 3933d4f30e8c..0fc29ae80df8 100644 --- a/drivers/pci/pcie/aer/aerdrv_errprint.c +++ b/drivers/pci/pcie/aer/aerdrv_errprint.c @@ -233,7 +233,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) if (info->flags & AER_TLP_HEADER_VALID_FLAG) { unsigned char *tlp = (unsigned char *) &info->tlp; - printk("%sTLB Header:\n", loglevel); + printk("%sTLP Header:\n", loglevel); printk("%s%02x%02x%02x%02x %02x%02x%02x%02x" " %02x%02x%02x%02x %02x%02x%02x%02x\n", loglevel, -- cgit 1.4.1 From 104bafcfab7ce3031399e60069949f10acecc022 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 12 Dec 2008 06:49:40 +0100 Subject: PCI: Don't carp about BAR allocation failures in quiet boot These are easy to trigger (more or less harmlessly) with multiple video cards, since the ROM BAR will typically not be given any space by the BIOS bridge setup. No reason to punish quiet boot for this. Signed-off-by: Adam Jackson Signed-off-by: Jesse Barnes --- arch/x86/pci/i386.c | 4 ++-- drivers/pci/setup-res.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/pci') diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index e51bf2cda4b0..f884740da318 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -129,7 +129,7 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) pr = pci_find_parent_resource(dev, r); if (!r->start || !pr || request_resource(pr, r) < 0) { - dev_err(&dev->dev, "BAR %d: can't allocate resource\n", idx); + dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); /* * Something is wrong with the region. 
* Invalidate the resource to prevent @@ -170,7 +170,7 @@ static void __init pcibios_allocate_resources(int pass) r->flags, disabled, pass); pr = pci_find_parent_resource(dev, r); if (!pr || request_resource(pr, r) < 0) { - dev_err(&dev->dev, "BAR %d: can't allocate resource\n", idx); + dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); /* We'll assign a new address later */ r->end -= r->start; r->start = 0; diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 2dbd96cce2d8..4e375632499a 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -134,7 +134,7 @@ int pci_assign_resource(struct pci_dev *dev, int resno) align = resource_alignment(res); if (!align) { - dev_err(&dev->dev, "BAR %d: can't allocate resource (bogus " + dev_info(&dev->dev, "BAR %d: can't allocate resource (bogus " "alignment) %pR flags %#lx\n", resno, res, res->flags); return -EINVAL; @@ -157,7 +157,7 @@ int pci_assign_resource(struct pci_dev *dev, int resno) } if (ret) { - dev_err(&dev->dev, "BAR %d: can't allocate %s resource %pR\n", + dev_info(&dev->dev, "BAR %d: can't allocate %s resource %pR\n", resno, res->flags & IORESOURCE_IO ? "I/O" : "mem", res); } else { res->flags &= ~IORESOURCE_STARTALIGN; -- cgit 1.4.1 From 2a9d35219c593bdf46ec21f2b75a6370af7af1b0 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Thu, 11 Dec 2008 11:17:55 -0700 Subject: PCI hotplug: acpiphp whitespace cleanup Clean up whitespace. Setting 'let c_space_errors=1' in .vimrc shows all sorts of ugliness. 
;) Signed-off-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/acpiphp.h | 2 +- drivers/pci/hotplug/acpiphp_glue.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h index 9bcb6cbd5aa9..4fc168b70095 100644 --- a/drivers/pci/hotplug/acpiphp.h +++ b/drivers/pci/hotplug/acpiphp.h @@ -44,7 +44,7 @@ do { \ if (acpiphp_debug) \ printk(KERN_DEBUG "%s: " format, \ - MY_NAME , ## arg); \ + MY_NAME , ## arg); \ } while (0) #define err(format, arg...) printk(KERN_ERR "%s: " format, MY_NAME , ## arg) #define info(format, arg...) printk(KERN_INFO "%s: " format, MY_NAME , ## arg) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 1b19b7ec3e8c..7a5760426897 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -167,9 +167,9 @@ static int post_dock_fixups(struct notifier_block *nb, unsigned long val, if (((buses >> 8) & 0xff) != bus->secondary) { buses = (buses & 0xff000000) - | ((unsigned int)(bus->primary) << 0) - | ((unsigned int)(bus->secondary) << 8) - | ((unsigned int)(bus->subordinate) << 16); + | ((unsigned int)(bus->primary) << 0) + | ((unsigned int)(bus->secondary) << 8) + | ((unsigned int)(bus->subordinate) << 16); pci_write_config_dword(bus->self, PCI_PRIMARY_BUS, buses); } return NOTIFY_OK; -- cgit 1.4.1 From 29935282f24a6ce2f0a65c3d77fb7f695461f583 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 10 Dec 2008 13:02:18 -0700 Subject: PCI: make PCI bus resource messages more meaningful Change PCI bus resource messages so they have a bit more context and look like the rest of PCI, e.g., - bus: 00 index 0 io port: [0x00-0xffff] - bus: 00 index 1 mmio: [0x000000-0xffffffff] + pci 0000:00: bus resource 0 io : [0x00-0xffff] + pci 0000:00: bus resource 1 mem: [0x000000-0xffffffff] This also changes them from KERN_INFO to KERN_DEBUG. 
Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- drivers/pci/setup-bus.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index ea979f2bc6db..09e2c3cd2bef 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -536,9 +536,9 @@ static void pci_bus_dump_res(struct pci_bus *bus) if (!res) continue; - printk(KERN_INFO "bus: %02x index %x %s: %pR\n", - bus->number, i, - (res->flags & IORESOURCE_IO) ? "io port" : "mmio", res); + printk(KERN_DEBUG "pci %04x:%02x: bus resource %d %s %pR\n", + pci_domain_nr(bus), bus->number, i, + (res->flags & IORESOURCE_IO) ? "io: " : "mem:", res); } } -- cgit 1.4.1 From 0e331bf1c44401a5b6d7b778dd1f254724132dff Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 10 Dec 2008 13:00:21 -0700 Subject: PCI: pcie port driver: remove extra printks These printks don't contain enough information to be useful. I think it would be more useful to have a message when a service driver binds to a root port. That could contain the service type, the interrupt mode and IRQ, etc. 
Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- drivers/pci/pcie/portdrv_core.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 75f501ab6468..6a9f83ccaff4 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -103,7 +103,6 @@ static int pcie_port_resume_service(struct device *dev) */ static void release_pcie_device(struct device *dev) { - dev_printk(KERN_DEBUG, dev, "free port service\n"); kfree(to_pcie_device(dev)); } @@ -150,7 +149,6 @@ static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask) if (pos) { struct msix_entry msix_entries[PCIE_PORT_DEVICE_MAXSERVICES] = {{0, 0}, {0, 1}, {0, 2}, {0, 3}}; - dev_info(&dev->dev, "found MSI-X capability\n"); status = pci_enable_msix(dev, msix_entries, nvec); if (!status) { int j = 0; @@ -165,7 +163,6 @@ static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask) if (status) { pos = pci_find_capability(dev, PCI_CAP_ID_MSI); if (pos) { - dev_info(&dev->dev, "found MSI capability\n"); status = pci_enable_msi(dev); if (!status) { interrupt_mode = PCIE_PORT_MSI_MODE; @@ -239,7 +236,6 @@ static struct pcie_device* alloc_pcie_device(struct pci_dev *parent, return NULL; pcie_device_init(parent, device, port_type, service_type, irq,irq_mode); - dev_printk(KERN_DEBUG, &device->device, "allocate port service\n"); return device; } -- cgit 1.4.1 From 98d3333a13029ab07ca1d1bfb9bfa138ea76c3c0 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 9 Dec 2008 16:11:41 -0700 Subject: PCI hotplug: cpqphp: use config space PCI interrupt pin encoding This patch changes cpqphp to use interrupt pin values just as they come from PCI config space, i.e., 1=INTA, ..., 4=INTD. pcibios_set_irq_routing() takes pin arguments in the range 0=INTA, ..., 3=INTD, so we'll adjust the pin just before calling it. 
Signed-off-by: Bjorn Helgaas Acked-by: Alex Chiang Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/cpqphp_ctrl.c | 4 ++-- drivers/pci/hotplug/cpqphp_pci.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/cpqphp_ctrl.c b/drivers/pci/hotplug/cpqphp_ctrl.c index a60a25290995..d94722149e02 100644 --- a/drivers/pci/hotplug/cpqphp_ctrl.c +++ b/drivers/pci/hotplug/cpqphp_ctrl.c @@ -2604,7 +2604,7 @@ static int configure_new_function(struct controller *ctrl, struct pci_func *func for (cloop = 0; cloop < 4; cloop++) { if (irqs.valid_INT & (0x01 << cloop)) { rc = cpqhp_set_irq(func->bus, func->device, - 0x0A + cloop, irqs.interrupt[cloop]); + cloop + 1, irqs.interrupt[cloop]); if (rc) goto free_and_out; } @@ -2945,7 +2945,7 @@ static int configure_new_function(struct controller *ctrl, struct pci_func *func } if (!behind_bridge) { - rc = cpqhp_set_irq(func->bus, func->device, temp_byte + 0x09, IRQ); + rc = cpqhp_set_irq(func->bus, func->device, temp_byte, IRQ); if (rc) return 1; } else { diff --git a/drivers/pci/hotplug/cpqphp_pci.c b/drivers/pci/hotplug/cpqphp_pci.c index df146be9d2e9..6c0ed0fcb8ee 100644 --- a/drivers/pci/hotplug/cpqphp_pci.c +++ b/drivers/pci/hotplug/cpqphp_pci.c @@ -171,7 +171,7 @@ int cpqhp_set_irq (u8 bus_num, u8 dev_num, u8 int_pin, u8 irq_num) fakebus->number = bus_num; dbg("%s: dev %d, bus %d, pin %d, num %d\n", __func__, dev_num, bus_num, int_pin, irq_num); - rc = pcibios_set_irq_routing(fakedev, int_pin - 0x0a, irq_num); + rc = pcibios_set_irq_routing(fakedev, int_pin - 1, irq_num); kfree(fakedev); kfree(fakebus); dbg("%s: rc %d\n", __func__, rc); -- cgit 1.4.1 From 878f2e50fd1cfea575cdca5bf019c2175dc64131 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 9 Dec 2008 16:11:46 -0700 Subject: PCI: use config space encoding in pci_get_interrupt_pin() This patch makes pci_get_interrupt_pin() return values encoded the same way as the "Interrupt Pin" value in PCI config space, 
i.e., 1=INTA, ..., 4=INTD. pirq_bios_set() is the only in-tree caller of pci_get_interrupt_pin() and pci_get_interrupt_pin() is not exported. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: hpa@zytor.com Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/irq.c | 2 +- drivers/pci/pci.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/pci') diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 373b9afe6d44..399a172f047d 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -533,7 +533,7 @@ static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, { struct pci_dev *bridge; int pin = pci_get_interrupt_pin(dev, &bridge); - return pcibios_set_irq_routing(bridge, pin, irq); + return pcibios_set_irq_routing(bridge, pin - 1, irq); } #endif diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 3222f9022707..bd52ca4c2893 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1374,9 +1374,9 @@ pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge) pin = dev->pin; if (!pin) return -1; - pin--; + while (dev->bus->self) { - pin = (pin + PCI_SLOT(dev->devfn)) % 4; + pin = (((pin - 1) + PCI_SLOT(dev->devfn)) % 4) + 1; dev = dev->bus->self; } *bridge = dev; -- cgit 1.4.1 From 57c2cf71c12318b72ebaa5720d210476b6bac4d4 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 11 Dec 2008 11:24:23 -0700 Subject: PCI: add pci_swizzle_interrupt_pin() This patch adds pci_swizzle_interrupt_pin(), which implements the INTx swizzling algorithm specified in Table 9-1 of the "PCI-to-PCI Bridge Architecture Specification," revision 1.2. There are many architecture-specific implementations of this swizzle that can be replaced by this common one. 
Reviewed-by: David Howells Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 16 +++++++++++++++- include/linux/pci.h | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index bd52ca4c2893..d4d71fae6233 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1366,6 +1366,20 @@ void pci_enable_ari(struct pci_dev *dev) bridge->ari_enabled = 1; } +/** + * pci_swizzle_interrupt_pin - swizzle INTx for device behind bridge + * @dev: the PCI device + * @pin: the INTx pin (1=INTA, 2=INTB, 3=INTC, 4=INTD) + * + * Perform INTx swizzling for a device behind one level of bridge. This is + * required by section 9.1 of the PCI-to-PCI bridge specification for devices + * behind bridges on add-in cards. + */ +u8 pci_swizzle_interrupt_pin(struct pci_dev *dev, u8 pin) +{ + return (((pin - 1) + PCI_SLOT(dev->devfn)) % 4) + 1; +} + int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge) { @@ -1376,7 +1390,7 @@ pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge) return -1; while (dev->bus->self) { - pin = (((pin - 1) + PCI_SLOT(dev->devfn)) % 4) + 1; + pin = pci_swizzle_interrupt_pin(dev, pin); dev = dev->bus->self; } *bridge = dev; diff --git a/include/linux/pci.h b/include/linux/pci.h index bfcb39ca8879..58357d14f94c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -532,6 +532,7 @@ int __must_check pci_bus_add_device(struct pci_dev *dev); void pci_read_bridge_bases(struct pci_bus *child); struct resource *pci_find_parent_resource(const struct pci_dev *dev, struct resource *res); +u8 pci_swizzle_interrupt_pin(struct pci_dev *dev, u8 pin); int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge); extern struct pci_dev *pci_dev_get(struct pci_dev *dev); extern void pci_dev_put(struct pci_dev *dev); -- cgit 1.4.1 From bebd590ca27e80ffe3129ab4f0a3124f0a340f43 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 16 Dec
2008 14:06:58 -0500 Subject: PCI: fix incorrect error return in pci_enable_wake This patch (as1186) fixes a minor mistake in pci_enable_wake(). When the routine is asked to disable remote wakeup, it should not return an error merely because the device is not allowed to do wakeups! Signed-off-by: Alan Stern Acked-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d4d71fae6233..1fb7cff4cdae 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1091,7 +1091,7 @@ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable) int error = 0; bool pme_done = false; - if (!device_may_wakeup(&dev->dev)) + if (enable && !device_may_wakeup(&dev->dev)) return -EINVAL; /* -- cgit 1.4.1 From 1684f5ddd4c0c754f52c78eaa2c5c69ad09fb18c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 1 Dec 2008 14:30:30 -0800 Subject: PCI: uninline pci_ioremap_bar() It's too large to be inlined. 
Acked-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 16 ++++++++++++++++ include/linux/pci.h | 15 +-------------- 2 files changed, 17 insertions(+), 14 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 1fb7cff4cdae..9354dd63f035 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -56,6 +56,22 @@ unsigned char pci_bus_max_busnr(struct pci_bus* bus) } EXPORT_SYMBOL_GPL(pci_bus_max_busnr); +#ifdef CONFIG_HAS_IOMEM +void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar) +{ + /* + * Make sure the BAR is actually a memory resource, not an IO resource + */ + if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) { + WARN_ON(1); + return NULL; + } + return ioremap_nocache(pci_resource_start(pdev, bar), + pci_resource_len(pdev, bar)); +} +EXPORT_SYMBOL_GPL(pci_ioremap_bar); +#endif + #if 0 /** * pci_max_busnr - returns maximum PCI bus number diff --git a/include/linux/pci.h b/include/linux/pci.h index 58357d14f94c..0d8bc920c2e5 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1160,20 +1160,7 @@ static inline void pci_mmcfg_late_init(void) { } int pci_ext_cfg_avail(struct pci_dev *dev); -#ifdef CONFIG_HAS_IOMEM -static inline void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar) -{ - /* - * Make sure the BAR is actually a memory resource, not an IO resource - */ - if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) { - WARN_ON(1); - return NULL; - } - return ioremap_nocache(pci_resource_start(pdev, bar), - pci_resource_len(pdev, bar)); -} -#endif +void __iomem *pci_ioremap_bar(struct pci_dev *pdev, int bar); #endif /* __KERNEL__ */ #endif /* LINUX_PCI_H */ -- cgit 1.4.1 From 92425a405ea482209b43093a5e35be7de02acf18 Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Sun, 30 Nov 2008 17:10:12 -0800 Subject: PCI: Make settable sysfs attributes more consistent PCI devices have three settable boolean attributes, enable, broken_parity_status, and 
msi_bus. The store functions for these would silently interpret "0x01" as false, "1llogical" as true, and "true" would be (silently!) ignored and do nothing. This is inconsistent with typical sysfs handling of settable attributes, and just plain doesn't make much sense. So, use strict_strtoul(), which was created for this purpose. The store functions will treat a value of 0 as false, non-zero as true, and return -EINVAL for a parse failure. Additionally, is_enabled_store() and msi_bus_store() return -EPERM if CAP_SYS_ADMIN is lacking, rather than silently doing nothing. This is more typical behavior for sysfs attributes that need a capability. And msi_bus_store() will only print the "forced subordinate bus ..." warning if the MSI flag was actually forced to a different value. Signed-off-by: Trent Piepho Signed-off-by: Jesse Barnes --- drivers/pci/pci-sysfs.c | 48 ++++++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 20 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index d5cdccf27a69..d2f1354fd189 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -58,13 +58,14 @@ static ssize_t broken_parity_status_store(struct device *dev, const char *buf, size_t count) { struct pci_dev *pdev = to_pci_dev(dev); - ssize_t consumed = -EINVAL; + unsigned long val; - if ((count > 0) && (*buf == '0' || *buf == '1')) { - pdev->broken_parity_status = *buf == '1' ? 
1 : 0; - consumed = count; - } - return consumed; + if (strict_strtoul(buf, 0, &val) < 0) + return -EINVAL; + + pdev->broken_parity_status = !!val; + + return count; } static ssize_t local_cpus_show(struct device *dev, @@ -133,19 +134,23 @@ static ssize_t is_enabled_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - ssize_t result = -EINVAL; struct pci_dev *pdev = to_pci_dev(dev); + unsigned long val; + ssize_t result = strict_strtoul(buf, 0, &val); + + if (result < 0) + return result; /* this can crash the machine when done on the "wrong" device */ if (!capable(CAP_SYS_ADMIN)) - return count; + return -EPERM; - if (*buf == '0') { + if (!val) { if (atomic_read(&pdev->enable_cnt) != 0) pci_disable_device(pdev); else result = -EIO; - } else if (*buf == '1') + } else result = pci_enable_device(pdev); return result < 0 ? result : count; @@ -185,25 +190,28 @@ msi_bus_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct pci_dev *pdev = to_pci_dev(dev); + unsigned long val; + + if (strict_strtoul(buf, 0, &val) < 0) + return -EINVAL; /* bad things may happen if the no_msi flag is changed * while some drivers are loaded */ if (!capable(CAP_SYS_ADMIN)) - return count; + return -EPERM; + /* Maybe pci devices without subordinate busses shouldn't even have this + * attribute in the first place? */ if (!pdev->subordinate) return count; - if (*buf == '0') { - pdev->subordinate->bus_flags |= PCI_BUS_FLAGS_NO_MSI; - dev_warn(&pdev->dev, "forced subordinate bus to not support MSI," - " bad things could happen.\n"); - } + /* Is the flag going to change, or keep the value it already had? 
*/ + if (!(pdev->subordinate->bus_flags & PCI_BUS_FLAGS_NO_MSI) ^ + !!val) { + pdev->subordinate->bus_flags ^= PCI_BUS_FLAGS_NO_MSI; - if (*buf == '1') { - pdev->subordinate->bus_flags &= ~PCI_BUS_FLAGS_NO_MSI; - dev_warn(&pdev->dev, "forced subordinate bus to support MSI," - " bad things could happen.\n"); + dev_warn(&pdev->dev, "forced subordinate bus to%s support MSI," + " bad things could happen\n", val ? "" : " not"); } return count; -- cgit 1.4.1 From 999da9fd489cd9774a89122940190376e19b21ce Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Mon, 1 Dec 2008 14:30:29 -0800 Subject: PCI quirks: piix3: warn softer about enabling passive release All the other quirks are dev_info() not dev_err(), this one isn't special. This makes 'quiet' boot in qemu really quiet. Signed-off-by: Adam Jackson Cc: Bartlomiej Zolnierkiewicz Cc: Jeff Garzik Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index d3a9e0f38682..baad093aafe3 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -56,7 +56,7 @@ static void quirk_passive_release(struct pci_dev *dev) while ((d = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_0, d))) { pci_read_config_byte(d, 0x82, &dlc); if (!(dlc & 1<<1)) { - dev_err(&d->dev, "PIIX3: Enabling Passive Release\n"); + dev_info(&d->dev, "PIIX3: Enabling Passive Release\n"); dlc |= 1<<1; pci_write_config_byte(d, 0x82, dlc); } -- cgit 1.4.1 From 6a49d8120021897e139641062236215aac5d220e Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sat, 22 Nov 2008 02:38:21 +0800 Subject: PCI: enhance pci_ari_enabled() Change parameter of pci_ari_enabled() from 'pci_dev' to 'pci_bus'. ARI forwarding on the bridge mostly concerns the subordinate devices rather than the bridge itself. So this change will make the function easier to use. 
Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/pci.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 7242b511a93f..392388468f6f 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -165,13 +165,13 @@ struct pci_slot_attribute { extern void pci_enable_ari(struct pci_dev *dev); /** * pci_ari_enabled - query ARI forwarding status - * @dev: the PCI device + * @bus: the PCI bus * * Returns 1 if ARI forwarding is enabled, or 0 if not enabled; */ -static inline int pci_ari_enabled(struct pci_dev *dev) +static inline int pci_ari_enabled(struct pci_bus *bus) { - return dev->ari_enabled; + return bus->self && bus->self->ari_enabled; } #endif /* DRIVERS_PCI_H */ -- cgit 1.4.1 From 14add80b5120966fe0659d61815b9e9b4b68fdc5 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sat, 22 Nov 2008 02:38:52 +0800 Subject: PCI: remove unnecessary arg of pci_update_resource() This cleanup removes unnecessary argument 'struct resource *res' in pci_update_resource(), so it takes same arguments as other companion functions (pci_assign_resource(), etc.). 
Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 4 ++-- drivers/pci/setup-res.c | 7 ++++--- include/linux/pci.h | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 9354dd63f035..c3ef2e78fc58 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -393,8 +393,8 @@ pci_restore_bars(struct pci_dev *dev) return; } - for (i = 0; i < numres; i ++) - pci_update_resource(dev, &dev->resource[i], i); + for (i = 0; i < numres; i++) + pci_update_resource(dev, i); } static struct pci_platform_pm_ops *pci_platform_pm; diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 4e375632499a..3c5203ff53c7 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -26,11 +26,12 @@ #include "pci.h" -void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno) +void pci_update_resource(struct pci_dev *dev, int resno) { struct pci_bus_region region; u32 new, check, mask; int reg; + struct resource *res = dev->resource + resno; /* * Ignore resources for unimplemented BARs and unused resource slots @@ -162,7 +163,7 @@ int pci_assign_resource(struct pci_dev *dev, int resno) } else { res->flags &= ~IORESOURCE_STARTALIGN; if (resno < PCI_BRIDGE_RESOURCES) - pci_update_resource(dev, res, resno); + pci_update_resource(dev, resno); } return ret; @@ -197,7 +198,7 @@ int pci_assign_resource_fixed(struct pci_dev *dev, int resno) dev_err(&dev->dev, "BAR %d: can't allocate %s resource %pR\n", resno, res->flags & IORESOURCE_IO ? 
"I/O" : "mem", res); } else if (resno < PCI_BRIDGE_RESOURCES) { - pci_update_resource(dev, res, resno); + pci_update_resource(dev, resno); } return ret; diff --git a/include/linux/pci.h b/include/linux/pci.h index 0d8bc920c2e5..c5e02f324e13 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -648,7 +648,7 @@ int pcie_get_readrq(struct pci_dev *dev); int pcie_set_readrq(struct pci_dev *dev, int rq); int pci_reset_function(struct pci_dev *dev); int pci_execute_reset_function(struct pci_dev *dev); -void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno); +void pci_update_resource(struct pci_dev *dev, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); int pci_select_bars(struct pci_dev *dev, unsigned long flags); -- cgit 1.4.1 From fde09c6d8f92de0c9f75698a75f0989f2234c517 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sat, 22 Nov 2008 02:39:32 +0800 Subject: PCI: define PCI resource names in an 'enum' This patch moves all definitions of the PCI resource names to an 'enum', and also replaces some hard-coded resource variables with symbol names. This change eases introduction of device specific resources. 
Reviewed-by: Bjorn Helgaas Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/pci-sysfs.c | 4 +++- drivers/pci/probe.c | 2 +- drivers/pci/proc.c | 7 ++++--- include/linux/pci.h | 37 ++++++++++++++++++++++++------------- 4 files changed, 32 insertions(+), 18 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index d2f1354fd189..ea54cedcdfc6 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -102,11 +102,13 @@ resource_show(struct device * dev, struct device_attribute *attr, char * buf) struct pci_dev * pci_dev = to_pci_dev(dev); char * str = buf; int i; - int max = 7; + int max; resource_size_t start, end; if (pci_dev->subordinate) max = DEVICE_COUNT_RESOURCE; + else + max = PCI_BRIDGE_RESOURCES; for (i = 0; i < max; i++) { struct resource *res = &pci_dev->resource[i]; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 5dcf2b65e3f9..e1cf5d50ed4d 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -423,7 +423,7 @@ static struct pci_bus *pci_alloc_child_bus(struct pci_bus *parent, child->subordinate = 0xff; /* Set up default resource pointers and names.. 
*/ - for (i = 0; i < 4; i++) { + for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) { child->resource[i] = &bridge->resource[PCI_BRIDGE_RESOURCES+i]; child->resource[i]->name = child->name; } diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index 7fb086d39617..593bb844b8db 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -361,15 +361,16 @@ static int show_device(struct seq_file *m, void *v) dev->vendor, dev->device, dev->irq); - /* Here should be 7 and not PCI_NUM_RESOURCES as we need to preserve compatibility */ - for (i=0; i<7; i++) { + + /* only print standard and ROM resources to preserve compatibility */ + for (i = 0; i <= PCI_ROM_RESOURCE; i++) { resource_size_t start, end; pci_resource_to_user(dev, i, &dev->resource[i], &start, &end); seq_printf(m, "\t%16llx", (unsigned long long)(start | (dev->resource[i].flags & PCI_REGION_FLAG_MASK))); } - for (i=0; i<7; i++) { + for (i = 0; i <= PCI_ROM_RESOURCE; i++) { resource_size_t start, end; pci_resource_to_user(dev, i, &dev->resource[i], &start, &end); seq_printf(m, "\t%16llx", diff --git a/include/linux/pci.h b/include/linux/pci.h index c5e02f324e13..da1c22bab40e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -82,7 +82,30 @@ enum pci_mmap_state { #define PCI_DMA_FROMDEVICE 2 #define PCI_DMA_NONE 3 -#define DEVICE_COUNT_RESOURCE 12 +/* + * For PCI devices, the region numbers are assigned this way: + */ +enum { + /* #0-5: standard PCI resources */ + PCI_STD_RESOURCES, + PCI_STD_RESOURCE_END = 5, + + /* #6: expansion ROM resource */ + PCI_ROM_RESOURCE, + + /* resources assigned to buses behind the bridge */ +#define PCI_BRIDGE_RESOURCE_NUM 4 + + PCI_BRIDGE_RESOURCES, + PCI_BRIDGE_RESOURCE_END = PCI_BRIDGE_RESOURCES + + PCI_BRIDGE_RESOURCE_NUM - 1, + + /* total resources associated with a PCI device */ + PCI_NUM_RESOURCES, + + /* preserve this for compatibility */ + DEVICE_COUNT_RESOURCE +}; typedef int __bitwise pci_power_t; @@ -274,18 +297,6 @@ static inline void pci_add_saved_cap(struct 
pci_dev *pci_dev, hlist_add_head(&new_cap->next, &pci_dev->saved_cap_space); } -/* - * For PCI devices, the region numbers are assigned this way: - * - * 0-5 standard PCI regions - * 6 expansion ROM - * 7-10 bridges: address space assigned to buses behind the bridge - */ - -#define PCI_ROM_RESOURCE 6 -#define PCI_BRIDGE_RESOURCES 7 -#define PCI_NUM_RESOURCES 11 - #ifndef PCI_BUS_NUM_RESOURCES #define PCI_BUS_NUM_RESOURCES 16 #endif -- cgit 1.4.1 From bc5f5a8277cb353161454b6704b3186ebcf3a2a3 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sat, 22 Nov 2008 02:40:00 +0800 Subject: PCI: remove unnecessary condition check in pci_restore_bars() Remove the unnecessary number of resources condition checks because the pci_update_resource() will check availability of the resources. Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index c3ef2e78fc58..deeab19d7d10 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -376,24 +376,9 @@ pci_find_parent_resource(const struct pci_dev *dev, struct resource *res) static void pci_restore_bars(struct pci_dev *dev) { - int i, numres; - - switch (dev->hdr_type) { - case PCI_HEADER_TYPE_NORMAL: - numres = 6; - break; - case PCI_HEADER_TYPE_BRIDGE: - numres = 2; - break; - case PCI_HEADER_TYPE_CARDBUS: - numres = 1; - break; - default: - /* Should never get here, but just in case... */ - return; - } + int i; - for (i = 0; i < numres; i++) + for (i = 0; i < PCI_BRIDGE_RESOURCES; i++) pci_update_resource(dev, i); } -- cgit 1.4.1 From 0b400c7ed4d027e02f6231afa39852a2d48e6f25 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sat, 22 Nov 2008 02:40:40 +0800 Subject: PCI: export __pci_read_base() Export __pci_read_base() so it can be used by whole PCI subsystem. 
Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/pci.h | 9 +++++++++ drivers/pci/probe.c | 20 +++++++++----------- 2 files changed, 18 insertions(+), 11 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 392388468f6f..d881fde8bb82 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -162,6 +162,15 @@ struct pci_slot_attribute { }; #define to_pci_slot_attr(s) container_of(s, struct pci_slot_attribute, attr) +enum pci_bar_type { + pci_bar_unknown, /* Standard PCI BAR probe */ + pci_bar_io, /* An io port BAR */ + pci_bar_mem32, /* A 32-bit memory BAR */ + pci_bar_mem64, /* A 64-bit memory BAR */ +}; + +extern int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, + struct resource *res, unsigned int reg); extern void pci_enable_ari(struct pci_dev *dev); /** * pci_ari_enabled - query ARI forwarding status diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index e1cf5d50ed4d..5372d3699e08 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -135,13 +135,6 @@ static u64 pci_size(u64 base, u64 maxbase, u64 mask) return size; } -enum pci_bar_type { - pci_bar_unknown, /* Standard PCI BAR probe */ - pci_bar_io, /* An io port BAR */ - pci_bar_mem32, /* A 32-bit memory BAR */ - pci_bar_mem64, /* A 64-bit memory BAR */ -}; - static inline enum pci_bar_type decode_bar(struct resource *res, u32 bar) { if ((bar & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO) { @@ -156,11 +149,16 @@ static inline enum pci_bar_type decode_bar(struct resource *res, u32 bar) return pci_bar_mem32; } -/* - * If the type is not unknown, we assume that the lowest bit is 'enable'. - * Returns 1 if the BAR was 64-bit and 0 if it was 32-bit. +/** + * __pci_read_base - read a PCI BAR + * @dev: the PCI device + * @type: type of the BAR + * @res: resource buffer to be filled in + * @pos: BAR position in the config space + * + * Returns 1 if the BAR is 64-bit, or 0 if 32-bit.
*/ -static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, +int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, struct resource *res, unsigned int pos) { u32 l, sz, mask; -- cgit 1.4.1 From 3789fa8a2e534523c896a32a9f27f78d52ad7d82 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sat, 22 Nov 2008 02:41:07 +0800 Subject: PCI: allow pci_alloc_child_bus() to handle a NULL bridge Allow pci_alloc_child_bus() to allocate buses without bridge devices. Some SR-IOV devices can occupy more than one bus number, but there are no explicit bridges because they have an internal routing mechanism. Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/probe.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 5372d3699e08..2ee00962734b 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -398,12 +398,10 @@ static struct pci_bus *pci_alloc_child_bus(struct pci_bus *parent, if (!child) return NULL; - child->self = bridge; child->parent = parent; child->ops = parent->ops; child->sysdata = parent->sysdata; child->bus_flags = parent->bus_flags; - child->bridge = get_device(&bridge->dev); /* initialize some portions of the bus device, but don't register it * now as the parent is not properly set up yet. This device will get @@ -420,6 +418,12 @@ static struct pci_bus *pci_alloc_child_bus(struct pci_bus *parent, child->primary = parent->secondary; child->subordinate = 0xff; + if (!bridge) + return child; + + child->self = bridge; + child->bridge = get_device(&bridge->dev); + /* Set up default resource pointers and names..
*/ for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) { child->resource[i] = &bridge->resource[PCI_BRIDGE_RESOURCES+i]; -- cgit 1.4.1 From 613e7ed6f72b1a115f7ece8ce1b66cf095de1348 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sat, 22 Nov 2008 02:41:27 +0800 Subject: PCI: add a new function to map BAR offsets Add a function to map a given resource number to a corresponding register so drivers can get the offset and type of device specific BARs. Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 22 ++++++++++++++++++++++ drivers/pci/pci.h | 2 ++ drivers/pci/setup-res.c | 13 +++++-------- 3 files changed, 29 insertions(+), 8 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index deeab19d7d10..7e9c0f3936dd 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2201,6 +2201,28 @@ int pci_select_bars(struct pci_dev *dev, unsigned long flags) return bars; } +/** + * pci_resource_bar - get position of the BAR associated with a resource + * @dev: the PCI device + * @resno: the resource number + * @type: the BAR type to be filled in + * + * Returns BAR position in config space, or 0 if the BAR is invalid. 
+ */ +int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type) +{ + if (resno < PCI_ROM_RESOURCE) { + *type = pci_bar_unknown; + return PCI_BASE_ADDRESS_0 + 4 * resno; + } else if (resno == PCI_ROM_RESOURCE) { + *type = pci_bar_mem32; + return dev->rom_base_reg; + } + + dev_err(&dev->dev, "BAR: invalid resource #%d\n", resno); + return 0; +} + static void __devinit pci_no_domains(void) { #ifdef CONFIG_PCI_DOMAINS diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index d881fde8bb82..c4f4a1e6ea28 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -171,6 +171,8 @@ enum pci_bar_type { extern int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, struct resource *res, unsigned int reg); +extern int pci_resource_bar(struct pci_dev *dev, int resno, + enum pci_bar_type *type); extern void pci_enable_ari(struct pci_dev *dev); /** * pci_ari_enabled - query ARI forwarding status diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 3c5203ff53c7..32e8d88a4619 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -31,6 +31,7 @@ void pci_update_resource(struct pci_dev *dev, int resno) struct pci_bus_region region; u32 new, check, mask; int reg; + enum pci_bar_type type; struct resource *res = dev->resource + resno; /* @@ -62,17 +63,13 @@ void pci_update_resource(struct pci_dev *dev, int resno) else mask = (u32)PCI_BASE_ADDRESS_MEM_MASK; - if (resno < 6) { - reg = PCI_BASE_ADDRESS_0 + 4 * resno; - } else if (resno == PCI_ROM_RESOURCE) { + reg = pci_resource_bar(dev, resno, &type); + if (!reg) + return; + if (type != pci_bar_unknown) { if (!(res->flags & IORESOURCE_ROM_ENABLE)) return; new |= PCI_ROM_ADDRESS_ENABLE; - reg = dev->rom_base_reg; - } else { - /* Hmm, non-standard resource. 
*/ - - return; /* kill uninitialised var warning */ } pci_write_config_dword(dev, reg, new); -- cgit 1.4.1 From 3fa16fdb48e0d83c2acf46e357548c89891df58b Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sat, 22 Nov 2008 02:41:45 +0800 Subject: PCI: cleanup pci_bus_add_devices() Cleanup pci_bus_add_devices() by negating the conditional and continuing, rather than having a single conditional take up the whole body. Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/bus.c | 55 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 27 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 3e1c135b174a..1b6de1b565aa 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -71,7 +71,7 @@ pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res, } /** - * add a single device + * pci_bus_add_device - add a single device * @dev: device to add * * This adds a single pci device to the global @@ -105,7 +105,7 @@ int pci_bus_add_device(struct pci_dev *dev) void pci_bus_add_devices(struct pci_bus *bus) { struct pci_dev *dev; - struct pci_bus *child_bus; + struct pci_bus *child; int retval; list_for_each_entry(dev, &bus->devices, bus_list) { @@ -120,39 +120,40 @@ void pci_bus_add_devices(struct pci_bus *bus) list_for_each_entry(dev, &bus->devices, bus_list) { BUG_ON(!dev->is_added); + child = dev->subordinate; /* * If there is an unattached subordinate bus, attach * it and then scan for unattached PCI devices. */ - if (dev->subordinate) { - if (list_empty(&dev->subordinate->node)) { - down_write(&pci_bus_sem); - list_add_tail(&dev->subordinate->node, - &dev->bus->children); - up_write(&pci_bus_sem); - } - pci_bus_add_devices(dev->subordinate); - - /* register the bus with sysfs as the parent is now - * properly registered. 
*/ - child_bus = dev->subordinate; - if (child_bus->is_added) - continue; - child_bus->dev.parent = child_bus->bridge; - retval = device_register(&child_bus->dev); - if (retval) - dev_err(&dev->dev, "Error registering pci_bus," - " continuing...\n"); - else { - child_bus->is_added = 1; - retval = device_create_file(&child_bus->dev, - &dev_attr_cpuaffinity); - } + if (!child) + continue; + if (list_empty(&child->node)) { + down_write(&pci_bus_sem); + list_add_tail(&child->node, &dev->bus->children); + up_write(&pci_bus_sem); + } + pci_bus_add_devices(child); + + /* + * register the bus with sysfs as the parent is now + * properly registered. + */ + if (child->is_added) + continue; + child->dev.parent = child->bridge; + retval = device_register(&child->dev); + if (retval) + dev_err(&dev->dev, "Error registering pci_bus," + " continuing...\n"); + else { + child->is_added = 1; + retval = device_create_file(&child->dev, + &dev_attr_cpuaffinity); if (retval) dev_err(&dev->dev, "Error creating cpuaffinity" " file, continuing...\n"); - retval = device_create_file(&child_bus->dev, + retval = device_create_file(&child->dev, &dev_attr_cpulistaffinity); if (retval) dev_err(&dev->dev, -- cgit 1.4.1 From 876e501ab25dcd683574a5d3d56d8fe450083ed6 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sat, 22 Nov 2008 02:42:35 +0800 Subject: PCI: factor pci_bus_add_child() from pci_bus_add_devices() This patch splits a new function, pci_bus_add_child(), from pci_bus_add_devices(). The new function can be used to register PCI buses to the device core. 
Signed-off-by: Yu Zhao Signed-off-by: Jesse Barnes --- drivers/pci/bus.c | 56 ++++++++++++++++++++++++++++++++----------------------- drivers/pci/pci.h | 1 + 2 files changed, 34 insertions(+), 23 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 1b6de1b565aa..52b54f053be0 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -90,6 +90,37 @@ int pci_bus_add_device(struct pci_dev *dev) return 0; } +/** + * pci_bus_add_child - add a child bus + * @bus: bus to add + * + * This adds sysfs entries for a single bus + */ +int pci_bus_add_child(struct pci_bus *bus) +{ + int retval; + + if (bus->bridge) + bus->dev.parent = bus->bridge; + + retval = device_register(&bus->dev); + if (retval) + return retval; + + bus->is_added = 1; + + retval = device_create_file(&bus->dev, &dev_attr_cpuaffinity); + if (retval) + return retval; + + retval = device_create_file(&bus->dev, &dev_attr_cpulistaffinity); + + /* Create legacy_io and legacy_mem files for this bus */ + pci_create_legacy_files(bus); + + return retval; +} + /** * pci_bus_add_devices - insert newly discovered PCI devices * @bus: bus to check for new devices @@ -140,30 +171,9 @@ void pci_bus_add_devices(struct pci_bus *bus) */ if (child->is_added) continue; - child->dev.parent = child->bridge; - retval = device_register(&child->dev); + retval = pci_bus_add_child(child); if (retval) - dev_err(&dev->dev, "Error registering pci_bus," - " continuing...\n"); - else { - child->is_added = 1; - retval = device_create_file(&child->dev, - &dev_attr_cpuaffinity); - if (retval) - dev_err(&dev->dev, "Error creating cpuaffinity" - " file, continuing...\n"); - - retval = device_create_file(&child->dev, - &dev_attr_cpulistaffinity); - if (retval) - dev_err(&dev->dev, - "Error creating cpulistaffinity" - " file, continuing...\n"); - - /* Create legacy_io and legacy_mem files for this bus */ - pci_create_legacy_files(child_bus); - - } + dev_err(&dev->dev, "Error adding bus, continuing\n"); } 
} diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index c4f4a1e6ea28..d1e92d83aa06 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -173,6 +173,7 @@ extern int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, struct resource *res, unsigned int reg); extern int pci_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type); +extern int pci_bus_add_child(struct pci_bus *bus); extern void pci_enable_ari(struct pci_dev *dev); /** * pci_ari_enabled - query ARI forwarding status -- cgit 1.4.1 From eb9c39d031bbcfd4005bd7e0337c3fd3909c1bf7 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 17 Dec 2008 12:10:05 -0800 Subject: PCI: set device wakeup capable flag if platform support is present When PCI devices are initialized, we check whether they support PCI PM caps and set the device can_wakeup flag if so. However, some devices may have platform provided wakeup events rather than PCI PME signals, so we need to set can_wakeup in that case too. Doing so should allow wakeups from many more devices, especially on cost constrained systems. Reported-by: Alan Stern Tested-by: Joseph Chan Acked-by: "Rafael J. Wysocki" Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 20 ++++++++++++++++++++ drivers/pci/pci.h | 1 + drivers/pci/probe.c | 1 + 3 files changed, 22 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 7e9c0f3936dd..1b807330e500 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1285,6 +1285,26 @@ void pci_pm_init(struct pci_dev *dev) } } +/** + * platform_pci_wakeup_init - init platform wakeup if present + * @dev: PCI device + * + * Some devices don't have PCI PM caps but can still generate wakeup + * events through platform methods (like ACPI events). If @dev supports + * platform wakeup events, set the device flag to indicate as much. This + * may be redundant if the device also supports PCI PM caps, but double + * initialization should be safe in that case. 
+ */ +void platform_pci_wakeup_init(struct pci_dev *dev) +{ + if (!platform_pci_can_wakeup(dev)) + return; + + device_set_wakeup_capable(&dev->dev, true); + device_set_wakeup_enable(&dev->dev, false); + platform_pci_sleep_wake(dev, false); +} + /** * pci_add_save_buffer - allocate buffer for saving given capability registers * @dev: the PCI device diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index d1e92d83aa06..65deed8bfc06 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -45,6 +45,7 @@ struct pci_platform_pm_ops { extern int pci_set_platform_pm(struct pci_platform_pm_ops *ops); extern void pci_pm_init(struct pci_dev *dev); +extern void platform_pci_wakeup_init(struct pci_dev *dev); extern void pci_allocate_cap_save_buffers(struct pci_dev *dev); extern int pci_user_read_config_byte(struct pci_dev *dev, int where, u8 *val); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 2ee00962734b..303644614eea 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -965,6 +965,7 @@ static void pci_init_capabilities(struct pci_dev *dev) /* Power Management */ pci_pm_init(dev); + platform_pci_wakeup_init(dev); /* Vital Product Data */ pci_vpd_pci22_init(dev); -- cgit 1.4.1 From f06fc0b6f8a6846e0ad48aee7b0f282b4fb5dcdc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 27 Dec 2008 16:30:52 +0100 Subject: PCI PM: Fix pci_update_current_state Currently, PCI devices without the PM capability that are power manageable by the platform (eg. ACPI) are not handled correctly by pci_set_power_state(), because their current_state field is not updated to reflect the new power state of the device. Fix this by making pci_update_current_state() accept additional argument representing the power state of the device as set by the platform. Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 1b807330e500..9d2aa6366fd0 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -525,14 +525,17 @@ pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) * pci_update_current_state - Read PCI power state of given device from its * PCI PM registers and cache it * @dev: PCI device to handle. + * @state: State to cache in case the device doesn't have the PM capability */ -static void pci_update_current_state(struct pci_dev *dev) +static void pci_update_current_state(struct pci_dev *dev, pci_power_t state) { if (dev->pm_cap) { u16 pmcsr; pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr); dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK); + } else { + dev->current_state = state; } } @@ -575,7 +578,7 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) */ int ret = platform_pci_set_power_state(dev, PCI_D0); if (!ret) - pci_update_current_state(dev); + pci_update_current_state(dev, PCI_D0); } /* This device is quirked not to be put into D3, so don't put it in D3 */ @@ -588,7 +591,7 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) /* Allow the platform to finalize the transition */ int ret = platform_pci_set_power_state(dev, state); if (!ret) { - pci_update_current_state(dev); + pci_update_current_state(dev, state); error = 0; } } -- cgit 1.4.1 From a79d682f789730dfabaebbb507c87a90c0671a62 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 27 Dec 2008 16:28:58 +0100 Subject: PCI PM: Split PCI Express port suspend-resume Suspend-resume of PCI Express ports has recently been moved into _suspend_late() and _resume_early() callbacks, but some functions executed from there should not be called with interrupts disabled, eg. pci_enable_device(). 
For this reason, split the suspend-resume of PCI Express ports into parts to be executed with interrupts disabled and with interrupts enabled. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/pcie/portdrv_pci.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 0549fe2bdac9..99a914a027f8 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -41,7 +41,6 @@ static int pcie_portdrv_restore_config(struct pci_dev *dev) { int retval; - pci_restore_state(dev); retval = pci_enable_device(dev); if (retval) return retval; @@ -50,23 +49,32 @@ static int pcie_portdrv_restore_config(struct pci_dev *dev) } #ifdef CONFIG_PM -static int pcie_portdrv_suspend_late(struct pci_dev *dev, pm_message_t state) +static int pcie_portdrv_suspend(struct pci_dev *dev, pm_message_t state) { - int ret = pcie_port_device_suspend(dev, state); + return pcie_port_device_suspend(dev, state); - if (!ret) - ret = pcie_portdrv_save_config(dev); - return ret; +} + +static int pcie_portdrv_suspend_late(struct pci_dev *dev, pm_message_t state) +{ + return pci_save_state(dev); } static int pcie_portdrv_resume_early(struct pci_dev *dev) +{ + return pci_restore_state(dev); +} + +static int pcie_portdrv_resume(struct pci_dev *dev) { pcie_portdrv_restore_config(dev); return pcie_port_device_resume(dev); } #else +#define pcie_portdrv_suspend NULL #define pcie_portdrv_suspend_late NULL #define pcie_portdrv_resume_early NULL +#define pcie_portdrv_resume NULL #endif /* @@ -221,6 +229,7 @@ static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev) /* If fatal, restore cfg space for possible link reset at upstream */ if (dev->error_state == pci_channel_io_frozen) { + pci_restore_state(dev); pcie_portdrv_restore_config(dev); pci_enable_pcie_error_reporting(dev); } @@ -282,8 +291,10 @@ static struct pci_driver 
pcie_portdriver = { .probe = pcie_portdrv_probe, .remove = pcie_portdrv_remove, + .suspend = pcie_portdrv_suspend, .suspend_late = pcie_portdrv_suspend_late, .resume_early = pcie_portdrv_resume_early, + .resume = pcie_portdrv_resume, .err_handler = &pcie_portdrv_err_handler, }; -- cgit 1.4.1 From 873392ca514f87eae39f53b6944caf85b1a047cb Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 31 Dec 2008 23:54:56 +1030 Subject: PCI: work_on_cpu: use in drivers/pci/pci-driver.c This uses work_on_cpu(), rather than altering the cpumask of the thread which we happen to be. Note the cleanups: 1) I've removed the CONFIG_NUMA test, since dev_to_node() returns -1 for !CONFIG_NUMA anyway and the compiler will eliminate it. 2) No need to reset mempolicy to default (a bad idea anyway) since work_on_cpu is run from a workqueue. Signed-off-by: Rusty Russell Signed-off-by: Jesse Barnes --- drivers/pci/pci-driver.c | 52 +++++++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 20 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 888191a3b0d1..c3f76be832d4 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "pci.h" /* @@ -185,32 +186,43 @@ static const struct pci_device_id *pci_match_device(struct pci_driver *drv, return pci_match_id(drv->id_table, dev); } +struct drv_dev_and_id { + struct pci_driver *drv; + struct pci_dev *dev; + const struct pci_device_id *id; +}; + +static long local_pci_probe(void *_ddi) +{ + struct drv_dev_and_id *ddi = _ddi; + + return ddi->drv->probe(ddi->dev, ddi->id); +} + static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev, const struct pci_device_id *id) { - int error; -#ifdef CONFIG_NUMA - /* Execute driver initialization on node where the - device's bus is attached to. 
This way the driver likely - allocates its local memory on the right node without - any need to change it. */ - struct mempolicy *oldpol; - cpumask_t oldmask = current->cpus_allowed; - int node = dev_to_node(&dev->dev); + int error, node; + struct drv_dev_and_id ddi = { drv, dev, id }; + /* Execute driver initialization on node where the device's + bus is attached to. This way the driver likely allocates + its local memory on the right node without any need to + change it. */ + node = dev_to_node(&dev->dev); if (node >= 0) { + int cpu; node_to_cpumask_ptr(nodecpumask, node); - set_cpus_allowed_ptr(current, nodecpumask); - } - /* And set default memory allocation policy */ - oldpol = current->mempolicy; - current->mempolicy = NULL; /* fall back to system default policy */ -#endif - error = drv->probe(dev, id); -#ifdef CONFIG_NUMA - set_cpus_allowed_ptr(current, &oldmask); - current->mempolicy = oldpol; -#endif + + get_online_cpus(); + cpu = cpumask_any_and(nodecpumask, cpu_online_mask); + if (cpu < nr_cpu_ids) + error = work_on_cpu(cpu, local_pci_probe, &ddi); + else + error = local_pci_probe(&ddi); + put_online_cpus(); + } else + error = local_pci_probe(&ddi); return error; } -- cgit 1.4.1 From c9ffa5a586a97da4d552f89b8f39eea79a63a612 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Wed, 17 Dec 2008 12:07:38 +0900 Subject: PCI: pciehp: add ACPI based slot detection There is a problem that some non hot-pluggable PCIe slots are detected as hot-pluggable by pciehp on some platforms. The immediate cause of this problem is that hot-plug capable bit in the Slot Capabilities register is set even for non hot-pluggable slots on those platforms. It seems a BIOS/hardware problem, but we need workaround about that. Some of those platforms define hot-pluggable PCIe slots on ACPI namespace properly, while hot-plug capable bit in the Slot Capabilities register is set improperly. 
So using ACPI namespace information in pciehp to detect PCIe hot-pluggable slots would be a workaround. This patch adds 'pciehp_detect_mode' module option. When 'acpi' is specified, pciehp uses ACPI namespace information to detect PCIe hot-pluggable slots. Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/Makefile | 3 + drivers/pci/hotplug/pciehp.h | 15 ++++- drivers/pci/hotplug/pciehp_acpi.c | 114 ++++++++++++++++++++++++++++++++++++++ drivers/pci/hotplug/pciehp_core.c | 1 + 4 files changed, 132 insertions(+), 1 deletion(-) create mode 100644 drivers/pci/hotplug/pciehp_acpi.c (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile index 9bdbe1a6688f..e31fb91652ce 100644 --- a/drivers/pci/hotplug/Makefile +++ b/drivers/pci/hotplug/Makefile @@ -55,6 +55,9 @@ pciehp-objs := pciehp_core.o \ pciehp_ctrl.o \ pciehp_pci.o \ pciehp_hpc.o +ifdef CONFIG_ACPI +pciehp-objs += pciehp_acpi.o +endif shpchp-objs := shpchp_core.o \ shpchp_ctrl.o \ diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index b2801a7ee37f..27fd18f019f8 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -220,11 +220,23 @@ struct hpc_ops { #include #include +extern void __init pciehp_acpi_slot_detection_init(void); +extern int pciehp_acpi_slot_detection_check(struct pci_dev *dev); + +static inline void pciehp_firmware_init(void) +{ + pciehp_acpi_slot_detection_init(); +} + static inline int pciehp_get_hp_hw_control_from_firmware(struct pci_dev *dev) { + int retval; u32 flags = (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); - return acpi_get_hp_hw_control_from_firmware(dev, flags); + retval = acpi_get_hp_hw_control_from_firmware(dev, flags); + if (retval) + return retval; + return pciehp_acpi_slot_detection_check(dev); } static inline int pciehp_get_hp_params_from_firmware(struct pci_dev *dev, @@ -235,6 +247,7 @@ static inline int 
pciehp_get_hp_params_from_firmware(struct pci_dev *dev, return 0; } #else +#define pciehp_firmware_init() do {} while (0) #define pciehp_get_hp_hw_control_from_firmware(dev) 0 #define pciehp_get_hp_params_from_firmware(dev, hpp) (-ENODEV) #endif /* CONFIG_ACPI */ diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c new file mode 100644 index 000000000000..0cd49b728045 --- /dev/null +++ b/drivers/pci/hotplug/pciehp_acpi.c @@ -0,0 +1,114 @@ +/* + * ACPI related functions for PCI Express Hot Plug driver. + * + * Copyright (C) 2008 Kenji Kaneshige + * Copyright (C) 2008 Fujitsu Limited. + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + */ + +#include +#include "pciehp.h" + +#define PCIEHP_DETECT_PCIE (0) +#define PCIEHP_DETECT_ACPI (1) +#define PCIEHP_DETECT_DEFAULT PCIEHP_DETECT_PCIE + +static int slot_detection_mode; +static char *pciehp_detect_mode; +module_param(pciehp_detect_mode, charp, 0444); +MODULE_PARM_DESC(pciehp_detect_mode, + "Slot detection mode: pcie, acpi\n" + " pcie - Use PCIe based slot detection (default)\n" + " acpi - Use ACPI for slot detection\n"); + +static int is_ejectable(acpi_handle handle) +{ + acpi_status status; + acpi_handle tmp; + unsigned long long removable; + status = acpi_get_handle(handle, "_ADR", &tmp); + if (ACPI_FAILURE(status)) + return 0; + status = acpi_get_handle(handle, "_EJ0", &tmp); + if (ACPI_SUCCESS(status)) + return 1; + status = acpi_evaluate_integer(handle, "_RMV", NULL, &removable); + if (ACPI_SUCCESS(status) && removable) + return 1; + return 0; +} + +static acpi_status +check_hotplug(acpi_handle handle, u32 lvl, void *context, void **rv) +{ + int *found = (int *)context; + if (is_ejectable(handle)) { + *found = 1; + return AE_CTRL_TERMINATE; + } + return AE_OK; +} + +static int pciehp_detect_acpi_slot(struct pci_bus *pbus) +{ + acpi_handle handle; + struct pci_dev *pdev = pbus->self; + int found = 0; + + if (!pdev){ + int seg = pci_domain_nr(pbus), busnr = pbus->number; + handle = acpi_get_pci_rootbridge_handle(seg, busnr); + } else + handle = DEVICE_ACPI_HANDLE(&(pdev->dev)); + + if (!handle) + return 0; + + acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, + check_hotplug, (void *)&found, NULL); + return found; +} + +int pciehp_acpi_slot_detection_check(struct pci_dev *dev) +{ + if (slot_detection_mode != PCIEHP_DETECT_ACPI) + return 0; + if (pciehp_detect_acpi_slot(dev->subordinate)) + return 0; + return -ENODEV; +} + +static int __init parse_detect_mode(void) +{ + if (!pciehp_detect_mode) + return PCIEHP_DETECT_DEFAULT; + if (!strcmp(pciehp_detect_mode, "pcie")) + return PCIEHP_DETECT_PCIE; + if (!strcmp(pciehp_detect_mode, 
"acpi")) + return PCIEHP_DETECT_ACPI; + warn("bad specifier '%s' for pciehp_detect_mode. Use default\n", + pciehp_detect_mode); + return PCIEHP_DETECT_DEFAULT; +} + +void __init pciehp_acpi_slot_detection_init(void) +{ + slot_detection_mode = parse_detect_mode(); +} diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 39cf248d24e3..5482d4ed8256 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -522,6 +522,7 @@ static int __init pcied_init(void) { int retval = 0; + pciehp_firmware_init(); retval = pcie_port_service_register(&hpdriver_portdrv); dbg("pcie_port_service_register = %d\n", retval); info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); -- cgit 1.4.1 From e046cbd6c05ee859244245d7beeac395cd0057b3 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Wed, 17 Dec 2008 12:08:15 +0900 Subject: PCI: pciehp: add auto option to pciehp_detect_mode ACPI based hot-pluggable PCIe slot detection logic was added to prevent the problem non hot-pluggable PCIe slot was detected as hot-pluggable. The slot detection logic can be selected through 'pciehp_detect_mode', but it would be better if it is selected automatically. This patch adds 'auto' option for 'pciehp_detect_mode'. When it is specified, pciehp judges which 'acpi' or 'pcie' should be used. It seems that the physical slot number is duplicated among some slots on most of the platforms with the above-mentioned problem. So 'auto' mode uses this information to judge which 'acpi' or 'pcie' should be used. That is, if duplicated physical slot numbers are detected, 'acpi' mode is used. This method is not perfect, but it's realistic. 
Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pciehp_acpi.c | 80 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 76 insertions(+), 4 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c index 0cd49b728045..88a5c57f2e5b 100644 --- a/drivers/pci/hotplug/pciehp_acpi.c +++ b/drivers/pci/hotplug/pciehp_acpi.c @@ -28,15 +28,18 @@ #define PCIEHP_DETECT_PCIE (0) #define PCIEHP_DETECT_ACPI (1) -#define PCIEHP_DETECT_DEFAULT PCIEHP_DETECT_PCIE +#define PCIEHP_DETECT_AUTO (2) +#define PCIEHP_DETECT_DEFAULT PCIEHP_DETECT_AUTO static int slot_detection_mode; static char *pciehp_detect_mode; module_param(pciehp_detect_mode, charp, 0444); MODULE_PARM_DESC(pciehp_detect_mode, - "Slot detection mode: pcie, acpi\n" - " pcie - Use PCIe based slot detection (default)\n" - " acpi - Use ACPI for slot detection\n"); + "Slot detection mode: pcie, acpi, auto\n" + " pcie - Use PCIe based slot detection\n" + " acpi - Use ACPI for slot detection\n" + " auto(default) - Auto select mode. Use acpi option if duplicate\n" + " slot ids are found. Otherwise, use pcie option\n"); static int is_ejectable(acpi_handle handle) { @@ -103,12 +106,81 @@ static int __init parse_detect_mode(void) return PCIEHP_DETECT_PCIE; if (!strcmp(pciehp_detect_mode, "acpi")) return PCIEHP_DETECT_ACPI; + if (!strcmp(pciehp_detect_mode, "auto")) + return PCIEHP_DETECT_AUTO; warn("bad specifier '%s' for pciehp_detect_mode. 
Use default\n", pciehp_detect_mode); return PCIEHP_DETECT_DEFAULT; } +static struct pcie_port_service_id __initdata port_pci_ids[] = { + { + .vendor = PCI_ANY_ID, + .device = PCI_ANY_ID, + .port_type = PCIE_ANY_PORT, + .service_type = PCIE_PORT_SERVICE_HP, + .driver_data = 0, + }, { /* end: all zeroes */ } +}; + +static int __initdata dup_slot_id; +static int __initdata acpi_slot_detected; +static struct list_head __initdata dummy_slots = LIST_HEAD_INIT(dummy_slots); + +/* Dummy driver for duplicate name detection */ +static int __init dummy_probe(struct pcie_device *dev, + const struct pcie_port_service_id *id) +{ + int pos; + u32 slot_cap; + struct slot *slot, *tmp; + struct pci_dev *pdev = dev->port; + if (!(slot = kzalloc(sizeof(*slot), GFP_KERNEL))) + return -ENOMEM; + /* Note: pciehp_detect_mode != PCIEHP_DETECT_ACPI here */ + if (pciehp_get_hp_hw_control_from_firmware(pdev)) + return -ENODEV; + if (!(pos = pci_find_capability(pdev, PCI_CAP_ID_EXP))) + return -ENODEV; + pci_read_config_dword(pdev, pos + PCI_EXP_SLTCAP, &slot_cap); + slot->number = slot_cap >> 19; + list_for_each_entry(tmp, &dummy_slots, slot_list) { + if (tmp->number == slot->number) + dup_slot_id++; + } + list_add_tail(&slot->slot_list, &dummy_slots); + if (!acpi_slot_detected && pciehp_detect_acpi_slot(pdev->subordinate)) + acpi_slot_detected = 1; + return -ENODEV; /* dummy driver always returns error */ +} + +static struct pcie_port_service_driver __initdata dummy_driver = { + .name = "pciehp_dummy", + .id_table = port_pci_ids, + .probe = dummy_probe, +}; + +static int __init select_detection_mode(void) +{ + struct slot *slot, *tmp; + pcie_port_service_register(&dummy_driver); + pcie_port_service_unregister(&dummy_driver); + list_for_each_entry_safe(slot, tmp, &dummy_slots, slot_list) { + list_del(&slot->slot_list); + kfree(slot); + } + if (acpi_slot_detected && dup_slot_id) + return PCIEHP_DETECT_ACPI; + return PCIEHP_DETECT_PCIE; +} + void __init pciehp_acpi_slot_detection_init(void) {
slot_detection_mode = parse_detect_mode(); + if (slot_detection_mode != PCIEHP_DETECT_AUTO) + goto out; + slot_detection_mode = select_detection_mode(); +out: + if (slot_detection_mode == PCIEHP_DETECT_ACPI) + info("Using ACPI for slot detection.\n"); } -- cgit 1.4.1 From e8c331e963c58b83db24b7d0e39e8c07f687dbc6 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Wed, 17 Dec 2008 12:09:12 +0900 Subject: PCI hotplug: introduce functions for ACPI slot detection Some ACPI related PCI hotplug code can be shared among PCI hotplug drivers. This patch introduces the following functions in drivers/pci/hotplug/acpi_pcihp.c to share the code, and changes acpiphp and pciehp to use them. - int acpi_pci_detect_ejectable(struct pci_bus *pbus) This checks if the specified PCI bus has ejectable slots. - int acpi_pci_check_ejectable(struct pci_bus *pbus, acpi_handle handle) This checks if the specified handle is ejectable ACPI PCI slot. The 'pbus' parameter is needed to check if 'handle' is PCI related ACPI object. This patch also introduces the following inline function in include/linux/pci-acpi.h, which is useful to get ACPI handle of the PCI bridge from struct pci_bus of the bridge's secondary bus. - static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus) This returns ACPI handle of the PCI bridge which generates PCI bus specified by 'pbus'. 
Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/acpi_pcihp.c | 69 ++++++++++++++++++++++++ drivers/pci/hotplug/acpiphp_glue.c | 107 ++++++------------------------------- drivers/pci/hotplug/pciehp_acpi.c | 55 ++----------------- include/linux/pci-acpi.h | 9 ++++ include/linux/pci_hotplug.h | 2 + 5 files changed, 100 insertions(+), 142 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c index e17ef54f0efc..c62ab8d240aa 100644 --- a/drivers/pci/hotplug/acpi_pcihp.c +++ b/drivers/pci/hotplug/acpi_pcihp.c @@ -501,5 +501,74 @@ int acpi_root_bridge(acpi_handle handle) } EXPORT_SYMBOL_GPL(acpi_root_bridge); + +static int is_ejectable(acpi_handle handle) +{ + acpi_status status; + acpi_handle tmp; + unsigned long long removable; + status = acpi_get_handle(handle, "_ADR", &tmp); + if (ACPI_FAILURE(status)) + return 0; + status = acpi_get_handle(handle, "_EJ0", &tmp); + if (ACPI_SUCCESS(status)) + return 1; + status = acpi_evaluate_integer(handle, "_RMV", NULL, &removable); + if (ACPI_SUCCESS(status) && removable) + return 1; + return 0; +} + +/** + * acpi_pci_check_ejectable - check if handle is ejectable ACPI PCI slot + * @pbus: the PCI bus of the PCI slot corresponding to 'handle' + * @handle: ACPI handle to check + * + * Return 1 if handle is ejectable PCI slot, 0 otherwise.
+ */ +int acpi_pci_check_ejectable(struct pci_bus *pbus, acpi_handle handle) +{ + acpi_handle bridge_handle, parent_handle; + + if (!(bridge_handle = acpi_pci_get_bridge_handle(pbus))) + return 0; + if ((ACPI_FAILURE(acpi_get_parent(handle, &parent_handle)))) + return 0; + if (bridge_handle != parent_handle) + return 0; + return is_ejectable(handle); +} +EXPORT_SYMBOL_GPL(acpi_pci_check_ejectable); + +static acpi_status +check_hotplug(acpi_handle handle, u32 lvl, void *context, void **rv) +{ + int *found = (int *)context; + if (is_ejectable(handle)) { + *found = 1; + return AE_CTRL_TERMINATE; + } + return AE_OK; +} + +/** + * acpi_pci_detect_ejectable - check if the PCI bus has ejectable slots + * @pbus - PCI bus to scan + * + * Returns 1 if the PCI bus has ACPI based ejectable slots, 0 otherwise. + */ +int acpi_pci_detect_ejectable(struct pci_bus *pbus) +{ + acpi_handle handle; + int found = 0; + + if (!(handle = acpi_pci_get_bridge_handle(pbus))) + return 0; + acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, + check_hotplug, (void *)&found, NULL); + return found; +} +EXPORT_SYMBOL_GPL(acpi_pci_detect_ejectable); + module_param(debug_acpi, bool, 0644); MODULE_PARM_DESC(debug_acpi, "Debugging mode for ACPI enabled or not"); diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 7a5760426897..f09b1010d477 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include "../pci.h" @@ -62,68 +63,6 @@ static void acpiphp_sanitize_bus(struct pci_bus *bus); static void acpiphp_set_hpp_values(acpi_handle handle, struct pci_bus *bus); static void handle_hotplug_event_func(acpi_handle handle, u32 type, void *context); - -/* - * initialization & terminatation routines - */ - -/** - * is_ejectable - determine if a slot is ejectable - * @handle: handle to acpi namespace - * - * Ejectable slot should satisfy at least these conditions: - * - * 
1. has _ADR method - * 2. has _EJ0 method or _RMV method - * - * optionally - * - * 1. has _STA method - * 2. has _PS0 method - * 3. has _PS3 method - * 4. .. - */ -static int is_ejectable(acpi_handle handle) -{ - acpi_status status; - acpi_handle tmp; - unsigned long long removable; - - status = acpi_get_handle(handle, "_ADR", &tmp); - if (ACPI_FAILURE(status)) - return 0; - - status = acpi_get_handle(handle, "_EJ0", &tmp); - if (ACPI_SUCCESS(status)) - return 1; - - status = acpi_get_handle(handle, "_RMV", &tmp); - if (ACPI_SUCCESS(status)) { - status = acpi_evaluate_integer(handle, "_RMV", NULL, - &removable); - if (ACPI_SUCCESS(status) && removable) - return 1; - } - - return 0; -} - - -/* callback routine to check for the existence of ejectable slots */ -static acpi_status -is_ejectable_slot(acpi_handle handle, u32 lvl, void *context, void **rv) -{ - int *count = (int *)context; - - if (is_ejectable(handle)) { - (*count)++; - /* only one ejectable slot is enough */ - return AE_CTRL_TERMINATE; - } else { - return AE_OK; - } -} - /* callback routine to check for the existence of a pci dock device */ static acpi_status is_pci_dock_device(acpi_handle handle, u32 lvl, void *context, void **rv) @@ -138,9 +77,6 @@ is_pci_dock_device(acpi_handle handle, u32 lvl, void *context, void **rv) } } - - - /* * the _DCK method can do funny things... and sometimes not * hah-hah funny. 
@@ -191,8 +127,9 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv) acpi_status status = AE_OK; unsigned long long adr, sun; int device, function, retval; + struct pci_bus *pbus = bridge->pci_bus; - if (!is_ejectable(handle) && !is_dock_device(handle)) + if (!acpi_pci_check_ejectable(pbus, handle) && !is_dock_device(handle)) return AE_OK; acpi_evaluate_integer(handle, "_ADR", NULL, &adr); @@ -258,8 +195,7 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv) bridge->nr_slots++; dbg("found ACPI PCI Hotplug slot %llu at PCI %04x:%02x:%02x\n", - slot->sun, pci_domain_nr(bridge->pci_bus), - bridge->pci_bus->number, slot->device); + slot->sun, pci_domain_nr(pbus), pbus->number, device); retval = acpiphp_register_hotplug_slot(slot); if (retval) { if (retval == -EBUSY) @@ -276,8 +212,7 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv) list_add_tail(&newfunc->sibling, &slot->funcs); /* associate corresponding pci_dev */ - newfunc->pci_dev = pci_get_slot(bridge->pci_bus, - PCI_DEVFN(device, function)); + newfunc->pci_dev = pci_get_slot(pbus, PCI_DEVFN(device, function)); if (newfunc->pci_dev) { slot->flags |= (SLOT_ENABLED | SLOT_POWEREDON); } @@ -326,27 +261,15 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv) /* see if it's worth looking at this bridge */ -static int detect_ejectable_slots(acpi_handle *bridge_handle) +static int detect_ejectable_slots(struct pci_bus *pbus) { - acpi_status status; - int count; - - count = 0; - - /* only check slots defined directly below bridge object */ - status = acpi_walk_namespace(ACPI_TYPE_DEVICE, bridge_handle, (u32)1, - is_ejectable_slot, (void *)&count, NULL); - - /* - * we also need to add this bridge if there is a dock bridge or - * other pci device on a dock station (removable) - */ - if (!count) - status = acpi_walk_namespace(ACPI_TYPE_DEVICE, bridge_handle, - (u32)1, is_pci_dock_device, (void *)&count, - NULL); - - return count; + int found = 
acpi_pci_detect_ejectable(pbus); + if (!found) { + acpi_handle bridge_handle = acpi_pci_get_bridge_handle(pbus); + acpi_walk_namespace(ACPI_TYPE_DEVICE, bridge_handle, (u32)1, + is_pci_dock_device, (void *)&found, NULL); + } + return found; } @@ -556,7 +479,7 @@ find_p2p_bridge(acpi_handle handle, u32 lvl, void *context, void **rv) goto out; /* check if this bridge has ejectable slots */ - if ((detect_ejectable_slots(handle) > 0)) { + if ((detect_ejectable_slots(dev->subordinate) > 0)) { dbg("found PCI-to-PCI bridge at PCI %s\n", pci_name(dev)); add_p2p_bridge(handle, dev); } @@ -617,7 +540,7 @@ static int add_bridge(acpi_handle handle) } /* check if this bridge has ejectable slots */ - if (detect_ejectable_slots(handle) > 0) { + if (detect_ejectable_slots(pci_bus) > 0) { dbg("found PCI host-bus bridge with hot-pluggable slots\n"); add_host_bridge(handle, pci_bus); } diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c index 88a5c57f2e5b..438d795f9fe3 100644 --- a/drivers/pci/hotplug/pciehp_acpi.c +++ b/drivers/pci/hotplug/pciehp_acpi.c @@ -24,6 +24,8 @@ */ #include +#include +#include #include "pciehp.h" #define PCIEHP_DETECT_PCIE (0) @@ -41,59 +43,11 @@ MODULE_PARM_DESC(pciehp_detect_mode, " auto(default) - Auto select mode. Use acpi option if duplicate\n" " slot ids are found. 
Otherwise, use pcie option\n"); -static int is_ejectable(acpi_handle handle) -{ - acpi_status status; - acpi_handle tmp; - unsigned long long removable; - status = acpi_get_handle(handle, "_ADR", &tmp); - if (ACPI_FAILURE(status)) - return 0; - status = acpi_get_handle(handle, "_EJ0", &tmp); - if (ACPI_SUCCESS(status)) - return 1; - status = acpi_evaluate_integer(handle, "_RMV", NULL, &removable); - if (ACPI_SUCCESS(status) && removable) - return 1; - return 0; -} - -static acpi_status -check_hotplug(acpi_handle handle, u32 lvl, void *context, void **rv) -{ - int *found = (int *)context; - if (is_ejectable(handle)) { - *found = 1; - return AE_CTRL_TERMINATE; - } - return AE_OK; -} - -static int pciehp_detect_acpi_slot(struct pci_bus *pbus) -{ - acpi_handle handle; - struct pci_dev *pdev = pbus->self; - int found = 0; - - if (!pdev){ - int seg = pci_domain_nr(pbus), busnr = pbus->number; - handle = acpi_get_pci_rootbridge_handle(seg, busnr); - } else - handle = DEVICE_ACPI_HANDLE(&(pdev->dev)); - - if (!handle) - return 0; - - acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, (u32)1, - check_hotplug, (void *)&found, NULL); - return found; -} - int pciehp_acpi_slot_detection_check(struct pci_dev *dev) { if (slot_detection_mode != PCIEHP_DETECT_ACPI) return 0; - if (pciehp_detect_acpi_slot(dev->subordinate)) + if (acpi_pci_detect_ejectable(dev->subordinate)) return 0; return -ENODEV; } @@ -135,6 +89,7 @@ static int __init dummy_probe(struct pcie_device *dev, u32 slot_cap; struct slot *slot, *tmp; struct pci_dev *pdev = dev->port; + struct pci_bus *pbus = pdev->subordinate; if (!(slot = kzalloc(sizeof(*slot), GFP_KERNEL))) return -ENOMEM; /* Note: pciehp_detect_mode != PCIEHP_DETECT_ACPI here */ @@ -149,7 +104,7 @@ static int __init dummy_probe(struct pcie_device *dev, dup_slot_id++; } list_add_tail(&slot->slot_list, &dummy_slots); - if (!acpi_slot_detected && pciehp_detect_acpi_slot(pdev->subordinate)) + if (!acpi_slot_detected && acpi_pci_detect_ejectable(pbus)) 
acpi_slot_detected = 1; return -ENODEV; /* dummy driver always returns error */ } diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 871e096e0fbc..042c166f65d5 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -60,6 +60,15 @@ static inline acpi_handle acpi_find_root_bridge_handle(struct pci_dev *pdev) return acpi_get_pci_rootbridge_handle(pci_domain_nr(pdev->bus), pdev->bus->number); } + +static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus) +{ + int seg = pci_domain_nr(pbus), busnr = pbus->number; + struct pci_dev *bridge = pbus->self; + if (bridge) + return DEVICE_ACPI_HANDLE(&(bridge->dev)); + return acpi_get_pci_rootbridge_handle(seg, busnr); +} #else #if !defined(AE_ERROR) typedef u32 acpi_status; diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index a00bd1a0f156..f7cc204fab07 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -228,6 +228,8 @@ extern acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus, struct hotplug_params *hpp); int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags); int acpi_root_bridge(acpi_handle handle); +int acpi_pci_check_ejectable(struct pci_bus *pbus, acpi_handle handle); +int acpi_pci_detect_ejectable(struct pci_bus *pbus); #endif #endif -- cgit 1.4.1 From 68feac87de15edfc2c700d2d81b814288c93d003 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 16 Dec 2008 21:36:55 -0700 Subject: PCI: add pci_common_swizzle() for INTx swizzling This patch adds pci_common_swizzle(), which swizzles INTx values all the way up to a root bridge. This common implementation can replace several architecture-specific ones. This should someday be combined with pci_get_interrupt_pin(), but I left it separate for now to make reviewing easier. 
Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 20 ++++++++++++++++++++ include/linux/pci.h | 1 + 2 files changed, 21 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 9d2aa6366fd0..c824dc8d617c 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1421,6 +1421,26 @@ pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge) return pin; } +/** + * pci_common_swizzle - swizzle INTx all the way to root bridge + * @dev: the PCI device + * @pinp: pointer to the INTx pin value (1=INTA, 2=INTB, 3=INTC, 4=INTD) + * + * Perform INTx swizzling for a device. This traverses through all PCI-to-PCI + * bridges all the way up to a PCI root bus. + */ +u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp) +{ + u8 pin = *pinp; + + while (dev->bus->self) { + pin = pci_swizzle_interrupt_pin(dev, pin); + dev = dev->bus->self; + } + *pinp = pin; + return PCI_SLOT(dev->devfn); +} + /** * pci_release_region - Release a PCI bar * @pdev: PCI device whose resources were previously reserved by pci_request_region diff --git a/include/linux/pci.h b/include/linux/pci.h index da1c22bab40e..170f9ae2d8a0 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -545,6 +545,7 @@ struct resource *pci_find_parent_resource(const struct pci_dev *dev, struct resource *res); u8 pci_swizzle_interrupt_pin(struct pci_dev *dev, u8 pin); int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge); +u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp); extern struct pci_dev *pci_dev_get(struct pci_dev *dev); extern void pci_dev_put(struct pci_dev *dev); extern void pci_remove_bus(struct pci_bus *b); -- cgit 1.4.1 From 1120f8b8169fb2cb51219d326892d963e762edb6 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 18 Dec 2008 09:17:16 -0800 Subject: PCI: handle long delays in VPD access Accessing the VPD area can take a long time. The existing VPD access code fails consistently on my hardware.
There are comments in the SysKonnect vendor driver that it can take up to 13ms per word. Change the access routines to: * use a mutex rather than spinning with IRQ's disabled and lock held * have a much longer timeout * call cond_resched while spinning Signed-off-by: Stephen Hemminger Reviewed-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- drivers/pci/access.c | 55 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 22 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 39bb96b413ef..98ddba94b5b9 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -133,39 +133,46 @@ PCI_USER_WRITE_CONFIG(dword, u32) struct pci_vpd_pci22 { struct pci_vpd base; - spinlock_t lock; /* controls access to hardware and the flags */ - u8 cap; + struct mutex lock; + u16 flag; bool busy; - bool flag; /* value of F bit to wait for */ + u8 cap; }; -/* Wait for last operation to complete */ +/* + * Wait for last operation to complete. + * This code has to spin since there is no other notification from the PCI + * hardware. Since the VPD is often implemented by serial attachment to an + * EEPROM, it may take many milliseconds to complete. + */ static int pci_vpd_pci22_wait(struct pci_dev *dev) { struct pci_vpd_pci22 *vpd = container_of(dev->vpd, struct pci_vpd_pci22, base); - u16 flag, status; - int wait; + unsigned long timeout = jiffies + HZ/20 + 2; + u16 status; int ret; if (!vpd->busy) return 0; - flag = vpd->flag ? PCI_VPD_ADDR_F : 0; - wait = vpd->flag ? 
10 : 1000; /* read: 100 us; write: 10 ms */ for (;;) { - ret = pci_user_read_config_word(dev, - vpd->cap + PCI_VPD_ADDR, + ret = pci_user_read_config_word(dev, vpd->cap + PCI_VPD_ADDR, &status); - if (ret < 0) + if (ret) return ret; - if ((status & PCI_VPD_ADDR_F) == flag) { + + if ((status & PCI_VPD_ADDR_F) == vpd->flag) { vpd->busy = false; return 0; } - if (wait-- == 0) + + if (time_after(jiffies, timeout)) return -ETIMEDOUT; - udelay(10); + if (fatal_signal_pending(current)) + return -EINTR; + if (!cond_resched()) + udelay(10); } } @@ -175,7 +182,7 @@ static int pci_vpd_pci22_read(struct pci_dev *dev, int pos, int size, struct pci_vpd_pci22 *vpd = container_of(dev->vpd, struct pci_vpd_pci22, base); u32 val; - int ret; + int ret = 0; int begin, end, i; if (pos < 0 || pos > vpd->base.len || size > vpd->base.len - pos) @@ -183,7 +190,9 @@ static int pci_vpd_pci22_read(struct pci_dev *dev, int pos, int size, if (size == 0) return 0; - spin_lock_irq(&vpd->lock); + if (mutex_lock_killable(&vpd->lock)) + return -EINTR; + ret = pci_vpd_pci22_wait(dev); if (ret < 0) goto out; @@ -191,15 +200,16 @@ static int pci_vpd_pci22_read(struct pci_dev *dev, int pos, int size, pos & ~3); if (ret < 0) goto out; + vpd->busy = true; - vpd->flag = 1; + vpd->flag = PCI_VPD_ADDR_F; ret = pci_vpd_pci22_wait(dev); if (ret < 0) goto out; ret = pci_user_read_config_dword(dev, vpd->cap + PCI_VPD_DATA, &val); out: - spin_unlock_irq(&vpd->lock); + mutex_unlock(&vpd->lock); if (ret < 0) return ret; @@ -220,7 +230,7 @@ static int pci_vpd_pci22_write(struct pci_dev *dev, int pos, int size, struct pci_vpd_pci22 *vpd = container_of(dev->vpd, struct pci_vpd_pci22, base); u32 val; - int ret; + int ret = 0; if (pos < 0 || pos > vpd->base.len || pos & 3 || size > vpd->base.len - pos || size < 4) @@ -231,7 +241,8 @@ static int pci_vpd_pci22_write(struct pci_dev *dev, int pos, int size, val |= ((u8) *buf++) << 16; val |= ((u32)(u8) *buf++) << 24; - spin_lock_irq(&vpd->lock); + if 
(mutex_lock_killable(&vpd->lock)) + return -EINTR; ret = pci_vpd_pci22_wait(dev); if (ret < 0) goto out; @@ -247,7 +258,7 @@ static int pci_vpd_pci22_write(struct pci_dev *dev, int pos, int size, vpd->flag = 0; ret = pci_vpd_pci22_wait(dev); out: - spin_unlock_irq(&vpd->lock); + mutex_unlock(&vpd->lock); if (ret < 0) return ret; @@ -279,7 +290,7 @@ int pci_vpd_pci22_init(struct pci_dev *dev) vpd->base.len = PCI_VPD_PCI22_SIZE; vpd->base.ops = &pci_vpd_pci22_ops; - spin_lock_init(&vpd->lock); + mutex_init(&vpd->lock); vpd->cap = cap; vpd->busy = false; dev->vpd = &vpd->base; -- cgit 1.4.1 From 287d19ce2e67c15e79a187b3bdcbbea1a0a51a7d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 18 Dec 2008 09:17:16 -0800 Subject: PCI: revise VPD access interface Change PCI VPD API which was only used by sysfs to something usable in drivers. * move iteration over multiple words to the low level * use conventional types for arguments * add exportable wrapper Signed-off-by: Stephen Hemminger Signed-off-by: Jesse Barnes --- drivers/pci/access.c | 156 ++++++++++++++++++++++++++++++------------------ drivers/pci/pci-sysfs.c | 38 +++--------- drivers/pci/pci.h | 6 +- include/linux/pci.h | 4 ++ 4 files changed, 114 insertions(+), 90 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 98ddba94b5b9..86ec4ad44bcd 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -66,6 +66,39 @@ EXPORT_SYMBOL(pci_bus_write_config_byte); EXPORT_SYMBOL(pci_bus_write_config_word); EXPORT_SYMBOL(pci_bus_write_config_dword); + +/** + * pci_read_vpd - Read one entry from Vital Product Data + * @dev: pci device struct + * @pos: offset in vpd space + * @count: number of bytes to read + * @buf: pointer to where to store result + * + */ +ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf) +{ + if (!dev->vpd || !dev->vpd->ops) + return -ENODEV; + return dev->vpd->ops->read(dev, pos, count, buf); +} 
+EXPORT_SYMBOL(pci_read_vpd); + +/** + * pci_write_vpd - Write entry to Vital Product Data + * @dev: pci device struct + * @pos: offset in vpd space + * @count: number of bytes to write + * @buf: buffer containing the data to write + * + */ +ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf) +{ + if (!dev->vpd || !dev->vpd->ops) + return -ENODEV; + return dev->vpd->ops->write(dev, pos, count, buf); +} +EXPORT_SYMBOL(pci_write_vpd); + /* * The following routines are to prevent the user from accessing PCI config * space when it's unsafe to do so. Some devices require this during BIST and @@ -176,19 +209,17 @@ static int pci_vpd_pci22_wait(struct pci_dev *dev) } } -static int pci_vpd_pci22_read(struct pci_dev *dev, int pos, int size, - char *buf) +static ssize_t pci_vpd_pci22_read(struct pci_dev *dev, loff_t pos, size_t count, + void *arg) { struct pci_vpd_pci22 *vpd = container_of(dev->vpd, struct pci_vpd_pci22, base); - u32 val; - int ret = 0; - int begin, end, i; + int ret; + loff_t end = pos + count; + u8 *buf = arg; - if (pos < 0 || pos > vpd->base.len || size > vpd->base.len - pos) + if (pos < 0 || pos > vpd->base.len || end > vpd->base.len) return -EINVAL; - if (size == 0) - return 0; if (mutex_lock_killable(&vpd->lock)) return -EINTR; @@ -196,73 +227,84 @@ static int pci_vpd_pci22_read(struct pci_dev *dev, int pos, int size, ret = pci_vpd_pci22_wait(dev); if (ret < 0) goto out; - ret = pci_user_write_config_word(dev, vpd->cap + PCI_VPD_ADDR, - pos & ~3); - if (ret < 0) - goto out; - vpd->busy = true; - vpd->flag = PCI_VPD_ADDR_F; - ret = pci_vpd_pci22_wait(dev); - if (ret < 0) - goto out; - ret = pci_user_read_config_dword(dev, vpd->cap + PCI_VPD_DATA, - &val); + while (pos < end) { + u32 val; + unsigned int i, skip; + + ret = pci_user_write_config_word(dev, vpd->cap + PCI_VPD_ADDR, + pos & ~3); + if (ret < 0) + break; + vpd->busy = true; + vpd->flag = PCI_VPD_ADDR_F; + ret = pci_vpd_pci22_wait(dev); + if (ret < 0) + break; + + ret =
pci_user_read_config_dword(dev, vpd->cap + PCI_VPD_DATA, &val); + if (ret < 0) + break; + + skip = pos & 3; + for (i = 0; i < sizeof(u32); i++) { + if (i >= skip) { + *buf++ = val; + if (++pos == end) + break; + } + val >>= 8; + } + } out: mutex_unlock(&vpd->lock); - if (ret < 0) - return ret; - - /* Convert to bytes */ - begin = pos & 3; - end = min(4, begin + size); - for (i = 0; i < end; ++i) { - if (i >= begin) - *buf++ = val; - val >>= 8; - } - return end - begin; + return ret ? ret : count; } -static int pci_vpd_pci22_write(struct pci_dev *dev, int pos, int size, - const char *buf) +static ssize_t pci_vpd_pci22_write(struct pci_dev *dev, loff_t pos, size_t count, + const void *arg) { struct pci_vpd_pci22 *vpd = container_of(dev->vpd, struct pci_vpd_pci22, base); - u32 val; + const u8 *buf = arg; + loff_t end = pos + count; int ret = 0; - if (pos < 0 || pos > vpd->base.len || pos & 3 || - size > vpd->base.len - pos || size < 4) + if (pos < 0 || (pos & 3) || (count & 3) || end > vpd->base.len) return -EINVAL; - val = (u8) *buf++; - val |= ((u8) *buf++) << 8; - val |= ((u8) *buf++) << 16; - val |= ((u32)(u8) *buf++) << 24; - if (mutex_lock_killable(&vpd->lock)) return -EINTR; + ret = pci_vpd_pci22_wait(dev); if (ret < 0) goto out; - ret = pci_user_write_config_dword(dev, vpd->cap + PCI_VPD_DATA, - val); - if (ret < 0) - goto out; - ret = pci_user_write_config_word(dev, vpd->cap + PCI_VPD_ADDR, - pos | PCI_VPD_ADDR_F); - if (ret < 0) - goto out; - vpd->busy = true; - vpd->flag = 0; - ret = pci_vpd_pci22_wait(dev); + + while (pos < end) { + u32 val; + + val = *buf++; + val |= *buf++ << 8; + val |= *buf++ << 16; + val |= *buf++ << 24; + + ret = pci_user_write_config_dword(dev, vpd->cap + PCI_VPD_DATA, val); + if (ret < 0) + break; + ret = pci_user_write_config_word(dev, vpd->cap + PCI_VPD_ADDR, + pos | PCI_VPD_ADDR_F); + if (ret < 0) + break; + + vpd->busy = true; + vpd->flag = 0; + ret = pci_vpd_pci22_wait(dev); + + pos += sizeof(u32); + } out: 
mutex_unlock(&vpd->lock); - if (ret < 0) - return ret; - - return 4; + return ret ? ret : count; } static void pci_vpd_pci22_release(struct pci_dev *dev) @@ -270,7 +312,7 @@ static void pci_vpd_pci22_release(struct pci_dev *dev) kfree(container_of(dev->vpd, struct pci_vpd_pci22, base)); } -static struct pci_vpd_ops pci_vpd_pci22_ops = { +static const struct pci_vpd_ops pci_vpd_pci22_ops = { .read = pci_vpd_pci22_read, .write = pci_vpd_pci22_write, .release = pci_vpd_pci22_release, diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index ea54cedcdfc6..c23619fb6c4b 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -371,55 +371,33 @@ pci_write_config(struct kobject *kobj, struct bin_attribute *bin_attr, } static ssize_t -pci_read_vpd(struct kobject *kobj, struct bin_attribute *bin_attr, - char *buf, loff_t off, size_t count) +read_vpd_attr(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct pci_dev *dev = to_pci_dev(container_of(kobj, struct device, kobj)); - int end; - int ret; if (off > bin_attr->size) count = 0; else if (count > bin_attr->size - off) count = bin_attr->size - off; - end = off + count; - - while (off < end) { - ret = dev->vpd->ops->read(dev, off, end - off, buf); - if (ret < 0) - return ret; - buf += ret; - off += ret; - } - return count; + return pci_read_vpd(dev, off, count, buf); } static ssize_t -pci_write_vpd(struct kobject *kobj, struct bin_attribute *bin_attr, - char *buf, loff_t off, size_t count) +write_vpd_attr(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) { struct pci_dev *dev = to_pci_dev(container_of(kobj, struct device, kobj)); - int end; - int ret; if (off > bin_attr->size) count = 0; else if (count > bin_attr->size - off) count = bin_attr->size - off; - end = off + count; - - while (off < end) { - ret = dev->vpd->ops->write(dev, off, end - off, buf); - if (ret < 0) - return ret; - buf += ret; - off += ret; - 
} - return count; + return pci_write_vpd(dev, off, count, buf); } #ifdef HAVE_PCI_LEGACY @@ -845,8 +823,8 @@ static int pci_create_capabilities_sysfs(struct pci_dev *dev) attr->size = dev->vpd->len; attr->attr.name = "vpd"; attr->attr.mode = S_IRUSR | S_IWUSR; - attr->read = pci_read_vpd; - attr->write = pci_write_vpd; + attr->read = read_vpd_attr; + attr->write = write_vpd_attr; retval = sysfs_create_bin_file(&dev->dev.kobj, attr); if (retval) { kfree(dev->vpd->attr); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 65deed8bfc06..211fd418f48f 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -56,14 +56,14 @@ extern int pci_user_write_config_word(struct pci_dev *dev, int where, u16 val); extern int pci_user_write_config_dword(struct pci_dev *dev, int where, u32 val); struct pci_vpd_ops { - int (*read)(struct pci_dev *dev, int pos, int size, char *buf); - int (*write)(struct pci_dev *dev, int pos, int size, const char *buf); + ssize_t (*read)(struct pci_dev *dev, loff_t pos, size_t count, void *buf); + ssize_t (*write)(struct pci_dev *dev, loff_t pos, size_t count, const void *buf); void (*release)(struct pci_dev *dev); }; struct pci_vpd { unsigned int len; - struct pci_vpd_ops *ops; + const struct pci_vpd_ops *ops; struct bin_attribute *attr; /* descriptor for sysfs VPD entry */ }; diff --git a/include/linux/pci.h b/include/linux/pci.h index 170f9ae2d8a0..76079e106895 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -687,6 +687,10 @@ int pci_back_from_sleep(struct pci_dev *dev); /* Functions for PCI Hotplug drivers to use */ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap); +/* Vital product data routines */ +ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf); +ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf); + /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */ void pci_bus_assign_resources(struct pci_bus *bus); void 
pci_bus_size_bridges(struct pci_bus *bus); -- cgit 1.4.1 From db5679437a2b938c9127480a3923633721583a4f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 18 Dec 2008 09:17:16 -0800 Subject: PCI: add interface to set visible size of VPD The VPD on all devices may not be 32K. Unfortunately, there is no generic way to find the size, so this adds a simple API hook to reset it. Signed-off-by: Stephen Hemminger Signed-off-by: Jesse Barnes --- drivers/pci/access.c | 23 +++++++++++++++++++++++ include/linux/pci.h | 1 + 2 files changed, 24 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 86ec4ad44bcd..381444794778 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -339,6 +339,29 @@ int pci_vpd_pci22_init(struct pci_dev *dev) return 0; } +/** + * pci_vpd_truncate - Set available Vital Product Data size + * @dev: pci device struct + * @size: available memory in bytes + * + * Adjust size of available VPD area. + */ +int pci_vpd_truncate(struct pci_dev *dev, size_t size) +{ + if (!dev->vpd) + return -EINVAL; + + /* limited by the access method */ + if (size > dev->vpd->len) + return -EINVAL; + + dev->vpd->len = size; + dev->vpd->attr->size = size; + + return 0; +} +EXPORT_SYMBOL(pci_vpd_truncate); + /** * pci_block_user_cfg_access - Block userspace PCI config reads/writes * @dev: pci device struct diff --git a/include/linux/pci.h b/include/linux/pci.h index 76079e106895..7cbecef19bb6 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -690,6 +690,7 @@ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap); /* Vital product data routines */ ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf); ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf); +int pci_vpd_truncate(struct pci_dev *dev, size_t size); /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */ void pci_bus_assign_resources(struct 
pci_bus *bus); -- cgit 1.4.1 From a19f5df7d9696b9e53ba7c865816597057d5f76e Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 18 Dec 2008 16:34:19 -0700 Subject: PCI: use dev_printk for PCI bus resource messages Since pci_bus has a struct device, use dev_printk directly instead of faking it by hand. Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- drivers/pci/setup-bus.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 09e2c3cd2bef..704608945780 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -536,9 +536,8 @@ static void pci_bus_dump_res(struct pci_bus *bus) if (!res) continue; - printk(KERN_DEBUG "pci %04x:%02x: bus resource %d %s %pR\n", - pci_domain_nr(bus), bus->number, i, - (res->flags & IORESOURCE_IO) ? "io: " : "mem:", res); + dev_printk(KERN_DEBUG, &bus->dev, "resource %d %s %pR\n", i, + (res->flags & IORESOURCE_IO) ? "io: " : "mem:", res); } } -- cgit 1.4.1 From 46bbdfa44cfc0d352148a0dc33ba9f6db02ccdf0 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 19 Dec 2008 09:27:42 +0800 Subject: PCI: keep ASPM link state consistent throughout PCIe hierarchy In a PCIe hierarchy with a switch present, if the link state of an endpoint device is changed, we must check the whole hierarchy from the endpoint device to root port, and for each link in the hierarchy, the new link state should be configured. Previously, the implementation checked the state but forgot to configure the links between root port and switch. Fixes Novell bz #448987.
Signed-off-by: Shaohua Li Tested-by: Andrew Patterson Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aspm.c | 125 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 106 insertions(+), 19 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index e361c7dc726f..4d8e2c7b2ad1 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -33,6 +33,11 @@ struct endpoint_state { struct pcie_link_state { struct list_head sibiling; struct pci_dev *pdev; + bool downstream_has_switch; + + struct pcie_link_state *parent; + struct list_head children; + struct list_head link; /* ASPM state */ unsigned int support_state; @@ -125,7 +130,7 @@ static void pcie_set_clock_pm(struct pci_dev *pdev, int enable) link_state->clk_pm_enabled = !!enable; } -static void pcie_check_clock_pm(struct pci_dev *pdev) +static void pcie_check_clock_pm(struct pci_dev *pdev, int blacklist) { int pos; u32 reg32; @@ -149,10 +154,26 @@ static void pcie_check_clock_pm(struct pci_dev *pdev) if (!(reg16 & PCI_EXP_LNKCTL_CLKREQ_EN)) enabled = 0; } - link_state->clk_pm_capable = capable; link_state->clk_pm_enabled = enabled; link_state->bios_clk_state = enabled; - pcie_set_clock_pm(pdev, policy_to_clkpm_state(pdev)); + if (!blacklist) { + link_state->clk_pm_capable = capable; + pcie_set_clock_pm(pdev, policy_to_clkpm_state(pdev)); + } else { + link_state->clk_pm_capable = 0; + pcie_set_clock_pm(pdev, 0); + } +} + +static bool pcie_aspm_downstream_has_switch(struct pci_dev *pdev) +{ + struct pci_dev *child_dev; + + list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list) { + if (child_dev->pcie_type == PCI_EXP_TYPE_UPSTREAM) + return true; + } + return false; } /* @@ -419,9 +440,9 @@ static unsigned int pcie_aspm_check_state(struct pci_dev *pdev, { struct pci_dev *child_dev; - /* If no child, disable the link */ + /* If no child, ignore the link */ if (list_empty(&pdev->subordinate->devices)) - return 0; + return state; 
list_for_each_entry(child_dev, &pdev->subordinate->devices, bus_list) { if (child_dev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE) { /* @@ -462,6 +483,9 @@ static void __pcie_aspm_config_link(struct pci_dev *pdev, unsigned int state) int valid = 1; struct pcie_link_state *link_state = pdev->link_state; + /* If no child, disable the link */ + if (list_empty(&pdev->subordinate->devices)) + state = 0; /* * if the downstream component has pci bridge function, don't do ASPM * now @@ -493,20 +517,52 @@ static void __pcie_aspm_config_link(struct pci_dev *pdev, unsigned int state) link_state->enabled_state = state; } +static struct pcie_link_state *get_root_port_link(struct pcie_link_state *link) +{ + struct pcie_link_state *root_port_link = link; + while (root_port_link->parent) + root_port_link = root_port_link->parent; + return root_port_link; +} + +/* check the whole hierarchy, and configure each link in the hierarchy */ static void __pcie_aspm_configure_link_state(struct pci_dev *pdev, unsigned int state) { struct pcie_link_state *link_state = pdev->link_state; + struct pcie_link_state *root_port_link = get_root_port_link(link_state); + struct pcie_link_state *leaf; - if (link_state->support_state == 0) - return; state &= PCIE_LINK_STATE_L0S|PCIE_LINK_STATE_L1; - /* state 0 means disabling aspm */ - state = pcie_aspm_check_state(pdev, state); + /* check all links who have specific root port link */ + list_for_each_entry(leaf, &link_list, sibiling) { + if (!list_empty(&leaf->children) || + get_root_port_link(leaf) != root_port_link) + continue; + state = pcie_aspm_check_state(leaf->pdev, state); + } + /* check root port link too in case it hasn't children */ + state = pcie_aspm_check_state(root_port_link->pdev, state); + if (link_state->enabled_state == state) return; - __pcie_aspm_config_link(pdev, state); + + /* + * we must change the hierarchy. 
See comments in + * __pcie_aspm_config_link for the order + **/ + if (state & PCIE_LINK_STATE_L1) { + list_for_each_entry(leaf, &link_list, sibiling) { + if (get_root_port_link(leaf) == root_port_link) + __pcie_aspm_config_link(leaf->pdev, state); + } + } else { + list_for_each_entry_reverse(leaf, &link_list, sibiling) { + if (get_root_port_link(leaf) == root_port_link) + __pcie_aspm_config_link(leaf->pdev, state); + } + } } /* @@ -570,6 +626,7 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev) unsigned int state; struct pcie_link_state *link_state; int error = 0; + int blacklist; if (aspm_disabled || !pdev->is_pcie || pdev->link_state) return; @@ -580,29 +637,58 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev) if (list_empty(&pdev->subordinate->devices)) goto out; - if (pcie_aspm_sanity_check(pdev)) - goto out; + blacklist = !!pcie_aspm_sanity_check(pdev); mutex_lock(&aspm_lock); link_state = kzalloc(sizeof(*link_state), GFP_KERNEL); if (!link_state) goto unlock_out; - pdev->link_state = link_state; - pcie_aspm_configure_common_clock(pdev); + link_state->downstream_has_switch = pcie_aspm_downstream_has_switch(pdev); + INIT_LIST_HEAD(&link_state->children); + INIT_LIST_HEAD(&link_state->link); + if (pdev->bus->self) {/* this is a switch */ + struct pcie_link_state *parent_link_state; - pcie_aspm_cap_init(pdev); + parent_link_state = pdev->bus->parent->self->link_state; + if (!parent_link_state) { + kfree(link_state); + goto unlock_out; + } + list_add(&link_state->link, &parent_link_state->children); + link_state->parent = parent_link_state; + } - /* config link state to avoid BIOS error */ - state = pcie_aspm_check_state(pdev, policy_to_aspm_state(pdev)); - __pcie_aspm_config_link(pdev, state); + pdev->link_state = link_state; - pcie_check_clock_pm(pdev); + if (!blacklist) { + pcie_aspm_configure_common_clock(pdev); + pcie_aspm_cap_init(pdev); + } else { + link_state->enabled_state = PCIE_LINK_STATE_L0S|PCIE_LINK_STATE_L1; + link_state->bios_aspm_state 
= 0; + /* Set support state to 0, so we will disable ASPM later */ + link_state->support_state = 0; + } link_state->pdev = pdev; list_add(&link_state->sibiling, &link_list); + if (link_state->downstream_has_switch) { + /* + * If link has switch, delay the link config. The leaf link + * initialization will config the whole hierarchy. but we must + * make sure BIOS doesn't set unsupported link state + **/ + state = pcie_aspm_check_state(pdev, link_state->bios_aspm_state); + __pcie_aspm_config_link(pdev, state); + } else + __pcie_aspm_configure_link_state(pdev, + policy_to_aspm_state(pdev)); + + pcie_check_clock_pm(pdev, blacklist); + unlock_out: if (error) free_link_state(pdev); @@ -635,6 +721,7 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev) /* All functions are removed, so just disable ASPM for the link */ __pcie_aspm_config_one_dev(parent, 0); list_del(&link_state->sibiling); + list_del(&link_state->link); /* Clock PM is for endpoint device */ free_link_state(parent); -- cgit 1.4.1 From 67f6533802fd2cc6f5b3c6355ef72bcf636d7fda Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Fri, 19 Dec 2008 15:18:10 +0900 Subject: PCI: pciehp: ignore undefined bit in link status register Bit 10 in Link Status register used to be defined as Training Error in the PCI Express 1.0a specification. But it was removed by Training Error ECN and is no longer defined. So pciehp must ignore the value read from it. 
Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pciehp_hpc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index b643ca13e4f1..22b88cb17a07 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -419,8 +419,7 @@ static int hpc_check_lnk_status(struct controller *ctrl) } ctrl_dbg(ctrl, "%s: lnk_status = %x\n", __func__, lnk_status); - if ( (lnk_status & LNK_TRN) || (lnk_status & LNK_TRN_ERR) || - !(lnk_status & NEG_LINK_WD)) { + if ((lnk_status & LNK_TRN) || !(lnk_status & NEG_LINK_WD)) { ctrl_err(ctrl, "Link Training Error occurs \n"); retval = -1; return retval; -- cgit 1.4.1 From 322162a71bd9fc4edb1b11236e7bc8aa27ccac22 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Fri, 19 Dec 2008 15:19:02 +0900 Subject: PCI: pciehp: cleanup register and field definitions Clean up register definitions related to PCI Express Hot plug. - Add register definitions into include/linux/pci_regs.h, and use them instead of pciehp's locally defined register definitions. - Remove pciehp's locally defined register definitions - Remove unused register definitions in pciehp. - Some minor cleanups. 
Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pciehp_hpc.c | 328 ++++++++++++--------------------------- include/linux/pci_regs.h | 64 +++++++- 2 files changed, 156 insertions(+), 236 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 22b88cb17a07..71a8012886b0 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -42,42 +42,6 @@ static atomic_t pciehp_num_controllers = ATOMIC_INIT(0); -struct ctrl_reg { - u8 cap_id; - u8 nxt_ptr; - u16 cap_reg; - u32 dev_cap; - u16 dev_ctrl; - u16 dev_status; - u32 lnk_cap; - u16 lnk_ctrl; - u16 lnk_status; - u32 slot_cap; - u16 slot_ctrl; - u16 slot_status; - u16 root_ctrl; - u16 rsvp; - u32 root_status; -} __attribute__ ((packed)); - -/* offsets to the controller registers based on the above structure layout */ -enum ctrl_offsets { - PCIECAPID = offsetof(struct ctrl_reg, cap_id), - NXTCAPPTR = offsetof(struct ctrl_reg, nxt_ptr), - CAPREG = offsetof(struct ctrl_reg, cap_reg), - DEVCAP = offsetof(struct ctrl_reg, dev_cap), - DEVCTRL = offsetof(struct ctrl_reg, dev_ctrl), - DEVSTATUS = offsetof(struct ctrl_reg, dev_status), - LNKCAP = offsetof(struct ctrl_reg, lnk_cap), - LNKCTRL = offsetof(struct ctrl_reg, lnk_ctrl), - LNKSTATUS = offsetof(struct ctrl_reg, lnk_status), - SLOTCAP = offsetof(struct ctrl_reg, slot_cap), - SLOTCTRL = offsetof(struct ctrl_reg, slot_ctrl), - SLOTSTATUS = offsetof(struct ctrl_reg, slot_status), - ROOTCTRL = offsetof(struct ctrl_reg, root_ctrl), - ROOTSTATUS = offsetof(struct ctrl_reg, root_status), -}; - static inline int pciehp_readw(struct controller *ctrl, int reg, u16 *value) { struct pci_dev *dev = ctrl->pci_dev; @@ -102,95 +66,9 @@ static inline int pciehp_writel(struct controller *ctrl, int reg, u32 value) return pci_write_config_dword(dev, ctrl->cap_base + reg, value); } -/* Field definitions in PCI Express Capabilities Register */ -#define CAP_VER 
0x000F -#define DEV_PORT_TYPE 0x00F0 -#define SLOT_IMPL 0x0100 -#define MSG_NUM 0x3E00 - -/* Device or Port Type */ -#define NAT_ENDPT 0x00 -#define LEG_ENDPT 0x01 -#define ROOT_PORT 0x04 -#define UP_STREAM 0x05 -#define DN_STREAM 0x06 -#define PCIE_PCI_BRDG 0x07 -#define PCI_PCIE_BRDG 0x10 - -/* Field definitions in Device Capabilities Register */ -#define DATTN_BUTTN_PRSN 0x1000 -#define DATTN_LED_PRSN 0x2000 -#define DPWR_LED_PRSN 0x4000 - -/* Field definitions in Link Capabilities Register */ -#define MAX_LNK_SPEED 0x000F -#define MAX_LNK_WIDTH 0x03F0 -#define LINK_ACTIVE_REPORTING 0x00100000 - -/* Link Width Encoding */ -#define LNK_X1 0x01 -#define LNK_X2 0x02 -#define LNK_X4 0x04 -#define LNK_X8 0x08 -#define LNK_X12 0x0C -#define LNK_X16 0x10 -#define LNK_X32 0x20 - -/*Field definitions of Link Status Register */ -#define LNK_SPEED 0x000F -#define NEG_LINK_WD 0x03F0 -#define LNK_TRN_ERR 0x0400 -#define LNK_TRN 0x0800 -#define SLOT_CLK_CONF 0x1000 -#define LINK_ACTIVE 0x2000 - -/* Field definitions in Slot Capabilities Register */ -#define ATTN_BUTTN_PRSN 0x00000001 -#define PWR_CTRL_PRSN 0x00000002 -#define MRL_SENS_PRSN 0x00000004 -#define ATTN_LED_PRSN 0x00000008 -#define PWR_LED_PRSN 0x00000010 -#define HP_SUPR_RM_SUP 0x00000020 -#define HP_CAP 0x00000040 -#define SLOT_PWR_VALUE 0x000003F8 -#define SLOT_PWR_LIMIT 0x00000C00 -#define PSN 0xFFF80000 /* PSN: Physical Slot Number */ - -/* Field definitions in Slot Control Register */ -#define ATTN_BUTTN_ENABLE 0x0001 -#define PWR_FAULT_DETECT_ENABLE 0x0002 -#define MRL_DETECT_ENABLE 0x0004 -#define PRSN_DETECT_ENABLE 0x0008 -#define CMD_CMPL_INTR_ENABLE 0x0010 -#define HP_INTR_ENABLE 0x0020 -#define ATTN_LED_CTRL 0x00C0 -#define PWR_LED_CTRL 0x0300 -#define PWR_CTRL 0x0400 -#define EMI_CTRL 0x0800 - -/* Attention indicator and Power indicator states */ -#define LED_ON 0x01 -#define LED_BLINK 0x10 -#define LED_OFF 0x11 - /* Power Control Command */ #define POWER_ON 0 -#define POWER_OFF 0x0400 - -/* EMI Status 
defines */ -#define EMI_DISENGAGED 0 -#define EMI_ENGAGED 1 - -/* Field definitions in Slot Status Register */ -#define ATTN_BUTTN_PRESSED 0x0001 -#define PWR_FAULT_DETECTED 0x0002 -#define MRL_SENS_CHANGED 0x0004 -#define PRSN_DETECT_CHANGED 0x0008 -#define CMD_COMPLETED 0x0010 -#define MRL_STATE 0x0020 -#define PRSN_STATE 0x0040 -#define EMI_STATE 0x0080 -#define EMI_STATUS_BIT 7 +#define POWER_OFF PCI_EXP_SLTCTL_PCC static irqreturn_t pcie_isr(int irq, void *dev_id); static void start_int_poll_timer(struct controller *ctrl, int sec); @@ -253,22 +131,20 @@ static inline void pciehp_free_irq(struct controller *ctrl) static int pcie_poll_cmd(struct controller *ctrl) { u16 slot_status; - int timeout = 1000; + int err, timeout = 1000; - if (!pciehp_readw(ctrl, SLOTSTATUS, &slot_status)) { - if (slot_status & CMD_COMPLETED) { - pciehp_writew(ctrl, SLOTSTATUS, CMD_COMPLETED); - return 1; - } + err = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status); + if (!err && (slot_status & PCI_EXP_SLTSTA_CC)) { + pciehp_writew(ctrl, PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_CC); + return 1; } while (timeout > 0) { msleep(10); timeout -= 10; - if (!pciehp_readw(ctrl, SLOTSTATUS, &slot_status)) { - if (slot_status & CMD_COMPLETED) { - pciehp_writew(ctrl, SLOTSTATUS, CMD_COMPLETED); - return 1; - } + err = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status); + if (!err && (slot_status & PCI_EXP_SLTSTA_CC)) { + pciehp_writew(ctrl, PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_CC); + return 1; } } return 0; /* timeout */ @@ -302,14 +178,14 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask) mutex_lock(&ctrl->ctrl_lock); - retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status); + retval = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status); if (retval) { ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n", __func__); goto out; } - if (slot_status & CMD_COMPLETED) { + if (slot_status & PCI_EXP_SLTSTA_CC) { if (!ctrl->no_cmd_complete) { /* * After 1 sec and CMD_COMPLETED still not set, just @@ 
-332,7 +208,7 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask) } } - retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl); + retval = pciehp_readw(ctrl, PCI_EXP_SLTCTL, &slot_ctrl); if (retval) { ctrl_err(ctrl, "%s: Cannot read SLOTCTRL register\n", __func__); goto out; @@ -342,7 +218,7 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask) slot_ctrl |= (cmd & mask); ctrl->cmd_busy = 1; smp_mb(); - retval = pciehp_writew(ctrl, SLOTCTRL, slot_ctrl); + retval = pciehp_writew(ctrl, PCI_EXP_SLTCTL, slot_ctrl); if (retval) ctrl_err(ctrl, "Cannot write to SLOTCTRL register\n"); @@ -356,8 +232,8 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask) * completed interrupt is not enabled, we need to poll * command completed event. */ - if (!(slot_ctrl & HP_INTR_ENABLE) || - !(slot_ctrl & CMD_CMPL_INTR_ENABLE)) + if (!(slot_ctrl & PCI_EXP_SLTCTL_HPIE) || + !(slot_ctrl & PCI_EXP_SLTCTL_CCIE)) poll = 1; pcie_wait_cmd(ctrl, poll); } @@ -370,9 +246,9 @@ static inline int check_link_active(struct controller *ctrl) { u16 link_status; - if (pciehp_readw(ctrl, LNKSTATUS, &link_status)) + if (pciehp_readw(ctrl, PCI_EXP_LNKSTA, &link_status)) return 0; - return !!(link_status & LINK_ACTIVE); + return !!(link_status & PCI_EXP_LNKSTA_DLLLA); } static void pcie_wait_link_active(struct controller *ctrl) @@ -412,14 +288,15 @@ static int hpc_check_lnk_status(struct controller *ctrl) } else msleep(1000); - retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status); + retval = pciehp_readw(ctrl, PCI_EXP_LNKSTA, &lnk_status); if (retval) { ctrl_err(ctrl, "Cannot read LNKSTATUS register\n"); return retval; } ctrl_dbg(ctrl, "%s: lnk_status = %x\n", __func__, lnk_status); - if ((lnk_status & LNK_TRN) || !(lnk_status & NEG_LINK_WD)) { + if ((lnk_status & PCI_EXP_LNKSTA_LT) || + !(lnk_status & PCI_EXP_LNKSTA_NLW)) { ctrl_err(ctrl, "Link Training Error occurs \n"); retval = -1; return retval; @@ -435,16 +312,16 @@ static int 
hpc_get_attention_status(struct slot *slot, u8 *status) u8 atten_led_state; int retval = 0; - retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl); + retval = pciehp_readw(ctrl, PCI_EXP_SLTCTL, &slot_ctrl); if (retval) { ctrl_err(ctrl, "%s: Cannot read SLOTCTRL register\n", __func__); return retval; } ctrl_dbg(ctrl, "%s: SLOTCTRL %x, value read %x\n", - __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl); + __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_ctrl); - atten_led_state = (slot_ctrl & ATTN_LED_CTRL) >> 6; + atten_led_state = (slot_ctrl & PCI_EXP_SLTCTL_AIC) >> 6; switch (atten_led_state) { case 0: @@ -474,15 +351,15 @@ static int hpc_get_power_status(struct slot *slot, u8 *status) u8 pwr_state; int retval = 0; - retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl); + retval = pciehp_readw(ctrl, PCI_EXP_SLTCTL, &slot_ctrl); if (retval) { ctrl_err(ctrl, "%s: Cannot read SLOTCTRL register\n", __func__); return retval; } ctrl_dbg(ctrl, "%s: SLOTCTRL %x value read %x\n", - __func__, ctrl->cap_base + SLOTCTRL, slot_ctrl); + __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_ctrl); - pwr_state = (slot_ctrl & PWR_CTRL) >> 10; + pwr_state = (slot_ctrl & PCI_EXP_SLTCTL_PCC) >> 10; switch (pwr_state) { case 0: @@ -503,17 +380,15 @@ static int hpc_get_latch_status(struct slot *slot, u8 *status) { struct controller *ctrl = slot->ctrl; u16 slot_status; - int retval = 0; + int retval; - retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status); + retval = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status); if (retval) { ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n", __func__); return retval; } - - *status = (((slot_status & MRL_STATE) >> 5) == 0) ? 
0 : 1; - + *status = !!(slot_status & PCI_EXP_SLTSTA_MRLSS); return 0; } @@ -521,18 +396,15 @@ static int hpc_get_adapter_status(struct slot *slot, u8 *status) { struct controller *ctrl = slot->ctrl; u16 slot_status; - u8 card_state; - int retval = 0; + int retval; - retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status); + retval = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status); if (retval) { ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n", __func__); return retval; } - card_state = (u8)((slot_status & PRSN_STATE) >> 6); - *status = (card_state == 1) ? 1 : 0; - + *status = !!(slot_status & PCI_EXP_SLTSTA_PDS); return 0; } @@ -540,32 +412,28 @@ static int hpc_query_power_fault(struct slot *slot) { struct controller *ctrl = slot->ctrl; u16 slot_status; - u8 pwr_fault; - int retval = 0; + int retval; - retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status); + retval = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status); if (retval) { ctrl_err(ctrl, "Cannot check for power fault\n"); return retval; } - pwr_fault = (u8)((slot_status & PWR_FAULT_DETECTED) >> 1); - - return pwr_fault; + return !!(slot_status & PCI_EXP_SLTSTA_PFD); } static int hpc_get_emi_status(struct slot *slot, u8 *status) { struct controller *ctrl = slot->ctrl; u16 slot_status; - int retval = 0; + int retval; - retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status); + retval = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status); if (retval) { ctrl_err(ctrl, "Cannot check EMI status\n"); return retval; } - *status = (slot_status & EMI_STATE) >> EMI_STATUS_BIT; - + *status = !!(slot_status & PCI_EXP_SLTSTA_EIS); return retval; } @@ -575,8 +443,8 @@ static int hpc_toggle_emi(struct slot *slot) u16 cmd_mask; int rc; - slot_cmd = EMI_CTRL; - cmd_mask = EMI_CTRL; + slot_cmd = PCI_EXP_SLTCTL_EIC; + cmd_mask = PCI_EXP_SLTCTL_EIC; rc = pcie_write_cmd(slot->ctrl, slot_cmd, cmd_mask); slot->last_emi_toggle = get_seconds(); @@ -590,7 +458,7 @@ static int hpc_set_attention_status(struct slot *slot, u8 value) u16 
cmd_mask; int rc; - cmd_mask = ATTN_LED_CTRL; + cmd_mask = PCI_EXP_SLTCTL_AIC; switch (value) { case 0 : /* turn off */ slot_cmd = 0x00C0; @@ -606,7 +474,7 @@ static int hpc_set_attention_status(struct slot *slot, u8 value) } rc = pcie_write_cmd(ctrl, slot_cmd, cmd_mask); ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", - __func__, ctrl->cap_base + SLOTCTRL, slot_cmd); + __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); return rc; } @@ -618,10 +486,10 @@ static void hpc_set_green_led_on(struct slot *slot) u16 cmd_mask; slot_cmd = 0x0100; - cmd_mask = PWR_LED_CTRL; + cmd_mask = PCI_EXP_SLTCTL_PIC; pcie_write_cmd(ctrl, slot_cmd, cmd_mask); ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", - __func__, ctrl->cap_base + SLOTCTRL, slot_cmd); + __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); } static void hpc_set_green_led_off(struct slot *slot) @@ -631,10 +499,10 @@ static void hpc_set_green_led_off(struct slot *slot) u16 cmd_mask; slot_cmd = 0x0300; - cmd_mask = PWR_LED_CTRL; + cmd_mask = PCI_EXP_SLTCTL_PIC; pcie_write_cmd(ctrl, slot_cmd, cmd_mask); ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", - __func__, ctrl->cap_base + SLOTCTRL, slot_cmd); + __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); } static void hpc_set_green_led_blink(struct slot *slot) @@ -644,10 +512,10 @@ static void hpc_set_green_led_blink(struct slot *slot) u16 cmd_mask; slot_cmd = 0x0200; - cmd_mask = PWR_LED_CTRL; + cmd_mask = PCI_EXP_SLTCTL_PIC; pcie_write_cmd(ctrl, slot_cmd, cmd_mask); ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", - __func__, ctrl->cap_base + SLOTCTRL, slot_cmd); + __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); } static int hpc_power_on_slot(struct slot * slot) @@ -661,15 +529,15 @@ static int hpc_power_on_slot(struct slot * slot) ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot); /* Clear sticky power-fault bit from previous power failures */ - retval = pciehp_readw(ctrl, SLOTSTATUS, &slot_status); + retval = pciehp_readw(ctrl, 
PCI_EXP_SLTSTA, &slot_status); if (retval) { ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS register\n", __func__); return retval; } - slot_status &= PWR_FAULT_DETECTED; + slot_status &= PCI_EXP_SLTSTA_PFD; if (slot_status) { - retval = pciehp_writew(ctrl, SLOTSTATUS, slot_status); + retval = pciehp_writew(ctrl, PCI_EXP_SLTSTA, slot_status); if (retval) { ctrl_err(ctrl, "%s: Cannot write to SLOTSTATUS register\n", @@ -679,13 +547,13 @@ static int hpc_power_on_slot(struct slot * slot) } slot_cmd = POWER_ON; - cmd_mask = PWR_CTRL; + cmd_mask = PCI_EXP_SLTCTL_PCC; /* Enable detection that we turned off at slot power-off time */ if (!pciehp_poll_mode) { - slot_cmd |= (PWR_FAULT_DETECT_ENABLE | MRL_DETECT_ENABLE | - PRSN_DETECT_ENABLE); - cmd_mask |= (PWR_FAULT_DETECT_ENABLE | MRL_DETECT_ENABLE | - PRSN_DETECT_ENABLE); + slot_cmd |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE | + PCI_EXP_SLTCTL_PDCE); + cmd_mask |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE | + PCI_EXP_SLTCTL_PDCE); } retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask); @@ -695,7 +563,7 @@ static int hpc_power_on_slot(struct slot * slot) return -1; } ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", - __func__, ctrl->cap_base + SLOTCTRL, slot_cmd); + __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); return retval; } @@ -752,7 +620,7 @@ static int hpc_power_off_slot(struct slot * slot) changed = pcie_mask_bad_dllp(ctrl); slot_cmd = POWER_OFF; - cmd_mask = PWR_CTRL; + cmd_mask = PCI_EXP_SLTCTL_PCC; /* * If we get MRL or presence detect interrupts now, the isr * will notice the sticky power-fault bit too and issue power @@ -761,10 +629,10 @@ static int hpc_power_off_slot(struct slot * slot) * till the slot is powered on again. 
*/ if (!pciehp_poll_mode) { - slot_cmd &= ~(PWR_FAULT_DETECT_ENABLE | MRL_DETECT_ENABLE | - PRSN_DETECT_ENABLE); - cmd_mask |= (PWR_FAULT_DETECT_ENABLE | MRL_DETECT_ENABLE | - PRSN_DETECT_ENABLE); + slot_cmd &= ~(PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE | + PCI_EXP_SLTCTL_PDCE); + cmd_mask |= (PCI_EXP_SLTCTL_PFDE | PCI_EXP_SLTCTL_MRLSCE | + PCI_EXP_SLTCTL_PDCE); } retval = pcie_write_cmd(ctrl, slot_cmd, cmd_mask); @@ -774,7 +642,7 @@ static int hpc_power_off_slot(struct slot * slot) goto out; } ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", - __func__, ctrl->cap_base + SLOTCTRL, slot_cmd); + __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd); out: if (changed) pcie_unmask_bad_dllp(ctrl); @@ -795,19 +663,19 @@ static irqreturn_t pcie_isr(int irq, void *dev_id) */ intr_loc = 0; do { - if (pciehp_readw(ctrl, SLOTSTATUS, &detected)) { + if (pciehp_readw(ctrl, PCI_EXP_SLTSTA, &detected)) { ctrl_err(ctrl, "%s: Cannot read SLOTSTATUS\n", __func__); return IRQ_NONE; } - detected &= (ATTN_BUTTN_PRESSED | PWR_FAULT_DETECTED | - MRL_SENS_CHANGED | PRSN_DETECT_CHANGED | - CMD_COMPLETED); + detected &= (PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD | + PCI_EXP_SLTSTA_MRLSC | PCI_EXP_SLTSTA_PDC | + PCI_EXP_SLTSTA_CC); intr_loc |= detected; if (!intr_loc) return IRQ_NONE; - if (detected && pciehp_writew(ctrl, SLOTSTATUS, detected)) { + if (detected && pciehp_writew(ctrl, PCI_EXP_SLTSTA, detected)) { ctrl_err(ctrl, "%s: Cannot write to SLOTSTATUS\n", __func__); return IRQ_NONE; @@ -817,31 +685,31 @@ static irqreturn_t pcie_isr(int irq, void *dev_id) ctrl_dbg(ctrl, "%s: intr_loc %x\n", __func__, intr_loc); /* Check Command Complete Interrupt Pending */ - if (intr_loc & CMD_COMPLETED) { + if (intr_loc & PCI_EXP_SLTSTA_CC) { ctrl->cmd_busy = 0; smp_mb(); wake_up(&ctrl->queue); } - if (!(intr_loc & ~CMD_COMPLETED)) + if (!(intr_loc & ~PCI_EXP_SLTSTA_CC)) return IRQ_HANDLED; p_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset); /* Check MRL Sensor Changed */ - if (intr_loc & 
MRL_SENS_CHANGED) + if (intr_loc & PCI_EXP_SLTSTA_MRLSC) pciehp_handle_switch_change(p_slot); /* Check Attention Button Pressed */ - if (intr_loc & ATTN_BUTTN_PRESSED) + if (intr_loc & PCI_EXP_SLTSTA_ABP) pciehp_handle_attention_button(p_slot); /* Check Presence Detect Changed */ - if (intr_loc & PRSN_DETECT_CHANGED) + if (intr_loc & PCI_EXP_SLTSTA_PDC) pciehp_handle_presence_change(p_slot); /* Check Power Fault Detected */ - if (intr_loc & PWR_FAULT_DETECTED) + if (intr_loc & PCI_EXP_SLTSTA_PFD) pciehp_handle_power_fault(p_slot); return IRQ_HANDLED; @@ -854,7 +722,7 @@ static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value) u32 lnk_cap; int retval = 0; - retval = pciehp_readl(ctrl, LNKCAP, &lnk_cap); + retval = pciehp_readl(ctrl, PCI_EXP_LNKCAP, &lnk_cap); if (retval) { ctrl_err(ctrl, "%s: Cannot read LNKCAP register\n", __func__); return retval; @@ -883,13 +751,13 @@ static int hpc_get_max_lnk_width(struct slot *slot, u32 lnk_cap; int retval = 0; - retval = pciehp_readl(ctrl, LNKCAP, &lnk_cap); + retval = pciehp_readl(ctrl, PCI_EXP_LNKCAP, &lnk_cap); if (retval) { ctrl_err(ctrl, "%s: Cannot read LNKCAP register\n", __func__); return retval; } - switch ((lnk_cap & 0x03F0) >> 4){ + switch ((lnk_cap & PCI_EXP_LNKSTA_NLW) >> 4){ case 0: lnk_wdth = PCIE_LNK_WIDTH_RESRV; break; @@ -932,14 +800,14 @@ static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value) int retval = 0; u16 lnk_status; - retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status); + retval = pciehp_readw(ctrl, PCI_EXP_LNKSTA, &lnk_status); if (retval) { ctrl_err(ctrl, "%s: Cannot read LNKSTATUS register\n", __func__); return retval; } - switch (lnk_status & 0x0F) { + switch (lnk_status & PCI_EXP_LNKSTA_CLS) { case 1: lnk_speed = PCIE_2PT5GB; break; @@ -962,14 +830,14 @@ static int hpc_get_cur_lnk_width(struct slot *slot, int retval = 0; u16 lnk_status; - retval = pciehp_readw(ctrl, LNKSTATUS, &lnk_status); + retval = pciehp_readw(ctrl, PCI_EXP_LNKSTA, &lnk_status); 
if (retval) { ctrl_err(ctrl, "%s: Cannot read LNKSTATUS register\n", __func__); return retval; } - switch ((lnk_status & 0x03F0) >> 4){ + switch ((lnk_status & PCI_EXP_LNKSTA_NLW) >> 4){ case 0: lnk_wdth = PCIE_LNK_WIDTH_RESRV; break; @@ -1035,18 +903,19 @@ int pcie_enable_notification(struct controller *ctrl) { u16 cmd, mask; - cmd = PRSN_DETECT_ENABLE; + cmd = PCI_EXP_SLTCTL_PDCE; if (ATTN_BUTTN(ctrl)) - cmd |= ATTN_BUTTN_ENABLE; + cmd |= PCI_EXP_SLTCTL_ABPE; if (POWER_CTRL(ctrl)) - cmd |= PWR_FAULT_DETECT_ENABLE; + cmd |= PCI_EXP_SLTCTL_PFDE; if (MRL_SENS(ctrl)) - cmd |= MRL_DETECT_ENABLE; + cmd |= PCI_EXP_SLTCTL_MRLSCE; if (!pciehp_poll_mode) - cmd |= HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE; + cmd |= PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE; - mask = PRSN_DETECT_ENABLE | ATTN_BUTTN_ENABLE | MRL_DETECT_ENABLE | - PWR_FAULT_DETECT_ENABLE | HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE; + mask = (PCI_EXP_SLTCTL_PDCE | PCI_EXP_SLTCTL_ABPE | + PCI_EXP_SLTCTL_MRLSCE | PCI_EXP_SLTCTL_PFDE | + PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE); if (pcie_write_cmd(ctrl, cmd, mask)) { ctrl_err(ctrl, "Cannot enable software notification\n"); @@ -1058,8 +927,9 @@ int pcie_enable_notification(struct controller *ctrl) static void pcie_disable_notification(struct controller *ctrl) { u16 mask; - mask = PRSN_DETECT_ENABLE | ATTN_BUTTN_ENABLE | MRL_DETECT_ENABLE | - PWR_FAULT_DETECT_ENABLE | HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE; + mask = (PCI_EXP_SLTCTL_PDCE | PCI_EXP_SLTCTL_ABPE | + PCI_EXP_SLTCTL_MRLSCE | PCI_EXP_SLTCTL_PFDE | + PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE); if (pcie_write_cmd(ctrl, 0, mask)) ctrl_warn(ctrl, "Cannot disable software notification\n"); } @@ -1156,9 +1026,9 @@ static inline void dbg_ctrl(struct controller *ctrl) EMI(ctrl) ? "yes" : "no"); ctrl_info(ctrl, " Command Completed : %3s\n", NO_CMD_CMPL(ctrl) ? 
"no" : "yes"); - pciehp_readw(ctrl, SLOTSTATUS, &reg16); + pciehp_readw(ctrl, PCI_EXP_SLTSTA, &reg16); ctrl_info(ctrl, "Slot Status : 0x%04x\n", reg16); - pciehp_readw(ctrl, SLOTCTRL, &reg16); + pciehp_readw(ctrl, PCI_EXP_SLTCTL, &reg16); ctrl_info(ctrl, "Slot Control : 0x%04x\n", reg16); } @@ -1182,7 +1052,7 @@ struct controller *pcie_init(struct pcie_device *dev) ctrl_err(ctrl, "Cannot find PCI Express capability\n"); goto abort_ctrl; } - if (pciehp_readl(ctrl, SLOTCAP, &slot_cap)) { + if (pciehp_readl(ctrl, PCI_EXP_SLTCAP, &slot_cap)) { ctrl_err(ctrl, "Cannot read SLOTCAP register\n"); goto abort_ctrl; } @@ -1207,17 +1077,17 @@ struct controller *pcie_init(struct pcie_device *dev) ctrl->no_cmd_complete = 1; /* Check if Data Link Layer Link Active Reporting is implemented */ - if (pciehp_readl(ctrl, LNKCAP, &link_cap)) { + if (pciehp_readl(ctrl, PCI_EXP_LNKCAP, &link_cap)) { ctrl_err(ctrl, "%s: Cannot read LNKCAP register\n", __func__); goto abort_ctrl; } - if (link_cap & LINK_ACTIVE_REPORTING) { + if (link_cap & PCI_EXP_LNKCAP_DLLLARC) { ctrl_dbg(ctrl, "Link Active Reporting supported\n"); ctrl->link_active_reporting = 1; } /* Clear all remaining event bits in Slot Status register */ - if (pciehp_writew(ctrl, SLOTSTATUS, 0x1f)) + if (pciehp_writew(ctrl, PCI_EXP_SLTSTA, 0x1f)) goto abort_ctrl; /* Disable sotfware notification */ diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index 7766488470e4..027815b4635e 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -411,20 +411,70 @@ #define PCI_EXP_DEVSTA_AUXPD 0x10 /* AUX Power Detected */ #define PCI_EXP_DEVSTA_TRPND 0x20 /* Transactions Pending */ #define PCI_EXP_LNKCAP 12 /* Link Capabilities */ -#define PCI_EXP_LNKCAP_ASPMS 0xc00 /* ASPM Support */ -#define PCI_EXP_LNKCAP_L0SEL 0x7000 /* L0s Exit Latency */ -#define PCI_EXP_LNKCAP_L1EL 0x38000 /* L1 Exit Latency */ -#define PCI_EXP_LNKCAP_CLKPM 0x40000 /* L1 Clock Power Management */ +#define PCI_EXP_LNKCAP_SLS 0x0000000f /* Supported 
Link Speeds */ +#define PCI_EXP_LNKCAP_MLW 0x000003f0 /* Maximum Link Width */ +#define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */ +#define PCI_EXP_LNKCAP_L0SEL 0x00007000 /* L0s Exit Latency */ +#define PCI_EXP_LNKCAP_L1EL 0x00038000 /* L1 Exit Latency */ +#define PCI_EXP_LNKCAP_CLKPM 0x00040000 /* L1 Clock Power Management */ +#define PCI_EXP_LNKCAP_SDERC 0x00080000 /* Suprise Down Error Reporting Capable */ +#define PCI_EXP_LNKCAP_DLLLARC 0x00100000 /* Data Link Layer Link Active Reporting Capable */ +#define PCI_EXP_LNKCAP_LBNC 0x00200000 /* Link Bandwidth Notification Capability */ +#define PCI_EXP_LNKCAP_PN 0xff000000 /* Port Number */ #define PCI_EXP_LNKCTL 16 /* Link Control */ -#define PCI_EXP_LNKCTL_RL 0x20 /* Retrain Link */ -#define PCI_EXP_LNKCTL_CCC 0x40 /* Common Clock COnfiguration */ +#define PCI_EXP_LNKCTL_ASPMC 0x0003 /* ASPM Control */ +#define PCI_EXP_LNKCTL_RCB 0x0008 /* Read Completion Boundary */ +#define PCI_EXP_LNKCTL_LD 0x0010 /* Link Disable */ +#define PCI_EXP_LNKCTL_RL 0x0020 /* Retrain Link */ +#define PCI_EXP_LNKCTL_CCC 0x0040 /* Common Clock Configuration */ +#define PCI_EXP_LNKCTL_ES 0x0080 /* Extended Synch */ #define PCI_EXP_LNKCTL_CLKREQ_EN 0x100 /* Enable clkreq */ +#define PCI_EXP_LNKCTL_HAWD 0x0200 /* Hardware Autonomous Width Disable */ +#define PCI_EXP_LNKCTL_LBMIE 0x0400 /* Link Bandwidth Management Interrupt Enable */ +#define PCI_EXP_LNKCTL_LABIE 0x0800 /* Lnk Autonomous Bandwidth Interrupt Enable */ #define PCI_EXP_LNKSTA 18 /* Link Status */ -#define PCI_EXP_LNKSTA_LT 0x800 /* Link Training */ +#define PCI_EXP_LNKSTA_CLS 0x000f /* Current Link Speed */ +#define PCI_EXP_LNKSTA_NLW 0x03f0 /* Nogotiated Link Width */ +#define PCI_EXP_LNKSTA_LT 0x0800 /* Link Training */ #define PCI_EXP_LNKSTA_SLC 0x1000 /* Slot Clock Configuration */ +#define PCI_EXP_LNKSTA_DLLLA 0x2000 /* Data Link Layer Link Active */ +#define PCI_EXP_LNKSTA_LBMS 0x4000 /* Link Bandwidth Management Status */ +#define PCI_EXP_LNKSTA_LABS 0x8000 
/* Link Autonomous Bandwidth Status */ #define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ +#define PCI_EXP_SLTCAP_ABP 0x00000001 /* Attention Button Present */ +#define PCI_EXP_SLTCAP_PCP 0x00000002 /* Power Controller Present */ +#define PCI_EXP_SLTCAP_MRLSP 0x00000004 /* MRL Sensor Present */ +#define PCI_EXP_SLTCAP_AIP 0x00000008 /* Attention Indicator Present */ +#define PCI_EXP_SLTCAP_PIP 0x00000010 /* Power Indicator Present */ +#define PCI_EXP_SLTCAP_HPS 0x00000020 /* Hot-Plug Surprise */ +#define PCI_EXP_SLTCAP_HPC 0x00000040 /* Hot-Plug Capable */ +#define PCI_EXP_SLTCAP_SPLV 0x00007f80 /* Slot Power Limit Value */ +#define PCI_EXP_SLTCAP_SPLS 0x00018000 /* Slot Power Limit Scale */ +#define PCI_EXP_SLTCAP_EIP 0x00020000 /* Electromechanical Interlock Present */ +#define PCI_EXP_SLTCAP_NCCS 0x00040000 /* No Command Completed Support */ +#define PCI_EXP_SLTCAP_PSN 0xfff80000 /* Physical Slot Number */ #define PCI_EXP_SLTCTL 24 /* Slot Control */ +#define PCI_EXP_SLTCTL_ABPE 0x0001 /* Attention Button Pressed Enable */ +#define PCI_EXP_SLTCTL_PFDE 0x0002 /* Power Fault Detected Enable */ +#define PCI_EXP_SLTCTL_MRLSCE 0x0004 /* MRL Sensor Changed Enable */ +#define PCI_EXP_SLTCTL_PDCE 0x0008 /* Presence Detect Changed Enable */ +#define PCI_EXP_SLTCTL_CCIE 0x0010 /* Command Completed Interrupt Enable */ +#define PCI_EXP_SLTCTL_HPIE 0x0020 /* Hot-Plug Interrupt Enable */ +#define PCI_EXP_SLTCTL_AIC 0x00c0 /* Attention Indicator Control */ +#define PCI_EXP_SLTCTL_PIC 0x0300 /* Power Indicator Control */ +#define PCI_EXP_SLTCTL_PCC 0x0400 /* Power Controller Control */ +#define PCI_EXP_SLTCTL_EIC 0x0800 /* Electromechanical Interlock Control */ +#define PCI_EXP_SLTCTL_DLLSCE 0x1000 /* Data Link Layer State Changed Enable */ #define PCI_EXP_SLTSTA 26 /* Slot Status */ +#define PCI_EXP_SLTSTA_ABP 0x0001 /* Attention Button Pressed */ +#define PCI_EXP_SLTSTA_PFD 0x0002 /* Power Fault Detected */ +#define PCI_EXP_SLTSTA_MRLSC 0x0004 /* MRL Sensor Changed */ +#define 
PCI_EXP_SLTSTA_PDC 0x0008 /* Presence Detect Changed */ +#define PCI_EXP_SLTSTA_CC 0x0010 /* Command Completed */ +#define PCI_EXP_SLTSTA_MRLSS 0x0020 /* MRL Sensor State */ +#define PCI_EXP_SLTSTA_PDS 0x0040 /* Presence Detect State */ +#define PCI_EXP_SLTSTA_EIS 0x0080 /* Electromechanical Interlock Status */ +#define PCI_EXP_SLTSTA_DLLSC 0x0100 /* Data Link Layer State Changed */ #define PCI_EXP_RTCTL 28 /* Root Control */ #define PCI_EXP_RTCTL_SECEE 0x01 /* System Error on Correctable Error */ #define PCI_EXP_RTCTL_SENFEE 0x02 /* System Error on Non-Fatal Error */ -- cgit 1.4.1 From b8d9cb2a2226118fd71f657c80b06b670a653022 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 21 Dec 2008 16:39:37 +0100 Subject: PCI hotplug: remove redundant test in cpq hotplug func is checked not to be NULL a few lines before. A simplified version of the semantic patch that makes this change is as follows: (http://www.emn.fr/x-info/coccinelle/) // @r exists@ local idexpression x; expression E; position p1,p2; @@ if (x@p1 == NULL || ...) { ... when forall return ...; } ... when != \(x=E\|x--\|x++\|--x\|++x\|x-=E\|x+=E\|x|=E\|x&=E\|&x\) ( x@p2 == NULL | x@p2 != NULL ) // another path to the test that is not through p1? @s exists@ local idexpression r.x; position r.p1,r.p2; @@ ... when != x@p1 ( x@p2 == NULL | x@p2 != NULL ) @fix depends on !s@ position r.p1,r.p2; expression x,E; statement S1,S2; @@ ( - if ((x@p2 != NULL) || ...) S1 | - if ((x@p2 == NULL) && ...) 
S1 | - BUG_ON(x@p2 == NULL); ) // Signed-off-by: Julia Lawall Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/cpqphp_ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/hotplug/cpqphp_ctrl.c b/drivers/pci/hotplug/cpqphp_ctrl.c index d94722149e02..cc227a8c4b11 100644 --- a/drivers/pci/hotplug/cpqphp_ctrl.c +++ b/drivers/pci/hotplug/cpqphp_ctrl.c @@ -1954,7 +1954,7 @@ void cpqhp_pushbutton_thread(unsigned long slot) return ; } - if (func != NULL && ctrl != NULL) { + if (ctrl != NULL) { if (cpqhp_process_SI(ctrl, func) != 0) { amber_LED_on(ctrl, hp_slot); green_LED_off(ctrl, hp_slot); -- cgit 1.4.1 From 6a479079c07211bf348ac8a79754f26bea258f26 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 23 Dec 2008 03:08:29 +0000 Subject: PCI: Add pci_clear_master() as opposite of pci_set_master() During an online device reset it may be useful to disable bus-mastering. pci_disable_device() does that, and far more besides, so is not suitable for an online reset. Add pci_clear_master() which does just this. Signed-off-by: Ben Hutchings Reviewed-by: Matthew Wilcox Signed-off-by: Jesse Barnes --- Documentation/PCI/pci.txt | 3 ++- drivers/pci/pci.c | 39 ++++++++++++++++++++++++++++----------- include/linux/pci.h | 1 + 3 files changed, 31 insertions(+), 12 deletions(-) (limited to 'drivers/pci') diff --git a/Documentation/PCI/pci.txt b/Documentation/PCI/pci.txt index fd4907a2968c..7f6de6ea5b47 100644 --- a/Documentation/PCI/pci.txt +++ b/Documentation/PCI/pci.txt @@ -294,7 +294,8 @@ NOTE: pci_enable_device() can fail! Check the return value. pci_set_master() will enable DMA by setting the bus master bit in the PCI_COMMAND register. It also fixes the latency timer value if -it's set to something bogus by the BIOS. +it's set to something bogus by the BIOS. pci_clear_master() will +disable DMA by clearing the bus master bit. If the PCI device can use the PCI Memory-Write-Invalidate transaction, call pci_set_mwi(). 
This enables the PCI_COMMAND bit for Mem-Wr-Inval diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index c824dc8d617c..f3fd55df67db 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1667,6 +1667,22 @@ int pci_request_regions_exclusive(struct pci_dev *pdev, const char *res_name) ((1 << 6) - 1), res_name); } +static void __pci_set_master(struct pci_dev *dev, bool enable) +{ + u16 old_cmd, cmd; + + pci_read_config_word(dev, PCI_COMMAND, &old_cmd); + if (enable) + cmd = old_cmd | PCI_COMMAND_MASTER; + else + cmd = old_cmd & ~PCI_COMMAND_MASTER; + if (cmd != old_cmd) { + dev_dbg(&dev->dev, "%s bus mastering\n", + enable ? "enabling" : "disabling"); + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + dev->is_busmaster = enable; +} /** * pci_set_master - enables bus-mastering for device dev @@ -1675,21 +1691,21 @@ int pci_request_regions_exclusive(struct pci_dev *pdev, const char *res_name) * Enables bus-mastering on the device and calls pcibios_set_master() * to do the needed arch specific settings. */ -void -pci_set_master(struct pci_dev *dev) +void pci_set_master(struct pci_dev *dev) { - u16 cmd; - - pci_read_config_word(dev, PCI_COMMAND, &cmd); - if (! 
(cmd & PCI_COMMAND_MASTER)) { - dev_dbg(&dev->dev, "enabling bus mastering\n"); - cmd |= PCI_COMMAND_MASTER; - pci_write_config_word(dev, PCI_COMMAND, cmd); - } - dev->is_busmaster = 1; + __pci_set_master(dev, true); pcibios_set_master(dev); } +/** + * pci_clear_master - disables bus-mastering for device dev + * @dev: the PCI device to disable + */ +void pci_clear_master(struct pci_dev *dev) +{ + __pci_set_master(dev, false); +} + #ifdef PCI_DISABLE_MWI int pci_set_mwi(struct pci_dev *dev) { @@ -2346,6 +2362,7 @@ EXPORT_SYMBOL(pci_release_selected_regions); EXPORT_SYMBOL(pci_request_selected_regions); EXPORT_SYMBOL(pci_request_selected_regions_exclusive); EXPORT_SYMBOL(pci_set_master); +EXPORT_SYMBOL(pci_clear_master); EXPORT_SYMBOL(pci_set_mwi); EXPORT_SYMBOL(pci_try_set_mwi); EXPORT_SYMBOL(pci_clear_mwi); diff --git a/include/linux/pci.h b/include/linux/pci.h index 7cbecef19bb6..0f6d2bb1df9c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -642,6 +642,7 @@ static inline int pci_is_managed(struct pci_dev *pdev) void pci_disable_device(struct pci_dev *dev); void pci_set_master(struct pci_dev *dev); +void pci_clear_master(struct pci_dev *dev); int pci_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state); #define HAVE_PCI_SET_MWI int __must_check pci_set_mwi(struct pci_dev *dev); -- cgit 1.4.1 From facf6d1627a33badbbc154524c4a2c73c51bdc99 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 1 Jan 2009 19:48:55 +0100 Subject: PCI: PCIe portdrv: Add kerneldoc comments to some core functions Add kerneldoc comments to some functions in drivers/pci/pcie/portdrv_core.c, since the code in there is not easy to follow without any additional description. Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/pcie/portdrv_core.c | 77 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 70 insertions(+), 7 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 6a9f83ccaff4..8f09b353d2af 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -94,12 +94,12 @@ static int pcie_port_resume_service(struct device *dev) return 0; } -/* - * release_pcie_device - * - * Being invoked automatically when device is being removed - * in response to device_unregister(dev) call. - * Release all resources being claimed. +/** + * release_pcie_device - free PCI Express port service device structure + * @dev: Port service device to release + * + * Invoked automatically when device is being removed in response to + * device_unregister(dev). Release all resources being claimed. */ static void release_pcie_device(struct device *dev) { @@ -127,7 +127,16 @@ static int is_msi_quirked(struct pci_dev *dev) } return quirk; } - + +/** + * assign_interrupt_mode - choose interrupt mode for PCI Express port services + * (INTx, MSI-X, MSI) and set up vectors + * @dev: PCI Express port to handle + * @vectors: Array of interrupt vectors to populate + * @mask: Bitmask of port capabilities returned by get_port_device_capability() + * + * Return value: Interrupt mode associated with the port + */ static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask) { int i, pos, nvec, status = -EINVAL; @@ -174,6 +183,16 @@ static int assign_interrupt_mode(struct pci_dev *dev, int *vectors, int mask) return interrupt_mode; } +/** + * get_port_device_capability - discover capabilities of a PCI Express port + * @dev: PCI Express port to examine + * + * The capabilities are read from the port's PCI Express configuration registers + * as described in PCI Express Base Specification 1.0a sections 7.8.2, 7.8.9 and + * 7.9 - 7.11. 
+ * + * Return value: Bitmask of discovered port capabilities + */ static int get_port_device_capability(struct pci_dev *dev) { int services = 0, pos; @@ -201,6 +220,15 @@ static int get_port_device_capability(struct pci_dev *dev) return services; } +/** + * pcie_device_init - initialize PCI Express port service device + * @dev: Port service device to initialize + * @parent: PCI Express port to associate the service device with + * @port_type: Type of the port + * @service_type: Type of service to associate with the service device + * @irq: Interrupt vector to associate with the service device + * @irq_mode: Interrupt mode of the service (INTx, MSI-X, MSI) + */ static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev, int port_type, int service_type, int irq, int irq_mode) { @@ -226,6 +254,14 @@ static void pcie_device_init(struct pci_dev *parent, struct pcie_device *dev, device->parent = &parent->dev; } +/** + * alloc_pcie_device - allocate PCI Express port service device structure + * @parent: PCI Express port to associate the service device with + * @port_type: Type of the port + * @service_type: Type of service to associate with the service device + * @irq: Interrupt vector to associate with the service device + * @irq_mode: Interrupt mode of the service (INTx, MSI-X, MSI) + */ static struct pcie_device* alloc_pcie_device(struct pci_dev *parent, int port_type, int service_type, int irq, int irq_mode) { @@ -239,6 +275,10 @@ static struct pcie_device* alloc_pcie_device(struct pci_dev *parent, return device; } +/** + * pcie_port_device_probe - check if device is a PCI Express port + * @dev: Device to check + */ int pcie_port_device_probe(struct pci_dev *dev) { int pos, type; @@ -256,6 +296,13 @@ int pcie_port_device_probe(struct pci_dev *dev) return -ENODEV; } +/** + * pcie_port_device_register - register PCI Express port + * @dev: PCI Express port to register + * + * Allocate the port extension structure and register services associated with + 
* the port. + */ int pcie_port_device_register(struct pci_dev *dev) { struct pcie_port_device_ext *p_ext; @@ -319,6 +366,11 @@ static int suspend_iter(struct device *dev, void *data) return 0; } +/** + * pcie_port_device_suspend - suspend port services associated with a PCIe port + * @dev: PCI Express port to handle + * @state: Representation of system power management transition in progress + */ int pcie_port_device_suspend(struct pci_dev *dev, pm_message_t state) { return device_for_each_child(&dev->dev, &state, suspend_iter); @@ -337,6 +389,10 @@ static int resume_iter(struct device *dev, void *data) return 0; } +/** + * pcie_port_device_resume - resume port services associated with a PCIe port + * @dev: PCI Express port to handle + */ int pcie_port_device_resume(struct pci_dev *dev) { return device_for_each_child(&dev->dev, NULL, resume_iter); @@ -359,6 +415,13 @@ static int remove_iter(struct device *dev, void *data) return 0; } +/** + * pcie_port_device_remove - unregister PCI Express port service devices + * @dev: PCI Express port the service devices to unregister are associated with + * + * Remove PCI Express port service devices associated with given port and + * disable MSI-X or MSI for the port. + */ void pcie_port_device_remove(struct pci_dev *dev) { struct device *device; -- cgit 1.4.1 From e7ae88486670f3904f187d0fff9dcf469bcdd8ba Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 1 Jan 2009 19:51:15 +0100 Subject: PCI: PCIe portdrv: Fix suspend and resume of PCI Express port services There is a problem with the suspend and resume of PCI Express port service devices that the ->suspend() and ->resume() routines of each service device are called twice in each suspend-resume cycle, which is obviously wrong.
The scenario is that first, the PCI Express port driver calls suspend and resume routines of each port service driver from its pcie_portdrv_suspend() and pcie_portdrv_resume() callbacks, respectively (which is correct), and second, the pcie_port_bus_type driver calls them from its ->suspend() and ->resume() callbacks (which is not correct, because it doesn't happen at the right time). The solution is to remove the ->suspend() and ->resume() callbacks from pcie_port_bus_type and the associated functions. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/pcie/portdrv_bus.c | 34 ---------------------------------- drivers/pci/pcie/portdrv_core.c | 33 --------------------------------- 2 files changed, 67 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/portdrv_bus.c b/drivers/pci/pcie/portdrv_bus.c index 359fe5568df1..3a03db4323ad 100644 --- a/drivers/pci/pcie/portdrv_bus.c +++ b/drivers/pci/pcie/portdrv_bus.c @@ -16,14 +16,10 @@ #include "portdrv.h" static int pcie_port_bus_match(struct device *dev, struct device_driver *drv); -static int pcie_port_bus_suspend(struct device *dev, pm_message_t state); -static int pcie_port_bus_resume(struct device *dev); struct bus_type pcie_port_bus_type = { .name = "pci_express", .match = pcie_port_bus_match, - .suspend = pcie_port_bus_suspend, - .resume = pcie_port_bus_resume, }; EXPORT_SYMBOL_GPL(pcie_port_bus_type); @@ -48,33 +44,3 @@ static int pcie_port_bus_match(struct device *dev, struct device_driver *drv) return 1; } - -static int pcie_port_bus_suspend(struct device *dev, pm_message_t state) -{ - struct pcie_device *pciedev; - struct pcie_port_service_driver *driver; - - if (!dev || !dev->driver) - return 0; - - pciedev = to_pcie_device(dev); - driver = to_service_driver(dev->driver); - if (driver && driver->suspend) - driver->suspend(pciedev, state); - return 0; -} - -static int pcie_port_bus_resume(struct device *dev) -{ - struct pcie_device *pciedev; - struct 
pcie_port_service_driver *driver; - - if (!dev || !dev->driver) - return 0; - - pciedev = to_pcie_device(dev); - driver = to_service_driver(dev->driver); - if (driver && driver->resume) - driver->resume(pciedev); - return 0; -} diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 8f09b353d2af..6ca5a40cc61c 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -63,37 +63,6 @@ static int pcie_port_remove_service(struct device *dev) static void pcie_port_shutdown_service(struct device *dev) {} -static int pcie_port_suspend_service(struct device *dev, pm_message_t state) -{ - struct pcie_device *pciedev; - struct pcie_port_service_driver *driver; - - if (!dev || !dev->driver) - return 0; - - pciedev = to_pcie_device(dev); - driver = to_service_driver(dev->driver); - if (driver && driver->suspend) - driver->suspend(pciedev, state); - return 0; -} - -static int pcie_port_resume_service(struct device *dev) -{ - struct pcie_device *pciedev; - struct pcie_port_service_driver *driver; - - if (!dev || !dev->driver) - return 0; - - pciedev = to_pcie_device(dev); - driver = to_service_driver(dev->driver); - - if (driver && driver->resume) - driver->resume(pciedev); - return 0; -} - /** * release_pcie_device - free PCI Express port service device structure * @dev: Port service device to release @@ -462,8 +431,6 @@ int pcie_port_service_register(struct pcie_port_service_driver *new) new->driver.probe = pcie_port_probe_service; new->driver.remove = pcie_port_remove_service; new->driver.shutdown = pcie_port_shutdown_service; - new->driver.suspend = pcie_port_suspend_service; - new->driver.resume = pcie_port_resume_service; return driver_register(&new->driver); } -- cgit 1.4.1 From fa6c993736cb8cc18af86b8d17b608efa7882ab5 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Thu, 1 Jan 2009 19:52:12 +0100 Subject: PCI: PCIe portdrv: Rearrange code so that related things are together Rearrange code in drivers/pci/pcie/portdrv_bus.c and drivers/pci/pcie/portdrv_core.c so that related functions and data structures are closer together. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/pcie/portdrv_bus.c | 10 +++++ drivers/pci/pcie/portdrv_core.c | 90 ++++++++++++++++++----------------------- 2 files changed, 50 insertions(+), 50 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/portdrv_bus.c b/drivers/pci/pcie/portdrv_bus.c index 3a03db4323ad..eec89b767f9f 100644 --- a/drivers/pci/pcie/portdrv_bus.c +++ b/drivers/pci/pcie/portdrv_bus.c @@ -44,3 +44,13 @@ static int pcie_port_bus_match(struct device *dev, struct device_driver *drv) return 1; } + +int pcie_port_bus_register(void) +{ + return bus_register(&pcie_port_bus_type); +} + +void pcie_port_bus_unregister(void) +{ + bus_unregister(&pcie_port_bus_type); +} diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 6ca5a40cc61c..a0d51e3f34f4 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -19,50 +19,6 @@ extern int pcie_mch_quirk; /* MSI-quirk Indicator */ -static int pcie_port_probe_service(struct device *dev) -{ - struct pcie_device *pciedev; - struct pcie_port_service_driver *driver; - int status; - - if (!dev || !dev->driver) - return -ENODEV; - - driver = to_service_driver(dev->driver); - if (!driver || !driver->probe) - return -ENODEV; - - pciedev = to_pcie_device(dev); - status = driver->probe(pciedev, driver->id_table); - if (!status) { - dev_printk(KERN_DEBUG, dev, "service driver %s loaded\n", - driver->name); - get_device(dev); - } - return status; -} - -static int pcie_port_remove_service(struct device *dev) -{ - struct pcie_device *pciedev; - struct pcie_port_service_driver *driver; - - if (!dev || !dev->driver) - return 0; - - pciedev = 
to_pcie_device(dev); - driver = to_service_driver(dev->driver); - if (driver && driver->remove) { - dev_printk(KERN_DEBUG, dev, "unloading service driver %s\n", - driver->name); - driver->remove(pciedev); - put_device(dev); - } - return 0; -} - -static void pcie_port_shutdown_service(struct device *dev) {} - /** * release_pcie_device - free PCI Express port service device structure * @dev: Port service device to release @@ -414,16 +370,50 @@ void pcie_port_device_remove(struct pci_dev *dev) pci_disable_msi(dev); } -int pcie_port_bus_register(void) +static int pcie_port_probe_service(struct device *dev) { - return bus_register(&pcie_port_bus_type); + struct pcie_device *pciedev; + struct pcie_port_service_driver *driver; + int status; + + if (!dev || !dev->driver) + return -ENODEV; + + driver = to_service_driver(dev->driver); + if (!driver || !driver->probe) + return -ENODEV; + + pciedev = to_pcie_device(dev); + status = driver->probe(pciedev, driver->id_table); + if (!status) { + dev_printk(KERN_DEBUG, dev, "service driver %s loaded\n", + driver->name); + get_device(dev); + } + return status; } -void pcie_port_bus_unregister(void) +static int pcie_port_remove_service(struct device *dev) { - bus_unregister(&pcie_port_bus_type); + struct pcie_device *pciedev; + struct pcie_port_service_driver *driver; + + if (!dev || !dev->driver) + return 0; + + pciedev = to_pcie_device(dev); + driver = to_service_driver(dev->driver); + if (driver && driver->remove) { + dev_printk(KERN_DEBUG, dev, "unloading service driver %s\n", + driver->name); + driver->remove(pciedev); + put_device(dev); + } + return 0; } +static void pcie_port_shutdown_service(struct device *dev) {} + int pcie_port_service_register(struct pcie_port_service_driver *new) { new->driver.name = (char *)new->name; @@ -435,9 +425,9 @@ int pcie_port_service_register(struct pcie_port_service_driver *new) return driver_register(&new->driver); } -void pcie_port_service_unregister(struct pcie_port_service_driver *new) 
+void pcie_port_service_unregister(struct pcie_port_service_driver *drv) { - driver_unregister(&new->driver); + driver_unregister(&drv->driver); } EXPORT_SYMBOL(pcie_port_service_register); -- cgit 1.4.1 From d9347371c538544a7309d5b6475ec011d98d40e6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 1 Jan 2009 19:53:32 +0100 Subject: PCI: PCIe portdrv: Add kerneldoc comments to remaining core functions Add kerneldoc comments to the remaining functions in drivers/pci/pcie/portdrv_core.c. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/pcie/portdrv_core.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index a0d51e3f34f4..8b3f8c18032f 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -370,6 +370,14 @@ void pcie_port_device_remove(struct pci_dev *dev) pci_disable_msi(dev); } +/** + * pcie_port_probe_service - probe driver for given PCI Express port service + * @dev: PCI Express port service device to probe against + * + * If PCI Express port service driver is registered with + * pcie_port_service_register(), this function will be called by the driver core + * whenever match is found between the driver and a port service device. + */ static int pcie_port_probe_service(struct device *dev) { struct pcie_device *pciedev; @@ -393,6 +401,15 @@ static int pcie_port_probe_service(struct device *dev) return status; } +/** + * pcie_port_remove_service - detach driver from given PCI Express port service + * @dev: PCI Express port service device to handle + * + * If PCI Express port service driver is registered with + * pcie_port_service_register(), this function will be called by the driver core + * when device_unregister() is called for the port service device associated + * with the driver.
+ */ static int pcie_port_remove_service(struct device *dev) { struct pcie_device *pciedev; @@ -412,8 +429,21 @@ static int pcie_port_remove_service(struct device *dev) return 0; } +/** + * pcie_port_shutdown_service - shut down given PCI Express port service + * @dev: PCI Express port service device to handle + * + * If PCI Express port service driver is registered with + * pcie_port_service_register(), this function will be called by the driver core + * when device_shutdown() is called for the port service device associated + * with the driver. + */ static void pcie_port_shutdown_service(struct device *dev) {} +/** + * pcie_port_service_register - register PCI Express port service driver + * @new: PCI Express port service driver to register + */ int pcie_port_service_register(struct pcie_port_service_driver *new) { new->driver.name = (char *)new->name; @@ -425,6 +455,10 @@ int pcie_port_service_register(struct pcie_port_service_driver *new) return driver_register(&new->driver); } +/** + * pcie_port_service_unregister - unregister PCI Express port service driver + * @drv: PCI Express port service driver to unregister + */ void pcie_port_service_unregister(struct pcie_port_service_driver *drv) { driver_unregister(&drv->driver); -- cgit 1.4.1 From 987a4c783a8bbf3baf554e6b8ff588b26e06e020 Mon Sep 17 00:00:00 2001 From: Andrew Patterson Date: Mon, 5 Jan 2009 16:21:04 -0700 Subject: PCI: Use msleep instead of cpu_relax during ASPM link retraining The cpu_relax() function can be a noop on certain architectures like IA-64 when CPU threads are disabled, so use msleep instead during link retraining busy/wait loop. Introduce define LINK_RETRAIN_TIMEOUT instead of hard-coding timeout in pcie_aspm_configure_common_clock. Use time_after() to avoid jiffy wraparound when checking for expired timeout. After timeout expires, recheck link status register link training bit instead of checking for expired timeout to avoid possible false positive. 
Note that Matthew Wilcox came up with the first rough version of this patch. Reviewed-by: Matthew Wilcox Signed-off-by: Andrew Patterson Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aspm.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 4d8e2c7b2ad1..586b6f75910d 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -17,6 +17,7 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/jiffies.h> +#include <linux/delay.h> #include <linux/pci-aspm.h> #include "../pci.h" @@ -75,6 +76,8 @@ static const char *policy_str[] = { [POLICY_POWERSAVE] = "powersave" }; +#define LINK_RETRAIN_TIMEOUT HZ + static int policy_to_aspm_state(struct pci_dev *pdev) { struct pcie_link_state *link_state = pdev->link_state; @@ -238,16 +241,18 @@ static void pcie_aspm_configure_common_clock(struct pci_dev *pdev) pci_write_config_word(pdev, pos + PCI_EXP_LNKCTL, reg16); /* Wait for link training end */ - /* break out after waiting for 1 second */ + /* break out after waiting for timeout */ start_jiffies = jiffies; - while ((jiffies - start_jiffies) < HZ) { + for (;;) { pci_read_config_word(pdev, pos + PCI_EXP_LNKSTA, &reg16); if (!(reg16 & PCI_EXP_LNKSTA_LT)) break; - cpu_relax(); + if (time_after(jiffies, start_jiffies + LINK_RETRAIN_TIMEOUT)) + break; + msleep(1); } /* training failed -> recover */ - if ((jiffies - start_jiffies) >= HZ) { + if (reg16 & PCI_EXP_LNKSTA_LT) { dev_printk (KERN_ERR, &pdev->dev, "ASPM: Could not configure" " common clock\n"); i = 0; -- cgit 1.4.1 From c9b9972b3c88272be02d971346285d1c67fbb95f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Jan 2009 13:02:36 +0100 Subject: PCI PM: Fix poweroff and restore callbacks pci_fixup_device() is called too early in pci_pm_poweroff() and too late in pci_pm_restore(). Moreover, pci_pm_restore_noirq() calls pci_fixup_device() twice and in a wrong way. Fix that. Signed-off-by: Rafael J.
Wysocki Acked-by: Pavel Machek Signed-off-by: Jesse Barnes --- drivers/pci/pci-driver.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index c3f76be832d4..23bdf64411e5 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -611,8 +611,6 @@ static int pci_pm_poweroff(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; - pci_fixup_device(pci_fixup_suspend, pci_dev); - if (drv && drv->pm) { if (drv->pm->poweroff) { error = drv->pm->poweroff(dev); @@ -622,6 +620,8 @@ static int pci_pm_poweroff(struct device *dev) error = pci_legacy_suspend(dev, PMSG_HIBERNATE); } + pci_fixup_device(pci_fixup_suspend, pci_dev); + return error; } @@ -648,6 +648,8 @@ static int pci_pm_restore(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; + pci_fixup_device(pci_fixup_resume, pci_dev); + if (drv && drv->pm) { if (drv->pm->restore) error = drv->pm->restore(dev); @@ -656,7 +658,6 @@ static int pci_pm_restore(struct device *dev) } else { error = pci_default_pm_resume_late(pci_dev); } - pci_fixup_device(pci_fixup_resume, pci_dev); return error; } @@ -667,7 +668,7 @@ static int pci_pm_restore_noirq(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; - pci_fixup_device(pci_fixup_resume, pci_dev); + pci_fixup_device(pci_fixup_resume_early, pci_dev); if (drv && drv->pm) { if (drv->pm->restore_noirq) @@ -677,7 +678,6 @@ static int pci_pm_restore_noirq(struct device *dev) } else { pci_default_pm_resume_early(pci_dev); } - pci_fixup_device(pci_fixup_resume_early, pci_dev); return error; } -- cgit 1.4.1 From fa58d305d9925b01830e535896a7227a868a9e15 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Wed, 7 Jan 2009 13:03:42 +0100 Subject: PCI PM: Add suspend counterpart of pci_reenable_device PCI devices without drivers are not disabled during suspend and hibernation, but they are enabled during resume, with the help of pci_reenable_device(), so there is an unbalanced execution of pcibios_enable_device() in the resume code path. To correct this introduce function pci_disable_enabled_device() that will disable the argument device, if it is enabled when the function is being run, without updating the device's pci_dev structure and use it in the suspend code path to balance the pci_reenable_device() executed during resume. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Jesse Barnes --- drivers/pci/pci-driver.c | 35 ++++++++++++++++++++++++++++++----- drivers/pci/pci.c | 36 ++++++++++++++++++++++++++++-------- drivers/pci/pci.h | 1 + 3 files changed, 59 insertions(+), 13 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 23bdf64411e5..57cb0015a470 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -324,9 +324,19 @@ static bool pci_has_legacy_pm_support(struct pci_dev *pci_dev) /* * Default "suspend" method for devices that have no driver provided suspend, - * or not even a driver at all. + * or not even a driver at all (first part). + */ +static void pci_default_pm_suspend_early(struct pci_dev *pci_dev) +{ + /* If device is enabled at this point, disable it */ + pci_disable_enabled_device(pci_dev); +} + +/* + * Default "suspend" method for devices that have no driver provided suspend, + * or not even a driver at all (second part). 
*/ -static void pci_default_pm_suspend(struct pci_dev *pci_dev) +static void pci_default_pm_suspend_late(struct pci_dev *pci_dev) { pci_save_state(pci_dev); /* @@ -377,7 +387,11 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state) i = drv->suspend(pci_dev, state); suspend_report_result(drv->suspend, i); } else { - pci_default_pm_suspend(pci_dev); + /* + * For compatibility with existing code with legacy PM support + * don't call pci_default_pm_suspend_early() here. + */ + pci_default_pm_suspend_late(pci_dev); } return i; } @@ -455,7 +469,10 @@ static int pci_pm_suspend(struct device *dev) } } else if (pci_has_legacy_pm_support(pci_dev)) { error = pci_legacy_suspend(dev, PMSG_SUSPEND); + } else { + pci_default_pm_suspend_early(pci_dev); } + pci_fixup_device(pci_fixup_suspend, pci_dev); return error; @@ -475,7 +492,7 @@ static int pci_pm_suspend_noirq(struct device *dev) } else if (pci_has_legacy_pm_support(pci_dev)) { error = pci_legacy_suspend_late(dev, PMSG_SUSPEND); } else { - pci_default_pm_suspend(pci_dev); + pci_default_pm_suspend_late(pci_dev); } return error; @@ -546,6 +563,8 @@ static int pci_pm_freeze(struct device *dev) } else if (pci_has_legacy_pm_support(pci_dev)) { error = pci_legacy_suspend(dev, PMSG_FREEZE); pci_fixup_device(pci_fixup_suspend, pci_dev); + } else { + pci_default_pm_suspend_early(pci_dev); } return error; @@ -565,7 +584,7 @@ static int pci_pm_freeze_noirq(struct device *dev) } else if (pci_has_legacy_pm_support(pci_dev)) { error = pci_legacy_suspend_late(dev, PMSG_FREEZE); } else { - pci_default_pm_suspend(pci_dev); + pci_default_pm_suspend_late(pci_dev); } return error; @@ -583,6 +602,8 @@ static int pci_pm_thaw(struct device *dev) } else if (pci_has_legacy_pm_support(pci_dev)) { pci_fixup_device(pci_fixup_resume, pci_dev); error = pci_legacy_resume(dev); + } else { + pci_default_pm_resume_late(pci_dev); } return error; @@ -600,6 +621,8 @@ static int pci_pm_thaw_noirq(struct device *dev) } else if 
(pci_has_legacy_pm_support(pci_dev)) { pci_fixup_device(pci_fixup_resume_early, to_pci_dev(dev)); error = pci_legacy_resume_early(dev); + } else { + pci_default_pm_resume_early(pci_dev); } return error; @@ -618,6 +641,8 @@ static int pci_pm_poweroff(struct device *dev) } } else if (pci_has_legacy_pm_support(pci_dev)) { error = pci_legacy_suspend(dev, PMSG_HIBERNATE); + } else { + pci_default_pm_suspend_early(pci_dev); } pci_fixup_device(pci_fixup_suspend, pci_dev); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index f3fd55df67db..6e309c8b47df 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -970,6 +970,32 @@ void pcim_pin_device(struct pci_dev *pdev) */ void __attribute__ ((weak)) pcibios_disable_device (struct pci_dev *dev) {} +static void do_pci_disable_device(struct pci_dev *dev) +{ + u16 pci_command; + + pci_read_config_word(dev, PCI_COMMAND, &pci_command); + if (pci_command & PCI_COMMAND_MASTER) { + pci_command &= ~PCI_COMMAND_MASTER; + pci_write_config_word(dev, PCI_COMMAND, pci_command); + } + + pcibios_disable_device(dev); +} + +/** + * pci_disable_enabled_device - Disable device without updating enable_cnt + * @dev: PCI device to disable + * + * NOTE: This function is a backend of PCI power management routines and is + * not supposed to be called by drivers.
+ */ +void pci_disable_enabled_device(struct pci_dev *dev) +{ + if (atomic_read(&dev->enable_cnt)) + do_pci_disable_device(dev); +} + /** * pci_disable_device - Disable PCI device after use * @dev: PCI device to be disabled @@ -984,7 +1010,6 @@ void pci_disable_device(struct pci_dev *dev) { struct pci_devres *dr; - u16 pci_command; dr = find_pci_dr(dev); if (dr) @@ -993,14 +1018,9 @@ pci_disable_device(struct pci_dev *dev) if (atomic_sub_return(1, &dev->enable_cnt) != 0) return; - pci_read_config_word(dev, PCI_COMMAND, &pci_command); - if (pci_command & PCI_COMMAND_MASTER) { - pci_command &= ~PCI_COMMAND_MASTER; - pci_write_config_word(dev, PCI_COMMAND, pci_command); - } - dev->is_busmaster = 0; + do_pci_disable_device(dev); - pcibios_disable_device(dev); + dev->is_busmaster = 0; } /** diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 211fd418f48f..881dc15f8efd 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -44,6 +44,7 @@ struct pci_platform_pm_ops { }; extern int pci_set_platform_pm(struct pci_platform_pm_ops *ops); +extern void pci_disable_enabled_device(struct pci_dev *dev); extern void pci_pm_init(struct pci_dev *dev); extern void platform_pci_wakeup_init(struct pci_dev *dev); extern void pci_allocate_cap_save_buffers(struct pci_dev *dev); -- cgit 1.4.1 From 571ff7584bb9e05fca0eb79752ae55a46faf3a98 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Jan 2009 13:05:05 +0100 Subject: PCI PM: Power-manage devices without drivers during suspend-resume PCI devices without drivers can be put into low power states during suspend with the help of pci_prepare_to_sleep() and prevented from generating wake-up events during resume with the help of pci_enable_wake(). However, it's better not to put bridges into low power states during suspend, because that might result in entire bus segments being powered off. Signed-off-by: Rafael J. 
Wysocki Acked-by: Pavel Machek Signed-off-by: Jesse Barnes --- drivers/pci/pci-driver.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 57cb0015a470..2617ebb34e14 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -435,6 +435,31 @@ static int pci_legacy_resume_early(struct device *dev) return error; } +/* Auxiliary functions used by the new power management framework */ + +static bool pci_is_bridge(struct pci_dev *pci_dev) +{ + return !!(pci_dev->subordinate); +} + +static int pci_pm_default_resume(struct pci_dev *pci_dev) +{ + if (!pci_is_bridge(pci_dev)) + pci_enable_wake(pci_dev, PCI_D0, false); + + return pci_default_pm_resume_late(pci_dev); +} + +static void pci_pm_default_suspend(struct pci_dev *pci_dev) +{ + pci_default_pm_suspend_early(pci_dev); + + if (!pci_is_bridge(pci_dev)) + pci_prepare_to_sleep(pci_dev); +} + +/* New power management framework */ + static int pci_pm_prepare(struct device *dev) { struct device_driver *drv = dev->driver; @@ -470,7 +495,7 @@ static int pci_pm_suspend(struct device *dev) } else if (pci_has_legacy_pm_support(pci_dev)) { error = pci_legacy_suspend(dev, PMSG_SUSPEND); } else { - pci_default_pm_suspend_early(pci_dev); + pci_pm_default_suspend(pci_dev); } pci_fixup_device(pci_fixup_suspend, pci_dev); @@ -512,7 +537,7 @@ static int pci_pm_resume(struct device *dev) } else if (pci_has_legacy_pm_support(pci_dev)) { error = pci_legacy_resume(dev); } else { - error = pci_default_pm_resume_late(pci_dev); + error = pci_pm_default_resume(pci_dev); } return error; @@ -642,7 +667,7 @@ static int pci_pm_poweroff(struct device *dev) } else if (pci_has_legacy_pm_support(pci_dev)) { error = pci_legacy_suspend(dev, PMSG_HIBERNATE); } else { - pci_default_pm_suspend_early(pci_dev); + pci_pm_default_suspend(pci_dev); } pci_fixup_device(pci_fixup_suspend, pci_dev); @@ -681,7 +706,7 @@ 
static int pci_pm_restore(struct device *dev) } else if (pci_has_legacy_pm_support(pci_dev)) { error = pci_legacy_resume(dev); } else { - error = pci_default_pm_resume_late(pci_dev); + error = pci_pm_default_resume(pci_dev); } return error; -- cgit 1.4.1 From 07e836e8d1f3688311d97fe1bf46980b0f9ae9c1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Jan 2009 13:06:10 +0100 Subject: PCI PM: Move pci_has_legacy_pm_support Move pci_has_legacy_pm_support() closer to the functions that call it. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Jesse Barnes --- drivers/pci/pci-driver.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 2617ebb34e14..bfaa77d88537 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -314,14 +314,6 @@ static void pci_device_shutdown(struct device *dev) #ifdef CONFIG_PM_SLEEP -static bool pci_has_legacy_pm_support(struct pci_dev *pci_dev) -{ - struct pci_driver *drv = pci_dev->driver; - - return drv && (drv->suspend || drv->suspend_late || drv->resume - || drv->resume_early); -} - /* * Default "suspend" method for devices that have no driver provided suspend, * or not even a driver at all (first part). @@ -458,6 +450,14 @@ static void pci_pm_default_suspend(struct pci_dev *pci_dev) pci_prepare_to_sleep(pci_dev); } +static bool pci_has_legacy_pm_support(struct pci_dev *pci_dev) +{ + struct pci_driver *drv = pci_dev->driver; + + return drv && (drv->suspend || drv->suspend_late || drv->resume + || drv->resume_early); +} + /* New power management framework */ static int pci_pm_prepare(struct device *dev) -- cgit 1.4.1 From 734104292ff77dc71fe626b4ebd91b314547ca1b Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Wed, 7 Jan 2009 13:07:15 +0100 Subject: PCI PM: Avoid touching devices behind bridges in unknown state It is generally better to avoid accessing devices behind bridges that may not be in the D0 power state, because in that case the bridges' secondary buses may not be accessible. For this reason, during the early phase of resume (i.e. with interrupts disabled), before restoring the standard config registers of a device, check the power state of the bridge the device is behind and postpone the restoration of the device's config space, as well as any other operations that would involve accessing the device, if that state is not D0. In such cases the restoration of the device's config space will be retried during the "normal" phase of resume (i.e. with interrupts enabled), so that the bridge can be put into D0 before that happens. Also, save standard configuration registers of PCI devices during the "normal" phase of suspend (i.e. with interrupts enabled), so that the bridges the devices are behind can be put into low power states (we don't put bridges into low power states at the moment, but we may want to do it in the future and it seems reasonable to design for that). Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/pci-driver.c | 109 +++++++++++++++++++++++++++++++---------------- drivers/pci/pci.c | 2 +- drivers/pci/pci.h | 1 + 3 files changed, 74 insertions(+), 38 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index bfaa77d88537..750ee79c178f 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -314,23 +314,12 @@ static void pci_device_shutdown(struct device *dev) #ifdef CONFIG_PM_SLEEP -/* - * Default "suspend" method for devices that have no driver provided suspend, - * or not even a driver at all (first part). 
- */ -static void pci_default_pm_suspend_early(struct pci_dev *pci_dev) -{ - /* If device is enabled at this point, disable it */ - pci_disable_enabled_device(pci_dev); -} - /* * Default "suspend" method for devices that have no driver provided suspend, * or not even a driver at all (second part). */ static void pci_default_pm_suspend_late(struct pci_dev *pci_dev) { - pci_save_state(pci_dev); /* * mark its power state as "unknown", since we don't know if * e.g. the BIOS will change its device state when we suspend. @@ -339,16 +328,6 @@ static void pci_default_pm_suspend_late(struct pci_dev *pci_dev) pci_dev->current_state = PCI_UNKNOWN; } -/* - * Default "resume" method for devices that have no driver provided resume, - * or not even a driver at all (first part). - */ -static void pci_default_pm_resume_early(struct pci_dev *pci_dev) -{ - /* restore the PCI config space */ - pci_restore_state(pci_dev); -} - /* * Default "resume" method for devices that have no driver provided resume, * or not even a driver at all (second part). @@ -379,9 +358,10 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state) i = drv->suspend(pci_dev, state); suspend_report_result(drv->suspend, i); } else { + pci_save_state(pci_dev); /* - * For compatibility with existing code with legacy PM support - * don't call pci_default_pm_suspend_early() here. + * This is for compatibility with existing code with legacy PM + * support. 
*/ pci_default_pm_suspend_late(pci_dev); } @@ -410,7 +390,8 @@ static int pci_legacy_resume(struct device *dev) if (drv && drv->resume) { error = drv->resume(pci_dev); } else { - pci_default_pm_resume_early(pci_dev); + /* restore the PCI config space */ + pci_restore_state(pci_dev); error = pci_default_pm_resume_late(pci_dev); } return error; @@ -429,22 +410,72 @@ static int pci_legacy_resume_early(struct device *dev) /* Auxiliary functions used by the new power management framework */ +static int pci_restore_standard_config(struct pci_dev *pci_dev) +{ + struct pci_dev *parent = pci_dev->bus->self; + int error = 0; + + /* Check if the device's bus is operational */ + if (!parent || parent->current_state == PCI_D0) { + pci_restore_state(pci_dev); + pci_update_current_state(pci_dev, PCI_D0); + } else { + dev_warn(&pci_dev->dev, "unable to restore config, " + "bridge %s in low power state D%d\n", pci_name(parent), + parent->current_state); + pci_dev->current_state = PCI_UNKNOWN; + error = -EAGAIN; + } + + return error; +} + static bool pci_is_bridge(struct pci_dev *pci_dev) { return !!(pci_dev->subordinate); } +static void pci_pm_default_resume_noirq(struct pci_dev *pci_dev) +{ + if (pci_restore_standard_config(pci_dev)) + pci_fixup_device(pci_fixup_resume_early, pci_dev); +} + static int pci_pm_default_resume(struct pci_dev *pci_dev) { + /* + * pci_restore_standard_config() should have been called once already, + * but it would have failed if the device's parent bridge had not been + * in power state D0 at that time. Check it and try again if necessary. 
+ */ + if (pci_dev->current_state == PCI_UNKNOWN) { + int error = pci_restore_standard_config(pci_dev); + if (error) + return error; + } + + pci_fixup_device(pci_fixup_resume, pci_dev); + if (!pci_is_bridge(pci_dev)) pci_enable_wake(pci_dev, PCI_D0, false); return pci_default_pm_resume_late(pci_dev); } +static void pci_pm_default_suspend_generic(struct pci_dev *pci_dev) +{ + /* If device is enabled at this point, disable it */ + pci_disable_enabled_device(pci_dev); + /* + * Save state with interrupts enabled, because in principle the bus the + * device is on may be put into a low power state after this code runs. + */ + pci_save_state(pci_dev); +} + static void pci_pm_default_suspend(struct pci_dev *pci_dev) { - pci_default_pm_suspend_early(pci_dev); + pci_pm_default_suspend_generic(pci_dev); if (!pci_is_bridge(pci_dev)) pci_prepare_to_sleep(pci_dev); @@ -529,12 +560,13 @@ static int pci_pm_resume(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; - pci_fixup_device(pci_fixup_resume, pci_dev); - if (drv && drv->pm) { + pci_fixup_device(pci_fixup_resume, pci_dev); + if (drv->pm->resume) error = drv->pm->resume(dev); } else if (pci_has_legacy_pm_support(pci_dev)) { + pci_fixup_device(pci_fixup_resume, pci_dev); error = pci_legacy_resume(dev); } else { error = pci_pm_default_resume(pci_dev); @@ -549,15 +581,16 @@ static int pci_pm_resume_noirq(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; - pci_fixup_device(pci_fixup_resume_early, to_pci_dev(dev)); - if (drv && drv->pm) { + pci_fixup_device(pci_fixup_resume_early, pci_dev); + if (drv->pm->resume_noirq) error = drv->pm->resume_noirq(dev); } else if (pci_has_legacy_pm_support(pci_dev)) { + pci_fixup_device(pci_fixup_resume_early, pci_dev); error = pci_legacy_resume_early(dev); } else { - pci_default_pm_resume_early(pci_dev); + pci_pm_default_resume_noirq(pci_dev); } return error; @@ -589,7 +622,7 @@ static int pci_pm_freeze(struct device *dev) error = 
pci_legacy_suspend(dev, PMSG_FREEZE); pci_fixup_device(pci_fixup_suspend, pci_dev); } else { - pci_default_pm_suspend_early(pci_dev); + pci_pm_default_suspend_generic(pci_dev); } return error; @@ -647,7 +680,7 @@ static int pci_pm_thaw_noirq(struct device *dev) pci_fixup_device(pci_fixup_resume_early, to_pci_dev(dev)); error = pci_legacy_resume_early(dev); } else { - pci_default_pm_resume_early(pci_dev); + pci_update_current_state(pci_dev, PCI_D0); } return error; @@ -698,12 +731,13 @@ static int pci_pm_restore(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; - pci_fixup_device(pci_fixup_resume, pci_dev); - if (drv && drv->pm) { + pci_fixup_device(pci_fixup_resume, pci_dev); + if (drv->pm->restore) error = drv->pm->restore(dev); } else if (pci_has_legacy_pm_support(pci_dev)) { + pci_fixup_device(pci_fixup_resume, pci_dev); error = pci_legacy_resume(dev); } else { error = pci_pm_default_resume(pci_dev); @@ -718,15 +752,16 @@ static int pci_pm_restore_noirq(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; - pci_fixup_device(pci_fixup_resume_early, pci_dev); - if (drv && drv->pm) { + pci_fixup_device(pci_fixup_resume_early, pci_dev); + if (drv->pm->restore_noirq) error = drv->pm->restore_noirq(dev); } else if (pci_has_legacy_pm_support(pci_dev)) { + pci_fixup_device(pci_fixup_resume_early, pci_dev); error = pci_legacy_resume_early(dev); } else { - pci_default_pm_resume_early(pci_dev); + pci_pm_default_resume_noirq(pci_dev); } return error; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 6e309c8b47df..e491fdedf705 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -527,7 +527,7 @@ pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) * @dev: PCI device to handle. 
* @state: State to cache in case the device doesn't have the PM capability */ -static void pci_update_current_state(struct pci_dev *dev, pci_power_t state) +void pci_update_current_state(struct pci_dev *dev, pci_power_t state) { if (dev->pm_cap) { u16 pmcsr; diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 881dc15f8efd..1351bb4addde 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -44,6 +44,7 @@ struct pci_platform_pm_ops { }; extern int pci_set_platform_pm(struct pci_platform_pm_ops *ops); +extern void pci_update_current_state(struct pci_dev *dev, pci_power_t state); extern void pci_disable_enabled_device(struct pci_dev *dev); extern void pci_pm_init(struct pci_dev *dev); extern void platform_pci_wakeup_init(struct pci_dev *dev); -- cgit 1.4.1 From bb8089454391ac5577215aec1f1991adcd4b4cbf Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Jan 2009 14:15:17 +0100 Subject: PCI PM: Rearrange code in pci-driver.c Rename two functions and rearrange code in drivers/pci/pci-driver.c so that it's easier to follow. In particular, separate invocations of the legacy callbacks from the rest of the new callbacks' code. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/pci-driver.c | 116 ++++++++++++++++++++++++++++++----------------- 1 file changed, 75 insertions(+), 41 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 750ee79c178f..2e76945a1cd8 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -318,7 +318,7 @@ static void pci_device_shutdown(struct device *dev) * Default "suspend" method for devices that have no driver provided suspend, * or not even a driver at all (second part). 
*/ -static void pci_default_pm_suspend_late(struct pci_dev *pci_dev) +static void pci_pm_set_unknown_state(struct pci_dev *pci_dev) { /* * mark its power state as "unknown", since we don't know if @@ -332,7 +332,7 @@ static void pci_default_pm_suspend_late(struct pci_dev *pci_dev) * Default "resume" method for devices that have no driver provided resume, * or not even a driver at all (second part). */ -static int pci_default_pm_resume_late(struct pci_dev *pci_dev) +static int pci_pm_reenable_device(struct pci_dev *pci_dev) { int retval; @@ -363,7 +363,7 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state) * This is for compatibility with existing code with legacy PM * support. */ - pci_default_pm_suspend_late(pci_dev); + pci_pm_set_unknown_state(pci_dev); } return i; } @@ -392,7 +392,7 @@ static int pci_legacy_resume(struct device *dev) } else { /* restore the PCI config space */ pci_restore_state(pci_dev); - error = pci_default_pm_resume_late(pci_dev); + error = pci_pm_reenable_device(pci_dev); } return error; } @@ -459,7 +459,7 @@ static int pci_pm_default_resume(struct pci_dev *pci_dev) if (!pci_is_bridge(pci_dev)) pci_enable_wake(pci_dev, PCI_D0, false); - return pci_default_pm_resume_late(pci_dev); + return pci_pm_reenable_device(pci_dev); } static void pci_pm_default_suspend_generic(struct pci_dev *pci_dev) @@ -484,9 +484,17 @@ static void pci_pm_default_suspend(struct pci_dev *pci_dev) static bool pci_has_legacy_pm_support(struct pci_dev *pci_dev) { struct pci_driver *drv = pci_dev->driver; - - return drv && (drv->suspend || drv->suspend_late || drv->resume + bool ret = drv && (drv->suspend || drv->suspend_late || drv->resume || drv->resume_early); + + /* + * Legacy PM support is used by default, so warn if the new framework is + * supported as well. Drivers are supposed to support either the + * former, or the latter, but not both at the same time. 
+ */ + WARN_ON(ret && drv->driver.pm); + + return ret; } /* New power management framework */ @@ -518,17 +526,21 @@ static int pci_pm_suspend(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; + if (pci_has_legacy_pm_support(pci_dev)) { + error = pci_legacy_suspend(dev, PMSG_SUSPEND); + goto Exit; + } + if (drv && drv->pm) { if (drv->pm->suspend) { error = drv->pm->suspend(dev); suspend_report_result(drv->pm->suspend, error); } - } else if (pci_has_legacy_pm_support(pci_dev)) { - error = pci_legacy_suspend(dev, PMSG_SUSPEND); } else { pci_pm_default_suspend(pci_dev); } + Exit: pci_fixup_device(pci_fixup_suspend, pci_dev); return error; @@ -540,15 +552,16 @@ static int pci_pm_suspend_noirq(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; + if (pci_has_legacy_pm_support(pci_dev)) + return pci_legacy_suspend_late(dev, PMSG_SUSPEND); + if (drv && drv->pm) { if (drv->pm->suspend_noirq) { error = drv->pm->suspend_noirq(dev); suspend_report_result(drv->pm->suspend_noirq, error); } - } else if (pci_has_legacy_pm_support(pci_dev)) { - error = pci_legacy_suspend_late(dev, PMSG_SUSPEND); } else { - pci_default_pm_suspend_late(pci_dev); + pci_pm_set_unknown_state(pci_dev); } return error; @@ -560,14 +573,16 @@ static int pci_pm_resume(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; + if (pci_has_legacy_pm_support(pci_dev)) { + pci_fixup_device(pci_fixup_resume, pci_dev); + return pci_legacy_resume(dev); + } + if (drv && drv->pm) { pci_fixup_device(pci_fixup_resume, pci_dev); if (drv->pm->resume) error = drv->pm->resume(dev); - } else if (pci_has_legacy_pm_support(pci_dev)) { - pci_fixup_device(pci_fixup_resume, pci_dev); - error = pci_legacy_resume(dev); } else { error = pci_pm_default_resume(pci_dev); } @@ -581,14 +596,16 @@ static int pci_pm_resume_noirq(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; + if (pci_has_legacy_pm_support(pci_dev)) { + 
pci_fixup_device(pci_fixup_resume_early, pci_dev); + return pci_legacy_resume_early(dev); + } + if (drv && drv->pm) { pci_fixup_device(pci_fixup_resume_early, pci_dev); if (drv->pm->resume_noirq) error = drv->pm->resume_noirq(dev); - } else if (pci_has_legacy_pm_support(pci_dev)) { - pci_fixup_device(pci_fixup_resume_early, pci_dev); - error = pci_legacy_resume_early(dev); } else { pci_pm_default_resume_noirq(pci_dev); } @@ -613,14 +630,17 @@ static int pci_pm_freeze(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; + if (pci_has_legacy_pm_support(pci_dev)) { + error = pci_legacy_suspend(dev, PMSG_FREEZE); + pci_fixup_device(pci_fixup_suspend, pci_dev); + return error; + } + if (drv && drv->pm) { if (drv->pm->freeze) { error = drv->pm->freeze(dev); suspend_report_result(drv->pm->freeze, error); } - } else if (pci_has_legacy_pm_support(pci_dev)) { - error = pci_legacy_suspend(dev, PMSG_FREEZE); - pci_fixup_device(pci_fixup_suspend, pci_dev); } else { pci_pm_default_suspend_generic(pci_dev); } @@ -634,15 +654,16 @@ static int pci_pm_freeze_noirq(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; + if (pci_has_legacy_pm_support(pci_dev)) + return pci_legacy_suspend_late(dev, PMSG_FREEZE); + if (drv && drv->pm) { if (drv->pm->freeze_noirq) { error = drv->pm->freeze_noirq(dev); suspend_report_result(drv->pm->freeze_noirq, error); } - } else if (pci_has_legacy_pm_support(pci_dev)) { - error = pci_legacy_suspend_late(dev, PMSG_FREEZE); } else { - pci_default_pm_suspend_late(pci_dev); + pci_pm_set_unknown_state(pci_dev); } return error; @@ -654,14 +675,16 @@ static int pci_pm_thaw(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; + if (pci_has_legacy_pm_support(pci_dev)) { + pci_fixup_device(pci_fixup_resume, pci_dev); + return pci_legacy_resume(dev); + } + if (drv && drv->pm) { if (drv->pm->thaw) error = drv->pm->thaw(dev); - } else if (pci_has_legacy_pm_support(pci_dev)) { - 
pci_fixup_device(pci_fixup_resume, pci_dev); - error = pci_legacy_resume(dev); } else { - pci_default_pm_resume_late(pci_dev); + pci_pm_reenable_device(pci_dev); } return error; @@ -673,12 +696,14 @@ static int pci_pm_thaw_noirq(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; + if (pci_has_legacy_pm_support(pci_dev)) { + pci_fixup_device(pci_fixup_resume_early, to_pci_dev(dev)); + return pci_legacy_resume_early(dev); + } + if (drv && drv->pm) { if (drv->pm->thaw_noirq) error = drv->pm->thaw_noirq(dev); - } else if (pci_has_legacy_pm_support(pci_dev)) { - pci_fixup_device(pci_fixup_resume_early, to_pci_dev(dev)); - error = pci_legacy_resume_early(dev); } else { pci_update_current_state(pci_dev, PCI_D0); } @@ -692,17 +717,21 @@ static int pci_pm_poweroff(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; + if (pci_has_legacy_pm_support(pci_dev)) { + error = pci_legacy_suspend(dev, PMSG_HIBERNATE); + goto Exit; + } + if (drv && drv->pm) { if (drv->pm->poweroff) { error = drv->pm->poweroff(dev); suspend_report_result(drv->pm->poweroff, error); } - } else if (pci_has_legacy_pm_support(pci_dev)) { - error = pci_legacy_suspend(dev, PMSG_HIBERNATE); } else { pci_pm_default_suspend(pci_dev); } + Exit: pci_fixup_device(pci_fixup_suspend, pci_dev); return error; @@ -713,13 +742,14 @@ static int pci_pm_poweroff_noirq(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; + if (pci_has_legacy_pm_support(to_pci_dev(dev))) + return pci_legacy_suspend_late(dev, PMSG_HIBERNATE); + if (drv && drv->pm) { if (drv->pm->poweroff_noirq) { error = drv->pm->poweroff_noirq(dev); suspend_report_result(drv->pm->poweroff_noirq, error); } - } else if (pci_has_legacy_pm_support(to_pci_dev(dev))) { - error = pci_legacy_suspend_late(dev, PMSG_HIBERNATE); } return error; @@ -731,14 +761,16 @@ static int pci_pm_restore(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; + if 
(pci_has_legacy_pm_support(pci_dev)) { + pci_fixup_device(pci_fixup_resume, pci_dev); + return pci_legacy_resume(dev); + } + if (drv && drv->pm) { pci_fixup_device(pci_fixup_resume, pci_dev); if (drv->pm->restore) error = drv->pm->restore(dev); - } else if (pci_has_legacy_pm_support(pci_dev)) { - pci_fixup_device(pci_fixup_resume, pci_dev); - error = pci_legacy_resume(dev); } else { error = pci_pm_default_resume(pci_dev); } @@ -752,14 +784,16 @@ static int pci_pm_restore_noirq(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; + if (pci_has_legacy_pm_support(pci_dev)) { + pci_fixup_device(pci_fixup_resume_early, pci_dev); + return pci_legacy_resume_early(dev); + } + if (drv && drv->pm) { pci_fixup_device(pci_fixup_resume_early, pci_dev); if (drv->pm->restore_noirq) error = drv->pm->restore_noirq(dev); - } else if (pci_has_legacy_pm_support(pci_dev)) { - pci_fixup_device(pci_fixup_resume_early, pci_dev); - error = pci_legacy_resume_early(dev); } else { pci_pm_default_resume_noirq(pci_dev); } -- cgit 1.4.1 From ad8cfa1defee14a5181d9b63e666318c51cfaeed Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Jan 2009 13:09:37 +0100 Subject: PCI PM: Call pci_fixup_device from legacy routines The size of drivers/pci/pci-driver.c can be reduced quite a bit if pci_fixup_device() is called from the legacy PM callbacks, so make it happen. No functional changes should result from this. Signed-off-by: Rafael J. 
Wysocki Acked-by: Pavel Machek Signed-off-by: Jesse Barnes --- drivers/pci/pci-driver.c | 52 ++++++++++++++++++------------------------------ 1 file changed, 19 insertions(+), 33 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 2e76945a1cd8..02bf4d4125e4 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -365,6 +365,9 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state) */ pci_pm_set_unknown_state(pci_dev); } + + pci_fixup_device(pci_fixup_suspend, pci_dev); + return i; } @@ -387,6 +390,8 @@ static int pci_legacy_resume(struct device *dev) struct pci_dev * pci_dev = to_pci_dev(dev); struct pci_driver * drv = pci_dev->driver; + pci_fixup_device(pci_fixup_resume, pci_dev); + if (drv && drv->resume) { error = drv->resume(pci_dev); } else { @@ -403,6 +408,8 @@ static int pci_legacy_resume_early(struct device *dev) struct pci_dev * pci_dev = to_pci_dev(dev); struct pci_driver * drv = pci_dev->driver; + pci_fixup_device(pci_fixup_resume_early, pci_dev); + if (drv && drv->resume_early) error = drv->resume_early(pci_dev); return error; @@ -526,10 +533,8 @@ static int pci_pm_suspend(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; - if (pci_has_legacy_pm_support(pci_dev)) { - error = pci_legacy_suspend(dev, PMSG_SUSPEND); - goto Exit; - } + if (pci_has_legacy_pm_support(pci_dev)) + return pci_legacy_suspend(dev, PMSG_SUSPEND); if (drv && drv->pm) { if (drv->pm->suspend) { @@ -540,7 +545,6 @@ static int pci_pm_suspend(struct device *dev) pci_pm_default_suspend(pci_dev); } - Exit: pci_fixup_device(pci_fixup_suspend, pci_dev); return error; @@ -573,10 +577,8 @@ static int pci_pm_resume(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; - if (pci_has_legacy_pm_support(pci_dev)) { - pci_fixup_device(pci_fixup_resume, pci_dev); + if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume(dev); - } if (drv && drv->pm) { 
pci_fixup_device(pci_fixup_resume, pci_dev); @@ -596,10 +598,8 @@ static int pci_pm_resume_noirq(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; - if (pci_has_legacy_pm_support(pci_dev)) { - pci_fixup_device(pci_fixup_resume_early, pci_dev); + if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume_early(dev); - } if (drv && drv->pm) { pci_fixup_device(pci_fixup_resume_early, pci_dev); @@ -630,11 +630,8 @@ static int pci_pm_freeze(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; - if (pci_has_legacy_pm_support(pci_dev)) { - error = pci_legacy_suspend(dev, PMSG_FREEZE); - pci_fixup_device(pci_fixup_suspend, pci_dev); - return error; - } + if (pci_has_legacy_pm_support(pci_dev)) + return pci_legacy_suspend(dev, PMSG_FREEZE); if (drv && drv->pm) { if (drv->pm->freeze) { @@ -675,10 +672,8 @@ static int pci_pm_thaw(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; - if (pci_has_legacy_pm_support(pci_dev)) { - pci_fixup_device(pci_fixup_resume, pci_dev); + if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume(dev); - } if (drv && drv->pm) { if (drv->pm->thaw) @@ -696,10 +691,8 @@ static int pci_pm_thaw_noirq(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; - if (pci_has_legacy_pm_support(pci_dev)) { - pci_fixup_device(pci_fixup_resume_early, to_pci_dev(dev)); + if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume_early(dev); - } if (drv && drv->pm) { if (drv->pm->thaw_noirq) @@ -717,10 +710,8 @@ static int pci_pm_poweroff(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; - if (pci_has_legacy_pm_support(pci_dev)) { - error = pci_legacy_suspend(dev, PMSG_HIBERNATE); - goto Exit; - } + if (pci_has_legacy_pm_support(pci_dev)) + return pci_legacy_suspend(dev, PMSG_HIBERNATE); if (drv && drv->pm) { if (drv->pm->poweroff) { @@ -731,7 +722,6 @@ static int pci_pm_poweroff(struct device *dev) 
pci_pm_default_suspend(pci_dev); } - Exit: pci_fixup_device(pci_fixup_suspend, pci_dev); return error; @@ -761,10 +751,8 @@ static int pci_pm_restore(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; - if (pci_has_legacy_pm_support(pci_dev)) { - pci_fixup_device(pci_fixup_resume, pci_dev); + if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume(dev); - } if (drv && drv->pm) { pci_fixup_device(pci_fixup_resume, pci_dev); @@ -784,10 +772,8 @@ static int pci_pm_restore_noirq(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; - if (pci_has_legacy_pm_support(pci_dev)) { - pci_fixup_device(pci_fixup_resume_early, pci_dev); + if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume_early(dev); - } if (drv && drv->pm) { pci_fixup_device(pci_fixup_resume_early, pci_dev); -- cgit 1.4.1 From 98e6e286d7b01deb7453b717aa38ebb69d6cefc0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Jan 2009 13:10:35 +0100 Subject: PCI PM: Register power state of devices during initialization Use the observation that the power state of a PCI device can be loaded into its pci_dev structure as soon as pci_pm_init() is run for it and make that happen. Signed-off-by: Rafael J. 
Wysocki Acked-by: Pavel Machek Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e491fdedf705..c12f6c790698 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1260,14 +1260,15 @@ void pci_pm_init(struct pci_dev *dev) /* find PCI PM capability in list */ pm = pci_find_capability(dev, PCI_CAP_ID_PM); if (!pm) - return; + goto Exit; + /* Check device's ability to generate PME# */ pci_read_config_word(dev, pm + PCI_PM_PMC, &pmc); if ((pmc & PCI_PM_CAP_VER_MASK) > 3) { dev_err(&dev->dev, "unsupported PM cap regs version (%u)\n", pmc & PCI_PM_CAP_VER_MASK); - return; + goto Exit; } dev->pm_cap = pm; @@ -1306,6 +1307,9 @@ void pci_pm_init(struct pci_dev *dev) } else { dev->pme_support = 0; } + + Exit: + pci_update_current_state(dev, PCI_D0); } /** -- cgit 1.4.1 From d67e37d7933ba3b28a63ff38c957e433aaca5dc4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Jan 2009 13:11:28 +0100 Subject: PCI PM: Run default PM callbacks for all devices using new framework It should be quite clear that it generally makes sense to execute the default PM callbacks (i.e. the callbacks used for handling suspend, hibernation and resume of PCI devices without drivers) for all devices. Of course, the drivers that provide legacy PCI PM support (i.e. the ->suspend, ->suspend_late, ->resume_early or ->resume hooks in the pci_driver structure) carry out these operations too, so we can't do it for devices with such drivers. Still, we can make the default PM callbacks run for devices with drivers using the new framework (i.e. drivers that implement the pm object), since there are no such drivers at the moment. This also simplifies the code and makes it smaller. Signed-off-by: Rafael J. 
Wysocki Acked-by: Pavel Machek Signed-off-by: Jesse Barnes --- drivers/pci/pci-driver.c | 135 +++++++++++++++++++---------------------------- 1 file changed, 53 insertions(+), 82 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 02bf4d4125e4..b7e67c2f40b9 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -486,6 +486,8 @@ static void pci_pm_default_suspend(struct pci_dev *pci_dev) if (!pci_is_bridge(pci_dev)) pci_prepare_to_sleep(pci_dev); + + pci_fixup_device(pci_fixup_suspend, pci_dev); } static bool pci_has_legacy_pm_support(struct pci_dev *pci_dev) @@ -536,16 +538,13 @@ static int pci_pm_suspend(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_SUSPEND); - if (drv && drv->pm) { - if (drv->pm->suspend) { - error = drv->pm->suspend(dev); - suspend_report_result(drv->pm->suspend, error); - } - } else { - pci_pm_default_suspend(pci_dev); + if (drv && drv->pm && drv->pm->suspend) { + error = drv->pm->suspend(dev); + suspend_report_result(drv->pm->suspend, error); } - pci_fixup_device(pci_fixup_suspend, pci_dev); + if (!error) + pci_pm_default_suspend(pci_dev); return error; } @@ -559,15 +558,14 @@ static int pci_pm_suspend_noirq(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend_late(dev, PMSG_SUSPEND); - if (drv && drv->pm) { - if (drv->pm->suspend_noirq) { - error = drv->pm->suspend_noirq(dev); - suspend_report_result(drv->pm->suspend_noirq, error); - } - } else { - pci_pm_set_unknown_state(pci_dev); + if (drv && drv->pm && drv->pm->suspend_noirq) { + error = drv->pm->suspend_noirq(dev); + suspend_report_result(drv->pm->suspend_noirq, error); } + if (!error) + pci_pm_set_unknown_state(pci_dev); + return error; } @@ -580,14 +578,10 @@ static int pci_pm_resume(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume(dev); - if (drv && drv->pm) { - 
pci_fixup_device(pci_fixup_resume, pci_dev); + error = pci_pm_default_resume(pci_dev); - if (drv->pm->resume) - error = drv->pm->resume(dev); - } else { - error = pci_pm_default_resume(pci_dev); - } + if (!error && drv && drv->pm && drv->pm->resume) + error = drv->pm->resume(dev); return error; } @@ -601,14 +595,10 @@ static int pci_pm_resume_noirq(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume_early(dev); - if (drv && drv->pm) { - pci_fixup_device(pci_fixup_resume_early, pci_dev); + pci_pm_default_resume_noirq(pci_dev); - if (drv->pm->resume_noirq) - error = drv->pm->resume_noirq(dev); - } else { - pci_pm_default_resume_noirq(pci_dev); - } + if (drv && drv->pm && drv->pm->resume_noirq) + error = drv->pm->resume_noirq(dev); return error; } @@ -633,15 +623,14 @@ static int pci_pm_freeze(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_FREEZE); - if (drv && drv->pm) { - if (drv->pm->freeze) { - error = drv->pm->freeze(dev); - suspend_report_result(drv->pm->freeze, error); - } - } else { - pci_pm_default_suspend_generic(pci_dev); + if (drv && drv->pm && drv->pm->freeze) { + error = drv->pm->freeze(dev); + suspend_report_result(drv->pm->freeze, error); } + if (!error) + pci_pm_default_suspend_generic(pci_dev); + return error; } @@ -654,15 +643,14 @@ static int pci_pm_freeze_noirq(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend_late(dev, PMSG_FREEZE); - if (drv && drv->pm) { - if (drv->pm->freeze_noirq) { - error = drv->pm->freeze_noirq(dev); - suspend_report_result(drv->pm->freeze_noirq, error); - } - } else { - pci_pm_set_unknown_state(pci_dev); + if (drv && drv->pm && drv->pm->freeze_noirq) { + error = drv->pm->freeze_noirq(dev); + suspend_report_result(drv->pm->freeze_noirq, error); } + if (!error) + pci_pm_set_unknown_state(pci_dev); + return error; } @@ -675,12 +663,10 @@ static int pci_pm_thaw(struct device *dev) if 
(pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume(dev); - if (drv && drv->pm) { - if (drv->pm->thaw) - error = drv->pm->thaw(dev); - } else { - pci_pm_reenable_device(pci_dev); - } + pci_pm_reenable_device(pci_dev); + + if (drv && drv->pm && drv->pm->thaw) + error = drv->pm->thaw(dev); return error; } @@ -694,12 +680,10 @@ static int pci_pm_thaw_noirq(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume_early(dev); - if (drv && drv->pm) { - if (drv->pm->thaw_noirq) - error = drv->pm->thaw_noirq(dev); - } else { - pci_update_current_state(pci_dev, PCI_D0); - } + pci_update_current_state(pci_dev, PCI_D0); + + if (drv && drv->pm && drv->pm->thaw_noirq) + error = drv->pm->thaw_noirq(dev); return error; } @@ -713,16 +697,13 @@ static int pci_pm_poweroff(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_HIBERNATE); - if (drv && drv->pm) { - if (drv->pm->poweroff) { - error = drv->pm->poweroff(dev); - suspend_report_result(drv->pm->poweroff, error); - } - } else { - pci_pm_default_suspend(pci_dev); + if (drv && drv->pm && drv->pm->poweroff) { + error = drv->pm->poweroff(dev); + suspend_report_result(drv->pm->poweroff, error); } - pci_fixup_device(pci_fixup_suspend, pci_dev); + if (!error) + pci_pm_default_suspend(pci_dev); return error; } @@ -735,11 +716,9 @@ static int pci_pm_poweroff_noirq(struct device *dev) if (pci_has_legacy_pm_support(to_pci_dev(dev))) return pci_legacy_suspend_late(dev, PMSG_HIBERNATE); - if (drv && drv->pm) { - if (drv->pm->poweroff_noirq) { - error = drv->pm->poweroff_noirq(dev); - suspend_report_result(drv->pm->poweroff_noirq, error); - } + if (drv && drv->pm && drv->pm->poweroff_noirq) { + error = drv->pm->poweroff_noirq(dev); + suspend_report_result(drv->pm->poweroff_noirq, error); } return error; @@ -754,14 +733,10 @@ static int pci_pm_restore(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume(dev); - if (drv && drv->pm) 
{ - pci_fixup_device(pci_fixup_resume, pci_dev); + error = pci_pm_default_resume(pci_dev); - if (drv->pm->restore) - error = drv->pm->restore(dev); - } else { - error = pci_pm_default_resume(pci_dev); - } + if (!error && drv && drv->pm && drv->pm->restore) + error = drv->pm->restore(dev); return error; } @@ -775,14 +750,10 @@ static int pci_pm_restore_noirq(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume_early(dev); - if (drv && drv->pm) { - pci_fixup_device(pci_fixup_resume_early, pci_dev); + pci_pm_default_resume_noirq(pci_dev); - if (drv->pm->restore_noirq) - error = drv->pm->restore_noirq(dev); - } else { - pci_pm_default_resume_noirq(pci_dev); - } + if (drv && drv->pm && drv->pm->restore_noirq) + error = drv->pm->restore_noirq(dev); return error; } -- cgit 1.4.1 From f6dc1e5e3d4b523e1616b43beddb04e4fb1d376a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Jan 2009 13:12:22 +0100 Subject: PCI PM: Put PM callbacks in the order of execution Put PM callbacks in drivers/pci/pci-driver.c in the order in which they are executed which makes it much easier to follow the code. No functional changes should result from this. Signed-off-by: Rafael J. 
Wysocki Acked-by: Pavel Machek Signed-off-by: Jesse Barnes --- drivers/pci/pci-driver.c | 86 ++++++++++++++++++++++++------------------------ 1 file changed, 43 insertions(+), 43 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index b7e67c2f40b9..c697f2680856 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -384,6 +384,19 @@ static int pci_legacy_suspend_late(struct device *dev, pm_message_t state) return i; } +static int pci_legacy_resume_early(struct device *dev) +{ + int error = 0; + struct pci_dev * pci_dev = to_pci_dev(dev); + struct pci_driver * drv = pci_dev->driver; + + pci_fixup_device(pci_fixup_resume_early, pci_dev); + + if (drv && drv->resume_early) + error = drv->resume_early(pci_dev); + return error; +} + static int pci_legacy_resume(struct device *dev) { int error; @@ -402,19 +415,6 @@ static int pci_legacy_resume(struct device *dev) return error; } -static int pci_legacy_resume_early(struct device *dev) -{ - int error = 0; - struct pci_dev * pci_dev = to_pci_dev(dev); - struct pci_driver * drv = pci_dev->driver; - - pci_fixup_device(pci_fixup_resume_early, pci_dev); - - if (drv && drv->resume_early) - error = drv->resume_early(pci_dev); - return error; -} - /* Auxiliary functions used by the new power management framework */ static int pci_restore_standard_config(struct pci_dev *pci_dev) @@ -569,36 +569,36 @@ static int pci_pm_suspend_noirq(struct device *dev) return error; } -static int pci_pm_resume(struct device *dev) +static int pci_pm_resume_noirq(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct device_driver *drv = dev->driver; int error = 0; if (pci_has_legacy_pm_support(pci_dev)) - return pci_legacy_resume(dev); + return pci_legacy_resume_early(dev); - error = pci_pm_default_resume(pci_dev); + pci_pm_default_resume_noirq(pci_dev); - if (!error && drv && drv->pm && drv->pm->resume) - error = drv->pm->resume(dev); + if (drv && drv->pm && 
drv->pm->resume_noirq) + error = drv->pm->resume_noirq(dev); return error; } -static int pci_pm_resume_noirq(struct device *dev) +static int pci_pm_resume(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct device_driver *drv = dev->driver; int error = 0; if (pci_has_legacy_pm_support(pci_dev)) - return pci_legacy_resume_early(dev); + return pci_legacy_resume(dev); - pci_pm_default_resume_noirq(pci_dev); + error = pci_pm_default_resume(pci_dev); - if (drv && drv->pm && drv->pm->resume_noirq) - error = drv->pm->resume_noirq(dev); + if (!error && drv && drv->pm && drv->pm->resume) + error = drv->pm->resume(dev); return error; } @@ -654,36 +654,36 @@ static int pci_pm_freeze_noirq(struct device *dev) return error; } -static int pci_pm_thaw(struct device *dev) +static int pci_pm_thaw_noirq(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct device_driver *drv = dev->driver; int error = 0; if (pci_has_legacy_pm_support(pci_dev)) - return pci_legacy_resume(dev); + return pci_legacy_resume_early(dev); - pci_pm_reenable_device(pci_dev); + pci_update_current_state(pci_dev, PCI_D0); - if (drv && drv->pm && drv->pm->thaw) - error = drv->pm->thaw(dev); + if (drv && drv->pm && drv->pm->thaw_noirq) + error = drv->pm->thaw_noirq(dev); return error; } -static int pci_pm_thaw_noirq(struct device *dev) +static int pci_pm_thaw(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct device_driver *drv = dev->driver; int error = 0; if (pci_has_legacy_pm_support(pci_dev)) - return pci_legacy_resume_early(dev); + return pci_legacy_resume(dev); - pci_update_current_state(pci_dev, PCI_D0); + pci_pm_reenable_device(pci_dev); - if (drv && drv->pm && drv->pm->thaw_noirq) - error = drv->pm->thaw_noirq(dev); + if (drv && drv->pm && drv->pm->thaw) + error = drv->pm->thaw(dev); return error; } @@ -724,36 +724,36 @@ static int pci_pm_poweroff_noirq(struct device *dev) return error; } -static int pci_pm_restore(struct device *dev) +static int 
pci_pm_restore_noirq(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct device_driver *drv = dev->driver; int error = 0; if (pci_has_legacy_pm_support(pci_dev)) - return pci_legacy_resume(dev); + return pci_legacy_resume_early(dev); - error = pci_pm_default_resume(pci_dev); + pci_pm_default_resume_noirq(pci_dev); - if (!error && drv && drv->pm && drv->pm->restore) - error = drv->pm->restore(dev); + if (drv && drv->pm && drv->pm->restore_noirq) + error = drv->pm->restore_noirq(dev); return error; } -static int pci_pm_restore_noirq(struct device *dev) +static int pci_pm_restore(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct device_driver *drv = dev->driver; int error = 0; if (pci_has_legacy_pm_support(pci_dev)) - return pci_legacy_resume_early(dev); + return pci_legacy_resume(dev); - pci_pm_default_resume_noirq(pci_dev); + error = pci_pm_default_resume(pci_dev); - if (drv && drv->pm && drv->pm->restore_noirq) - error = drv->pm->restore_noirq(dev); + if (!error && drv && drv->pm && drv->pm->restore) + error = drv->pm->restore(dev); return error; } -- cgit 1.4.1