summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--arch/sh/Kconfig29
-rw-r--r--arch/sh/Makefile4
-rw-r--r--arch/sh/boards/Makefile1
-rw-r--r--arch/sh/boards/mach-ap325rxa/Makefile2
-rw-r--r--arch/sh/boards/mach-ap325rxa/sdram.S69
-rw-r--r--arch/sh/boards/mach-ap325rxa/setup.c (renamed from arch/sh/boards/board-ap325rxa.c)97
-rw-r--r--arch/sh/boards/mach-ecovec24/Makefile2
-rw-r--r--arch/sh/boards/mach-ecovec24/sdram.S52
-rw-r--r--arch/sh/boards/mach-ecovec24/setup.c197
-rw-r--r--arch/sh/boards/mach-highlander/setup.c2
-rw-r--r--arch/sh/boards/mach-kfr2r09/Makefile2
-rw-r--r--arch/sh/boards/mach-kfr2r09/sdram.S80
-rw-r--r--arch/sh/boards/mach-kfr2r09/setup.c238
-rw-r--r--arch/sh/boards/mach-migor/Makefile2
-rw-r--r--arch/sh/boards/mach-migor/sdram.S69
-rw-r--r--arch/sh/boards/mach-migor/setup.c65
-rw-r--r--arch/sh/boards/mach-r2d/irq.c2
-rw-r--r--arch/sh/boards/mach-se/7722/irq.c32
-rw-r--r--arch/sh/boards/mach-se/7722/setup.c17
-rw-r--r--arch/sh/boards/mach-se/7724/Makefile2
-rw-r--r--arch/sh/boards/mach-se/7724/sdram.S52
-rw-r--r--arch/sh/boards/mach-se/7724/setup.c96
-rw-r--r--arch/sh/boot/compressed/misc.c2
-rw-r--r--arch/sh/boot/romimage/Makefile12
-rw-r--r--arch/sh/boot/romimage/head.S38
-rw-r--r--arch/sh/drivers/dma/dma-sysfs.c2
-rw-r--r--arch/sh/drivers/pci/Kconfig19
-rw-r--r--arch/sh/include/asm/addrspace.h9
-rw-r--r--arch/sh/include/asm/atomic.h9
-rw-r--r--arch/sh/include/asm/bitops.h4
-rw-r--r--arch/sh/include/asm/bugs.h4
-rw-r--r--arch/sh/include/asm/dma-mapping.h233
-rw-r--r--arch/sh/include/asm/dwarf.h28
-rw-r--r--arch/sh/include/asm/fixmap.h12
-rw-r--r--arch/sh/include/asm/fpu.h26
-rw-r--r--arch/sh/include/asm/ftrace.h17
-rw-r--r--arch/sh/include/asm/gpio.h82
-rw-r--r--arch/sh/include/asm/hardirq.h13
-rw-r--r--arch/sh/include/asm/io.h16
-rw-r--r--arch/sh/include/asm/irqflags.h31
-rw-r--r--arch/sh/include/asm/irqflags_32.h99
-rw-r--r--arch/sh/include/asm/irqflags_64.h85
-rw-r--r--arch/sh/include/asm/mmu.h13
-rw-r--r--arch/sh/include/asm/pci.h30
-rw-r--r--arch/sh/include/asm/perf_event.h31
-rw-r--r--arch/sh/include/asm/pgtable.h26
-rw-r--r--arch/sh/include/asm/pgtable_32.h2
-rw-r--r--arch/sh/include/asm/processor_32.h3
-rw-r--r--arch/sh/include/asm/scatterlist.h2
-rw-r--r--arch/sh/include/asm/suspend.h65
-rw-r--r--arch/sh/include/asm/system.h4
-rw-r--r--arch/sh/include/asm/system_32.h29
-rw-r--r--arch/sh/include/asm/system_64.h26
-rw-r--r--arch/sh/include/asm/thread_info.h30
-rw-r--r--arch/sh/include/asm/topology.h8
-rw-r--r--arch/sh/include/asm/ubc.h11
-rw-r--r--arch/sh/include/asm/watchdog.h59
-rw-r--r--arch/sh/include/cpu-sh4/cpu/watchdog.h13
-rw-r--r--arch/sh/include/mach-ecovec24/mach/partner-jet-setup.txt3
-rw-r--r--arch/sh/include/mach-se/mach/se7722.h11
-rw-r--r--arch/sh/kernel/Makefile10
-rw-r--r--arch/sh/kernel/asm-offsets.c23
-rw-r--r--arch/sh/kernel/cpu/Makefile1
-rw-r--r--arch/sh/kernel/cpu/init.c28
-rw-r--r--arch/sh/kernel/cpu/sh2a/fpu.c27
-rw-r--r--arch/sh/kernel/cpu/sh3/entry.S33
-rw-r--r--arch/sh/kernel/cpu/sh4/Makefile8
-rw-r--r--arch/sh/kernel/cpu/sh4/fpu.c28
-rw-r--r--arch/sh/kernel/cpu/sh4/perf_event.c253
-rw-r--r--arch/sh/kernel/cpu/sh4a/Makefile1
-rw-r--r--arch/sh/kernel/cpu/sh4a/clock-sh7724.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/perf_event.c269
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7724.c264
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-shx3.c45
-rw-r--r--arch/sh/kernel/cpu/sh4a/smp-shx3.c37
-rw-r--r--arch/sh/kernel/cpu/sh5/entry.S2
-rw-r--r--arch/sh/kernel/cpu/shmobile/cpuidle.c42
-rw-r--r--arch/sh/kernel/cpu/shmobile/pm.c117
-rw-r--r--arch/sh/kernel/cpu/shmobile/pm_runtime.c17
-rw-r--r--arch/sh/kernel/cpu/shmobile/sleep.S344
-rw-r--r--arch/sh/kernel/cpu/ubc.S59
-rw-r--r--arch/sh/kernel/dma-nommu.c82
-rw-r--r--arch/sh/kernel/dwarf.c222
-rw-r--r--arch/sh/kernel/entry-common.S2
-rw-r--r--arch/sh/kernel/ftrace.c146
-rw-r--r--arch/sh/kernel/head_32.S2
-rw-r--r--arch/sh/kernel/idle.c78
-rw-r--r--arch/sh/kernel/io_generic.c4
-rw-r--r--arch/sh/kernel/irq.c14
-rw-r--r--arch/sh/kernel/irq_32.c57
-rw-r--r--arch/sh/kernel/irq_64.c51
-rw-r--r--arch/sh/kernel/machine_kexec.c6
-rw-r--r--arch/sh/kernel/machvec.c4
-rw-r--r--arch/sh/kernel/module.c9
-rw-r--r--arch/sh/kernel/perf_callchain.c98
-rw-r--r--arch/sh/kernel/perf_event.c312
-rw-r--r--arch/sh/kernel/process_32.c42
-rw-r--r--arch/sh/kernel/process_64.c2
-rw-r--r--arch/sh/kernel/return_address.c54
-rw-r--r--arch/sh/kernel/setup.c4
-rw-r--r--arch/sh/kernel/sh_ksyms_32.c67
-rw-r--r--arch/sh/kernel/sh_ksyms_64.c10
-rw-r--r--arch/sh/kernel/signal_32.c24
-rw-r--r--arch/sh/kernel/signal_64.c13
-rw-r--r--arch/sh/kernel/smp.c4
-rw-r--r--arch/sh/kernel/topology.c26
-rw-r--r--arch/sh/kernel/traps.c8
-rw-r--r--arch/sh/kernel/traps_32.c82
-rw-r--r--arch/sh/lib/Makefile7
-rw-r--r--arch/sh/lib/memset-sh4.S107
-rw-r--r--arch/sh/math-emu/math.c6
-rw-r--r--arch/sh/mm/Kconfig19
-rw-r--r--arch/sh/mm/Makefile3
-rw-r--r--arch/sh/mm/cache-sh4.c501
-rw-r--r--arch/sh/mm/cache-sh5.c2
-rw-r--r--arch/sh/mm/cache-sh7705.c2
-rw-r--r--arch/sh/mm/cache.c18
-rw-r--r--arch/sh/mm/consistent.c28
-rw-r--r--arch/sh/mm/init.c19
-rw-r--r--arch/sh/mm/kmap.c4
-rw-r--r--arch/sh/mm/numa.c2
-rw-r--r--arch/sh/mm/pmb-fixed.c45
-rw-r--r--arch/sh/mm/pmb.c268
-rw-r--r--arch/sh/oprofile/Makefile4
-rw-r--r--arch/sh/oprofile/common.c38
-rw-r--r--arch/sh/oprofile/op_impl.h2
-rw-r--r--arch/sh/oprofile/op_model_sh7750.c255
-rw-r--r--drivers/cdrom/gdrom.c10
-rw-r--r--drivers/input/keyboard/sh_keysc.c2
-rw-r--r--drivers/mfd/Kconfig8
-rw-r--r--drivers/mfd/Makefile1
-rw-r--r--drivers/mfd/sh_mobile_sdhi.c156
-rw-r--r--drivers/mmc/host/Kconfig2
-rw-r--r--drivers/rtc/rtc-ds1302.c2
-rw-r--r--drivers/serial/Kconfig2
-rw-r--r--drivers/serial/sh-sci.c59
-rw-r--r--drivers/serial/sh-sci.h2
-rw-r--r--drivers/sh/Makefile1
-rw-r--r--drivers/sh/intc.c123
-rw-r--r--drivers/sh/maple/maple.c4
-rw-r--r--drivers/sh/pfc.c (renamed from arch/sh/kernel/gpio.c)43
-rw-r--r--drivers/video/sh_mobile_lcdcfb.c32
-rw-r--r--include/linux/input/sh_keysc.h (renamed from arch/sh/include/asm/sh_keysc.h)6
-rw-r--r--include/linux/mfd/sh_mobile_sdhi.h8
-rw-r--r--include/linux/sh_intc.h7
-rw-r--r--include/linux/sh_pfc.h96
146 files changed, 4741 insertions, 2405 deletions
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 88cdeb9f72d9..0031a6979f3a 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -16,7 +16,9 @@ config SUPERH
 	select HAVE_IOREMAP_PROT if MMU
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_DMA_API_DEBUG
+	select HAVE_DMA_ATTRS
 	select HAVE_PERF_EVENTS
+	select PERF_USE_VMALLOC
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
@@ -37,6 +39,7 @@ config SUPERH32
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
+	select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_ARCH_KGDB
 	select ARCH_HIBERNATION_POSSIBLE if MMU
@@ -170,6 +173,12 @@ config ARCH_HAS_CPU_IDLE_WAIT
 config IO_TRAPPED
 	bool
 
+config DMA_COHERENT
+	bool
+
+config DMA_NONCOHERENT
+	def_bool !DMA_COHERENT
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
@@ -220,6 +229,7 @@ config CPU_SHX2
 
 config CPU_SHX3
 	bool
+	select DMA_COHERENT
 
 config ARCH_SHMOBILE
 	bool
@@ -761,17 +771,6 @@ config ENTRY_OFFSET
 	default "0x00010000" if PAGE_SIZE_64KB
 	default "0x00000000"
 
-config UBC_WAKEUP
-	bool "Wakeup UBC on startup"
-	depends on CPU_SH4 && !CPU_SH4A
-	help
-	  Selecting this option will wakeup the User Break Controller (UBC) on
-	  startup. Although the UBC is left in an awake state when the processor
-	  comes up, some boot loaders misbehave by putting the UBC to sleep in a
-	  power saving state, which causes issues with things like ptrace().
-
-	  If unsure, say N.
-
 choice
 	prompt "Kernel command line"
 	optional
@@ -818,7 +817,13 @@ config MAPLE
 	 Dreamcast with a serial line terminal or a remote network
 	 connection.
 
-source "arch/sh/drivers/pci/Kconfig"
+config PCI
+	bool "PCI support"
+	depends on SYS_SUPPORTS_PCI
+	help
+	  Find out whether you have a PCI motherboard. PCI is the name of a
+	  bus system, i.e. the way the CPU talks to the other stuff inside
+	  your box. If you have PCI, say Y, otherwise N.
 
 source "drivers/pci/pcie/Kconfig"
 
diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index 66e40aabc600..ac17c5ac550e 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -78,6 +78,9 @@ defaultimage-$(CONFIG_SUPERH32)			:= zImage
 defaultimage-$(CONFIG_SH_SH7785LCR)		:= uImage
 defaultimage-$(CONFIG_SH_RSK)			:= uImage
 defaultimage-$(CONFIG_SH_URQUELL)		:= uImage
+defaultimage-$(CONFIG_SH_MIGOR)			:= uImage
+defaultimage-$(CONFIG_SH_AP325RXA)		:= uImage
+defaultimage-$(CONFIG_SH_7724_SOLUTION_ENGINE)	:= uImage
 defaultimage-$(CONFIG_SH_7206_SOLUTION_ENGINE)	:= vmlinux
 defaultimage-$(CONFIG_SH_7619_SOLUTION_ENGINE)	:= vmlinux
 
@@ -136,6 +139,7 @@ machdir-$(CONFIG_SH_7751_SYSTEMH)		+= mach-systemh
 machdir-$(CONFIG_SH_EDOSK7705)			+= mach-edosk7705
 machdir-$(CONFIG_SH_HIGHLANDER)			+= mach-highlander
 machdir-$(CONFIG_SH_MIGOR)			+= mach-migor
+machdir-$(CONFIG_SH_AP325RXA)			+= mach-ap325rxa
 machdir-$(CONFIG_SH_KFR2R09)			+= mach-kfr2r09
 machdir-$(CONFIG_SH_ECOVEC)			+= mach-ecovec24
 machdir-$(CONFIG_SH_SDK7780)			+= mach-sdk7780
diff --git a/arch/sh/boards/Makefile b/arch/sh/boards/Makefile
index 7baa21090231..ce0f26381784 100644
--- a/arch/sh/boards/Makefile
+++ b/arch/sh/boards/Makefile
@@ -1,7 +1,6 @@
 #
 # Specific board support, not covered by a mach group.
 #
-obj-$(CONFIG_SH_AP325RXA)	+= board-ap325rxa.o
 obj-$(CONFIG_SH_MAGIC_PANEL_R2)	+= board-magicpanelr2.o
 obj-$(CONFIG_SH_SH7785LCR)	+= board-sh7785lcr.o
 obj-$(CONFIG_SH_URQUELL)	+= board-urquell.o
diff --git a/arch/sh/boards/mach-ap325rxa/Makefile b/arch/sh/boards/mach-ap325rxa/Makefile
new file mode 100644
index 000000000000..4cf1774d2613
--- /dev/null
+++ b/arch/sh/boards/mach-ap325rxa/Makefile
@@ -0,0 +1,2 @@
+obj-y	 := setup.o sdram.o
+
diff --git a/arch/sh/boards/mach-ap325rxa/sdram.S b/arch/sh/boards/mach-ap325rxa/sdram.S
new file mode 100644
index 000000000000..db24fbed4fca
--- /dev/null
+++ b/arch/sh/boards/mach-ap325rxa/sdram.S
@@ -0,0 +1,69 @@
+/*
+ * AP325RXA sdram self/auto-refresh setup code
+ *
+ *  Copyright (C) 2009 Magnus Damm
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/sys.h>
+#include <linux/errno.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/suspend.h>
+#include <asm/romimage-macros.h>
+
+/* code to enter and leave self-refresh. must be self-contained.
+ * this code will be copied to on-chip memory and executed from there.
+ */
+	.balign 4
+ENTRY(ap325rxa_sdram_enter_start)
+
+	/* SBSC: disable power down and put in self-refresh mode */
+	mov.l	1f, r4
+	mov.l	2f, r1
+	mov.l	@r4, r2
+	or	r1, r2
+	mov.l   3f, r3
+	and	r3, r2
+	mov.l	r2, @r4
+
+	rts
+	 nop
+
+	.balign 4
+1:	.long	0xfe400008 /* SDCR0 */
+2:	.long	0x00000400
+3:	.long	0xffff7fff
+ENTRY(ap325rxa_sdram_enter_end)
+
+	.balign 4
+ENTRY(ap325rxa_sdram_leave_start)
+
+	/* SBSC: set auto-refresh mode */
+	mov.l	1f, r4
+	mov.l	@r4, r0
+	mov.l   4f, r1
+	and	r1, r0
+	mov.l	r0, @r4
+	mov.l	6f, r4
+	mov.l	8f, r0
+	mov.l	@r4, r1
+	mov	#-1, r4
+	add	r4, r1
+	or	r1, r0
+	mov.l	7f, r1
+	mov.l	r0, @r1
+
+	rts
+	 nop
+
+	.balign 4
+1:	.long	0xfe400008 /* SDCR0 */
+4:	.long	0xfffffbff
+6:	.long   0xfe40001c /* RTCOR */
+7:	.long   0xfe400018 /* RTCNT */
+8:	.long   0xa55a0000
+ENTRY(ap325rxa_sdram_leave_end)
diff --git a/arch/sh/boards/board-ap325rxa.c b/arch/sh/boards/mach-ap325rxa/setup.c
index 2d080732a964..cf9dc12dfeb1 100644
--- a/arch/sh/boards/board-ap325rxa.c
+++ b/arch/sh/boards/mach-ap325rxa/setup.c
@@ -20,8 +20,6 @@
 #include <linux/i2c.h>
 #include <linux/smsc911x.h>
 #include <linux/gpio.h>
-#include <linux/spi/spi.h>
-#include <linux/spi/spi_gpio.h>
 #include <media/ov772x.h>
 #include <media/soc_camera.h>
 #include <media/soc_camera_platform.h>
@@ -29,6 +27,7 @@
 #include <video/sh_mobile_lcdc.h>
 #include <asm/io.h>
 #include <asm/clock.h>
+#include <asm/suspend.h>
 #include <cpu/sh7723.h>
 
 static struct smsc911x_platform_config smsc911x_config = {
@@ -409,17 +408,49 @@ static struct platform_device ceu_device = {
 	},
 };
 
-struct spi_gpio_platform_data sdcard_cn3_platform_data = {
-	.sck = GPIO_PTD0,
-	.mosi = GPIO_PTD1,
-	.miso = GPIO_PTD2,
-	.num_chipselect = 1,
+static struct resource sdhi0_cn3_resources[] = {
+	[0] = {
+		.name	= "SDHI0",
+		.start	= 0x04ce0000,
+		.end	= 0x04ce01ff,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 101,
+		.flags  = IORESOURCE_IRQ,
+	},
 };
 
-static struct platform_device sdcard_cn3_device = {
-	.name		= "spi_gpio",
-	.dev	= {
-		.platform_data	= &sdcard_cn3_platform_data,
+static struct platform_device sdhi0_cn3_device = {
+	.name		= "sh_mobile_sdhi",
+	.id             = 0, /* "sdhi0" clock */
+	.num_resources	= ARRAY_SIZE(sdhi0_cn3_resources),
+	.resource	= sdhi0_cn3_resources,
+	.archdata = {
+		.hwblk_id = HWBLK_SDHI0,
+	},
+};
+
+static struct resource sdhi1_cn7_resources[] = {
+	[0] = {
+		.name	= "SDHI1",
+		.start	= 0x04cf0000,
+		.end	= 0x04cf01ff,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 24,
+		.flags  = IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device sdhi1_cn7_device = {
+	.name		= "sh_mobile_sdhi",
+	.id             = 1, /* "sdhi1" clock */
+	.num_resources	= ARRAY_SIZE(sdhi1_cn7_resources),
+	.resource	= sdhi1_cn7_resources,
+	.archdata = {
+		.hwblk_id = HWBLK_SDHI1,
 	},
 };
 
@@ -470,22 +501,26 @@ static struct platform_device *ap325rxa_devices[] __initdata = {
 	&lcdc_device,
 	&ceu_device,
 	&nand_flash_device,
-	&sdcard_cn3_device,
+	&sdhi0_cn3_device,
+	&sdhi1_cn7_device,
 	&ap325rxa_camera[0],
 	&ap325rxa_camera[1],
 };
 
-static struct spi_board_info ap325rxa_spi_devices[] = {
-	{
-		.modalias = "mmc_spi",
-		.max_speed_hz = 5000000,
-		.chip_select = 0,
-		.controller_data = (void *) GPIO_PTD5,
-	},
-};
+extern char ap325rxa_sdram_enter_start;
+extern char ap325rxa_sdram_enter_end;
+extern char ap325rxa_sdram_leave_start;
+extern char ap325rxa_sdram_leave_end;
 
 static int __init ap325rxa_devices_setup(void)
 {
+	/* register board specific self-refresh code */
+	sh_mobile_register_self_refresh(SUSP_SH_STANDBY | SUSP_SH_SF,
+					&ap325rxa_sdram_enter_start,
+					&ap325rxa_sdram_enter_end,
+					&ap325rxa_sdram_leave_start,
+					&ap325rxa_sdram_leave_end);
+
 	/* LD3 and LD4 LEDs */
 	gpio_request(GPIO_PTX5, NULL); /* RUN */
 	gpio_direction_output(GPIO_PTX5, 1);
@@ -578,12 +613,28 @@ static int __init ap325rxa_devices_setup(void)
 
 	platform_resource_setup_memory(&ceu_device, "ceu", 4 << 20);
 
+	/* SDHI0 - CN3 - SD CARD */
+	gpio_request(GPIO_FN_SDHI0CD_PTD, NULL);
+	gpio_request(GPIO_FN_SDHI0WP_PTD, NULL);
+	gpio_request(GPIO_FN_SDHI0D3_PTD, NULL);
+	gpio_request(GPIO_FN_SDHI0D2_PTD, NULL);
+	gpio_request(GPIO_FN_SDHI0D1_PTD, NULL);
+	gpio_request(GPIO_FN_SDHI0D0_PTD, NULL);
+	gpio_request(GPIO_FN_SDHI0CMD_PTD, NULL);
+	gpio_request(GPIO_FN_SDHI0CLK_PTD, NULL);
+
+	/* SDHI1 - CN7 - MICRO SD CARD */
+	gpio_request(GPIO_FN_SDHI1CD, NULL);
+	gpio_request(GPIO_FN_SDHI1D3, NULL);
+	gpio_request(GPIO_FN_SDHI1D2, NULL);
+	gpio_request(GPIO_FN_SDHI1D1, NULL);
+	gpio_request(GPIO_FN_SDHI1D0, NULL);
+	gpio_request(GPIO_FN_SDHI1CMD, NULL);
+	gpio_request(GPIO_FN_SDHI1CLK, NULL);
+
 	i2c_register_board_info(0, ap325rxa_i2c_devices,
 				ARRAY_SIZE(ap325rxa_i2c_devices));
 
-	spi_register_board_info(ap325rxa_spi_devices,
-				ARRAY_SIZE(ap325rxa_spi_devices));
-
 	return platform_add_devices(ap325rxa_devices,
 				ARRAY_SIZE(ap325rxa_devices));
 }
diff --git a/arch/sh/boards/mach-ecovec24/Makefile b/arch/sh/boards/mach-ecovec24/Makefile
index 51f852151655..e69bc82208fc 100644
--- a/arch/sh/boards/mach-ecovec24/Makefile
+++ b/arch/sh/boards/mach-ecovec24/Makefile
@@ -6,4 +6,4 @@
 # for more details.
 #
 
-obj-y	 := setup.o
\ No newline at end of file
+obj-y	 := setup.o sdram.o
\ No newline at end of file
diff --git a/arch/sh/boards/mach-ecovec24/sdram.S b/arch/sh/boards/mach-ecovec24/sdram.S
new file mode 100644
index 000000000000..833440044407
--- /dev/null
+++ b/arch/sh/boards/mach-ecovec24/sdram.S
@@ -0,0 +1,52 @@
+/*
+ * Ecovec24 sdram self/auto-refresh setup code
+ *
+ *  Copyright (C) 2009 Magnus Damm
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/sys.h>
+#include <linux/errno.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/suspend.h>
+#include <asm/romimage-macros.h>
+
+/* code to enter and leave self-refresh. must be self-contained.
+ * this code will be copied to on-chip memory and executed from there.
+ */
+	.balign 4
+ENTRY(ecovec24_sdram_enter_start)
+
+	/* DBSC: put memory in self-refresh mode */
+
+	ED 0xFD000010, 0x00000000 /* DBEN */
+	ED 0xFD000040, 0x00000000 /* DBRFPDN0 */
+	ED 0xFD000014, 0x00000002 /* DBCMDCNT (PALL) */
+	ED 0xFD000014, 0x00000004 /* DBCMDCNT (REF) */
+	ED 0xFD000040, 0x00000001 /* DBRFPDN0 */
+
+	rts
+	 nop
+
+ENTRY(ecovec24_sdram_enter_end)
+
+	.balign 4
+ENTRY(ecovec24_sdram_leave_start)
+
+	/* DBSC: put memory in auto-refresh mode */
+
+	ED 0xFD000040, 0x00000000 /* DBRFPDN0 */
+	WAIT 1
+	ED 0xFD000014, 0x00000002 /* DBCMDCNT (PALL) */
+	ED 0xFD000014, 0x00000004 /* DBCMDCNT (REF) */
+	ED 0xFD000010, 0x00000001 /* DBEN */
+	ED 0xFD000040, 0x00010000 /* DBRFPDN0 */
+
+	rts
+	 nop
+
+ENTRY(ecovec24_sdram_leave_end)
diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
index 3b1ceb46fa54..826e62326d51 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -20,12 +20,14 @@
 #include <linux/i2c.h>
 #include <linux/i2c/tsc2007.h>
 #include <linux/input.h>
+#include <linux/input/sh_keysc.h>
+#include <linux/mfd/sh_mobile_sdhi.h>
 #include <video/sh_mobile_lcdc.h>
 #include <media/sh_mobile_ceu.h>
 #include <asm/heartbeat.h>
 #include <asm/sh_eth.h>
-#include <asm/sh_keysc.h>
 #include <asm/clock.h>
+#include <asm/suspend.h>
 #include <cpu/sh7724.h>
 
 /*
@@ -147,6 +149,9 @@ static struct platform_device sh_eth_device = {
 	},
 	.num_resources = ARRAY_SIZE(sh_eth_resources),
 	.resource = sh_eth_resources,
+	.archdata = {
+		.hwblk_id = HWBLK_ETHER,
+	},
 };
 
 /* USB0 host */
@@ -185,30 +190,18 @@ static struct platform_device usb0_host_device = {
 	.resource	= usb0_host_resources,
 };
 
-/*
- * USB1
- *
- * CN5 can use both host/function,
- * and we can determine it by checking PTB[3]
- *
- * This time only USB1 host is supported.
- */
+/* USB1 host/function */
 void usb1_port_power(int port, int power)
 {
-	if (!gpio_get_value(GPIO_PTB3)) {
-		printk(KERN_ERR "USB1 function is not supported\n");
-		return;
-	}
-
 	gpio_set_value(GPIO_PTB5, power);
 }
 
-static struct r8a66597_platdata usb1_host_data = {
+static struct r8a66597_platdata usb1_common_data = {
 	.on_chip = 1,
 	.port_power = usb1_port_power,
 };
 
-static struct resource usb1_host_resources[] = {
+static struct resource usb1_common_resources[] = {
 	[0] = {
 		.start	= 0xa4d90000,
 		.end	= 0xa4d90124 - 1,
@@ -221,16 +214,16 @@ static struct resource usb1_host_resources[] = {
 	},
 };
 
-static struct platform_device usb1_host_device = {
-	.name		= "r8a66597_hcd",
+static struct platform_device usb1_common_device = {
+	/* .name will be added in arch_setup */
 	.id		= 1,
 	.dev = {
 		.dma_mask		= NULL,         /*  not use dma */
 		.coherent_dma_mask	= 0xffffffff,
-		.platform_data		= &usb1_host_data,
+		.platform_data		= &usb1_common_data,
 	},
-	.num_resources	= ARRAY_SIZE(usb1_host_resources),
-	.resource	= usb1_host_resources,
+	.num_resources	= ARRAY_SIZE(usb1_common_resources),
+	.resource	= usb1_common_resources,
 };
 
 /* LCDC */
@@ -428,16 +421,90 @@ static struct i2c_board_info ts_i2c_clients = {
 	.irq		= IRQ0,
 };
 
+/* SHDI0 */
+static void sdhi0_set_pwr(struct platform_device *pdev, int state)
+{
+	gpio_set_value(GPIO_PTB6, state);
+}
+
+static struct sh_mobile_sdhi_info sdhi0_info = {
+	.set_pwr = sdhi0_set_pwr,
+};
+
+static struct resource sdhi0_resources[] = {
+	[0] = {
+		.name	= "SDHI0",
+		.start  = 0x04ce0000,
+		.end    = 0x04ce01ff,
+		.flags  = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start  = 101,
+		.flags  = IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device sdhi0_device = {
+	.name           = "sh_mobile_sdhi",
+	.num_resources  = ARRAY_SIZE(sdhi0_resources),
+	.resource       = sdhi0_resources,
+	.id             = 0,
+	.dev	= {
+		.platform_data	= &sdhi0_info,
+	},
+	.archdata = {
+		.hwblk_id = HWBLK_SDHI0,
+	},
+};
+
+/* SHDI1 */
+static void sdhi1_set_pwr(struct platform_device *pdev, int state)
+{
+	gpio_set_value(GPIO_PTB7, state);
+}
+
+static struct sh_mobile_sdhi_info sdhi1_info = {
+	.set_pwr = sdhi1_set_pwr,
+};
+
+static struct resource sdhi1_resources[] = {
+	[0] = {
+		.name	= "SDHI1",
+		.start  = 0x04cf0000,
+		.end    = 0x04cf01ff,
+		.flags  = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start  = 24,
+		.flags  = IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device sdhi1_device = {
+	.name           = "sh_mobile_sdhi",
+	.num_resources  = ARRAY_SIZE(sdhi1_resources),
+	.resource       = sdhi1_resources,
+	.id             = 1,
+	.dev	= {
+		.platform_data	= &sdhi1_info,
+	},
+	.archdata = {
+		.hwblk_id = HWBLK_SDHI1,
+	},
+};
+
 static struct platform_device *ecovec_devices[] __initdata = {
 	&heartbeat_device,
 	&nor_flash_device,
 	&sh_eth_device,
 	&usb0_host_device,
-	&usb1_host_device, /* USB1 host support */
+	&usb1_common_device,
 	&lcdc_device,
 	&ceu0_device,
 	&ceu1_device,
 	&keysc_device,
+	&sdhi0_device,
+	&sdhi1_device,
 };
 
 #define EEPROM_ADDR 0x50
@@ -466,12 +533,9 @@ static u8 mac_read(struct i2c_adapter *a, u8 command)
 	return buf;
 }
 
-#define MAC_LEN 6
-static void __init sh_eth_init(void)
+static void __init sh_eth_init(struct sh_eth_plat_data *pd)
 {
 	struct i2c_adapter *a = i2c_get_adapter(1);
-	struct clk *eth_clk;
-	u8 mac[MAC_LEN];
 	int i;
 
 	if (!a) {
@@ -479,39 +543,30 @@ static void __init sh_eth_init(void)
 		return;
 	}
 
-	eth_clk = clk_get(NULL, "eth0");
-	if (!eth_clk) {
-		pr_err("can not get eth0 clk\n");
-		return;
-	}
-
 	/* read MAC address frome EEPROM */
-	for (i = 0; i < MAC_LEN; i++) {
-		mac[i] = mac_read(a, 0x10 + i);
+	for (i = 0; i < sizeof(pd->mac_addr); i++) {
+		pd->mac_addr[i] = mac_read(a, 0x10 + i);
 		msleep(10);
 	}
-
-	/* clock enable */
-	clk_enable(eth_clk);
-
-	/* reset sh-eth */
-	ctrl_outl(0x1, SH_ETH_ADDR + 0x0);
-
-	/* set MAC addr */
-	ctrl_outl((mac[0] << 24) |
-		  (mac[1] << 16) |
-		  (mac[2] <<  8) |
-		  (mac[3] <<  0), SH_ETH_MAHR);
-	ctrl_outl((mac[4] <<  8) |
-		  (mac[5] <<  0), SH_ETH_MALR);
-
-	clk_put(eth_clk);
 }
 
 #define PORT_HIZA 0xA4050158
 #define IODRIVEA  0xA405018A
+
+extern char ecovec24_sdram_enter_start;
+extern char ecovec24_sdram_enter_end;
+extern char ecovec24_sdram_leave_start;
+extern char ecovec24_sdram_leave_end;
+
 static int __init arch_setup(void)
 {
+	/* register board specific self-refresh code */
+	sh_mobile_register_self_refresh(SUSP_SH_STANDBY | SUSP_SH_SF,
+					&ecovec24_sdram_enter_start,
+					&ecovec24_sdram_enter_end,
+					&ecovec24_sdram_leave_start,
+					&ecovec24_sdram_leave_end);
+
 	/* enable STATUS0, STATUS2 and PDSTATUS */
 	gpio_request(GPIO_FN_STATUS0, NULL);
 	gpio_request(GPIO_FN_STATUS2, NULL);
@@ -561,6 +616,14 @@ static int __init arch_setup(void)
 	ctrl_outw(0x0600, 0xa40501d4);
 	ctrl_outw(0x0600, 0xa4050192);
 
+	if (gpio_get_value(GPIO_PTB3)) {
+		printk(KERN_INFO "USB1 function is selected\n");
+		usb1_common_device.name = "r8a66597_udc";
+	} else {
+		printk(KERN_INFO "USB1 host is selected\n");
+		usb1_common_device.name = "r8a66597_hcd";
+	}
+
 	/* enable LCDC */
 	gpio_request(GPIO_FN_LCDD23,   NULL);
 	gpio_request(GPIO_FN_LCDD22,   NULL);
@@ -603,8 +666,8 @@ static int __init arch_setup(void)
 	gpio_direction_output(GPIO_PTR1, 0);
 	gpio_direction_output(GPIO_PTA2, 0);
 
-	/* I/O buffer drive ability is low */
-	ctrl_outw((ctrl_inw(IODRIVEA) & ~0x00c0) | 0x0040 , IODRIVEA);
+	/* I/O buffer drive ability is high */
+	ctrl_outw((ctrl_inw(IODRIVEA) & ~0x00c0) | 0x0080 , IODRIVEA);
 
 	if (gpio_get_value(GPIO_PTE6)) {
 		/* DVI */
@@ -710,6 +773,33 @@ static int __init arch_setup(void)
 	gpio_direction_input(GPIO_PTR5);
 	gpio_direction_input(GPIO_PTR6);
 
+	/* enable SDHI0 (needs DS2.4 set to ON) */
+	gpio_request(GPIO_FN_SDHI0CD,  NULL);
+	gpio_request(GPIO_FN_SDHI0WP,  NULL);
+	gpio_request(GPIO_FN_SDHI0CMD, NULL);
+	gpio_request(GPIO_FN_SDHI0CLK, NULL);
+	gpio_request(GPIO_FN_SDHI0D3,  NULL);
+	gpio_request(GPIO_FN_SDHI0D2,  NULL);
+	gpio_request(GPIO_FN_SDHI0D1,  NULL);
+	gpio_request(GPIO_FN_SDHI0D0,  NULL);
+	gpio_request(GPIO_PTB6, NULL);
+	gpio_direction_output(GPIO_PTB6, 0);
+
+	/* enable SDHI1 (needs DS2.6,7 set to ON,OFF) */
+	gpio_request(GPIO_FN_SDHI1CD,  NULL);
+	gpio_request(GPIO_FN_SDHI1WP,  NULL);
+	gpio_request(GPIO_FN_SDHI1CMD, NULL);
+	gpio_request(GPIO_FN_SDHI1CLK, NULL);
+	gpio_request(GPIO_FN_SDHI1D3,  NULL);
+	gpio_request(GPIO_FN_SDHI1D2,  NULL);
+	gpio_request(GPIO_FN_SDHI1D1,  NULL);
+	gpio_request(GPIO_FN_SDHI1D0,  NULL);
+	gpio_request(GPIO_PTB7, NULL);
+	gpio_direction_output(GPIO_PTB7, 0);
+
+	/* I/O buffer drive ability is high for SDHI1 */
+	ctrl_outw((ctrl_inw(IODRIVEA) & ~0x3000) | 0x2000 , IODRIVEA);
+
 	/* enable I2C device */
 	i2c_register_board_info(1, i2c1_devices,
 				ARRAY_SIZE(i2c1_devices));
@@ -721,12 +811,11 @@ arch_initcall(arch_setup);
 
 static int __init devices_setup(void)
 {
-	sh_eth_init();
+	sh_eth_init(&sh_eth_plat);
 	return 0;
 }
 device_initcall(devices_setup);
 
-
 static struct sh_machine_vector mv_ecovec __initmv = {
 	.mv_name	= "R0P7724 (EcoVec)",
 };
diff --git a/arch/sh/boards/mach-highlander/setup.c b/arch/sh/boards/mach-highlander/setup.c
index 566e69d8d729..f663c14d8885 100644
--- a/arch/sh/boards/mach-highlander/setup.c
+++ b/arch/sh/boards/mach-highlander/setup.c
@@ -384,7 +384,7 @@ static unsigned char irl2irq[HL_NR_IRL];
 
 static int highlander_irq_demux(int irq)
 {
-	if (irq >= HL_NR_IRL || !irl2irq[irq])
+	if (irq >= HL_NR_IRL || irq < 0 || !irl2irq[irq])
 		return irq;
 
 	return irl2irq[irq];
diff --git a/arch/sh/boards/mach-kfr2r09/Makefile b/arch/sh/boards/mach-kfr2r09/Makefile
index 5d5867826e3b..4e577a3bf658 100644
--- a/arch/sh/boards/mach-kfr2r09/Makefile
+++ b/arch/sh/boards/mach-kfr2r09/Makefile
@@ -1,2 +1,2 @@
-obj-y	 := setup.o
+obj-y	 := setup.o sdram.o
 obj-$(CONFIG_FB_SH_MOBILE_LCDC)	+=  lcd_wqvga.o
diff --git a/arch/sh/boards/mach-kfr2r09/sdram.S b/arch/sh/boards/mach-kfr2r09/sdram.S
new file mode 100644
index 000000000000..0c9f55bec2fe
--- /dev/null
+++ b/arch/sh/boards/mach-kfr2r09/sdram.S
@@ -0,0 +1,80 @@
+/*
+ * KFR2R09 sdram self/auto-refresh setup code
+ *
+ *  Copyright (C) 2009 Magnus Damm
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/sys.h>
+#include <linux/errno.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/suspend.h>
+#include <asm/romimage-macros.h>
+
+/* code to enter and leave self-refresh. must be self-contained.
+ * this code will be copied to on-chip memory and executed from there.
+ */
+	.balign 4
+ENTRY(kfr2r09_sdram_enter_start)
+
+	/* DBSC: put memory in self-refresh mode */
+
+	ED 0xFD000010, 0x00000000 /* DBEN */
+	ED 0xFD000040, 0x00000000 /* DBRFPDN0 */
+	ED 0xFD000014, 0x00000002 /* DBCMDCNT (PALL) */
+	ED 0xFD000014, 0x00000004 /* DBCMDCNT (REF) */
+	ED 0xFD000040, 0x00000001 /* DBRFPDN0 */
+
+	rts
+	 nop
+
+ENTRY(kfr2r09_sdram_enter_end)
+
+	.balign 4
+ENTRY(kfr2r09_sdram_leave_start)
+
+	/* DBSC: put memory in auto-refresh mode */
+
+	mov.l	@(SH_SLEEP_MODE, r5), r0
+	tst	#SUSP_SH_RSTANDBY, r0
+	bf	resume_rstandby
+
+	ED 0xFD000040, 0x00000000 /* DBRFPDN0 */
+	WAIT 1
+	ED 0xFD000014, 0x00000002 /* DBCMDCNT (PALL) */
+	ED 0xFD000014, 0x00000004 /* DBCMDCNT (REF) */
+	ED 0xFD000010, 0x00000001 /* DBEN */
+	ED 0xFD000040, 0x00010000 /* DBRFPDN0 */
+
+	rts
+	 nop
+
+resume_rstandby:
+
+	/* DBSC: re-initialize and put in auto-refresh */
+
+	ED 0xFD000108, 0x40000301 /* DBPDCNT0 */
+	ED 0xFD000020, 0x011B0002 /* DBCONF */
+	ED 0xFD000030, 0x03060E02 /* DBTR0 */
+	ED 0xFD000034, 0x01020102 /* DBTR1 */
+	ED 0xFD000038, 0x01090406 /* DBTR2 */
+	ED 0xFD000008, 0x00000004 /* DBKIND */
+	ED 0xFD000040, 0x00000001 /* DBRFPDN0 */
+	ED 0xFD000040, 0x00000000 /* DBRFPDN0 */
+	ED 0xFD000018, 0x00000001 /* DBCKECNT */
+	WAIT 1
+	ED 0xFD000010, 0x00000001 /* DBEN */
+	ED 0xFD000044, 0x000004AF /* DBRFPDN1 */
+	ED 0xFD000048, 0x20CF0037 /* DBRFPDN2 */
+	ED 0xFD000014, 0x00000004 /* DBCMDCNT (REF) */
+	ED 0xFD000108, 0x40000300 /* DBPDCNT0 */
+	ED 0xFD000040, 0x00010000 /* DBRFPDN0 */
+
+	rts
+	 nop
+
+ENTRY(kfr2r09_sdram_leave_end)
diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c
index c08d33fe2104..87438d6603d6 100644
--- a/arch/sh/boards/mach-kfr2r09/setup.c
+++ b/arch/sh/boards/mach-kfr2r09/setup.c
@@ -16,13 +16,16 @@
 #include <linux/clk.h>
 #include <linux/gpio.h>
 #include <linux/input.h>
+#include <linux/input/sh_keysc.h>
 #include <linux/i2c.h>
 #include <linux/usb/r8a66597.h>
+#include <media/soc_camera.h>
+#include <media/sh_mobile_ceu.h>
 #include <video/sh_mobile_lcdc.h>
+#include <asm/suspend.h>
 #include <asm/clock.h>
 #include <asm/machvec.h>
 #include <asm/io.h>
-#include <asm/sh_keysc.h>
 #include <cpu/sh7724.h>
 #include <mach/kfr2r09.h>
 
@@ -212,11 +215,154 @@ static struct platform_device kfr2r09_usb0_gadget_device = {
 	.resource	= kfr2r09_usb0_gadget_resources,
 };
 
+static struct sh_mobile_ceu_info sh_mobile_ceu_info = {
+	.flags = SH_CEU_FLAG_USE_8BIT_BUS,
+};
+
+static struct resource kfr2r09_ceu_resources[] = {
+	[0] = {
+		.name	= "CEU",
+		.start	= 0xfe910000,
+		.end	= 0xfe91009f,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start  = 52,
+		.end  = 52,
+		.flags  = IORESOURCE_IRQ,
+	},
+	[2] = {
+		/* place holder for contiguous memory */
+	},
+};
+
+static struct platform_device kfr2r09_ceu_device = {
+	.name		= "sh_mobile_ceu",
+	.id             = 0, /* "ceu0" clock */
+	.num_resources	= ARRAY_SIZE(kfr2r09_ceu_resources),
+	.resource	= kfr2r09_ceu_resources,
+	.dev	= {
+		.platform_data	= &sh_mobile_ceu_info,
+	},
+	.archdata = {
+		.hwblk_id = HWBLK_CEU0,
+	},
+};
+
+static struct i2c_board_info kfr2r09_i2c_camera = {
+	I2C_BOARD_INFO("rj54n1cb0c", 0x50),
+};
+
+static struct clk *camera_clk;
+
+#define DRVCRB 0xA405018C
+static int camera_power(struct device *dev, int mode)
+{
+	int ret;
+
+	if (mode) {
+		long rate;
+
+		camera_clk = clk_get(NULL, "video_clk");
+		if (IS_ERR(camera_clk))
+			return PTR_ERR(camera_clk);
+
+		/* set VIO_CKO clock to 25MHz */
+		rate = clk_round_rate(camera_clk, 25000000);
+		ret = clk_set_rate(camera_clk, rate);
+		if (ret < 0)
+			goto eclkrate;
+
+		/* set DRVCRB
+		 *
+		 * use 1.8 V for VccQ_VIO
+		 * use 2.85V for VccQ_SR
+		 */
+		ctrl_outw((ctrl_inw(DRVCRB) & ~0x0003) | 0x0001, DRVCRB);
+
+		/* reset clear */
+		ret = gpio_request(GPIO_PTB4, NULL);
+		if (ret < 0)
+			goto eptb4;
+		ret = gpio_request(GPIO_PTB7, NULL);
+		if (ret < 0)
+			goto eptb7;
+
+		ret = gpio_direction_output(GPIO_PTB4, 1);
+		if (!ret)
+			ret = gpio_direction_output(GPIO_PTB7, 1);
+		if (ret < 0)
+			goto egpioout;
+		msleep(1);
+
+		ret = clk_enable(camera_clk);	/* start VIO_CKO */
+		if (ret < 0)
+			goto eclkon;
+
+		return 0;
+	}
+
+	ret = 0;
+
+	clk_disable(camera_clk);
+eclkon:
+	gpio_set_value(GPIO_PTB7, 0);
+egpioout:
+	gpio_set_value(GPIO_PTB4, 0);
+	gpio_free(GPIO_PTB7);
+eptb7:
+	gpio_free(GPIO_PTB4);
+eptb4:
+eclkrate:
+	clk_put(camera_clk);
+	return ret;
+}
+
+static struct soc_camera_link rj54n1_link = {
+	.power		= camera_power,
+	.board_info	= &kfr2r09_i2c_camera,
+	.i2c_adapter_id	= 1,
+	.module_name	= "rj54n1cb0c",
+};
+
+static struct platform_device kfr2r09_camera = {
+	.name	= "soc-camera-pdrv",
+	.id	= 0,
+	.dev	= {
+		.platform_data = &rj54n1_link,
+	},
+};
+
+static struct resource kfr2r09_sh_sdhi0_resources[] = {
+	[0] = {
+		.name	= "SDHI0",
+		.start  = 0x04ce0000,
+		.end    = 0x04ce01ff,
+		.flags  = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start  = 101,
+		.flags  = IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device kfr2r09_sh_sdhi0_device = {
+	.name           = "sh_mobile_sdhi",
+	.num_resources  = ARRAY_SIZE(kfr2r09_sh_sdhi0_resources),
+	.resource       = kfr2r09_sh_sdhi0_resources,
+	.archdata = {
+		.hwblk_id = HWBLK_SDHI0,
+	},
+};
+
 static struct platform_device *kfr2r09_devices[] __initdata = {
 	&kfr2r09_nor_flash_device,
 	&kfr2r09_nand_flash_device,
 	&kfr2r09_sh_keysc_device,
 	&kfr2r09_sh_lcdc_device,
+	&kfr2r09_ceu_device,
+	&kfr2r09_camera,
+	&kfr2r09_sh_sdhi0_device,
 };
 
 #define BSC_CS0BCR 0xfec10004
@@ -268,11 +414,59 @@ static int kfr2r09_usb0_gadget_i2c_setup(void)
 
 	return 0;
 }
+
+static int kfr2r09_serial_i2c_setup(void)
+{
+	struct i2c_adapter *a;
+	struct i2c_msg msg;
+	unsigned char buf[2];
+	int ret;
+
+	a = i2c_get_adapter(0);
+	if (!a)
+		return -ENODEV;
+
+	/* set bit 6 (the 7th bit) of chip at 0x09, register 0x13 */
+	buf[0] = 0x13;
+	msg.addr = 0x09;
+	msg.buf = buf;
+	msg.len = 1;
+	msg.flags = 0;
+	ret = i2c_transfer(a, &msg, 1);
+	if (ret != 1)
+		return -ENODEV;
+
+	buf[0] = 0;
+	msg.addr = 0x09;
+	msg.buf = buf;
+	msg.len = 1;
+	msg.flags = I2C_M_RD;
+	ret = i2c_transfer(a, &msg, 1);
+	if (ret != 1)
+		return -ENODEV;
+
+	buf[1] = buf[0] | (1 << 6);
+	buf[0] = 0x13;
+	msg.addr = 0x09;
+	msg.buf = buf;
+	msg.len = 2;
+	msg.flags = 0;
+	ret = i2c_transfer(a, &msg, 1);
+	if (ret != 1)
+		return -ENODEV;
+
+	return 0;
+}
 #else
 static int kfr2r09_usb0_gadget_i2c_setup(void)
 {
 	return -ENODEV;
 }
+
+static int kfr2r09_serial_i2c_setup(void)
+{
+	return -ENODEV;
+}
 #endif
 
 static int kfr2r09_usb0_gadget_setup(void)
@@ -299,11 +493,27 @@ static int kfr2r09_usb0_gadget_setup(void)
 	return 0;
 }
 
+extern char kfr2r09_sdram_enter_start;
+extern char kfr2r09_sdram_enter_end;
+extern char kfr2r09_sdram_leave_start;
+extern char kfr2r09_sdram_leave_end;
+
 static int __init kfr2r09_devices_setup(void)
 {
+	/* register board specific self-refresh code */
+	sh_mobile_register_self_refresh(SUSP_SH_STANDBY | SUSP_SH_SF |
+					SUSP_SH_RSTANDBY,
+					&kfr2r09_sdram_enter_start,
+					&kfr2r09_sdram_enter_end,
+					&kfr2r09_sdram_leave_start,
+					&kfr2r09_sdram_leave_end);
+
 	/* enable SCIF1 serial port for YC401 console support */
 	gpio_request(GPIO_FN_SCIF1_RXD, NULL);
 	gpio_request(GPIO_FN_SCIF1_TXD, NULL);
+	kfr2r09_serial_i2c_setup(); /* ECONTMSK(bit6=L10ONEN) set 1 */
+	gpio_request(GPIO_PTG3, NULL); /* HPON_ON */
+	gpio_direction_output(GPIO_PTG3, 1); /* HPON_ON = H */
 
 	/* setup NOR flash at CS0 */
 	ctrl_outl(0x36db0400, BSC_CS0BCR);
@@ -361,6 +571,32 @@ static int __init kfr2r09_devices_setup(void)
 	if (kfr2r09_usb0_gadget_setup() == 0)
 		platform_device_register(&kfr2r09_usb0_gadget_device);
 
+	/* CEU */
+	gpio_request(GPIO_FN_VIO_CKO, NULL);
+	gpio_request(GPIO_FN_VIO0_CLK, NULL);
+	gpio_request(GPIO_FN_VIO0_VD, NULL);
+	gpio_request(GPIO_FN_VIO0_HD, NULL);
+	gpio_request(GPIO_FN_VIO0_FLD, NULL);
+	gpio_request(GPIO_FN_VIO0_D7, NULL);
+	gpio_request(GPIO_FN_VIO0_D6, NULL);
+	gpio_request(GPIO_FN_VIO0_D5, NULL);
+	gpio_request(GPIO_FN_VIO0_D4, NULL);
+	gpio_request(GPIO_FN_VIO0_D3, NULL);
+	gpio_request(GPIO_FN_VIO0_D2, NULL);
+	gpio_request(GPIO_FN_VIO0_D1, NULL);
+	gpio_request(GPIO_FN_VIO0_D0, NULL);
+
+	platform_resource_setup_memory(&kfr2r09_ceu_device, "ceu", 4 << 20);
+
+	/* SDHI0 connected to yc304 */
+	gpio_request(GPIO_FN_SDHI0CD, NULL);
+	gpio_request(GPIO_FN_SDHI0D3, NULL);
+	gpio_request(GPIO_FN_SDHI0D2, NULL);
+	gpio_request(GPIO_FN_SDHI0D1, NULL);
+	gpio_request(GPIO_FN_SDHI0D0, NULL);
+	gpio_request(GPIO_FN_SDHI0CMD, NULL);
+	gpio_request(GPIO_FN_SDHI0CLK, NULL);
+
 	return platform_add_devices(kfr2r09_devices,
 				    ARRAY_SIZE(kfr2r09_devices));
 }
diff --git a/arch/sh/boards/mach-migor/Makefile b/arch/sh/boards/mach-migor/Makefile
index 5f231dd25c0e..4601a89e5ac7 100644
--- a/arch/sh/boards/mach-migor/Makefile
+++ b/arch/sh/boards/mach-migor/Makefile
@@ -1,2 +1,2 @@
-obj-y	 := setup.o
+obj-y	 := setup.o sdram.o
 obj-$(CONFIG_SH_MIGOR_QVGA)	+=  lcd_qvga.o
diff --git a/arch/sh/boards/mach-migor/sdram.S b/arch/sh/boards/mach-migor/sdram.S
new file mode 100644
index 000000000000..614aa3a1398c
--- /dev/null
+++ b/arch/sh/boards/mach-migor/sdram.S
@@ -0,0 +1,69 @@
+/*
+ * Migo-R sdram self/auto-refresh setup code
+ *
+ *  Copyright (C) 2009 Magnus Damm
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/sys.h>
+#include <linux/errno.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/suspend.h>
+#include <asm/romimage-macros.h>
+
+/* code to enter and leave self-refresh. must be self-contained.
+ * this code will be copied to on-chip memory and executed from there.
+ */
+	.balign 4
+ENTRY(migor_sdram_enter_start)
+
+	/* SBSC: disable power down and put in self-refresh mode */
+	mov.l	1f, r4
+	mov.l	2f, r1
+	mov.l	@r4, r2
+	or	r1, r2
+	mov.l   3f, r3
+	and	r3, r2
+	mov.l	r2, @r4
+
+	rts
+	 nop
+
+	.balign 4
+1:	.long	0xfe400008 /* SDCR0 */
+2:	.long	0x00000400
+3:	.long	0xffff7fff
+ENTRY(migor_sdram_enter_end)
+
+	.balign 4
+ENTRY(migor_sdram_leave_start)
+
+	/* SBSC: set auto-refresh mode */
+	mov.l	1f, r4
+	mov.l	@r4, r0
+	mov.l   4f, r1
+	and	r1, r0
+	mov.l	r0, @r4
+	mov.l	6f, r4
+	mov.l	8f, r0
+	mov.l	@r4, r1
+	mov	#-1, r4
+	add	r4, r1
+	or	r1, r0
+	mov.l	7f, r1
+	mov.l	r0, @r1
+
+	rts
+	 nop
+
+	.balign 4
+1:	.long	0xfe400008 /* SDCR0 */
+4:	.long	0xfffffbff
+6:	.long   0xfe40001c /* RTCOR */
+7:	.long   0xfe400018 /* RTCNT */
+8:	.long   0xa55a0000
+ENTRY(migor_sdram_leave_end)
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index 6ed1fd32369e..9099b6da9957 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -11,6 +11,7 @@
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
 #include <linux/input.h>
+#include <linux/input/sh_keysc.h>
 #include <linux/mtd/physmap.h>
 #include <linux/mtd/nand.h>
 #include <linux/i2c.h>
@@ -18,8 +19,6 @@
 #include <linux/delay.h>
 #include <linux/clk.h>
 #include <linux/gpio.h>
-#include <linux/spi/spi.h>
-#include <linux/spi/spi_gpio.h>
 #include <video/sh_mobile_lcdc.h>
 #include <media/sh_mobile_ceu.h>
 #include <media/ov772x.h>
@@ -27,7 +26,7 @@
 #include <asm/clock.h>
 #include <asm/machvec.h>
 #include <asm/io.h>
-#include <asm/sh_keysc.h>
+#include <asm/suspend.h>
 #include <mach/migor.h>
 #include <cpu/sh7722.h>
 
@@ -390,17 +389,25 @@ static struct platform_device migor_ceu_device = {
 	},
 };
 
-struct spi_gpio_platform_data sdcard_cn9_platform_data = {
-	.sck = GPIO_PTD0,
-	.mosi = GPIO_PTD1,
-	.miso = GPIO_PTD2,
-	.num_chipselect = 1,
+static struct resource sdhi_cn9_resources[] = {
+	[0] = {
+		.name	= "SDHI",
+		.start	= 0x04ce0000,
+		.end	= 0x04ce01ff,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= 101,
+		.flags  = IORESOURCE_IRQ,
+	},
 };
 
-static struct platform_device sdcard_cn9_device = {
-	.name		= "spi_gpio",
-	.dev	= {
-		.platform_data	= &sdcard_cn9_platform_data,
+static struct platform_device sdhi_cn9_device = {
+	.name		= "sh_mobile_sdhi",
+	.num_resources	= ARRAY_SIZE(sdhi_cn9_resources),
+	.resource	= sdhi_cn9_resources,
+	.archdata = {
+		.hwblk_id = HWBLK_SDHI,
 	},
 };
 
@@ -467,23 +474,24 @@ static struct platform_device *migor_devices[] __initdata = {
 	&migor_ceu_device,
 	&migor_nor_flash_device,
 	&migor_nand_flash_device,
-	&sdcard_cn9_device,
+	&sdhi_cn9_device,
 	&migor_camera[0],
 	&migor_camera[1],
 };
 
-static struct spi_board_info migor_spi_devices[] = {
-	{
-		.modalias = "mmc_spi",
-		.max_speed_hz = 5000000,
-		.chip_select = 0,
-		.controller_data = (void *) GPIO_PTD5,
-	},
-};
+extern char migor_sdram_enter_start;
+extern char migor_sdram_enter_end;
+extern char migor_sdram_leave_start;
+extern char migor_sdram_leave_end;
 
 static int __init migor_devices_setup(void)
 {
-
+	/* register board specific self-refresh code */
+	sh_mobile_register_self_refresh(SUSP_SH_STANDBY | SUSP_SH_SF,
+					&migor_sdram_enter_start,
+					&migor_sdram_enter_end,
+					&migor_sdram_leave_start,
+					&migor_sdram_leave_end);
 #ifdef CONFIG_PM
 	/* Let D11 LED show STATUS0 */
 	gpio_request(GPIO_FN_STATUS0, NULL);
@@ -525,6 +533,16 @@ static int __init migor_devices_setup(void)
 	gpio_request(GPIO_PTA1, NULL);
 	gpio_direction_input(GPIO_PTA1);
 
+	/* SDHI */
+	gpio_request(GPIO_FN_SDHICD, NULL);
+	gpio_request(GPIO_FN_SDHIWP, NULL);
+	gpio_request(GPIO_FN_SDHID3, NULL);
+	gpio_request(GPIO_FN_SDHID2, NULL);
+	gpio_request(GPIO_FN_SDHID1, NULL);
+	gpio_request(GPIO_FN_SDHID0, NULL);
+	gpio_request(GPIO_FN_SDHICMD, NULL);
+	gpio_request(GPIO_FN_SDHICLK, NULL);
+
 	/* Touch Panel */
 	gpio_request(GPIO_FN_IRQ6, NULL);
 
@@ -612,9 +630,6 @@ static int __init migor_devices_setup(void)
 	i2c_register_board_info(0, migor_i2c_devices,
 				ARRAY_SIZE(migor_i2c_devices));
 
-	spi_register_board_info(migor_spi_devices,
-				ARRAY_SIZE(migor_spi_devices));
-
 	return platform_add_devices(migor_devices, ARRAY_SIZE(migor_devices));
 }
 arch_initcall(migor_devices_setup);
diff --git a/arch/sh/boards/mach-r2d/irq.c b/arch/sh/boards/mach-r2d/irq.c
index c70fecedcac4..78d7b27c80da 100644
--- a/arch/sh/boards/mach-r2d/irq.c
+++ b/arch/sh/boards/mach-r2d/irq.c
@@ -116,7 +116,7 @@ static unsigned char irl2irq[R2D_NR_IRL];
 
 int rts7751r2d_irq_demux(int irq)
 {
-	if (irq >= R2D_NR_IRL || !irl2irq[irq])
+	if (irq >= R2D_NR_IRL || irq < 0 || !irl2irq[irq])
 		return irq;
 
 	return irl2irq[irq];
diff --git a/arch/sh/boards/mach-se/7722/irq.c b/arch/sh/boards/mach-se/7722/irq.c
index 02d21a3e2a8f..4eb31acfafef 100644
--- a/arch/sh/boards/mach-se/7722/irq.c
+++ b/arch/sh/boards/mach-se/7722/irq.c
@@ -16,15 +16,17 @@
 #include <asm/io.h>
 #include <mach-se/mach/se7722.h>
 
+unsigned int se7722_fpga_irq[SE7722_FPGA_IRQ_NR] = { 0, };
+
 static void disable_se7722_irq(unsigned int irq)
 {
-	unsigned int bit = irq - SE7722_FPGA_IRQ_BASE;
+	unsigned int bit = (unsigned int)get_irq_chip_data(irq);
 	ctrl_outw(ctrl_inw(IRQ01_MASK) | 1 << bit, IRQ01_MASK);
 }
 
 static void enable_se7722_irq(unsigned int irq)
 {
-	unsigned int bit = irq - SE7722_FPGA_IRQ_BASE;
+	unsigned int bit = (unsigned int)get_irq_chip_data(irq);
 	ctrl_outw(ctrl_inw(IRQ01_MASK) & ~(1 << bit), IRQ01_MASK);
 }
 
@@ -38,18 +40,15 @@ static struct irq_chip se7722_irq_chip __read_mostly = {
 static void se7722_irq_demux(unsigned int irq, struct irq_desc *desc)
 {
 	unsigned short intv = ctrl_inw(IRQ01_STS);
-	struct irq_desc *ext_desc;
-	unsigned int ext_irq = SE7722_FPGA_IRQ_BASE;
+	unsigned int ext_irq = 0;
 
 	intv &= (1 << SE7722_FPGA_IRQ_NR) - 1;
 
-	while (intv) {
-		if (intv & 1) {
-			ext_desc = irq_desc + ext_irq;
-			handle_level_irq(ext_irq, ext_desc);
-		}
-		intv >>= 1;
-		ext_irq++;
+	for (; intv; intv >>= 1, ext_irq++) {
+		if (!(intv & 1))
+			continue;
+
+		generic_handle_irq(se7722_fpga_irq[ext_irq]);
 	}
 }
 
@@ -63,11 +62,18 @@ void __init init_se7722_IRQ(void)
 	ctrl_outw(0, IRQ01_MASK);       /* disable all irqs */
 	ctrl_outw(0x2000, 0xb03fffec);  /* mrshpc irq enable */
 
-	for (i = 0; i < SE7722_FPGA_IRQ_NR; i++)
-		set_irq_chip_and_handler_name(SE7722_FPGA_IRQ_BASE + i,
+	for (i = 0; i < SE7722_FPGA_IRQ_NR; i++) {
+		se7722_fpga_irq[i] = create_irq();
+		if (se7722_fpga_irq[i] < 0)
+			return;
+
+		set_irq_chip_and_handler_name(se7722_fpga_irq[i],
 					      &se7722_irq_chip,
 					      handle_level_irq, "level");
 
+		set_irq_chip_data(se7722_fpga_irq[i], (void *)i);
+	}
+
 	set_irq_chained_handler(IRQ0_IRQ, se7722_irq_demux);
 	set_irq_type(IRQ0_IRQ, IRQ_TYPE_LEVEL_LOW);
 
diff --git a/arch/sh/boards/mach-se/7722/setup.c b/arch/sh/boards/mach-se/7722/setup.c
index 36374078e521..b1cb9425b600 100644
--- a/arch/sh/boards/mach-se/7722/setup.c
+++ b/arch/sh/boards/mach-se/7722/setup.c
@@ -14,6 +14,7 @@
 #include <linux/platform_device.h>
 #include <linux/ata_platform.h>
 #include <linux/input.h>
+#include <linux/input/sh_keysc.h>
 #include <linux/smc91x.h>
 #include <mach-se/mach/se7722.h>
 #include <mach-se/mach/mrshpc.h>
@@ -21,7 +22,6 @@
 #include <asm/clock.h>
 #include <asm/io.h>
 #include <asm/heartbeat.h>
-#include <asm/sh_keysc.h>
 #include <cpu/sh7722.h>
 
 /* Heartbeat */
@@ -60,8 +60,7 @@ static struct resource smc91x_eth_resources[] = {
 		.flags  = IORESOURCE_MEM,
 	},
 	[1] = {
-		.start  = SMC_IRQ,
-		.end    = SMC_IRQ,
+		/* Filled in later */
 		.flags  = IORESOURCE_IRQ,
 	},
 };
@@ -90,8 +89,7 @@ static struct resource cf_ide_resources[] = {
 		.flags  = IORESOURCE_IO,
 	},
 	[2] = {
-		.start  = MRSHPC_IRQ0,
-		.end    = MRSHPC_IRQ0,
+		/* Filled in later */
 		.flags  = IORESOURCE_IRQ,
 	},
 };
@@ -153,6 +151,14 @@ static struct platform_device *se7722_devices[] __initdata = {
 static int __init se7722_devices_setup(void)
 {
 	mrshpc_setup_windows();
+
+	/* Wire-up dynamic vectors */
+	cf_ide_resources[2].start = cf_ide_resources[2].end =
+		se7722_fpga_irq[SE7722_FPGA_IRQ_MRSHPC0];
+
+	smc91x_eth_resources[1].start = smc91x_eth_resources[1].end =
+		se7722_fpga_irq[SE7722_FPGA_IRQ_SMC];
+
 	return platform_add_devices(se7722_devices, ARRAY_SIZE(se7722_devices));
 }
 device_initcall(se7722_devices_setup);
@@ -193,6 +199,5 @@ static void __init se7722_setup(char **cmdline_p)
 static struct sh_machine_vector mv_se7722 __initmv = {
 	.mv_name                = "Solution Engine 7722" ,
 	.mv_setup               = se7722_setup ,
-	.mv_nr_irqs		= SE7722_FPGA_IRQ_BASE + SE7722_FPGA_IRQ_NR,
 	.mv_init_irq		= init_se7722_IRQ,
 };
diff --git a/arch/sh/boards/mach-se/7724/Makefile b/arch/sh/boards/mach-se/7724/Makefile
index 349cbd6ce82d..a08b36830f0e 100644
--- a/arch/sh/boards/mach-se/7724/Makefile
+++ b/arch/sh/boards/mach-se/7724/Makefile
@@ -7,4 +7,4 @@
 #
 #
 
-obj-y	 := setup.o irq.o
\ No newline at end of file
+obj-y	 := setup.o irq.o sdram.o
diff --git a/arch/sh/boards/mach-se/7724/sdram.S b/arch/sh/boards/mach-se/7724/sdram.S
new file mode 100644
index 000000000000..9040167d5022
--- /dev/null
+++ b/arch/sh/boards/mach-se/7724/sdram.S
@@ -0,0 +1,52 @@
+/*
+ * MS7724SE sdram self/auto-refresh setup code
+ *
+ *  Copyright (C) 2009 Magnus Damm
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/sys.h>
+#include <linux/errno.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/suspend.h>
+#include <asm/romimage-macros.h>
+
+/* code to enter and leave self-refresh. must be self-contained.
+ * this code will be copied to on-chip memory and executed from there.
+ */
+	.balign 4
+ENTRY(ms7724se_sdram_enter_start)
+
+	/* DBSC: put memory in self-refresh mode */
+
+	ED 0xFD000010, 0x00000000 /* DBEN */
+	ED 0xFD000040, 0x00000000 /* DBRFPDN0 */
+	ED 0xFD000014, 0x00000002 /* DBCMDCNT (PALL) */
+	ED 0xFD000014, 0x00000004 /* DBCMDCNT (REF) */
+	ED 0xFD000040, 0x00000001 /* DBRFPDN0 */
+
+	rts
+	 nop
+
+ENTRY(ms7724se_sdram_enter_end)
+
+	.balign 4
+ENTRY(ms7724se_sdram_leave_start)
+
+	/* DBSC: put memory in auto-refresh mode */
+
+	ED 0xFD000040, 0x00000000 /* DBRFPDN0 */
+	WAIT 1
+	ED 0xFD000014, 0x00000002 /* DBCMDCNT (PALL) */
+	ED 0xFD000014, 0x00000004 /* DBCMDCNT (REF) */
+	ED 0xFD000010, 0x00000001 /* DBEN */
+	ED 0xFD000040, 0x00010000 /* DBRFPDN0 */
+
+	rts
+	 nop
+
+ENTRY(ms7724se_sdram_leave_end)
diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c
index 0894bba9fade..4b0f0c0dc2b8 100644
--- a/arch/sh/boards/mach-se/7724/setup.c
+++ b/arch/sh/boards/mach-se/7724/setup.c
@@ -19,6 +19,7 @@
 #include <linux/smc91x.h>
 #include <linux/gpio.h>
 #include <linux/input.h>
+#include <linux/input/sh_keysc.h>
 #include <linux/usb/r8a66597.h>
 #include <video/sh_mobile_lcdc.h>
 #include <media/sh_mobile_ceu.h>
@@ -27,7 +28,7 @@
 #include <asm/heartbeat.h>
 #include <asm/sh_eth.h>
 #include <asm/clock.h>
-#include <asm/sh_keysc.h>
+#include <asm/suspend.h>
 #include <cpu/sh7724.h>
 #include <mach-se/mach/se7724.h>
 
@@ -451,6 +452,52 @@ static struct platform_device sh7724_usb1_gadget_device = {
 	.resource	= sh7724_usb1_gadget_resources,
 };
 
+static struct resource sdhi0_cn7_resources[] = {
+	[0] = {
+		.name	= "SDHI0",
+		.start  = 0x04ce0000,
+		.end    = 0x04ce01ff,
+		.flags  = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start  = 101,
+		.flags  = IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device sdhi0_cn7_device = {
+	.name           = "sh_mobile_sdhi",
+	.id		= 0,
+	.num_resources  = ARRAY_SIZE(sdhi0_cn7_resources),
+	.resource       = sdhi0_cn7_resources,
+	.archdata = {
+		.hwblk_id = HWBLK_SDHI0,
+	},
+};
+
+static struct resource sdhi1_cn8_resources[] = {
+	[0] = {
+		.name	= "SDHI1",
+		.start  = 0x04cf0000,
+		.end    = 0x04cf01ff,
+		.flags  = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start  = 24,
+		.flags  = IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device sdhi1_cn8_device = {
+	.name           = "sh_mobile_sdhi",
+	.id		= 1,
+	.num_resources  = ARRAY_SIZE(sdhi1_cn8_resources),
+	.resource       = sdhi1_cn8_resources,
+	.archdata = {
+		.hwblk_id = HWBLK_SDHI1,
+	},
+};
+
 static struct platform_device *ms7724se_devices[] __initdata = {
 	&heartbeat_device,
 	&smc91x_eth_device,
@@ -463,6 +510,8 @@ static struct platform_device *ms7724se_devices[] __initdata = {
 	&sh7724_usb0_host_device,
 	&sh7724_usb1_gadget_device,
 	&fsi_device,
+	&sdhi0_cn7_device,
+	&sdhi1_cn8_device,
 };
 
 #define EEPROM_OP   0xBA206000
@@ -487,7 +536,7 @@ static int __init sh_eth_is_eeprom_ready(void)
 static void __init sh_eth_init(void)
 {
 	int i;
-	u16 mac[3];
+	u16 mac;
 
 	/* check EEPROM status */
 	if (!sh_eth_is_eeprom_ready())
@@ -501,16 +550,10 @@ static void __init sh_eth_init(void)
 		if (!sh_eth_is_eeprom_ready())
 			return;
 
-		mac[i] = ctrl_inw(EEPROM_DATA);
-		mac[i] = ((mac[i] & 0xFF) << 8) | (mac[i] >> 8); /* swap */
+		mac = ctrl_inw(EEPROM_DATA);
+		sh_eth_plat.mac_addr[i << 1] = mac & 0xff;
+		sh_eth_plat.mac_addr[(i << 1) + 1] = mac >> 8;
 	}
-
-	/* reset sh-eth */
-	ctrl_outl(0x1, SH_ETH_ADDR + 0x0);
-
-	/* set MAC addr */
-	ctrl_outl(((mac[0] << 16) | (mac[1])), SH_ETH_MAHR);
-	ctrl_outl((mac[2]), SH_ETH_MALR);
 }
 
 #define SW4140    0xBA201000
@@ -527,11 +570,22 @@ static void __init sh_eth_init(void)
 #define SW41_G    0x4000
 #define SW41_H    0x8000
 
+extern char ms7724se_sdram_enter_start;
+extern char ms7724se_sdram_enter_end;
+extern char ms7724se_sdram_leave_start;
+extern char ms7724se_sdram_leave_end;
+
 static int __init devices_setup(void)
 {
 	u16 sw = ctrl_inw(SW4140); /* select camera, monitor */
 	struct clk *fsia_clk;
 
+	/* register board specific self-refresh code */
+	sh_mobile_register_self_refresh(SUSP_SH_STANDBY | SUSP_SH_SF,
+					&ms7724se_sdram_enter_start,
+					&ms7724se_sdram_enter_end,
+					&ms7724se_sdram_leave_start,
+					&ms7724se_sdram_leave_end);
 	/* Reset Release */
 	ctrl_outw(ctrl_inw(FPGA_OUT) &
 		  ~((1 << 1)  | /* LAN */
@@ -701,6 +755,26 @@ static int __init devices_setup(void)
 	clk_set_rate(&fsimcka_clk, 11000);
 	clk_put(fsia_clk);
 
+	/* SDHI0 connected to cn7 */
+	gpio_request(GPIO_FN_SDHI0CD, NULL);
+	gpio_request(GPIO_FN_SDHI0WP, NULL);
+	gpio_request(GPIO_FN_SDHI0D3, NULL);
+	gpio_request(GPIO_FN_SDHI0D2, NULL);
+	gpio_request(GPIO_FN_SDHI0D1, NULL);
+	gpio_request(GPIO_FN_SDHI0D0, NULL);
+	gpio_request(GPIO_FN_SDHI0CMD, NULL);
+	gpio_request(GPIO_FN_SDHI0CLK, NULL);
+
+	/* SDHI1 connected to cn8 */
+	gpio_request(GPIO_FN_SDHI1CD, NULL);
+	gpio_request(GPIO_FN_SDHI1WP, NULL);
+	gpio_request(GPIO_FN_SDHI1D3, NULL);
+	gpio_request(GPIO_FN_SDHI1D2, NULL);
+	gpio_request(GPIO_FN_SDHI1D1, NULL);
+	gpio_request(GPIO_FN_SDHI1D0, NULL);
+	gpio_request(GPIO_FN_SDHI1CMD, NULL);
+	gpio_request(GPIO_FN_SDHI1CLK, NULL);
+
 	/*
 	 * enable SH-Eth
 	 *
diff --git a/arch/sh/boot/compressed/misc.c b/arch/sh/boot/compressed/misc.c
index fd56a71ca9d9..b51b1fc4baae 100644
--- a/arch/sh/boot/compressed/misc.c
+++ b/arch/sh/boot/compressed/misc.c
@@ -131,7 +131,7 @@ void decompress_kernel(void)
 #ifdef CONFIG_SUPERH64
 	output_addr = (CONFIG_MEMORY_START + 0x2000);
 #else
-	output_addr = PHYSADDR((unsigned long)&_text+PAGE_SIZE);
+	output_addr = __pa((unsigned long)&_text+PAGE_SIZE);
 #ifdef CONFIG_29BIT
 	output_addr |= P2SEG;
 #endif
diff --git a/arch/sh/boot/romimage/Makefile b/arch/sh/boot/romimage/Makefile
index 5806eee84f6f..f473a24a2d92 100644
--- a/arch/sh/boot/romimage/Makefile
+++ b/arch/sh/boot/romimage/Makefile
@@ -4,16 +4,22 @@
 # create an image suitable for burning to flash from zImage
 #
 
-targets		:= vmlinux head.o
+targets		:= vmlinux head.o zeropage.bin piggy.o
 
 OBJECTS = $(obj)/head.o
-LDFLAGS_vmlinux := --oformat $(ld-bfd) -Ttext 0 -e romstart
+LDFLAGS_vmlinux := --oformat $(ld-bfd) -Ttext 0 -e romstart \
+		   -T $(obj)/../../kernel/vmlinux.lds
 
 $(obj)/vmlinux: $(OBJECTS) $(obj)/piggy.o FORCE
 	$(call if_changed,ld)
 	@:
 
+OBJCOPYFLAGS += -j .empty_zero_page
+
+$(obj)/zeropage.bin: vmlinux FORCE
+	$(call if_changed,objcopy)
+
 LDFLAGS_piggy.o := -r --format binary --oformat $(ld-bfd) -T
 
-$(obj)/piggy.o: $(obj)/vmlinux.scr arch/sh/boot/zImage FORCE
+$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/zeropage.bin arch/sh/boot/zImage FORCE
 	$(call if_changed,ld)
diff --git a/arch/sh/boot/romimage/head.S b/arch/sh/boot/romimage/head.S
index 219bc626dd71..93e779a405ec 100644
--- a/arch/sh/boot/romimage/head.S
+++ b/arch/sh/boot/romimage/head.S
@@ -5,6 +5,44 @@
  */
 
 .text
+	#include <asm/page.h>
+
 	.global	romstart
 romstart:
+	/* include board specific setup code */
 #include <mach/romimage.h>
+
+	/* copy the empty_zero_page contents to where vmlinux expects it */
+	mova	empty_zero_page_src, r0
+	mov.l	empty_zero_page_dst, r1
+	mov	#(PAGE_SHIFT - 4), r4
+	mov	#1, r3
+	shld	r4, r3 /* r3 = PAGE_SIZE / 16 */
+
+1:
+	mov.l	@r0, r4
+	mov.l	@(4, r0), r5
+	mov.l	@(8, r0), r6
+	mov.l	@(12, r0), r7
+	add	#16,r0
+	mov.l	r4, @r1
+	mov.l	r5, @(4, r1)
+	mov.l	r6, @(8, r1)
+	mov.l	r7, @(12, r1)
+	dt	r3
+	add	#16,r1
+	bf	1b
+
+	/* jump to the zImage entry point located after the zero page data */
+	mov	#PAGE_SHIFT, r4
+	mov	#1, r1
+	shld	r4, r1
+	mova	empty_zero_page_src, r0
+	add	r1, r0
+	jmp	@r0
+	 nop
+
+	.align 2
+empty_zero_page_dst:
+	.long	_text
+empty_zero_page_src:
diff --git a/arch/sh/drivers/dma/dma-sysfs.c b/arch/sh/drivers/dma/dma-sysfs.c
index 347ee11351ec..1ee631d3725e 100644
--- a/arch/sh/drivers/dma/dma-sysfs.c
+++ b/arch/sh/drivers/dma/dma-sysfs.c
@@ -13,7 +13,6 @@
 #include <linux/init.h>
 #include <linux/sysdev.h>
 #include <linux/platform_device.h>
-#include <linux/module.h>
 #include <linux/err.h>
 #include <linux/string.h>
 #include <asm/dma.h>
@@ -21,7 +20,6 @@
 static struct sysdev_class dma_sysclass = {
 	.name = "dma",
 };
-EXPORT_SYMBOL(dma_sysclass);
 
 static ssize_t dma_show_devices(struct sys_device *dev,
 				struct sysdev_attribute *attr, char *buf)
diff --git a/arch/sh/drivers/pci/Kconfig b/arch/sh/drivers/pci/Kconfig
deleted file mode 100644
index e8db585a6638..000000000000
--- a/arch/sh/drivers/pci/Kconfig
+++ /dev/null
@@ -1,19 +0,0 @@
-config PCI
-	bool "PCI support"
-	depends on SYS_SUPPORTS_PCI
-	help
-	  Find out whether you have a PCI motherboard. PCI is the name of a
-	  bus system, i.e. the way the CPU talks to the other stuff inside
-	  your box. If you have PCI, say Y, otherwise N.
-
-config SH_PCIDMA_NONCOHERENT
-	bool "Cache and PCI noncoherent"
-	depends on PCI
-	default y
-	help
-	  Enable this option if your platform does not have a CPU cache which
-	  remains coherent with PCI DMA. It is safest to say 'Y', although you
-	  will see better performance if you can say 'N', because the PCI DMA
-	  code will not have to flush the CPU's caches. If you have a PCI host
-	  bridge integrated with your SH CPU, refer carefully to the chip specs
-	  to see if you can say 'N' here. Otherwise, leave it as 'Y'.
diff --git a/arch/sh/include/asm/addrspace.h b/arch/sh/include/asm/addrspace.h
index 80d40813e057..99d6b3ecbe22 100644
--- a/arch/sh/include/asm/addrspace.h
+++ b/arch/sh/include/asm/addrspace.h
@@ -28,9 +28,6 @@
 /* Returns the privileged segment base of a given address  */
 #define PXSEG(a)	(((unsigned long)(a)) & 0xe0000000)
 
-/* Returns the physical address of a PnSEG (n=1,2) address   */
-#define PHYSADDR(a)	(((unsigned long)(a)) & 0x1fffffff)
-
 #if defined(CONFIG_29BIT) || defined(CONFIG_PMB_FIXED)
 /*
  * Map an address to a certain privileged segment
@@ -60,5 +57,11 @@
 #define P3_ADDR_MAX		P4SEG
 #endif
 
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_PMB
+extern int __in_29bit_mode(void);
+#endif /* CONFIG_PMB */
+#endif /* __ASSEMBLY__ */
+
 #endif /* __KERNEL__ */
 #endif /* __ASM_SH_ADDRSPACE_H */
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index e8e78137c6f5..b16388d71954 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -78,11 +78,10 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
 
-/* Atomic operations are already serializing on SH */
-#define smp_mb__before_atomic_dec()	barrier()
-#define smp_mb__after_atomic_dec()	barrier()
-#define smp_mb__before_atomic_inc()	barrier()
-#define smp_mb__after_atomic_inc()	barrier()
+#define smp_mb__before_atomic_dec()	smp_mb()
+#define smp_mb__after_atomic_dec()	smp_mb()
+#define smp_mb__before_atomic_inc()	smp_mb()
+#define smp_mb__after_atomic_inc()	smp_mb()
 
 #include <asm-generic/atomic-long.h>
 #include <asm-generic/atomic64.h>
diff --git a/arch/sh/include/asm/bitops.h b/arch/sh/include/asm/bitops.h
index ebe595b7ab1f..98511e4d28cb 100644
--- a/arch/sh/include/asm/bitops.h
+++ b/arch/sh/include/asm/bitops.h
@@ -26,8 +26,8 @@
 /*
  * clear_bit() doesn't provide any barrier for the compiler.
  */
-#define smp_mb__before_clear_bit()	barrier()
-#define smp_mb__after_clear_bit()	barrier()
+#define smp_mb__before_clear_bit()	smp_mb()
+#define smp_mb__after_clear_bit()	smp_mb()
 
 #ifdef CONFIG_SUPERH32
 static inline unsigned long ffz(unsigned long word)
diff --git a/arch/sh/include/asm/bugs.h b/arch/sh/include/asm/bugs.h
index 46260fcbdf4b..02a19a1c033a 100644
--- a/arch/sh/include/asm/bugs.h
+++ b/arch/sh/include/asm/bugs.h
@@ -14,11 +14,15 @@
 
 #include <asm/processor.h>
 
+extern void select_idle_routine(void);
+
 static void __init check_bugs(void)
 {
 	extern unsigned long loops_per_jiffy;
 	char *p = &init_utsname()->machine[2]; /* "sh" */
 
+	select_idle_routine();
+
 	current_cpu_data.loops_per_jiffy = loops_per_jiffy;
 
 	switch (current_cpu_data.family) {
diff --git a/arch/sh/include/asm/dma-mapping.h b/arch/sh/include/asm/dma-mapping.h
index 69d56dd4c968..87ced133a363 100644
--- a/arch/sh/include/asm/dma-mapping.h
+++ b/arch/sh/include/asm/dma-mapping.h
@@ -1,219 +1,108 @@
 #ifndef __ASM_SH_DMA_MAPPING_H
 #define __ASM_SH_DMA_MAPPING_H
 
-#include <linux/mm.h>
-#include <linux/scatterlist.h>
-#include <linux/dma-debug.h>
-#include <asm/cacheflush.h>
-#include <asm/io.h>
+extern struct dma_map_ops *dma_ops;
+extern void no_iommu_init(void);
+
+static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+{
+	return dma_ops;
+}
+
 #include <asm-generic/dma-coherent.h>
+#include <asm-generic/dma-mapping-common.h>
+
+static inline int dma_supported(struct device *dev, u64 mask)
+{
+	struct dma_map_ops *ops = get_dma_ops(dev);
 
-extern struct bus_type pci_bus_type;
+	if (ops->dma_supported)
+		return ops->dma_supported(dev, mask);
 
-#define dma_supported(dev, mask)	(1)
+	return 1;
+}
 
 static inline int dma_set_mask(struct device *dev, u64 mask)
 {
+	struct dma_map_ops *ops = get_dma_ops(dev);
+
 	if (!dev->dma_mask || !dma_supported(dev, mask))
 		return -EIO;
+	if (ops->set_dma_mask)
+		return ops->set_dma_mask(dev, mask);
 
 	*dev->dma_mask = mask;
 
 	return 0;
 }
 
-void *dma_alloc_coherent(struct device *dev, size_t size,
-			 dma_addr_t *dma_handle, gfp_t flag);
-
-void dma_free_coherent(struct device *dev, size_t size,
-		       void *vaddr, dma_addr_t dma_handle);
-
 void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 		    enum dma_data_direction dir);
 
 #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
 #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-#define dma_is_consistent(d, h) (1)
-
-static inline dma_addr_t dma_map_single(struct device *dev,
-					void *ptr, size_t size,
-					enum dma_data_direction dir)
-{
-	dma_addr_t addr = virt_to_phys(ptr);
-
-#if defined(CONFIG_PCI) && !defined(CONFIG_SH_PCIDMA_NONCOHERENT)
-	if (dev->bus == &pci_bus_type)
-		return addr;
-#endif
-	dma_cache_sync(dev, ptr, size, dir);
-
-	debug_dma_map_page(dev, virt_to_page(ptr),
-			   (unsigned long)ptr & ~PAGE_MASK, size,
-			   dir, addr, true);
-
-	return addr;
-}
-
-static inline void dma_unmap_single(struct device *dev, dma_addr_t addr,
-				    size_t size, enum dma_data_direction dir)
-{
-	debug_dma_unmap_page(dev, addr, size, dir, true);
-}
 
-static inline int dma_map_sg(struct device *dev, struct scatterlist *sg,
-			     int nents, enum dma_data_direction dir)
-{
-	int i;
-
-	for (i = 0; i < nents; i++) {
-#if !defined(CONFIG_PCI) || defined(CONFIG_SH_PCIDMA_NONCOHERENT)
-		dma_cache_sync(dev, sg_virt(&sg[i]), sg[i].length, dir);
+#ifdef CONFIG_DMA_COHERENT
+#define dma_is_consistent(d, h) (1)
+#else
+#define dma_is_consistent(d, h) (0)
 #endif
-		sg[i].dma_address = sg_phys(&sg[i]);
-		sg[i].dma_length = sg[i].length;
-	}
 
-	debug_dma_map_sg(dev, sg, nents, i, dir);
-
-	return nents;
-}
-
-static inline void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
-				int nents, enum dma_data_direction dir)
-{
-	debug_dma_unmap_sg(dev, sg, nents, dir);
-}
-
-static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
-				      unsigned long offset, size_t size,
-				      enum dma_data_direction dir)
-{
-	return dma_map_single(dev, page_address(page) + offset, size, dir);
-}
-
-static inline void dma_unmap_page(struct device *dev, dma_addr_t dma_address,
-				  size_t size, enum dma_data_direction dir)
-{
-	dma_unmap_single(dev, dma_address, size, dir);
-}
-
-static inline void __dma_sync_single(struct device *dev, dma_addr_t dma_handle,
-				   size_t size, enum dma_data_direction dir)
+static inline int dma_get_cache_alignment(void)
 {
-#if defined(CONFIG_PCI) && !defined(CONFIG_SH_PCIDMA_NONCOHERENT)
-	if (dev->bus == &pci_bus_type)
-		return;
-#endif
-	dma_cache_sync(dev, phys_to_virt(dma_handle), size, dir);
+	/*
+	 * Each processor family will define its own L1_CACHE_SHIFT,
+	 * L1_CACHE_BYTES wraps to this, so this is always safe.
+	 */
+	return L1_CACHE_BYTES;
 }
 
-static inline void dma_sync_single_range(struct device *dev,
-					 dma_addr_t dma_handle,
-					 unsigned long offset, size_t size,
-					 enum dma_data_direction dir)
+static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
-#if defined(CONFIG_PCI) && !defined(CONFIG_SH_PCIDMA_NONCOHERENT)
-	if (dev->bus == &pci_bus_type)
-		return;
-#endif
-	dma_cache_sync(dev, phys_to_virt(dma_handle) + offset, size, dir);
-}
+	struct dma_map_ops *ops = get_dma_ops(dev);
 
-static inline void __dma_sync_sg(struct device *dev, struct scatterlist *sg,
-			       int nelems, enum dma_data_direction dir)
-{
-	int i;
+	if (ops->mapping_error)
+		return ops->mapping_error(dev, dma_addr);
 
-	for (i = 0; i < nelems; i++) {
-#if !defined(CONFIG_PCI) || defined(CONFIG_SH_PCIDMA_NONCOHERENT)
-		dma_cache_sync(dev, sg_virt(&sg[i]), sg[i].length, dir);
-#endif
-		sg[i].dma_address = sg_phys(&sg[i]);
-		sg[i].dma_length = sg[i].length;
-	}
+	return dma_addr == 0;
 }
 
-static inline void dma_sync_single_for_cpu(struct device *dev,
-					   dma_addr_t dma_handle, size_t size,
-					   enum dma_data_direction dir)
+static inline void *dma_alloc_coherent(struct device *dev, size_t size,
+				       dma_addr_t *dma_handle, gfp_t gfp)
 {
-	__dma_sync_single(dev, dma_handle, size, dir);
-	debug_dma_sync_single_for_cpu(dev, dma_handle, size, dir);
-}
+	struct dma_map_ops *ops = get_dma_ops(dev);
+	void *memory;
 
-static inline void dma_sync_single_for_device(struct device *dev,
-					      dma_addr_t dma_handle,
-					      size_t size,
-					      enum dma_data_direction dir)
-{
-	__dma_sync_single(dev, dma_handle, size, dir);
-	debug_dma_sync_single_for_device(dev, dma_handle, size, dir);
-}
+	if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
+		return memory;
+	if (!ops->alloc_coherent)
+		return NULL;
 
-static inline void dma_sync_single_range_for_cpu(struct device *dev,
-						 dma_addr_t dma_handle,
-						 unsigned long offset,
-						 size_t size,
-						 enum dma_data_direction direction)
-{
-	dma_sync_single_for_cpu(dev, dma_handle+offset, size, direction);
-	debug_dma_sync_single_range_for_cpu(dev, dma_handle,
-					    offset, size, direction);
-}
+	memory = ops->alloc_coherent(dev, size, dma_handle, gfp);
+	debug_dma_alloc_coherent(dev, size, *dma_handle, memory);
 
-static inline void dma_sync_single_range_for_device(struct device *dev,
-						    dma_addr_t dma_handle,
-						    unsigned long offset,
-						    size_t size,
-						    enum dma_data_direction direction)
-{
-	dma_sync_single_for_device(dev, dma_handle+offset, size, direction);
-	debug_dma_sync_single_range_for_device(dev, dma_handle,
-					       offset, size, direction);
+	return memory;
 }
 
-
-static inline void dma_sync_sg_for_cpu(struct device *dev,
-				       struct scatterlist *sg, int nelems,
-				       enum dma_data_direction dir)
+static inline void dma_free_coherent(struct device *dev, size_t size,
+				     void *vaddr, dma_addr_t dma_handle)
 {
-	__dma_sync_sg(dev, sg, nelems, dir);
-	debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir);
-}
+	struct dma_map_ops *ops = get_dma_ops(dev);
 
-static inline void dma_sync_sg_for_device(struct device *dev,
-					  struct scatterlist *sg, int nelems,
-					  enum dma_data_direction dir)
-{
-	__dma_sync_sg(dev, sg, nelems, dir);
-	debug_dma_sync_sg_for_device(dev, sg, nelems, dir);
-}
+	WARN_ON(irqs_disabled());	/* for portability */
 
-static inline int dma_get_cache_alignment(void)
-{
-	/*
-	 * Each processor family will define its own L1_CACHE_SHIFT,
-	 * L1_CACHE_BYTES wraps to this, so this is always safe.
-	 */
-	return L1_CACHE_BYTES;
-}
+	if (dma_release_from_coherent(dev, get_order(size), vaddr))
+		return;
 
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
-	return dma_addr == 0;
+	debug_dma_free_coherent(dev, size, vaddr, dma_handle);
+	if (ops->free_coherent)
+		ops->free_coherent(dev, size, vaddr, dma_handle);
 }
 
-#define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
-
-extern int
-dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
-			    dma_addr_t device_addr, size_t size, int flags);
-
-extern void
-dma_release_declared_memory(struct device *dev);
-
-extern void *
-dma_mark_declared_memory_occupied(struct device *dev,
-				  dma_addr_t device_addr, size_t size);
+/* arch/sh/mm/consistent.c */
+extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
+					dma_addr_t *dma_addr, gfp_t flag);
+extern void dma_generic_free_coherent(struct device *dev, size_t size,
+				      void *vaddr, dma_addr_t dma_handle);
 
 #endif /* __ASM_SH_DMA_MAPPING_H */
diff --git a/arch/sh/include/asm/dwarf.h b/arch/sh/include/asm/dwarf.h
index ced6795891a6..bdccbbfdc0bd 100644
--- a/arch/sh/include/asm/dwarf.h
+++ b/arch/sh/include/asm/dwarf.h
@@ -194,6 +194,12 @@
 #define DWARF_ARCH_RA_REG	17
 
 #ifndef __ASSEMBLY__
+
+#include <linux/compiler.h>
+#include <linux/bug.h>
+#include <linux/list.h>
+#include <linux/module.h>
+
 /*
  * Read either the frame pointer (r14) or the stack pointer (r15).
  * NOTE: this MUST be inlined.
@@ -241,6 +247,12 @@ struct dwarf_cie {
 
 	unsigned long flags;
 #define DWARF_CIE_Z_AUGMENTATION	(1 << 0)
+
+	/*
+	 * 'mod' will be non-NULL if this CIE came from a module's
+	 * .eh_frame section.
+	 */
+	struct module *mod;
 };
 
 /**
@@ -255,6 +267,12 @@ struct dwarf_fde {
 	unsigned char *instructions;
 	unsigned char *end;
 	struct list_head link;
+
+	/*
+	 * 'mod' will be non-NULL if this FDE came from a module's
+	 * .eh_frame section.
+	 */
+	struct module *mod;
 };
 
 /**
@@ -364,6 +382,12 @@ static inline unsigned int DW_CFA_operand(unsigned long insn)
 
 extern struct dwarf_frame *dwarf_unwind_stack(unsigned long,
 					      struct dwarf_frame *);
+extern void dwarf_free_frame(struct dwarf_frame *);
+
+extern int module_dwarf_finalize(const Elf_Ehdr *, const Elf_Shdr *,
+				 struct module *);
+extern void module_dwarf_cleanup(struct module *);
+
 #endif /* !__ASSEMBLY__ */
 
 #define CFI_STARTPROC	.cfi_startproc
@@ -391,6 +415,10 @@ extern struct dwarf_frame *dwarf_unwind_stack(unsigned long,
 static inline void dwarf_unwinder_init(void)
 {
 }
+
+#define module_dwarf_finalize(hdr, sechdrs, me)	(0)
+#define module_dwarf_cleanup(mod)		do { } while (0)
+
 #endif
 
 #endif /* CONFIG_DWARF_UNWINDER */
diff --git a/arch/sh/include/asm/fixmap.h b/arch/sh/include/asm/fixmap.h
index 721fcc4d5e98..5ac1e40a511c 100644
--- a/arch/sh/include/asm/fixmap.h
+++ b/arch/sh/include/asm/fixmap.h
@@ -14,9 +14,9 @@
 #define _ASM_FIXMAP_H
 
 #include <linux/kernel.h>
+#include <linux/threads.h>
 #include <asm/page.h>
 #ifdef CONFIG_HIGHMEM
-#include <linux/threads.h>
 #include <asm/kmap_types.h>
 #endif
 
@@ -46,9 +46,15 @@
  * fix-mapped?
  */
 enum fixed_addresses {
-#define FIX_N_COLOURS 16
+	/*
+	 * The FIX_CMAP entries are used by kmap_coherent() to get virtual
+	 * addresses which are of a known color, and so their values are
+	 * important. __fix_to_virt(FIX_CMAP_END - n) must give an address
+	 * which is the same color as a page (n<<PAGE_SHIFT).
+	 */
+#define FIX_N_COLOURS 8
 	FIX_CMAP_BEGIN,
-	FIX_CMAP_END = FIX_CMAP_BEGIN + FIX_N_COLOURS,
+	FIX_CMAP_END = FIX_CMAP_BEGIN + (FIX_N_COLOURS * NR_CPUS) - 1,
 	FIX_UNCACHED,
 #ifdef CONFIG_HIGHMEM
 	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
diff --git a/arch/sh/include/asm/fpu.h b/arch/sh/include/asm/fpu.h
index 1d3aee04b5cc..fb6bbb9b1cc8 100644
--- a/arch/sh/include/asm/fpu.h
+++ b/arch/sh/include/asm/fpu.h
@@ -18,16 +18,15 @@ static inline void grab_fpu(struct pt_regs *regs)
 
 struct task_struct;
 
-extern void save_fpu(struct task_struct *__tsk, struct pt_regs *regs);
+extern void save_fpu(struct task_struct *__tsk);
+void fpu_state_restore(struct pt_regs *regs);
 #else
 
+#define save_fpu(tsk)		do { } while (0)
 #define release_fpu(regs)	do { } while (0)
 #define grab_fpu(regs)		do { } while (0)
+#define fpu_state_restore(regs)	do { } while (0)
 
-static inline void save_fpu(struct task_struct *tsk, struct pt_regs *regs)
-{
-	clear_tsk_thread_flag(tsk, TIF_USEDFPU);
-}
 #endif
 
 struct user_regset;
@@ -39,19 +38,28 @@ extern int fpregs_get(struct task_struct *target,
 		      unsigned int pos, unsigned int count,
 		      void *kbuf, void __user *ubuf);
 
+static inline void __unlazy_fpu(struct task_struct *tsk, struct pt_regs *regs)
+{
+	if (task_thread_info(tsk)->status & TS_USEDFPU) {
+		task_thread_info(tsk)->status &= ~TS_USEDFPU;
+		save_fpu(tsk);
+		release_fpu(regs);
+	} else
+		tsk->fpu_counter = 0;
+}
+
 static inline void unlazy_fpu(struct task_struct *tsk, struct pt_regs *regs)
 {
 	preempt_disable();
-	if (test_tsk_thread_flag(tsk, TIF_USEDFPU))
-		save_fpu(tsk, regs);
+	__unlazy_fpu(tsk, regs);
 	preempt_enable();
 }
 
 static inline void clear_fpu(struct task_struct *tsk, struct pt_regs *regs)
 {
 	preempt_disable();
-	if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) {
-		clear_tsk_thread_flag(tsk, TIF_USEDFPU);
+	if (task_thread_info(tsk)->status & TS_USEDFPU) {
+		task_thread_info(tsk)->status &= ~TS_USEDFPU;
 		release_fpu(regs);
 	}
 	preempt_enable();
diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h
index 12f3a31f20af..13e9966464c2 100644
--- a/arch/sh/include/asm/ftrace.h
+++ b/arch/sh/include/asm/ftrace.h
@@ -35,4 +35,21 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
 #endif /* __ASSEMBLY__ */
 #endif /* CONFIG_FUNCTION_TRACER */
 
+#ifndef __ASSEMBLY__
+
+/* arch/sh/kernel/return_address.c */
+extern void *return_address(unsigned int);
+
+#define HAVE_ARCH_CALLER_ADDR
+
+#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
+#define CALLER_ADDR1 ((unsigned long)return_address(1))
+#define CALLER_ADDR2 ((unsigned long)return_address(2))
+#define CALLER_ADDR3 ((unsigned long)return_address(3))
+#define CALLER_ADDR4 ((unsigned long)return_address(4))
+#define CALLER_ADDR5 ((unsigned long)return_address(5))
+#define CALLER_ADDR6 ((unsigned long)return_address(6))
+
+#endif /* __ASSEMBLY__ */
+
 #endif /* __ASM_SH_FTRACE_H */
diff --git a/arch/sh/include/asm/gpio.h b/arch/sh/include/asm/gpio.h
index 61f93da2c62e..f8d9a731e903 100644
--- a/arch/sh/include/asm/gpio.h
+++ b/arch/sh/include/asm/gpio.h
@@ -20,7 +20,7 @@
 #endif
 
 #define ARCH_NR_GPIOS 512
-#include <asm-generic/gpio.h>
+#include <linux/sh_pfc.h>
 
 #ifdef CONFIG_GPIOLIB
 
@@ -53,84 +53,4 @@ static inline int irq_to_gpio(unsigned int irq)
 
 #endif /* CONFIG_GPIOLIB */
 
-typedef unsigned short pinmux_enum_t;
-typedef unsigned short pinmux_flag_t;
-
-#define PINMUX_TYPE_NONE            0
-#define PINMUX_TYPE_FUNCTION        1
-#define PINMUX_TYPE_GPIO            2
-#define PINMUX_TYPE_OUTPUT          3
-#define PINMUX_TYPE_INPUT           4
-#define PINMUX_TYPE_INPUT_PULLUP    5
-#define PINMUX_TYPE_INPUT_PULLDOWN  6
-
-#define PINMUX_FLAG_TYPE            (0x7)
-#define PINMUX_FLAG_WANT_PULLUP     (1 << 3)
-#define PINMUX_FLAG_WANT_PULLDOWN   (1 << 4)
-
-#define PINMUX_FLAG_DBIT_SHIFT      5
-#define PINMUX_FLAG_DBIT            (0x1f << PINMUX_FLAG_DBIT_SHIFT)
-#define PINMUX_FLAG_DREG_SHIFT      10
-#define PINMUX_FLAG_DREG            (0x3f << PINMUX_FLAG_DREG_SHIFT)
-
-struct pinmux_gpio {
-	pinmux_enum_t enum_id;
-	pinmux_flag_t flags;
-};
-
-#define PINMUX_GPIO(gpio, data_or_mark) [gpio] = { data_or_mark }
-#define PINMUX_DATA(data_or_mark, ids...) data_or_mark, ids, 0
-
-struct pinmux_cfg_reg {
-	unsigned long reg, reg_width, field_width;
-	unsigned long *cnt;
-	pinmux_enum_t *enum_ids;
-};
-
-#define PINMUX_CFG_REG(name, r, r_width, f_width) \
-	.reg = r, .reg_width = r_width, .field_width = f_width,		\
-	.cnt = (unsigned long [r_width / f_width]) {}, \
-	.enum_ids = (pinmux_enum_t [(r_width / f_width) * (1 << f_width)]) \
-
-struct pinmux_data_reg {
-	unsigned long reg, reg_width, reg_shadow;
-	pinmux_enum_t *enum_ids;
-};
-
-#define PINMUX_DATA_REG(name, r, r_width) \
-	.reg = r, .reg_width = r_width,	\
-	.enum_ids = (pinmux_enum_t [r_width]) \
-
-struct pinmux_range {
-	pinmux_enum_t begin;
-	pinmux_enum_t end;
-	pinmux_enum_t force;
-};
-
-struct pinmux_info {
-	char *name;
-	pinmux_enum_t reserved_id;
-	struct pinmux_range data;
-	struct pinmux_range input;
-	struct pinmux_range input_pd;
-	struct pinmux_range input_pu;
-	struct pinmux_range output;
-	struct pinmux_range mark;
-	struct pinmux_range function;
-
-	unsigned first_gpio, last_gpio;
-
-	struct pinmux_gpio *gpios;
-	struct pinmux_cfg_reg *cfg_regs;
-	struct pinmux_data_reg *data_regs;
-
-	pinmux_enum_t *gpio_data;
-	unsigned int gpio_data_size;
-
-	unsigned long *gpio_in_use;
-	struct gpio_chip chip;
-};
-
-int register_pinmux(struct pinmux_info *pip);
-
 #endif /* __ASM_SH_GPIO_H */
diff --git a/arch/sh/include/asm/hardirq.h b/arch/sh/include/asm/hardirq.h
index a5be4afa790b..48b191313a99 100644
--- a/arch/sh/include/asm/hardirq.h
+++ b/arch/sh/include/asm/hardirq.h
@@ -1,9 +1,16 @@
 #ifndef __ASM_SH_HARDIRQ_H
 #define __ASM_SH_HARDIRQ_H
 
-extern void ack_bad_irq(unsigned int irq);
-#define ack_bad_irq ack_bad_irq
+#include <linux/threads.h>
+#include <linux/irq.h>
+
+typedef struct {
+	unsigned int __softirq_pending;
+	unsigned int __nmi_count;		/* arch dependent */
+} ____cacheline_aligned irq_cpustat_t;
 
-#include <asm-generic/hardirq.h>
+#include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
+
+extern void ack_bad_irq(unsigned int irq);
 
 #endif /* __ASM_SH_HARDIRQ_H */
diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index 5be45ea4dfec..512cd3e9d0ca 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -90,15 +90,11 @@
 #define ctrl_outl		__raw_writel
 #define ctrl_outq		__raw_writeq
 
+extern unsigned long generic_io_base;
+
 static inline void ctrl_delay(void)
 {
-#ifdef CONFIG_CPU_SH4
-	__raw_readw(CCN_PVR);
-#elif defined(P2SEG)
-	__raw_readw(P2SEG);
-#else
-#error "Need a dummy address for delay"
-#endif
+	__raw_readw(generic_io_base);
 }
 
 #define __BUILD_MEMORY_STRING(bwlq, type)				\
@@ -186,8 +182,6 @@ __BUILD_MEMORY_STRING(q, u64)
 
 #define IO_SPACE_LIMIT 0xffffffff
 
-extern unsigned long generic_io_base;
-
 /*
  * This function provides a method for the generic case where a
  * board-specific ioport_map simply needs to return the port + some
@@ -246,7 +240,7 @@ void __iounmap(void __iomem *addr);
 static inline void __iomem *
 __ioremap_mode(unsigned long offset, unsigned long size, unsigned long flags)
 {
-#if defined(CONFIG_SUPERH32) && !defined(CONFIG_PMB_FIXED)
+#if defined(CONFIG_SUPERH32) && !defined(CONFIG_PMB_FIXED) && !defined(CONFIG_PMB)
 	unsigned long last_addr = offset + size - 1;
 #endif
 	void __iomem *ret;
@@ -255,7 +249,7 @@ __ioremap_mode(unsigned long offset, unsigned long size, unsigned long flags)
 	if (ret)
 		return ret;
 
-#if defined(CONFIG_SUPERH32) && !defined(CONFIG_PMB_FIXED)
+#if defined(CONFIG_SUPERH32) && !defined(CONFIG_PMB_FIXED) && !defined(CONFIG_PMB)
 	/*
 	 * For P1 and P2 space this is trivial, as everything is already
 	 * mapped. Uncached access for P1 addresses are done through P2.
diff --git a/arch/sh/include/asm/irqflags.h b/arch/sh/include/asm/irqflags.h
index 46e71da5be6b..a741153b41c2 100644
--- a/arch/sh/include/asm/irqflags.h
+++ b/arch/sh/include/asm/irqflags.h
@@ -1,34 +1,9 @@
 #ifndef __ASM_SH_IRQFLAGS_H
 #define __ASM_SH_IRQFLAGS_H
 
-#ifdef CONFIG_SUPERH32
-#include "irqflags_32.h"
-#else
-#include "irqflags_64.h"
-#endif
+#define RAW_IRQ_DISABLED	0xf0
+#define RAW_IRQ_ENABLED		0x00
 
-#define raw_local_save_flags(flags) \
-		do { (flags) = __raw_local_save_flags(); } while (0)
-
-static inline int raw_irqs_disabled_flags(unsigned long flags)
-{
-	return (flags != 0);
-}
-
-static inline int raw_irqs_disabled(void)
-{
-	unsigned long flags = __raw_local_save_flags();
-
-	return raw_irqs_disabled_flags(flags);
-}
-
-#define raw_local_irq_save(flags) \
-		do { (flags) = __raw_local_irq_save(); } while (0)
-
-static inline void raw_local_irq_restore(unsigned long flags)
-{
-	if ((flags & 0xf0) != 0xf0)
-		raw_local_irq_enable();
-}
+#include <asm-generic/irqflags.h>
 
 #endif /* __ASM_SH_IRQFLAGS_H */
diff --git a/arch/sh/include/asm/irqflags_32.h b/arch/sh/include/asm/irqflags_32.h
deleted file mode 100644
index 60218f541340..000000000000
--- a/arch/sh/include/asm/irqflags_32.h
+++ /dev/null
@@ -1,99 +0,0 @@
-#ifndef __ASM_SH_IRQFLAGS_32_H
-#define __ASM_SH_IRQFLAGS_32_H
-
-static inline void raw_local_irq_enable(void)
-{
-	unsigned long __dummy0, __dummy1;
-
-	__asm__ __volatile__ (
-		"stc	sr, %0\n\t"
-		"and	%1, %0\n\t"
-#ifdef CONFIG_CPU_HAS_SR_RB
-		"stc	r6_bank, %1\n\t"
-		"or	%1, %0\n\t"
-#endif
-		"ldc	%0, sr\n\t"
-		: "=&r" (__dummy0), "=r" (__dummy1)
-		: "1" (~0x000000f0)
-		: "memory"
-	);
-}
-
-static inline void raw_local_irq_disable(void)
-{
-	unsigned long flags;
-
-	__asm__ __volatile__ (
-		"stc	sr, %0\n\t"
-		"or	#0xf0, %0\n\t"
-		"ldc	%0, sr\n\t"
-		: "=&z" (flags)
-		: /* no inputs */
-		: "memory"
-	);
-}
-
-static inline void set_bl_bit(void)
-{
-	unsigned long __dummy0, __dummy1;
-
-	__asm__ __volatile__ (
-		"stc	sr, %0\n\t"
-		"or	%2, %0\n\t"
-		"and	%3, %0\n\t"
-		"ldc	%0, sr\n\t"
-		: "=&r" (__dummy0), "=r" (__dummy1)
-		: "r" (0x10000000), "r" (0xffffff0f)
-		: "memory"
-	);
-}
-
-static inline void clear_bl_bit(void)
-{
-	unsigned long __dummy0, __dummy1;
-
-	__asm__ __volatile__ (
-		"stc	sr, %0\n\t"
-		"and	%2, %0\n\t"
-		"ldc	%0, sr\n\t"
-		: "=&r" (__dummy0), "=r" (__dummy1)
-		: "1" (~0x10000000)
-		: "memory"
-	);
-}
-
-static inline unsigned long __raw_local_save_flags(void)
-{
-	unsigned long flags;
-
-	__asm__ __volatile__ (
-		"stc	sr, %0\n\t"
-		"and	#0xf0, %0\n\t"
-		: "=&z" (flags)
-		: /* no inputs */
-		: "memory"
-	);
-
-	return flags;
-}
-
-static inline unsigned long __raw_local_irq_save(void)
-{
-	unsigned long flags, __dummy;
-
-	__asm__ __volatile__ (
-		"stc	sr, %1\n\t"
-		"mov	%1, %0\n\t"
-		"or	#0xf0, %0\n\t"
-		"ldc	%0, sr\n\t"
-		"mov	%1, %0\n\t"
-		"and	#0xf0, %0\n\t"
-		: "=&z" (flags), "=&r" (__dummy)
-		: /* no inputs */
-		: "memory"
-	);
-
-	return flags;
-}
-
-#endif /* __ASM_SH_IRQFLAGS_32_H */
diff --git a/arch/sh/include/asm/irqflags_64.h b/arch/sh/include/asm/irqflags_64.h
deleted file mode 100644
index 88f65222c1d4..000000000000
--- a/arch/sh/include/asm/irqflags_64.h
+++ /dev/null
@@ -1,85 +0,0 @@
-#ifndef __ASM_SH_IRQFLAGS_64_H
-#define __ASM_SH_IRQFLAGS_64_H
-
-#include <cpu/registers.h>
-
-#define SR_MASK_LL	0x00000000000000f0LL
-#define SR_BL_LL	0x0000000010000000LL
-
-static inline void raw_local_irq_enable(void)
-{
-	unsigned long long __dummy0, __dummy1 = ~SR_MASK_LL;
-
-	__asm__ __volatile__("getcon	" __SR ", %0\n\t"
-			     "and	%0, %1, %0\n\t"
-			     "putcon	%0, " __SR "\n\t"
-			     : "=&r" (__dummy0)
-			     : "r" (__dummy1));
-}
-
-static inline void raw_local_irq_disable(void)
-{
-	unsigned long long __dummy0, __dummy1 = SR_MASK_LL;
-
-	__asm__ __volatile__("getcon	" __SR ", %0\n\t"
-			     "or	%0, %1, %0\n\t"
-			     "putcon	%0, " __SR "\n\t"
-			     : "=&r" (__dummy0)
-			     : "r" (__dummy1));
-}
-
-static inline void set_bl_bit(void)
-{
-	unsigned long long __dummy0, __dummy1 = SR_BL_LL;
-
-	__asm__ __volatile__("getcon	" __SR ", %0\n\t"
-			     "or	%0, %1, %0\n\t"
-			     "putcon	%0, " __SR "\n\t"
-			     : "=&r" (__dummy0)
-			     : "r" (__dummy1));
-
-}
-
-static inline void clear_bl_bit(void)
-{
-	unsigned long long __dummy0, __dummy1 = ~SR_BL_LL;
-
-	__asm__ __volatile__("getcon	" __SR ", %0\n\t"
-			     "and	%0, %1, %0\n\t"
-			     "putcon	%0, " __SR "\n\t"
-			     : "=&r" (__dummy0)
-			     : "r" (__dummy1));
-}
-
-static inline unsigned long __raw_local_save_flags(void)
-{
-	unsigned long long __dummy = SR_MASK_LL;
-	unsigned long flags;
-
-	__asm__ __volatile__ (
-		"getcon	" __SR ", %0\n\t"
-		"and	%0, %1, %0"
-		: "=&r" (flags)
-		: "r" (__dummy));
-
-	return flags;
-}
-
-static inline unsigned long __raw_local_irq_save(void)
-{
-	unsigned long long __dummy0, __dummy1 = SR_MASK_LL;
-	unsigned long flags;
-
-	__asm__ __volatile__ (
-		"getcon	" __SR ", %1\n\t"
-		"or	%1, r63, %0\n\t"
-		"or	%1, %2, %1\n\t"
-		"putcon	%1, " __SR "\n\t"
-		"and	%0, %2, %0"
-		: "=&r" (flags), "=&r" (__dummy0)
-		: "r" (__dummy1));
-
-	return flags;
-}
-
-#endif /* __ASM_SH_IRQFLAGS_64_H */
diff --git a/arch/sh/include/asm/mmu.h b/arch/sh/include/asm/mmu.h
index f5963037c9d6..c7426ad9926e 100644
--- a/arch/sh/include/asm/mmu.h
+++ b/arch/sh/include/asm/mmu.h
@@ -7,12 +7,16 @@
 #define PMB_PASCR		0xff000070
 #define PMB_IRMCR		0xff000078
 
+#define PASCR_SE		0x80000000
+
 #define PMB_ADDR		0xf6100000
 #define PMB_DATA		0xf7100000
 #define PMB_ENTRY_MAX		16
 #define PMB_E_MASK		0x0000000f
 #define PMB_E_SHIFT		8
 
+#define PMB_PFN_MASK		0xff000000
+
 #define PMB_SZ_16M		0x00000000
 #define PMB_SZ_64M		0x00000010
 #define PMB_SZ_128M		0x00000080
@@ -62,17 +66,10 @@ struct pmb_entry {
 };
 
 /* arch/sh/mm/pmb.c */
-int __set_pmb_entry(unsigned long vpn, unsigned long ppn,
-		    unsigned long flags, int *entry);
-int set_pmb_entry(struct pmb_entry *pmbe);
-void clear_pmb_entry(struct pmb_entry *pmbe);
-struct pmb_entry *pmb_alloc(unsigned long vpn, unsigned long ppn,
-			    unsigned long flags);
-void pmb_free(struct pmb_entry *pmbe);
 long pmb_remap(unsigned long virt, unsigned long phys,
 	       unsigned long size, unsigned long flags);
 void pmb_unmap(unsigned long addr);
+int pmb_init(void);
 #endif /* __ASSEMBLY__ */
 
 #endif /* __MMU_H */
-
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index 4163950cd1c6..67f3999b544e 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -3,8 +3,6 @@
 
 #ifdef __KERNEL__
 
-#include <linux/dma-mapping.h>
-
 /* Can be used to override the logic in pci_scan_bus for skipping
    already-configured bus numbers - to be used for buggy BIOSes
    or architectures with incomplete PCI setup by the loader */
@@ -54,30 +52,18 @@ static inline void pcibios_penalize_isa_irq(int irq, int active)
  * address space.  The networking and block device layers use
  * this boolean for bounce buffer decisions.
  */
-#define PCI_DMA_BUS_IS_PHYS	(1)
-
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <asm/scatterlist.h>
-#include <linux/string.h>
-#include <asm/io.h>
+#define PCI_DMA_BUS_IS_PHYS	(dma_ops->is_phys)
 
 /* pci_unmap_{single,page} being a nop depends upon the
  * configuration.
  */
-#ifdef CONFIG_SH_PCIDMA_NONCOHERENT
-#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)	\
-	dma_addr_t ADDR_NAME;
-#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)		\
-	__u32 LEN_NAME;
-#define pci_unmap_addr(PTR, ADDR_NAME)			\
-	((PTR)->ADDR_NAME)
-#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL)		\
-	(((PTR)->ADDR_NAME) = (VAL))
-#define pci_unmap_len(PTR, LEN_NAME)			\
-	((PTR)->LEN_NAME)
-#define pci_unmap_len_set(PTR, LEN_NAME, VAL)		\
-	(((PTR)->LEN_NAME) = (VAL))
+#ifdef CONFIG_DMA_NONCOHERENT
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)	dma_addr_t ADDR_NAME;
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)		__u32 LEN_NAME;
+#define pci_unmap_addr(PTR, ADDR_NAME)		((PTR)->ADDR_NAME)
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL)	(((PTR)->ADDR_NAME) = (VAL))
+#define pci_unmap_len(PTR, LEN_NAME)		((PTR)->LEN_NAME)
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL)	(((PTR)->LEN_NAME) = (VAL))
 #else
 #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
 #define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
diff --git a/arch/sh/include/asm/perf_event.h b/arch/sh/include/asm/perf_event.h
index 11a302297ab7..3d0c9f36d150 100644
--- a/arch/sh/include/asm/perf_event.h
+++ b/arch/sh/include/asm/perf_event.h
@@ -1,8 +1,35 @@
 #ifndef __ASM_SH_PERF_EVENT_H
 #define __ASM_SH_PERF_EVENT_H
 
-/* SH only supports software events through this interface. */
-static inline void set_perf_event_pending(void) {}
+struct hw_perf_event;
+
+#define MAX_HWEVENTS	2
+
+struct sh_pmu {
+	const char	*name;
+	unsigned int	num_events;
+	void		(*disable_all)(void);
+	void		(*enable_all)(void);
+	void		(*enable)(struct hw_perf_event *, int);
+	void		(*disable)(struct hw_perf_event *, int);
+	u64		(*read)(int);
+	int		(*event_map)(int);
+	unsigned int	max_events;
+	unsigned long	raw_event_mask;
+	const int	(*cache_events)[PERF_COUNT_HW_CACHE_MAX]
+				       [PERF_COUNT_HW_CACHE_OP_MAX]
+				       [PERF_COUNT_HW_CACHE_RESULT_MAX];
+};
+
+/* arch/sh/kernel/perf_event.c */
+extern int register_sh_pmu(struct sh_pmu *);
+extern int reserve_pmc_hardware(void);
+extern void release_pmc_hardware(void);
+
+static inline void set_perf_event_pending(void)
+{
+	/* Nothing to see here, move along. */
+}
 
 #define PERF_EVENT_INDEX_OFFSET	0
 
diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h
index 4f3efa7d5a64..ba3046e4f06f 100644
--- a/arch/sh/include/asm/pgtable.h
+++ b/arch/sh/include/asm/pgtable.h
@@ -75,13 +75,31 @@ static inline unsigned long long neff_sign_extend(unsigned long val)
 #define USER_PTRS_PER_PGD	(TASK_SIZE/PGDIR_SIZE)
 #define FIRST_USER_ADDRESS	0
 
-#ifdef CONFIG_32BIT
-#define PHYS_ADDR_MASK		0xffffffff
+#define PHYS_ADDR_MASK29		0x1fffffff
+#define PHYS_ADDR_MASK32		0xffffffff
+
+#ifdef CONFIG_PMB
+static inline unsigned long phys_addr_mask(void)
+{
+	/* Is the MMU in 29bit mode? */
+	if (__in_29bit_mode())
+		return PHYS_ADDR_MASK29;
+
+	return PHYS_ADDR_MASK32;
+}
+#elif defined(CONFIG_32BIT)
+static inline unsigned long phys_addr_mask(void)
+{
+	return PHYS_ADDR_MASK32;
+}
 #else
-#define PHYS_ADDR_MASK		0x1fffffff
+static inline unsigned long phys_addr_mask(void)
+{
+	return PHYS_ADDR_MASK29;
+}
 #endif
 
-#define PTE_PHYS_MASK		(PHYS_ADDR_MASK & PAGE_MASK)
+#define PTE_PHYS_MASK		(phys_addr_mask() & PAGE_MASK)
 #define PTE_FLAGS_MASK		(~(PTE_PHYS_MASK) << PAGE_SHIFT)
 
 #ifdef CONFIG_SUPERH32
diff --git a/arch/sh/include/asm/pgtable_32.h b/arch/sh/include/asm/pgtable_32.h
index c0d359ce337b..b35435516203 100644
--- a/arch/sh/include/asm/pgtable_32.h
+++ b/arch/sh/include/asm/pgtable_32.h
@@ -108,7 +108,7 @@ static inline unsigned long copy_ptea_attributes(unsigned long x)
 #define _PAGE_CLEAR_FLAGS	(_PAGE_PROTNONE | _PAGE_ACCESSED | _PAGE_FILE)
 #endif
 
-#define _PAGE_FLAGS_HARDWARE_MASK	(PHYS_ADDR_MASK & ~(_PAGE_CLEAR_FLAGS))
+#define _PAGE_FLAGS_HARDWARE_MASK	(phys_addr_mask() & ~(_PAGE_CLEAR_FLAGS))
 
 /* Hardware flags, page size encoding */
 #if !defined(CONFIG_MMU)
diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h
index 9a8714945dc9..1f3d6fab660c 100644
--- a/arch/sh/include/asm/processor_32.h
+++ b/arch/sh/include/asm/processor_32.h
@@ -56,6 +56,7 @@ asmlinkage void __init sh_cpu_init(void);
 #define SR_DSP		0x00001000
 #define SR_IMASK	0x000000f0
 #define SR_FD		0x00008000
+#define SR_MD		0x40000000
 
 /*
  * DSP structure and data
@@ -136,7 +137,7 @@ struct mm_struct;
 extern void release_thread(struct task_struct *);
 
 /* Prepare to copy thread state - unlazy all lazy status */
-#define prepare_to_copy(tsk)	do { } while (0)
+void prepare_to_copy(struct task_struct *tsk);
 
 /*
  * create a kernel thread without removing it from tasklists
diff --git a/arch/sh/include/asm/scatterlist.h b/arch/sh/include/asm/scatterlist.h
index 327cc2e4c97b..e38d1d4c7f6f 100644
--- a/arch/sh/include/asm/scatterlist.h
+++ b/arch/sh/include/asm/scatterlist.h
@@ -1,7 +1,7 @@
 #ifndef __ASM_SH_SCATTERLIST_H
 #define __ASM_SH_SCATTERLIST_H
 
-#define ISA_DMA_THRESHOLD	PHYS_ADDR_MASK
+#define ISA_DMA_THRESHOLD	phys_addr_mask()
 
 #include <asm-generic/scatterlist.h>
 
diff --git a/arch/sh/include/asm/suspend.h b/arch/sh/include/asm/suspend.h
index 5c8ea28ff7a4..fe9c2a1ad047 100644
--- a/arch/sh/include/asm/suspend.h
+++ b/arch/sh/include/asm/suspend.h
@@ -2,6 +2,7 @@
 #define _ASM_SH_SUSPEND_H
 
 #ifndef __ASSEMBLY__
+#include <linux/notifier.h>
 static inline int arch_prepare_suspend(void) { return 0; }
 
 #include <asm/ptrace.h>
@@ -19,6 +20,69 @@ void sh_mobile_setup_cpuidle(void);
 static inline void sh_mobile_setup_cpuidle(void) {}
 #endif
 
+/* notifier chains for pre/post sleep hooks */
+extern struct atomic_notifier_head sh_mobile_pre_sleep_notifier_list;
+extern struct atomic_notifier_head sh_mobile_post_sleep_notifier_list;
+
+/* priority levels for notifiers */
+#define SH_MOBILE_SLEEP_BOARD	0
+#define SH_MOBILE_SLEEP_CPU	1
+#define SH_MOBILE_PRE(x)	(x)
+#define SH_MOBILE_POST(x)	(-(x))
+
+/* board code registration function for self-refresh assembly snippets */
+void sh_mobile_register_self_refresh(unsigned long flags,
+				     void *pre_start, void *pre_end,
+				     void *post_start, void *post_end);
+
+/* register structure for address/data information */
+struct sh_sleep_regs {
+	unsigned long stbcr;
+	unsigned long bar;
+
+	/* MMU */
+	unsigned long pteh;
+	unsigned long ptel;
+	unsigned long ttb;
+	unsigned long tea;
+	unsigned long mmucr;
+	unsigned long ptea;
+	unsigned long pascr;
+	unsigned long irmcr;
+
+	/* Cache */
+	unsigned long ccr;
+	unsigned long ramcr;
+};
+
+/* data area for low-level sleep code */
+struct sh_sleep_data {
+	/* current sleep mode (SUSP_SH_...) */
+	unsigned long mode;
+
+	/* addresses of board specific self-refresh snippets */
+	unsigned long sf_pre;
+	unsigned long sf_post;
+
+	/* address of resume code */
+	unsigned long resume;
+
+	/* register state saved and restored by the assembly code */
+	unsigned long vbr;
+	unsigned long spc;
+	unsigned long sr;
+	unsigned long sp;
+
+	/* structure for keeping register addresses */
+	struct sh_sleep_regs addr;
+
+	/* structure for saving/restoring register state */
+	struct sh_sleep_regs data;
+};
+
+/* a bitmap of supported sleep modes (SUSP_SH..) */
+extern unsigned long sh_mobile_sleep_supported;
+
 #endif
 
 /* flags passed to assembly suspend code */
@@ -27,5 +91,6 @@ static inline void sh_mobile_setup_cpuidle(void) {}
 #define SUSP_SH_RSTANDBY	(1 << 2) /* SH-Mobile R-standby mode */
 #define SUSP_SH_USTANDBY	(1 << 3) /* SH-Mobile U-standby mode */
 #define SUSP_SH_SF		(1 << 4) /* Enable self-refresh */
+#define SUSP_SH_MMU		(1 << 5) /* Save/restore MMU and cache */
 
 #endif /* _ASM_SH_SUSPEND_H */
diff --git a/arch/sh/include/asm/system.h b/arch/sh/include/asm/system.h
index b5c5acdc8c0e..c15415b4b169 100644
--- a/arch/sh/include/asm/system.h
+++ b/arch/sh/include/asm/system.h
@@ -171,10 +171,6 @@ BUILD_TRAP_HANDLER(fpu_error);
 BUILD_TRAP_HANDLER(fpu_state_restore);
 BUILD_TRAP_HANDLER(nmi);
 
-#ifdef CONFIG_BUG
-extern void handle_BUG(struct pt_regs *);
-#endif
-
 #define arch_align_stack(x) (x)
 
 struct mem_access {
diff --git a/arch/sh/include/asm/system_32.h b/arch/sh/include/asm/system_32.h
index 607d413f6168..06814f5b59c7 100644
--- a/arch/sh/include/asm/system_32.h
+++ b/arch/sh/include/asm/system_32.h
@@ -232,4 +232,33 @@ asmlinkage void do_exception_error(unsigned long r4, unsigned long r5,
 				   unsigned long r6, unsigned long r7,
 				   struct pt_regs __regs);
 
+static inline void set_bl_bit(void)
+{
+	unsigned long __dummy0, __dummy1;
+
+	__asm__ __volatile__ (
+		"stc	sr, %0\n\t"
+		"or	%2, %0\n\t"
+		"and	%3, %0\n\t"
+		"ldc	%0, sr\n\t"
+		: "=&r" (__dummy0), "=r" (__dummy1)
+		: "r" (0x10000000), "r" (0xffffff0f)
+		: "memory"
+	);
+}
+
+static inline void clear_bl_bit(void)
+{
+	unsigned long __dummy0, __dummy1;
+
+	__asm__ __volatile__ (
+		"stc	sr, %0\n\t"
+		"and	%2, %0\n\t"
+		"ldc	%0, sr\n\t"
+		: "=&r" (__dummy0), "=r" (__dummy1)
+		: "1" (~0x10000000)
+		: "memory"
+	);
+}
+
 #endif /* __ASM_SH_SYSTEM_32_H */
diff --git a/arch/sh/include/asm/system_64.h b/arch/sh/include/asm/system_64.h
index 8e4a03e7966c..ab1dd917ea87 100644
--- a/arch/sh/include/asm/system_64.h
+++ b/arch/sh/include/asm/system_64.h
@@ -12,6 +12,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  */
+#include <cpu/registers.h>
 #include <asm/processor.h>
 
 /*
@@ -47,4 +48,29 @@ static inline reg_size_t register_align(void *val)
 	return (unsigned long long)(signed long long)(signed long)val;
 }
 
+#define SR_BL_LL	0x0000000010000000LL
+
+static inline void set_bl_bit(void)
+{
+	unsigned long long __dummy0, __dummy1 = SR_BL_LL;
+
+	__asm__ __volatile__("getcon	" __SR ", %0\n\t"
+			     "or	%0, %1, %0\n\t"
+			     "putcon	%0, " __SR "\n\t"
+			     : "=&r" (__dummy0)
+			     : "r" (__dummy1));
+
+}
+
+static inline void clear_bl_bit(void)
+{
+	unsigned long long __dummy0, __dummy1 = ~SR_BL_LL;
+
+	__asm__ __volatile__("getcon	" __SR ", %0\n\t"
+			     "and	%0, %1, %0\n\t"
+			     "putcon	%0, " __SR "\n\t"
+			     : "=&r" (__dummy0)
+			     : "r" (__dummy1));
+}
+
 #endif /* __ASM_SH_SYSTEM_64_H */
diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h
index bdeb9d46d17d..1f3d927e2265 100644
--- a/arch/sh/include/asm/thread_info.h
+++ b/arch/sh/include/asm/thread_info.h
@@ -19,6 +19,7 @@ struct thread_info {
 	struct task_struct	*task;		/* main task structure */
 	struct exec_domain	*exec_domain;	/* execution domain */
 	unsigned long		flags;		/* low level flags */
+	__u32			status;		/* thread synchronous flags */
 	__u32			cpu;
 	int			preempt_count; /* 0 => preemptable, <0 => BUG */
 	mm_segment_t		addr_limit;	/* thread address space */
@@ -50,6 +51,7 @@ struct thread_info {
 	.task		= &tsk,			\
 	.exec_domain	= &default_exec_domain,	\
 	.flags		= 0,			\
+	.status		= 0,			\
 	.cpu		= 0,			\
 	.preempt_count	= INIT_PREEMPT_COUNT,	\
 	.addr_limit	= KERNEL_DS,		\
@@ -111,13 +113,11 @@ extern void free_thread_info(struct thread_info *ti);
 #define TIF_SYSCALL_TRACE	0	/* syscall trace active */
 #define TIF_SIGPENDING		1	/* signal pending */
 #define TIF_NEED_RESCHED	2	/* rescheduling necessary */
-#define TIF_RESTORE_SIGMASK	3	/* restore signal mask in do_signal() */
 #define TIF_SINGLESTEP		4	/* singlestepping active */
 #define TIF_SYSCALL_AUDIT	5	/* syscall auditing active */
 #define TIF_SECCOMP		6	/* secure computing */
 #define TIF_NOTIFY_RESUME	7	/* callback before returning to user */
 #define TIF_SYSCALL_TRACEPOINT	8	/* for ftrace syscall instrumentation */
-#define TIF_USEDFPU		16	/* FPU was used by this task this quantum (SMP) */
 #define TIF_POLLING_NRFLAG	17	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_MEMDIE		18
 #define TIF_FREEZE		19	/* Freezing for suspend */
@@ -125,13 +125,11 @@ extern void free_thread_info(struct thread_info *ti);
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
-#define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
 #define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
-#define _TIF_USEDFPU		(1 << TIF_USEDFPU)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_FREEZE		(1 << TIF_FREEZE)
 
@@ -149,13 +147,33 @@ extern void free_thread_info(struct thread_info *ti);
 /* work to do on any return to u-space */
 #define _TIF_ALLWORK_MASK	(_TIF_SYSCALL_TRACE | _TIF_SIGPENDING      | \
 				 _TIF_NEED_RESCHED  | _TIF_SYSCALL_AUDIT   | \
-				 _TIF_SINGLESTEP    | _TIF_RESTORE_SIGMASK | \
-				 _TIF_NOTIFY_RESUME | _TIF_SYSCALL_TRACEPOINT)
+				 _TIF_SINGLESTEP    | _TIF_NOTIFY_RESUME   | \
+				 _TIF_SYSCALL_TRACEPOINT)
 
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK		(_TIF_ALLWORK_MASK & ~(_TIF_SYSCALL_TRACE | \
 				 _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP))
 
+/*
+ * Thread-synchronous status.
+ *
+ * This is different from the flags in that nobody else
+ * ever touches our thread-synchronous status, so we don't
+ * have to worry about atomic accesses.
+ */
+#define TS_RESTORE_SIGMASK	0x0001	/* restore signal mask in do_signal() */
+#define TS_USEDFPU		0x0002	/* FPU used by this task this quantum */
+
+#ifndef __ASSEMBLY__
+#define HAVE_SET_RESTORE_SIGMASK	1
+static inline void set_restore_sigmask(void)
+{
+	struct thread_info *ti = current_thread_info();
+	ti->status |= TS_RESTORE_SIGMASK;
+	set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags);
+}
+#endif	/* !__ASSEMBLY__ */
+
 #endif /* __KERNEL__ */
 
 #endif /* __ASM_SH_THREAD_INFO_H */
diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h
index 65e7bd2f2240..37cdadd975ac 100644
--- a/arch/sh/include/asm/topology.h
+++ b/arch/sh/include/asm/topology.h
@@ -40,6 +40,14 @@
 
 #endif
 
+#define mc_capable()    (1)
+
+const struct cpumask *cpu_coregroup_mask(unsigned int cpu);
+
+extern cpumask_t cpu_core_map[NR_CPUS];
+
+#define topology_core_cpumask(cpu)	(&cpu_core_map[cpu])
+
 #include <asm-generic/topology.h>
 
 #endif /* _ASM_SH_TOPOLOGY_H */
diff --git a/arch/sh/include/asm/ubc.h b/arch/sh/include/asm/ubc.h
index 4ca4b7717371..9bf961684431 100644
--- a/arch/sh/include/asm/ubc.h
+++ b/arch/sh/include/asm/ubc.h
@@ -60,16 +60,5 @@
 #define BRCR_UBDE		(1 << 0)
 #endif
 
-#ifndef __ASSEMBLY__
-/* arch/sh/kernel/cpu/ubc.S */
-extern void ubc_sleep(void);
-
-#ifdef CONFIG_UBC_WAKEUP
-extern void ubc_wakeup(void);
-#else
-#define ubc_wakeup()	do { } while (0)
-#endif
-#endif
-
 #endif /* __KERNEL__ */
 #endif /* __ASM_SH_UBC_H */
diff --git a/arch/sh/include/asm/watchdog.h b/arch/sh/include/asm/watchdog.h
index 2fe7cee9e43a..19dfff5c8511 100644
--- a/arch/sh/include/asm/watchdog.h
+++ b/arch/sh/include/asm/watchdog.h
@@ -2,6 +2,8 @@
  * include/asm-sh/watchdog.h
  *
  * Copyright (C) 2002, 2003 Paul Mundt
+ * Copyright (C) 2009 Siemens AG
+ * Copyright (C) 2009 Valentin Sitdikov
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
@@ -61,6 +63,61 @@
 #define WTCSR_CKS_2048	0x06
 #define WTCSR_CKS_4096	0x07
 
+#if defined(CONFIG_CPU_SUBTYPE_SH7785) || defined(CONFIG_CPU_SUBTYPE_SH7780)
+/**
+ * 	sh_wdt_read_cnt - Read from Counter
+ * 	Reads back the WTCNT value.
+ */
+static inline __u32 sh_wdt_read_cnt(void)
+{
+	return ctrl_inl(WTCNT_R);
+}
+
+/**
+ *	sh_wdt_write_cnt - Write to Counter
+ *	@val: Value to write
+ *
+ *	Writes the given value @val to the lower byte of the timer counter.
+ *	The upper byte is set manually on each write.
+ */
+static inline void sh_wdt_write_cnt(__u32 val)
+{
+	ctrl_outl((WTCNT_HIGH << 24) | (__u32)val, WTCNT);
+}
+
+/**
+ *	sh_wdt_write_bst - Write to Counter
+ *	@val: Value to write
+ *
+ *	Writes the given value @val to the lower byte of the timer counter.
+ *	The upper byte is set manually on each write.
+ */
+static inline void sh_wdt_write_bst(__u32 val)
+{
+	ctrl_outl((WTBST_HIGH << 24) | (__u32)val, WTBST);
+}
+/**
+ * 	sh_wdt_read_csr - Read from Control/Status Register
+ *
+ *	Reads back the WTCSR value.
+ */
+static inline __u32 sh_wdt_read_csr(void)
+{
+	return ctrl_inl(WTCSR_R);
+}
+
+/**
+ * 	sh_wdt_write_csr - Write to Control/Status Register
+ * 	@val: Value to write
+ *
+ * 	Writes the given value @val to the lower byte of the control/status
+ * 	register. The upper byte is set manually on each write.
+ */
+static inline void sh_wdt_write_csr(__u32 val)
+{
+	ctrl_outl((WTCSR_HIGH << 24) | (__u32)val, WTCSR);
+}
+#else
 /**
  * 	sh_wdt_read_cnt - Read from Counter
  * 	Reads back the WTCNT value.
@@ -103,6 +160,6 @@ static inline void sh_wdt_write_csr(__u8 val)
 {
 	ctrl_outw((WTCSR_HIGH << 8) | (__u16)val, WTCSR);
 }
-
+#endif /* CONFIG_CPU_SUBTYPE_SH7785 || CONFIG_CPU_SUBTYPE_SH7780 */
 #endif /* __KERNEL__ */
 #endif /* __ASM_SH_WATCHDOG_H */
diff --git a/arch/sh/include/cpu-sh4/cpu/watchdog.h b/arch/sh/include/cpu-sh4/cpu/watchdog.h
index 259f6a0ce23d..7672301d0c70 100644
--- a/arch/sh/include/cpu-sh4/cpu/watchdog.h
+++ b/arch/sh/include/cpu-sh4/cpu/watchdog.h
@@ -2,6 +2,8 @@
  * include/asm-sh/cpu-sh4/watchdog.h
  *
  * Copyright (C) 2002, 2003 Paul Mundt
+ * Copyright (C) 2009 Siemens AG
+ * Copyright (C) 2009 Sitdikov Valentin
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
@@ -10,9 +12,20 @@
 #ifndef __ASM_CPU_SH4_WATCHDOG_H
 #define __ASM_CPU_SH4_WATCHDOG_H
 
+#if defined(CONFIG_CPU_SUBTYPE_SH7785) || defined(CONFIG_CPU_SUBTYPE_SH7780)
+/* Prefix definition */
+#define WTBST_HIGH	0x55
+/* Register definitions */
+#define WTCNT_R		0xffcc0010 /*WDTCNT*/
+#define WTCSR		0xffcc0004 /*WDTCSR*/
+#define WTCNT		0xffcc0000 /*WDTST*/
+#define WTST		WTCNT
+#define WTBST		0xffcc0008 /*WDTBST*/
+#else
 /* Register definitions */
 #define WTCNT		0xffc00008
 #define WTCSR		0xffc0000c
+#endif
 
 /* Bit definitions */
 #define WTCSR_TME	0x80
diff --git a/arch/sh/include/mach-ecovec24/mach/partner-jet-setup.txt b/arch/sh/include/mach-ecovec24/mach/partner-jet-setup.txt
index 8b8e4fa1fee9..cc737b807334 100644
--- a/arch/sh/include/mach-ecovec24/mach/partner-jet-setup.txt
+++ b/arch/sh/include/mach-ecovec24/mach/partner-jet-setup.txt
@@ -22,13 +22,12 @@ ED 0xff000010, 0x00000004
 LIST "setup clocks"
 ED 0xa4150024, 0x00004000
 ED 0xa4150000, 0x8E003508
-ED 0xa4150004, 0x00000000
 
 WAIT 1
 
 LIST "BSC"
 ED 0xff800020, 0xa5a50000
-ED 0xfec10000, 0x00000013
+ED 0xfec10000, 0x00001013
 ED 0xfec10004, 0x11110400
 ED 0xfec10024, 0x00000440
 
diff --git a/arch/sh/include/mach-se/mach/se7722.h b/arch/sh/include/mach-se/mach/se7722.h
index e971d9a82f4a..16505bfb8a9e 100644
--- a/arch/sh/include/mach-se/mach/se7722.h
+++ b/arch/sh/include/mach-se/mach/se7722.h
@@ -92,18 +92,11 @@
 #define SE7722_FPGA_IRQ_MRSHPC1	3 /* IRQ1 */
 #define SE7722_FPGA_IRQ_MRSHPC2	4 /* IRQ1 */
 #define SE7722_FPGA_IRQ_MRSHPC3	5 /* IRQ1 */
-
 #define SE7722_FPGA_IRQ_NR	6
-#define SE7722_FPGA_IRQ_BASE	110
-
-#define MRSHPC_IRQ3    	(SE7722_FPGA_IRQ_BASE + SE7722_FPGA_IRQ_MRSHPC3)
-#define MRSHPC_IRQ2    	(SE7722_FPGA_IRQ_BASE + SE7722_FPGA_IRQ_MRSHPC2)
-#define MRSHPC_IRQ1    	(SE7722_FPGA_IRQ_BASE + SE7722_FPGA_IRQ_MRSHPC1)
-#define MRSHPC_IRQ0    	(SE7722_FPGA_IRQ_BASE + SE7722_FPGA_IRQ_MRSHPC0)
-#define SMC_IRQ		(SE7722_FPGA_IRQ_BASE + SE7722_FPGA_IRQ_SMC)
-#define USB_IRQ		(SE7722_FPGA_IRQ_BASE + SE7722_FPGA_IRQ_USB)
 
 /* arch/sh/boards/se/7722/irq.c */
+extern unsigned int se7722_fpga_irq[];
+
 void init_se7722_IRQ(void);
 
 #define __IO_PREFIX		se7722
diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile
index a2d0a40f3848..0471a3eb25ed 100644
--- a/arch/sh/kernel/Makefile
+++ b/arch/sh/kernel/Makefile
@@ -9,8 +9,12 @@ ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_ftrace.o = -pg
 endif
 
-obj-y	:= debugtraps.o dumpstack.o idle.o io.o io_generic.o irq.o	\
-	   machvec.o nmi_debug.o process_$(BITS).o ptrace_$(BITS).o	\
+CFLAGS_REMOVE_return_address.o = -pg
+
+obj-y	:= debugtraps.o dma-nommu.o dumpstack.o 			\
+	   idle.o io.o io_generic.o irq.o				\
+	   irq_$(BITS).o machvec.o nmi_debug.o process_$(BITS).o 	\
+	   ptrace_$(BITS).o return_address.o				\
 	   setup.o signal_$(BITS).o sys_sh.o sys_sh$(BITS).o		\
 	   syscalls_$(BITS).o time.o topology.o traps.o			\
 	   traps_$(BITS).o unwinder.o
@@ -28,13 +32,13 @@ obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-$(CONFIG_IO_TRAPPED)	+= io_trapped.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
-obj-$(CONFIG_GENERIC_GPIO)	+= gpio.o
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_FTRACE_SYSCALLS)	+= ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
 obj-$(CONFIG_DUMP_CODE)		+= disassemble.o
 obj-$(CONFIG_HIBERNATION)	+= swsusp.o
 obj-$(CONFIG_DWARF_UNWINDER)	+= dwarf.o
+obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_callchain.o
 
 obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST)	+= localtimer.o
 
diff --git a/arch/sh/kernel/asm-offsets.c b/arch/sh/kernel/asm-offsets.c
index d218e808294e..08a2be775b6c 100644
--- a/arch/sh/kernel/asm-offsets.c
+++ b/arch/sh/kernel/asm-offsets.c
@@ -34,5 +34,28 @@ int main(void)
 	DEFINE(PBE_NEXT, offsetof(struct pbe, next));
 	DEFINE(SWSUSP_ARCH_REGS_SIZE, sizeof(struct swsusp_arch_regs));
 #endif
+
+	DEFINE(SH_SLEEP_MODE, offsetof(struct sh_sleep_data, mode));
+	DEFINE(SH_SLEEP_SF_PRE, offsetof(struct sh_sleep_data, sf_pre));
+	DEFINE(SH_SLEEP_SF_POST, offsetof(struct sh_sleep_data, sf_post));
+	DEFINE(SH_SLEEP_RESUME, offsetof(struct sh_sleep_data, resume));
+	DEFINE(SH_SLEEP_VBR, offsetof(struct sh_sleep_data, vbr));
+	DEFINE(SH_SLEEP_SPC, offsetof(struct sh_sleep_data, spc));
+	DEFINE(SH_SLEEP_SR, offsetof(struct sh_sleep_data, sr));
+	DEFINE(SH_SLEEP_SP, offsetof(struct sh_sleep_data, sp));
+	DEFINE(SH_SLEEP_BASE_ADDR, offsetof(struct sh_sleep_data, addr));
+	DEFINE(SH_SLEEP_BASE_DATA, offsetof(struct sh_sleep_data, data));
+	DEFINE(SH_SLEEP_REG_STBCR, offsetof(struct sh_sleep_regs, stbcr));
+	DEFINE(SH_SLEEP_REG_BAR, offsetof(struct sh_sleep_regs, bar));
+	DEFINE(SH_SLEEP_REG_PTEH, offsetof(struct sh_sleep_regs, pteh));
+	DEFINE(SH_SLEEP_REG_PTEL, offsetof(struct sh_sleep_regs, ptel));
+	DEFINE(SH_SLEEP_REG_TTB, offsetof(struct sh_sleep_regs, ttb));
+	DEFINE(SH_SLEEP_REG_TEA, offsetof(struct sh_sleep_regs, tea));
+	DEFINE(SH_SLEEP_REG_MMUCR, offsetof(struct sh_sleep_regs, mmucr));
+	DEFINE(SH_SLEEP_REG_PTEA, offsetof(struct sh_sleep_regs, ptea));
+	DEFINE(SH_SLEEP_REG_PASCR, offsetof(struct sh_sleep_regs, pascr));
+	DEFINE(SH_SLEEP_REG_IRMCR, offsetof(struct sh_sleep_regs, irmcr));
+	DEFINE(SH_SLEEP_REG_CCR, offsetof(struct sh_sleep_regs, ccr));
+	DEFINE(SH_SLEEP_REG_RAMCR, offsetof(struct sh_sleep_regs, ramcr));
 	return 0;
 }
diff --git a/arch/sh/kernel/cpu/Makefile b/arch/sh/kernel/cpu/Makefile
index 3d6b9312dc47..d97c803719ec 100644
--- a/arch/sh/kernel/cpu/Makefile
+++ b/arch/sh/kernel/cpu/Makefile
@@ -15,7 +15,6 @@ obj-$(CONFIG_ARCH_SHMOBILE)	+= shmobile/
 
 # Common interfaces.
 
-obj-$(CONFIG_UBC_WAKEUP)	+= ubc.o
 obj-$(CONFIG_SH_ADC)		+= adc.o
 obj-$(CONFIG_SH_CLK_CPG)	+= clock-cpg.o
 
diff --git a/arch/sh/kernel/cpu/init.c b/arch/sh/kernel/cpu/init.c
index e932ebef4738..89b4b76c0d76 100644
--- a/arch/sh/kernel/cpu/init.c
+++ b/arch/sh/kernel/cpu/init.c
@@ -75,16 +75,11 @@ static void __init expmask_init(void)
 	/*
 	 * Future proofing.
 	 *
-	 * Disable support for slottable sleep instruction
-	 * and non-nop instructions in the rte delay slot.
+	 * Disable support for slottable sleep instruction, non-nop
+	 * instructions in the rte delay slot, and associative writes to
+	 * the memory-mapped cache array.
 	 */
-	expmask &= ~(EXPMASK_RTEDS | EXPMASK_BRDSSLP);
-
-	/*
-	 * Enable associative writes to the memory-mapped cache array
-	 * until the cache flush ops have been rewritten.
-	 */
-	expmask |= EXPMASK_MMCAW;
+	expmask &= ~(EXPMASK_RTEDS | EXPMASK_BRDSSLP | EXPMASK_MMCAW);
 
 	__raw_writel(expmask, EXPMASK);
 	ctrl_barrier();
@@ -311,12 +306,12 @@ asmlinkage void __init sh_cpu_init(void)
 	if (fpu_disabled) {
 		printk("FPU Disabled\n");
 		current_cpu_data.flags &= ~CPU_HAS_FPU;
-		disable_fpu();
 	}
 
 	/* FPU initialization */
+	disable_fpu();
 	if ((current_cpu_data.flags & CPU_HAS_FPU)) {
-		clear_thread_flag(TIF_USEDFPU);
+		current_thread_info()->status &= ~TS_USEDFPU;
 		clear_used_math();
 	}
 
@@ -338,17 +333,6 @@ asmlinkage void __init sh_cpu_init(void)
 	}
 #endif
 
-	/*
-	 * Some brain-damaged loaders decided it would be a good idea to put
-	 * the UBC to sleep. This causes some issues when it comes to things
-	 * like PTRACE_SINGLESTEP or doing hardware watchpoints in GDB.  So ..
-	 * we wake it up and hope that all is well.
-	 */
-#ifdef CONFIG_SUPERH32
-	if (raw_smp_processor_id() == 0)
-		ubc_wakeup();
-#endif
-
 	speculative_execution_init();
 	expmask_init();
 }
diff --git a/arch/sh/kernel/cpu/sh2a/fpu.c b/arch/sh/kernel/cpu/sh2a/fpu.c
index 6df2fb98eb30..d395ce5740e7 100644
--- a/arch/sh/kernel/cpu/sh2a/fpu.c
+++ b/arch/sh/kernel/cpu/sh2a/fpu.c
@@ -25,14 +25,12 @@
 
 /*
  * Save FPU registers onto task structure.
- * Assume called with FPU enabled (SR.FD=0).
  */
 void
-save_fpu(struct task_struct *tsk, struct pt_regs *regs)
+save_fpu(struct task_struct *tsk)
 {
 	unsigned long dummy;
 
-	clear_tsk_thread_flag(tsk, TIF_USEDFPU);
 	enable_fpu();
 	asm volatile("sts.l	fpul, @-%0\n\t"
 		     "sts.l	fpscr, @-%0\n\t"
@@ -60,7 +58,6 @@ save_fpu(struct task_struct *tsk, struct pt_regs *regs)
 		     : "memory");
 
 	disable_fpu();
-	release_fpu(regs);
 }
 
 static void
@@ -598,31 +595,31 @@ BUILD_TRAP_HANDLER(fpu_error)
 	struct task_struct *tsk = current;
 	TRAP_HANDLER_DECL;
 
-	save_fpu(tsk, regs);
+	__unlazy_fpu(tsk, regs);
 	if (ieee_fpe_handler(regs)) {
 		tsk->thread.fpu.hard.fpscr &=
 			~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
 		grab_fpu(regs);
 		restore_fpu(tsk);
-		set_tsk_thread_flag(tsk, TIF_USEDFPU);
+		task_thread_info(tsk)->status |= TS_USEDFPU;
 		return;
 	}
 
 	force_sig(SIGFPE, tsk);
 }
 
-BUILD_TRAP_HANDLER(fpu_state_restore)
+void fpu_state_restore(struct pt_regs *regs)
 {
 	struct task_struct *tsk = current;
-	TRAP_HANDLER_DECL;
 
 	grab_fpu(regs);
-	if (!user_mode(regs)) {
+	if (unlikely(!user_mode(regs))) {
 		printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
+		BUG();
 		return;
 	}
 
-	if (used_math()) {
+	if (likely(used_math())) {
 		/* Using the FPU again.  */
 		restore_fpu(tsk);
 	} else	{
@@ -630,5 +627,13 @@ BUILD_TRAP_HANDLER(fpu_state_restore)
 		fpu_init();
 		set_used_math();
 	}
-	set_tsk_thread_flag(tsk, TIF_USEDFPU);
+	task_thread_info(tsk)->status |= TS_USEDFPU;
+	tsk->fpu_counter++;
+}
+
+BUILD_TRAP_HANDLER(fpu_state_restore)
+{
+	TRAP_HANDLER_DECL;
+
+	fpu_state_restore(regs);
 }
diff --git a/arch/sh/kernel/cpu/sh3/entry.S b/arch/sh/kernel/cpu/sh3/entry.S
index bb407ef0b91e..3f7e2a22c7c2 100644
--- a/arch/sh/kernel/cpu/sh3/entry.S
+++ b/arch/sh/kernel/cpu/sh3/entry.S
@@ -297,41 +297,8 @@ ENTRY(vbr_base)
 !
 	.balign 	256,0,256
 general_exception:
-#ifndef CONFIG_CPU_SUBTYPE_SHX3
 	bra	handle_exception
 	 sts	pr, k3		! save original pr value in k3
-#else
-	mov.l	1f, k4
-	mov.l	@k4, k4
-
-	! Is EXPEVT larger than 0x800?
-	mov	#0x8, k0
-	shll8	k0
-	cmp/hs	k0, k4
-	bf	0f
-
-	! then add 0x580 (k2 is 0xd80 or 0xda0)
-	mov	#0x58, k0
-	shll2	k0
-	shll2	k0
-	add	k0, k4
-0:
-	! Setup stack and save DSP context (k0 contains original r15 on return)
-	bsr	prepare_stack
-	 nop
-
-	! Save registers / Switch to bank 0
-	mov		k4, k2		! keep vector in k2
-	mov.l	1f, k4		! SR bits to clear in k4
-	bsr	save_regs	! needs original pr value in k3
-	 nop
-
-	bra	handle_exception_special
-	 nop
-
-	.align	2
-1:	.long	EXPEVT
-#endif
 
 ! prepare_stack()
 ! - roll back gRB
diff --git a/arch/sh/kernel/cpu/sh4/Makefile b/arch/sh/kernel/cpu/sh4/Makefile
index 203b18347b83..3a1dbc709831 100644
--- a/arch/sh/kernel/cpu/sh4/Makefile
+++ b/arch/sh/kernel/cpu/sh4/Makefile
@@ -9,6 +9,11 @@ obj-$(CONFIG_HIBERNATION)		+= $(addprefix ../sh3/, swsusp.o)
 obj-$(CONFIG_SH_FPU)			+= fpu.o softfloat.o
 obj-$(CONFIG_SH_STORE_QUEUES)		+= sq.o
 
+# Perf events
+perf-$(CONFIG_CPU_SUBTYPE_SH7750)	:= perf_event.o
+perf-$(CONFIG_CPU_SUBTYPE_SH7750S)	:= perf_event.o
+perf-$(CONFIG_CPU_SUBTYPE_SH7091)	:= perf_event.o
+
 # CPU subtype setup
 obj-$(CONFIG_CPU_SUBTYPE_SH7750)	+= setup-sh7750.o
 obj-$(CONFIG_CPU_SUBTYPE_SH7750R)	+= setup-sh7750.o
@@ -27,4 +32,5 @@ endif
 # Additional clocks by subtype
 clock-$(CONFIG_CPU_SUBTYPE_SH4_202)	+= clock-sh4-202.o
 
-obj-y	+= $(clock-y)
+obj-y					+= $(clock-y)
+obj-$(CONFIG_PERF_EVENTS)		+= $(perf-y)
diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c
index e3ea5411da6d..e97857aec8a0 100644
--- a/arch/sh/kernel/cpu/sh4/fpu.c
+++ b/arch/sh/kernel/cpu/sh4/fpu.c
@@ -41,13 +41,11 @@ static unsigned int fpu_exception_flags;
 
 /*
  * Save FPU registers onto task structure.
- * Assume called with FPU enabled (SR.FD=0).
  */
-void save_fpu(struct task_struct *tsk, struct pt_regs *regs)
+void save_fpu(struct task_struct *tsk)
 {
 	unsigned long dummy;
 
-	clear_tsk_thread_flag(tsk, TIF_USEDFPU);
 	enable_fpu();
 	asm volatile ("sts.l	fpul, @-%0\n\t"
 		      "sts.l	fpscr, @-%0\n\t"
@@ -92,7 +90,6 @@ void save_fpu(struct task_struct *tsk, struct pt_regs *regs)
 		      :"memory");
 
 	disable_fpu();
-	release_fpu(regs);
 }
 
 static void restore_fpu(struct task_struct *tsk)
@@ -285,7 +282,6 @@ static int ieee_fpe_handler(struct pt_regs *regs)
 		/* fcnvsd */
 		struct task_struct *tsk = current;
 
-		save_fpu(tsk, regs);
 		if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR))
 			/* FPU error */
 			denormal_to_double(&tsk->thread.fpu.hard,
@@ -462,7 +458,7 @@ BUILD_TRAP_HANDLER(fpu_error)
 	struct task_struct *tsk = current;
 	TRAP_HANDLER_DECL;
 
-	save_fpu(tsk, regs);
+	__unlazy_fpu(tsk, regs);
 	fpu_exception_flags = 0;
 	if (ieee_fpe_handler(regs)) {
 		tsk->thread.fpu.hard.fpscr &=
@@ -473,7 +469,7 @@ BUILD_TRAP_HANDLER(fpu_error)
 		tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10);
 		grab_fpu(regs);
 		restore_fpu(tsk);
-		set_tsk_thread_flag(tsk, TIF_USEDFPU);
+		task_thread_info(tsk)->status |= TS_USEDFPU;
 		if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) &
 		     (fpu_exception_flags >> 2)) == 0) {
 			return;
@@ -483,18 +479,18 @@ BUILD_TRAP_HANDLER(fpu_error)
 	force_sig(SIGFPE, tsk);
 }
 
-BUILD_TRAP_HANDLER(fpu_state_restore)
+void fpu_state_restore(struct pt_regs *regs)
 {
 	struct task_struct *tsk = current;
-	TRAP_HANDLER_DECL;
 
 	grab_fpu(regs);
-	if (!user_mode(regs)) {
+	if (unlikely(!user_mode(regs))) {
 		printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
+		BUG();
 		return;
 	}
 
-	if (used_math()) {
+	if (likely(used_math())) {
 		/* Using the FPU again.  */
 		restore_fpu(tsk);
 	} else {
@@ -502,5 +498,13 @@ BUILD_TRAP_HANDLER(fpu_state_restore)
 		fpu_init();
 		set_used_math();
 	}
-	set_tsk_thread_flag(tsk, TIF_USEDFPU);
+	task_thread_info(tsk)->status |= TS_USEDFPU;
+	tsk->fpu_counter++;
+}
+
+BUILD_TRAP_HANDLER(fpu_state_restore)
+{
+	TRAP_HANDLER_DECL;
+
+	fpu_state_restore(regs);
 }
diff --git a/arch/sh/kernel/cpu/sh4/perf_event.c b/arch/sh/kernel/cpu/sh4/perf_event.c
new file mode 100644
index 000000000000..7f9ecc9c2d02
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4/perf_event.c
@@ -0,0 +1,253 @@
+/*
+ * Performance events support for SH7750-style performance counters
+ *
+ *  Copyright (C) 2009  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/perf_event.h>
+#include <asm/processor.h>
+
+#define PM_CR_BASE	0xff000084	/* 16-bit */
+#define PM_CTR_BASE	0xff100004	/* 32-bit */
+
+#define PMCR(n)		(PM_CR_BASE + ((n) * 0x04))
+#define PMCTRH(n)	(PM_CTR_BASE + 0x00 + ((n) * 0x08))
+#define PMCTRL(n)	(PM_CTR_BASE + 0x04 + ((n) * 0x08))
+
+#define PMCR_PMM_MASK	0x0000003f
+
+#define PMCR_CLKF	0x00000100
+#define PMCR_PMCLR	0x00002000
+#define PMCR_PMST	0x00004000
+#define PMCR_PMEN	0x00008000
+
+static struct sh_pmu sh7750_pmu;
+
+/*
+ * There are a number of events supported by each counter (33 in total).
+ * Since we have 2 counters, each counter will take the event code as it
+ * corresponds to the PMCR PMM setting. Each counter can be configured
+ * independently.
+ *
+ *	Event Code	Description
+ *	----------	-----------
+ *
+ *	0x01		Operand read access
+ *	0x02		Operand write access
+ *	0x03		UTLB miss
+ *	0x04		Operand cache read miss
+ *	0x05		Operand cache write miss
+ *	0x06		Instruction fetch (w/ cache)
+ *	0x07		Instruction TLB miss
+ *	0x08		Instruction cache miss
+ *	0x09		All operand accesses
+ *	0x0a		All instruction accesses
+ *	0x0b		OC RAM operand access
+ *	0x0d		On-chip I/O space access
+ *	0x0e		Operand access (r/w)
+ *	0x0f		Operand cache miss (r/w)
+ *	0x10		Branch instruction
+ *	0x11		Branch taken
+ *	0x12		BSR/BSRF/JSR
+ *	0x13		Instruction execution
+ *	0x14		Instruction execution in parallel
+ *	0x15		FPU Instruction execution
+ *	0x16		Interrupt
+ *	0x17		NMI
+ *	0x18		trapa instruction execution
+ *	0x19		UBCA match
+ *	0x1a		UBCB match
+ *	0x21		Instruction cache fill
+ *	0x22		Operand cache fill
+ *	0x23		Elapsed time
+ *	0x24		Pipeline freeze by I-cache miss
+ *	0x25		Pipeline freeze by D-cache miss
+ *	0x27		Pipeline freeze by branch instruction
+ *	0x28		Pipeline freeze by CPU register
+ *	0x29		Pipeline freeze by FPU
+ */
+
+static const int sh7750_general_events[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]		= 0x0023,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x000a,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0006,	/* I-cache */
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x0008,	/* I-cache */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x0010,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= -1,
+	[PERF_COUNT_HW_BUS_CYCLES]		= -1,
+};
+
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+static const int sh7750_cache_events
+			[PERF_COUNT_HW_CACHE_MAX]
+			[PERF_COUNT_HW_CACHE_OP_MAX]
+			[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+	[ C(L1D) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0x0001,
+			[ C(RESULT_MISS)   ] = 0x0004,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = 0x0002,
+			[ C(RESULT_MISS)   ] = 0x0005,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+
+	[ C(L1I) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0x0006,
+			[ C(RESULT_MISS)   ] = 0x0008,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+
+	[ C(LL) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+
+	[ C(DTLB) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0x0003,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+
+	[ C(ITLB) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0x0007,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+
+	[ C(BPU) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+};
+
+static int sh7750_event_map(int event)
+{
+	return sh7750_general_events[event];
+}
+
+static u64 sh7750_pmu_read(int idx)
+{
+	return (u64)((u64)(__raw_readl(PMCTRH(idx)) & 0xffff) << 32) |
+			   __raw_readl(PMCTRL(idx));
+}
+
+static void sh7750_pmu_disable(struct hw_perf_event *hwc, int idx)
+{
+	unsigned int tmp;
+
+	tmp = __raw_readw(PMCR(idx));
+	tmp &= ~(PMCR_PMM_MASK | PMCR_PMEN);
+	__raw_writew(tmp, PMCR(idx));
+}
+
+static void sh7750_pmu_enable(struct hw_perf_event *hwc, int idx)
+{
+	__raw_writew(__raw_readw(PMCR(idx)) | PMCR_PMCLR, PMCR(idx));
+	__raw_writew(hwc->config | PMCR_PMEN | PMCR_PMST, PMCR(idx));
+}
+
+static void sh7750_pmu_disable_all(void)
+{
+	int i;
+
+	for (i = 0; i < sh7750_pmu.num_events; i++)
+		__raw_writew(__raw_readw(PMCR(i)) & ~PMCR_PMEN, PMCR(i));
+}
+
+static void sh7750_pmu_enable_all(void)
+{
+	int i;
+
+	for (i = 0; i < sh7750_pmu.num_events; i++)
+		__raw_writew(__raw_readw(PMCR(i)) | PMCR_PMEN, PMCR(i));
+}
+
+static struct sh_pmu sh7750_pmu = {
+	.name		= "SH7750",
+	.num_events	= 2,
+	.event_map	= sh7750_event_map,
+	.max_events	= ARRAY_SIZE(sh7750_general_events),
+	.raw_event_mask	= PMCR_PMM_MASK,
+	.cache_events	= &sh7750_cache_events,
+	.read		= sh7750_pmu_read,
+	.disable	= sh7750_pmu_disable,
+	.enable		= sh7750_pmu_enable,
+	.disable_all	= sh7750_pmu_disable_all,
+	.enable_all	= sh7750_pmu_enable_all,
+};
+
+static int __init sh7750_pmu_init(void)
+{
+	/*
+	 * Make sure this CPU actually has perf counters.
+	 */
+	if (!(boot_cpu_data.flags & CPU_HAS_PERF_COUNTER)) {
+		pr_notice("HW perf events unsupported, software events only.\n");
+		return -ENODEV;
+	}
+
+	return register_sh_pmu(&sh7750_pmu);
+}
+arch_initcall(sh7750_pmu_init);
diff --git a/arch/sh/kernel/cpu/sh4a/Makefile b/arch/sh/kernel/cpu/sh4a/Makefile
index 490d5dc9e372..33bab477d2e2 100644
--- a/arch/sh/kernel/cpu/sh4a/Makefile
+++ b/arch/sh/kernel/cpu/sh4a/Makefile
@@ -44,3 +44,4 @@ pinmux-$(CONFIG_CPU_SUBTYPE_SH7786)	:= pinmux-sh7786.o
 obj-y				+= $(clock-y)
 obj-$(CONFIG_SMP)		+= $(smp-y)
 obj-$(CONFIG_GENERIC_GPIO)	+= $(pinmux-y)
+obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
index dfe9192be63e..9db743802f06 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
@@ -152,7 +152,7 @@ struct clk div6_clks[] = {
 	SH_CLK_DIV6("fsia_clk", &div3_clk, FCLKACR, 0),
 	SH_CLK_DIV6("fsib_clk", &div3_clk, FCLKBCR, 0),
 	SH_CLK_DIV6("irda_clk", &div3_clk, IRDACLKCR, 0),
-	SH_CLK_DIV6("spu_clk", &div3_clk, SPUCLKCR, 0),
+	SH_CLK_DIV6("spu_clk", &div3_clk, SPUCLKCR, CLK_ENABLE_ON_INIT),
 };
 
 #define R_CLK (&r_clk)
diff --git a/arch/sh/kernel/cpu/sh4a/perf_event.c b/arch/sh/kernel/cpu/sh4a/perf_event.c
new file mode 100644
index 000000000000..eddc21973fa1
--- /dev/null
+++ b/arch/sh/kernel/cpu/sh4a/perf_event.c
@@ -0,0 +1,269 @@
+/*
+ * Performance events support for SH-4A performance counters
+ *
+ *  Copyright (C) 2009  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/perf_event.h>
+#include <asm/processor.h>
+
+#define PPC_CCBR(idx)	(0xff200800 + (sizeof(u32) * idx))
+#define PPC_PMCTR(idx)	(0xfc100000 + (sizeof(u32) * idx))
+
+#define CCBR_CIT_MASK	(0x7ff << 6)
+#define CCBR_DUC	(1 << 3)
+#define CCBR_CMDS	(1 << 1)
+#define CCBR_PPCE	(1 << 0)
+
+#define PPC_PMCAT	0xfc100080
+
+#define PMCAT_OVF3	(1 << 27)
+#define PMCAT_CNN3	(1 << 26)
+#define PMCAT_CLR3	(1 << 25)
+#define PMCAT_OVF2	(1 << 19)
+#define PMCAT_CLR2	(1 << 17)
+#define PMCAT_OVF1	(1 << 11)
+#define PMCAT_CNN1	(1 << 10)
+#define PMCAT_CLR1	(1 << 9)
+#define PMCAT_OVF0	(1 << 3)
+#define PMCAT_CLR0	(1 << 1)
+
+static struct sh_pmu sh4a_pmu;
+
+/*
+ * Supported raw event codes:
+ *
+ *	Event Code	Description
+ *	----------	-----------
+ *
+ *	0x0000		number of elapsed cycles
+ *	0x0200		number of elapsed cycles in privileged mode
+ *	0x0280		number of elapsed cycles while SR.BL is asserted
+ *	0x0202		instruction execution
+ *	0x0203		instruction execution in parallel
+ *	0x0204		number of unconditional branches
+ *	0x0208		number of exceptions
+ *	0x0209		number of interrupts
+ *	0x0220		UTLB miss caused by instruction fetch
+ *	0x0222		UTLB miss caused by operand access
+ *	0x02a0		number of ITLB misses
+ *	0x0028		number of accesses to instruction memories
+ *	0x0029		number of accesses to instruction cache
+ *	0x002a		instruction cache miss
+ *	0x022e		number of access to instruction X/Y memory
+ *	0x0030		number of reads to operand memories
+ *	0x0038		number of writes to operand memories
+ *	0x0031		number of operand cache read accesses
+ *	0x0039		number of operand cache write accesses
+ *	0x0032		operand cache read miss
+ *	0x003a		operand cache write miss
+ *	0x0236		number of reads to operand X/Y memory
+ *	0x023e		number of writes to operand X/Y memory
+ *	0x0237		number of reads to operand U memory
+ *	0x023f		number of writes to operand U memory
+ *	0x0337		number of U memory read buffer misses
+ *	0x02b4		number of wait cycles due to operand read access
+ *	0x02bc		number of wait cycles due to operand write access
+ *	0x0033		number of wait cycles due to operand cache read miss
+ *	0x003b		number of wait cycles due to operand cache write miss
+ */
+
+/*
+ * Special reserved bits used by hardware emulators, read values will
+ * vary, but writes must always be 0.
+ */
+#define PMCAT_EMU_CLR_MASK	((1 << 24) | (1 << 16) | (1 << 8) | (1 << 0))
+
+static const int sh4a_general_events[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]		= 0x0000,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x0202,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0029,	/* I-cache */
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x002a,	/* I-cache */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x0204,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= -1,
+	[PERF_COUNT_HW_BUS_CYCLES]		= -1,
+};
+
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+static const int sh4a_cache_events
+			[PERF_COUNT_HW_CACHE_MAX]
+			[PERF_COUNT_HW_CACHE_OP_MAX]
+			[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+	[ C(L1D) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0x0031,
+			[ C(RESULT_MISS)   ] = 0x0032,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = 0x0039,
+			[ C(RESULT_MISS)   ] = 0x003a,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+
+	[ C(L1I) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0x0029,
+			[ C(RESULT_MISS)   ] = 0x002a,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+
+	[ C(LL) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0x0030,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = 0x0038,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+
+	[ C(DTLB) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0x0222,
+			[ C(RESULT_MISS)   ] = 0x0220,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+
+	[ C(ITLB) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = 0x02a0,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+
+	[ C(BPU) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+};
+
+static int sh4a_event_map(int event)
+{
+	return sh4a_general_events[event];
+}
+
+static u64 sh4a_pmu_read(int idx)
+{
+	return __raw_readl(PPC_PMCTR(idx));
+}
+
+static void sh4a_pmu_disable(struct hw_perf_event *hwc, int idx)
+{
+	unsigned int tmp;
+
+	tmp = __raw_readl(PPC_CCBR(idx));
+	tmp &= ~(CCBR_CIT_MASK | CCBR_DUC);
+	__raw_writel(tmp, PPC_CCBR(idx));
+}
+
+static void sh4a_pmu_enable(struct hw_perf_event *hwc, int idx)
+{
+	unsigned int tmp;
+
+	tmp = __raw_readl(PPC_PMCAT);
+	tmp &= ~PMCAT_EMU_CLR_MASK;
+	tmp |= idx ? PMCAT_CLR1 : PMCAT_CLR0;
+	__raw_writel(tmp, PPC_PMCAT);
+
+	tmp = __raw_readl(PPC_CCBR(idx));
+	tmp |= (hwc->config << 6) | CCBR_CMDS | CCBR_PPCE;
+	__raw_writel(tmp, PPC_CCBR(idx));
+
+	__raw_writel(__raw_readl(PPC_CCBR(idx)) | CCBR_DUC, PPC_CCBR(idx));
+}
+
+static void sh4a_pmu_disable_all(void)
+{
+	int i;
+
+	for (i = 0; i < sh4a_pmu.num_events; i++)
+		__raw_writel(__raw_readl(PPC_CCBR(i)) & ~CCBR_DUC, PPC_CCBR(i));
+}
+
+static void sh4a_pmu_enable_all(void)
+{
+	int i;
+
+	for (i = 0; i < sh4a_pmu.num_events; i++)
+		__raw_writel(__raw_readl(PPC_CCBR(i)) | CCBR_DUC, PPC_CCBR(i));
+}
+
+static struct sh_pmu sh4a_pmu = {
+	.name		= "SH-4A",
+	.num_events	= 2,
+	.event_map	= sh4a_event_map,
+	.max_events	= ARRAY_SIZE(sh4a_general_events),
+	.raw_event_mask	= 0x3ff,
+	.cache_events	= &sh4a_cache_events,
+	.read		= sh4a_pmu_read,
+	.disable	= sh4a_pmu_disable,
+	.enable		= sh4a_pmu_enable,
+	.disable_all	= sh4a_pmu_disable_all,
+	.enable_all	= sh4a_pmu_enable_all,
+};
+
+static int __init sh4a_pmu_init(void)
+{
+	/*
+	 * Make sure this CPU actually has perf counters.
+	 */
+	if (!(boot_cpu_data.flags & CPU_HAS_PERF_COUNTER)) {
+		pr_notice("HW perf events unsupported, software events only.\n");
+		return -ENODEV;
+	}
+
+	return register_sh_pmu(&sh4a_pmu);
+}
+arch_initcall(sh4a_pmu_init);
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
index f3851fd757ec..845e89c936e7 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -20,6 +20,8 @@
 #include <linux/uio_driver.h>
 #include <linux/sh_timer.h>
 #include <linux/io.h>
+#include <linux/notifier.h>
+#include <asm/suspend.h>
 #include <asm/clock.h>
 #include <asm/mmzone.h>
 #include <cpu/sh7724.h>
@@ -202,7 +204,7 @@ static struct resource veu0_resources[] = {
 	[0] = {
 		.name	= "VEU3F0",
 		.start	= 0xfe920000,
-		.end	= 0xfe9200cb - 1,
+		.end	= 0xfe9200cb,
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
@@ -234,7 +236,7 @@ static struct resource veu1_resources[] = {
 	[0] = {
 		.name	= "VEU3F1",
 		.start	= 0xfe924000,
-		.end	= 0xfe9240cb - 1,
+		.end	= 0xfe9240cb,
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
@@ -523,6 +525,70 @@ static struct platform_device jpu_device = {
 	},
 };
 
+/* SPU2DSP0 */
+static struct uio_info spu0_platform_data = {
+	.name = "SPU2DSP0",
+	.version = "0",
+	.irq = 86,
+};
+
+static struct resource spu0_resources[] = {
+	[0] = {
+		.name	= "SPU2DSP0",
+		.start	= 0xFE200000,
+		.end	= 0xFE2FFFFF,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		/* place holder for contiguous memory */
+	},
+};
+
+static struct platform_device spu0_device = {
+	.name		= "uio_pdrv_genirq",
+	.id		= 4,
+	.dev = {
+		.platform_data	= &spu0_platform_data,
+	},
+	.resource	= spu0_resources,
+	.num_resources	= ARRAY_SIZE(spu0_resources),
+	.archdata = {
+		.hwblk_id = HWBLK_SPU,
+	},
+};
+
+/* SPU2DSP1 */
+static struct uio_info spu1_platform_data = {
+	.name = "SPU2DSP1",
+	.version = "0",
+	.irq = 87,
+};
+
+static struct resource spu1_resources[] = {
+	[0] = {
+		.name	= "SPU2DSP1",
+		.start	= 0xFE300000,
+		.end	= 0xFE3FFFFF,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		/* place holder for contiguous memory */
+	},
+};
+
+static struct platform_device spu1_device = {
+	.name		= "uio_pdrv_genirq",
+	.id		= 5,
+	.dev = {
+		.platform_data	= &spu1_platform_data,
+	},
+	.resource	= spu1_resources,
+	.num_resources	= ARRAY_SIZE(spu1_resources),
+	.archdata = {
+		.hwblk_id = HWBLK_SPU,
+	},
+};
+
 static struct platform_device *sh7724_devices[] __initdata = {
 	&cmt_device,
 	&tmu0_device,
@@ -539,6 +605,8 @@ static struct platform_device *sh7724_devices[] __initdata = {
 	&veu0_device,
 	&veu1_device,
 	&jpu_device,
+	&spu0_device,
+	&spu1_device,
 };
 
 static int __init sh7724_devices_setup(void)
@@ -547,6 +615,8 @@ static int __init sh7724_devices_setup(void)
 	platform_resource_setup_memory(&veu0_device, "veu0", 2 << 20);
 	platform_resource_setup_memory(&veu1_device, "veu1", 2 << 20);
 	platform_resource_setup_memory(&jpu_device,  "jpu",  2 << 20);
+	platform_resource_setup_memory(&spu0_device, "spu0", 2 << 20);
+	platform_resource_setup_memory(&spu1_device, "spu1", 2 << 20);
 
 	return platform_add_devices(sh7724_devices,
 				    ARRAY_SIZE(sh7724_devices));
@@ -827,3 +897,193 @@ void __init plat_irq_setup(void)
 {
 	register_intc_controller(&intc_desc);
 }
+
+static struct {
+	/* BSC */
+	unsigned long mmselr;
+	unsigned long cs0bcr;
+	unsigned long cs4bcr;
+	unsigned long cs5abcr;
+	unsigned long cs5bbcr;
+	unsigned long cs6abcr;
+	unsigned long cs6bbcr;
+	unsigned long cs4wcr;
+	unsigned long cs5awcr;
+	unsigned long cs5bwcr;
+	unsigned long cs6awcr;
+	unsigned long cs6bwcr;
+	/* INTC */
+	unsigned short ipra;
+	unsigned short iprb;
+	unsigned short iprc;
+	unsigned short iprd;
+	unsigned short ipre;
+	unsigned short iprf;
+	unsigned short iprg;
+	unsigned short iprh;
+	unsigned short ipri;
+	unsigned short iprj;
+	unsigned short iprk;
+	unsigned short iprl;
+	unsigned char imr0;
+	unsigned char imr1;
+	unsigned char imr2;
+	unsigned char imr3;
+	unsigned char imr4;
+	unsigned char imr5;
+	unsigned char imr6;
+	unsigned char imr7;
+	unsigned char imr8;
+	unsigned char imr9;
+	unsigned char imr10;
+	unsigned char imr11;
+	unsigned char imr12;
+	/* RWDT */
+	unsigned short rwtcnt;
+	unsigned short rwtcsr;
+	/* CPG */
+	unsigned long irdaclk;
+	unsigned long spuclk;
+} sh7724_rstandby_state;
+
+static int sh7724_pre_sleep_notifier_call(struct notifier_block *nb,
+					  unsigned long flags, void *unused)
+{
+	if (!(flags & SUSP_SH_RSTANDBY))
+		return NOTIFY_DONE;
+
+	/* BCR */
+	sh7724_rstandby_state.mmselr = __raw_readl(0xff800020); /* MMSELR */
+	sh7724_rstandby_state.mmselr |= 0xa5a50000;
+	sh7724_rstandby_state.cs0bcr = __raw_readl(0xfec10004); /* CS0BCR */
+	sh7724_rstandby_state.cs4bcr = __raw_readl(0xfec10010); /* CS4BCR */
+	sh7724_rstandby_state.cs5abcr = __raw_readl(0xfec10014); /* CS5ABCR */
+	sh7724_rstandby_state.cs5bbcr = __raw_readl(0xfec10018); /* CS5BBCR */
+	sh7724_rstandby_state.cs6abcr = __raw_readl(0xfec1001c); /* CS6ABCR */
+	sh7724_rstandby_state.cs6bbcr = __raw_readl(0xfec10020); /* CS6BBCR */
+	sh7724_rstandby_state.cs4wcr = __raw_readl(0xfec10030); /* CS4WCR */
+	sh7724_rstandby_state.cs5awcr = __raw_readl(0xfec10034); /* CS5AWCR */
+	sh7724_rstandby_state.cs5bwcr = __raw_readl(0xfec10038); /* CS5BWCR */
+	sh7724_rstandby_state.cs6awcr = __raw_readl(0xfec1003c); /* CS6AWCR */
+	sh7724_rstandby_state.cs6bwcr = __raw_readl(0xfec10040); /* CS6BWCR */
+
+	/* INTC */
+	sh7724_rstandby_state.ipra = __raw_readw(0xa4080000); /* IPRA */
+	sh7724_rstandby_state.iprb = __raw_readw(0xa4080004); /* IPRB */
+	sh7724_rstandby_state.iprc = __raw_readw(0xa4080008); /* IPRC */
+	sh7724_rstandby_state.iprd = __raw_readw(0xa408000c); /* IPRD */
+	sh7724_rstandby_state.ipre = __raw_readw(0xa4080010); /* IPRE */
+	sh7724_rstandby_state.iprf = __raw_readw(0xa4080014); /* IPRF */
+	sh7724_rstandby_state.iprg = __raw_readw(0xa4080018); /* IPRG */
+	sh7724_rstandby_state.iprh = __raw_readw(0xa408001c); /* IPRH */
+	sh7724_rstandby_state.ipri = __raw_readw(0xa4080020); /* IPRI */
+	sh7724_rstandby_state.iprj = __raw_readw(0xa4080024); /* IPRJ */
+	sh7724_rstandby_state.iprk = __raw_readw(0xa4080028); /* IPRK */
+	sh7724_rstandby_state.iprl = __raw_readw(0xa408002c); /* IPRL */
+	sh7724_rstandby_state.imr0 = __raw_readb(0xa4080080); /* IMR0 */
+	sh7724_rstandby_state.imr1 = __raw_readb(0xa4080084); /* IMR1 */
+	sh7724_rstandby_state.imr2 = __raw_readb(0xa4080088); /* IMR2 */
+	sh7724_rstandby_state.imr3 = __raw_readb(0xa408008c); /* IMR3 */
+	sh7724_rstandby_state.imr4 = __raw_readb(0xa4080090); /* IMR4 */
+	sh7724_rstandby_state.imr5 = __raw_readb(0xa4080094); /* IMR5 */
+	sh7724_rstandby_state.imr6 = __raw_readb(0xa4080098); /* IMR6 */
+	sh7724_rstandby_state.imr7 = __raw_readb(0xa408009c); /* IMR7 */
+	sh7724_rstandby_state.imr8 = __raw_readb(0xa40800a0); /* IMR8 */
+	sh7724_rstandby_state.imr9 = __raw_readb(0xa40800a4); /* IMR9 */
+	sh7724_rstandby_state.imr10 = __raw_readb(0xa40800a8); /* IMR10 */
+	sh7724_rstandby_state.imr11 = __raw_readb(0xa40800ac); /* IMR11 */
+	sh7724_rstandby_state.imr12 = __raw_readb(0xa40800b0); /* IMR12 */
+
+	/* RWDT */
+	sh7724_rstandby_state.rwtcnt = __raw_readb(0xa4520000); /* RWTCNT */
+	sh7724_rstandby_state.rwtcnt |= 0x5a00;
+	sh7724_rstandby_state.rwtcsr = __raw_readb(0xa4520004); /* RWTCSR */
+	sh7724_rstandby_state.rwtcsr |= 0xa500;
+	__raw_writew(sh7724_rstandby_state.rwtcsr & 0x07, 0xa4520004);
+
+	/* CPG */
+	sh7724_rstandby_state.irdaclk = __raw_readl(0xa4150018); /* IRDACLKCR */
+	sh7724_rstandby_state.spuclk = __raw_readl(0xa415003c); /* SPUCLKCR */
+
+	return NOTIFY_DONE;
+}
+
+static int sh7724_post_sleep_notifier_call(struct notifier_block *nb,
+					   unsigned long flags, void *unused)
+{
+	if (!(flags & SUSP_SH_RSTANDBY))
+		return NOTIFY_DONE;
+
+	/* BCR */
+	__raw_writel(sh7724_rstandby_state.mmselr, 0xff800020); /* MMSELR */
+	__raw_writel(sh7724_rstandby_state.cs0bcr, 0xfec10004); /* CS0BCR */
+	__raw_writel(sh7724_rstandby_state.cs4bcr, 0xfec10010); /* CS4BCR */
+	__raw_writel(sh7724_rstandby_state.cs5abcr, 0xfec10014); /* CS5ABCR */
+	__raw_writel(sh7724_rstandby_state.cs5bbcr, 0xfec10018); /* CS5BBCR */
+	__raw_writel(sh7724_rstandby_state.cs6abcr, 0xfec1001c); /* CS6ABCR */
+	__raw_writel(sh7724_rstandby_state.cs6bbcr, 0xfec10020); /* CS6BBCR */
+	__raw_writel(sh7724_rstandby_state.cs4wcr, 0xfec10030); /* CS4WCR */
+	__raw_writel(sh7724_rstandby_state.cs5awcr, 0xfec10034); /* CS5AWCR */
+	__raw_writel(sh7724_rstandby_state.cs5bwcr, 0xfec10038); /* CS5BWCR */
+	__raw_writel(sh7724_rstandby_state.cs6awcr, 0xfec1003c); /* CS6AWCR */
+	__raw_writel(sh7724_rstandby_state.cs6bwcr, 0xfec10040); /* CS6BWCR */
+
+	/* INTC */
+	__raw_writew(sh7724_rstandby_state.ipra, 0xa4080000); /* IPRA */
+	__raw_writew(sh7724_rstandby_state.iprb, 0xa4080004); /* IPRB */
+	__raw_writew(sh7724_rstandby_state.iprc, 0xa4080008); /* IPRC */
+	__raw_writew(sh7724_rstandby_state.iprd, 0xa408000c); /* IPRD */
+	__raw_writew(sh7724_rstandby_state.ipre, 0xa4080010); /* IPRE */
+	__raw_writew(sh7724_rstandby_state.iprf, 0xa4080014); /* IPRF */
+	__raw_writew(sh7724_rstandby_state.iprg, 0xa4080018); /* IPRG */
+	__raw_writew(sh7724_rstandby_state.iprh, 0xa408001c); /* IPRH */
+	__raw_writew(sh7724_rstandby_state.ipri, 0xa4080020); /* IPRI */
+	__raw_writew(sh7724_rstandby_state.iprj, 0xa4080024); /* IPRJ */
+	__raw_writew(sh7724_rstandby_state.iprk, 0xa4080028); /* IPRK */
+	__raw_writew(sh7724_rstandby_state.iprl, 0xa408002c); /* IPRL */
+	__raw_writeb(sh7724_rstandby_state.imr0, 0xa4080080); /* IMR0 */
+	__raw_writeb(sh7724_rstandby_state.imr1, 0xa4080084); /* IMR1 */
+	__raw_writeb(sh7724_rstandby_state.imr2, 0xa4080088); /* IMR2 */
+	__raw_writeb(sh7724_rstandby_state.imr3, 0xa408008c); /* IMR3 */
+	__raw_writeb(sh7724_rstandby_state.imr4, 0xa4080090); /* IMR4 */
+	__raw_writeb(sh7724_rstandby_state.imr5, 0xa4080094); /* IMR5 */
+	__raw_writeb(sh7724_rstandby_state.imr6, 0xa4080098); /* IMR6 */
+	__raw_writeb(sh7724_rstandby_state.imr7, 0xa408009c); /* IMR7 */
+	__raw_writeb(sh7724_rstandby_state.imr8, 0xa40800a0); /* IMR8 */
+	__raw_writeb(sh7724_rstandby_state.imr9, 0xa40800a4); /* IMR9 */
+	__raw_writeb(sh7724_rstandby_state.imr10, 0xa40800a8); /* IMR10 */
+	__raw_writeb(sh7724_rstandby_state.imr11, 0xa40800ac); /* IMR11 */
+	__raw_writeb(sh7724_rstandby_state.imr12, 0xa40800b0); /* IMR12 */
+
+	/* RWDT */
+	__raw_writew(sh7724_rstandby_state.rwtcnt, 0xa4520000); /* RWTCNT */
+	__raw_writew(sh7724_rstandby_state.rwtcsr, 0xa4520004); /* RWTCSR */
+
+	/* CPG */
+	__raw_writel(sh7724_rstandby_state.irdaclk, 0xa4150018); /* IRDACLKCR */
+	__raw_writel(sh7724_rstandby_state.spuclk, 0xa415003c); /* SPUCLKCR */
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block sh7724_pre_sleep_notifier = {
+	.notifier_call = sh7724_pre_sleep_notifier_call,
+	.priority = SH_MOBILE_PRE(SH_MOBILE_SLEEP_CPU),
+};
+
+static struct notifier_block sh7724_post_sleep_notifier = {
+	.notifier_call = sh7724_post_sleep_notifier_call,
+	.priority = SH_MOBILE_POST(SH_MOBILE_SLEEP_CPU),
+};
+
+static int __init sh7724_sleep_setup(void)
+{
+	atomic_notifier_chain_register(&sh_mobile_pre_sleep_notifier_list,
+				       &sh7724_pre_sleep_notifier);
+
+	atomic_notifier_chain_register(&sh_mobile_post_sleep_notifier_list,
+				       &sh7724_post_sleep_notifier);
+	return 0;
+}
+arch_initcall(sh7724_sleep_setup);
+
diff --git a/arch/sh/kernel/cpu/sh4a/setup-shx3.c b/arch/sh/kernel/cpu/sh4a/setup-shx3.c
index e848443deeb9..c7ba9166e18a 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-shx3.c
@@ -15,6 +15,15 @@
 #include <linux/sh_timer.h>
 #include <asm/mmzone.h>
 
+/*
+ * This intentionally only registers SCIF ports 0, 1, and 3. SCIF 2
+ * INTEVT values overlap with the FPU EXPEVT ones, requiring special
+ * demuxing in the exception dispatch path.
+ *
+ * As this overlap is something that never should have made it in to
+ * silicon in the first place, we just refuse to deal with the port at
+ * all rather than adding infrastructure to hack around it.
+ */
 static struct plat_sci_port sci_platform_data[] = {
 	{
 		.mapbase	= 0xffc30000,
@@ -27,11 +36,6 @@ static struct plat_sci_port sci_platform_data[] = {
 		.type		= PORT_SCIF,
 		.irqs		= { 44, 45, 47, 46 },
 	}, {
-		.mapbase	= 0xffc50000,
-		.flags		= UPF_BOOT_AUTOCONF,
-		.type		= PORT_SCIF,
-		.irqs		= { 48, 49, 51, 50 },
-	}, {
 		.mapbase	= 0xffc60000,
 		.flags		= UPF_BOOT_AUTOCONF,
 		.type		= PORT_SCIF,
@@ -268,7 +272,11 @@ enum {
 	UNUSED = 0,
 
 	/* interrupt sources */
-	IRL, IRQ0, IRQ1, IRQ2, IRQ3,
+	IRL_LLLL, IRL_LLLH, IRL_LLHL, IRL_LLHH,
+	IRL_LHLL, IRL_LHLH, IRL_LHHL, IRL_LHHH,
+	IRL_HLLL, IRL_HLLH, IRL_HLHL, IRL_HLHH,
+	IRL_HHLL, IRL_HHLH, IRL_HHHL,
+	IRQ0, IRQ1, IRQ2, IRQ3,
 	HUDII,
 	TMU0, TMU1, TMU2, TMU3, TMU4, TMU5,
 	PCII0, PCII1, PCII2, PCII3, PCII4,
@@ -291,7 +299,7 @@ enum {
 	INTICI4, INTICI5, INTICI6, INTICI7,
 
 	/* interrupt groups */
-	PCII56789, SCIF0, SCIF1, SCIF2, SCIF3,
+	IRL, PCII56789, SCIF0, SCIF1, SCIF2, SCIF3,
 	DMAC0, DMAC1,
 };
 
@@ -309,8 +317,6 @@ static struct intc_vect vectors[] __initdata = {
 	INTC_VECT(SCIF0_BRI, 0x740), INTC_VECT(SCIF0_TXI, 0x760),
 	INTC_VECT(SCIF1_ERI, 0x780), INTC_VECT(SCIF1_RXI, 0x7a0),
 	INTC_VECT(SCIF1_BRI, 0x7c0), INTC_VECT(SCIF1_TXI, 0x7e0),
-	INTC_VECT(SCIF2_ERI, 0x800), INTC_VECT(SCIF2_RXI, 0x820),
-	INTC_VECT(SCIF2_BRI, 0x840), INTC_VECT(SCIF2_TXI, 0x860),
 	INTC_VECT(SCIF3_ERI, 0x880), INTC_VECT(SCIF3_RXI, 0x8a0),
 	INTC_VECT(SCIF3_BRI, 0x8c0), INTC_VECT(SCIF3_TXI, 0x8e0),
 	INTC_VECT(DMAC0_DMINT0, 0x900), INTC_VECT(DMAC0_DMINT1, 0x920),
@@ -344,10 +350,13 @@ static struct intc_vect vectors[] __initdata = {
 };
 
 static struct intc_group groups[] __initdata = {
+	INTC_GROUP(IRL, IRL_LLLL, IRL_LLLH, IRL_LLHL, IRL_LLHH,
+		   IRL_LHLL, IRL_LHLH, IRL_LHHL, IRL_LHHH,
+		   IRL_HLLL, IRL_HLLH, IRL_HLHL, IRL_HLHH,
+		   IRL_HHLL, IRL_HHLH, IRL_HHHL),
 	INTC_GROUP(PCII56789, PCII5, PCII6, PCII7, PCII8, PCII9),
 	INTC_GROUP(SCIF0, SCIF0_ERI, SCIF0_RXI, SCIF0_BRI, SCIF0_TXI),
 	INTC_GROUP(SCIF1, SCIF1_ERI, SCIF1_RXI, SCIF1_BRI, SCIF1_TXI),
-	INTC_GROUP(SCIF2, SCIF2_ERI, SCIF2_RXI, SCIF2_BRI, SCIF2_TXI),
 	INTC_GROUP(SCIF3, SCIF3_ERI, SCIF3_RXI, SCIF3_BRI, SCIF3_TXI),
 	INTC_GROUP(DMAC0, DMAC0_DMINT0, DMAC0_DMINT1, DMAC0_DMINT2,
 		   DMAC0_DMINT3, DMAC0_DMINT4, DMAC0_DMINT5, DMAC0_DMAE),
@@ -419,14 +428,14 @@ static DECLARE_INTC_DESC(intc_desc_irq, "shx3-irq", vectors_irq, groups,
 
 /* External interrupt pins in IRL mode */
 static struct intc_vect vectors_irl[] __initdata = {
-	INTC_VECT(IRL, 0x200), INTC_VECT(IRL, 0x220),
-	INTC_VECT(IRL, 0x240), INTC_VECT(IRL, 0x260),
-	INTC_VECT(IRL, 0x280), INTC_VECT(IRL, 0x2a0),
-	INTC_VECT(IRL, 0x2c0), INTC_VECT(IRL, 0x2e0),
-	INTC_VECT(IRL, 0x300), INTC_VECT(IRL, 0x320),
-	INTC_VECT(IRL, 0x340), INTC_VECT(IRL, 0x360),
-	INTC_VECT(IRL, 0x380), INTC_VECT(IRL, 0x3a0),
-	INTC_VECT(IRL, 0x3c0),
+	INTC_VECT(IRL_LLLL, 0x200), INTC_VECT(IRL_LLLH, 0x220),
+	INTC_VECT(IRL_LLHL, 0x240), INTC_VECT(IRL_LLHH, 0x260),
+	INTC_VECT(IRL_LHLL, 0x280), INTC_VECT(IRL_LHLH, 0x2a0),
+	INTC_VECT(IRL_LHHL, 0x2c0), INTC_VECT(IRL_LHHH, 0x2e0),
+	INTC_VECT(IRL_HLLL, 0x300), INTC_VECT(IRL_HLLH, 0x320),
+	INTC_VECT(IRL_HLHL, 0x340), INTC_VECT(IRL_HLHH, 0x360),
+	INTC_VECT(IRL_HHLL, 0x380), INTC_VECT(IRL_HHLH, 0x3a0),
+	INTC_VECT(IRL_HHHL, 0x3c0),
 };
 
 static DECLARE_INTC_DESC(intc_desc_irl, "shx3-irl", vectors_irl, groups,
diff --git a/arch/sh/kernel/cpu/sh4a/smp-shx3.c b/arch/sh/kernel/cpu/sh4a/smp-shx3.c
index 185ec3976a25..5863e0c4d02f 100644
--- a/arch/sh/kernel/cpu/sh4a/smp-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/smp-shx3.c
@@ -14,6 +14,13 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 
+#define STBCR_REG(phys_id) (0xfe400004 | (phys_id << 12))
+#define RESET_REG(phys_id) (0xfe400008 | (phys_id << 12))
+
+#define STBCR_MSTP	0x00000001
+#define STBCR_RESET	0x00000002
+#define STBCR_LTSLP	0x80000000
+
 static irqreturn_t ipi_interrupt_handler(int irq, void *arg)
 {
 	unsigned int message = (unsigned int)(long)arg;
@@ -21,9 +28,9 @@ static irqreturn_t ipi_interrupt_handler(int irq, void *arg)
 	unsigned int offs = 4 * cpu;
 	unsigned int x;
 
-	x = ctrl_inl(0xfe410070 + offs); /* C0INITICI..CnINTICI */
+	x = __raw_readl(0xfe410070 + offs); /* C0INITICI..CnINTICI */
 	x &= (1 << (message << 2));
-	ctrl_outl(x, 0xfe410080 + offs); /* C0INTICICLR..CnINTICICLR */
+	__raw_writel(x, 0xfe410080 + offs); /* C0INTICICLR..CnINTICICLR */
 
 	smp_message_recv(message);
 
@@ -37,6 +44,9 @@ void __init plat_smp_setup(void)
 
 	init_cpu_possible(cpumask_of(cpu));
 
+	/* Enable light sleep for the boot CPU */
+	__raw_writel(__raw_readl(STBCR_REG(cpu)) | STBCR_LTSLP, STBCR_REG(cpu));
+
 	__cpu_number_map[0] = 0;
 	__cpu_logical_map[0] = 0;
 
@@ -66,32 +76,23 @@ void __init plat_prepare_cpus(unsigned int max_cpus)
 			    "IPI", (void *)(long)i);
 }
 
-#define STBCR_REG(phys_id) (0xfe400004 | (phys_id << 12))
-#define RESET_REG(phys_id) (0xfe400008 | (phys_id << 12))
-
-#define STBCR_MSTP	0x00000001
-#define STBCR_RESET	0x00000002
-#define STBCR_LTSLP	0x80000000
-
-#define STBCR_AP_VAL	(STBCR_RESET | STBCR_LTSLP)
-
 void plat_start_cpu(unsigned int cpu, unsigned long entry_point)
 {
-	ctrl_outl(entry_point, RESET_REG(cpu));
+	__raw_writel(entry_point, RESET_REG(cpu));
 
-	if (!(ctrl_inl(STBCR_REG(cpu)) & STBCR_MSTP))
-		ctrl_outl(STBCR_MSTP, STBCR_REG(cpu));
+	if (!(__raw_readl(STBCR_REG(cpu)) & STBCR_MSTP))
+		__raw_writel(STBCR_MSTP, STBCR_REG(cpu));
 
-	while (!(ctrl_inl(STBCR_REG(cpu)) & STBCR_MSTP))
+	while (!(__raw_readl(STBCR_REG(cpu)) & STBCR_MSTP))
 		cpu_relax();
 
 	/* Start up secondary processor by sending a reset */
-	ctrl_outl(STBCR_AP_VAL, STBCR_REG(cpu));
+	__raw_writel(STBCR_RESET | STBCR_LTSLP, STBCR_REG(cpu));
 }
 
 int plat_smp_processor_id(void)
 {
-	return ctrl_inl(0xff000048); /* CPIDR */
+	return __raw_readl(0xff000048); /* CPIDR */
 }
 
 void plat_send_ipi(unsigned int cpu, unsigned int message)
@@ -100,5 +101,5 @@ void plat_send_ipi(unsigned int cpu, unsigned int message)
 
 	BUG_ON(cpu >= 4);
 
-	ctrl_outl(1 << (message << 2), addr); /* C0INTICI..CnINTICI */
+	__raw_writel(1 << (message << 2), addr); /* C0INTICI..CnINTICI */
 }
diff --git a/arch/sh/kernel/cpu/sh5/entry.S b/arch/sh/kernel/cpu/sh5/entry.S
index b0aacf675258..8f13f73cb2cb 100644
--- a/arch/sh/kernel/cpu/sh5/entry.S
+++ b/arch/sh/kernel/cpu/sh5/entry.S
@@ -933,7 +933,7 @@ ret_with_reschedule:
 
 	pta	restore_all, tr1
 
-	movi	(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK), r8
+	movi	_TIF_SIGPENDING, r8
 	and	r8, r7, r8
 	pta	work_notifysig, tr0
 	bne	r8, ZERO, tr0
diff --git a/arch/sh/kernel/cpu/shmobile/cpuidle.c b/arch/sh/kernel/cpu/shmobile/cpuidle.c
index 1c504bd972c3..83972aa319c2 100644
--- a/arch/sh/kernel/cpu/shmobile/cpuidle.c
+++ b/arch/sh/kernel/cpu/shmobile/cpuidle.c
@@ -87,25 +87,31 @@ void sh_mobile_setup_cpuidle(void)
 
 	dev->safe_state = state;
 
-	state = &dev->states[i++];
-	snprintf(state->name, CPUIDLE_NAME_LEN, "C1");
-	strncpy(state->desc, "SuperH Sleep Mode [SF]", CPUIDLE_DESC_LEN);
-	state->exit_latency = 100;
-	state->target_residency = 1 * 2;
-	state->power_usage = 1;
-	state->flags = 0;
-	state->flags |= CPUIDLE_FLAG_TIME_VALID;
-	state->enter = cpuidle_sleep_enter;
+	if (sh_mobile_sleep_supported & SUSP_SH_SF) {
+		state = &dev->states[i++];
+		snprintf(state->name, CPUIDLE_NAME_LEN, "C1");
+		strncpy(state->desc, "SuperH Sleep Mode [SF]",
+			CPUIDLE_DESC_LEN);
+		state->exit_latency = 100;
+		state->target_residency = 1 * 2;
+		state->power_usage = 1;
+		state->flags = 0;
+		state->flags |= CPUIDLE_FLAG_TIME_VALID;
+		state->enter = cpuidle_sleep_enter;
+	}
 
-	state = &dev->states[i++];
-	snprintf(state->name, CPUIDLE_NAME_LEN, "C2");
-	strncpy(state->desc, "SuperH Mobile Standby Mode [SF]", CPUIDLE_DESC_LEN);
-	state->exit_latency = 2300;
-	state->target_residency = 1 * 2;
-	state->power_usage = 1;
-	state->flags = 0;
-	state->flags |= CPUIDLE_FLAG_TIME_VALID;
-	state->enter = cpuidle_sleep_enter;
+	if (sh_mobile_sleep_supported & SUSP_SH_STANDBY) {
+		state = &dev->states[i++];
+		snprintf(state->name, CPUIDLE_NAME_LEN, "C2");
+		strncpy(state->desc, "SuperH Mobile Standby Mode [SF]",
+			CPUIDLE_DESC_LEN);
+		state->exit_latency = 2300;
+		state->target_residency = 1 * 2;
+		state->power_usage = 1;
+		state->flags = 0;
+		state->flags |= CPUIDLE_FLAG_TIME_VALID;
+		state->enter = cpuidle_sleep_enter;
+	}
 
 	dev->state_count = i;
 
diff --git a/arch/sh/kernel/cpu/shmobile/pm.c b/arch/sh/kernel/cpu/shmobile/pm.c
index ee3c2aaf66fb..ca029a44743c 100644
--- a/arch/sh/kernel/cpu/shmobile/pm.c
+++ b/arch/sh/kernel/cpu/shmobile/pm.c
@@ -15,6 +15,13 @@
 #include <linux/suspend.h>
 #include <asm/suspend.h>
 #include <asm/uaccess.h>
+#include <asm/cacheflush.h>
+
+/*
+ * Notifier lists for pre/post sleep notification
+ */
+ATOMIC_NOTIFIER_HEAD(sh_mobile_pre_sleep_notifier_list);
+ATOMIC_NOTIFIER_HEAD(sh_mobile_post_sleep_notifier_list);
 
 /*
  * Sleep modes available on SuperH Mobile:
@@ -26,30 +33,105 @@
 #define SUSP_MODE_SLEEP		(SUSP_SH_SLEEP)
 #define SUSP_MODE_SLEEP_SF	(SUSP_SH_SLEEP | SUSP_SH_SF)
 #define SUSP_MODE_STANDBY_SF	(SUSP_SH_STANDBY | SUSP_SH_SF)
+#define SUSP_MODE_RSTANDBY	(SUSP_SH_RSTANDBY | SUSP_SH_MMU | SUSP_SH_SF)
+ /*
+  * U-standby mode is unsupported since it needs bootloader hacks
+  */
 
-/*
- * The following modes are not there yet:
- *
- * R-standby mode is unsupported, but will be added in the future
- * U-standby mode is low priority since it needs bootloader hacks
- */
-
-#define ILRAM_BASE 0xe5200000
-
-extern const unsigned char sh_mobile_standby[];
-extern const unsigned int sh_mobile_standby_size;
+#ifdef CONFIG_CPU_SUBTYPE_SH7724
+#define RAM_BASE 0xfd800000 /* RSMEM */
+#else
+#define RAM_BASE 0xe5200000 /* ILRAM */
+#endif
 
 void sh_mobile_call_standby(unsigned long mode)
 {
-	void *onchip_mem = (void *)ILRAM_BASE;
-	void (*standby_onchip_mem)(unsigned long, unsigned long) = onchip_mem;
+	void *onchip_mem = (void *)RAM_BASE;
+	struct sh_sleep_data *sdp = onchip_mem;
+	void (*standby_onchip_mem)(unsigned long, unsigned long);
+
+	/* code located directly after data structure */
+	standby_onchip_mem = (void *)(sdp + 1);
+
+	atomic_notifier_call_chain(&sh_mobile_pre_sleep_notifier_list,
+				   mode, NULL);
+
+	/* flush the caches if MMU flag is set */
+	if (mode & SUSP_SH_MMU)
+		flush_cache_all();
 
 	/* Let assembly snippet in on-chip memory handle the rest */
-	standby_onchip_mem(mode, ILRAM_BASE);
+	standby_onchip_mem(mode, RAM_BASE);
+
+	atomic_notifier_call_chain(&sh_mobile_post_sleep_notifier_list,
+				   mode, NULL);
+}
+
+extern char sh_mobile_sleep_enter_start;
+extern char sh_mobile_sleep_enter_end;
+
+extern char sh_mobile_sleep_resume_start;
+extern char sh_mobile_sleep_resume_end;
+
+unsigned long sh_mobile_sleep_supported = SUSP_SH_SLEEP;
+
+void sh_mobile_register_self_refresh(unsigned long flags,
+				     void *pre_start, void *pre_end,
+				     void *post_start, void *post_end)
+{
+	void *onchip_mem = (void *)RAM_BASE;
+	void *vp;
+	struct sh_sleep_data *sdp;
+	int n;
+
+	/* part 0: data area */
+	sdp = onchip_mem;
+	sdp->addr.stbcr = 0xa4150020; /* STBCR */
+	sdp->addr.bar = 0xa4150040; /* BAR */
+	sdp->addr.pteh = 0xff000000; /* PTEH */
+	sdp->addr.ptel = 0xff000004; /* PTEL */
+	sdp->addr.ttb = 0xff000008; /* TTB */
+	sdp->addr.tea = 0xff00000c; /* TEA */
+	sdp->addr.mmucr = 0xff000010; /* MMUCR */
+	sdp->addr.ptea = 0xff000034; /* PTEA */
+	sdp->addr.pascr = 0xff000070; /* PASCR */
+	sdp->addr.irmcr = 0xff000078; /* IRMCR */
+	sdp->addr.ccr = 0xff00001c; /* CCR */
+	sdp->addr.ramcr = 0xff000074; /* RAMCR */
+	vp = sdp + 1;
+
+	/* part 1: common code to enter sleep mode */
+	n = &sh_mobile_sleep_enter_end - &sh_mobile_sleep_enter_start;
+	memcpy(vp, &sh_mobile_sleep_enter_start, n);
+	vp += roundup(n, 4);
+
+	/* part 2: board specific code to enter self-refresh mode */
+	n = pre_end - pre_start;
+	memcpy(vp, pre_start, n);
+	sdp->sf_pre = (unsigned long)vp;
+	vp += roundup(n, 4);
+
+	/* part 3: board specific code to resume from self-refresh mode */
+	n = post_end - post_start;
+	memcpy(vp, post_start, n);
+	sdp->sf_post = (unsigned long)vp;
+	vp += roundup(n, 4);
+
+	/* part 4: common code to resume from sleep mode */
+	WARN_ON(vp > (onchip_mem + 0x600));
+	vp = onchip_mem + 0x600; /* located at interrupt vector */
+	n = &sh_mobile_sleep_resume_end - &sh_mobile_sleep_resume_start;
+	memcpy(vp, &sh_mobile_sleep_resume_start, n);
+	sdp->resume = (unsigned long)vp;
+
+	sh_mobile_sleep_supported |= flags;
 }
 
 static int sh_pm_enter(suspend_state_t state)
 {
+	if (!(sh_mobile_sleep_supported & SUSP_MODE_STANDBY_SF))
+		return -ENXIO;
+
 	local_irq_disable();
 	set_bl_bit();
 	sh_mobile_call_standby(SUSP_MODE_STANDBY_SF);
@@ -65,13 +147,6 @@ static struct platform_suspend_ops sh_pm_ops = {
 
 static int __init sh_pm_init(void)
 {
-	void *onchip_mem = (void *)ILRAM_BASE;
-
-	/* Copy the assembly snippet to the otherwise ununsed ILRAM */
-	memcpy(onchip_mem, sh_mobile_standby, sh_mobile_standby_size);
-	wmb();
-	ctrl_barrier();
-
 	suspend_set_ops(&sh_pm_ops);
 	sh_mobile_setup_cpuidle();
 	return 0;
diff --git a/arch/sh/kernel/cpu/shmobile/pm_runtime.c b/arch/sh/kernel/cpu/shmobile/pm_runtime.c
index 7c615b17e209..6dcb8166a64d 100644
--- a/arch/sh/kernel/cpu/shmobile/pm_runtime.c
+++ b/arch/sh/kernel/cpu/shmobile/pm_runtime.c
@@ -45,12 +45,14 @@ static int __platform_pm_runtime_resume(struct platform_device *pdev)
 
 	dev_dbg(d, "__platform_pm_runtime_resume() [%d]\n", hwblk);
 
-	if (d->driver && d->driver->pm && d->driver->pm->runtime_resume) {
+	if (d->driver) {
 		hwblk_enable(hwblk_info, hwblk);
 		ret = 0;
 
 		if (test_bit(PDEV_ARCHDATA_FLAG_SUSP, &ad->flags)) {
-			ret = d->driver->pm->runtime_resume(d);
+			if (d->driver->pm && d->driver->pm->runtime_resume)
+				ret = d->driver->pm->runtime_resume(d);
+
 			if (!ret)
 				clear_bit(PDEV_ARCHDATA_FLAG_SUSP, &ad->flags);
 			else
@@ -73,12 +75,15 @@ static int __platform_pm_runtime_suspend(struct platform_device *pdev)
 
 	dev_dbg(d, "__platform_pm_runtime_suspend() [%d]\n", hwblk);
 
-	if (d->driver && d->driver->pm && d->driver->pm->runtime_suspend) {
+	if (d->driver) {
 		BUG_ON(!test_bit(PDEV_ARCHDATA_FLAG_IDLE, &ad->flags));
+		ret = 0;
 
-		hwblk_enable(hwblk_info, hwblk);
-		ret = d->driver->pm->runtime_suspend(d);
-		hwblk_disable(hwblk_info, hwblk);
+		if (d->driver->pm && d->driver->pm->runtime_suspend) {
+			hwblk_enable(hwblk_info, hwblk);
+			ret = d->driver->pm->runtime_suspend(d);
+			hwblk_disable(hwblk_info, hwblk);
+		}
 
 		if (!ret) {
 			set_bit(PDEV_ARCHDATA_FLAG_SUSP, &ad->flags);
diff --git a/arch/sh/kernel/cpu/shmobile/sleep.S b/arch/sh/kernel/cpu/shmobile/sleep.S
index a439e6c7824f..e9dd7fa0abd2 100644
--- a/arch/sh/kernel/cpu/shmobile/sleep.S
+++ b/arch/sh/kernel/cpu/shmobile/sleep.S
@@ -20,79 +20,103 @@
  * Kernel mode register usage, see entry.S:
  *	k0	scratch
  *	k1	scratch
- *	k4	scratch
  */
 #define k0	r0
 #define k1	r1
-#define k4	r4
 
-/* manage self-refresh and enter standby mode.
+/* manage self-refresh and enter standby mode. must be self-contained.
  * this code will be copied to on-chip memory and executed from there.
  */
+	.balign 4
+ENTRY(sh_mobile_sleep_enter_start)
 
-	.balign 	4096,0,4096
-ENTRY(sh_mobile_standby)
+	/* save mode flags */
+	mov.l	r4, @(SH_SLEEP_MODE, r5)
 
 	/* save original vbr */
-	stc	vbr, r1
-	mova	saved_vbr, r0
-	mov.l	r1, @r0
+	stc	vbr, r0
+	mov.l	r0, @(SH_SLEEP_VBR, r5)
 
 	/* point vbr to our on-chip memory page */
 	ldc	r5, vbr
 
 	/* save return address */
-	mova	saved_spc, r0
-	sts	pr, r5
-	mov.l	r5, @r0
+	sts	pr, r0
+	mov.l	r0, @(SH_SLEEP_SPC, r5)
 
 	/* save sr */
-	mova	saved_sr, r0
-	stc	sr, r5
-	mov.l	r5, @r0
+	stc	sr, r0
+	mov.l	r0, @(SH_SLEEP_SR, r5)
 
-	/* save mode flags */
-	mova	saved_mode, r0
-	mov.l	r4, @r0
+	/* save sp */
+	mov.l	r15, @(SH_SLEEP_SP, r5)
+
+	/* save stbcr */
+	bsr     save_register
+	 mov    #SH_SLEEP_REG_STBCR, r0
+
+	/* save mmu and cache context if needed */
+	mov.l	@(SH_SLEEP_MODE, r5), r0
+	tst	#SUSP_SH_MMU, r0
+	bt	skip_mmu_save_disable
+
+       /* save mmu state */
+	bsr	save_register
+	 mov	#SH_SLEEP_REG_PTEH, r0
+
+	bsr	save_register
+	 mov	#SH_SLEEP_REG_PTEL, r0
+
+	bsr	save_register
+	 mov	#SH_SLEEP_REG_TTB, r0
+
+	bsr	save_register
+	 mov	#SH_SLEEP_REG_TEA, r0
+
+	bsr	save_register
+	 mov	#SH_SLEEP_REG_MMUCR, r0
+
+	bsr	save_register
+	 mov	#SH_SLEEP_REG_PTEA, r0
+
+	bsr	save_register
+	 mov	#SH_SLEEP_REG_PASCR, r0
 
-	/* put mode flags in r0 */
-	mov	r4, r0
+	bsr	save_register
+	 mov	#SH_SLEEP_REG_IRMCR, r0
 
+	/* invalidate TLBs and disable the MMU */
+	bsr	get_register
+	 mov	#SH_SLEEP_REG_MMUCR, r0
+	mov	#4, r1
+	mov.l	r1, @r0
+	icbi	@r0
+
+	/* save cache registers and disable caches */
+	bsr	save_register
+	 mov	#SH_SLEEP_REG_CCR, r0
+
+	bsr	save_register
+	 mov	#SH_SLEEP_REG_RAMCR, r0
+
+	bsr	get_register
+	 mov	#SH_SLEEP_REG_CCR, r0
+	mov	#0, r1
+	mov.l	r1, @r0
+	icbi	@r0
+
+skip_mmu_save_disable:
+	/* call self-refresh entering code if needed */
+	mov.l	@(SH_SLEEP_MODE, r5), r0
 	tst	#SUSP_SH_SF, r0
 	bt	skip_set_sf
-#ifdef CONFIG_CPU_SUBTYPE_SH7724
-	/* DBSC: put memory in self-refresh mode */
-	mov.l	dben_reg, r4
-	mov.l	dben_data0, r1
-	mov.l	r1, @r4
-
-	mov.l	dbrfpdn0_reg, r4
-	mov.l	dbrfpdn0_data0, r1
-	mov.l	r1, @r4
-
-	mov.l	dbcmdcnt_reg, r4
-	mov.l	dbcmdcnt_data0, r1
-	mov.l	r1, @r4
-
-	mov.l	dbcmdcnt_reg, r4
-	mov.l	dbcmdcnt_data1, r1
-	mov.l	r1, @r4
-
-	mov.l	dbrfpdn0_reg, r4
-	mov.l	dbrfpdn0_data1, r1
-	mov.l	r1, @r4
-#else
-	/* SBSC: disable power down and put in self-refresh mode */
-	mov.l	1f, r4
-	mov.l	2f, r1
-	mov.l	@r4, r2
-	or	r1, r2
-	mov.l   3f, r3
-	and	r3, r2
-	mov.l	r2, @r4
-#endif
+
+	mov.l	@(SH_SLEEP_SF_PRE, r5), r0
+	jsr	@r0
+	 nop
 
 skip_set_sf:
+	mov.l	@(SH_SLEEP_MODE, r5), r0
 	tst	#SUSP_SH_STANDBY, r0
 	bt	test_rstandby
 
@@ -104,6 +128,12 @@ test_rstandby:
 	tst	#SUSP_SH_RSTANDBY, r0
 	bt	test_ustandby
 
+	/* setup BAR register */
+	bsr	get_register
+	 mov	#SH_SLEEP_REG_BAR, r0
+	mov.l	@(SH_SLEEP_RESUME, r5), r1
+	mov.l	r1, @r0
+
 	/* set mode to "r-standby mode" */
 	bra	do_sleep
 	 mov	#0x20, r1
@@ -123,124 +153,136 @@ force_sleep:
 
 do_sleep:
 	/* setup and enter selected standby mode */
-	mov.l	5f, r4
-	mov.l	r1, @r4
+	bsr     get_register
+	 mov    #SH_SLEEP_REG_STBCR, r0
+	mov.l	r1, @r0
 again:
 	sleep
 	bra	again
 	 nop
 
-restore_jump_vbr:
+save_register:
+	add	#SH_SLEEP_BASE_ADDR, r0
+	mov.l	@(r0, r5), r1
+	add	#-SH_SLEEP_BASE_ADDR, r0
+	mov.l	@r1, r1
+	add	#SH_SLEEP_BASE_DATA, r0
+	mov.l	r1, @(r0, r5)
+	add	#-SH_SLEEP_BASE_DATA, r0
+	rts
+	 nop
+
+get_register:
+	add	#SH_SLEEP_BASE_ADDR, r0
+	mov.l	@(r0, r5), r0
+	rts
+	 nop
+ENTRY(sh_mobile_sleep_enter_end)
+
+	.balign 4
+ENTRY(sh_mobile_sleep_resume_start)
+
+	/* figure out start address */
+	bsr	0f
+	 nop
+0:
+	sts	pr, k1
+	mov.l	1f, k0
+	and	k0, k1
+
+	/* store pointer to data area in VBR */
+	ldc	k1, vbr
+
+	/* setup sr with saved sr */
+	mov.l	@(SH_SLEEP_SR, k1), k0
+	ldc	k0, sr
+
+	/* now: user register set! */
+	stc	vbr, r5
+
 	/* setup spc with return address to c code */
-	mov.l	saved_spc, k0
-	ldc	k0, spc
+	mov.l	@(SH_SLEEP_SPC, r5), r0
+	ldc	r0, spc
 
 	/* restore vbr */
-	mov.l	saved_vbr, k0
-	ldc	k0, vbr
+	mov.l	@(SH_SLEEP_VBR, r5), r0
+	ldc	r0, vbr
 
 	/* setup ssr with saved sr */
-	mov.l	saved_sr, k0
-	ldc	k0, ssr
+	mov.l	@(SH_SLEEP_SR, r5), r0
+	ldc	r0, ssr
 
-	/* get mode flags */
-	mov.l	saved_mode, k0
+	/* restore sp */
+	mov.l   @(SH_SLEEP_SP, r5), r15
 
-done_sleep:
-	/* reset standby mode to sleep mode */
-	mov.l	5f, k4
-	mov	#0x00, k1
-	mov.l	k1, @k4
+	/* restore sleep mode register */
+	bsr     restore_register
+	 mov    #SH_SLEEP_REG_STBCR, r0
 
-	tst	#SUSP_SH_SF, k0
+	/* call self-refresh resume code if needed */
+	mov.l	@(SH_SLEEP_MODE, r5), r0
+	tst	#SUSP_SH_SF, r0
 	bt	skip_restore_sf
 
-#ifdef CONFIG_CPU_SUBTYPE_SH7724
-	/* DBSC: put memory in auto-refresh mode */
-	mov.l	dbrfpdn0_reg, k4
-	mov.l	dbrfpdn0_data0, k1
-	mov.l	k1, @k4
-
-	nop /* sleep 140 ns */
-	nop
-	nop
-	nop
-
-	mov.l	dbcmdcnt_reg, k4
-	mov.l	dbcmdcnt_data0, k1
-	mov.l	k1, @k4
-
-	mov.l	dbcmdcnt_reg, k4
-	mov.l	dbcmdcnt_data1, k1
-	mov.l	k1, @k4
-
-	mov.l	dben_reg, k4
-	mov.l	dben_data1, k1
-	mov.l	k1, @k4
-
-	mov.l	dbrfpdn0_reg, k4
-	mov.l	dbrfpdn0_data2, k1
-	mov.l	k1, @k4
-#else
-	/* SBSC: set auto-refresh mode */
-	mov.l	1f, k4
-	mov.l	@k4, k0
-	mov.l   4f, k1
-	and	k1, k0
-	mov.l	k0, @k4
-	mov.l	6f, k4
-	mov.l	8f, k0
-	mov.l	@k4, k1
-	mov	#-1, k4
-	add	k4, k1
-	or	k1, k0
-	mov.l	7f, k1
-	mov.l	k0, @k1
-#endif
+	mov.l	@(SH_SLEEP_SF_POST, r5), r0
+	jsr	@r0
+	 nop
+
 skip_restore_sf:
-	/* jump to vbr vector */
-	mov.l	saved_vbr, k0
-	mov.l	offset_vbr, k4
-	add	k4, k0
-	jmp	@k0
+	/* restore mmu and cache state if needed */
+	mov.l	@(SH_SLEEP_MODE, r5), r0
+	tst	#SUSP_SH_MMU, r0
+	bt	skip_restore_mmu
+
+	/* restore mmu state */
+	bsr	restore_register
+	 mov	#SH_SLEEP_REG_PTEH, r0
+
+	bsr	restore_register
+	 mov	#SH_SLEEP_REG_PTEL, r0
+
+	bsr	restore_register
+	 mov	#SH_SLEEP_REG_TTB, r0
+
+	bsr	restore_register
+	 mov	#SH_SLEEP_REG_TEA, r0
+
+	bsr	restore_register
+	 mov	#SH_SLEEP_REG_PTEA, r0
+
+	bsr	restore_register
+	 mov	#SH_SLEEP_REG_PASCR, r0
+
+	bsr	restore_register
+	 mov	#SH_SLEEP_REG_IRMCR, r0
+
+	bsr	restore_register
+	 mov	#SH_SLEEP_REG_MMUCR, r0
+	icbi	@r0
+
+	/* restore cache settings */
+	bsr	restore_register
+	 mov	#SH_SLEEP_REG_RAMCR, r0
+	icbi	@r0
+
+	bsr	restore_register
+	 mov	#SH_SLEEP_REG_CCR, r0
+	icbi	@r0
+
+skip_restore_mmu:
+	rte
 	 nop
 
-	.balign 4
-saved_mode:	.long	0
-saved_spc:	.long	0
-saved_sr:	.long	0
-saved_vbr:	.long	0
-offset_vbr:	.long	0x600
-#ifdef CONFIG_CPU_SUBTYPE_SH7724
-dben_reg:	.long	0xfd000010 /* DBEN */
-dben_data0:	.long	0
-dben_data1:	.long	1
-dbrfpdn0_reg:	.long	0xfd000040 /* DBRFPDN0 */
-dbrfpdn0_data0:	.long	0
-dbrfpdn0_data1:	.long	1
-dbrfpdn0_data2:	.long	0x00010000
-dbcmdcnt_reg:	.long	0xfd000014 /* DBCMDCNT */
-dbcmdcnt_data0:	.long	2
-dbcmdcnt_data1:	.long	4
-#else
-1:	.long	0xfe400008 /* SDCR0 */
-2:	.long	0x00000400
-3:	.long	0xffff7fff
-4:	.long	0xfffffbff
-#endif
-5:	.long	0xa4150020 /* STBCR */
-6:	.long   0xfe40001c /* RTCOR */
-7:	.long   0xfe400018 /* RTCNT */
-8:	.long   0xa55a0000
-
-
-/* interrupt vector @ 0x600 */
-	.balign 	0x400,0,0x400
-	.long	0xdeadbeef
-	.balign 	0x200,0,0x200
-	bra	restore_jump_vbr
+restore_register:
+	add	#SH_SLEEP_BASE_DATA, r0
+	mov.l	@(r0, r5), r1
+	add	#-SH_SLEEP_BASE_DATA, r0
+	add	#SH_SLEEP_BASE_ADDR, r0
+	mov.l	@(r0, r5), r0
+	mov.l	r1, @r0
+	rts
 	 nop
-sh_mobile_standby_end:
 
-ENTRY(sh_mobile_standby_size)
-	.long sh_mobile_standby_end - sh_mobile_standby
+	.balign 4
+1:	.long	~0x7ff
+ENTRY(sh_mobile_sleep_resume_end)
diff --git a/arch/sh/kernel/cpu/ubc.S b/arch/sh/kernel/cpu/ubc.S
deleted file mode 100644
index 81923079fa12..000000000000
--- a/arch/sh/kernel/cpu/ubc.S
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * arch/sh/kernel/cpu/ubc.S
- *
- * Set of management routines for the User Break Controller (UBC)
- *
- * Copyright (C) 2002 Paul Mundt
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-#include <linux/linkage.h>
-#include <asm/ubc.h>
-
-#define STBCR2		0xffc00010
-
-ENTRY(ubc_sleep)
-	mov	#0, r0
-
-	mov.l	1f, r1		! Zero out UBC_BBRA ..
-	mov.w	r0, @r1
-
-	mov.l	2f, r1		! .. same for BBRB ..
-	mov.w	r0, @r1
-
-	mov.l	3f, r1		! .. and again for BRCR.
-	mov.w	r0, @r1
-
-	mov.w	@r1, r0		! Dummy read BRCR
-
-	mov.l	4f, r1		! Set MSTP5 in STBCR2
-	mov.b	@r1, r0
-	or	#0x01, r0
-	mov.b	r0, @r1
-
-	mov.b	@r1, r0		! Two dummy reads ..
-	mov.b	@r1, r0
-
-	rts
-	nop
-
-ENTRY(ubc_wakeup)
-	mov.l	4f, r1		! Clear MSTP5
-	mov.b	@r1, r0
-	and	#0xfe, r0
-	mov.b	r0, @r1
-
-	mov.b	@r1, r0		! Two more dummy reads ..
-	mov.b	@r1, r0
-
-	rts
-	nop
-
-1:	.long	UBC_BBRA
-2:	.long	UBC_BBRB
-3:	.long	UBC_BRCR
-4:	.long	STBCR2
-
diff --git a/arch/sh/kernel/dma-nommu.c b/arch/sh/kernel/dma-nommu.c
new file mode 100644
index 000000000000..3c55b87f8b63
--- /dev/null
+++ b/arch/sh/kernel/dma-nommu.c
@@ -0,0 +1,82 @@
+/*
+ * DMA mapping support for platforms lacking IOMMUs.
+ *
+ * Copyright (C) 2009  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+
+static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
+				 unsigned long offset, size_t size,
+				 enum dma_data_direction dir,
+				 struct dma_attrs *attrs)
+{
+	dma_addr_t addr = page_to_phys(page) + offset;
+
+	WARN_ON(size == 0);
+	dma_cache_sync(dev, page_address(page) + offset, size, dir);
+
+	return addr;
+}
+
+static int nommu_map_sg(struct device *dev, struct scatterlist *sg,
+			int nents, enum dma_data_direction dir,
+			struct dma_attrs *attrs)
+{
+	struct scatterlist *s;
+	int i;
+
+	WARN_ON(nents == 0 || sg[0].length == 0);
+
+	for_each_sg(sg, s, nents, i) {
+		BUG_ON(!sg_page(s));
+
+		dma_cache_sync(dev, sg_virt(s), s->length, dir);
+
+		s->dma_address = sg_phys(s);
+		s->dma_length = s->length;
+	}
+
+	return nents;
+}
+
+#ifdef CONFIG_DMA_NONCOHERENT
+static void nommu_sync_single(struct device *dev, dma_addr_t addr,
+			      size_t size, enum dma_data_direction dir)
+{
+	dma_cache_sync(dev, phys_to_virt(addr), size, dir);
+}
+
+static void nommu_sync_sg(struct device *dev, struct scatterlist *sg,
+			  int nelems, enum dma_data_direction dir)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nelems, i)
+		dma_cache_sync(dev, sg_virt(s), s->length, dir);
+}
+#endif
+
+struct dma_map_ops nommu_dma_ops = {
+	.alloc_coherent		= dma_generic_alloc_coherent,
+	.free_coherent		= dma_generic_free_coherent,
+	.map_page		= nommu_map_page,
+	.map_sg			= nommu_map_sg,
+#ifdef CONFIG_DMA_NONCOHERENT
+	.sync_single_for_device	= nommu_sync_single,
+	.sync_sg_for_device	= nommu_sync_sg,
+#endif
+	.is_phys		= 1,
+};
+
+void __init no_iommu_init(void)
+{
+	if (dma_ops)
+		return;
+	dma_ops = &nommu_dma_ops;
+}
diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c
index d76a23170dbb..3576b709f052 100644
--- a/arch/sh/kernel/dwarf.c
+++ b/arch/sh/kernel/dwarf.c
@@ -20,6 +20,7 @@
 #include <linux/list.h>
 #include <linux/mempool.h>
 #include <linux/mm.h>
+#include <linux/elf.h>
 #include <linux/ftrace.h>
 #include <asm/dwarf.h>
 #include <asm/unwinder.h>
@@ -530,7 +531,18 @@ static int dwarf_cfa_execute_insns(unsigned char *insn_start,
 }
 
 /**
- *	dwarf_unwind_stack - recursively unwind the stack
+ *	dwarf_free_frame - free the memory allocated for @frame
+ *	@frame: the frame to free
+ */
+void dwarf_free_frame(struct dwarf_frame *frame)
+{
+	dwarf_frame_free_regs(frame);
+	mempool_free(frame, dwarf_frame_pool);
+}
+
+/**
+ *	dwarf_unwind_stack - unwind the stack
+ *
  *	@pc: address of the function to unwind
  *	@prev: struct dwarf_frame of the previous stackframe on the callstack
  *
@@ -548,9 +560,9 @@ struct dwarf_frame * dwarf_unwind_stack(unsigned long pc,
 	unsigned long addr;
 
 	/*
-	 * If this is the first invocation of this recursive function we
-	 * need get the contents of a physical register to get the CFA
-	 * in order to begin the virtual unwinding of the stack.
+	 * If we're starting at the top of the stack we need get the
+	 * contents of a physical register to get the CFA in order to
+	 * begin the virtual unwinding of the stack.
 	 *
 	 * NOTE: the return address is guaranteed to be setup by the
 	 * time this function makes its first function call.
@@ -593,9 +605,8 @@ struct dwarf_frame * dwarf_unwind_stack(unsigned long pc,
 	fde = dwarf_lookup_fde(pc);
 	if (!fde) {
 		/*
-		 * This is our normal exit path - the one that stops the
-		 * recursion. There's two reasons why we might exit
-		 * here,
+		 * This is our normal exit path. There are two reasons
+		 * why we might exit here,
 		 *
 		 *	a) pc has no asscociated DWARF frame info and so
 		 *	we don't know how to unwind this frame. This is
@@ -637,10 +648,10 @@ struct dwarf_frame * dwarf_unwind_stack(unsigned long pc,
 
 		} else {
 			/*
-			 * Again, this is the first invocation of this
-			 * recurisve function. We need to physically
-			 * read the contents of a register in order to
-			 * get the Canonical Frame Address for this
+			 * Again, we're starting from the top of the
+			 * stack. We need to physically read
+			 * the contents of a register in order to get
+			 * the Canonical Frame Address for this
 			 * function.
 			 */
 			frame->cfa = dwarf_read_arch_reg(frame->cfa_register);
@@ -670,13 +681,12 @@ struct dwarf_frame * dwarf_unwind_stack(unsigned long pc,
 	return frame;
 
 bail:
-	dwarf_frame_free_regs(frame);
-	mempool_free(frame, dwarf_frame_pool);
+	dwarf_free_frame(frame);
 	return NULL;
 }
 
 static int dwarf_parse_cie(void *entry, void *p, unsigned long len,
-			   unsigned char *end)
+			   unsigned char *end, struct module *mod)
 {
 	struct dwarf_cie *cie;
 	unsigned long flags;
@@ -772,6 +782,8 @@ static int dwarf_parse_cie(void *entry, void *p, unsigned long len,
 	cie->initial_instructions = p;
 	cie->instructions_end = end;
 
+	cie->mod = mod;
+
 	/* Add to list */
 	spin_lock_irqsave(&dwarf_cie_lock, flags);
 	list_add_tail(&cie->link, &dwarf_cie_list);
@@ -782,7 +794,7 @@ static int dwarf_parse_cie(void *entry, void *p, unsigned long len,
 
 static int dwarf_parse_fde(void *entry, u32 entry_type,
 			   void *start, unsigned long len,
-			   unsigned char *end)
+			   unsigned char *end, struct module *mod)
 {
 	struct dwarf_fde *fde;
 	struct dwarf_cie *cie;
@@ -831,6 +843,8 @@ static int dwarf_parse_fde(void *entry, u32 entry_type,
 	fde->instructions = p;
 	fde->end = end;
 
+	fde->mod = mod;
+
 	/* Add to list. */
 	spin_lock_irqsave(&dwarf_fde_lock, flags);
 	list_add_tail(&fde->link, &dwarf_fde_list);
@@ -854,10 +868,8 @@ static void dwarf_unwinder_dump(struct task_struct *task,
 	while (1) {
 		frame = dwarf_unwind_stack(return_addr, _frame);
 
-		if (_frame) {
-			dwarf_frame_free_regs(_frame);
-			mempool_free(_frame, dwarf_frame_pool);
-		}
+		if (_frame)
+			dwarf_free_frame(_frame);
 
 		_frame = frame;
 
@@ -867,6 +879,9 @@ static void dwarf_unwinder_dump(struct task_struct *task,
 		return_addr = frame->return_addr;
 		ops->address(data, return_addr, 1);
 	}
+
+	if (frame)
+		dwarf_free_frame(frame);
 }
 
 static struct unwinder dwarf_unwinder = {
@@ -896,48 +911,28 @@ static void dwarf_unwinder_cleanup(void)
 }
 
 /**
- *	dwarf_unwinder_init - initialise the dwarf unwinder
+ *	dwarf_parse_section - parse DWARF section
+ *	@eh_frame_start: start address of the .eh_frame section
+ *	@eh_frame_end: end address of the .eh_frame section
+ *	@mod: the kernel module containing the .eh_frame section
  *
- *	Build the data structures describing the .dwarf_frame section to
- *	make it easier to lookup CIE and FDE entries. Because the
- *	.eh_frame section is packed as tightly as possible it is not
- *	easy to lookup the FDE for a given PC, so we build a list of FDE
- *	and CIE entries that make it easier.
+ *	Parse the information in a .eh_frame section.
  */
-static int __init dwarf_unwinder_init(void)
+static int dwarf_parse_section(char *eh_frame_start, char *eh_frame_end,
+			       struct module *mod)
 {
 	u32 entry_type;
 	void *p, *entry;
 	int count, err = 0;
-	unsigned long len;
+	unsigned long len = 0;
 	unsigned int c_entries, f_entries;
 	unsigned char *end;
-	INIT_LIST_HEAD(&dwarf_cie_list);
-	INIT_LIST_HEAD(&dwarf_fde_list);
 
 	c_entries = 0;
 	f_entries = 0;
-	entry = &__start_eh_frame;
-
-	dwarf_frame_cachep = kmem_cache_create("dwarf_frames",
-			sizeof(struct dwarf_frame), 0,
-			SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL);
-
-	dwarf_reg_cachep = kmem_cache_create("dwarf_regs",
-			sizeof(struct dwarf_reg), 0,
-			SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL);
+	entry = eh_frame_start;
 
-	dwarf_frame_pool = mempool_create(DWARF_FRAME_MIN_REQ,
-					  mempool_alloc_slab,
-					  mempool_free_slab,
-					  dwarf_frame_cachep);
-
-	dwarf_reg_pool = mempool_create(DWARF_REG_MIN_REQ,
-					 mempool_alloc_slab,
-					 mempool_free_slab,
-					 dwarf_reg_cachep);
-
-	while ((char *)entry < __stop_eh_frame) {
+	while ((char *)entry < eh_frame_end) {
 		p = entry;
 
 		count = dwarf_entry_len(p, &len);
@@ -949,6 +944,7 @@ static int __init dwarf_unwinder_init(void)
 			 * entry and move to the next one because 'len'
 			 * tells us where our next entry is.
 			 */
+			err = -EINVAL;
 			goto out;
 		} else
 			p += count;
@@ -960,13 +956,14 @@ static int __init dwarf_unwinder_init(void)
 		p += 4;
 
 		if (entry_type == DW_EH_FRAME_CIE) {
-			err = dwarf_parse_cie(entry, p, len, end);
+			err = dwarf_parse_cie(entry, p, len, end, mod);
 			if (err < 0)
 				goto out;
 			else
 				c_entries++;
 		} else {
-			err = dwarf_parse_fde(entry, entry_type, p, len, end);
+			err = dwarf_parse_fde(entry, entry_type, p, len,
+					      end, mod);
 			if (err < 0)
 				goto out;
 			else
@@ -979,6 +976,129 @@ static int __init dwarf_unwinder_init(void)
 	printk(KERN_INFO "DWARF unwinder initialised: read %u CIEs, %u FDEs\n",
 	       c_entries, f_entries);
 
+	return 0;
+
+out:
+	return err;
+}
+
+#ifdef CONFIG_MODULES
+int module_dwarf_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
+			  struct module *me)
+{
+	unsigned int i, err;
+	unsigned long start, end;
+	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+	start = end = 0;
+
+	for (i = 1; i < hdr->e_shnum; i++) {
+		/* Alloc bit cleared means "ignore it." */
+		if ((sechdrs[i].sh_flags & SHF_ALLOC)
+		    && !strcmp(secstrings+sechdrs[i].sh_name, ".eh_frame")) {
+			start = sechdrs[i].sh_addr;
+			end = start + sechdrs[i].sh_size;
+			break;
+		}
+	}
+
+	/* Did we find the .eh_frame section? */
+	if (i != hdr->e_shnum) {
+		err = dwarf_parse_section((char *)start, (char *)end, me);
+		if (err) {
+			printk(KERN_WARNING "%s: failed to parse DWARF info\n",
+			       me->name);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ *	module_dwarf_cleanup - remove FDE/CIEs associated with @mod
+ *	@mod: the module that is being unloaded
+ *
+ *	Remove any FDEs and CIEs from the global lists that came from
+ *	@mod's .eh_frame section because @mod is being unloaded.
+ */
+void module_dwarf_cleanup(struct module *mod)
+{
+	struct dwarf_fde *fde;
+	struct dwarf_cie *cie;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dwarf_cie_lock, flags);
+
+again_cie:
+	list_for_each_entry(cie, &dwarf_cie_list, link) {
+		if (cie->mod == mod)
+			break;
+	}
+
+	if (&cie->link != &dwarf_cie_list) {
+		list_del(&cie->link);
+		kfree(cie);
+		goto again_cie;
+	}
+
+	spin_unlock_irqrestore(&dwarf_cie_lock, flags);
+
+	spin_lock_irqsave(&dwarf_fde_lock, flags);
+
+again_fde:
+	list_for_each_entry(fde, &dwarf_fde_list, link) {
+		if (fde->mod == mod)
+			break;
+	}
+
+	if (&fde->link != &dwarf_fde_list) {
+		list_del(&fde->link);
+		kfree(fde);
+		goto again_fde;
+	}
+
+	spin_unlock_irqrestore(&dwarf_fde_lock, flags);
+}
+#endif /* CONFIG_MODULES */
+
+/**
+ *	dwarf_unwinder_init - initialise the dwarf unwinder
+ *
+ *	Build the data structures describing the .dwarf_frame section to
+ *	make it easier to lookup CIE and FDE entries. Because the
+ *	.eh_frame section is packed as tightly as possible it is not
+ *	easy to lookup the FDE for a given PC, so we build a list of FDE
+ *	and CIE entries that make it easier.
+ */
+static int __init dwarf_unwinder_init(void)
+{
+	int err;
+	INIT_LIST_HEAD(&dwarf_cie_list);
+	INIT_LIST_HEAD(&dwarf_fde_list);
+
+	dwarf_frame_cachep = kmem_cache_create("dwarf_frames",
+			sizeof(struct dwarf_frame), 0,
+			SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL);
+
+	dwarf_reg_cachep = kmem_cache_create("dwarf_regs",
+			sizeof(struct dwarf_reg), 0,
+			SLAB_PANIC | SLAB_HWCACHE_ALIGN | SLAB_NOTRACK, NULL);
+
+	dwarf_frame_pool = mempool_create(DWARF_FRAME_MIN_REQ,
+					  mempool_alloc_slab,
+					  mempool_free_slab,
+					  dwarf_frame_cachep);
+
+	dwarf_reg_pool = mempool_create(DWARF_REG_MIN_REQ,
+					 mempool_alloc_slab,
+					 mempool_free_slab,
+					 dwarf_reg_cachep);
+
+	err = dwarf_parse_section(__start_eh_frame, __stop_eh_frame, NULL);
+	if (err)
+		goto out;
+
 	err = unwinder_register(&dwarf_unwinder);
 	if (err)
 		goto out;
diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S
index 3eb84931d2aa..f0abd58c3a69 100644
--- a/arch/sh/kernel/entry-common.S
+++ b/arch/sh/kernel/entry-common.S
@@ -133,7 +133,7 @@ work_pending:
 	! r8: current_thread_info
 	! t:  result of "tst	#_TIF_NEED_RESCHED, r0"
 	bf/s	work_resched
-	 tst	#(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK), r0
+	 tst	#_TIF_SIGPENDING, r0
 work_notifysig:
 	bt/s	__restore_all
 	 mov	r15, r4
diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c
index 2c48e267256e..b6f41c109beb 100644
--- a/arch/sh/kernel/ftrace.c
+++ b/arch/sh/kernel/ftrace.c
@@ -62,6 +62,150 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
 	return ftrace_replaced_code;
 }
 
+/*
+ * Modifying code must take extra care. On an SMP machine, if
+ * the code being modified is also being executed on another CPU
+ * that CPU will have undefined results and possibly take a GPF.
+ * We use kstop_machine to stop other CPUS from exectuing code.
+ * But this does not stop NMIs from happening. We still need
+ * to protect against that. We separate out the modification of
+ * the code to take care of this.
+ *
+ * Two buffers are added: An IP buffer and a "code" buffer.
+ *
+ * 1) Put the instruction pointer into the IP buffer
+ *    and the new code into the "code" buffer.
+ * 2) Wait for any running NMIs to finish and set a flag that says
+ *    we are modifying code, it is done in an atomic operation.
+ * 3) Write the code
+ * 4) clear the flag.
+ * 5) Wait for any running NMIs to finish.
+ *
+ * If an NMI is executed, the first thing it does is to call
+ * "ftrace_nmi_enter". This will check if the flag is set to write
+ * and if it is, it will write what is in the IP and "code" buffers.
+ *
+ * The trick is, it does not matter if everyone is writing the same
+ * content to the code location. Also, if a CPU is executing code
+ * it is OK to write to that code location if the contents being written
+ * are the same as what exists.
+ */
+#define MOD_CODE_WRITE_FLAG (1 << 31)	/* set when NMI should do the write */
+static atomic_t nmi_running = ATOMIC_INIT(0);
+static int mod_code_status;		/* holds return value of text write */
+static void *mod_code_ip;		/* holds the IP to write to */
+static void *mod_code_newcode;		/* holds the text to write to the IP */
+
+static unsigned nmi_wait_count;
+static atomic_t nmi_update_count = ATOMIC_INIT(0);
+
+int ftrace_arch_read_dyn_info(char *buf, int size)
+{
+	int r;
+
+	r = snprintf(buf, size, "%u %u",
+		     nmi_wait_count,
+		     atomic_read(&nmi_update_count));
+	return r;
+}
+
+static void clear_mod_flag(void)
+{
+	int old = atomic_read(&nmi_running);
+
+	for (;;) {
+		int new = old & ~MOD_CODE_WRITE_FLAG;
+
+		if (old == new)
+			break;
+
+		old = atomic_cmpxchg(&nmi_running, old, new);
+	}
+}
+
+static void ftrace_mod_code(void)
+{
+	/*
+	 * Yes, more than one CPU process can be writing to mod_code_status.
+	 *    (and the code itself)
+	 * But if one were to fail, then they all should, and if one were
+	 * to succeed, then they all should.
+	 */
+	mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
+					     MCOUNT_INSN_SIZE);
+
+	/* if we fail, then kill any new writers */
+	if (mod_code_status)
+		clear_mod_flag();
+}
+
+void ftrace_nmi_enter(void)
+{
+	if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
+		smp_rmb();
+		ftrace_mod_code();
+		atomic_inc(&nmi_update_count);
+	}
+	/* Must have previous changes seen before executions */
+	smp_mb();
+}
+
+void ftrace_nmi_exit(void)
+{
+	/* Finish all executions before clearing nmi_running */
+	smp_mb();
+	atomic_dec(&nmi_running);
+}
+
+static void wait_for_nmi_and_set_mod_flag(void)
+{
+	if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
+		return;
+
+	do {
+		cpu_relax();
+	} while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
+
+	nmi_wait_count++;
+}
+
+static void wait_for_nmi(void)
+{
+	if (!atomic_read(&nmi_running))
+		return;
+
+	do {
+		cpu_relax();
+	} while (atomic_read(&nmi_running));
+
+	nmi_wait_count++;
+}
+
+static int
+do_ftrace_mod_code(unsigned long ip, void *new_code)
+{
+	mod_code_ip = (void *)ip;
+	mod_code_newcode = new_code;
+
+	/* The buffers need to be visible before we let NMIs write them */
+	smp_mb();
+
+	wait_for_nmi_and_set_mod_flag();
+
+	/* Make sure all running NMIs have finished before we write the code */
+	smp_mb();
+
+	ftrace_mod_code();
+
+	/* Make sure the write happens before clearing the bit */
+	smp_mb();
+
+	clear_mod_flag();
+	wait_for_nmi();
+
+	return mod_code_status;
+}
+
 static int ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 		       unsigned char *new_code)
 {
@@ -86,7 +230,7 @@ static int ftrace_modify_code(unsigned long ip, unsigned char *old_code,
 		return -EINVAL;
 
 	/* replace the text with the new text */
-	if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
+	if (do_ftrace_mod_code(ip, new_code))
 		return -EPERM;
 
 	flush_icache_range(ip, ip + MCOUNT_INSN_SIZE);
diff --git a/arch/sh/kernel/head_32.S b/arch/sh/kernel/head_32.S
index a78be74b8d3e..1151ecdffa71 100644
--- a/arch/sh/kernel/head_32.S
+++ b/arch/sh/kernel/head_32.S
@@ -33,7 +33,7 @@ ENTRY(empty_zero_page)
 	.long	1		/* LOADER_TYPE */
 	.long	0x00000000	/* INITRD_START */
 	.long	0x00000000	/* INITRD_SIZE */
-#ifdef CONFIG_32BIT
+#if defined(CONFIG_32BIT) && defined(CONFIG_PMB_FIXED)
 	.long	0x53453f00 + 32	/* "SE?" = 32 bit */
 #else
 	.long	0x53453f00 + 29	/* "SE?" = 29 bit */
diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c
index 27ff2dc093c7..aaff0037fcd7 100644
--- a/arch/sh/kernel/idle.c
+++ b/arch/sh/kernel/idle.c
@@ -21,7 +21,7 @@
 #include <asm/atomic.h>
 
 static int hlt_counter;
-void (*pm_idle)(void);
+void (*pm_idle)(void) = NULL;
 void (*pm_power_off)(void);
 EXPORT_SYMBOL(pm_power_off);
 
@@ -39,48 +39,92 @@ static int __init hlt_setup(char *__unused)
 }
 __setup("hlt", hlt_setup);
 
+static inline int hlt_works(void)
+{
+	return !hlt_counter;
+}
+
+/*
+ * On SMP it's slightly faster (but much more power-consuming!)
+ * to poll the ->work.need_resched flag instead of waiting for the
+ * cross-CPU IPI to arrive. Use this option with caution.
+ */
+static void poll_idle(void)
+{
+	local_irq_enable();
+	while (!need_resched())
+		cpu_relax();
+}
+
 void default_idle(void)
 {
-	if (!hlt_counter) {
+	if (hlt_works()) {
 		clear_thread_flag(TIF_POLLING_NRFLAG);
 		smp_mb__after_clear_bit();
-		set_bl_bit();
-		stop_critical_timings();
 
-		while (!need_resched())
+		if (!need_resched()) {
+			local_irq_enable();
 			cpu_sleep();
+		} else
+			local_irq_enable();
 
-		start_critical_timings();
-		clear_bl_bit();
 		set_thread_flag(TIF_POLLING_NRFLAG);
 	} else
-		while (!need_resched())
-			cpu_relax();
+		poll_idle();
 }
 
+/*
+ * The idle thread. There's no useful work to be done, so just try to conserve
+ * power and have a low exit latency (ie sit in a loop waiting for somebody to
+ * say that they'd like to reschedule)
+ */
 void cpu_idle(void)
 {
+	unsigned int cpu = smp_processor_id();
+
 	set_thread_flag(TIF_POLLING_NRFLAG);
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		void (*idle)(void) = pm_idle;
+		tick_nohz_stop_sched_tick(1);
 
-		if (!idle)
-			idle = default_idle;
+		while (!need_resched() && cpu_online(cpu)) {
+			check_pgt_cache();
+			rmb();
 
-		tick_nohz_stop_sched_tick(1);
-		while (!need_resched())
-			idle();
-		tick_nohz_restart_sched_tick();
+			local_irq_disable();
+			/* Don't trace irqs off for idle */
+			stop_critical_timings();
+			pm_idle();
+			/*
+			 * Sanity check to ensure that pm_idle() returns
+			 * with IRQs enabled
+			 */
+			WARN_ON(irqs_disabled());
+			start_critical_timings();
+		}
 
+		tick_nohz_restart_sched_tick();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
-		check_pgt_cache();
 	}
 }
 
+void __cpuinit select_idle_routine(void)
+{
+	/*
+	 * If a platform has set its own idle routine, leave it alone.
+	 */
+	if (pm_idle)
+		return;
+
+	if (hlt_works())
+		pm_idle = default_idle;
+	else
+		pm_idle = poll_idle;
+}
+
 static void do_nothing(void *unused)
 {
 }
diff --git a/arch/sh/kernel/io_generic.c b/arch/sh/kernel/io_generic.c
index b8fa6524760a..e1e1dbd19557 100644
--- a/arch/sh/kernel/io_generic.c
+++ b/arch/sh/kernel/io_generic.c
@@ -24,7 +24,7 @@
 #define dummy_read()
 #endif
 
-unsigned long generic_io_base;
+unsigned long generic_io_base = 0;
 
 u8 generic_inb(unsigned long port)
 {
@@ -147,8 +147,10 @@ void generic_outsl(unsigned long port, const void *src, unsigned long count)
 
 void __iomem *generic_ioport_map(unsigned long addr, unsigned int size)
 {
+#ifdef P1SEG
 	if (PXSEG(addr) >= P1SEG)
 		return (void __iomem *)addr;
+#endif
 
 	return (void __iomem *)(addr + generic_io_base);
 }
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index eac7da772fc2..e1913f28f418 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -37,7 +37,15 @@ void ack_bad_irq(unsigned int irq)
  */
 static int show_other_interrupts(struct seq_file *p, int prec)
 {
+	int j;
+
+	seq_printf(p, "%*s: ", prec, "NMI");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stat[j].__nmi_count);
+	seq_printf(p, "  Non-maskable interrupts\n");
+
 	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
+
 	return 0;
 }
 
@@ -255,6 +263,12 @@ void __init init_IRQ(void)
 {
 	plat_irq_setup();
 
+	/*
+	 * Pin any of the legacy IRQ vectors that haven't already been
+	 * grabbed by the platform
+	 */
+	reserve_irq_legacy();
+
 	/* Perform the machine specific initialisation */
 	if (sh_mv.mv_init_irq)
 		sh_mv.mv_init_irq();
diff --git a/arch/sh/kernel/irq_32.c b/arch/sh/kernel/irq_32.c
new file mode 100644
index 000000000000..e33ab15831f9
--- /dev/null
+++ b/arch/sh/kernel/irq_32.c
@@ -0,0 +1,57 @@
+/*
+ * SHcompact irqflags support
+ *
+ * Copyright (C) 2006 - 2009 Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/irqflags.h>
+#include <linux/module.h>
+
+void notrace raw_local_irq_restore(unsigned long flags)
+{
+	unsigned long __dummy0, __dummy1;
+
+	if (flags == RAW_IRQ_DISABLED) {
+		__asm__ __volatile__ (
+			"stc	sr, %0\n\t"
+			"or	#0xf0, %0\n\t"
+			"ldc	%0, sr\n\t"
+			: "=&z" (__dummy0)
+			: /* no inputs */
+			: "memory"
+		);
+	} else {
+		__asm__ __volatile__ (
+			"stc	sr, %0\n\t"
+			"and	%1, %0\n\t"
+#ifdef CONFIG_CPU_HAS_SR_RB
+			"stc	r6_bank, %1\n\t"
+			"or	%1, %0\n\t"
+#endif
+			"ldc	%0, sr\n\t"
+			: "=&r" (__dummy0), "=r" (__dummy1)
+			: "1" (~RAW_IRQ_DISABLED)
+			: "memory"
+		);
+	}
+}
+EXPORT_SYMBOL(raw_local_irq_restore);
+
+unsigned long notrace __raw_local_save_flags(void)
+{
+	unsigned long flags;
+
+	__asm__ __volatile__ (
+		"stc	sr, %0\n\t"
+		"and	#0xf0, %0\n\t"
+		: "=&z" (flags)
+		: /* no inputs */
+		: "memory"
+	);
+
+	return flags;
+}
+EXPORT_SYMBOL(__raw_local_save_flags);
diff --git a/arch/sh/kernel/irq_64.c b/arch/sh/kernel/irq_64.c
new file mode 100644
index 000000000000..32365ba0e039
--- /dev/null
+++ b/arch/sh/kernel/irq_64.c
@@ -0,0 +1,51 @@
+/*
+ * SHmedia irqflags support
+ *
+ * Copyright (C) 2006 - 2009 Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/irqflags.h>
+#include <linux/module.h>
+#include <cpu/registers.h>
+
+void notrace raw_local_irq_restore(unsigned long flags)
+{
+	unsigned long long __dummy;
+
+	if (flags == RAW_IRQ_DISABLED) {
+		__asm__ __volatile__ (
+			"getcon	" __SR ", %0\n\t"
+			"or	%0, %1, %0\n\t"
+			"putcon	%0, " __SR "\n\t"
+			: "=&r" (__dummy)
+			: "r" (RAW_IRQ_DISABLED)
+		);
+	} else {
+		__asm__ __volatile__ (
+			"getcon	" __SR ", %0\n\t"
+			"and	%0, %1, %0\n\t"
+			"putcon	%0, " __SR "\n\t"
+			: "=&r" (__dummy)
+			: "r" (~RAW_IRQ_DISABLED)
+		);
+	}
+}
+EXPORT_SYMBOL(raw_local_irq_restore);
+
+unsigned long notrace __raw_local_save_flags(void)
+{
+	unsigned long flags;
+
+	__asm__ __volatile__ (
+		"getcon	" __SR ", %0\n\t"
+		"and	%0, %1, %0"
+		: "=&r" (flags)
+		: "r" (RAW_IRQ_DISABLED)
+	);
+
+	return flags;
+}
+EXPORT_SYMBOL(__raw_local_save_flags);
diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c
index 7ea2704ea033..76f280223ebd 100644
--- a/arch/sh/kernel/machine_kexec.c
+++ b/arch/sh/kernel/machine_kexec.c
@@ -46,12 +46,6 @@ void machine_crash_shutdown(struct pt_regs *regs)
  */
 int machine_kexec_prepare(struct kimage *image)
 {
-	/* older versions of kexec-tools are passing
-	 * the zImage entry point as a virtual address.
-	 */
-	if (image->start != PHYSADDR(image->start))
-		return -EINVAL; /* upgrade your kexec-tools */
-
 	return 0;
 }
 
diff --git a/arch/sh/kernel/machvec.c b/arch/sh/kernel/machvec.c
index cbce639b108a..1652340ba3f2 100644
--- a/arch/sh/kernel/machvec.c
+++ b/arch/sh/kernel/machvec.c
@@ -135,5 +135,9 @@ void __init sh_mv_setup(void)
 	if (!sh_mv.mv_nr_irqs)
 		sh_mv.mv_nr_irqs = NR_IRQS;
 
+#ifdef P2SEG
 	__set_io_port_base(P2SEG);
+#else
+	__set_io_port_base(0);
+#endif
 }
diff --git a/arch/sh/kernel/module.c b/arch/sh/kernel/module.c
index c2efdcde266f..43adddfe4c04 100644
--- a/arch/sh/kernel/module.c
+++ b/arch/sh/kernel/module.c
@@ -32,6 +32,7 @@
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <asm/unaligned.h>
+#include <asm/dwarf.h>
 
 void *module_alloc(unsigned long size)
 {
@@ -145,10 +146,16 @@ int module_finalize(const Elf_Ehdr *hdr,
 		    const Elf_Shdr *sechdrs,
 		    struct module *me)
 {
-	return module_bug_finalize(hdr, sechdrs, me);
+	int ret = 0;
+
+	ret |= module_dwarf_finalize(hdr, sechdrs, me);
+	ret |= module_bug_finalize(hdr, sechdrs, me);
+
+	return ret;
 }
 
 void module_arch_cleanup(struct module *mod)
 {
 	module_bug_cleanup(mod);
+	module_dwarf_cleanup(mod);
 }
diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c
new file mode 100644
index 000000000000..24ea837eac5b
--- /dev/null
+++ b/arch/sh/kernel/perf_callchain.c
@@ -0,0 +1,98 @@
+/*
+ * Performance event callchain support - SuperH architecture code
+ *
+ * Copyright (C) 2009  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <asm/unwinder.h>
+#include <asm/ptrace.h>
+
+static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip)
+{
+	if (entry->nr < PERF_MAX_STACK_DEPTH)
+		entry->ip[entry->nr++] = ip;
+}
+
+static void callchain_warning(void *data, char *msg)
+{
+}
+
+static void
+callchain_warning_symbol(void *data, char *msg, unsigned long symbol)
+{
+}
+
+static int callchain_stack(void *data, char *name)
+{
+	return 0;
+}
+
+static void callchain_address(void *data, unsigned long addr, int reliable)
+{
+	struct perf_callchain_entry *entry = data;
+
+	if (reliable)
+		callchain_store(entry, addr);
+}
+
+static const struct stacktrace_ops callchain_ops = {
+	.warning	= callchain_warning,
+	.warning_symbol	= callchain_warning_symbol,
+	.stack		= callchain_stack,
+	.address	= callchain_address,
+};
+
+static void
+perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
+{
+	callchain_store(entry, PERF_CONTEXT_KERNEL);
+	callchain_store(entry, regs->pc);
+
+	unwind_stack(NULL, regs, NULL, &callchain_ops, entry);
+}
+
+static void
+perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
+{
+	int is_user;
+
+	if (!regs)
+		return;
+
+	is_user = user_mode(regs);
+
+	if (!current || current->pid == 0)
+		return;
+
+	if (is_user && current->state != TASK_RUNNING)
+		return;
+
+	/*
+	 * Only the kernel side is implemented for now.
+	 */
+	if (!is_user)
+		perf_callchain_kernel(regs, entry);
+}
+
+/*
+ * No need for separate IRQ and NMI entries.
+ */
+static DEFINE_PER_CPU(struct perf_callchain_entry, callchain);
+
+struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+{
+	struct perf_callchain_entry *entry = &__get_cpu_var(callchain);
+
+	entry->nr = 0;
+
+	perf_do_callchain(regs, entry);
+
+	return entry;
+}
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
new file mode 100644
index 000000000000..7ff0943e7a08
--- /dev/null
+++ b/arch/sh/kernel/perf_event.c
@@ -0,0 +1,312 @@
+/*
+ * Performance event support framework for SuperH hardware counters.
+ *
+ *  Copyright (C) 2009  Paul Mundt
+ *
+ * Heavily based on the x86 and PowerPC implementations.
+ *
+ * x86:
+ *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ *  Copyright (C) 2009 Jaswinder Singh Rajput
+ *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
+ *
+ * ppc:
+ *  Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/perf_event.h>
+#include <asm/processor.h>
+
+struct cpu_hw_events {
+	struct perf_event	*events[MAX_HWEVENTS];
+	unsigned long		used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
+	unsigned long		active_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
+};
+
+DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+
+static struct sh_pmu *sh_pmu __read_mostly;
+
+/* Number of perf_events counting hardware events */
+static atomic_t num_events;
+/* Used to avoid races in calling reserve/release_pmc_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/*
+ * Stub these out for now, do something more profound later.
+ */
+int reserve_pmc_hardware(void)
+{
+	return 0;
+}
+
+void release_pmc_hardware(void)
+{
+}
+
+static inline int sh_pmu_initialized(void)
+{
+	return !!sh_pmu;
+}
+
+/*
+ * Release the PMU if this is the last perf_event.
+ */
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+	if (!atomic_add_unless(&num_events, -1, 1)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_dec_return(&num_events) == 0)
+			release_pmc_hardware();
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+}
+
+static int hw_perf_cache_event(int config, int *evp)
+{
+	unsigned long type, op, result;
+	int ev;
+
+	if (!sh_pmu->cache_events)
+		return -EINVAL;
+
+	/* unpack config */
+	type = config & 0xff;
+	op = (config >> 8) & 0xff;
+	result = (config >> 16) & 0xff;
+
+	if (type >= PERF_COUNT_HW_CACHE_MAX ||
+	    op >= PERF_COUNT_HW_CACHE_OP_MAX ||
+	    result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	ev = (*sh_pmu->cache_events)[type][op][result];
+	if (ev == 0)
+		return -EOPNOTSUPP;
+	if (ev == -1)
+		return -EINVAL;
+	*evp = ev;
+	return 0;
+}
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	struct hw_perf_event *hwc = &event->hw;
+	int config = -1;
+	int err;
+
+	if (!sh_pmu_initialized())
+		return -ENODEV;
+
+	/*
+	 * All of the on-chip counters are "limited", in that they have
+	 * no interrupts, and are therefore unable to do sampling without
+	 * further work and timer assistance.
+	 */
+	if (hwc->sample_period)
+		return -EINVAL;
+
+	/*
+	 * See if we need to reserve the counter.
+	 *
+	 * If no events are currently in use, then we have to take a
+	 * mutex to ensure that we don't race with another task doing
+	 * reserve_pmc_hardware or release_pmc_hardware.
+	 */
+	err = 0;
+	if (!atomic_inc_not_zero(&num_events)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_read(&num_events) == 0 &&
+		    reserve_pmc_hardware())
+			err = -EBUSY;
+		else
+			atomic_inc(&num_events);
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+
+	if (err)
+		return err;
+
+	event->destroy = hw_perf_event_destroy;
+
+	switch (attr->type) {
+	case PERF_TYPE_RAW:
+		config = attr->config & sh_pmu->raw_event_mask;
+		break;
+	case PERF_TYPE_HW_CACHE:
+		err = hw_perf_cache_event(attr->config, &config);
+		if (err)
+			return err;
+		break;
+	case PERF_TYPE_HARDWARE:
+		if (attr->config >= sh_pmu->max_events)
+			return -EINVAL;
+
+		config = sh_pmu->event_map(attr->config);
+		break;
+	}
+
+	if (config == -1)
+		return -EINVAL;
+
+	hwc->config |= config;
+
+	return 0;
+}
+
+static void sh_perf_event_update(struct perf_event *event,
+				   struct hw_perf_event *hwc, int idx)
+{
+	u64 prev_raw_count, new_raw_count;
+	s64 delta;
+	int shift = 0;
+
+	/*
+	 * Depending on the counter configuration, they may or may not
+	 * be chained, in which case the previous counter value can be
+	 * updated underneath us if the lower-half overflows.
+	 *
+	 * Our tactic to handle this is to first atomically read and
+	 * exchange a new raw count - then add that new-prev delta
+	 * count to the generic counter atomically.
+	 *
+	 * As there is no interrupt associated with the overflow events,
+	 * this is the simplest approach for maintaining consistency.
+	 */
+again:
+	prev_raw_count = atomic64_read(&hwc->prev_count);
+	new_raw_count = sh_pmu->read(idx);
+
+	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+			     new_raw_count) != prev_raw_count)
+		goto again;
+
+	/*
+	 * Now we have the new raw value and have updated the prev
+	 * timestamp already. We can now calculate the elapsed delta
+	 * (counter-)time and add that to the generic counter.
+	 *
+	 * Careful, not all hw sign-extends above the physical width
+	 * of the count.
+	 */
+	delta = (new_raw_count << shift) - (prev_raw_count << shift);
+	delta >>= shift;
+
+	atomic64_add(delta, &event->count);
+}
+
+static void sh_pmu_disable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	clear_bit(idx, cpuc->active_mask);
+	sh_pmu->disable(hwc, idx);
+
+	barrier();
+
+	sh_perf_event_update(event, &event->hw, idx);
+
+	cpuc->events[idx] = NULL;
+	clear_bit(idx, cpuc->used_mask);
+
+	perf_event_update_userpage(event);
+}
+
+static int sh_pmu_enable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (test_and_set_bit(idx, cpuc->used_mask)) {
+		idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events);
+		if (idx == sh_pmu->num_events)
+			return -EAGAIN;
+
+		set_bit(idx, cpuc->used_mask);
+		hwc->idx = idx;
+	}
+
+	sh_pmu->disable(hwc, idx);
+
+	cpuc->events[idx] = event;
+	set_bit(idx, cpuc->active_mask);
+
+	sh_pmu->enable(hwc, idx);
+
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+static void sh_pmu_read(struct perf_event *event)
+{
+	sh_perf_event_update(event, &event->hw, event->hw.idx);
+}
+
+static const struct pmu pmu = {
+	.enable		= sh_pmu_enable,
+	.disable	= sh_pmu_disable,
+	.read		= sh_pmu_read,
+};
+
+const struct pmu *hw_perf_event_init(struct perf_event *event)
+{
+	int err = __hw_perf_event_init(event);
+	if (unlikely(err)) {
+		if (event->destroy)
+			event->destroy(event);
+		return ERR_PTR(err);
+	}
+
+	return &pmu;
+}
+
+void hw_perf_event_setup(int cpu)
+{
+	struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
+
+	memset(cpuhw, 0, sizeof(struct cpu_hw_events));
+}
+
+void hw_perf_enable(void)
+{
+	if (!sh_pmu_initialized())
+		return;
+
+	sh_pmu->enable_all();
+}
+
+void hw_perf_disable(void)
+{
+	if (!sh_pmu_initialized())
+		return;
+
+	sh_pmu->disable_all();
+}
+
+int register_sh_pmu(struct sh_pmu *pmu)
+{
+	if (sh_pmu)
+		return -EBUSY;
+	sh_pmu = pmu;
+
+	pr_info("Performance Events: %s support registered\n", pmu->name);
+
+	WARN_ON(pmu->num_events > MAX_HWEVENTS);
+
+	return 0;
+}
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index 0673c4746be3..d8af889366a4 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -134,7 +134,10 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 	regs.regs[5] = (unsigned long)fn;
 
 	regs.pc = (unsigned long)kernel_thread_helper;
-	regs.sr = (1 << 30);
+	regs.sr = SR_MD;
+#if defined(CONFIG_SH_FPU)
+	regs.sr |= SR_FD;
+#endif
 
 	/* Ok, create the new process.. */
 	pid = do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0,
@@ -142,6 +145,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 
 	return pid;
 }
+EXPORT_SYMBOL(kernel_thread);
 
 /*
  * Free current thread data structures etc..
@@ -186,6 +190,16 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)
 
 	return fpvalid;
 }
+EXPORT_SYMBOL(dump_fpu);
+
+/*
+ * This gets called before we allocate a new thread and copy
+ * the current task into it.
+ */
+void prepare_to_copy(struct task_struct *tsk)
+{
+	unlazy_fpu(tsk, task_pt_regs(tsk));
+}
 
 asmlinkage void ret_from_fork(void);
 
@@ -195,16 +209,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 {
 	struct thread_info *ti = task_thread_info(p);
 	struct pt_regs *childregs;
-#if defined(CONFIG_SH_FPU) || defined(CONFIG_SH_DSP)
+#if defined(CONFIG_SH_DSP)
 	struct task_struct *tsk = current;
 #endif
 
-#if defined(CONFIG_SH_FPU)
-	unlazy_fpu(tsk, regs);
-	p->thread.fpu = tsk->thread.fpu;
-	copy_to_stopped_child_used_math(p);
-#endif
-
 #if defined(CONFIG_SH_DSP)
 	if (is_dsp_enabled(tsk)) {
 		/* We can use the __save_dsp or just copy the struct:
@@ -224,6 +232,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	} else {
 		childregs->regs[15] = (unsigned long)childregs;
 		ti->addr_limit = KERNEL_DS;
+		ti->status &= ~TS_USEDFPU;
+		p->fpu_counter = 0;
 	}
 
 	if (clone_flags & CLONE_SETTLS)
@@ -288,9 +298,13 @@ static void ubc_set_tracing(int asid, unsigned long pc)
 __notrace_funcgraph struct task_struct *
 __switch_to(struct task_struct *prev, struct task_struct *next)
 {
-#if defined(CONFIG_SH_FPU)
+	struct thread_struct *next_t = &next->thread;
+
 	unlazy_fpu(prev, task_pt_regs(prev));
-#endif
+
+	/* we're going to use this soon, after a few expensive things */
+	if (next->fpu_counter > 5)
+		prefetch(&next_t->fpu.hard);
 
 #ifdef CONFIG_MMU
 	/*
@@ -321,6 +335,14 @@ __switch_to(struct task_struct *prev, struct task_struct *next)
 #endif
 	}
 
+	/*
+	 * If the task has used fpu the last 5 timeslices, just do a full
+	 * restore of the math state immediately to avoid the trap; the
+	 * chances of needing FPU soon are obviously high now
+	 */
+	if (next->fpu_counter > 5)
+		fpu_state_restore(task_pt_regs(next));
+
 	return prev;
 }
 
diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c
index 1192398ef582..359b8a2f4d2e 100644
--- a/arch/sh/kernel/process_64.c
+++ b/arch/sh/kernel/process_64.c
@@ -335,6 +335,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0,
 		      &regs, 0, NULL, NULL);
 }
+EXPORT_SYMBOL(kernel_thread);
 
 /*
  * Free current thread data structures etc..
@@ -417,6 +418,7 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)
 	return 0; /* Task didn't use the fpu at all. */
 #endif
 }
+EXPORT_SYMBOL(dump_fpu);
 
 asmlinkage void ret_from_fork(void);
 
diff --git a/arch/sh/kernel/return_address.c b/arch/sh/kernel/return_address.c
new file mode 100644
index 000000000000..df3ab5811074
--- /dev/null
+++ b/arch/sh/kernel/return_address.c
@@ -0,0 +1,54 @@
+/*
+ * arch/sh/kernel/return_address.c
+ *
+ * Copyright (C) 2009  Matt Fleming
+ * Copyright (C) 2009  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/kernel.h>
+#include <asm/dwarf.h>
+
+#ifdef CONFIG_DWARF_UNWINDER
+
+void *return_address(unsigned int depth)
+{
+	struct dwarf_frame *frame;
+	unsigned long ra;
+	int i;
+
+	for (i = 0, frame = NULL, ra = 0; i <= depth; i++) {
+		struct dwarf_frame *tmp;
+
+		tmp = dwarf_unwind_stack(ra, frame);
+
+		if (frame)
+			dwarf_free_frame(frame);
+
+		frame = tmp;
+
+		if (!frame || !frame->return_addr)
+			break;
+
+		ra = frame->return_addr;
+	}
+
+	/* Failed to unwind the stack to the specified depth. */
+	WARN_ON(i != depth + 1);
+
+	if (frame)
+		dwarf_free_frame(frame);
+
+	return (void *)ra;
+}
+
+#else
+
+void *return_address(unsigned int depth)
+{
+	return NULL;
+}
+
+#endif
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 99b4fb553bf1..5a947a2567e4 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -453,6 +453,10 @@ void __init setup_arch(char **cmdline_p)
 
 	paging_init();
 
+#ifdef CONFIG_PMB_ENABLE
+	pmb_init();
+#endif
+
 #ifdef CONFIG_SMP
 	plat_smp_setup();
 #endif
diff --git a/arch/sh/kernel/sh_ksyms_32.c b/arch/sh/kernel/sh_ksyms_32.c
index 444cce3ae921..3896f26efa4a 100644
--- a/arch/sh/kernel/sh_ksyms_32.c
+++ b/arch/sh/kernel/sh_ksyms_32.c
@@ -1,37 +1,11 @@
 #include <linux/module.h>
-#include <linux/smp.h>
-#include <linux/user.h>
-#include <linux/elfcore.h>
-#include <linux/sched.h>
-#include <linux/in6.h>
-#include <linux/interrupt.h>
-#include <linux/vmalloc.h>
-#include <linux/pci.h>
-#include <linux/irq.h>
-#include <asm/sections.h>
-#include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+#include <linux/delay.h>
+#include <linux/mm.h>
 #include <asm/checksum.h>
-#include <asm/io.h>
-#include <asm/delay.h>
-#include <asm/tlbflush.h>
-#include <asm/cacheflush.h>
-#include <asm/ftrace.h>
-
-extern int dump_fpu(struct pt_regs *, elf_fpregset_t *);
-
-/* platform dependent support */
-EXPORT_SYMBOL(dump_fpu);
-EXPORT_SYMBOL(kernel_thread);
-EXPORT_SYMBOL(strlen);
-
-/* PCI exports */
-#ifdef CONFIG_PCI
-EXPORT_SYMBOL(pci_alloc_consistent);
-EXPORT_SYMBOL(pci_free_consistent);
-#endif
+#include <asm/sections.h>
 
-/* mem exports */
 EXPORT_SYMBOL(memchr);
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memset);
@@ -40,6 +14,13 @@ EXPORT_SYMBOL(__copy_user);
 EXPORT_SYMBOL(__udelay);
 EXPORT_SYMBOL(__ndelay);
 EXPORT_SYMBOL(__const_udelay);
+EXPORT_SYMBOL(strlen);
+EXPORT_SYMBOL(csum_partial);
+EXPORT_SYMBOL(csum_partial_copy_generic);
+EXPORT_SYMBOL(copy_page);
+EXPORT_SYMBOL(__clear_user);
+EXPORT_SYMBOL(_ebss);
+EXPORT_SYMBOL(empty_zero_page);
 
 #define DECLARE_EXPORT(name)		\
 	extern void name(void);EXPORT_SYMBOL(name)
@@ -107,30 +88,6 @@ DECLARE_EXPORT(__sdivsi3_i4);
 DECLARE_EXPORT(__udivsi3_i4);
 DECLARE_EXPORT(__sdivsi3_i4i);
 DECLARE_EXPORT(__udivsi3_i4i);
-
-#if !defined(CONFIG_CACHE_OFF) && (defined(CONFIG_CPU_SH4) || \
-	defined(CONFIG_SH7705_CACHE_32KB))
-/* needed by some modules */
-EXPORT_SYMBOL(flush_cache_all);
-EXPORT_SYMBOL(flush_cache_range);
-EXPORT_SYMBOL(flush_dcache_page);
-#endif
-
 #ifdef CONFIG_MCOUNT
 DECLARE_EXPORT(mcount);
 #endif
-EXPORT_SYMBOL(csum_partial);
-EXPORT_SYMBOL(csum_partial_copy_generic);
-#ifdef CONFIG_IPV6
-EXPORT_SYMBOL(csum_ipv6_magic);
-#endif
-EXPORT_SYMBOL(copy_page);
-EXPORT_SYMBOL(__clear_user);
-EXPORT_SYMBOL(_ebss);
-EXPORT_SYMBOL(empty_zero_page);
-
-#ifndef CONFIG_CACHE_OFF
-EXPORT_SYMBOL(__flush_purge_region);
-EXPORT_SYMBOL(__flush_wback_region);
-EXPORT_SYMBOL(__flush_invalidate_region);
-#endif
diff --git a/arch/sh/kernel/sh_ksyms_64.c b/arch/sh/kernel/sh_ksyms_64.c
index d008e17eb257..45afa5c51f67 100644
--- a/arch/sh/kernel/sh_ksyms_64.c
+++ b/arch/sh/kernel/sh_ksyms_64.c
@@ -24,16 +24,6 @@
 #include <asm/delay.h>
 #include <asm/irq.h>
 
-extern int dump_fpu(struct pt_regs *, elf_fpregset_t *);
-
-/* platform dependent support */
-EXPORT_SYMBOL(dump_fpu);
-EXPORT_SYMBOL(kernel_thread);
-
-#ifdef CONFIG_VT
-EXPORT_SYMBOL(screen_info);
-#endif
-
 EXPORT_SYMBOL(__put_user_asm_b);
 EXPORT_SYMBOL(__put_user_asm_w);
 EXPORT_SYMBOL(__put_user_asm_l);
diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c
index 3db37425210d..12815ce01ecd 100644
--- a/arch/sh/kernel/signal_32.c
+++ b/arch/sh/kernel/signal_32.c
@@ -67,7 +67,8 @@ sys_sigsuspend(old_sigset_t mask,
 
 	current->state = TASK_INTERRUPTIBLE;
 	schedule();
-	set_thread_flag(TIF_RESTORE_SIGMASK);
+	set_restore_sigmask();
+
 	return -ERESTARTNOHAND;
 }
 
@@ -590,7 +591,7 @@ static void do_signal(struct pt_regs *regs, unsigned int save_r0)
 	if (try_to_freeze())
 		goto no_signal;
 
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
+	if (current_thread_info()->status & TS_RESTORE_SIGMASK)
 		oldset = &current->saved_sigmask;
 	else
 		oldset = &current->blocked;
@@ -602,12 +603,13 @@ static void do_signal(struct pt_regs *regs, unsigned int save_r0)
 		/* Whee!  Actually deliver the signal.  */
 		if (handle_signal(signr, &ka, &info, oldset,
 				  regs, save_r0) == 0) {
-			/* a signal was successfully delivered; the saved
+			/*
+			 * A signal was successfully delivered; the saved
 			 * sigmask will have been stored in the signal frame,
 			 * and will be restored by sigreturn, so we can simply
-			 * clear the TIF_RESTORE_SIGMASK flag */
-			if (test_thread_flag(TIF_RESTORE_SIGMASK))
-				clear_thread_flag(TIF_RESTORE_SIGMASK);
+			 * clear the TS_RESTORE_SIGMASK flag
+			 */
+			current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
 
 			tracehook_signal_handler(signr, &info, &ka, regs,
 					test_thread_flag(TIF_SINGLESTEP));
@@ -631,10 +633,12 @@ no_signal:
 		}
 	}
 
-	/* if there's no signal to deliver, we just put the saved sigmask
-	 * back */
-	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-		clear_thread_flag(TIF_RESTORE_SIGMASK);
+	/*
+	 * If there's no signal to deliver, we just put the saved sigmask
+	 * back.
+	 */
+	if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
+		current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
 		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
 	}
 }
diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c
index 74793c80a57a..feb3dddd3192 100644
--- a/arch/sh/kernel/signal_64.c
+++ b/arch/sh/kernel/signal_64.c
@@ -101,7 +101,7 @@ static int do_signal(struct pt_regs *regs, sigset_t *oldset)
 	if (try_to_freeze())
 		goto no_signal;
 
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
+	if (current_thread_info()->status & TS_RESTORE_SIGMASK)
 		oldset = &current->saved_sigmask;
 	else if (!oldset)
 		oldset = &current->blocked;
@@ -115,11 +115,9 @@ static int do_signal(struct pt_regs *regs, sigset_t *oldset)
 			/*
 			 * If a signal was successfully delivered, the
 			 * saved sigmask is in its frame, and we can
-			 * clear the TIF_RESTORE_SIGMASK flag.
+			 * clear the TS_RESTORE_SIGMASK flag.
 			 */
-			if (test_thread_flag(TIF_RESTORE_SIGMASK))
-				clear_thread_flag(TIF_RESTORE_SIGMASK);
-
+			current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
 			tracehook_signal_handler(signr, &info, &ka, regs, 0);
 			return 1;
 		}
@@ -146,8 +144,8 @@ no_signal:
 	}
 
 	/* No signal to deliver -- put the saved sigmask back */
-	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-		clear_thread_flag(TIF_RESTORE_SIGMASK);
+	if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
+		current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
 		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
 	}
 
@@ -176,6 +174,7 @@ sys_sigsuspend(old_sigset_t mask,
 	while (1) {
 		current->state = TASK_INTERRUPTIBLE;
 		schedule();
+		set_restore_sigmask();
 		regs->pc += 4;    /* because sys_sigreturn decrements the pc */
 		if (do_signal(regs, &saveset)) {
 			/* pc now points at signal handler. Need to decrement
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index 160db1003cfb..983e0792d5f3 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -122,7 +122,9 @@ int __cpuinit __cpu_up(unsigned int cpu)
 	stack_start.bss_start = 0; /* don't clear bss for secondary cpus */
 	stack_start.start_kernel_fn = start_secondary;
 
-	flush_cache_all();
+	flush_icache_range((unsigned long)&stack_start,
+			   (unsigned long)&stack_start + sizeof(stack_start));
+	wmb();
 
 	plat_start_cpu(cpu, (unsigned long)_stext);
 
diff --git a/arch/sh/kernel/topology.c b/arch/sh/kernel/topology.c
index 0838942b7083..9b0b633b6c92 100644
--- a/arch/sh/kernel/topology.c
+++ b/arch/sh/kernel/topology.c
@@ -16,6 +16,32 @@
 
 static DEFINE_PER_CPU(struct cpu, cpu_devices);
 
+cpumask_t cpu_core_map[NR_CPUS];
+
+static cpumask_t cpu_coregroup_map(unsigned int cpu)
+{
+	/*
+	 * Presently all SH-X3 SMP cores are multi-cores, so just keep it
+	 * simple until we have a method for determining topology..
+	 */
+	return cpu_possible_map;
+}
+
+const struct cpumask *cpu_coregroup_mask(unsigned int cpu)
+{
+	return &cpu_core_map[cpu];
+}
+
+int arch_update_cpu_topology(void)
+{
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu)
+		cpu_core_map[cpu] = cpu_coregroup_map(cpu);
+
+	return 0;
+}
+
 static int __init topology_init(void)
 {
 	int i, ret;
diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c
index a8396f36bd14..7b036339dc92 100644
--- a/arch/sh/kernel/traps.c
+++ b/arch/sh/kernel/traps.c
@@ -9,8 +9,8 @@
 #include <asm/unwinder.h>
 #include <asm/system.h>
 
-#ifdef CONFIG_BUG
-void handle_BUG(struct pt_regs *regs)
+#ifdef CONFIG_GENERIC_BUG
+static void handle_BUG(struct pt_regs *regs)
 {
 	const struct bug_entry *bug;
 	unsigned long bugaddr = regs->pc;
@@ -81,7 +81,7 @@ BUILD_TRAP_HANDLER(bug)
 		       SIGTRAP) == NOTIFY_STOP)
 		return;
 
-#ifdef CONFIG_BUG
+#ifdef CONFIG_GENERIC_BUG
 	if (__kernel_text_address(instruction_pointer(regs))) {
 		insn_size_t insn = *(insn_size_t *)instruction_pointer(regs);
 		if (insn == TRAPA_BUG_OPCODE)
@@ -95,9 +95,11 @@ BUILD_TRAP_HANDLER(bug)
 
 BUILD_TRAP_HANDLER(nmi)
 {
+	unsigned int cpu = smp_processor_id();
 	TRAP_HANDLER_DECL;
 
 	nmi_enter();
+	nmi_count(cpu)++;
 
 	switch (notify_die(DIE_NMI, "NMI", regs, 0, vec & 0xff, SIGINT)) {
 	case NOTIFY_OK:
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index 7a2ee3a6b8e7..3da5a125d884 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -25,6 +25,7 @@
 #include <linux/kexec.h>
 #include <linux/limits.h>
 #include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <linux/sysfs.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -68,61 +69,49 @@ static const char *se_usermode_action[] = {
 	"signal+warn"
 };
 
-static int
-proc_alignment_read(char *page, char **start, off_t off, int count, int *eof,
-		    void *data)
+static int alignment_proc_show(struct seq_file *m, void *v)
 {
-	char *p = page;
-	int len;
-
-	p += sprintf(p, "User:\t\t%lu\n", se_user);
-	p += sprintf(p, "System:\t\t%lu\n", se_sys);
-	p += sprintf(p, "Half:\t\t%lu\n", se_half);
-	p += sprintf(p, "Word:\t\t%lu\n", se_word);
-	p += sprintf(p, "DWord:\t\t%lu\n", se_dword);
-	p += sprintf(p, "Multi:\t\t%lu\n", se_multi);
-	p += sprintf(p, "User faults:\t%i (%s)\n", se_usermode,
+	seq_printf(m, "User:\t\t%lu\n", se_user);
+	seq_printf(m, "System:\t\t%lu\n", se_sys);
+	seq_printf(m, "Half:\t\t%lu\n", se_half);
+	seq_printf(m, "Word:\t\t%lu\n", se_word);
+	seq_printf(m, "DWord:\t\t%lu\n", se_dword);
+	seq_printf(m, "Multi:\t\t%lu\n", se_multi);
+	seq_printf(m, "User faults:\t%i (%s)\n", se_usermode,
 			se_usermode_action[se_usermode]);
-	p += sprintf(p, "Kernel faults:\t%i (fixup%s)\n", se_kernmode_warn,
+	seq_printf(m, "Kernel faults:\t%i (fixup%s)\n", se_kernmode_warn,
 			se_kernmode_warn ? "+warn" : "");
-
-	len = (p - page) - off;
-	if (len < 0)
-		len = 0;
-
-	*eof = (len <= count) ? 1 : 0;
-	*start = page + off;
-
-	return len;
+	return 0;
 }
 
-static int proc_alignment_write(struct file *file, const char __user *buffer,
-				unsigned long count, void *data)
+static int alignment_proc_open(struct inode *inode, struct file *file)
 {
-	char mode;
-
-	if (count > 0) {
-		if (get_user(mode, buffer))
-			return -EFAULT;
-		if (mode >= '0' && mode <= '5')
-			se_usermode = mode - '0';
-	}
-	return count;
+	return single_open(file, alignment_proc_show, NULL);
 }
 
-static int proc_alignment_kern_write(struct file *file, const char __user *buffer,
-				     unsigned long count, void *data)
+static ssize_t alignment_proc_write(struct file *file,
+		const char __user *buffer, size_t count, loff_t *pos)
 {
+	int *data = PDE(file->f_path.dentry->d_inode)->data;
 	char mode;
 
 	if (count > 0) {
 		if (get_user(mode, buffer))
 			return -EFAULT;
-		if (mode >= '0' && mode <= '1')
-			se_kernmode_warn = mode - '0';
+		if (mode >= '0' && mode <= '5')
+			*data = mode - '0';
 	}
 	return count;
 }
+
+static const struct file_operations alignment_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= alignment_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= alignment_proc_write,
+};
 #endif
 
 static void dump_mem(const char *str, unsigned long bottom, unsigned long top)
@@ -945,14 +934,9 @@ void __init trap_init(void)
 	set_exception_table_evt(0x800, do_reserved_inst);
 	set_exception_table_evt(0x820, do_illegal_slot_inst);
 #elif defined(CONFIG_SH_FPU)
-#ifdef CONFIG_CPU_SUBTYPE_SHX3
-	set_exception_table_evt(0xd80, fpu_state_restore_trap_handler);
-	set_exception_table_evt(0xda0, fpu_state_restore_trap_handler);
-#else
 	set_exception_table_evt(0x800, fpu_state_restore_trap_handler);
 	set_exception_table_evt(0x820, fpu_state_restore_trap_handler);
 #endif
-#endif
 
 #ifdef CONFIG_CPU_SH2
 	set_exception_table_vec(TRAP_ADDRESS_ERROR, address_error_trap_handler);
@@ -1011,20 +995,16 @@ static int __init alignment_init(void)
 	if (!dir)
 		return -ENOMEM;
 
-	res = create_proc_entry("alignment", S_IWUSR | S_IRUGO, dir);
+	res = proc_create_data("alignment", S_IWUSR | S_IRUGO, dir,
+			       &alignment_proc_fops, &se_usermode);
 	if (!res)
 		return -ENOMEM;
 
-	res->read_proc = proc_alignment_read;
-	res->write_proc = proc_alignment_write;
-
-        res = create_proc_entry("kernel_alignment", S_IWUSR | S_IRUGO, dir);
+        res = proc_create_data("kernel_alignment", S_IWUSR | S_IRUGO, dir,
+			       &alignment_proc_fops, &se_kernmode_warn);
         if (!res)
                 return -ENOMEM;
 
-        res->read_proc = proc_alignment_read;
-        res->write_proc = proc_alignment_kern_write;
-
 	return 0;
 }
 
diff --git a/arch/sh/lib/Makefile b/arch/sh/lib/Makefile
index a969b47c5463..dab4d2129812 100644
--- a/arch/sh/lib/Makefile
+++ b/arch/sh/lib/Makefile
@@ -2,7 +2,7 @@
 # Makefile for SuperH-specific library files..
 #
 
-lib-y  = delay.o memset.o memmove.o memchr.o \
+lib-y  = delay.o memmove.o memchr.o \
 	 checksum.o strlen.o div64.o div64-generic.o
 
 # Extracted from libgcc
@@ -23,8 +23,11 @@ obj-y				+= io.o
 memcpy-y			:= memcpy.o
 memcpy-$(CONFIG_CPU_SH4)	:= memcpy-sh4.o
 
+memset-y			:= memset.o
+memset-$(CONFIG_CPU_SH4)	:= memset-sh4.o
+
 lib-$(CONFIG_MMU)		+= copy_page.o __clear_user.o
 lib-$(CONFIG_MCOUNT)		+= mcount.o
-lib-y				+= $(memcpy-y) $(udivsi3-y)
+lib-y				+= $(memcpy-y) $(memset-y) $(udivsi3-y)
 
 EXTRA_CFLAGS += -Werror
diff --git a/arch/sh/lib/memset-sh4.S b/arch/sh/lib/memset-sh4.S
new file mode 100644
index 000000000000..1a6e32cc4e4d
--- /dev/null
+++ b/arch/sh/lib/memset-sh4.S
@@ -0,0 +1,107 @@
+/*
+ * "memset" implementation for SH4
+ *
+ * Copyright (C) 1999  Niibe Yutaka
+ * Copyright (c) 2009  STMicroelectronics Limited
+ * Author: Stuart Menefy <stuart.menefy:st.com>
+ */
+
+/*
+ *            void *memset(void *s, int c, size_t n);
+ */
+
+#include <linux/linkage.h>
+
+ENTRY(memset)
+	mov	#12,r0
+	add	r6,r4
+	cmp/gt	r6,r0
+	bt/s	40f		! if it's too small, set a byte at once
+	 mov	r4,r0
+	and	#3,r0
+	cmp/eq	#0,r0
+	bt/s	2f		! It's aligned
+	 sub	r0,r6
+1:
+	dt	r0
+	bf/s	1b
+	 mov.b	r5,@-r4
+2:				! make VVVV
+	extu.b	r5,r5
+	swap.b	r5,r0		!   V0
+	or	r0,r5		!   VV
+	swap.w	r5,r0		! VV00
+	or	r0,r5		! VVVV
+
+	! Check if enough bytes need to be copied to be worth the big loop
+	mov	#0x40, r0	! (MT)
+	cmp/gt	r6,r0		! (MT)  64 > len => slow loop
+
+	bt/s	22f
+	 mov	r6,r0
+
+	! align the dst to the cache block size if necessary
+	mov	r4, r3
+	mov	#~(0x1f), r1
+
+	and	r3, r1
+	cmp/eq	r3, r1
+
+	bt/s	11f		! dst is already aligned
+	 sub	r1, r3		! r3-r1 -> r3
+	shlr2	r3		! number of loops
+
+10:	mov.l	r5,@-r4
+	dt	r3
+	bf/s	10b
+	 add	#-4, r6
+
+11:	! dst is 32byte aligned
+	mov	r6,r2
+	mov	#-5,r0
+	shld	r0,r2		! number of loops
+
+	add	#-32, r4
+	mov	r5, r0
+12:
+	movca.l	r0,@r4
+	mov.l	r5,@(4, r4)
+	mov.l	r5,@(8, r4)
+	mov.l	r5,@(12,r4)
+	mov.l	r5,@(16,r4)
+	mov.l	r5,@(20,r4)
+	add	#-0x20, r6
+	mov.l	r5,@(24,r4)
+	dt	r2
+	mov.l	r5,@(28,r4)
+	bf/s	12b
+	 add	#-32, r4
+
+	add	#32, r4
+	mov	#8, r0
+	cmp/ge	r0, r6
+	bf	40f
+
+	mov	r6,r0
+22:
+	shlr2	r0
+	shlr	r0		! r0 = r6 >> 3
+3:
+	dt	r0
+	mov.l	r5,@-r4		! set 8-byte at once
+	bf/s	3b
+	 mov.l	r5,@-r4
+	!
+	mov	#7,r0
+	and	r0,r6
+
+	! fill bytes (length may be zero)
+40:	tst	r6,r6
+	bt	5f
+4:
+	dt	r6
+	bf/s	4b
+	 mov.b	r5,@-r4
+5:
+	rts
+	 mov	r4,r0
diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c
index ac2d7abd2567..d6c15cae0912 100644
--- a/arch/sh/math-emu/math.c
+++ b/arch/sh/math-emu/math.c
@@ -558,7 +558,7 @@ static int ieee_fpe_handler(struct pt_regs *regs)
 					    (finsn >> 8) & 0xf);
 			tsk->thread.fpu.hard.fpscr &=
 				~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
-			set_tsk_thread_flag(tsk, TIF_USEDFPU);
+			task_thread_info(tsk)->status |= TS_USEDFPU;
 		} else {
 			info.si_signo = SIGFPE;
 			info.si_errno = 0;
@@ -619,10 +619,10 @@ int do_fpu_inst(unsigned short inst, struct pt_regs *regs)
 	struct task_struct *tsk = current;
 	struct sh_fpu_soft_struct *fpu = &(tsk->thread.fpu.soft);
 
-	if (!test_tsk_thread_flag(tsk, TIF_USEDFPU)) {
+	if (!(task_thread_info(tsk)->status & TS_USEDFPU)) {
 		/* initialize once. */
 		fpu_init(fpu);
-		set_tsk_thread_flag(tsk, TIF_USEDFPU);
+		task_thread_info(tsk)->status |= TS_USEDFPU;
 	}
 
 	return fpu_emulate(inst, fpu, regs);
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index 7f7b52f9beba..0e7ba8e891cf 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -82,8 +82,7 @@ config 32BIT
 
 config PMB_ENABLE
 	bool "Support 32-bit physical addressing through PMB"
-	depends on MMU && EXPERIMENTAL && (CPU_SUBTYPE_SH7757 || CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785)
-	select 32BIT
+	depends on MMU && EXPERIMENTAL && CPU_SH4A
 	default y
 	help
 	  If you say Y here, physical addressing will be extended to
@@ -97,8 +96,7 @@ choice
 
 config PMB
 	bool "PMB"
-	depends on MMU && EXPERIMENTAL && (CPU_SUBTYPE_SH7757 || CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785)
-	select 32BIT
+	depends on MMU && EXPERIMENTAL && CPU_SH4A
 	help
 	  If you say Y here, physical addressing will be extended to
 	  32-bits through the SH-4A PMB. If this is not set, legacy
@@ -106,9 +104,7 @@ config PMB
 
 config PMB_FIXED
 	bool "fixed PMB"
-	depends on MMU && EXPERIMENTAL && (CPU_SUBTYPE_SH7757 || \
-					   CPU_SUBTYPE_SH7780 || \
-					   CPU_SUBTYPE_SH7785)
+	depends on MMU && EXPERIMENTAL && CPU_SH4A
 	select 32BIT
 	help
 	  If this option is enabled, fixed PMB mappings are inherited
@@ -258,6 +254,15 @@ endchoice
 
 source "mm/Kconfig"
 
+config SCHED_MC
+	bool "Multi-core scheduler support"
+	depends on SMP
+	default y
+	help
+	  Multi-core scheduler support improves the CPU scheduler's decision
+	  making when dealing with multi-core CPU chips at a cost of slightly
+	  increased overhead in some places. If unsure say N here.
+
 endmenu
 
 menu "Cache configuration"
diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile
index 3759bf853293..8a70535fa7ce 100644
--- a/arch/sh/mm/Makefile
+++ b/arch/sh/mm/Makefile
@@ -33,8 +33,7 @@ obj-y				+= $(tlb-y)
 endif
 
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
-obj-$(CONFIG_PMB)		+= pmb.o
-obj-$(CONFIG_PMB_FIXED)		+= pmb-fixed.o
+obj-$(CONFIG_PMB_ENABLE)	+= pmb.o
 obj-$(CONFIG_NUMA)		+= numa.o
 
 # Special flags for fault_64.o.  This puts restrictions on the number of
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index b7f235c74d66..f36a08bf3d5c 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -2,7 +2,7 @@
  * arch/sh/mm/cache-sh4.c
  *
  * Copyright (C) 1999, 2000, 2002  Niibe Yutaka
- * Copyright (C) 2001 - 2007  Paul Mundt
+ * Copyright (C) 2001 - 2009  Paul Mundt
  * Copyright (C) 2003  Richard Curnow
  * Copyright (c) 2007 STMicroelectronics (R&D) Ltd.
  *
@@ -15,6 +15,8 @@
 #include <linux/io.h>
 #include <linux/mutex.h>
 #include <linux/fs.h>
+#include <linux/highmem.h>
+#include <asm/pgtable.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
@@ -23,21 +25,12 @@
  * flushing. Anything exceeding this will simply flush the dcache in its
  * entirety.
  */
-#define MAX_DCACHE_PAGES	64	/* XXX: Tune for ways */
 #define MAX_ICACHE_PAGES	32
 
 static void __flush_cache_one(unsigned long addr, unsigned long phys,
 			       unsigned long exec_offset);
 
 /*
- * This is initialised here to ensure that it is not placed in the BSS.  If
- * that were to happen, note that cache_init gets called before the BSS is
- * cleared, so this would get nulled out which would be hopeless.
- */
-static void (*__flush_dcache_segment_fn)(unsigned long, unsigned long) =
-	(void (*)(unsigned long, unsigned long))0xdeadbeef;
-
-/*
  * Write back the range of D-cache, and purge the I-cache.
  *
  * Called from kernel/module.c:sys_init_module and routine for a.out format,
@@ -97,15 +90,15 @@ static inline void flush_cache_one(unsigned long start, unsigned long phys)
 	unsigned long flags, exec_offset = 0;
 
 	/*
-	 * All types of SH-4 require PC to be in P2 to operate on the I-cache.
-	 * Some types of SH-4 require PC to be in P2 to operate on the D-cache.
+	 * All types of SH-4 require PC to be uncached to operate on the I-cache.
+	 * Some types of SH-4 require PC to be uncached to operate on the D-cache.
 	 */
 	if ((boot_cpu_data.flags & CPU_HAS_P2_FLUSH_BUG) ||
 	    (start < CACHE_OC_ADDRESS_ARRAY))
-		exec_offset = 0x20000000;
+		exec_offset = cached_to_uncached;
 
 	local_irq_save(flags);
-	__flush_cache_one(start | SH_CACHE_ASSOC, P1SEGADDR(phys), exec_offset);
+	__flush_cache_one(start, phys, exec_offset);
 	local_irq_restore(flags);
 }
 
@@ -124,7 +117,7 @@ static void sh4_flush_dcache_page(void *arg)
 	else
 #endif
 	{
-		unsigned long phys = PHYSADDR(page_address(page));
+		unsigned long phys = page_to_phys(page);
 		unsigned long addr = CACHE_OC_ADDRESS_ARRAY;
 		int i, n;
 
@@ -159,10 +152,27 @@ static void __uses_jump_to_uncached flush_icache_all(void)
 	local_irq_restore(flags);
 }
 
-static inline void flush_dcache_all(void)
+static void flush_dcache_all(void)
 {
-	(*__flush_dcache_segment_fn)(0UL, boot_cpu_data.dcache.way_size);
-	wmb();
+	unsigned long addr, end_addr, entry_offset;
+
+	end_addr = CACHE_OC_ADDRESS_ARRAY +
+		(current_cpu_data.dcache.sets <<
+		 current_cpu_data.dcache.entry_shift) *
+			current_cpu_data.dcache.ways;
+
+	entry_offset = 1 << current_cpu_data.dcache.entry_shift;
+
+	for (addr = CACHE_OC_ADDRESS_ARRAY; addr < end_addr; ) {
+		__raw_writel(0, addr); addr += entry_offset;
+		__raw_writel(0, addr); addr += entry_offset;
+		__raw_writel(0, addr); addr += entry_offset;
+		__raw_writel(0, addr); addr += entry_offset;
+		__raw_writel(0, addr); addr += entry_offset;
+		__raw_writel(0, addr); addr += entry_offset;
+		__raw_writel(0, addr); addr += entry_offset;
+		__raw_writel(0, addr); addr += entry_offset;
+	}
 }
 
 static void sh4_flush_cache_all(void *unused)
@@ -171,89 +181,13 @@ static void sh4_flush_cache_all(void *unused)
 	flush_icache_all();
 }
 
-static void __flush_cache_mm(struct mm_struct *mm, unsigned long start,
-			     unsigned long end)
-{
-	unsigned long d = 0, p = start & PAGE_MASK;
-	unsigned long alias_mask = boot_cpu_data.dcache.alias_mask;
-	unsigned long n_aliases = boot_cpu_data.dcache.n_aliases;
-	unsigned long select_bit;
-	unsigned long all_aliases_mask;
-	unsigned long addr_offset;
-	pgd_t *dir;
-	pmd_t *pmd;
-	pud_t *pud;
-	pte_t *pte;
-	int i;
-
-	dir = pgd_offset(mm, p);
-	pud = pud_offset(dir, p);
-	pmd = pmd_offset(pud, p);
-	end = PAGE_ALIGN(end);
-
-	all_aliases_mask = (1 << n_aliases) - 1;
-
-	do {
-		if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) {
-			p &= PMD_MASK;
-			p += PMD_SIZE;
-			pmd++;
-
-			continue;
-		}
-
-		pte = pte_offset_kernel(pmd, p);
-
-		do {
-			unsigned long phys;
-			pte_t entry = *pte;
-
-			if (!(pte_val(entry) & _PAGE_PRESENT)) {
-				pte++;
-				p += PAGE_SIZE;
-				continue;
-			}
-
-			phys = pte_val(entry) & PTE_PHYS_MASK;
-
-			if ((p ^ phys) & alias_mask) {
-				d |= 1 << ((p & alias_mask) >> PAGE_SHIFT);
-				d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT);
-
-				if (d == all_aliases_mask)
-					goto loop_exit;
-			}
-
-			pte++;
-			p += PAGE_SIZE;
-		} while (p < end && ((unsigned long)pte & ~PAGE_MASK));
-		pmd++;
-	} while (p < end);
-
-loop_exit:
-	addr_offset = 0;
-	select_bit = 1;
-
-	for (i = 0; i < n_aliases; i++) {
-		if (d & select_bit) {
-			(*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE);
-			wmb();
-		}
-
-		select_bit <<= 1;
-		addr_offset += PAGE_SIZE;
-	}
-}
-
 /*
  * Note : (RPC) since the caches are physically tagged, the only point
  * of flush_cache_mm for SH-4 is to get rid of aliases from the
  * D-cache.  The assumption elsewhere, e.g. flush_cache_range, is that
  * lines can stay resident so long as the virtual address they were
  * accessed with (hence cache set) is in accord with the physical
- * address (i.e. tag).  It's no different here.  So I reckon we don't
- * need to flush the I-cache, since aliases don't matter for that.  We
- * should try that.
+ * address (i.e. tag).  It's no different here.
  *
  * Caller takes mm->mmap_sem.
  */
@@ -264,33 +198,7 @@ static void sh4_flush_cache_mm(void *arg)
 	if (cpu_context(smp_processor_id(), mm) == NO_CONTEXT)
 		return;
 
-	/*
-	 * If cache is only 4k-per-way, there are never any 'aliases'.  Since
-	 * the cache is physically tagged, the data can just be left in there.
-	 */
-	if (boot_cpu_data.dcache.n_aliases == 0)
-		return;
-
-	/*
-	 * Don't bother groveling around the dcache for the VMA ranges
-	 * if there are too many PTEs to make it worthwhile.
-	 */
-	if (mm->nr_ptes >= MAX_DCACHE_PAGES)
-		flush_dcache_all();
-	else {
-		struct vm_area_struct *vma;
-
-		/*
-		 * In this case there are reasonably sized ranges to flush,
-		 * iterate through the VMA list and take care of any aliases.
-		 */
-		for (vma = mm->mmap; vma; vma = vma->vm_next)
-			__flush_cache_mm(mm, vma->vm_start, vma->vm_end);
-	}
-
-	/* Only touch the icache if one of the VMAs has VM_EXEC set. */
-	if (mm->exec_vm)
-		flush_icache_all();
+	flush_dcache_all();
 }
 
 /*
@@ -303,44 +211,63 @@ static void sh4_flush_cache_page(void *args)
 {
 	struct flusher_data *data = args;
 	struct vm_area_struct *vma;
+	struct page *page;
 	unsigned long address, pfn, phys;
-	unsigned int alias_mask;
+	int map_coherent = 0;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	void *vaddr;
 
 	vma = data->vma;
-	address = data->addr1;
+	address = data->addr1 & PAGE_MASK;
 	pfn = data->addr2;
 	phys = pfn << PAGE_SHIFT;
+	page = pfn_to_page(pfn);
 
 	if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT)
 		return;
 
-	alias_mask = boot_cpu_data.dcache.alias_mask;
-
-	/* We only need to flush D-cache when we have alias */
-	if ((address^phys) & alias_mask) {
-		/* Loop 4K of the D-cache */
-		flush_cache_one(
-			CACHE_OC_ADDRESS_ARRAY | (address & alias_mask),
-			phys);
-		/* Loop another 4K of the D-cache */
-		flush_cache_one(
-			CACHE_OC_ADDRESS_ARRAY | (phys & alias_mask),
-			phys);
-	}
+	pgd = pgd_offset(vma->vm_mm, address);
+	pud = pud_offset(pgd, address);
+	pmd = pmd_offset(pud, address);
+	pte = pte_offset_kernel(pmd, address);
+
+	/* If the page isn't present, there is nothing to do here. */
+	if (!(pte_val(*pte) & _PAGE_PRESENT))
+		return;
 
-	alias_mask = boot_cpu_data.icache.alias_mask;
-	if (vma->vm_flags & VM_EXEC) {
+	if ((vma->vm_mm == current->active_mm))
+		vaddr = NULL;
+	else {
 		/*
-		 * Evict entries from the portion of the cache from which code
-		 * may have been executed at this address (virtual).  There's
-		 * no need to evict from the portion corresponding to the
-		 * physical address as for the D-cache, because we know the
-		 * kernel has never executed the code through its identity
-		 * translation.
+		 * Use kmap_coherent or kmap_atomic to do flushes for
+		 * another ASID than the current one.
 		 */
-		flush_cache_one(
-			CACHE_IC_ADDRESS_ARRAY | (address & alias_mask),
-			phys);
+		map_coherent = (current_cpu_data.dcache.n_aliases &&
+			!test_bit(PG_dcache_dirty, &page->flags) &&
+			page_mapped(page));
+		if (map_coherent)
+			vaddr = kmap_coherent(page, address);
+		else
+			vaddr = kmap_atomic(page, KM_USER0);
+
+		address = (unsigned long)vaddr;
+	}
+
+	if (pages_do_alias(address, phys))
+		flush_cache_one(CACHE_OC_ADDRESS_ARRAY |
+			(address & shm_align_mask), phys);
+
+	if (vma->vm_flags & VM_EXEC)
+		flush_icache_all();
+
+	if (vaddr) {
+		if (map_coherent)
+			kunmap_coherent(vaddr);
+		else
+			kunmap_atomic(vaddr, KM_USER0);
 	}
 }
 
@@ -373,24 +300,10 @@ static void sh4_flush_cache_range(void *args)
 	if (boot_cpu_data.dcache.n_aliases == 0)
 		return;
 
-	/*
-	 * Don't bother with the lookup and alias check if we have a
-	 * wide range to cover, just blow away the dcache in its
-	 * entirety instead. -- PFM.
-	 */
-	if (((end - start) >> PAGE_SHIFT) >= MAX_DCACHE_PAGES)
-		flush_dcache_all();
-	else
-		__flush_cache_mm(vma->vm_mm, start, end);
+	flush_dcache_all();
 
-	if (vma->vm_flags & VM_EXEC) {
-		/*
-		 * TODO: Is this required???  Need to look at how I-cache
-		 * coherency is assured when new programs are loaded to see if
-		 * this matters.
-		 */
+	if (vma->vm_flags & VM_EXEC)
 		flush_icache_all();
-	}
 }
 
 /**
@@ -464,245 +377,6 @@ static void __flush_cache_one(unsigned long addr, unsigned long phys,
 	} while (--way_count != 0);
 }
 
-/*
- * Break the 1, 2 and 4 way variants of this out into separate functions to
- * avoid nearly all the overhead of having the conditional stuff in the function
- * bodies (+ the 1 and 2 way cases avoid saving any registers too).
- *
- * We want to eliminate unnecessary bus transactions, so this code uses
- * a non-obvious technique.
- *
- * Loop over a cache way sized block of, one cache line at a time. For each
- * line, use movca.a to cause the current cache line contents to be written
- * back, but without reading anything from main memory. However this has the
- * side effect that the cache is now caching that memory location. So follow
- * this with a cache invalidate to mark the cache line invalid. And do all
- * this with interrupts disabled, to avoid the cache line being accidently
- * evicted while it is holding garbage.
- *
- * This also breaks in a number of circumstances:
- * - if there are modifications to the region of memory just above
- *   empty_zero_page (for example because a breakpoint has been placed
- *   there), then these can be lost.
- *
- *   This is because the the memory address which the cache temporarily
- *   caches in the above description is empty_zero_page. So the
- *   movca.l hits the cache (it is assumed that it misses, or at least
- *   isn't dirty), modifies the line and then invalidates it, losing the
- *   required change.
- *
- * - If caches are disabled or configured in write-through mode, then
- *   the movca.l writes garbage directly into memory.
- */
-static void __flush_dcache_segment_writethrough(unsigned long start,
-					        unsigned long extent_per_way)
-{
-	unsigned long addr;
-	int i;
-
-	addr = CACHE_OC_ADDRESS_ARRAY | (start & cpu_data->dcache.entry_mask);
-
-	while (extent_per_way) {
-		for (i = 0; i < cpu_data->dcache.ways; i++)
-			__raw_writel(0, addr + cpu_data->dcache.way_incr * i);
-
-		addr += cpu_data->dcache.linesz;
-		extent_per_way -= cpu_data->dcache.linesz;
-	}
-}
-
-static void __flush_dcache_segment_1way(unsigned long start,
-					unsigned long extent_per_way)
-{
-	unsigned long orig_sr, sr_with_bl;
-	unsigned long base_addr;
-	unsigned long way_incr, linesz, way_size;
-	struct cache_info *dcache;
-	register unsigned long a0, a0e;
-
-	asm volatile("stc sr, %0" : "=r" (orig_sr));
-	sr_with_bl = orig_sr | (1<<28);
-	base_addr = ((unsigned long)&empty_zero_page[0]);
-
-	/*
-	 * The previous code aligned base_addr to 16k, i.e. the way_size of all
-	 * existing SH-4 D-caches.  Whilst I don't see a need to have this
-	 * aligned to any better than the cache line size (which it will be
-	 * anyway by construction), let's align it to at least the way_size of
-	 * any existing or conceivable SH-4 D-cache.  -- RPC
-	 */
-	base_addr = ((base_addr >> 16) << 16);
-	base_addr |= start;
-
-	dcache = &boot_cpu_data.dcache;
-	linesz = dcache->linesz;
-	way_incr = dcache->way_incr;
-	way_size = dcache->way_size;
-
-	a0 = base_addr;
-	a0e = base_addr + extent_per_way;
-	do {
-		asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
-		asm volatile("movca.l r0, @%0\n\t"
-			     "ocbi @%0" : : "r" (a0));
-		a0 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "ocbi @%0" : : "r" (a0));
-		a0 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "ocbi @%0" : : "r" (a0));
-		a0 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "ocbi @%0" : : "r" (a0));
-		asm volatile("ldc %0, sr" : : "r" (orig_sr));
-		a0 += linesz;
-	} while (a0 < a0e);
-}
-
-static void __flush_dcache_segment_2way(unsigned long start,
-					unsigned long extent_per_way)
-{
-	unsigned long orig_sr, sr_with_bl;
-	unsigned long base_addr;
-	unsigned long way_incr, linesz, way_size;
-	struct cache_info *dcache;
-	register unsigned long a0, a1, a0e;
-
-	asm volatile("stc sr, %0" : "=r" (orig_sr));
-	sr_with_bl = orig_sr | (1<<28);
-	base_addr = ((unsigned long)&empty_zero_page[0]);
-
-	/* See comment under 1-way above */
-	base_addr = ((base_addr >> 16) << 16);
-	base_addr |= start;
-
-	dcache = &boot_cpu_data.dcache;
-	linesz = dcache->linesz;
-	way_incr = dcache->way_incr;
-	way_size = dcache->way_size;
-
-	a0 = base_addr;
-	a1 = a0 + way_incr;
-	a0e = base_addr + extent_per_way;
-	do {
-		asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1" : :
-			     "r" (a0), "r" (a1));
-		a0 += linesz;
-		a1 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1" : :
-			     "r" (a0), "r" (a1));
-		a0 += linesz;
-		a1 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1" : :
-			     "r" (a0), "r" (a1));
-		a0 += linesz;
-		a1 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1" : :
-			     "r" (a0), "r" (a1));
-		asm volatile("ldc %0, sr" : : "r" (orig_sr));
-		a0 += linesz;
-		a1 += linesz;
-	} while (a0 < a0e);
-}
-
-static void __flush_dcache_segment_4way(unsigned long start,
-					unsigned long extent_per_way)
-{
-	unsigned long orig_sr, sr_with_bl;
-	unsigned long base_addr;
-	unsigned long way_incr, linesz, way_size;
-	struct cache_info *dcache;
-	register unsigned long a0, a1, a2, a3, a0e;
-
-	asm volatile("stc sr, %0" : "=r" (orig_sr));
-	sr_with_bl = orig_sr | (1<<28);
-	base_addr = ((unsigned long)&empty_zero_page[0]);
-
-	/* See comment under 1-way above */
-	base_addr = ((base_addr >> 16) << 16);
-	base_addr |= start;
-
-	dcache = &boot_cpu_data.dcache;
-	linesz = dcache->linesz;
-	way_incr = dcache->way_incr;
-	way_size = dcache->way_size;
-
-	a0 = base_addr;
-	a1 = a0 + way_incr;
-	a2 = a1 + way_incr;
-	a3 = a2 + way_incr;
-	a0e = base_addr + extent_per_way;
-	do {
-		asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "movca.l r0, @%2\n\t"
-			     "movca.l r0, @%3\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1\n\t"
-			     "ocbi @%2\n\t"
-			     "ocbi @%3\n\t" : :
-			     "r" (a0), "r" (a1), "r" (a2), "r" (a3));
-		a0 += linesz;
-		a1 += linesz;
-		a2 += linesz;
-		a3 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "movca.l r0, @%2\n\t"
-			     "movca.l r0, @%3\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1\n\t"
-			     "ocbi @%2\n\t"
-			     "ocbi @%3\n\t" : :
-			     "r" (a0), "r" (a1), "r" (a2), "r" (a3));
-		a0 += linesz;
-		a1 += linesz;
-		a2 += linesz;
-		a3 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "movca.l r0, @%2\n\t"
-			     "movca.l r0, @%3\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1\n\t"
-			     "ocbi @%2\n\t"
-			     "ocbi @%3\n\t" : :
-			     "r" (a0), "r" (a1), "r" (a2), "r" (a3));
-		a0 += linesz;
-		a1 += linesz;
-		a2 += linesz;
-		a3 += linesz;
-		asm volatile("movca.l r0, @%0\n\t"
-			     "movca.l r0, @%1\n\t"
-			     "movca.l r0, @%2\n\t"
-			     "movca.l r0, @%3\n\t"
-			     "ocbi @%0\n\t"
-			     "ocbi @%1\n\t"
-			     "ocbi @%2\n\t"
-			     "ocbi @%3\n\t" : :
-			     "r" (a0), "r" (a1), "r" (a2), "r" (a3));
-		asm volatile("ldc %0, sr" : : "r" (orig_sr));
-		a0 += linesz;
-		a1 += linesz;
-		a2 += linesz;
-		a3 += linesz;
-	} while (a0 < a0e);
-}
-
 extern void __weak sh4__flush_region_init(void);
 
 /*
@@ -710,32 +384,11 @@ extern void __weak sh4__flush_region_init(void);
  */
 void __init sh4_cache_init(void)
 {
-	unsigned int wt_enabled = !!(__raw_readl(CCR) & CCR_CACHE_WT);
-
 	printk("PVR=%08x CVR=%08x PRR=%08x\n",
 		ctrl_inl(CCN_PVR),
 		ctrl_inl(CCN_CVR),
 		ctrl_inl(CCN_PRR));
 
-	if (wt_enabled)
-		__flush_dcache_segment_fn = __flush_dcache_segment_writethrough;
-	else {
-		switch (boot_cpu_data.dcache.ways) {
-		case 1:
-			__flush_dcache_segment_fn = __flush_dcache_segment_1way;
-			break;
-		case 2:
-			__flush_dcache_segment_fn = __flush_dcache_segment_2way;
-			break;
-		case 4:
-			__flush_dcache_segment_fn = __flush_dcache_segment_4way;
-			break;
-		default:
-			panic("unknown number of cache ways\n");
-			break;
-		}
-	}
-
 	local_flush_icache_range	= sh4_flush_icache_range;
 	local_flush_dcache_page		= sh4_flush_dcache_page;
 	local_flush_cache_all		= sh4_flush_cache_all;
diff --git a/arch/sh/mm/cache-sh5.c b/arch/sh/mm/cache-sh5.c
index 467ff8e260f7..eb4cc4ec7952 100644
--- a/arch/sh/mm/cache-sh5.c
+++ b/arch/sh/mm/cache-sh5.c
@@ -563,7 +563,7 @@ static void sh5_flush_cache_page(void *args)
 
 static void sh5_flush_dcache_page(void *page)
 {
-	sh64_dcache_purge_phy_page(page_to_phys(page));
+	sh64_dcache_purge_phy_page(page_to_phys((struct page *)page));
 	wmb();
 }
 
diff --git a/arch/sh/mm/cache-sh7705.c b/arch/sh/mm/cache-sh7705.c
index 2601935eb589..f527fb70fce6 100644
--- a/arch/sh/mm/cache-sh7705.c
+++ b/arch/sh/mm/cache-sh7705.c
@@ -141,7 +141,7 @@ static void sh7705_flush_dcache_page(void *arg)
 	if (mapping && !mapping_mapped(mapping))
 		set_bit(PG_dcache_dirty, &page->flags);
 	else
-		__flush_dcache_page(PHYSADDR(page_address(page)));
+		__flush_dcache_page(__pa(page_address(page)));
 }
 
 static void __uses_jump_to_uncached sh7705_flush_cache_all(void *args)
diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c
index a2dc7f9ecc51..e9415d3ea94a 100644
--- a/arch/sh/mm/cache.c
+++ b/arch/sh/mm/cache.c
@@ -27,8 +27,11 @@ void (*local_flush_icache_page)(void *args) = cache_noop;
 void (*local_flush_cache_sigtramp)(void *args) = cache_noop;
 
 void (*__flush_wback_region)(void *start, int size);
+EXPORT_SYMBOL(__flush_wback_region);
 void (*__flush_purge_region)(void *start, int size);
+EXPORT_SYMBOL(__flush_purge_region);
 void (*__flush_invalidate_region)(void *start, int size);
+EXPORT_SYMBOL(__flush_invalidate_region);
 
 static inline void noop__flush_region(void *start, int size)
 {
@@ -161,14 +164,21 @@ void flush_cache_all(void)
 {
 	cacheop_on_each_cpu(local_flush_cache_all, NULL, 1);
 }
+EXPORT_SYMBOL(flush_cache_all);
 
 void flush_cache_mm(struct mm_struct *mm)
 {
+	if (boot_cpu_data.dcache.n_aliases == 0)
+		return;
+
 	cacheop_on_each_cpu(local_flush_cache_mm, mm, 1);
 }
 
 void flush_cache_dup_mm(struct mm_struct *mm)
 {
+	if (boot_cpu_data.dcache.n_aliases == 0)
+		return;
+
 	cacheop_on_each_cpu(local_flush_cache_dup_mm, mm, 1);
 }
 
@@ -195,11 +205,13 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 
 	cacheop_on_each_cpu(local_flush_cache_range, (void *)&data, 1);
 }
+EXPORT_SYMBOL(flush_cache_range);
 
 void flush_dcache_page(struct page *page)
 {
 	cacheop_on_each_cpu(local_flush_dcache_page, page, 1);
 }
+EXPORT_SYMBOL(flush_dcache_page);
 
 void flush_icache_range(unsigned long start, unsigned long end)
 {
@@ -265,7 +277,11 @@ static void __init emit_cache_params(void)
 
 void __init cpu_cache_init(void)
 {
-	unsigned int cache_disabled = !(__raw_readl(CCR) & CCR_CACHE_ENABLE);
+	unsigned int cache_disabled = 0;
+
+#ifdef CCR
+	cache_disabled = !(__raw_readl(CCR) & CCR_CACHE_ENABLE);
+#endif
 
 	compute_alias(&boot_cpu_data.icache);
 	compute_alias(&boot_cpu_data.dcache);
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index e098ec158ddb..902967e3f841 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -15,11 +15,15 @@
 #include <linux/dma-mapping.h>
 #include <linux/dma-debug.h>
 #include <linux/io.h>
+#include <linux/module.h>
 #include <asm/cacheflush.h>
 #include <asm/addrspace.h>
 
 #define PREALLOC_DMA_DEBUG_ENTRIES	4096
 
+struct dma_map_ops *dma_ops;
+EXPORT_SYMBOL(dma_ops);
+
 static int __init dma_init(void)
 {
 	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
@@ -27,15 +31,12 @@ static int __init dma_init(void)
 }
 fs_initcall(dma_init);
 
-void *dma_alloc_coherent(struct device *dev, size_t size,
-			   dma_addr_t *dma_handle, gfp_t gfp)
+void *dma_generic_alloc_coherent(struct device *dev, size_t size,
+				 dma_addr_t *dma_handle, gfp_t gfp)
 {
 	void *ret, *ret_nocache;
 	int order = get_order(size);
 
-	if (dma_alloc_from_coherent(dev, size, dma_handle, &ret))
-		return ret;
-
 	ret = (void *)__get_free_pages(gfp, order);
 	if (!ret)
 		return NULL;
@@ -57,35 +58,26 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
 
 	*dma_handle = virt_to_phys(ret);
 
-	debug_dma_alloc_coherent(dev, size, *dma_handle, ret_nocache);
-
 	return ret_nocache;
 }
-EXPORT_SYMBOL(dma_alloc_coherent);
 
-void dma_free_coherent(struct device *dev, size_t size,
-			 void *vaddr, dma_addr_t dma_handle)
+void dma_generic_free_coherent(struct device *dev, size_t size,
+			       void *vaddr, dma_addr_t dma_handle)
 {
 	int order = get_order(size);
 	unsigned long pfn = dma_handle >> PAGE_SHIFT;
 	int k;
 
-	WARN_ON(irqs_disabled());	/* for portability */
-
-	if (dma_release_from_coherent(dev, order, vaddr))
-		return;
-
-	debug_dma_free_coherent(dev, size, vaddr, dma_handle);
 	for (k = 0; k < (1 << order); k++)
 		__free_pages(pfn_to_page(pfn + k), 0);
+
 	iounmap(vaddr);
 }
-EXPORT_SYMBOL(dma_free_coherent);
 
 void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 		    enum dma_data_direction direction)
 {
-#ifdef CONFIG_CPU_SH5
+#if defined(CONFIG_CPU_SH5) || defined(CONFIG_PMB)
 	void *p1addr = vaddr;
 #else
 	void *p1addr = (void*) P1SEGADDR((unsigned long)vaddr);
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 8173e38afd38..432acd07e76a 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -15,6 +15,7 @@
 #include <linux/pagemap.h>
 #include <linux/percpu.h>
 #include <linux/io.h>
+#include <linux/dma-mapping.h>
 #include <asm/mmu_context.h>
 #include <asm/tlb.h>
 #include <asm/cacheflush.h>
@@ -186,11 +187,21 @@ void __init paging_init(void)
 	set_fixmap_nocache(FIX_UNCACHED, __pa(&__uncached_start));
 }
 
+/*
+ * Early initialization for any I/O MMUs we might have.
+ */
+static void __init iommu_init(void)
+{
+	no_iommu_init();
+}
+
 void __init mem_init(void)
 {
 	int codesize, datasize, initsize;
 	int nid;
 
+	iommu_init();
+
 	num_physpages = 0;
 	high_memory = NULL;
 
@@ -323,4 +334,12 @@ int memory_add_physaddr_to_nid(u64 addr)
 }
 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
+
 #endif /* CONFIG_MEMORY_HOTPLUG */
+
+#ifdef CONFIG_PMB
+int __in_29bit_mode(void)
+{
+	return !(ctrl_inl(PMB_PASCR) & PASCR_SE);
+}
+#endif /* CONFIG_PMB */
diff --git a/arch/sh/mm/kmap.c b/arch/sh/mm/kmap.c
index 16e01b5fed04..15d74ea42094 100644
--- a/arch/sh/mm/kmap.c
+++ b/arch/sh/mm/kmap.c
@@ -39,7 +39,9 @@ void *kmap_coherent(struct page *page, unsigned long addr)
 	pagefault_disable();
 
 	idx = FIX_CMAP_END -
-		((addr & current_cpu_data.dcache.alias_mask) >> PAGE_SHIFT);
+		(((addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1)) +
+		 (FIX_N_COLOURS * smp_processor_id()));
+
 	vaddr = __fix_to_virt(idx);
 
 	BUG_ON(!pte_none(*(kmap_coherent_pte - idx)));
diff --git a/arch/sh/mm/numa.c b/arch/sh/mm/numa.c
index 9b784fdb947c..6c524446c0f6 100644
--- a/arch/sh/mm/numa.c
+++ b/arch/sh/mm/numa.c
@@ -60,7 +60,7 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
 	unsigned long bootmem_paddr;
 
 	/* Don't allow bogus node assignment */
-	BUG_ON(nid > MAX_NUMNODES || nid == 0);
+	BUG_ON(nid > MAX_NUMNODES || nid <= 0);
 
 	start_pfn = start >> PAGE_SHIFT;
 	end_pfn = end >> PAGE_SHIFT;
diff --git a/arch/sh/mm/pmb-fixed.c b/arch/sh/mm/pmb-fixed.c
deleted file mode 100644
index 43c8eac4d8a1..000000000000
--- a/arch/sh/mm/pmb-fixed.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * arch/sh/mm/fixed_pmb.c
- *
- * Copyright (C) 2009  Renesas Solutions Corp.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/io.h>
-#include <asm/mmu.h>
-#include <asm/mmu_context.h>
-
-static int __uses_jump_to_uncached fixed_pmb_init(void)
-{
-	int i;
-	unsigned long addr, data;
-
-	jump_to_uncached();
-
-	for (i = 0; i < PMB_ENTRY_MAX; i++) {
-		addr = PMB_DATA + (i << PMB_E_SHIFT);
-		data = ctrl_inl(addr);
-		if (!(data & PMB_V))
-			continue;
-
-		if (data & PMB_C) {
-#if defined(CONFIG_CACHE_WRITETHROUGH)
-			data |= PMB_WT;
-#elif defined(CONFIG_CACHE_WRITEBACK)
-			data &= ~PMB_WT;
-#else
-			data &= ~(PMB_C | PMB_WT);
-#endif
-		}
-		ctrl_outl(data, addr);
-	}
-
-	back_to_cached();
-
-	return 0;
-}
-arch_initcall(fixed_pmb_init);
diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c
index aade31102112..280f6a166035 100644
--- a/arch/sh/mm/pmb.c
+++ b/arch/sh/mm/pmb.c
@@ -35,29 +35,9 @@
 
 static void __pmb_unmap(struct pmb_entry *);
 
-static struct kmem_cache *pmb_cache;
+static struct pmb_entry pmb_entry_list[NR_PMB_ENTRIES];
 static unsigned long pmb_map;
 
-static struct pmb_entry pmb_init_map[] = {
-	/* vpn         ppn         flags (ub/sz/c/wt) */
-
-	/* P1 Section Mappings */
-	{ 0x80000000, 0x00000000, PMB_SZ_64M  | PMB_C, },
-	{ 0x84000000, 0x04000000, PMB_SZ_64M  | PMB_C, },
-	{ 0x88000000, 0x08000000, PMB_SZ_128M | PMB_C, },
-	{ 0x90000000, 0x10000000, PMB_SZ_64M  | PMB_C, },
-	{ 0x94000000, 0x14000000, PMB_SZ_64M  | PMB_C, },
-	{ 0x98000000, 0x18000000, PMB_SZ_64M  | PMB_C, },
-
-	/* P2 Section Mappings */
-	{ 0xa0000000, 0x00000000, PMB_UB | PMB_SZ_64M  | PMB_WT, },
-	{ 0xa4000000, 0x04000000, PMB_UB | PMB_SZ_64M  | PMB_WT, },
-	{ 0xa8000000, 0x08000000, PMB_UB | PMB_SZ_128M | PMB_WT, },
-	{ 0xb0000000, 0x10000000, PMB_UB | PMB_SZ_64M  | PMB_WT, },
-	{ 0xb4000000, 0x14000000, PMB_UB | PMB_SZ_64M  | PMB_WT, },
-	{ 0xb8000000, 0x18000000, PMB_UB | PMB_SZ_64M  | PMB_WT, },
-};
-
 static inline unsigned long mk_pmb_entry(unsigned int entry)
 {
 	return (entry & PMB_E_MASK) << PMB_E_SHIFT;
@@ -73,81 +53,68 @@ static inline unsigned long mk_pmb_data(unsigned int entry)
 	return mk_pmb_entry(entry) | PMB_DATA;
 }
 
-static DEFINE_SPINLOCK(pmb_list_lock);
-static struct pmb_entry *pmb_list;
-
-static inline void pmb_list_add(struct pmb_entry *pmbe)
+static int pmb_alloc_entry(void)
 {
-	struct pmb_entry **p, *tmp;
+	unsigned int pos;
 
-	p = &pmb_list;
-	while ((tmp = *p) != NULL)
-		p = &tmp->next;
+repeat:
+	pos = find_first_zero_bit(&pmb_map, NR_PMB_ENTRIES);
 
-	pmbe->next = tmp;
-	*p = pmbe;
-}
+	if (unlikely(pos > NR_PMB_ENTRIES))
+		return -ENOSPC;
 
-static inline void pmb_list_del(struct pmb_entry *pmbe)
-{
-	struct pmb_entry **p, *tmp;
+	if (test_and_set_bit(pos, &pmb_map))
+		goto repeat;
 
-	for (p = &pmb_list; (tmp = *p); p = &tmp->next)
-		if (tmp == pmbe) {
-			*p = tmp->next;
-			return;
-		}
+	return pos;
 }
 
-struct pmb_entry *pmb_alloc(unsigned long vpn, unsigned long ppn,
-			    unsigned long flags)
+static struct pmb_entry *pmb_alloc(unsigned long vpn, unsigned long ppn,
+				   unsigned long flags, int entry)
 {
 	struct pmb_entry *pmbe;
+	int pos;
+
+	if (entry == PMB_NO_ENTRY) {
+		pos = pmb_alloc_entry();
+		if (pos < 0)
+			return ERR_PTR(pos);
+	} else {
+		if (test_bit(entry, &pmb_map))
+			return ERR_PTR(-ENOSPC);
+		pos = entry;
+	}
 
-	pmbe = kmem_cache_alloc(pmb_cache, GFP_KERNEL);
+	pmbe = &pmb_entry_list[pos];
 	if (!pmbe)
 		return ERR_PTR(-ENOMEM);
 
 	pmbe->vpn	= vpn;
 	pmbe->ppn	= ppn;
 	pmbe->flags	= flags;
-
-	spin_lock_irq(&pmb_list_lock);
-	pmb_list_add(pmbe);
-	spin_unlock_irq(&pmb_list_lock);
+	pmbe->entry	= pos;
 
 	return pmbe;
 }
 
-void pmb_free(struct pmb_entry *pmbe)
+static void pmb_free(struct pmb_entry *pmbe)
 {
-	spin_lock_irq(&pmb_list_lock);
-	pmb_list_del(pmbe);
-	spin_unlock_irq(&pmb_list_lock);
+	int pos = pmbe->entry;
 
-	kmem_cache_free(pmb_cache, pmbe);
+	pmbe->vpn	= 0;
+	pmbe->ppn	= 0;
+	pmbe->flags	= 0;
+	pmbe->entry	= 0;
+
+	clear_bit(pos, &pmb_map);
 }
 
 /*
  * Must be in P2 for __set_pmb_entry()
  */
-int __set_pmb_entry(unsigned long vpn, unsigned long ppn,
-		    unsigned long flags, int *entry)
+static void __set_pmb_entry(unsigned long vpn, unsigned long ppn,
+			    unsigned long flags, int pos)
 {
-	unsigned int pos = *entry;
-
-	if (unlikely(pos == PMB_NO_ENTRY))
-		pos = find_first_zero_bit(&pmb_map, NR_PMB_ENTRIES);
-
-repeat:
-	if (unlikely(pos > NR_PMB_ENTRIES))
-		return -ENOSPC;
-
-	if (test_and_set_bit(pos, &pmb_map)) {
-		pos = find_first_zero_bit(&pmb_map, NR_PMB_ENTRIES);
-		goto repeat;
-	}
-
 	ctrl_outl(vpn | PMB_V, mk_pmb_addr(pos));
 
 #ifdef CONFIG_CACHE_WRITETHROUGH
@@ -161,35 +128,21 @@ repeat:
 #endif
 
 	ctrl_outl(ppn | flags | PMB_V, mk_pmb_data(pos));
-
-	*entry = pos;
-
-	return 0;
 }
 
-int __uses_jump_to_uncached set_pmb_entry(struct pmb_entry *pmbe)
+static void __uses_jump_to_uncached set_pmb_entry(struct pmb_entry *pmbe)
 {
-	int ret;
-
 	jump_to_uncached();
-	ret = __set_pmb_entry(pmbe->vpn, pmbe->ppn, pmbe->flags, &pmbe->entry);
+	__set_pmb_entry(pmbe->vpn, pmbe->ppn, pmbe->flags, pmbe->entry);
 	back_to_cached();
-
-	return ret;
 }
 
-void __uses_jump_to_uncached clear_pmb_entry(struct pmb_entry *pmbe)
+static void __uses_jump_to_uncached clear_pmb_entry(struct pmb_entry *pmbe)
 {
 	unsigned int entry = pmbe->entry;
 	unsigned long addr;
 
-	/*
-	 * Don't allow clearing of wired init entries, P1 or P2 access
-	 * without a corresponding mapping in the PMB will lead to reset
-	 * by the TLB.
-	 */
-	if (unlikely(entry < ARRAY_SIZE(pmb_init_map) ||
-		     entry >= NR_PMB_ENTRIES))
+	if (unlikely(entry >= NR_PMB_ENTRIES))
 		return;
 
 	jump_to_uncached();
@@ -202,8 +155,6 @@ void __uses_jump_to_uncached clear_pmb_entry(struct pmb_entry *pmbe)
 	ctrl_outl(ctrl_inl(addr) & ~PMB_V, addr);
 
 	back_to_cached();
-
-	clear_bit(entry, &pmb_map);
 }
 
 
@@ -239,23 +190,17 @@ long pmb_remap(unsigned long vaddr, unsigned long phys,
 
 again:
 	for (i = 0; i < ARRAY_SIZE(pmb_sizes); i++) {
-		int ret;
-
 		if (size < pmb_sizes[i].size)
 			continue;
 
-		pmbe = pmb_alloc(vaddr, phys, pmb_flags | pmb_sizes[i].flag);
+		pmbe = pmb_alloc(vaddr, phys, pmb_flags | pmb_sizes[i].flag,
+				 PMB_NO_ENTRY);
 		if (IS_ERR(pmbe)) {
 			err = PTR_ERR(pmbe);
 			goto out;
 		}
 
-		ret = set_pmb_entry(pmbe);
-		if (ret != 0) {
-			pmb_free(pmbe);
-			err = -EBUSY;
-			goto out;
-		}
+		set_pmb_entry(pmbe);
 
 		phys	+= pmb_sizes[i].size;
 		vaddr	+= pmb_sizes[i].size;
@@ -292,11 +237,16 @@ out:
 
 void pmb_unmap(unsigned long addr)
 {
-	struct pmb_entry **p, *pmbe;
+	struct pmb_entry *pmbe = NULL;
+	int i;
 
-	for (p = &pmb_list; (pmbe = *p); p = &pmbe->next)
-		if (pmbe->vpn == addr)
-			break;
+	for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) {
+		if (test_bit(i, &pmb_map)) {
+			pmbe = &pmb_entry_list[i];
+			if (pmbe->vpn == addr)
+				break;
+		}
+	}
 
 	if (unlikely(!pmbe))
 		return;
@@ -306,13 +256,22 @@ void pmb_unmap(unsigned long addr)
 
 static void __pmb_unmap(struct pmb_entry *pmbe)
 {
-	WARN_ON(!test_bit(pmbe->entry, &pmb_map));
+	BUG_ON(!test_bit(pmbe->entry, &pmb_map));
 
 	do {
 		struct pmb_entry *pmblink = pmbe;
 
-		if (pmbe->entry != PMB_NO_ENTRY)
-			clear_pmb_entry(pmbe);
+		/*
+		 * We may be called before this pmb_entry has been
+		 * entered into the PMB table via set_pmb_entry(), but
+		 * that's OK because we've allocated a unique slot for
+		 * this entry in pmb_alloc() (even if we haven't filled
+		 * it yet).
+		 *
+		 * Therefore, calling clear_pmb_entry() is safe as no
+		 * other mapping can be using that slot.
+		 */
+		clear_pmb_entry(pmbe);
 
 		pmbe = pmblink->link;
 
@@ -320,42 +279,34 @@ static void __pmb_unmap(struct pmb_entry *pmbe)
 	} while (pmbe);
 }
 
-static void pmb_cache_ctor(void *pmb)
+#ifdef CONFIG_PMB
+int __uses_jump_to_uncached pmb_init(void)
 {
-	struct pmb_entry *pmbe = pmb;
-
-	memset(pmb, 0, sizeof(struct pmb_entry));
-
-	pmbe->entry = PMB_NO_ENTRY;
-}
-
-static int __uses_jump_to_uncached pmb_init(void)
-{
-	unsigned int nr_entries = ARRAY_SIZE(pmb_init_map);
-	unsigned int entry, i;
-
-	BUG_ON(unlikely(nr_entries >= NR_PMB_ENTRIES));
-
-	pmb_cache = kmem_cache_create("pmb", sizeof(struct pmb_entry), 0,
-				      SLAB_PANIC, pmb_cache_ctor);
+	unsigned int i;
+	long size, ret;
 
 	jump_to_uncached();
 
 	/*
-	 * Ordering is important, P2 must be mapped in the PMB before we
-	 * can set PMB.SE, and P1 must be mapped before we jump back to
-	 * P1 space.
+	 * Insert PMB entries for the P1 and P2 areas so that, after
+	 * we've switched the MMU to 32-bit mode, the semantics of P1
+	 * and P2 are the same as in 29-bit mode, e.g.
+	 *
+	 *	P1 - provides a cached window onto physical memory
+	 *	P2 - provides an uncached window onto physical memory
 	 */
-	for (entry = 0; entry < nr_entries; entry++) {
-		struct pmb_entry *pmbe = pmb_init_map + entry;
+	size = __MEMORY_START + __MEMORY_SIZE;
 
-		__set_pmb_entry(pmbe->vpn, pmbe->ppn, pmbe->flags, &entry);
-	}
+	ret = pmb_remap(P1SEG, 0x00000000, size, PMB_C);
+	BUG_ON(ret != size);
+
+	ret = pmb_remap(P2SEG, 0x00000000, size, PMB_WT | PMB_UB);
+	BUG_ON(ret != size);
 
 	ctrl_outl(0, PMB_IRMCR);
 
 	/* PMB.SE and UB[7] */
-	ctrl_outl((1 << 31) | (1 << 7), PMB_PASCR);
+	ctrl_outl(PASCR_SE | (1 << 7), PMB_PASCR);
 
 	/* Flush out the TLB */
 	i =  ctrl_inl(MMUCR);
@@ -366,7 +317,53 @@ static int __uses_jump_to_uncached pmb_init(void)
 
 	return 0;
 }
-arch_initcall(pmb_init);
+#else
+int __uses_jump_to_uncached pmb_init(void)
+{
+	int i;
+	unsigned long addr, data;
+
+	jump_to_uncached();
+
+	for (i = 0; i < PMB_ENTRY_MAX; i++) {
+		struct pmb_entry *pmbe;
+		unsigned long vpn, ppn, flags;
+
+		addr = PMB_DATA + (i << PMB_E_SHIFT);
+		data = ctrl_inl(addr);
+		if (!(data & PMB_V))
+			continue;
+
+		if (data & PMB_C) {
+#if defined(CONFIG_CACHE_WRITETHROUGH)
+			data |= PMB_WT;
+#elif defined(CONFIG_CACHE_WRITEBACK)
+			data &= ~PMB_WT;
+#else
+			data &= ~(PMB_C | PMB_WT);
+#endif
+		}
+		ctrl_outl(data, addr);
+
+		ppn = data & PMB_PFN_MASK;
+
+		flags = data & (PMB_C | PMB_WT | PMB_UB);
+		flags |= data & PMB_SZ_MASK;
+
+		addr = PMB_ADDR + (i << PMB_E_SHIFT);
+		data = ctrl_inl(addr);
+
+		vpn = data & PMB_PFN_MASK;
+
+		pmbe = pmb_alloc(vpn, ppn, flags, i);
+		WARN_ON(IS_ERR(pmbe));
+	}
+
+	back_to_cached();
+
+	return 0;
+}
+#endif /* CONFIG_PMB */
 
 static int pmb_seq_show(struct seq_file *file, void *iter)
 {
@@ -434,15 +431,18 @@ postcore_initcall(pmb_debugfs_init);
 static int pmb_sysdev_suspend(struct sys_device *dev, pm_message_t state)
 {
 	static pm_message_t prev_state;
+	int i;
 
 	/* Restore the PMB after a resume from hibernation */
 	if (state.event == PM_EVENT_ON &&
 	    prev_state.event == PM_EVENT_FREEZE) {
 		struct pmb_entry *pmbe;
-		spin_lock_irq(&pmb_list_lock);
-		for (pmbe = pmb_list; pmbe; pmbe = pmbe->next)
-			set_pmb_entry(pmbe);
-		spin_unlock_irq(&pmb_list_lock);
+		for (i = 0; i < ARRAY_SIZE(pmb_entry_list); i++) {
+			if (test_bit(i, &pmb_map)) {
+				pmbe = &pmb_entry_list[i];
+				set_pmb_entry(pmbe);
+			}
+		}
 	}
 	prev_state = state;
 	return 0;
diff --git a/arch/sh/oprofile/Makefile b/arch/sh/oprofile/Makefile
index 8e6eec91c14c..4886c5c1786c 100644
--- a/arch/sh/oprofile/Makefile
+++ b/arch/sh/oprofile/Makefile
@@ -7,7 +7,3 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
 		timer_int.o )
 
 oprofile-y	:= $(DRIVER_OBJS) common.o backtrace.o
-
-oprofile-$(CONFIG_CPU_SUBTYPE_SH7750S)	+= op_model_sh7750.o
-oprofile-$(CONFIG_CPU_SUBTYPE_SH7750)	+= op_model_sh7750.o
-oprofile-$(CONFIG_CPU_SUBTYPE_SH7091)	+= op_model_sh7750.o
diff --git a/arch/sh/oprofile/common.c b/arch/sh/oprofile/common.c
index 44f4e31c6d63..ac604937f3ee 100644
--- a/arch/sh/oprofile/common.c
+++ b/arch/sh/oprofile/common.c
@@ -20,9 +20,6 @@
 #include <asm/processor.h>
 #include "op_impl.h"
 
-extern struct op_sh_model op_model_sh7750_ops __weak;
-extern struct op_sh_model op_model_sh4a_ops __weak;
-
 static struct op_sh_model *model;
 
 static struct op_counter_config ctr[20];
@@ -94,33 +91,14 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 	 */
 	ops->backtrace = sh_backtrace;
 
-	switch (current_cpu_data.type) {
-	/* SH-4 types */
-	case CPU_SH7750:
-	case CPU_SH7750S:
-		lmodel = &op_model_sh7750_ops;
-		break;
-
-        /* SH-4A types */
-	case CPU_SH7763:
-	case CPU_SH7770:
-	case CPU_SH7780:
-	case CPU_SH7781:
-	case CPU_SH7785:
-	case CPU_SH7786:
-	case CPU_SH7723:
-	case CPU_SH7724:
-	case CPU_SHX3:
-		lmodel = &op_model_sh4a_ops;
-		break;
-
-	/* SH4AL-DSP types */
-	case CPU_SH7343:
-	case CPU_SH7722:
-	case CPU_SH7366:
-		lmodel = &op_model_sh4a_ops;
-		break;
-	}
+	/*
+	 * XXX
+	 *
+	 * All of the SH7750/SH-4A counters have been converted to perf,
+	 * this infrastructure hook is left for other users until they've
+	 * had a chance to convert over, at which point all of this
+	 * will be deleted.
+	 */
 
 	if (!lmodel)
 		return -ENODEV;
diff --git a/arch/sh/oprofile/op_impl.h b/arch/sh/oprofile/op_impl.h
index 4d509975eba6..1244479ceb29 100644
--- a/arch/sh/oprofile/op_impl.h
+++ b/arch/sh/oprofile/op_impl.h
@@ -6,7 +6,7 @@ struct op_counter_config {
 	unsigned long enabled;
 	unsigned long event;
 
-	unsigned long long count;
+	unsigned long count;
 
 	/* Dummy values for userspace tool compliance */
 	unsigned long kernel;
diff --git a/arch/sh/oprofile/op_model_sh7750.c b/arch/sh/oprofile/op_model_sh7750.c
deleted file mode 100644
index c892c7c30c2f..000000000000
--- a/arch/sh/oprofile/op_model_sh7750.c
+++ /dev/null
@@ -1,255 +0,0 @@
-/*
- * arch/sh/oprofile/op_model_sh7750.c
- *
- * OProfile support for SH7750/SH7750S Performance Counters
- *
- * Copyright (C) 2003 - 2008  Paul Mundt
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/kernel.h>
-#include <linux/oprofile.h>
-#include <linux/profile.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/fs.h>
-#include "op_impl.h"
-
-#define PM_CR_BASE	0xff000084	/* 16-bit */
-#define PM_CTR_BASE	0xff100004	/* 32-bit */
-
-#define PMCR(n)		(PM_CR_BASE + ((n) * 0x04))
-#define PMCTRH(n)	(PM_CTR_BASE + 0x00 + ((n) * 0x08))
-#define PMCTRL(n)	(PM_CTR_BASE + 0x04 + ((n) * 0x08))
-
-#define PMCR_PMM_MASK	0x0000003f
-
-#define PMCR_CLKF	0x00000100
-#define PMCR_PMCLR	0x00002000
-#define PMCR_PMST	0x00004000
-#define PMCR_PMEN	0x00008000
-
-struct op_sh_model op_model_sh7750_ops;
-
-#define NR_CNTRS	2
-
-static struct sh7750_ppc_register_config {
-	unsigned int ctrl;
-	unsigned long cnt_hi;
-	unsigned long cnt_lo;
-} regcache[NR_CNTRS];
-
-/*
- * There are a number of events supported by each counter (33 in total).
- * Since we have 2 counters, each counter will take the event code as it
- * corresponds to the PMCR PMM setting. Each counter can be configured
- * independently.
- *
- *	Event Code	Description
- *	----------	-----------
- *
- *	0x01		Operand read access
- *	0x02		Operand write access
- *	0x03		UTLB miss
- *	0x04		Operand cache read miss
- *	0x05		Operand cache write miss
- *	0x06		Instruction fetch (w/ cache)
- *	0x07		Instruction TLB miss
- *	0x08		Instruction cache miss
- *	0x09		All operand accesses
- *	0x0a		All instruction accesses
- *	0x0b		OC RAM operand access
- *	0x0d		On-chip I/O space access
- *	0x0e		Operand access (r/w)
- *	0x0f		Operand cache miss (r/w)
- *	0x10		Branch instruction
- *	0x11		Branch taken
- *	0x12		BSR/BSRF/JSR
- *	0x13		Instruction execution
- *	0x14		Instruction execution in parallel
- *	0x15		FPU Instruction execution
- *	0x16		Interrupt
- *	0x17		NMI
- *	0x18		trapa instruction execution
- *	0x19		UBCA match
- *	0x1a		UBCB match
- *	0x21		Instruction cache fill
- *	0x22		Operand cache fill
- *	0x23		Elapsed time
- *	0x24		Pipeline freeze by I-cache miss
- *	0x25		Pipeline freeze by D-cache miss
- *	0x27		Pipeline freeze by branch instruction
- *	0x28		Pipeline freeze by CPU register
- *	0x29		Pipeline freeze by FPU
- *
- * Unfortunately we don't have a native exception or interrupt for counter
- * overflow (although since these counters can run for 16.3 days without
- * overflowing, it's not really necessary).
- *
- * OProfile on the other hand likes to have samples taken periodically, so
- * for now we just piggyback the timer interrupt to get the expected
- * behavior.
- */
-
-static int sh7750_timer_notify(struct pt_regs *regs)
-{
-	oprofile_add_sample(regs, 0);
-	return 0;
-}
-
-static u64 sh7750_read_counter(int counter)
-{
-	return (u64)((u64)(__raw_readl(PMCTRH(counter)) & 0xffff) << 32) |
-			   __raw_readl(PMCTRL(counter));
-}
-
-/*
- * Files will be in a path like:
- *
- *  /<oprofilefs mount point>/<counter number>/<file>
- *
- * So when dealing with <file>, we look to the parent dentry for the counter
- * number.
- */
-static inline int to_counter(struct file *file)
-{
-	const unsigned char *name = file->f_path.dentry->d_parent->d_name.name;
-
-	return (int)simple_strtol(name, NULL, 10);
-}
-
-/*
- * XXX: We have 48-bit counters, so we're probably going to want something
- * more along the lines of oprofilefs_ullong_to_user().. Truncating to
- * unsigned long works fine for now though, as long as we don't attempt to
- * profile for too horribly long.
- */
-static ssize_t sh7750_read_count(struct file *file, char __user *buf,
-				 size_t count, loff_t *ppos)
-{
-	int counter = to_counter(file);
-	u64 val = sh7750_read_counter(counter);
-
-	return oprofilefs_ulong_to_user((unsigned long)val, buf, count, ppos);
-}
-
-static ssize_t sh7750_write_count(struct file *file, const char __user *buf,
-				  size_t count, loff_t *ppos)
-{
-	int counter = to_counter(file);
-	unsigned long val;
-
-	if (oprofilefs_ulong_from_user(&val, buf, count))
-		return -EFAULT;
-
-	/*
-	 * Any write will clear the counter, although only 0 should be
-	 * written for this purpose, as we do not support setting the
-	 * counter to an arbitrary value.
-	 */
-	WARN_ON(val != 0);
-
-	__raw_writew(__raw_readw(PMCR(counter)) | PMCR_PMCLR, PMCR(counter));
-
-	return count;
-}
-
-static const struct file_operations count_fops = {
-	.read		= sh7750_read_count,
-	.write		= sh7750_write_count,
-};
-
-static int sh7750_ppc_create_files(struct super_block *sb, struct dentry *dir)
-{
-	return oprofilefs_create_file(sb, dir, "count", &count_fops);
-}
-
-static void sh7750_ppc_reg_setup(struct op_counter_config *ctr)
-{
-	unsigned int counters = op_model_sh7750_ops.num_counters;
-	int i;
-
-	for (i = 0; i < counters; i++) {
-		regcache[i].ctrl	= 0;
-		regcache[i].cnt_hi	= 0;
-		regcache[i].cnt_lo	= 0;
-
-		if (!ctr[i].enabled)
-			continue;
-
-		regcache[i].ctrl |= ctr[i].event | PMCR_PMEN | PMCR_PMST;
-		regcache[i].cnt_hi = (unsigned long)((ctr->count >> 32) & 0xffff);
-		regcache[i].cnt_lo = (unsigned long)(ctr->count & 0xffffffff);
-	}
-}
-
-static void sh7750_ppc_cpu_setup(void *args)
-{
-	unsigned int counters = op_model_sh7750_ops.num_counters;
-	int i;
-
-	for (i = 0; i < counters; i++) {
-		__raw_writew(0, PMCR(i));
-		__raw_writel(regcache[i].cnt_hi, PMCTRH(i));
-		__raw_writel(regcache[i].cnt_lo, PMCTRL(i));
-	}
-}
-
-static void sh7750_ppc_cpu_start(void *args)
-{
-	unsigned int counters = op_model_sh7750_ops.num_counters;
-	int i;
-
-	for (i = 0; i < counters; i++)
-		__raw_writew(regcache[i].ctrl, PMCR(i));
-}
-
-static void sh7750_ppc_cpu_stop(void *args)
-{
-	unsigned int counters = op_model_sh7750_ops.num_counters;
-	int i;
-
-	/* Disable the counters */
-	for (i = 0; i < counters; i++)
-		__raw_writew(__raw_readw(PMCR(i)) & ~PMCR_PMEN, PMCR(i));
-}
-
-static inline void sh7750_ppc_reset(void)
-{
-	unsigned int counters = op_model_sh7750_ops.num_counters;
-	int i;
-
-	/* Clear the counters */
-	for (i = 0; i < counters; i++)
-		__raw_writew(__raw_readw(PMCR(i)) | PMCR_PMCLR, PMCR(i));
-}
-
-static int sh7750_ppc_init(void)
-{
-	sh7750_ppc_reset();
-
-	return register_timer_hook(sh7750_timer_notify);
-}
-
-static void sh7750_ppc_exit(void)
-{
-	unregister_timer_hook(sh7750_timer_notify);
-
-	sh7750_ppc_reset();
-}
-
-struct op_sh_model op_model_sh7750_ops = {
-	.cpu_type	= "sh/sh7750",
-	.num_counters	= NR_CNTRS,
-	.reg_setup	= sh7750_ppc_reg_setup,
-	.cpu_setup	= sh7750_ppc_cpu_setup,
-	.cpu_start	= sh7750_ppc_cpu_start,
-	.cpu_stop	= sh7750_ppc_cpu_stop,
-	.init		= sh7750_ppc_init,
-	.exit		= sh7750_ppc_exit,
-	.create_files	= sh7750_ppc_create_files,
-};
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index a762283d2a21..e789e6c9a422 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -214,7 +214,7 @@ static void gdrom_spicommand(void *spi_string, int buflen)
 		gdrom_getsense(NULL);
 		return;
 	}
-	outsw(PHYSADDR(GDROM_DATA_REG), cmd, 6);
+	outsw(GDROM_DATA_REG, cmd, 6);
 }
 
 
@@ -298,7 +298,7 @@ static int gdrom_readtoc_cmd(struct gdromtoc *toc, int session)
 		err = -EINVAL;
 		goto cleanup_readtoc;
 	}
-	insw(PHYSADDR(GDROM_DATA_REG), toc, tocsize/2);
+	insw(GDROM_DATA_REG, toc, tocsize/2);
 	if (gd.status & 0x01)
 		err = -EINVAL;
 
@@ -449,7 +449,7 @@ static int gdrom_getsense(short *bufstring)
 		GDROM_DEFAULT_TIMEOUT);
 	if (gd.pending)
 		goto cleanup_sense;
-	insw(PHYSADDR(GDROM_DATA_REG), &sense, sense_command->buflen/2);
+	insw(GDROM_DATA_REG, &sense, sense_command->buflen/2);
 	if (sense[1] & 40) {
 		printk(KERN_INFO "GDROM: Drive not ready - command aborted\n");
 		goto cleanup_sense;
@@ -586,7 +586,7 @@ static void gdrom_readdisk_dma(struct work_struct *work)
 		spin_unlock(&gdrom_lock);
 		block = blk_rq_pos(req)/GD_TO_BLK + GD_SESSION_OFFSET;
 		block_cnt = blk_rq_sectors(req)/GD_TO_BLK;
-		ctrl_outl(PHYSADDR(req->buffer), GDROM_DMA_STARTADDR_REG);
+		ctrl_outl(virt_to_phys(req->buffer), GDROM_DMA_STARTADDR_REG);
 		ctrl_outl(block_cnt * GDROM_HARD_SECTOR, GDROM_DMA_LENGTH_REG);
 		ctrl_outl(1, GDROM_DMA_DIRECTION_REG);
 		ctrl_outl(1, GDROM_DMA_ENABLE_REG);
@@ -615,7 +615,7 @@ static void gdrom_readdisk_dma(struct work_struct *work)
 			cpu_relax();
 		gd.pending = 1;
 		gd.transfer = 1;
-		outsw(PHYSADDR(GDROM_DATA_REG), &read_command->cmd, 6);
+		outsw(GDROM_DATA_REG, &read_command->cmd, 6);
 		timeout = jiffies + HZ / 2;
 		/* Wait for any pending DMA to finish */
 		while (ctrl_inb(GDROM_DMA_STATUS_REG) &&
diff --git a/drivers/input/keyboard/sh_keysc.c b/drivers/input/keyboard/sh_keysc.c
index 887af79b7bff..076111fc72d2 100644
--- a/drivers/input/keyboard/sh_keysc.c
+++ b/drivers/input/keyboard/sh_keysc.c
@@ -18,9 +18,9 @@
 #include <linux/delay.h>
 #include <linux/platform_device.h>
 #include <linux/input.h>
+#include <linux/input/sh_keysc.h>
 #include <linux/clk.h>
 #include <linux/io.h>
-#include <asm/sh_keysc.h>
 
 #define KYCR1_OFFS   0x00
 #define KYCR2_OFFS   0x04
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 08f2d07bf56a..a296e717e86e 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -35,6 +35,14 @@ config MFD_ASIC3
 	  This driver supports the ASIC3 multifunction chip found on many
 	  PDAs (mainly iPAQ and HTC based ones)
 
+config MFD_SH_MOBILE_SDHI
+	bool "Support for SuperH Mobile SDHI"
+	depends on SUPERH
+	select MFD_CORE
+	 ---help---
+	  This driver supports the SDHI hardware block found in many
+	  SuperH Mobile SoCs.
+
 config MFD_DM355EVM_MSP
 	bool "DaVinci DM355 EVM microcontroller"
 	depends on I2C && MACH_DAVINCI_DM355_EVM
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index af0fc903cec8..11350c1d9301 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -4,6 +4,7 @@
 
 obj-$(CONFIG_MFD_SM501)		+= sm501.o
 obj-$(CONFIG_MFD_ASIC3)		+= asic3.o
+obj-$(CONFIG_MFD_SH_MOBILE_SDHI)		+= sh_mobile_sdhi.o
 
 obj-$(CONFIG_HTC_EGPIO)		+= htc-egpio.o
 obj-$(CONFIG_HTC_PASIC3)	+= htc-pasic3.o
diff --git a/drivers/mfd/sh_mobile_sdhi.c b/drivers/mfd/sh_mobile_sdhi.c
new file mode 100644
index 000000000000..03efae8041ab
--- /dev/null
+++ b/drivers/mfd/sh_mobile_sdhi.c
@@ -0,0 +1,156 @@
+/*
+ * SuperH Mobile SDHI
+ *
+ * Copyright (C) 2009 Magnus Damm
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on "Compaq ASIC3 support":
+ *
+ * Copyright 2001 Compaq Computer Corporation.
+ * Copyright 2004-2005 Phil Blundell
+ * Copyright 2007-2008 OpenedHand Ltd.
+ *
+ * Authors: Phil Blundell <pb@handhelds.org>,
+ *	    Samuel Ortiz <sameo@openedhand.com>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+
+#include <linux/mfd/core.h>
+#include <linux/mfd/tmio.h>
+#include <linux/mfd/sh_mobile_sdhi.h>
+
+struct sh_mobile_sdhi {
+	struct clk *clk;
+	struct tmio_mmc_data mmc_data;
+	struct mfd_cell cell_mmc;
+};
+
+static struct resource sh_mobile_sdhi_resources[] = {
+	{
+		.start = 0x000,
+		.end   = 0x1ff,
+		.flags = IORESOURCE_MEM,
+	},
+	{
+		.start = 0,
+		.end   = 0,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
+static struct mfd_cell sh_mobile_sdhi_cell = {
+	.name          = "tmio-mmc",
+	.num_resources = ARRAY_SIZE(sh_mobile_sdhi_resources),
+	.resources     = sh_mobile_sdhi_resources,
+};
+
+static void sh_mobile_sdhi_set_pwr(struct platform_device *tmio, int state)
+{
+	struct platform_device *pdev = to_platform_device(tmio->dev.parent);
+	struct sh_mobile_sdhi_info *p = pdev->dev.platform_data;
+
+	if (p && p->set_pwr)
+		p->set_pwr(pdev, state);
+}
+
+static int __init sh_mobile_sdhi_probe(struct platform_device *pdev)
+{
+	struct sh_mobile_sdhi *priv;
+	struct resource *mem;
+	char clk_name[8];
+	int ret, irq;
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!mem)
+		dev_err(&pdev->dev, "missing MEM resource\n");
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		dev_err(&pdev->dev, "missing IRQ resource\n");
+
+	if (!mem || (irq < 0))
+		return -EINVAL;
+
+	priv = kzalloc(sizeof(struct sh_mobile_sdhi), GFP_KERNEL);
+	if (priv == NULL) {
+		dev_err(&pdev->dev, "kzalloc failed\n");
+		return -ENOMEM;
+	}
+
+	snprintf(clk_name, sizeof(clk_name), "sdhi%d", pdev->id);
+	priv->clk = clk_get(&pdev->dev, clk_name);
+	if (IS_ERR(priv->clk)) {
+		dev_err(&pdev->dev, "cannot get clock \"%s\"\n", clk_name);
+		ret = PTR_ERR(priv->clk);
+		kfree(priv);
+		return ret;
+	}
+
+	clk_enable(priv->clk);
+
+	/* FIXME: silly const unsigned int hclk */
+	*(unsigned int *)&priv->mmc_data.hclk = clk_get_rate(priv->clk);
+	priv->mmc_data.set_pwr = sh_mobile_sdhi_set_pwr;
+
+	memcpy(&priv->cell_mmc, &sh_mobile_sdhi_cell, sizeof(priv->cell_mmc));
+	priv->cell_mmc.driver_data = &priv->mmc_data;
+	priv->cell_mmc.platform_data = &priv->cell_mmc;
+	priv->cell_mmc.data_size = sizeof(priv->cell_mmc);
+
+	platform_set_drvdata(pdev, priv);
+
+	ret = mfd_add_devices(&pdev->dev, pdev->id,
+			      &priv->cell_mmc, 1, mem, irq);
+	if (ret) {
+		clk_disable(priv->clk);
+		clk_put(priv->clk);
+		kfree(priv);
+	}
+
+	return ret;
+}
+
+static int sh_mobile_sdhi_remove(struct platform_device *pdev)
+{
+	struct sh_mobile_sdhi *priv = platform_get_drvdata(pdev);
+
+	mfd_remove_devices(&pdev->dev);
+	clk_disable(priv->clk);
+	clk_put(priv->clk);
+	kfree(priv);
+
+	return 0;
+}
+
+static struct platform_driver sh_mobile_sdhi_driver = {
+	.driver		= {
+		.name	= "sh_mobile_sdhi",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= sh_mobile_sdhi_probe,
+	.remove		= __devexit_p(sh_mobile_sdhi_remove),
+};
+
+static int __init sh_mobile_sdhi_init(void)
+{
+	return platform_driver_register(&sh_mobile_sdhi_driver);
+}
+
+static void __exit sh_mobile_sdhi_exit(void)
+{
+	platform_driver_unregister(&sh_mobile_sdhi_driver);
+}
+
+module_init(sh_mobile_sdhi_init);
+module_exit(sh_mobile_sdhi_exit);
+
+MODULE_DESCRIPTION("SuperH Mobile SDHI driver");
+MODULE_AUTHOR("Magnus Damm");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 432ae8358c86..e04b751680d0 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -329,7 +329,7 @@ config MMC_SDRICOH_CS
 
 config MMC_TMIO
 	tristate "Toshiba Mobile IO Controller (TMIO) MMC/SD function support"
-	depends on MFD_TMIO || MFD_ASIC3
+	depends on MFD_TMIO || MFD_ASIC3 || SUPERH
 	help
 	  This provides support for the SD/MMC cell found in TC6393XB,
 	  T7L66XB and also HTC ASIC3
diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c
index d490628b64da..1e73c8f42e38 100644
--- a/drivers/rtc/rtc-ds1302.c
+++ b/drivers/rtc/rtc-ds1302.c
@@ -201,7 +201,7 @@ static struct platform_driver ds1302_platform_driver = {
 		.name	= DRV_NAME,
 		.owner	= THIS_MODULE,
 	},
-	.remove		= __exit_p(ds1302_rtc_remove),
+	.remove		= __devexit_p(ds1302_rtc_remove),
 };
 
 static int __init ds1302_rtc_init(void)
diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index 50943ff78f4b..9ff47db0b2ce 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -996,7 +996,7 @@ config SERIAL_IP22_ZILOG_CONSOLE
 
 config SERIAL_SH_SCI
 	tristate "SuperH SCI(F) serial port support"
-	depends on SUPERH || H8300
+	depends on HAVE_CLK && (SUPERH || H8300)
 	select SERIAL_CORE
 
 config SERIAL_SH_SCI_NR_UARTS
diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index 6498bd1fb6dd..ff38dbdb5c6e 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -50,7 +50,6 @@
 #include <linux/list.h>
 
 #ifdef CONFIG_SUPERH
-#include <asm/clock.h>
 #include <asm/sh_bios.h>
 #endif
 
@@ -79,22 +78,18 @@ struct sci_port {
 	struct timer_list	break_timer;
 	int			break_flag;
 
-#ifdef CONFIG_HAVE_CLK
 	/* Interface clock */
 	struct clk		*iclk;
 	/* Data clock */
 	struct clk		*dclk;
-#endif
+
 	struct list_head	node;
 };
 
 struct sh_sci_priv {
 	spinlock_t lock;
 	struct list_head ports;
-
-#ifdef CONFIG_HAVE_CLK
 	struct notifier_block clk_nb;
-#endif
 };
 
 /* Function prototypes */
@@ -156,32 +151,6 @@ static void sci_poll_put_char(struct uart_port *port, unsigned char c)
 }
 #endif /* CONFIG_CONSOLE_POLL || CONFIG_SERIAL_SH_SCI_CONSOLE */
 
-#if defined(__H8300S__)
-enum { sci_disable, sci_enable };
-
-static void h8300_sci_config(struct uart_port *port, unsigned int ctrl)
-{
-	volatile unsigned char *mstpcrl = (volatile unsigned char *)MSTPCRL;
-	int ch = (port->mapbase  - SMR0) >> 3;
-	unsigned char mask = 1 << (ch+1);
-
-	if (ctrl == sci_disable)
-		*mstpcrl |= mask;
-	else
-		*mstpcrl &= ~mask;
-}
-
-static void h8300_sci_enable(struct uart_port *port)
-{
-	h8300_sci_config(port, sci_enable);
-}
-
-static void h8300_sci_disable(struct uart_port *port)
-{
-	h8300_sci_config(port, sci_disable);
-}
-#endif
-
 #if defined(__H8300H__) || defined(__H8300S__)
 static void sci_init_pins(struct uart_port *port, unsigned int cflag)
 {
@@ -733,7 +702,6 @@ static irqreturn_t sci_mpxed_interrupt(int irq, void *ptr)
 	return ret;
 }
 
-#ifdef CONFIG_HAVE_CLK
 /*
  * Here we define a transistion notifier so that we can update all of our
  * ports' baud rate when the peripheral clock changes.
@@ -751,7 +719,6 @@ static int sci_notifier(struct notifier_block *self,
 		spin_lock_irqsave(&priv->lock, flags);
 		list_for_each_entry(sci_port, &priv->ports, node)
 			sci_port->port.uartclk = clk_get_rate(sci_port->dclk);
-
 		spin_unlock_irqrestore(&priv->lock, flags);
 	}
 
@@ -778,7 +745,6 @@ static void sci_clk_disable(struct uart_port *port)
 
 	clk_disable(sci_port->dclk);
 }
-#endif
 
 static int sci_request_irq(struct sci_port *port)
 {
@@ -833,8 +799,8 @@ static void sci_free_irq(struct sci_port *port)
 
 static unsigned int sci_tx_empty(struct uart_port *port)
 {
-	/* Can't detect */
-	return TIOCSER_TEMT;
+	unsigned short status = sci_in(port, SCxSR);
+	return status & SCxSR_TEND(port) ? TIOCSER_TEMT : 0;
 }
 
 static void sci_set_mctrl(struct uart_port *port, unsigned int mctrl)
@@ -1077,21 +1043,10 @@ static void __devinit sci_init_single(struct platform_device *dev,
 	sci_port->port.iotype	= UPIO_MEM;
 	sci_port->port.line	= index;
 	sci_port->port.fifosize	= 1;
-
-#if defined(__H8300H__) || defined(__H8300S__)
-#ifdef __H8300S__
-	sci_port->enable	= h8300_sci_enable;
-	sci_port->disable	= h8300_sci_disable;
-#endif
-	sci_port->port.uartclk	= CONFIG_CPU_CLOCK;
-#elif defined(CONFIG_HAVE_CLK)
 	sci_port->iclk		= p->clk ? clk_get(&dev->dev, p->clk) : NULL;
 	sci_port->dclk		= clk_get(&dev->dev, "peripheral_clk");
 	sci_port->enable	= sci_clk_enable;
 	sci_port->disable	= sci_clk_disable;
-#else
-#error "Need a valid uartclk"
-#endif
 
 	sci_port->break_timer.data = (unsigned long)sci_port;
 	sci_port->break_timer.function = sci_break_timer;
@@ -1106,7 +1061,6 @@ static void __devinit sci_init_single(struct platform_device *dev,
 	sci_port->type		= sci_port->port.type = p->type;
 
 	memcpy(&sci_port->irqs, &p->irqs, sizeof(p->irqs));
-
 }
 
 #ifdef CONFIG_SERIAL_SH_SCI_CONSOLE
@@ -1239,14 +1193,11 @@ static int sci_remove(struct platform_device *dev)
 	struct sci_port *p;
 	unsigned long flags;
 
-#ifdef CONFIG_HAVE_CLK
 	cpufreq_unregister_notifier(&priv->clk_nb, CPUFREQ_TRANSITION_NOTIFIER);
-#endif
 
 	spin_lock_irqsave(&priv->lock, flags);
 	list_for_each_entry(p, &priv->ports, node)
 		uart_remove_one_port(&sci_uart_driver, &p->port);
-
 	spin_unlock_irqrestore(&priv->lock, flags);
 
 	kfree(priv);
@@ -1307,10 +1258,8 @@ static int __devinit sci_probe(struct platform_device *dev)
 	spin_lock_init(&priv->lock);
 	platform_set_drvdata(dev, priv);
 
-#ifdef CONFIG_HAVE_CLK
 	priv->clk_nb.notifier_call = sci_notifier;
 	cpufreq_register_notifier(&priv->clk_nb, CPUFREQ_TRANSITION_NOTIFIER);
-#endif
 
 	if (dev->id != -1) {
 		ret = sci_probe_single(dev, dev->id, p, &sci_ports[dev->id]);
@@ -1370,7 +1319,7 @@ static struct dev_pm_ops sci_dev_pm_ops = {
 
 static struct platform_driver sci_driver = {
 	.probe		= sci_probe,
-	.remove		= __devexit_p(sci_remove),
+	.remove		= sci_remove,
 	.driver		= {
 		.name	= "sh-sci",
 		.owner	= THIS_MODULE,
diff --git a/drivers/serial/sh-sci.h b/drivers/serial/sh-sci.h
index 3e2fcf93b42e..a32094eeb42b 100644
--- a/drivers/serial/sh-sci.h
+++ b/drivers/serial/sh-sci.h
@@ -1,5 +1,5 @@
 #include <linux/serial_core.h>
-#include <asm/io.h>
+#include <linux/io.h>
 #include <linux/gpio.h>
 
 #if defined(CONFIG_H83007) || defined(CONFIG_H83068)
diff --git a/drivers/sh/Makefile b/drivers/sh/Makefile
index 6a025cefe6dc..4956bf1f2134 100644
--- a/drivers/sh/Makefile
+++ b/drivers/sh/Makefile
@@ -3,4 +3,5 @@
 #
 obj-$(CONFIG_SUPERHYWAY)	+= superhyway/
 obj-$(CONFIG_MAPLE)		+= maple/
+obj-$(CONFIG_GENERIC_GPIO)	+= pfc.o
 obj-y				+= intc.o
diff --git a/drivers/sh/intc.c b/drivers/sh/intc.c
index 559b5fe9dc0f..a7e5c2e9986c 100644
--- a/drivers/sh/intc.c
+++ b/drivers/sh/intc.c
@@ -2,6 +2,7 @@
  * Shared interrupt handling code for IPR and INTC2 types of IRQs.
  *
  * Copyright (C) 2007, 2008 Magnus Damm
+ * Copyright (C) 2009 Paul Mundt
  *
  * Based on intc2.c and ipr.c
  *
@@ -24,6 +25,7 @@
 #include <linux/sysdev.h>
 #include <linux/list.h>
 #include <linux/topology.h>
+#include <linux/bitmap.h>
 
 #define _INTC_MK(fn, mode, addr_e, addr_d, width, shift) \
 	((shift) | ((width) << 5) | ((fn) << 9) | ((mode) << 13) | \
@@ -59,6 +61,20 @@ struct intc_desc_int {
 
 static LIST_HEAD(intc_list);
 
+/*
+ * The intc_irq_map provides a global map of bound IRQ vectors for a
+ * given platform. Allocation of IRQs are either static through the CPU
+ * vector map, or dynamic in the case of board mux vectors or MSI.
+ *
+ * As this is a central point for all IRQ controllers on the system,
+ * each of the available sources are mapped out here. This combined with
+ * sparseirq makes it quite trivial to keep the vector map tightly packed
+ * when dynamically creating IRQs, as well as tying in to otherwise
+ * unused irq_desc positions in the sparse array.
+ */
+static DECLARE_BITMAP(intc_irq_map, NR_IRQS);
+static DEFINE_SPINLOCK(vector_lock);
+
 #ifdef CONFIG_SMP
 #define IS_SMP(x) x.smp
 #define INTC_REG(d, x, c) (d->reg[(x)] + ((d->smp[(x)] & 0xff) * c))
@@ -70,9 +86,7 @@ static LIST_HEAD(intc_list);
 #endif
 
 static unsigned int intc_prio_level[NR_IRQS]; /* for now */
-#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
 static unsigned long ack_handle[NR_IRQS];
-#endif
 
 static inline struct intc_desc_int *get_intc_desc(unsigned int irq)
 {
@@ -250,7 +264,6 @@ static int intc_set_wake(unsigned int irq, unsigned int on)
 	return 0; /* allow wakeup, but setup hardware in intc_suspend() */
 }
 
-#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
 static void intc_mask_ack(unsigned int irq)
 {
 	struct intc_desc_int *d = get_intc_desc(irq);
@@ -282,7 +295,6 @@ static void intc_mask_ack(unsigned int irq)
 		}
 	}
 }
-#endif
 
 static struct intc_handle_int *intc_find_irq(struct intc_handle_int *hp,
 					     unsigned int nr_hp,
@@ -501,7 +513,6 @@ static unsigned int __init intc_prio_data(struct intc_desc *desc,
 	return 0;
 }
 
-#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
 static unsigned int __init intc_ack_data(struct intc_desc *desc,
 					  struct intc_desc_int *d,
 					  intc_enum enum_id)
@@ -533,7 +544,6 @@ static unsigned int __init intc_ack_data(struct intc_desc *desc,
 
 	return 0;
 }
-#endif
 
 static unsigned int __init intc_sense_data(struct intc_desc *desc,
 					   struct intc_desc_int *d,
@@ -572,6 +582,11 @@ static void __init intc_register_irq(struct intc_desc *desc,
 	struct intc_handle_int *hp;
 	unsigned int data[2], primary;
 
+	/*
+	 * Register the IRQ position with the global IRQ map
+	 */
+	set_bit(irq, intc_irq_map);
+
 	/* Prefer single interrupt source bitmap over other combinations:
 	 * 1. bitmap, single interrupt source
 	 * 2. priority, single interrupt source
@@ -641,10 +656,8 @@ static void __init intc_register_irq(struct intc_desc *desc,
 	/* irq should be disabled by default */
 	d->chip.mask(irq);
 
-#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
 	if (desc->ack_regs)
 		ack_handle[irq] = intc_ack_data(desc, d, enum_id);
-#endif
 }
 
 static unsigned int __init save_reg(struct intc_desc_int *d,
@@ -681,10 +694,8 @@ void __init register_intc_controller(struct intc_desc *desc)
 	d->nr_reg = desc->mask_regs ? desc->nr_mask_regs * 2 : 0;
 	d->nr_reg += desc->prio_regs ? desc->nr_prio_regs * 2 : 0;
 	d->nr_reg += desc->sense_regs ? desc->nr_sense_regs : 0;
-
-#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
 	d->nr_reg += desc->ack_regs ? desc->nr_ack_regs : 0;
-#endif
+
 	d->reg = kzalloc(d->nr_reg * sizeof(*d->reg), GFP_NOWAIT);
 #ifdef CONFIG_SMP
 	d->smp = kzalloc(d->nr_reg * sizeof(*d->smp), GFP_NOWAIT);
@@ -727,14 +738,12 @@ void __init register_intc_controller(struct intc_desc *desc)
 	d->chip.set_type = intc_set_sense;
 	d->chip.set_wake = intc_set_wake;
 
-#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
 	if (desc->ack_regs) {
 		for (i = 0; i < desc->nr_ack_regs; i++)
 			k += save_reg(d, k, desc->ack_regs[i].set_reg, 0);
 
 		d->chip.mask_ack = intc_mask_ack;
 	}
-#endif
 
 	BUG_ON(k > 256); /* _INTC_ADDR_E() and _INTC_ADDR_D() are 8 bits */
 
@@ -856,5 +865,91 @@ static int __init register_intc_sysdevs(void)
 
 	return error;
 }
-
 device_initcall(register_intc_sysdevs);
+
+/*
+ * Dynamic IRQ allocation and deallocation
+ */
+static unsigned int create_irq_on_node(unsigned int irq_want, int node)
+{
+	unsigned int irq = 0, new;
+	unsigned long flags;
+	struct irq_desc *desc;
+
+	spin_lock_irqsave(&vector_lock, flags);
+
+	/*
+	 * First try the wanted IRQ, then scan.
+	 */
+	if (test_and_set_bit(irq_want, intc_irq_map)) {
+		new = find_first_zero_bit(intc_irq_map, nr_irqs);
+		if (unlikely(new == nr_irqs))
+			goto out_unlock;
+
+		desc = irq_to_desc_alloc_node(new, node);
+		if (unlikely(!desc)) {
+			pr_info("can't get irq_desc for %d\n", new);
+			goto out_unlock;
+		}
+
+		desc = move_irq_desc(desc, node);
+		__set_bit(new, intc_irq_map);
+		irq = new;
+	}
+
+out_unlock:
+	spin_unlock_irqrestore(&vector_lock, flags);
+
+	if (irq > 0)
+		dynamic_irq_init(irq);
+
+	return irq;
+}
+
+int create_irq(void)
+{
+	int nid = cpu_to_node(smp_processor_id());
+	int irq;
+
+	irq = create_irq_on_node(NR_IRQS_LEGACY, nid);
+	if (irq == 0)
+		irq = -1;
+
+	return irq;
+}
+
+void destroy_irq(unsigned int irq)
+{
+	unsigned long flags;
+
+	dynamic_irq_cleanup(irq);
+
+	spin_lock_irqsave(&vector_lock, flags);
+	__clear_bit(irq, intc_irq_map);
+	spin_unlock_irqrestore(&vector_lock, flags);
+}
+
+int reserve_irq_vector(unsigned int irq)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&vector_lock, flags);
+	if (test_and_set_bit(irq, intc_irq_map))
+		ret = -EBUSY;
+	spin_unlock_irqrestore(&vector_lock, flags);
+
+	return ret;
+}
+
+void reserve_irq_legacy(void)
+{
+	unsigned long flags;
+	int i, j;
+
+	spin_lock_irqsave(&vector_lock, flags);
+	j = find_first_bit(intc_irq_map, nr_irqs);
+	for (i = 0; i < j; i++)
+		__set_bit(i, intc_irq_map);
+	spin_unlock_irqrestore(&vector_lock, flags);
+}
diff --git a/drivers/sh/maple/maple.c b/drivers/sh/maple/maple.c
index 93c20e135ee1..4e8f57d4131f 100644
--- a/drivers/sh/maple/maple.c
+++ b/drivers/sh/maple/maple.c
@@ -106,7 +106,7 @@ static void maple_dma_reset(void)
 	* max delay is 11
 	*/
 	ctrl_outl(MAPLE_2MBPS | MAPLE_TIMEOUT(0xFFFF), MAPLE_SPEED);
-	ctrl_outl(PHYSADDR(maple_sendbuf), MAPLE_DMAADDR);
+	ctrl_outl(virt_to_phys(maple_sendbuf), MAPLE_DMAADDR);
 	ctrl_outl(1, MAPLE_ENABLE);
 }
 
@@ -258,7 +258,7 @@ static void maple_build_block(struct mapleq *mq)
 	maple_lastptr = maple_sendptr;
 
 	*maple_sendptr++ = (port << 16) | len | 0x80000000;
-	*maple_sendptr++ = PHYSADDR(mq->recvbuf->buf);
+	*maple_sendptr++ = virt_to_phys(mq->recvbuf->buf);
 	*maple_sendptr++ =
 	    mq->command | (to << 8) | (from << 16) | (len << 24);
 	while (len-- > 0)
diff --git a/arch/sh/kernel/gpio.c b/drivers/sh/pfc.c
index d22e5af699f9..841ed5030c8f 100644
--- a/arch/sh/kernel/gpio.c
+++ b/drivers/sh/pfc.c
@@ -7,7 +7,6 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  */
-
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
@@ -35,11 +34,11 @@ static unsigned long gpio_read_raw_reg(unsigned long reg,
 {
 	switch (reg_width) {
 	case 8:
-		return ctrl_inb(reg);
+		return __raw_readb(reg);
 	case 16:
-		return ctrl_inw(reg);
+		return __raw_readw(reg);
 	case 32:
-		return ctrl_inl(reg);
+		return __raw_readl(reg);
 	}
 
 	BUG();
@@ -52,13 +51,13 @@ static void gpio_write_raw_reg(unsigned long reg,
 {
 	switch (reg_width) {
 	case 8:
-		ctrl_outb(data, reg);
+		__raw_writeb(data, reg);
 		return;
 	case 16:
-		ctrl_outw(data, reg);
+		__raw_writew(data, reg);
 		return;
 	case 32:
-		ctrl_outl(data, reg);
+		__raw_writel(data, reg);
 		return;
 	}
 
@@ -72,11 +71,9 @@ static void gpio_write_bit(struct pinmux_data_reg *dr,
 
 	pos = dr->reg_width - (in_pos + 1);
 
-#ifdef DEBUG
-	pr_info("write_bit addr = %lx, value = %ld, pos = %ld, "
-		"r_width = %ld\n",
-		dr->reg, !!value, pos, dr->reg_width);
-#endif
+	pr_debug("write_bit addr = %lx, value = %ld, pos = %ld, "
+		 "r_width = %ld\n",
+		 dr->reg, !!value, pos, dr->reg_width);
 
 	if (value)
 		set_bit(pos, &dr->reg_shadow);
@@ -95,11 +92,9 @@ static int gpio_read_reg(unsigned long reg, unsigned long reg_width,
 	mask = (1 << field_width) - 1;
 	pos = reg_width - ((in_pos + 1) * field_width);
 
-#ifdef DEBUG
-	pr_info("read_reg: addr = %lx, pos = %ld, "
-		"r_width = %ld, f_width = %ld\n",
-		reg, pos, reg_width, field_width);
-#endif
+	pr_debug("read_reg: addr = %lx, pos = %ld, "
+		 "r_width = %ld, f_width = %ld\n",
+		 reg, pos, reg_width, field_width);
 
 	data = gpio_read_raw_reg(reg, reg_width);
 	return (data >> pos) & mask;
@@ -114,24 +109,22 @@ static void gpio_write_reg(unsigned long reg, unsigned long reg_width,
 	mask = (1 << field_width) - 1;
 	pos = reg_width - ((in_pos + 1) * field_width);
 
-#ifdef DEBUG
-	pr_info("write_reg addr = %lx, value = %ld, pos = %ld, "
-		"r_width = %ld, f_width = %ld\n",
-		reg, value, pos, reg_width, field_width);
-#endif
+	pr_debug("write_reg addr = %lx, value = %ld, pos = %ld, "
+		 "r_width = %ld, f_width = %ld\n",
+		 reg, value, pos, reg_width, field_width);
 
 	mask = ~(mask << pos);
 	value = value << pos;
 
 	switch (reg_width) {
 	case 8:
-		ctrl_outb((ctrl_inb(reg) & mask) | value, reg);
+		__raw_writeb((__raw_readb(reg) & mask) | value, reg);
 		break;
 	case 16:
-		ctrl_outw((ctrl_inw(reg) & mask) | value, reg);
+		__raw_writew((__raw_readw(reg) & mask) | value, reg);
 		break;
 	case 32:
-		ctrl_outl((ctrl_inl(reg) & mask) | value, reg);
+		__raw_writel((__raw_readl(reg) & mask) | value, reg);
 		break;
 	}
 }
diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c
index 3ad5157f9899..b4b5de930cf5 100644
--- a/drivers/video/sh_mobile_lcdcfb.c
+++ b/drivers/video/sh_mobile_lcdcfb.c
@@ -281,18 +281,34 @@ static void sh_mobile_lcdc_deferred_io(struct fb_info *info,
 				       struct list_head *pagelist)
 {
 	struct sh_mobile_lcdc_chan *ch = info->par;
-	unsigned int nr_pages;
 
 	/* enable clocks before accessing hardware */
 	sh_mobile_lcdc_clk_on(ch->lcdc);
 
-	nr_pages = sh_mobile_lcdc_sginit(info, pagelist);
-	dma_map_sg(info->dev, ch->sglist, nr_pages, DMA_TO_DEVICE);
-
-	/* trigger panel update */
-	lcdc_write_chan(ch, LDSM2R, 1);
-
-	dma_unmap_sg(info->dev, ch->sglist, nr_pages, DMA_TO_DEVICE);
+	/*
+	 * It's possible to get here without anything on the pagelist via
+	 * sh_mobile_lcdc_deferred_io_touch() or via a userspace fsync()
+	 * invocation. In the former case, the acceleration routines are
+	 * stepped in to when using the framebuffer console causing the
+	 * workqueue to be scheduled without any dirty pages on the list.
+	 *
+	 * Despite this, a panel update is still needed given that the
+	 * acceleration routines have their own methods for writing in
+	 * that still need to be updated.
+	 *
+	 * The fsync() and empty pagelist case could be optimized for,
+	 * but we don't bother, as any application exhibiting such
+	 * behaviour is fundamentally broken anyways.
+	 */
+	if (!list_empty(pagelist)) {
+		unsigned int nr_pages = sh_mobile_lcdc_sginit(info, pagelist);
+
+		/* trigger panel update */
+		dma_map_sg(info->dev, ch->sglist, nr_pages, DMA_TO_DEVICE);
+		lcdc_write_chan(ch, LDSM2R, 1);
+		dma_unmap_sg(info->dev, ch->sglist, nr_pages, DMA_TO_DEVICE);
+	} else
+		lcdc_write_chan(ch, LDSM2R, 1);
 }
 
 static void sh_mobile_lcdc_deferred_io_touch(struct fb_info *info)
diff --git a/arch/sh/include/asm/sh_keysc.h b/include/linux/input/sh_keysc.h
index 4a65b1e40eab..c211b5cf08e6 100644
--- a/arch/sh/include/asm/sh_keysc.h
+++ b/include/linux/input/sh_keysc.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_KEYSC_H__
-#define __ASM_KEYSC_H__
+#ifndef __SH_KEYSC_H__
+#define __SH_KEYSC_H__
 
 #define SH_KEYSC_MAXKEYS 30
 
@@ -11,4 +11,4 @@ struct sh_keysc_info {
 	int keycodes[SH_KEYSC_MAXKEYS];
 };
 
-#endif /* __ASM_KEYSC_H__ */
+#endif /* __SH_KEYSC_H__ */
diff --git a/include/linux/mfd/sh_mobile_sdhi.h b/include/linux/mfd/sh_mobile_sdhi.h
new file mode 100644
index 000000000000..3bcd7163485c
--- /dev/null
+++ b/include/linux/mfd/sh_mobile_sdhi.h
@@ -0,0 +1,8 @@
+#ifndef __SH_MOBILE_SDHI_H__
+#define __SH_MOBILE_SDHI_H__
+
+struct sh_mobile_sdhi_info {
+	void (*set_pwr)(struct platform_device *pdev, int state);
+};
+
+#endif /* __SH_MOBILE_SDHI_H__ */
diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h
index 68e212ff9dde..4ef246f14654 100644
--- a/include/linux/sh_intc.h
+++ b/include/linux/sh_intc.h
@@ -57,10 +57,8 @@ struct intc_desc {
 	struct intc_sense_reg *sense_regs;
 	unsigned int nr_sense_regs;
 	char *name;
-#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
 	struct intc_mask_reg *ack_regs;
 	unsigned int nr_ack_regs;
-#endif
 };
 
 #define _INTC_ARRAY(a) a, sizeof(a)/sizeof(*a)
@@ -73,7 +71,6 @@ struct intc_desc symbol __initdata = {					\
 	chipname,							\
 }
 
-#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A)
 #define DECLARE_INTC_DESC_ACK(symbol, chipname, vectors, groups,	\
 	mask_regs, prio_regs, sense_regs, ack_regs)			\
 struct intc_desc symbol __initdata = {					\
@@ -83,9 +80,11 @@ struct intc_desc symbol __initdata = {					\
 	chipname,							\
 	_INTC_ARRAY(ack_regs),						\
 }
-#endif
 
 void __init register_intc_controller(struct intc_desc *desc);
 int intc_set_priority(unsigned int irq, unsigned int prio);
 
+int reserve_irq_vector(unsigned int irq);
+void reserve_irq_legacy(void);
+
 #endif /* __SH_INTC_H */
diff --git a/include/linux/sh_pfc.h b/include/linux/sh_pfc.h
new file mode 100644
index 000000000000..07c08af9f8f6
--- /dev/null
+++ b/include/linux/sh_pfc.h
@@ -0,0 +1,96 @@
+/*
+ * SuperH Pin Function Controller Support
+ *
+ * Copyright (c) 2008 Magnus Damm
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef __SH_PFC_H
+#define __SH_PFC_H
+
+#include <asm-generic/gpio.h>
+
+typedef unsigned short pinmux_enum_t;
+typedef unsigned short pinmux_flag_t;
+
+#define PINMUX_TYPE_NONE            0
+#define PINMUX_TYPE_FUNCTION        1
+#define PINMUX_TYPE_GPIO            2
+#define PINMUX_TYPE_OUTPUT          3
+#define PINMUX_TYPE_INPUT           4
+#define PINMUX_TYPE_INPUT_PULLUP    5
+#define PINMUX_TYPE_INPUT_PULLDOWN  6
+
+#define PINMUX_FLAG_TYPE            (0x7)
+#define PINMUX_FLAG_WANT_PULLUP     (1 << 3)
+#define PINMUX_FLAG_WANT_PULLDOWN   (1 << 4)
+
+#define PINMUX_FLAG_DBIT_SHIFT      5
+#define PINMUX_FLAG_DBIT            (0x1f << PINMUX_FLAG_DBIT_SHIFT)
+#define PINMUX_FLAG_DREG_SHIFT      10
+#define PINMUX_FLAG_DREG            (0x3f << PINMUX_FLAG_DREG_SHIFT)
+
+struct pinmux_gpio {
+	pinmux_enum_t enum_id;
+	pinmux_flag_t flags;
+};
+
+#define PINMUX_GPIO(gpio, data_or_mark) [gpio] = { data_or_mark }
+#define PINMUX_DATA(data_or_mark, ids...) data_or_mark, ids, 0
+
+struct pinmux_cfg_reg {
+	unsigned long reg, reg_width, field_width;
+	unsigned long *cnt;
+	pinmux_enum_t *enum_ids;
+};
+
+#define PINMUX_CFG_REG(name, r, r_width, f_width) \
+	.reg = r, .reg_width = r_width, .field_width = f_width,		\
+	.cnt = (unsigned long [r_width / f_width]) {}, \
+	.enum_ids = (pinmux_enum_t [(r_width / f_width) * (1 << f_width)]) \
+
+struct pinmux_data_reg {
+	unsigned long reg, reg_width, reg_shadow;
+	pinmux_enum_t *enum_ids;
+};
+
+#define PINMUX_DATA_REG(name, r, r_width) \
+	.reg = r, .reg_width = r_width,	\
+	.enum_ids = (pinmux_enum_t [r_width]) \
+
+struct pinmux_range {
+	pinmux_enum_t begin;
+	pinmux_enum_t end;
+	pinmux_enum_t force;
+};
+
+struct pinmux_info {
+	char *name;
+	pinmux_enum_t reserved_id;
+	struct pinmux_range data;
+	struct pinmux_range input;
+	struct pinmux_range input_pd;
+	struct pinmux_range input_pu;
+	struct pinmux_range output;
+	struct pinmux_range mark;
+	struct pinmux_range function;
+
+	unsigned first_gpio, last_gpio;
+
+	struct pinmux_gpio *gpios;
+	struct pinmux_cfg_reg *cfg_regs;
+	struct pinmux_data_reg *data_regs;
+
+	pinmux_enum_t *gpio_data;
+	unsigned int gpio_data_size;
+
+	unsigned long *gpio_in_use;
+	struct gpio_chip chip;
+};
+
+int register_pinmux(struct pinmux_info *pip);
+
+#endif /* __SH_PFC_H */