author		Linus Torvalds <torvalds@linux-foundation.org>	2010-05-19 11:35:30 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-05-19 11:35:30 -0700
commit		ba0234ec35127fe21d373db53cbaf9fe20620cb6 (patch)
tree		a2cbef204482512ae9e723f2bf4d22051975ef45 /arch
parent		537b60d17894b7c19a6060feae40299d7109d6e7 (diff)
parent		939e379e9e183ae6291ac7caa4a5e1dfadae4ccc (diff)
download	linux-ba0234ec35127fe21d373db53cbaf9fe20620cb6.tar.gz
Merge branch 'for-linus' of git://git390.marist.edu/pub/scm/linux-2.6
* 'for-linus' of git://git390.marist.edu/pub/scm/linux-2.6: (24 commits)
  [S390] drivers/s390/char: Use kmemdup
  [S390] drivers/s390/char: Use kstrdup
  [S390] debug: enable exception-trace debug facility
  [S390] s390_hypfs: Add new attributes
  [S390] qdio: remove API wrappers
  [S390] qdio: set correct bit in dsci
  [S390] qdio: dont convert timestamps to microseconds
  [S390] qdio: remove memset hack
  [S390] qdio: prevent starvation on PCI devices
  [S390] qdio: count number of qdio interrupts
  [S390] user space fault: report fault before calling do_exit
  [S390] topology: expose core identifier
  [S390] dasd: remove uid from devmap
  [S390] dasd: add dynamic pav toleration
  [S390] vdso: add missing vdso_install target
  [S390] vdso: remove redundant check for CONFIG_64BIT
  [S390] avoid default_llseek in s390 drivers
  [S390] vmcp: disallow modular build
  [S390] add breaking event address for user space
  [S390] virtualization aware cpu measurement
  ...
Diffstat (limited to 'arch')
-rw-r--r--  arch/s390/Kconfig                    |   7
-rw-r--r--  arch/s390/Makefile                   |   6
-rw-r--r--  arch/s390/hypfs/hypfs.h              |   4
-rw-r--r--  arch/s390/hypfs/hypfs_diag.c         | 123
-rw-r--r--  arch/s390/hypfs/hypfs_vm.c           |  87
-rw-r--r--  arch/s390/hypfs/inode.c              |  42
-rw-r--r--  arch/s390/include/asm/cputime.h      |   9
-rw-r--r--  arch/s390/include/asm/lowcore.h      |  89
-rw-r--r--  arch/s390/include/asm/ptrace.h       |   3
-rw-r--r--  arch/s390/include/asm/qdio.h         |   2
-rw-r--r--  arch/s390/include/asm/setup.h        |   5
-rw-r--r--  arch/s390/include/asm/system.h       |   5
-rw-r--r--  arch/s390/include/asm/thread_info.h  |   1
-rw-r--r--  arch/s390/include/asm/timex.h        |   8
-rw-r--r--  arch/s390/include/asm/topology.h     |   2
-rw-r--r--  arch/s390/kernel/asm-offsets.c       |   6
-rw-r--r--  arch/s390/kernel/debug.c             |   1
-rw-r--r--  arch/s390/kernel/early.c             |   4
-rw-r--r--  arch/s390/kernel/entry.S             | 324
-rw-r--r--  arch/s390/kernel/entry64.S           | 617
-rw-r--r--  arch/s390/kernel/head.S              |   4
-rw-r--r--  arch/s390/kernel/nmi.c               |   3
-rw-r--r--  arch/s390/kernel/processor.c         |  37
-rw-r--r--  arch/s390/kernel/ptrace.c            |  68
-rw-r--r--  arch/s390/kernel/s390_ext.c          |   3
-rw-r--r--  arch/s390/kernel/setup.c             |  27
-rw-r--r--  arch/s390/kernel/signal.c            |   2
-rw-r--r--  arch/s390/kernel/topology.c          |   7
-rw-r--r--  arch/s390/kernel/traps.c             |  31
-rw-r--r--  arch/s390/kernel/vdso.c              |   4
-rw-r--r--  arch/s390/kernel/vtime.c             |  15
-rw-r--r--  arch/s390/kvm/Kconfig                |  11
-rw-r--r--  arch/s390/kvm/sie64a.S               |  77
-rw-r--r--  arch/s390/mm/fault.c                 |  32
34 files changed, 969 insertions, 697 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 0d8cd9bbe101..79d0ca086820 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -444,13 +444,6 @@ config FORCE_MAX_ZONEORDER
 	int
 	default "9"
 
-config PROCESS_DEBUG
-	bool "Show crashed user process info"
-	help
-	  Say Y to print all process fault locations to the console.  This is
-	  a debugging option; you probably do not want to set it unless you
-	  are an S390 port maintainer.
-
 config PFAULT
 	bool "Pseudo page fault support"
 	help
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 0da10746e0e5..30c5f01f93b0 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -116,6 +116,12 @@ image bzImage: vmlinux
 zfcpdump:
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
+vdso_install:
+ifeq ($(CONFIG_64BIT),y)
+	$(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@
+endif
+	$(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@
+
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
 
diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h
index aea572009d60..fa487d4cc08b 100644
--- a/arch/s390/hypfs/hypfs.h
+++ b/arch/s390/hypfs/hypfs.h
@@ -11,6 +11,7 @@
 
 #include <linux/fs.h>
 #include <linux/types.h>
+#include <linux/debugfs.h>
 
 #define REG_FILE_MODE    0440
 #define UPDATE_FILE_MODE 0220
@@ -34,6 +35,9 @@ extern int hypfs_diag_create_files(struct super_block *sb, struct dentry *root);
 
 /* VM Hypervisor */
 extern int hypfs_vm_init(void);
+extern void hypfs_vm_exit(void);
 extern int hypfs_vm_create_files(struct super_block *sb, struct dentry *root);
 
+/* Directory for debugfs files */
+extern struct dentry *hypfs_dbfs_dir;
 #endif /* _HYPFS_H_ */
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index 5b1acdba6495..1211bb1d2f24 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/vmalloc.h>
+#include <linux/mm.h>
 #include <asm/ebcdic.h>
 #include "hypfs.h"
 
@@ -22,6 +23,8 @@
 #define CPU_NAME_LEN 16		/* type name len of cpus in diag224 name table */
 #define TMP_SIZE 64		/* size of temporary buffers */
 
+#define DBFS_D204_HDR_VERSION	0
+
 /* diag 204 subcodes */
 enum diag204_sc {
 	SUBC_STIB4 = 4,
@@ -47,6 +50,8 @@ static void *diag204_buf;		/* 4K aligned buffer for diag204 data */
 static void *diag204_buf_vmalloc;	/* vmalloc pointer for diag204 data */
 static int diag204_buf_pages;		/* number of pages for diag204 data */
 
+static struct dentry *dbfs_d204_file;
+
 /*
  * DIAG 204 data structures and member access functions.
  *
@@ -364,18 +369,21 @@ static void diag204_free_buffer(void)
 	} else {
 		free_pages((unsigned long) diag204_buf, 0);
 	}
-	diag204_buf_pages = 0;
 	diag204_buf = NULL;
 }
 
+static void *page_align_ptr(void *ptr)
+{
+	return (void *) PAGE_ALIGN((unsigned long) ptr);
+}
+
 static void *diag204_alloc_vbuf(int pages)
 {
 	/* The buffer has to be page aligned! */
 	diag204_buf_vmalloc = vmalloc(PAGE_SIZE * (pages + 1));
 	if (!diag204_buf_vmalloc)
 		return ERR_PTR(-ENOMEM);
-	diag204_buf = (void*)((unsigned long)diag204_buf_vmalloc
-				& ~0xfffUL) + 0x1000;
+	diag204_buf = page_align_ptr(diag204_buf_vmalloc);
 	diag204_buf_pages = pages;
 	return diag204_buf;
 }
@@ -468,17 +476,26 @@ fail_alloc:
 	return rc;
 }
 
+static int diag204_do_store(void *buf, int pages)
+{
+	int rc;
+
+	rc = diag204((unsigned long) diag204_store_sc |
+		     (unsigned long) diag204_info_type, pages, buf);
+	return rc < 0 ? -ENOSYS : 0;
+}
+
 static void *diag204_store(void)
 {
 	void *buf;
-	int pages;
+	int pages, rc;
 
 	buf = diag204_get_buffer(diag204_info_type, &pages);
 	if (IS_ERR(buf))
 		goto out;
-	if (diag204((unsigned long)diag204_store_sc |
-		    (unsigned long)diag204_info_type, pages, buf) < 0)
-		return ERR_PTR(-ENOSYS);
+	rc = diag204_do_store(buf, pages);
+	if (rc)
+		return ERR_PTR(rc);
 out:
 	return buf;
 }
@@ -526,6 +543,92 @@ static int diag224_idx2name(int index, char *name)
 	return 0;
 }
 
+struct dbfs_d204_hdr {
+	u64	len;		/* Length of d204 buffer without header */
+	u16	version;	/* Version of header */
+	u8	sc;		/* Used subcode */
+	char	reserved[53];
+} __attribute__ ((packed));
+
+struct dbfs_d204 {
+	struct dbfs_d204_hdr	hdr;	/* 64 byte header */
+	char			buf[];	/* d204 buffer */
+} __attribute__ ((packed));
+
+struct dbfs_d204_private {
+	struct dbfs_d204	*d204;	/* Aligned d204 data with header */
+	void			*base;	/* Base pointer (needed for vfree) */
+};
+
+static int dbfs_d204_open(struct inode *inode, struct file *file)
+{
+	struct dbfs_d204_private *data;
+	struct dbfs_d204 *d204;
+	int rc, buf_size;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+	buf_size = PAGE_SIZE * (diag204_buf_pages + 1) + sizeof(d204->hdr);
+	data->base = vmalloc(buf_size);
+	if (!data->base) {
+		rc = -ENOMEM;
+		goto fail_kfree_data;
+	}
+	memset(data->base, 0, buf_size);
+	d204 = page_align_ptr(data->base + sizeof(d204->hdr))
+		- sizeof(d204->hdr);
+	rc = diag204_do_store(&d204->buf, diag204_buf_pages);
+	if (rc)
+		goto fail_vfree_base;
+	d204->hdr.version = DBFS_D204_HDR_VERSION;
+	d204->hdr.len = PAGE_SIZE * diag204_buf_pages;
+	d204->hdr.sc = diag204_store_sc;
+	data->d204 = d204;
+	file->private_data = data;
+	return nonseekable_open(inode, file);
+
+fail_vfree_base:
+	vfree(data->base);
+fail_kfree_data:
+	kfree(data);
+	return rc;
+}
+
+static int dbfs_d204_release(struct inode *inode, struct file *file)
+{
+	struct dbfs_d204_private *data = file->private_data;
+
+	vfree(data->base);
+	kfree(data);
+	return 0;
+}
+
+static ssize_t dbfs_d204_read(struct file *file, char __user *buf,
+			      size_t size, loff_t *ppos)
+{
+	struct dbfs_d204_private *data = file->private_data;
+
+	return simple_read_from_buffer(buf, size, ppos, data->d204,
+				       data->d204->hdr.len +
+				       sizeof(data->d204->hdr));
+}
+
+static const struct file_operations dbfs_d204_ops = {
+	.open		= dbfs_d204_open,
+	.read		= dbfs_d204_read,
+	.release	= dbfs_d204_release,
+};
+
+static int hypfs_dbfs_init(void)
+{
+	dbfs_d204_file = debugfs_create_file("diag_204", 0400, hypfs_dbfs_dir,
+					     NULL, &dbfs_d204_ops);
+	if (IS_ERR(dbfs_d204_file))
+		return PTR_ERR(dbfs_d204_file);
+	return 0;
+}
+
 __init int hypfs_diag_init(void)
 {
 	int rc;
@@ -540,11 +643,17 @@ __init int hypfs_diag_init(void)
 		pr_err("The hardware system does not provide all "
 		       "functions required by hypfs\n");
 	}
+	if (diag204_info_type == INFO_EXT) {
+		rc = hypfs_dbfs_init();
+		if (rc)
+			diag204_free_buffer();
+	}
 	return rc;
 }
 
 void hypfs_diag_exit(void)
 {
+	debugfs_remove(dbfs_d204_file);
 	diag224_delete_name_table();
 	diag204_free_buffer();
 }
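
The hunk above exposes the raw diag 204 data through debugfs as s390_hypfs/diag_204, prefixed with the 64-byte dbfs_d204_hdr defined in the patch. A minimal user-space sketch of reading that header follows; the debugfs mount point /sys/kernel/debug and the error handling are assumptions, not part of the patch (the file is created with mode 0400, so root privileges are needed):

/* Sketch: read the diag_204 debugfs file and print its header fields.
 * Assumes debugfs is mounted at /sys/kernel/debug. */
#include <stdio.h>
#include <stdint.h>

struct dbfs_d204_hdr {          /* mirrors the header added by this patch */
	uint64_t len;           /* length of the d204 buffer without header */
	uint16_t version;       /* header version, 0 in this patch */
	uint8_t  sc;            /* diag 204 subcode that was used */
	char     reserved[53];
} __attribute__((packed));      /* 8 + 2 + 1 + 53 = 64 bytes */

int main(void)
{
	struct dbfs_d204_hdr hdr;
	FILE *f = fopen("/sys/kernel/debug/s390_hypfs/diag_204", "rb");

	if (!f || fread(&hdr, sizeof(hdr), 1, f) != 1) {
		perror("diag_204");
		return 1;
	}
	printf("version=%u sc=%u payload=%llu bytes\n",
	       hdr.version, hdr.sc, (unsigned long long)hdr.len);
	fclose(f);
	return 0;
}
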
diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
index f0b0d31f0b48..ee5ab1a578e7 100644
--- a/arch/s390/hypfs/hypfs_vm.c
+++ b/arch/s390/hypfs/hypfs_vm.c
@@ -10,14 +10,18 @@
 #include <linux/string.h>
 #include <linux/vmalloc.h>
 #include <asm/ebcdic.h>
+#include <asm/timex.h>
 #include "hypfs.h"
 
 #define NAME_LEN 8
+#define DBFS_D2FC_HDR_VERSION 0
 
 static char local_guest[] = "        ";
 static char all_guests[] = "*       ";
 static char *guest_query;
 
+static struct dentry *dbfs_d2fc_file;
+
 struct diag2fc_data {
 	__u32 version;
 	__u32 flags;
@@ -76,23 +80,26 @@ static int diag2fc(int size, char* query, void *addr)
 		return -residual_cnt;
 }
 
-static struct diag2fc_data *diag2fc_store(char *query, int *count)
+/*
+ * Allocate buffer for "query" and store diag 2fc at "offset"
+ */
+static void *diag2fc_store(char *query, unsigned int *count, int offset)
 {
+	void *data;
 	int size;
-	struct diag2fc_data *data;
 
 	do {
 		size = diag2fc(0, query, NULL);
 		if (size < 0)
 			return ERR_PTR(-EACCES);
-		data = vmalloc(size);
+		data = vmalloc(size + offset);
 		if (!data)
 			return ERR_PTR(-ENOMEM);
-		if (diag2fc(size, query, data) == 0)
+		if (diag2fc(size, query, data + offset) == 0)
 			break;
 		vfree(data);
 	} while (1);
-	*count = (size / sizeof(*data));
+	*count = (size / sizeof(struct diag2fc_data));
 
 	return data;
 }
@@ -168,9 +175,10 @@ int hypfs_vm_create_files(struct super_block *sb, struct dentry *root)
 {
 	struct dentry *dir, *file;
 	struct diag2fc_data *data;
-	int rc, i, count = 0;
+	unsigned int count = 0;
+	int rc, i;
 
-	data = diag2fc_store(guest_query, &count);
+	data = diag2fc_store(guest_query, &count, 0);
 	if (IS_ERR(data))
 		return PTR_ERR(data);
 
@@ -218,8 +226,61 @@ failed:
 	return rc;
 }
 
+struct dbfs_d2fc_hdr {
+	u64	len;		/* Length of d2fc buffer without header */
+	u16	version;	/* Version of header */
+	char	tod_ext[16];	/* TOD clock for d2fc */
+	u64	count;		/* Number of VM guests in d2fc buffer */
+	char	reserved[30];
+} __attribute__ ((packed));
+
+struct dbfs_d2fc {
+	struct dbfs_d2fc_hdr	hdr;	/* 64 byte header */
+	char			buf[];	/* d2fc buffer */
+} __attribute__ ((packed));
+
+static int dbfs_d2fc_open(struct inode *inode, struct file *file)
+{
+	struct dbfs_d2fc *data;
+	unsigned int count;
+
+	data = diag2fc_store(guest_query, &count, sizeof(data->hdr));
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+	get_clock_ext(data->hdr.tod_ext);
+	data->hdr.len = count * sizeof(struct diag2fc_data);
+	data->hdr.version = DBFS_D2FC_HDR_VERSION;
+	data->hdr.count = count;
+	memset(&data->hdr.reserved, 0, sizeof(data->hdr.reserved));
+	file->private_data = data;
+	return nonseekable_open(inode, file);
+}
+
+static int dbfs_d2fc_release(struct inode *inode, struct file *file)
+{
+	diag2fc_free(file->private_data);
+	return 0;
+}
+
+static ssize_t dbfs_d2fc_read(struct file *file, char __user *buf,
+				    size_t size, loff_t *ppos)
+{
+	struct dbfs_d2fc *data = file->private_data;
+
+	return simple_read_from_buffer(buf, size, ppos, data, data->hdr.len +
+				       sizeof(struct dbfs_d2fc_hdr));
+}
+
+static const struct file_operations dbfs_d2fc_ops = {
+	.open		= dbfs_d2fc_open,
+	.read		= dbfs_d2fc_read,
+	.release	= dbfs_d2fc_release,
+};
+
 int hypfs_vm_init(void)
 {
+	if (!MACHINE_IS_VM)
+		return 0;
 	if (diag2fc(0, all_guests, NULL) > 0)
 		guest_query = all_guests;
 	else if (diag2fc(0, local_guest, NULL) > 0)
@@ -227,5 +288,17 @@ int hypfs_vm_init(void)
 	else
 		return -EACCES;
 
+	dbfs_d2fc_file = debugfs_create_file("diag_2fc", 0400, hypfs_dbfs_dir,
+					     NULL, &dbfs_d2fc_ops);
+	if (IS_ERR(dbfs_d2fc_file))
+		return PTR_ERR(dbfs_d2fc_file);
+
 	return 0;
 }
+
+void hypfs_vm_exit(void)
+{
+	if (!MACHINE_IS_VM)
+		return;
+	debugfs_remove(dbfs_d2fc_file);
+}
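
The diag_2fc debugfs file added here follows the same pattern with a different 64-byte header: count holds the number of VM guests in the buffer and tod_ext the store-clock-extended value taken at collection time. A hedged sketch of decoding it, with the same path and privilege assumptions as the diag_204 example above:

/* Sketch: decode the dbfs_d2fc header added by this patch. */
#include <stdio.h>
#include <stdint.h>

struct dbfs_d2fc_hdr {
	uint64_t len;           /* length of d2fc buffer without header */
	uint16_t version;       /* header version, 0 in this patch */
	char     tod_ext[16];   /* TOD clock (stcke) at collection time */
	uint64_t count;         /* number of VM guests in the buffer */
	char     reserved[30];
} __attribute__((packed));      /* 8 + 2 + 16 + 8 + 30 = 64 bytes */

int main(void)
{
	struct dbfs_d2fc_hdr hdr;
	FILE *f = fopen("/sys/kernel/debug/s390_hypfs/diag_2fc", "rb");

	if (!f || fread(&hdr, sizeof(hdr), 1, f) != 1) {
		perror("diag_2fc");
		return 1;
	}
	printf("%llu guests, %llu payload bytes\n",
	       (unsigned long long)hdr.count, (unsigned long long)hdr.len);
	fclose(f);
	return 0;
}
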
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index c53f8ac825ca..6b120f073043 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -46,6 +46,8 @@ static const struct super_operations hypfs_s_ops;
 /* start of list of all dentries, which have to be deleted on update */
 static struct dentry *hypfs_last_dentry;
 
+struct dentry *hypfs_dbfs_dir;
+
 static void hypfs_update_update(struct super_block *sb)
 {
 	struct hypfs_sb_info *sb_info = sb->s_fs_info;
@@ -145,7 +147,7 @@ static int hypfs_open(struct inode *inode, struct file *filp)
 		}
 		mutex_unlock(&fs_info->lock);
 	}
-	return 0;
+	return nonseekable_open(inode, filp);
 }
 
 static ssize_t hypfs_aio_read(struct kiocb *iocb, const struct iovec *iov,
@@ -468,20 +470,22 @@ static int __init hypfs_init(void)
 {
 	int rc;
 
-	if (MACHINE_IS_VM) {
-		if (hypfs_vm_init())
-			/* no diag 2fc, just exit */
-			return -ENODATA;
-	} else {
-		if (hypfs_diag_init()) {
-			rc = -ENODATA;
-			goto fail_diag;
-		}
+	hypfs_dbfs_dir = debugfs_create_dir("s390_hypfs", NULL);
+	if (IS_ERR(hypfs_dbfs_dir))
+		return PTR_ERR(hypfs_dbfs_dir);
+
+	if (hypfs_diag_init()) {
+		rc = -ENODATA;
+		goto fail_debugfs_remove;
+	}
+	if (hypfs_vm_init()) {
+		rc = -ENODATA;
+		goto fail_hypfs_diag_exit;
 	}
 	s390_kobj = kobject_create_and_add("s390", hypervisor_kobj);
 	if (!s390_kobj) {
 		rc = -ENOMEM;
-		goto fail_sysfs;
+		goto fail_hypfs_vm_exit;
 	}
 	rc = register_filesystem(&hypfs_type);
 	if (rc)
@@ -490,18 +494,22 @@ static int __init hypfs_init(void)
 
 fail_filesystem:
 	kobject_put(s390_kobj);
-fail_sysfs:
-	if (!MACHINE_IS_VM)
-		hypfs_diag_exit();
-fail_diag:
+fail_hypfs_vm_exit:
+	hypfs_vm_exit();
+fail_hypfs_diag_exit:
+	hypfs_diag_exit();
+fail_debugfs_remove:
+	debugfs_remove(hypfs_dbfs_dir);
+
 	pr_err("Initialization of hypfs failed with rc=%i\n", rc);
 	return rc;
 }
 
 static void __exit hypfs_exit(void)
 {
-	if (!MACHINE_IS_VM)
-		hypfs_diag_exit();
+	hypfs_diag_exit();
+	hypfs_vm_exit();
+	debugfs_remove(hypfs_dbfs_dir);
 	unregister_filesystem(&hypfs_type);
 	kobject_put(s390_kobj);
 }
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 258ba88b7b50..8b1a52a137c5 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -188,15 +188,16 @@ struct s390_idle_data {
 
 DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
 
-void vtime_start_cpu(void);
+void vtime_start_cpu(__u64 int_clock, __u64 enter_timer);
 cputime64_t s390_get_idle_time(int cpu);
 
 #define arch_idle_time(cpu) s390_get_idle_time(cpu)
 
-static inline void s390_idle_check(void)
+static inline void s390_idle_check(struct pt_regs *regs, __u64 int_clock,
+				   __u64 enter_timer)
 {
-	if ((&__get_cpu_var(s390_idle))->idle_enter != 0ULL)
-		vtime_start_cpu();
+	if (regs->psw.mask & PSW_MASK_WAIT)
+		vtime_start_cpu(int_clock, enter_timer);
 }
 
 static inline int s390_nohz_delay(int cpu)
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 05527c040b7a..0f97ef2d92ac 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -104,38 +104,39 @@ struct _lowcore {
 	/* CPU time accounting values */
 	__u64	sync_enter_timer;		/* 0x0250 */
 	__u64	async_enter_timer;		/* 0x0258 */
-	__u64	exit_timer;			/* 0x0260 */
-	__u64	user_timer;			/* 0x0268 */
-	__u64	system_timer;			/* 0x0270 */
-	__u64	steal_timer;			/* 0x0278 */
-	__u64	last_update_timer;		/* 0x0280 */
-	__u64	last_update_clock;		/* 0x0288 */
+	__u64	mcck_enter_timer;		/* 0x0260 */
+	__u64	exit_timer;			/* 0x0268 */
+	__u64	user_timer;			/* 0x0270 */
+	__u64	system_timer;			/* 0x0278 */
+	__u64	steal_timer;			/* 0x0280 */
+	__u64	last_update_timer;		/* 0x0288 */
+	__u64	last_update_clock;		/* 0x0290 */
 
 	/* Current process. */
-	__u32	current_task;			/* 0x0290 */
-	__u32	thread_info;			/* 0x0294 */
-	__u32	kernel_stack;			/* 0x0298 */
+	__u32	current_task;			/* 0x0298 */
+	__u32	thread_info;			/* 0x029c */
+	__u32	kernel_stack;			/* 0x02a0 */
 
 	/* Interrupt and panic stack. */
-	__u32	async_stack;			/* 0x029c */
-	__u32	panic_stack;			/* 0x02a0 */
+	__u32	async_stack;			/* 0x02a4 */
+	__u32	panic_stack;			/* 0x02a8 */
 
 	/* Address space pointer. */
-	__u32	kernel_asce;			/* 0x02a4 */
-	__u32	user_asce;			/* 0x02a8 */
-	__u32	user_exec_asce;			/* 0x02ac */
+	__u32	kernel_asce;			/* 0x02ac */
+	__u32	user_asce;			/* 0x02b0 */
+	__u32	user_exec_asce;			/* 0x02b4 */
 
 	/* SMP info area */
-	struct cpuid cpu_id;			/* 0x02b0 */
 	__u32	cpu_nr;				/* 0x02b8 */
 	__u32	softirq_pending;		/* 0x02bc */
 	__u32	percpu_offset;			/* 0x02c0 */
 	__u32	ext_call_fast;			/* 0x02c4 */
 	__u64	int_clock;			/* 0x02c8 */
-	__u64	clock_comparator;		/* 0x02d0 */
-	__u32	machine_flags;			/* 0x02d8 */
-	__u32	ftrace_func;			/* 0x02dc */
-	__u8	pad_0x02e0[0x0300-0x02e0];	/* 0x02e0 */
+	__u64	mcck_clock;			/* 0x02d0 */
+	__u64	clock_comparator;		/* 0x02d8 */
+	__u32	machine_flags;			/* 0x02e0 */
+	__u32	ftrace_func;			/* 0x02e4 */
+	__u8	pad_0x02e8[0x0300-0x02e8];	/* 0x02e8 */
 
 	/* Interrupt response block */
 	__u8	irb[64];			/* 0x0300 */
@@ -189,14 +190,14 @@ struct _lowcore {
 	__u32	data_exc_code;			/* 0x0090 */
 	__u16	mon_class_num;			/* 0x0094 */
 	__u16	per_perc_atmid;			/* 0x0096 */
-	addr_t	per_address;			/* 0x0098 */
+	__u64	per_address;			/* 0x0098 */
 	__u8	exc_access_id;			/* 0x00a0 */
 	__u8	per_access_id;			/* 0x00a1 */
 	__u8	op_access_id;			/* 0x00a2 */
 	__u8	ar_access_id;			/* 0x00a3 */
 	__u8	pad_0x00a4[0x00a8-0x00a4];	/* 0x00a4 */
-	addr_t	trans_exc_code;			/* 0x00a8 */
-	addr_t	monitor_code;			/* 0x00b0 */
+	__u64	trans_exc_code;			/* 0x00a8 */
+	__u64	monitor_code;			/* 0x00b0 */
 	__u16	subchannel_id;			/* 0x00b8 */
 	__u16	subchannel_nr;			/* 0x00ba */
 	__u32	io_int_parm;			/* 0x00bc */
@@ -207,7 +208,7 @@ struct _lowcore {
 	__u32	mcck_interruption_code[2];	/* 0x00e8 */
 	__u8	pad_0x00f0[0x00f4-0x00f0];	/* 0x00f0 */
 	__u32	external_damage_code;		/* 0x00f4 */
-	addr_t	failing_storage_address;	/* 0x00f8 */
+	__u64	failing_storage_address;	/* 0x00f8 */
 	__u8	pad_0x0100[0x0110-0x0100];	/* 0x0100 */
 	__u64	breaking_event_addr;		/* 0x0110 */
 	__u8	pad_0x0118[0x0120-0x0118];	/* 0x0118 */
@@ -233,39 +234,41 @@ struct _lowcore {
 	/* CPU accounting and timing values. */
 	__u64	sync_enter_timer;		/* 0x02a0 */
 	__u64	async_enter_timer;		/* 0x02a8 */
-	__u64	exit_timer;			/* 0x02b0 */
-	__u64	user_timer;			/* 0x02b8 */
-	__u64	system_timer;			/* 0x02c0 */
-	__u64	steal_timer;			/* 0x02c8 */
-	__u64	last_update_timer;		/* 0x02d0 */
-	__u64	last_update_clock;		/* 0x02d8 */
+	__u64	mcck_enter_timer;		/* 0x02b0 */
+	__u64	exit_timer;			/* 0x02b8 */
+	__u64	user_timer;			/* 0x02c0 */
+	__u64	system_timer;			/* 0x02c8 */
+	__u64	steal_timer;			/* 0x02d0 */
+	__u64	last_update_timer;		/* 0x02d8 */
+	__u64	last_update_clock;		/* 0x02e0 */
 
 	/* Current process. */
-	__u64	current_task;			/* 0x02e0 */
-	__u64	thread_info;			/* 0x02e8 */
-	__u64	kernel_stack;			/* 0x02f0 */
+	__u64	current_task;			/* 0x02e8 */
+	__u64	thread_info;			/* 0x02f0 */
+	__u64	kernel_stack;			/* 0x02f8 */
 
 	/* Interrupt and panic stack. */
-	__u64	async_stack;			/* 0x02f8 */
-	__u64	panic_stack;			/* 0x0300 */
+	__u64	async_stack;			/* 0x0300 */
+	__u64	panic_stack;			/* 0x0308 */
 
 	/* Address space pointer. */
-	__u64	kernel_asce;			/* 0x0308 */
-	__u64	user_asce;			/* 0x0310 */
-	__u64	user_exec_asce;			/* 0x0318 */
+	__u64	kernel_asce;			/* 0x0310 */
+	__u64	user_asce;			/* 0x0318 */
+	__u64	user_exec_asce;			/* 0x0320 */
 
 	/* SMP info area */
-	struct cpuid cpu_id;			/* 0x0320 */
 	__u32	cpu_nr;				/* 0x0328 */
 	__u32	softirq_pending;		/* 0x032c */
 	__u64	percpu_offset;			/* 0x0330 */
 	__u64	ext_call_fast;			/* 0x0338 */
 	__u64	int_clock;			/* 0x0340 */
-	__u64	clock_comparator;		/* 0x0348 */
-	__u64	vdso_per_cpu_data;		/* 0x0350 */
-	__u64	machine_flags;			/* 0x0358 */
-	__u64	ftrace_func;			/* 0x0360 */
-	__u8	pad_0x0368[0x0380-0x0368];	/* 0x0368 */
+	__u64	mcck_clock;			/* 0x0348 */
+	__u64	clock_comparator;		/* 0x0350 */
+	__u64	vdso_per_cpu_data;		/* 0x0358 */
+	__u64	machine_flags;			/* 0x0360 */
+	__u64	ftrace_func;			/* 0x0368 */
+	__u64	sie_hook;			/* 0x0370 */
+	__u64	cmf_hpp;			/* 0x0378 */
 
 	/* Interrupt response block. */
 	__u8	irb[64];			/* 0x0380 */
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index fef9b33cdd59..e2c218dc68a6 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -328,8 +328,8 @@ struct pt_regs
 	psw_t psw;
 	unsigned long gprs[NUM_GPRS];
 	unsigned long orig_gpr2;
-	unsigned short svcnr;
 	unsigned short ilc;
+	unsigned short svcnr;
 };
 #endif
 
@@ -436,6 +436,7 @@ typedef struct
 #define PTRACE_PEEKDATA_AREA	      0x5003
 #define PTRACE_POKETEXT_AREA	      0x5004
 #define PTRACE_POKEDATA_AREA 	      0x5005
+#define PTRACE_GET_LAST_BREAK	      0x5006
 
 /*
  * PT_PROT definition is loosely based on hppa bsd definition in
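
The new PTRACE_GET_LAST_BREAK request, together with the last_break field added to thread_info below, lets a tracer read the last breaking-event address recorded for a traced task. A hedged user-space sketch follows; that the kernel returns the address through the "data" pointer is an assumption here and is not spelled out in this header:

/* Sketch: read the last breaking-event address of a traced task.
 * The target must already be ptrace-attached and stopped. */
#include <stdio.h>
#include <stdlib.h>
#include <sys/ptrace.h>
#include <sys/types.h>

#ifndef PTRACE_GET_LAST_BREAK
#define PTRACE_GET_LAST_BREAK 0x5006	/* value from asm/ptrace.h in this patch */
#endif

int main(int argc, char **argv)
{
	unsigned long last_break = 0;
	pid_t pid;

	if (argc < 2)
		return 1;
	pid = (pid_t)atol(argv[1]);
	/* assumption: the address is written to the buffer passed as "data" */
	if (ptrace(PTRACE_GET_LAST_BREAK, pid, NULL, &last_break) == -1) {
		perror("PTRACE_GET_LAST_BREAK");
		return 1;
	}
	printf("last breaking-event address: %#lx\n", last_break);
	return 0;
}
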
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 9b04b1102bbc..0eaae6260274 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -368,14 +368,12 @@ struct qdio_initialize {
 #define QDIO_FLAG_SYNC_OUTPUT		0x02
 #define QDIO_FLAG_PCI_OUT		0x10
 
-extern int qdio_initialize(struct qdio_initialize *);
 extern int qdio_allocate(struct qdio_initialize *);
 extern int qdio_establish(struct qdio_initialize *);
 extern int qdio_activate(struct ccw_device *);
 
 extern int do_QDIO(struct ccw_device *cdev, unsigned int callflags,
 		   int q_nr, unsigned int bufnr, unsigned int count);
-extern int qdio_cleanup(struct ccw_device*, int);
 extern int qdio_shutdown(struct ccw_device*, int);
 extern int qdio_free(struct ccw_device *);
 extern int qdio_get_ssqd_desc(struct ccw_device *dev, struct qdio_ssqd_desc*);
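
With the qdio_initialize() and qdio_cleanup() wrappers removed, drivers call the remaining primitives directly. A rough kernel-side sketch of the resulting call order under assumed driver context (not a standalone program; filling in init_data, the shutdown flag value and the error handling are placeholders, not taken from this header):

/* Sketch: qdio bring-up order after the wrapper removal:
 * allocate -> establish -> activate, with shutdown -> free on unwind. */
#include <asm/qdio.h>

static int example_qdio_bringup(struct ccw_device *cdev,
				struct qdio_initialize *init_data)
{
	int rc;

	rc = qdio_allocate(init_data);	/* first half of the old wrapper */
	if (rc)
		return rc;
	rc = qdio_establish(init_data);	/* second half of the old wrapper */
	if (rc)
		goto out_free;
	rc = qdio_activate(cdev);
	if (rc)
		goto out_shutdown;
	return 0;

out_shutdown:
	qdio_shutdown(cdev, 0);		/* flag value is a placeholder */
out_free:
	qdio_free(cdev);
	return rc;
}
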
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 9ab6bd3a65d1..25e831d58e1e 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -2,7 +2,7 @@
  *  include/asm-s390/setup.h
  *
  *  S390 version
- *    Copyright IBM Corp. 1999,2006
+ *    Copyright IBM Corp. 1999,2010
  */
 
 #ifndef _ASM_S390_SETUP_H
@@ -72,6 +72,7 @@ extern unsigned int user_mode;
 #define MACHINE_FLAG_HPAGE	(1UL << 10)
 #define MACHINE_FLAG_PFMF	(1UL << 11)
 #define MACHINE_FLAG_LPAR	(1UL << 12)
+#define MACHINE_FLAG_SPP	(1UL << 13)
 
 #define MACHINE_IS_VM		(S390_lowcore.machine_flags & MACHINE_FLAG_VM)
 #define MACHINE_IS_KVM		(S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
@@ -88,6 +89,7 @@ extern unsigned int user_mode;
 #define MACHINE_HAS_MVCOS	(0)
 #define MACHINE_HAS_HPAGE	(0)
 #define MACHINE_HAS_PFMF	(0)
+#define MACHINE_HAS_SPP		(0)
 #else /* __s390x__ */
 #define MACHINE_HAS_IEEE	(1)
 #define MACHINE_HAS_CSP		(1)
@@ -97,6 +99,7 @@ extern unsigned int user_mode;
 #define MACHINE_HAS_MVCOS	(S390_lowcore.machine_flags & MACHINE_FLAG_MVCOS)
 #define MACHINE_HAS_HPAGE	(S390_lowcore.machine_flags & MACHINE_FLAG_HPAGE)
 #define MACHINE_HAS_PFMF	(S390_lowcore.machine_flags & MACHINE_FLAG_PFMF)
+#define MACHINE_HAS_SPP		(S390_lowcore.machine_flags & MACHINE_FLAG_SPP)
 #endif /* __s390x__ */
 
 #define ZFCPDUMP_HSA_SIZE	(32UL<<20)
diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h
index 1741c1556a4e..cef66210c846 100644
--- a/arch/s390/include/asm/system.h
+++ b/arch/s390/include/asm/system.h
@@ -459,11 +459,6 @@ extern void (*_machine_power_off)(void);
 
 #define arch_align_stack(x) (x)
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-extern psw_t sysc_restore_trace_psw;
-extern psw_t io_restore_trace_psw;
-#endif
-
 static inline int tprot(unsigned long addr)
 {
 	int rc = -EFAULT;
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 34f0873d6525..be3d3f91d86c 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -50,6 +50,7 @@ struct thread_info {
 	struct restart_block	restart_block;
 	__u64			user_timer;
 	__u64			system_timer;
+	unsigned long		last_break;	/* last breaking-event-address. */
 };
 
 /*
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index f174bdaa6b59..09d345a701dc 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -61,11 +61,15 @@ static inline unsigned long long get_clock (void)
 	return clk;
 }
 
+static inline void get_clock_ext(char *clk)
+{
+	asm volatile("stcke %0" : "=Q" (*clk) : : "cc");
+}
+
 static inline unsigned long long get_clock_xt(void)
 {
 	unsigned char clk[16];
-
-	asm volatile("stcke %0" : "=Q" (clk) : : "cc");
+	get_clock_ext(clk);
 	return *((unsigned long long *)&clk[1]);
 }
 
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index 6e7211abd950..dc8a67297d0f 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -7,8 +7,10 @@
 
 const struct cpumask *cpu_coregroup_mask(unsigned int cpu);
 
+extern unsigned char cpu_core_id[NR_CPUS];
 extern cpumask_t cpu_core_map[NR_CPUS];
 
+#define topology_core_id(cpu)		(cpu_core_id[cpu])
 #define topology_core_cpumask(cpu)	(&cpu_core_map[cpu])
 
 int topology_set_cpu_management(int fc);
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index a09408952ed0..d9b490a2716e 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -39,6 +39,7 @@ int main(void)
 	DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count));
 	DEFINE(__TI_user_timer, offsetof(struct thread_info, user_timer));
 	DEFINE(__TI_system_timer, offsetof(struct thread_info, system_timer));
+	DEFINE(__TI_last_break, offsetof(struct thread_info, last_break));
 	BLANK();
 	DEFINE(__PT_ARGS, offsetof(struct pt_regs, args));
 	DEFINE(__PT_PSW, offsetof(struct pt_regs, psw));
@@ -112,6 +113,7 @@ int main(void)
 	DEFINE(__LC_RETURN_MCCK_PSW, offsetof(struct _lowcore, return_mcck_psw));
 	DEFINE(__LC_SYNC_ENTER_TIMER, offsetof(struct _lowcore, sync_enter_timer));
 	DEFINE(__LC_ASYNC_ENTER_TIMER, offsetof(struct _lowcore, async_enter_timer));
+	DEFINE(__LC_MCCK_ENTER_TIMER, offsetof(struct _lowcore, mcck_enter_timer));
 	DEFINE(__LC_EXIT_TIMER, offsetof(struct _lowcore, exit_timer));
 	DEFINE(__LC_USER_TIMER, offsetof(struct _lowcore, user_timer));
 	DEFINE(__LC_SYSTEM_TIMER, offsetof(struct _lowcore, system_timer));
@@ -126,10 +128,12 @@ int main(void)
 	DEFINE(__LC_KERNEL_ASCE, offsetof(struct _lowcore, kernel_asce));
 	DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce));
 	DEFINE(__LC_USER_EXEC_ASCE, offsetof(struct _lowcore, user_exec_asce));
-	DEFINE(__LC_CPUID, offsetof(struct _lowcore, cpu_id));
 	DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock));
+	DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock));
 	DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags));
 	DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func));
+	DEFINE(__LC_SIE_HOOK, offsetof(struct _lowcore, sie_hook));
+	DEFINE(__LC_CMF_HPP, offsetof(struct _lowcore, cmf_hpp));
 	DEFINE(__LC_IRB, offsetof(struct _lowcore, irb));
 	DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area));
 	DEFINE(__LC_CLOCK_COMP_SAVE_AREA, offsetof(struct _lowcore, clock_comp_save_area));
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 0168472b2fdf..98192261491d 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -655,6 +655,7 @@ found:
 	p_info->act_entry_offset = 0;
 	file->private_data = p_info;
 	debug_info_get(debug_info);
+	nonseekable_open(inode, file);
 out:
 	mutex_unlock(&debug_mutex);
 	return rc;
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 2d92c2cf92d7..c00856ad4e5a 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -356,6 +356,7 @@ static __init void detect_machine_facilities(void)
 {
 #ifdef CONFIG_64BIT
 	unsigned int facilities;
+	unsigned long long facility_bits;
 
 	facilities = stfl();
 	if (facilities & (1 << 28))
@@ -364,6 +365,9 @@ static __init void detect_machine_facilities(void)
 		S390_lowcore.machine_flags |= MACHINE_FLAG_PFMF;
 	if (facilities & (1 << 4))
 		S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS;
+	if ((stfle(&facility_bits, 1) > 0) &&
+	    (facility_bits & (1ULL << (63 - 40))))
+		S390_lowcore.machine_flags |= MACHINE_FLAG_SPP;
 #endif
 }
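
The SPP check above relies on the facility-list bit numbering: stfle stores doublewords in which bit 0 is the most significant bit, so facility number 40 is tested as 1ULL << (63 - 40) in the first doubleword. A small helper expressing that convention; the function name and signature are illustrative only:

/* Sketch: test facility bit "nr" in a facility list stored by stfle.
 * Bits count from the most significant bit of each doubleword. */
#include <stdbool.h>

static bool facility_bit_set(const unsigned long long *list, unsigned int nr)
{
	return (list[nr / 64] >> (63 - (nr % 64))) & 1ULL;
}

/* e.g. facility_bit_set(&facility_bits, 40) mirrors the MACHINE_FLAG_SPP test */
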
 
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 6af7045280a8..d5e3e6007447 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -73,21 +73,24 @@ STACK_SIZE  = 1 << STACK_SHIFT
 	basr	%r14,%r1
 	.endm
 
-	.macro	TRACE_IRQS_CHECK
-	basr	%r2,%r0
+	.macro	TRACE_IRQS_CHECK_ON
 	tm	SP_PSW(%r15),0x03	# irqs enabled?
-	jz	0f
-	l	%r1,BASED(.Ltrace_irq_on_caller)
-	basr	%r14,%r1
-	j	1f
-0:	l	%r1,BASED(.Ltrace_irq_off_caller)
-	basr	%r14,%r1
-1:
+	bz	BASED(0f)
+	TRACE_IRQS_ON
+0:
+	.endm
+
+	.macro	TRACE_IRQS_CHECK_OFF
+	tm	SP_PSW(%r15),0x03	# irqs enabled?
+	bz	BASED(0f)
+	TRACE_IRQS_OFF
+0:
 	.endm
 #else
 #define TRACE_IRQS_ON
 #define TRACE_IRQS_OFF
-#define TRACE_IRQS_CHECK
+#define TRACE_IRQS_CHECK_ON
+#define TRACE_IRQS_CHECK_OFF
 #endif
 
 #ifdef CONFIG_LOCKDEP
@@ -177,9 +180,9 @@ STACK_SIZE  = 1 << STACK_SHIFT
 	s	%r15,BASED(.Lc_spsize)	# make room for registers & psw
 	mvc	SP_PSW(8,%r15),0(%r12)	# move user PSW to stack
 	st	%r2,SP_ORIG_R2(%r15)	# store original content of gpr 2
-	icm	%r12,3,__LC_SVC_ILC
+	icm	%r12,12,__LC_SVC_ILC
 	stm	%r0,%r11,SP_R0(%r15)	# store gprs %r0-%r11 to kernel stack
-	st	%r12,SP_SVCNR(%r15)
+	st	%r12,SP_ILC(%r15)
 	mvc	SP_R12(16,%r15),\savearea # move %r12-%r15 to stack
 	la	%r12,0
 	st	%r12,__SF_BACKCHAIN(%r15)	# clear back chain
@@ -273,66 +276,45 @@ sysc_do_restart:
 	st	%r2,SP_R2(%r15)   # store return value (change R2 on stack)
 
 sysc_return:
+	LOCKDEP_SYS_EXIT
+sysc_tif:
 	tm	__TI_flags+3(%r9),_TIF_WORK_SVC
 	bnz	BASED(sysc_work)  # there is work to do (signals etc.)
 sysc_restore:
-#ifdef CONFIG_TRACE_IRQFLAGS
-	la	%r1,BASED(sysc_restore_trace_psw_addr)
-	l	%r1,0(%r1)
-	lpsw	0(%r1)
-sysc_restore_trace:
-	TRACE_IRQS_CHECK
-	LOCKDEP_SYS_EXIT
-#endif
-sysc_leave:
 	RESTORE_ALL __LC_RETURN_PSW,1
 sysc_done:
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-sysc_restore_trace_psw_addr:
-	.long sysc_restore_trace_psw
-
-	.section .data,"aw",@progbits
-	.align	8
-	.globl	sysc_restore_trace_psw
-sysc_restore_trace_psw:
-	.long	0, sysc_restore_trace + 0x80000000
-	.previous
-#endif
-
-#
-# recheck if there is more work to do
 #
-sysc_work_loop:
-	tm	__TI_flags+3(%r9),_TIF_WORK_SVC
-	bz	BASED(sysc_restore)	# there is no work to do
-#
-# One of the work bits is on. Find out which one.
+# There is work to do, but first we need to check if we return to userspace.
 #
 sysc_work:
 	tm	SP_PSW+1(%r15),0x01	# returning to user ?
 	bno	BASED(sysc_restore)
+
+#
+# One of the work bits is on. Find out which one.
+#
+sysc_work_tif:
 	tm	__TI_flags+3(%r9),_TIF_MCCK_PENDING
 	bo	BASED(sysc_mcck_pending)
 	tm	__TI_flags+3(%r9),_TIF_NEED_RESCHED
 	bo	BASED(sysc_reschedule)
 	tm	__TI_flags+3(%r9),_TIF_SIGPENDING
-	bnz	BASED(sysc_sigpending)
+	bo	BASED(sysc_sigpending)
 	tm	__TI_flags+3(%r9),_TIF_NOTIFY_RESUME
-	bnz	BASED(sysc_notify_resume)
+	bo	BASED(sysc_notify_resume)
 	tm	__TI_flags+3(%r9),_TIF_RESTART_SVC
 	bo	BASED(sysc_restart)
 	tm	__TI_flags+3(%r9),_TIF_SINGLE_STEP
 	bo	BASED(sysc_singlestep)
-	b	BASED(sysc_restore)
-sysc_work_done:
+	b	BASED(sysc_return)	# beware of critical section cleanup
 
 #
 # _TIF_NEED_RESCHED is set, call schedule
 #
 sysc_reschedule:
 	l	%r1,BASED(.Lschedule)
-	la	%r14,BASED(sysc_work_loop)
+	la	%r14,BASED(sysc_return)
 	br	%r1			# call scheduler
 
 #
@@ -340,7 +322,7 @@ sysc_reschedule:
 #
 sysc_mcck_pending:
 	l	%r1,BASED(.Ls390_handle_mcck)
-	la	%r14,BASED(sysc_work_loop)
+	la	%r14,BASED(sysc_return)
 	br	%r1			# TIF bit will be cleared by handler
 
 #
@@ -355,7 +337,7 @@ sysc_sigpending:
 	bo	BASED(sysc_restart)
 	tm	__TI_flags+3(%r9),_TIF_SINGLE_STEP
 	bo	BASED(sysc_singlestep)
-	b	BASED(sysc_work_loop)
+	b	BASED(sysc_return)
 
 #
 # _TIF_NOTIFY_RESUME is set, call do_notify_resume
@@ -363,7 +345,7 @@ sysc_sigpending:
 sysc_notify_resume:
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	l	%r1,BASED(.Ldo_notify_resume)
-	la	%r14,BASED(sysc_work_loop)
+	la	%r14,BASED(sysc_return)
 	br	%r1			# call do_notify_resume
 
 
@@ -458,11 +440,13 @@ kernel_execve:
 	br	%r14
 	# execve succeeded.
 0:	stnsm	__SF_EMPTY(%r15),0xfc	# disable interrupts
+	TRACE_IRQS_OFF
 	l	%r15,__LC_KERNEL_STACK	# load ksp
 	s	%r15,BASED(.Lc_spsize)	# make room for registers & psw
 	l	%r9,__LC_THREAD_INFO
 	mvc	SP_PTREGS(__PT_SIZE,%r15),0(%r12)	# copy pt_regs
 	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
+	TRACE_IRQS_ON
 	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
 	l	%r1,BASED(.Lexecve_tail)
 	basr	%r14,%r1
@@ -499,8 +483,8 @@ pgm_check_handler:
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
 pgm_no_vtime:
+	TRACE_IRQS_CHECK_OFF
 	l	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
-	TRACE_IRQS_OFF
 	l	%r3,__LC_PGM_ILC	# load program interruption code
 	la	%r8,0x7f
 	nr	%r8,%r3
@@ -509,8 +493,10 @@ pgm_do_call:
 	sll	%r8,2
 	l	%r7,0(%r8,%r7)		# load address of handler routine
 	la	%r2,SP_PTREGS(%r15)	# address of register-save area
-	la	%r14,BASED(sysc_return)
-	br	%r7			# branch to interrupt-handler
+	basr	%r14,%r7		# branch to interrupt-handler
+pgm_exit:
+	TRACE_IRQS_CHECK_ON
+	b	BASED(sysc_return)
 
 #
 # handle per exception
@@ -537,19 +523,19 @@ pgm_per_std:
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
 pgm_no_vtime2:
+	TRACE_IRQS_CHECK_OFF
 	l	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
-	TRACE_IRQS_OFF
 	l	%r1,__TI_task(%r9)
+	tm	SP_PSW+1(%r15),0x01	# kernel per event ?
+	bz	BASED(kernel_per)
 	mvc	__THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID
 	mvc	__THREAD_per+__PER_address(4,%r1),__LC_PER_ADDRESS
 	mvc	__THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID
 	oi	__TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
-	tm	SP_PSW+1(%r15),0x01	# kernel per event ?
-	bz	BASED(kernel_per)
 	l	%r3,__LC_PGM_ILC	# load program interruption code
 	la	%r8,0x7f
 	nr	%r8,%r3 		# clear per-event-bit and ilc
-	be	BASED(sysc_return)	# only per or per+check ?
+	be	BASED(pgm_exit)		# only per or per+check ?
 	b	BASED(pgm_do_call)
 
 #
@@ -570,8 +556,8 @@ pgm_svcper:
 	mvc	__THREAD_per+__PER_access_id(1,%r8),__LC_PER_ACCESS_ID
 	oi	__TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
 	TRACE_IRQS_ON
-	lm	%r2,%r6,SP_R2(%r15)	# load svc arguments
 	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
+	lm	%r2,%r6,SP_R2(%r15)	# load svc arguments
 	b	BASED(sysc_do_svc)
 
 #
@@ -582,8 +568,8 @@ kernel_per:
 	mvi	SP_SVCNR+1(%r15),0xff
 	la	%r2,SP_PTREGS(%r15)	# address of register-save area
 	l	%r1,BASED(.Lhandle_per)	# load adr. of per handler
-	la	%r14,BASED(sysc_restore)# load adr. of system return
-	br	%r1			# branch to do_single_step
+	basr	%r14,%r1		# branch to do_single_step
+	b	BASED(pgm_exit)
 
 /*
  * IO interrupt handler routine
@@ -602,134 +588,126 @@ io_int_handler:
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER
 io_no_vtime:
-	l	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
 	TRACE_IRQS_OFF
+	l	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
 	l	%r1,BASED(.Ldo_IRQ)	# load address of do_IRQ
 	la	%r2,SP_PTREGS(%r15)	# address of register-save area
 	basr	%r14,%r1		# branch to standard irq handler
 io_return:
+	LOCKDEP_SYS_EXIT
+	TRACE_IRQS_ON
+io_tif:
 	tm	__TI_flags+3(%r9),_TIF_WORK_INT
 	bnz	BASED(io_work)		# there is work to do (signals etc.)
 io_restore:
-#ifdef CONFIG_TRACE_IRQFLAGS
-	la	%r1,BASED(io_restore_trace_psw_addr)
-	l	%r1,0(%r1)
-	lpsw	0(%r1)
-io_restore_trace:
-	TRACE_IRQS_CHECK
-	LOCKDEP_SYS_EXIT
-#endif
-io_leave:
 	RESTORE_ALL __LC_RETURN_PSW,0
 io_done:
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-io_restore_trace_psw_addr:
-	.long io_restore_trace_psw
-
-	.section .data,"aw",@progbits
-	.align	8
-	.globl	io_restore_trace_psw
-io_restore_trace_psw:
-	.long	0, io_restore_trace + 0x80000000
-	.previous
-#endif
-
 #
-# switch to kernel stack, then check the TIF bits
+# There is work todo, find out in which context we have been interrupted:
+# 1) if we return to user space we can do all _TIF_WORK_INT work
+# 2) if we return to kernel code and preemptive scheduling is enabled check
+#    the preemption counter and if it is zero call preempt_schedule_irq
+# Before any work can be done, a switch to the kernel stack is required.
 #
 io_work:
 	tm	SP_PSW+1(%r15),0x01	# returning to user ?
-#ifndef CONFIG_PREEMPT
-	bno	BASED(io_restore)	# no-> skip resched & signal
-#else
-	bnz	BASED(io_work_user)	# no -> check for preemptive scheduling
+	bo	BASED(io_work_user)	# yes -> do resched & signal
+#ifdef CONFIG_PREEMPT
 	# check for preemptive scheduling
 	icm	%r0,15,__TI_precount(%r9)
 	bnz	BASED(io_restore)	# preemption disabled
+	tm	__TI_flags+3(%r9),_TIF_NEED_RESCHED
+	bno	BASED(io_restore)
+	# switch to kernel stack
 	l	%r1,SP_R15(%r15)
 	s	%r1,BASED(.Lc_spsize)
 	mvc	SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15)
 	xc	__SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain
 	lr	%r15,%r1
-io_resume_loop:
-	tm	__TI_flags+3(%r9),_TIF_NEED_RESCHED
-	bno	BASED(io_restore)
+	# TRACE_IRQS_ON already done at io_return, call
+	# TRACE_IRQS_OFF to keep things symmetrical
+	TRACE_IRQS_OFF
 	l	%r1,BASED(.Lpreempt_schedule_irq)
-	la	%r14,BASED(io_resume_loop)
-	br	%r1			# call schedule
+	basr	%r14,%r1		# call preempt_schedule_irq
+	b	BASED(io_return)
+#else
+	b	BASED(io_restore)
 #endif
 
+#
+# Need to do work before returning to userspace, switch to kernel stack
+#
 io_work_user:
 	l	%r1,__LC_KERNEL_STACK
 	s	%r1,BASED(.Lc_spsize)
 	mvc	SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15)
 	xc	__SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain
 	lr	%r15,%r1
+
 #
 # One of the work bits is on. Find out which one.
-# Checked are: _TIF_SIGPENDING, _TIF_NEED_RESCHED
+# Checked are: _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_NEED_RESCHED
 #		and _TIF_MCCK_PENDING
 #
-io_work_loop:
+io_work_tif:
 	tm	__TI_flags+3(%r9),_TIF_MCCK_PENDING
 	bo	BASED(io_mcck_pending)
 	tm	__TI_flags+3(%r9),_TIF_NEED_RESCHED
 	bo	BASED(io_reschedule)
 	tm	__TI_flags+3(%r9),_TIF_SIGPENDING
-	bnz	BASED(io_sigpending)
+	bo	BASED(io_sigpending)
 	tm	__TI_flags+3(%r9),_TIF_NOTIFY_RESUME
-	bnz	BASED(io_notify_resume)
-	b	BASED(io_restore)
-io_work_done:
+	bo	BASED(io_notify_resume)
+	b	BASED(io_return)	# beware of critical section cleanup
 
 #
 # _TIF_MCCK_PENDING is set, call handler
 #
 io_mcck_pending:
+	# TRACE_IRQS_ON already done at io_return
 	l	%r1,BASED(.Ls390_handle_mcck)
 	basr	%r14,%r1		# TIF bit will be cleared by handler
-	b	BASED(io_work_loop)
+	TRACE_IRQS_OFF
+	b	BASED(io_return)
 
 #
 # _TIF_NEED_RESCHED is set, call schedule
 #
 io_reschedule:
-	TRACE_IRQS_ON
+	# TRACE_IRQS_ON already done at io_return
 	l	%r1,BASED(.Lschedule)
 	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
 	basr	%r14,%r1		# call scheduler
 	stnsm	__SF_EMPTY(%r15),0xfc	# disable I/O and ext. interrupts
 	TRACE_IRQS_OFF
-	tm	__TI_flags+3(%r9),_TIF_WORK_INT
-	bz	BASED(io_restore)	# there is no work to do
-	b	BASED(io_work_loop)
+	b	BASED(io_return)
 
 #
 # _TIF_SIGPENDING is set, call do_signal
 #
 io_sigpending:
-	TRACE_IRQS_ON
+	# TRACE_IRQS_ON already done at io_return
 	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	l	%r1,BASED(.Ldo_signal)
 	basr	%r14,%r1		# call do_signal
 	stnsm	__SF_EMPTY(%r15),0xfc	# disable I/O and ext. interrupts
 	TRACE_IRQS_OFF
-	b	BASED(io_work_loop)
+	b	BASED(io_return)
 
 #
 # _TIF_SIGPENDING is set, call do_signal
 #
 io_notify_resume:
-	TRACE_IRQS_ON
+	# TRACE_IRQS_ON already done at io_return
 	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	l	%r1,BASED(.Ldo_notify_resume)
 	basr	%r14,%r1		# call do_signal
 	stnsm	__SF_EMPTY(%r15),0xfc	# disable I/O and ext. interrupts
 	TRACE_IRQS_OFF
-	b	BASED(io_work_loop)
+	b	BASED(io_return)
 
 /*
  * External interrupt handler routine
@@ -764,15 +742,14 @@ __critical_end:
 
 	.globl mcck_int_handler
 mcck_int_handler:
-	stck	__LC_INT_CLOCK
+	stck	__LC_MCCK_CLOCK
 	spt	__LC_CPU_TIMER_SAVE_AREA	# revalidate cpu timer
 	lm	%r0,%r15,__LC_GPREGS_SAVE_AREA	# revalidate gprs
 	SAVE_ALL_BASE __LC_SAVE_AREA+32
 	la	%r12,__LC_MCK_OLD_PSW
 	tm	__LC_MCCK_CODE,0x80	# system damage?
 	bo	BASED(mcck_int_main)	# yes -> rest of mcck code invalid
-	mvc	__LC_SAVE_AREA+52(8),__LC_ASYNC_ENTER_TIMER
-	mvc	__LC_ASYNC_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA
+	mvc	__LC_MCCK_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA
 	tm	__LC_MCCK_CODE+5,0x02	# stored cpu timer value valid?
 	bo	BASED(1f)
 	la	%r14,__LC_SYNC_ENTER_TIMER
@@ -786,7 +763,7 @@ mcck_int_handler:
 	bl	BASED(0f)
 	la	%r14,__LC_LAST_UPDATE_TIMER
 0:	spt	0(%r14)
-	mvc	__LC_ASYNC_ENTER_TIMER(8),0(%r14)
+	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
 1:	tm	__LC_MCCK_CODE+2,0x09	# mwp + ia of old psw valid?
 	bno	BASED(mcck_int_main)	# no -> skip cleanup critical
 	tm	__LC_MCK_OLD_PSW+1,0x01	# test problem state bit
@@ -808,9 +785,9 @@ mcck_int_main:
 	bno	BASED(mcck_no_vtime)	# no -> skip cleanup critical
 	tm	SP_PSW+1(%r15),0x01	# interrupting from user ?
 	bz	BASED(mcck_no_vtime)
-	UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER
+	UPDATE_VTIME __LC_EXIT_TIMER,__LC_MCCK_ENTER_TIMER,__LC_USER_TIMER
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
-	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER
+	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_MCCK_ENTER_TIMER
 mcck_no_vtime:
 	l	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
@@ -833,7 +810,6 @@ mcck_no_vtime:
 mcck_return:
 	mvc	__LC_RETURN_MCCK_PSW(8),SP_PSW(%r15) # move return PSW
 	ni	__LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit
-	mvc	__LC_ASYNC_ENTER_TIMER(8),__LC_SAVE_AREA+52
 	tm	__LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
 	bno	BASED(0f)
 	lm	%r0,%r15,SP_R0(%r15)	# load gprs 0-15
@@ -917,18 +893,14 @@ stack_overflow:
 
 cleanup_table_system_call:
 	.long	system_call + 0x80000000, sysc_do_svc + 0x80000000
-cleanup_table_sysc_return:
-	.long	sysc_return + 0x80000000, sysc_leave + 0x80000000
-cleanup_table_sysc_leave:
-	.long	sysc_leave + 0x80000000, sysc_done + 0x80000000
-cleanup_table_sysc_work_loop:
-	.long	sysc_work_loop + 0x80000000, sysc_work_done + 0x80000000
-cleanup_table_io_return:
-	.long	io_return + 0x80000000, io_leave + 0x80000000
-cleanup_table_io_leave:
-	.long	io_leave + 0x80000000, io_done + 0x80000000
-cleanup_table_io_work_loop:
-	.long	io_work_loop + 0x80000000, io_work_done + 0x80000000
+cleanup_table_sysc_tif:
+	.long	sysc_tif + 0x80000000, sysc_restore + 0x80000000
+cleanup_table_sysc_restore:
+	.long	sysc_restore + 0x80000000, sysc_done + 0x80000000
+cleanup_table_io_tif:
+	.long	io_tif + 0x80000000, io_restore + 0x80000000
+cleanup_table_io_restore:
+	.long	io_restore + 0x80000000, io_done + 0x80000000
 
 cleanup_critical:
 	clc	4(4,%r12),BASED(cleanup_table_system_call)
@@ -936,49 +908,40 @@ cleanup_critical:
 	clc	4(4,%r12),BASED(cleanup_table_system_call+4)
 	bl	BASED(cleanup_system_call)
 0:
-	clc	4(4,%r12),BASED(cleanup_table_sysc_return)
-	bl	BASED(0f)
-	clc	4(4,%r12),BASED(cleanup_table_sysc_return+4)
-	bl	BASED(cleanup_sysc_return)
-0:
-	clc	4(4,%r12),BASED(cleanup_table_sysc_leave)
-	bl	BASED(0f)
-	clc	4(4,%r12),BASED(cleanup_table_sysc_leave+4)
-	bl	BASED(cleanup_sysc_leave)
-0:
-	clc	4(4,%r12),BASED(cleanup_table_sysc_work_loop)
+	clc	4(4,%r12),BASED(cleanup_table_sysc_tif)
 	bl	BASED(0f)
-	clc	4(4,%r12),BASED(cleanup_table_sysc_work_loop+4)
-	bl	BASED(cleanup_sysc_return)
+	clc	4(4,%r12),BASED(cleanup_table_sysc_tif+4)
+	bl	BASED(cleanup_sysc_tif)
 0:
-	clc	4(4,%r12),BASED(cleanup_table_io_return)
+	clc	4(4,%r12),BASED(cleanup_table_sysc_restore)
 	bl	BASED(0f)
-	clc	4(4,%r12),BASED(cleanup_table_io_return+4)
-	bl	BASED(cleanup_io_return)
+	clc	4(4,%r12),BASED(cleanup_table_sysc_restore+4)
+	bl	BASED(cleanup_sysc_restore)
 0:
-	clc	4(4,%r12),BASED(cleanup_table_io_leave)
+	clc	4(4,%r12),BASED(cleanup_table_io_tif)
 	bl	BASED(0f)
-	clc	4(4,%r12),BASED(cleanup_table_io_leave+4)
-	bl	BASED(cleanup_io_leave)
+	clc	4(4,%r12),BASED(cleanup_table_io_tif+4)
+	bl	BASED(cleanup_io_tif)
 0:
-	clc	4(4,%r12),BASED(cleanup_table_io_work_loop)
+	clc	4(4,%r12),BASED(cleanup_table_io_restore)
 	bl	BASED(0f)
-	clc	4(4,%r12),BASED(cleanup_table_io_work_loop+4)
-	bl	BASED(cleanup_io_work_loop)
+	clc	4(4,%r12),BASED(cleanup_table_io_restore+4)
+	bl	BASED(cleanup_io_restore)
 0:
 	br	%r14
 
 cleanup_system_call:
 	mvc	__LC_RETURN_PSW(8),0(%r12)
-	c	%r12,BASED(.Lmck_old_psw)
-	be	BASED(0f)
-	la	%r12,__LC_SAVE_AREA+16
-	b	BASED(1f)
-0:	la	%r12,__LC_SAVE_AREA+32
-1:
 	clc	__LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+4)
 	bh	BASED(0f)
+	mvc	__LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER
+	c	%r12,BASED(.Lmck_old_psw)
+	be	BASED(0f)
 	mvc	__LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER
+0:	c	%r12,BASED(.Lmck_old_psw)
+	la	%r12,__LC_SAVE_AREA+32
+	be	BASED(0f)
+	la	%r12,__LC_SAVE_AREA+16
 0:	clc	__LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+8)
 	bhe	BASED(cleanup_vtime)
 	clc	__LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn)
@@ -1011,61 +974,54 @@ cleanup_system_call_insn:
 	.long	sysc_stime + 0x80000000
 	.long	sysc_update + 0x80000000
 
-cleanup_sysc_return:
+cleanup_sysc_tif:
 	mvc	__LC_RETURN_PSW(4),0(%r12)
-	mvc	__LC_RETURN_PSW+4(4),BASED(cleanup_table_sysc_return)
+	mvc	__LC_RETURN_PSW+4(4),BASED(cleanup_table_sysc_tif)
 	la	%r12,__LC_RETURN_PSW
 	br	%r14
 
-cleanup_sysc_leave:
-	clc	4(4,%r12),BASED(cleanup_sysc_leave_insn)
+cleanup_sysc_restore:
+	clc	4(4,%r12),BASED(cleanup_sysc_restore_insn)
 	be	BASED(2f)
+	mvc	__LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER
+	c	%r12,BASED(.Lmck_old_psw)
+	be	BASED(0f)
 	mvc	__LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
-	clc	4(4,%r12),BASED(cleanup_sysc_leave_insn+4)
+0:	clc	4(4,%r12),BASED(cleanup_sysc_restore_insn+4)
 	be	BASED(2f)
 	mvc	__LC_RETURN_PSW(8),SP_PSW(%r15)
 	c	%r12,BASED(.Lmck_old_psw)
-	bne	BASED(0f)
-	mvc	__LC_SAVE_AREA+32(16),SP_R12(%r15)
-	b	BASED(1f)
-0:	mvc	__LC_SAVE_AREA+16(16),SP_R12(%r15)
-1:	lm	%r0,%r11,SP_R0(%r15)
+	la	%r12,__LC_SAVE_AREA+32
+	be	BASED(1f)
+	la	%r12,__LC_SAVE_AREA+16
+1:	mvc	0(16,%r12),SP_R12(%r15)
+	lm	%r0,%r11,SP_R0(%r15)
 	l	%r15,SP_R15(%r15)
 2:	la	%r12,__LC_RETURN_PSW
 	br	%r14
-cleanup_sysc_leave_insn:
+cleanup_sysc_restore_insn:
 	.long	sysc_done - 4 + 0x80000000
 	.long	sysc_done - 8 + 0x80000000
 
-cleanup_io_return:
+cleanup_io_tif:
 	mvc	__LC_RETURN_PSW(4),0(%r12)
-	mvc	__LC_RETURN_PSW+4(4),BASED(cleanup_table_io_return)
+	mvc	__LC_RETURN_PSW+4(4),BASED(cleanup_table_io_tif)
 	la	%r12,__LC_RETURN_PSW
 	br	%r14
 
-cleanup_io_work_loop:
-	mvc	__LC_RETURN_PSW(4),0(%r12)
-	mvc	__LC_RETURN_PSW+4(4),BASED(cleanup_table_io_work_loop)
-	la	%r12,__LC_RETURN_PSW
-	br	%r14
-
-cleanup_io_leave:
-	clc	4(4,%r12),BASED(cleanup_io_leave_insn)
-	be	BASED(2f)
-	mvc	__LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
-	clc	4(4,%r12),BASED(cleanup_io_leave_insn+4)
-	be	BASED(2f)
+cleanup_io_restore:
+	clc	4(4,%r12),BASED(cleanup_io_restore_insn)
+	be	BASED(1f)
+	mvc	__LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER
+	clc	4(4,%r12),BASED(cleanup_io_restore_insn+4)
+	be	BASED(1f)
 	mvc	__LC_RETURN_PSW(8),SP_PSW(%r15)
-	c	%r12,BASED(.Lmck_old_psw)
-	bne	BASED(0f)
 	mvc	__LC_SAVE_AREA+32(16),SP_R12(%r15)
-	b	BASED(1f)
-0:	mvc	__LC_SAVE_AREA+16(16),SP_R12(%r15)
-1:	lm	%r0,%r11,SP_R0(%r15)
+	lm	%r0,%r11,SP_R0(%r15)
 	l	%r15,SP_R15(%r15)
-2:	la	%r12,__LC_RETURN_PSW
+1:	la	%r12,__LC_RETURN_PSW
 	br	%r14
-cleanup_io_leave_insn:
+cleanup_io_restore_insn:
 	.long	io_done - 4 + 0x80000000
 	.long	io_done - 8 + 0x80000000
 
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 52106d53271c..178d92536d90 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -2,7 +2,7 @@
  *  arch/s390/kernel/entry64.S
  *    S390 low-level entry points.
  *
- *    Copyright (C) IBM Corp. 1999,2006
+ *    Copyright (C) IBM Corp. 1999,2010
  *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
  *		 Hartmut Penner (hp@de.ibm.com),
  *		 Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
@@ -59,30 +59,45 @@ _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \
 
 #define BASED(name) name-system_call(%r13)
 
+	.macro	HANDLE_SIE_INTERCEPT
+#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+	lg	%r3,__LC_SIE_HOOK
+	ltgr	%r3,%r3
+	jz	0f
+	basr	%r14,%r3
+	0:
+#endif
+	.endm
+
 #ifdef CONFIG_TRACE_IRQFLAGS
 	.macro	TRACE_IRQS_ON
-	 basr	%r2,%r0
-	 brasl	%r14,trace_hardirqs_on_caller
+	basr	%r2,%r0
+	brasl	%r14,trace_hardirqs_on_caller
 	.endm
 
 	.macro	TRACE_IRQS_OFF
-	 basr	%r2,%r0
-	 brasl	%r14,trace_hardirqs_off_caller
+	basr	%r2,%r0
+	brasl	%r14,trace_hardirqs_off_caller
 	.endm
 
-	.macro TRACE_IRQS_CHECK
-	basr	%r2,%r0
+	.macro TRACE_IRQS_CHECK_ON
 	tm	SP_PSW(%r15),0x03	# irqs enabled?
 	jz	0f
-	brasl	%r14,trace_hardirqs_on_caller
-	j	1f
-0:	brasl	%r14,trace_hardirqs_off_caller
-1:
+	TRACE_IRQS_ON
+0:
+	.endm
+
+	.macro TRACE_IRQS_CHECK_OFF
+	tm	SP_PSW(%r15),0x03	# irqs enabled?
+	jz	0f
+	TRACE_IRQS_OFF
+0:
 	.endm
 #else
 #define TRACE_IRQS_ON
 #define TRACE_IRQS_OFF
-#define TRACE_IRQS_CHECK
+#define TRACE_IRQS_CHECK_ON
+#define TRACE_IRQS_CHECK_OFF
 #endif
 
 #ifdef CONFIG_LOCKDEP
@@ -111,31 +126,35 @@ _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \
  *    R15 - kernel stack pointer
  */
 
-	.macro	SAVE_ALL_BASE savearea
-	stmg	%r12,%r15,\savearea
-	larl	%r13,system_call
-	.endm
-
 	.macro	SAVE_ALL_SVC psworg,savearea
-	la	%r12,\psworg
+	stmg	%r11,%r15,\savearea
 	lg	%r15,__LC_KERNEL_STACK	# problem state -> load ksp
+	aghi	%r15,-SP_SIZE		# make room for registers & psw
+	lg	%r11,__LC_LAST_BREAK
 	.endm
 
-	.macro	SAVE_ALL_SYNC psworg,savearea
-	la	%r12,\psworg
+	.macro	SAVE_ALL_PGM psworg,savearea
+	stmg	%r11,%r15,\savearea
 	tm	\psworg+1,0x01		# test problem state bit
-	jz	2f			# skip stack setup save
-	lg	%r15,__LC_KERNEL_STACK	# problem state -> load ksp
 #ifdef CONFIG_CHECK_STACK
-	j	3f
-2:	tml	%r15,STACK_SIZE - CONFIG_STACK_GUARD
-	jz	stack_overflow
-3:
+	jnz	1f
+	tml	%r15,STACK_SIZE - CONFIG_STACK_GUARD
+	jnz	2f
+	la	%r12,\psworg
+	j	stack_overflow
+#else
+	jz	2f
 #endif
-2:
+1:	lg	%r15,__LC_KERNEL_STACK	# problem state -> load ksp
+2:	aghi	%r15,-SP_SIZE		# make room for registers & psw
+	larl	%r13,system_call
+	lg	%r11,__LC_LAST_BREAK
 	.endm
 
 	.macro	SAVE_ALL_ASYNC psworg,savearea
+	stmg	%r11,%r15,\savearea
+	larl	%r13,system_call
+	lg	%r11,__LC_LAST_BREAK
 	la	%r12,\psworg
 	tm	\psworg+1,0x01		# test problem state bit
 	jnz	1f			# from user -> load kernel stack
@@ -149,27 +168,23 @@ _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \
 0:	lg	%r14,__LC_ASYNC_STACK	# are we already on the async. stack ?
 	slgr	%r14,%r15
 	srag	%r14,%r14,STACK_SHIFT
-	jz	2f
-1:	lg	%r15,__LC_ASYNC_STACK	# load async stack
 #ifdef CONFIG_CHECK_STACK
-	j	3f
-2:	tml	%r15,STACK_SIZE - CONFIG_STACK_GUARD
-	jz	stack_overflow
-3:
+	jnz	1f
+	tml	%r15,STACK_SIZE - CONFIG_STACK_GUARD
+	jnz	2f
+	j	stack_overflow
+#else
+	jz	2f
 #endif
-2:
+1:	lg	%r15,__LC_ASYNC_STACK	# load async stack
+2:	aghi	%r15,-SP_SIZE		# make room for registers & psw
 	.endm
 
-	.macro	CREATE_STACK_FRAME psworg,savearea
-	aghi	%r15,-SP_SIZE		# make room for registers & psw
-	mvc	SP_PSW(16,%r15),0(%r12)	# move user PSW to stack
+	.macro	CREATE_STACK_FRAME savearea
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	stg	%r2,SP_ORIG_R2(%r15)	# store original content of gpr 2
-	icm	%r12,3,__LC_SVC_ILC
-	stmg	%r0,%r11,SP_R0(%r15)	# store gprs %r0-%r11 to kernel stack
-	st	%r12,SP_SVCNR(%r15)
-	mvc	SP_R12(32,%r15),\savearea # move %r12-%r15 to stack
-	la	%r12,0
-	stg	%r12,__SF_BACKCHAIN(%r15)
+	mvc	SP_R11(40,%r15),\savearea # move %r11-%r15 to stack
+	stmg	%r0,%r10,SP_R0(%r15)	# store gprs %r0-%r10 to kernel stack
 	.endm
 
 	.macro	RESTORE_ALL psworg,sync
@@ -185,6 +200,13 @@ _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \
 	lpswe	\psworg			# back to caller
 	.endm
 
+	.macro	LAST_BREAK
+	srag	%r10,%r11,23
+	jz	0f
+	stg	%r11,__TI_last_break(%r12)
+0:
+	.endm
+
 /*
  * Scheduler resume function, called by switch_to
  *  gpr2 = (task_struct *) prev
@@ -230,143 +252,129 @@ __critical_start:
 system_call:
 	stpt	__LC_SYNC_ENTER_TIMER
 sysc_saveall:
-	SAVE_ALL_BASE __LC_SAVE_AREA
 	SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA
-	CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA
-	llgh	%r7,__LC_SVC_INT_CODE	# get svc number from lowcore
+	CREATE_STACK_FRAME __LC_SAVE_AREA
+	mvc	SP_PSW(16,%r15),__LC_SVC_OLD_PSW
+	mvc	SP_ILC(4,%r15),__LC_SVC_ILC
+	stg	%r7,SP_ARGS(%r15)
+	lg	%r12,__LC_THREAD_INFO	# load pointer to thread_info struct
 sysc_vtime:
 	UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER
 sysc_stime:
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
 sysc_update:
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
+	LAST_BREAK
 sysc_do_svc:
-	lg	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
-	ltgr	%r7,%r7		# test for svc 0
+	llgh	%r7,SP_SVCNR(%r15)
+	slag	%r7,%r7,2	# shift and test for svc 0
 	jnz	sysc_nr_ok
 	# svc 0: system call number in %r1
-	cl	%r1,BASED(.Lnr_syscalls)
+	llgfr	%r1,%r1		# clear high word in r1
+	cghi	%r1,NR_syscalls
 	jnl	sysc_nr_ok
-	lgfr	%r7,%r1 	# clear high word in r1
+	sth	%r1,SP_SVCNR(%r15)
+	slag	%r7,%r1,2	# shift and test for svc 0
 sysc_nr_ok:
-	mvc	SP_ARGS(8,%r15),SP_R7(%r15)
-sysc_do_restart:
-	sth	%r7,SP_SVCNR(%r15)
-	sllg	%r7,%r7,2	# svc number * 4
 	larl	%r10,sys_call_table
 #ifdef CONFIG_COMPAT
-	tm	__TI_flags+5(%r9),(_TIF_31BIT>>16)  # running in 31 bit mode ?
+	tm	__TI_flags+5(%r12),(_TIF_31BIT>>16)  # running in 31 bit mode ?
 	jno	sysc_noemu
 	larl	%r10,sys_call_table_emu  # use 31 bit emulation system calls
 sysc_noemu:
 #endif
-	tm	__TI_flags+6(%r9),_TIF_SYSCALL
+	tm	__TI_flags+6(%r12),_TIF_SYSCALL
 	lgf	%r8,0(%r7,%r10) # load address of system call routine
 	jnz	sysc_tracesys
 	basr	%r14,%r8	# call sys_xxxx
 	stg	%r2,SP_R2(%r15) # store return value (change R2 on stack)
 
 sysc_return:
-	tm	__TI_flags+7(%r9),_TIF_WORK_SVC
+	LOCKDEP_SYS_EXIT
+sysc_tif:
+	tm	__TI_flags+7(%r12),_TIF_WORK_SVC
 	jnz	sysc_work	# there is work to do (signals etc.)
 sysc_restore:
-#ifdef CONFIG_TRACE_IRQFLAGS
-	larl	%r1,sysc_restore_trace_psw
-	lpswe	0(%r1)
-sysc_restore_trace:
-	TRACE_IRQS_CHECK
-	LOCKDEP_SYS_EXIT
-#endif
-sysc_leave:
 	RESTORE_ALL __LC_RETURN_PSW,1
 sysc_done:
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-	.section .data,"aw",@progbits
-	.align	8
-	.globl sysc_restore_trace_psw
-sysc_restore_trace_psw:
-	.quad	0, sysc_restore_trace
-	.previous
-#endif
-
-#
-# recheck if there is more work to do
 #
-sysc_work_loop:
-	tm	__TI_flags+7(%r9),_TIF_WORK_SVC
-	jz	sysc_restore	  # there is no work to do
-#
-# One of the work bits is on. Find out which one.
+# There is work to do, but first we need to check if we return to userspace.
 #
 sysc_work:
 	tm	SP_PSW+1(%r15),0x01	# returning to user ?
 	jno	sysc_restore
-	tm	__TI_flags+7(%r9),_TIF_MCCK_PENDING
+
+#
+# One of the work bits is on. Find out which one.
+#
+sysc_work_tif:
+	tm	__TI_flags+7(%r12),_TIF_MCCK_PENDING
 	jo	sysc_mcck_pending
-	tm	__TI_flags+7(%r9),_TIF_NEED_RESCHED
+	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
 	jo	sysc_reschedule
-	tm	__TI_flags+7(%r9),_TIF_SIGPENDING
-	jnz	sysc_sigpending
-	tm	__TI_flags+7(%r9),_TIF_NOTIFY_RESUME
-	jnz	sysc_notify_resume
-	tm	__TI_flags+7(%r9),_TIF_RESTART_SVC
+	tm	__TI_flags+7(%r12),_TIF_SIGPENDING
+	jo	sysc_sigpending
+	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
+	jo	sysc_notify_resume
+	tm	__TI_flags+7(%r12),_TIF_RESTART_SVC
 	jo	sysc_restart
-	tm	__TI_flags+7(%r9),_TIF_SINGLE_STEP
+	tm	__TI_flags+7(%r12),_TIF_SINGLE_STEP
 	jo	sysc_singlestep
-	j	sysc_restore
-sysc_work_done:
+	j	sysc_return		# beware of critical section cleanup
 
 #
 # _TIF_NEED_RESCHED is set, call schedule
 #
 sysc_reschedule:
-	larl	%r14,sysc_work_loop
-	jg	schedule	# return point is sysc_return
+	larl	%r14,sysc_return
+	jg	schedule		# return point is sysc_return
 
 #
 # _TIF_MCCK_PENDING is set, call handler
 #
 sysc_mcck_pending:
-	larl	%r14,sysc_work_loop
+	larl	%r14,sysc_return
 	jg	s390_handle_mcck	# TIF bit will be cleared by handler
 
 #
 # _TIF_SIGPENDING is set, call do_signal
 #
 sysc_sigpending:
-	ni	__TI_flags+7(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP
+	ni	__TI_flags+7(%r12),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	brasl	%r14,do_signal		# call do_signal
-	tm	__TI_flags+7(%r9),_TIF_RESTART_SVC
+	tm	__TI_flags+7(%r12),_TIF_RESTART_SVC
 	jo	sysc_restart
-	tm	__TI_flags+7(%r9),_TIF_SINGLE_STEP
+	tm	__TI_flags+7(%r12),_TIF_SINGLE_STEP
 	jo	sysc_singlestep
-	j	sysc_work_loop
+	j	sysc_return
 
 #
 # _TIF_NOTIFY_RESUME is set, call do_notify_resume
 #
 sysc_notify_resume:
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
-	larl	%r14,sysc_work_loop
+	larl	%r14,sysc_return
 	jg	do_notify_resume	# call do_notify_resume
 
 #
 # _TIF_RESTART_SVC is set, set up registers and restart svc
 #
 sysc_restart:
-	ni	__TI_flags+7(%r9),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC
+	ni	__TI_flags+7(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC
 	lg	%r7,SP_R2(%r15)		# load new svc number
 	mvc	SP_R2(8,%r15),SP_ORIG_R2(%r15) # restore first argument
 	lmg	%r2,%r6,SP_R2(%r15)	# load svc arguments
-	j	sysc_do_restart 	# restart svc
+	sth	%r7,SP_SVCNR(%r15)
+	slag	%r7,%r7,2
+	j	sysc_nr_ok		# restart svc
 
 #
 # _TIF_SINGLE_STEP is set, call do_single_step
 #
 sysc_singlestep:
-	ni	__TI_flags+7(%r9),255-_TIF_SINGLE_STEP	# clear TIF_SINGLE_STEP
+	ni	__TI_flags+7(%r12),255-_TIF_SINGLE_STEP	# clear TIF_SINGLE_STEP
 	xc	SP_SVCNR(2,%r15),SP_SVCNR(%r15)		# clear svc number
 	la	%r2,SP_PTREGS(%r15)	# address of register-save area
 	larl	%r14,sysc_return	# load adr. of system return
@@ -379,8 +387,8 @@ sysc_singlestep:
 sysc_tracesys:
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	la	%r3,0
-	srl	%r7,2
-	stg	%r7,SP_R2(%r15)
+	llgh	%r0,SP_SVCNR(%r15)
+	stg	%r0,SP_R2(%r15)
 	brasl	%r14,do_syscall_trace_enter
 	lghi	%r0,NR_syscalls
 	clgr	%r0,%r2
@@ -393,7 +401,7 @@ sysc_tracego:
 	basr	%r14,%r8		# call sys_xxx
 	stg	%r2,SP_R2(%r15)		# store return value
 sysc_tracenogo:
-	tm	__TI_flags+6(%r9),_TIF_SYSCALL
+	tm	__TI_flags+6(%r12),_TIF_SYSCALL
 	jz	sysc_return
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	larl	%r14,sysc_return	# return point is sysc_return
@@ -405,7 +413,7 @@ sysc_tracenogo:
 	.globl	ret_from_fork
 ret_from_fork:
 	lg	%r13,__LC_SVC_NEW_PSW+8
-	lg	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
+	lg	%r12,__LC_THREAD_INFO	# load pointer to thread_info struct
 	tm	SP_PSW+1(%r15),0x01	# forking a kernel thread ?
 	jo	0f
 	stg	%r15,SP_R15(%r15)	# store stack pointer for new kthread
@@ -435,12 +443,14 @@ kernel_execve:
 	br	%r14
 	# execve succeeded.
 0:	stnsm	__SF_EMPTY(%r15),0xfc	# disable interrupts
+#	TRACE_IRQS_OFF
 	lg	%r15,__LC_KERNEL_STACK	# load ksp
 	aghi	%r15,-SP_SIZE		# make room for registers & psw
 	lg	%r13,__LC_SVC_NEW_PSW+8
-	lg	%r9,__LC_THREAD_INFO
 	mvc	SP_PTREGS(__PT_SIZE,%r15),0(%r12)	# copy pt_regs
+	lg	%r12,__LC_THREAD_INFO
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+#	TRACE_IRQS_ON
 	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
 	brasl	%r14,execve_tail
 	j	sysc_return
@@ -465,20 +475,23 @@ pgm_check_handler:
  * for LPSW?).
  */
 	stpt	__LC_SYNC_ENTER_TIMER
-	SAVE_ALL_BASE __LC_SAVE_AREA
 	tm	__LC_PGM_INT_CODE+1,0x80 # check whether we got a per exception
 	jnz	pgm_per 		 # got per exception -> special case
-	SAVE_ALL_SYNC __LC_PGM_OLD_PSW,__LC_SAVE_AREA
-	CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA
+	SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA
+	CREATE_STACK_FRAME __LC_SAVE_AREA
+	xc	SP_ILC(4,%r15),SP_ILC(%r15)
+	mvc	SP_PSW(16,%r15),__LC_PGM_OLD_PSW
+	lg	%r12,__LC_THREAD_INFO	# load pointer to thread_info struct
 	tm	SP_PSW+1(%r15),0x01	# interrupting from user ?
 	jz	pgm_no_vtime
 	UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
+	LAST_BREAK
 pgm_no_vtime:
-	lg	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
-	mvc	SP_ARGS(8,%r15),__LC_LAST_BREAK
-	TRACE_IRQS_OFF
+	HANDLE_SIE_INTERCEPT
+	TRACE_IRQS_CHECK_OFF
+	stg	%r11,SP_ARGS(%r15)
 	lgf	%r3,__LC_PGM_ILC	# load program interruption code
 	lghi	%r8,0x7f
 	ngr	%r8,%r3
@@ -487,8 +500,10 @@ pgm_do_call:
 	larl	%r1,pgm_check_table
 	lg	%r1,0(%r8,%r1)		# load address of handler routine
 	la	%r2,SP_PTREGS(%r15)	# address of register-save area
-	larl	%r14,sysc_return
-	br	%r1			# branch to interrupt-handler
+	basr	%r14,%r1		# branch to interrupt-handler
+pgm_exit:
+	TRACE_IRQS_CHECK_ON
+	j	sysc_return
 
 #
 # handle per exception
@@ -500,55 +515,60 @@ pgm_per:
 	clc	__LC_PGM_OLD_PSW(16),__LC_SVC_NEW_PSW
 	je	pgm_svcper
 # no interesting special case, ignore PER event
-	lmg	%r12,%r15,__LC_SAVE_AREA
 	lpswe	__LC_PGM_OLD_PSW
 
 #
 # Normal per exception
 #
 pgm_per_std:
-	SAVE_ALL_SYNC __LC_PGM_OLD_PSW,__LC_SAVE_AREA
-	CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA
+	SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA
+	CREATE_STACK_FRAME __LC_SAVE_AREA
+	mvc	SP_PSW(16,%r15),__LC_PGM_OLD_PSW
+	lg	%r12,__LC_THREAD_INFO	# load pointer to thread_info struct
 	tm	SP_PSW+1(%r15),0x01	# interrupting from user ?
 	jz	pgm_no_vtime2
 	UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
+	LAST_BREAK
 pgm_no_vtime2:
-	lg	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
-	TRACE_IRQS_OFF
-	lg	%r1,__TI_task(%r9)
+	HANDLE_SIE_INTERCEPT
+	TRACE_IRQS_CHECK_OFF
+	lg	%r1,__TI_task(%r12)
 	tm	SP_PSW+1(%r15),0x01	# kernel per event ?
 	jz	kernel_per
 	mvc	__THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID
 	mvc	__THREAD_per+__PER_address(8,%r1),__LC_PER_ADDRESS
 	mvc	__THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID
-	oi	__TI_flags+7(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
+	oi	__TI_flags+7(%r12),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
 	lgf	%r3,__LC_PGM_ILC	# load program interruption code
 	lghi	%r8,0x7f
 	ngr	%r8,%r3			# clear per-event-bit and ilc
-	je	sysc_return
+	je	pgm_exit
 	j	pgm_do_call
 
 #
 # it was a single stepped SVC that is causing all the trouble
 #
 pgm_svcper:
-	SAVE_ALL_SYNC __LC_SVC_OLD_PSW,__LC_SAVE_AREA
-	CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA
+	SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA
+	CREATE_STACK_FRAME __LC_SAVE_AREA
+	mvc	SP_PSW(16,%r15),__LC_SVC_OLD_PSW
+	mvc	SP_ILC(4,%r15),__LC_SVC_ILC
+	lg	%r12,__LC_THREAD_INFO	# load pointer to thread_info struct
 	UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
-	llgh	%r7,__LC_SVC_INT_CODE	# get svc number from lowcore
-	lg	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
-	lg	%r8,__TI_task(%r9)
+	LAST_BREAK
+	TRACE_IRQS_OFF
+	lg	%r8,__TI_task(%r12)
 	mvc	__THREAD_per+__PER_atmid(2,%r8),__LC_PER_ATMID
 	mvc	__THREAD_per+__PER_address(8,%r8),__LC_PER_ADDRESS
 	mvc	__THREAD_per+__PER_access_id(1,%r8),__LC_PER_ACCESS_ID
-	oi	__TI_flags+7(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
+	oi	__TI_flags+7(%r12),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
 	TRACE_IRQS_ON
-	lmg	%r2,%r6,SP_R2(%r15)	# load svc arguments
 	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
+	lmg	%r2,%r6,SP_R2(%r15)	# load svc arguments
 	j	sysc_do_svc
 
 #
@@ -557,8 +577,8 @@ pgm_svcper:
 kernel_per:
 	xc	SP_SVCNR(2,%r15),SP_SVCNR(%r15)	# clear svc number
 	la	%r2,SP_PTREGS(%r15)	# address of register-save area
-	larl	%r14,sysc_restore	# load adr. of system ret, no work
-	jg	do_single_step		# branch to do_single_step
+	brasl	%r14,do_single_step
+	j	pgm_exit
 
 /*
  * IO interrupt handler routine
@@ -567,162 +587,133 @@ kernel_per:
 io_int_handler:
 	stck	__LC_INT_CLOCK
 	stpt	__LC_ASYNC_ENTER_TIMER
-	SAVE_ALL_BASE __LC_SAVE_AREA+32
-	SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+32
-	CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32
+	SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+40
+	CREATE_STACK_FRAME __LC_SAVE_AREA+40
+	mvc	SP_PSW(16,%r15),0(%r12)	# move user PSW to stack
+	lg	%r12,__LC_THREAD_INFO	# load pointer to thread_info struct
 	tm	SP_PSW+1(%r15),0x01	# interrupting from user ?
 	jz	io_no_vtime
 	UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER
+	LAST_BREAK
 io_no_vtime:
-	lg	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
+	HANDLE_SIE_INTERCEPT
 	TRACE_IRQS_OFF
 	la	%r2,SP_PTREGS(%r15)	# address of register-save area
 	brasl	%r14,do_IRQ		# call standard irq handler
 io_return:
-	tm	__TI_flags+7(%r9),_TIF_WORK_INT
+	LOCKDEP_SYS_EXIT
+	TRACE_IRQS_ON
+io_tif:
+	tm	__TI_flags+7(%r12),_TIF_WORK_INT
 	jnz	io_work 		# there is work to do (signals etc.)
 io_restore:
-#ifdef CONFIG_TRACE_IRQFLAGS
-	larl	%r1,io_restore_trace_psw
-	lpswe	0(%r1)
-io_restore_trace:
-	TRACE_IRQS_CHECK
-	LOCKDEP_SYS_EXIT
-#endif
-io_leave:
 	RESTORE_ALL __LC_RETURN_PSW,0
 io_done:
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-	.section .data,"aw",@progbits
-	.align	8
-	.globl io_restore_trace_psw
-io_restore_trace_psw:
-	.quad	0, io_restore_trace
-	.previous
-#endif
-
 #
-# There is work todo, we need to check if we return to userspace, then
-# check, if we are in SIE, if yes leave it
+# There is work to do, find out in which context we have been interrupted:
+# 1) if we return to user space we can do all _TIF_WORK_INT work
+# 2) if we return to kernel code and kvm is enabled check if we need to
+#    modify the psw to leave SIE
+# 3) if we return to kernel code and preemptive scheduling is enabled check
+#    the preemption counter and if it is zero call preempt_schedule_irq
+# Before any work can be done, a switch to the kernel stack is required.
 #
 io_work:
 	tm	SP_PSW+1(%r15),0x01	# returning to user ?
-#ifndef CONFIG_PREEMPT
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
-	jnz	io_work_user		# yes -> no need to check for SIE
-	la	%r1, BASED(sie_opcode)	# we return to kernel here
-	lg	%r2, SP_PSW+8(%r15)
-	clc	0(2,%r1), 0(%r2)	# is current instruction = SIE?
-	jne	io_restore		# no-> return to kernel
-	lg	%r1, SP_PSW+8(%r15)	# yes-> add 4 bytes to leave SIE
-	aghi	%r1, 4
-	stg	%r1, SP_PSW+8(%r15)
-	j	io_restore		# return to kernel
-#else
-	jno	io_restore		# no-> skip resched & signal
-#endif
-#else
-	jnz	io_work_user		# yes -> do resched & signal
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
-	la	%r1, BASED(sie_opcode)
-	lg	%r2, SP_PSW+8(%r15)
-	clc	0(2,%r1), 0(%r2)	# is current instruction = SIE?
-	jne	0f			# no -> leave PSW alone
-	lg	%r1, SP_PSW+8(%r15)	# yes-> add 4 bytes to leave SIE
-	aghi	%r1, 4
-	stg	%r1, SP_PSW+8(%r15)
-0:
-#endif
+	jo	io_work_user		# yes -> do resched & signal
+#ifdef CONFIG_PREEMPT
 	# check for preemptive scheduling
-	icm	%r0,15,__TI_precount(%r9)
+	icm	%r0,15,__TI_precount(%r12)
 	jnz	io_restore		# preemption is disabled
+	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
+	jno	io_restore
 	# switch to kernel stack
 	lg	%r1,SP_R15(%r15)
 	aghi	%r1,-SP_SIZE
 	mvc	SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15)
 	xc	__SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain
 	lgr	%r15,%r1
-io_resume_loop:
-	tm	__TI_flags+7(%r9),_TIF_NEED_RESCHED
-	jno	io_restore
-	larl	%r14,io_resume_loop
-	jg	preempt_schedule_irq
+	# TRACE_IRQS_ON already done at io_return, call
+	# TRACE_IRQS_OFF to keep things symmetrical
+	TRACE_IRQS_OFF
+	brasl	%r14,preempt_schedule_irq
+	j	io_return
+#else
+	j	io_restore
 #endif
 
+#
+# Need to do work before returning to userspace, switch to kernel stack
+#
 io_work_user:
 	lg	%r1,__LC_KERNEL_STACK
 	aghi	%r1,-SP_SIZE
 	mvc	SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15)
 	xc	__SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain
 	lgr	%r15,%r1
+
 #
 # One of the work bits is on. Find out which one.
-# Checked are: _TIF_SIGPENDING, _TIF_RESTORE_SIGPENDING, _TIF_NEED_RESCHED
+# Checked are: _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_NEED_RESCHED
 #	       and _TIF_MCCK_PENDING
 #
-io_work_loop:
-	tm	__TI_flags+7(%r9),_TIF_MCCK_PENDING
+io_work_tif:
+	tm	__TI_flags+7(%r12),_TIF_MCCK_PENDING
 	jo	io_mcck_pending
-	tm	__TI_flags+7(%r9),_TIF_NEED_RESCHED
+	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
 	jo	io_reschedule
-	tm	__TI_flags+7(%r9),_TIF_SIGPENDING
-	jnz	io_sigpending
-	tm	__TI_flags+7(%r9),_TIF_NOTIFY_RESUME
-	jnz	io_notify_resume
-	j	io_restore
-io_work_done:
-
-#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
-sie_opcode:
-	.long 0xb2140000
-#endif
+	tm	__TI_flags+7(%r12),_TIF_SIGPENDING
+	jo	io_sigpending
+	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
+	jo	io_notify_resume
+	j	io_return		# beware of critical section cleanup
 
 #
 # _TIF_MCCK_PENDING is set, call handler
 #
 io_mcck_pending:
+	# TRACE_IRQS_ON already done at io_return
 	brasl	%r14,s390_handle_mcck	# TIF bit will be cleared by handler
-	j	io_work_loop
+	TRACE_IRQS_OFF
+	j	io_return
 
 #
 # _TIF_NEED_RESCHED is set, call schedule
 #
 io_reschedule:
-	TRACE_IRQS_ON
+	# TRACE_IRQS_ON already done at io_return
 	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
 	brasl	%r14,schedule		# call scheduler
 	stnsm	__SF_EMPTY(%r15),0xfc	# disable I/O and ext. interrupts
 	TRACE_IRQS_OFF
-	tm	__TI_flags+7(%r9),_TIF_WORK_INT
-	jz	io_restore		# there is no work to do
-	j	io_work_loop
+	j	io_return
 
 #
 # _TIF_SIGPENDING or is set, call do_signal
 #
 io_sigpending:
-	TRACE_IRQS_ON
+	# TRACE_IRQS_ON already done at io_return
 	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	brasl	%r14,do_signal		# call do_signal
 	stnsm	__SF_EMPTY(%r15),0xfc	# disable I/O and ext. interrupts
 	TRACE_IRQS_OFF
-	j	io_work_loop
+	j	io_return
 
 #
 # _TIF_NOTIFY_RESUME or is set, call do_notify_resume
 #
 io_notify_resume:
-	TRACE_IRQS_ON
+	# TRACE_IRQS_ON already done at io_return
 	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	brasl	%r14,do_notify_resume	# call do_notify_resume
 	stnsm	__SF_EMPTY(%r15),0xfc	# disable I/O and ext. interrupts
 	TRACE_IRQS_OFF
-	j	io_work_loop
+	j	io_return
 
 /*
  * External interrupt handler routine
@@ -731,16 +722,18 @@ io_notify_resume:
 ext_int_handler:
 	stck	__LC_INT_CLOCK
 	stpt	__LC_ASYNC_ENTER_TIMER
-	SAVE_ALL_BASE __LC_SAVE_AREA+32
-	SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32
-	CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32
+	SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+40
+	CREATE_STACK_FRAME __LC_SAVE_AREA+40
+	mvc	SP_PSW(16,%r15),0(%r12)	# move user PSW to stack
+	lg	%r12,__LC_THREAD_INFO	# load pointer to thread_info struct
 	tm	SP_PSW+1(%r15),0x01	# interrupting from user ?
 	jz	ext_no_vtime
 	UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER
+	LAST_BREAK
 ext_no_vtime:
-	lg	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
+	HANDLE_SIE_INTERCEPT
 	TRACE_IRQS_OFF
 	la	%r2,SP_PTREGS(%r15)	# address of register-save area
 	llgh	%r3,__LC_EXT_INT_CODE	# get interruption code
@@ -754,17 +747,18 @@ __critical_end:
  */
 	.globl mcck_int_handler
 mcck_int_handler:
-	stck	__LC_INT_CLOCK
+	stck	__LC_MCCK_CLOCK
 	la	%r1,4095		# revalidate r1
 	spt	__LC_CPU_TIMER_SAVE_AREA-4095(%r1)	# revalidate cpu timer
 	lmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
-	SAVE_ALL_BASE __LC_SAVE_AREA+64
+	stmg	%r11,%r15,__LC_SAVE_AREA+80
+	larl	%r13,system_call
+	lg	%r11,__LC_LAST_BREAK
 	la	%r12,__LC_MCK_OLD_PSW
 	tm	__LC_MCCK_CODE,0x80	# system damage?
 	jo	mcck_int_main		# yes -> rest of mcck code invalid
 	la	%r14,4095
-	mvc	__LC_SAVE_AREA+104(8),__LC_ASYNC_ENTER_TIMER
-	mvc	__LC_ASYNC_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA-4095(%r14)
+	mvc	__LC_MCCK_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA-4095(%r14)
 	tm	__LC_MCCK_CODE+5,0x02	# stored cpu timer value valid?
 	jo	1f
 	la	%r14,__LC_SYNC_ENTER_TIMER
@@ -778,7 +772,7 @@ mcck_int_handler:
 	jl	0f
 	la	%r14,__LC_LAST_UPDATE_TIMER
 0:	spt	0(%r14)
-	mvc	__LC_ASYNC_ENTER_TIMER(8),0(%r14)
+	mvc	__LC_MCCK_ENTER_TIMER(8),0(%r14)
 1:	tm	__LC_MCCK_CODE+2,0x09	# mwp + ia of old psw valid?
 	jno	mcck_int_main		# no -> skip cleanup critical
 	tm	__LC_MCK_OLD_PSW+1,0x01 # test problem state bit
@@ -794,16 +788,19 @@ mcck_int_main:
 	srag	%r14,%r14,PAGE_SHIFT
 	jz	0f
 	lg	%r15,__LC_PANIC_STACK	# load panic stack
-0:	CREATE_STACK_FRAME __LC_MCK_OLD_PSW,__LC_SAVE_AREA+64
+0:	aghi	%r15,-SP_SIZE		# make room for registers & psw
+	CREATE_STACK_FRAME __LC_SAVE_AREA+80
+	mvc	SP_PSW(16,%r15),0(%r12)
+	lg	%r12,__LC_THREAD_INFO	# load pointer to thread_info struct
 	tm	__LC_MCCK_CODE+2,0x08	# mwp of old psw valid?
 	jno	mcck_no_vtime		# no -> no timer update
 	tm	SP_PSW+1(%r15),0x01	# interrupting from user ?
 	jz	mcck_no_vtime
-	UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER
+	UPDATE_VTIME __LC_EXIT_TIMER,__LC_MCCK_ENTER_TIMER,__LC_USER_TIMER
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
-	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER
+	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_MCCK_ENTER_TIMER
+	LAST_BREAK
 mcck_no_vtime:
-	lg	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	brasl	%r14,s390_do_machine_check
 	tm	SP_PSW+1(%r15),0x01	# returning to user ?
@@ -814,8 +811,9 @@ mcck_no_vtime:
 	xc	__SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain
 	lgr	%r15,%r1
 	stosm	__SF_EMPTY(%r15),0x04	# turn dat on
-	tm	__TI_flags+7(%r9),_TIF_MCCK_PENDING
+	tm	__TI_flags+7(%r12),_TIF_MCCK_PENDING
 	jno	mcck_return
+	HANDLE_SIE_INTERCEPT
 	TRACE_IRQS_OFF
 	brasl	%r14,s390_handle_mcck
 	TRACE_IRQS_ON
@@ -823,11 +821,11 @@ mcck_return:
 	mvc	__LC_RETURN_MCCK_PSW(16),SP_PSW(%r15) # move return PSW
 	ni	__LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit
 	lmg	%r0,%r15,SP_R0(%r15)	# load gprs 0-15
-	mvc	__LC_ASYNC_ENTER_TIMER(8),__LC_SAVE_AREA+104
 	tm	__LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
 	jno	0f
 	stpt	__LC_EXIT_TIMER
 0:	lpswe	__LC_RETURN_MCCK_PSW	# back to caller
+mcck_done:
 
 /*
  * Restart interruption handler, kick starter for additional CPUs
@@ -883,14 +881,14 @@ stack_overflow:
 	lg	%r15,__LC_PANIC_STACK	# change to panic stack
 	aghi	%r15,-SP_SIZE
 	mvc	SP_PSW(16,%r15),0(%r12)	# move user PSW to stack
-	stmg	%r0,%r11,SP_R0(%r15)	# store gprs %r0-%r11 to kernel stack
+	stmg	%r0,%r10,SP_R0(%r15)	# store gprs %r0-%r10 to kernel stack
 	la	%r1,__LC_SAVE_AREA
 	chi	%r12,__LC_SVC_OLD_PSW
 	je	0f
 	chi	%r12,__LC_PGM_OLD_PSW
 	je	0f
-	la	%r1,__LC_SAVE_AREA+32
-0:	mvc	SP_R12(32,%r15),0(%r1)	# move %r12-%r15 to stack
+	la	%r1,__LC_SAVE_AREA+40
+0:	mvc	SP_R11(40,%r15),0(%r1)	# move %r11-%r15 to stack
 	mvc	SP_ARGS(8,%r15),__LC_LAST_BREAK
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # clear back chain
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
@@ -899,18 +897,14 @@ stack_overflow:
 
 cleanup_table_system_call:
 	.quad	system_call, sysc_do_svc
-cleanup_table_sysc_return:
-	.quad	sysc_return, sysc_leave
-cleanup_table_sysc_leave:
-	.quad	sysc_leave, sysc_done
-cleanup_table_sysc_work_loop:
-	.quad	sysc_work_loop, sysc_work_done
-cleanup_table_io_return:
-	.quad	io_return, io_leave
-cleanup_table_io_leave:
-	.quad	io_leave, io_done
-cleanup_table_io_work_loop:
-	.quad	io_work_loop, io_work_done
+cleanup_table_sysc_tif:
+	.quad	sysc_tif, sysc_restore
+cleanup_table_sysc_restore:
+	.quad	sysc_restore, sysc_done
+cleanup_table_io_tif:
+	.quad	io_tif, io_restore
+cleanup_table_io_restore:
+	.quad	io_restore, io_done
 
 cleanup_critical:
 	clc	8(8,%r12),BASED(cleanup_table_system_call)
@@ -918,61 +912,54 @@ cleanup_critical:
 	clc	8(8,%r12),BASED(cleanup_table_system_call+8)
 	jl	cleanup_system_call
 0:
-	clc	8(8,%r12),BASED(cleanup_table_sysc_return)
-	jl	0f
-	clc	8(8,%r12),BASED(cleanup_table_sysc_return+8)
-	jl	cleanup_sysc_return
-0:
-	clc	8(8,%r12),BASED(cleanup_table_sysc_leave)
+	clc	8(8,%r12),BASED(cleanup_table_sysc_tif)
 	jl	0f
-	clc	8(8,%r12),BASED(cleanup_table_sysc_leave+8)
-	jl	cleanup_sysc_leave
+	clc	8(8,%r12),BASED(cleanup_table_sysc_tif+8)
+	jl	cleanup_sysc_tif
 0:
-	clc	8(8,%r12),BASED(cleanup_table_sysc_work_loop)
+	clc	8(8,%r12),BASED(cleanup_table_sysc_restore)
 	jl	0f
-	clc	8(8,%r12),BASED(cleanup_table_sysc_work_loop+8)
-	jl	cleanup_sysc_return
+	clc	8(8,%r12),BASED(cleanup_table_sysc_restore+8)
+	jl	cleanup_sysc_restore
 0:
-	clc	8(8,%r12),BASED(cleanup_table_io_return)
+	clc	8(8,%r12),BASED(cleanup_table_io_tif)
 	jl	0f
-	clc	8(8,%r12),BASED(cleanup_table_io_return+8)
-	jl	cleanup_io_return
+	clc	8(8,%r12),BASED(cleanup_table_io_tif+8)
+	jl	cleanup_io_tif
 0:
-	clc	8(8,%r12),BASED(cleanup_table_io_leave)
+	clc	8(8,%r12),BASED(cleanup_table_io_restore)
 	jl	0f
-	clc	8(8,%r12),BASED(cleanup_table_io_leave+8)
-	jl	cleanup_io_leave
-0:
-	clc	8(8,%r12),BASED(cleanup_table_io_work_loop)
-	jl	0f
-	clc	8(8,%r12),BASED(cleanup_table_io_work_loop+8)
-	jl	cleanup_io_work_loop
+	clc	8(8,%r12),BASED(cleanup_table_io_restore+8)
+	jl	cleanup_io_restore
 0:
 	br	%r14
 
 cleanup_system_call:
 	mvc	__LC_RETURN_PSW(16),0(%r12)
-	cghi	%r12,__LC_MCK_OLD_PSW
-	je	0f
-	la	%r12,__LC_SAVE_AREA+32
-	j	1f
-0:	la	%r12,__LC_SAVE_AREA+64
-1:
 	clc	__LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+8)
 	jh	0f
+	mvc	__LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER
+	cghi	%r12,__LC_MCK_OLD_PSW
+	je	0f
 	mvc	__LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER
+0:	cghi	%r12,__LC_MCK_OLD_PSW
+	la	%r12,__LC_SAVE_AREA+80
+	je	0f
+	la	%r12,__LC_SAVE_AREA+40
 0:	clc	__LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+16)
 	jhe	cleanup_vtime
 	clc	__LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn)
 	jh	0f
-	mvc	__LC_SAVE_AREA(32),0(%r12)
-0:	stg	%r13,8(%r12)
-	stg	%r12,__LC_SAVE_AREA+96	# argh
-	SAVE_ALL_SYNC __LC_SVC_OLD_PSW,__LC_SAVE_AREA
-	CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA
-	lg	%r12,__LC_SAVE_AREA+96	# argh
-	stg	%r15,24(%r12)
-	llgh	%r7,__LC_SVC_INT_CODE
+	mvc	__LC_SAVE_AREA(40),0(%r12)
+0:	lg	%r15,__LC_KERNEL_STACK	# problem state -> load ksp
+	aghi	%r15,-SP_SIZE		# make room for registers & psw
+	stg	%r15,32(%r12)
+	stg	%r11,0(%r12)
+	CREATE_STACK_FRAME __LC_SAVE_AREA
+	mvc	SP_PSW(16,%r15),__LC_SVC_OLD_PSW
+	mvc	SP_ILC(4,%r15),__LC_SVC_ILC
+	stg	%r7,SP_ARGS(%r15)
+	mvc	8(8,%r12),__LC_THREAD_INFO
 cleanup_vtime:
 	clc	__LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+24)
 	jhe	cleanup_stime
@@ -983,7 +970,11 @@ cleanup_stime:
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
 cleanup_update:
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
-	mvc	__LC_RETURN_PSW+8(8),BASED(cleanup_table_system_call+8)
+	srag	%r12,%r11,23
+	lg	%r12,__LC_THREAD_INFO
+	jz	0f
+	stg	%r11,__TI_last_break(%r12)
+0:	mvc	__LC_RETURN_PSW+8(8),BASED(cleanup_table_system_call+8)
 	la	%r12,__LC_RETURN_PSW
 	br	%r14
 cleanup_system_call_insn:
@@ -993,61 +984,54 @@ cleanup_system_call_insn:
 	.quad	sysc_stime
 	.quad	sysc_update
 
-cleanup_sysc_return:
+cleanup_sysc_tif:
 	mvc	__LC_RETURN_PSW(8),0(%r12)
-	mvc	__LC_RETURN_PSW+8(8),BASED(cleanup_table_sysc_return)
+	mvc	__LC_RETURN_PSW+8(8),BASED(cleanup_table_sysc_tif)
 	la	%r12,__LC_RETURN_PSW
 	br	%r14
 
-cleanup_sysc_leave:
-	clc	8(8,%r12),BASED(cleanup_sysc_leave_insn)
-	je	3f
-	clc	8(8,%r12),BASED(cleanup_sysc_leave_insn+8)
+cleanup_sysc_restore:
+	clc	8(8,%r12),BASED(cleanup_sysc_restore_insn)
+	je	2f
+	clc	8(8,%r12),BASED(cleanup_sysc_restore_insn+8)
 	jhe	0f
+	mvc	__LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER
+	cghi	%r12,__LC_MCK_OLD_PSW
+	je	0f
 	mvc	__LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
 0:	mvc	__LC_RETURN_PSW(16),SP_PSW(%r15)
 	cghi	%r12,__LC_MCK_OLD_PSW
-	jne	1f
-	mvc	__LC_SAVE_AREA+64(32),SP_R12(%r15)
-	j	2f
-1:	mvc	__LC_SAVE_AREA+32(32),SP_R12(%r15)
-2:	lmg	%r0,%r11,SP_R0(%r15)
+	la	%r12,__LC_SAVE_AREA+80
+	je	1f
+	la	%r12,__LC_SAVE_AREA+40
+1:	mvc	0(40,%r12),SP_R11(%r15)
+	lmg	%r0,%r10,SP_R0(%r15)
 	lg	%r15,SP_R15(%r15)
-3:	la	%r12,__LC_RETURN_PSW
+2:	la	%r12,__LC_RETURN_PSW
 	br	%r14
-cleanup_sysc_leave_insn:
+cleanup_sysc_restore_insn:
 	.quad	sysc_done - 4
 	.quad	sysc_done - 16
 
-cleanup_io_return:
-	mvc	__LC_RETURN_PSW(8),0(%r12)
-	mvc	__LC_RETURN_PSW+8(8),BASED(cleanup_table_io_return)
-	la	%r12,__LC_RETURN_PSW
-	br	%r14
-
-cleanup_io_work_loop:
+cleanup_io_tif:
 	mvc	__LC_RETURN_PSW(8),0(%r12)
-	mvc	__LC_RETURN_PSW+8(8),BASED(cleanup_table_io_work_loop)
+	mvc	__LC_RETURN_PSW+8(8),BASED(cleanup_table_io_tif)
 	la	%r12,__LC_RETURN_PSW
 	br	%r14
 
-cleanup_io_leave:
-	clc	8(8,%r12),BASED(cleanup_io_leave_insn)
-	je	3f
-	clc	8(8,%r12),BASED(cleanup_io_leave_insn+8)
+cleanup_io_restore:
+	clc	8(8,%r12),BASED(cleanup_io_restore_insn)
+	je	1f
+	clc	8(8,%r12),BASED(cleanup_io_restore_insn+8)
 	jhe	0f
-	mvc	__LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
+	mvc	__LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER
 0:	mvc	__LC_RETURN_PSW(16),SP_PSW(%r15)
-	cghi	%r12,__LC_MCK_OLD_PSW
-	jne	1f
-	mvc	__LC_SAVE_AREA+64(32),SP_R12(%r15)
-	j	2f
-1:	mvc	__LC_SAVE_AREA+32(32),SP_R12(%r15)
-2:	lmg	%r0,%r11,SP_R0(%r15)
+	mvc	__LC_SAVE_AREA+80(40),SP_R11(%r15)
+	lmg	%r0,%r10,SP_R0(%r15)
 	lg	%r15,SP_R15(%r15)
-3:	la	%r12,__LC_RETURN_PSW
+1:	la	%r12,__LC_RETURN_PSW
 	br	%r14
-cleanup_io_leave_insn:
+cleanup_io_restore_insn:
 	.quad	io_done - 4
 	.quad	io_done - 16
 
@@ -1055,13 +1039,6 @@ cleanup_io_leave_insn:
  * Integer constants
  */
 		.align	4
-.Lconst:
-.Lnr_syscalls:	.long	NR_syscalls
-.L0x0130:	.short	0x130
-.L0x0140:	.short	0x140
-.L0x0150:	.short	0x150
-.L0x0160:	.short	0x160
-.L0x0170:	.short	0x170
 .Lcritical_start:
 		.quad	__critical_start
 .Lcritical_end:
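
The entry64.S rework above replaces the old sysc_work_loop/io_work_loop constructs with the sysc_tif/io_tif checks: every work handler now returns to sysc_return (or io_return), where the thread-info flag byte is tested again until no work bit is left. The stand-alone C sketch below only models that dispatch order; the TIF bit values and handler bodies are invented for the demo and are not kernel code.

/* tif_dispatch_sketch.c - illustrative only, not kernel code.
 *
 * Models the dispatch order of sysc_work_tif above: work bits are
 * re-tested after every handler (each one branches back to sysc_return),
 * and machine-check work is always served first.  Bit values and handler
 * bodies are made up for the demo.
 */
#include <stdio.h>

#define TIF_MCCK_PENDING   (1u << 0)
#define TIF_NEED_RESCHED   (1u << 1)
#define TIF_SIGPENDING     (1u << 2)
#define TIF_NOTIFY_RESUME  (1u << 3)

static unsigned int tif_flags = TIF_MCCK_PENDING | TIF_NEED_RESCHED | TIF_SIGPENDING;

static void handle(const char *what, unsigned int bit)
{
	printf("handling %s\n", what);
	tif_flags &= ~bit;		/* the real handlers clear their own bit */
}

int main(void)
{
	/* sysc_return/sysc_tif: keep testing until no work bit is left. */
	while (tif_flags) {
		if (tif_flags & TIF_MCCK_PENDING)
			handle("pending machine check", TIF_MCCK_PENDING);
		else if (tif_flags & TIF_NEED_RESCHED)
			handle("reschedule", TIF_NEED_RESCHED);
		else if (tif_flags & TIF_SIGPENDING)
			handle("signal delivery", TIF_SIGPENDING);
		else if (tif_flags & TIF_NOTIFY_RESUME)
			handle("notify resume", TIF_NOTIFY_RESUME);
	}
	printf("no work left, restore registers and return to user space\n");
	return 0;
}
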
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 9d1f76702d47..51838ad42d56 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -328,8 +328,8 @@ iplstart:
 #
 # reset files in VM reader
 #
-	stidp	__LC_CPUID		# store cpuid
-	tm	__LC_CPUID,0xff 	# running VM ?
+	stidp	__LC_SAVE_AREA		# store cpuid
+	tm	__LC_SAVE_AREA,0xff	# running VM ?
 	bno	.Lnoreset
 	la	%r2,.Lreset
 	lhi	%r3,26
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 015e27da40eb..ac151399ef34 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -255,7 +255,8 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
 	int umode;
 
 	nmi_enter();
-	s390_idle_check();
+	s390_idle_check(regs, S390_lowcore.mcck_clock,
+			S390_lowcore.mcck_enter_timer);
 
 	mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
 	mcck = &__get_cpu_var(cpu_mcck);
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 0729f36c2fe3..ecb2d02b02e4 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -18,24 +18,42 @@
 #include <asm/lowcore.h>
 #include <asm/param.h>
 
+static DEFINE_PER_CPU(struct cpuid, cpu_id);
+
+/*
+ * cpu_init - initializes state that is per-CPU.
+ */
+void __cpuinit cpu_init(void)
+{
+	struct cpuid *id = &per_cpu(cpu_id, smp_processor_id());
+
+	get_cpu_id(id);
+	atomic_inc(&init_mm.mm_count);
+	current->active_mm = &init_mm;
+	BUG_ON(current->mm);
+	enter_lazy_tlb(&init_mm, current);
+}
+
+/*
+ * print_cpu_info - print basic information about a cpu
+ */
 void __cpuinit print_cpu_info(void)
 {
+	struct cpuid *id = &per_cpu(cpu_id, smp_processor_id());
+
 	pr_info("Processor %d started, address %d, identification %06X\n",
-		S390_lowcore.cpu_nr, S390_lowcore.cpu_addr,
-		S390_lowcore.cpu_id.ident);
+		S390_lowcore.cpu_nr, S390_lowcore.cpu_addr, id->ident);
 }
 
 /*
  * show_cpuinfo - Get information on one CPU for use by procfs.
  */
-
 static int show_cpuinfo(struct seq_file *m, void *v)
 {
 	static const char *hwcap_str[10] = {
 		"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
 		"edat", "etf3eh", "highgprs"
 	};
-	struct _lowcore *lc;
 	unsigned long n = (unsigned long) v - 1;
 	int i;
 
@@ -55,19 +73,12 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	}
 
 	if (cpu_online(n)) {
-#ifdef CONFIG_SMP
-		lc = (smp_processor_id() == n) ?
-			&S390_lowcore : lowcore_ptr[n];
-#else
-		lc = &S390_lowcore;
-#endif
+		struct cpuid *id = &per_cpu(cpu_id, n);
 		seq_printf(m, "processor %li: "
 			   "version = %02X,  "
 			   "identification = %06X,  "
 			   "machine = %04X\n",
-			   n, lc->cpu_id.version,
-			   lc->cpu_id.ident,
-			   lc->cpu_id.machine);
+			   n, id->version, id->ident, id->machine);
 	}
 	preempt_enable();
 	return 0;
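
The processor.c change stops digging the CPU identification out of another CPU's lowcore and instead caches it in a per-CPU variable filled once from cpu_init(). The stand-alone sketch below mimics that fill-once-at-bring-up, read-locally-afterwards pattern with a plain array; the structure layout, the query helper, and the sample values are placeholders, not the kernel's struct cpuid or get_cpu_id().

/* percpu_cpuid_sketch.c - illustrative only, not kernel code. */
#include <stdio.h>

#define NR_CPUS 4

struct cpuid_sketch {
	unsigned int version;
	unsigned int ident;
	unsigned int machine;
};

static struct cpuid_sketch cpu_id[NR_CPUS];	/* stands in for DEFINE_PER_CPU(struct cpuid, cpu_id) */

static void query_cpu_id(int cpu, struct cpuid_sketch *id)
{
	id->version = 0x00;			/* in the kernel this is get_cpu_id() (STIDP) */
	id->ident   = 0x012345 + cpu;		/* made-up identification */
	id->machine = 0x2097;			/* made-up machine type */
}

static void cpu_init_sketch(int cpu)
{
	query_cpu_id(cpu, &cpu_id[cpu]);	/* fill the cache once at CPU bring-up */
}

int main(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		cpu_init_sketch(cpu);
	for (int cpu = 0; cpu < NR_CPUS; cpu++)	/* show_cpuinfo() only reads the cache */
		printf("processor %d: version = %02X, identification = %06X, machine = %04X\n",
		       cpu, cpu_id[cpu].version, cpu_id[cpu].ident, cpu_id[cpu].machine);
	return 0;
}
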
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 9f654da4cecc..83339d33c4b1 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -57,6 +57,7 @@
 enum s390_regset {
 	REGSET_GENERAL,
 	REGSET_FP,
+	REGSET_LAST_BREAK,
 	REGSET_GENERAL_EXTENDED,
 };
 
@@ -381,6 +382,10 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 			copied += sizeof(unsigned long);
 		}
 		return 0;
+	case PTRACE_GET_LAST_BREAK:
+		put_user(task_thread_info(child)->last_break,
+			 (unsigned long __user *) data);
+		return 0;
 	default:
 		/* Removing high order bit from addr (only for 31 bit). */
 		addr &= PSW_ADDR_INSN;
@@ -633,6 +638,10 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 			copied += sizeof(unsigned int);
 		}
 		return 0;
+	case PTRACE_GET_LAST_BREAK:
+		put_user(task_thread_info(child)->last_break,
+			 (unsigned int __user *) data);
+		return 0;
 	}
 	return compat_ptrace_request(child, request, addr, data);
 }
@@ -797,6 +806,28 @@ static int s390_fpregs_set(struct task_struct *target,
 	return rc;
 }
 
+#ifdef CONFIG_64BIT
+
+static int s390_last_break_get(struct task_struct *target,
+			       const struct user_regset *regset,
+			       unsigned int pos, unsigned int count,
+			       void *kbuf, void __user *ubuf)
+{
+	if (count > 0) {
+		if (kbuf) {
+			unsigned long *k = kbuf;
+			*k = task_thread_info(target)->last_break;
+		} else {
+			unsigned long  __user *u = ubuf;
+			if (__put_user(task_thread_info(target)->last_break, u))
+				return -EFAULT;
+		}
+	}
+	return 0;
+}
+
+#endif
+
 static const struct user_regset s390_regsets[] = {
 	[REGSET_GENERAL] = {
 		.core_note_type = NT_PRSTATUS,
@@ -814,6 +845,15 @@ static const struct user_regset s390_regsets[] = {
 		.get = s390_fpregs_get,
 		.set = s390_fpregs_set,
 	},
+#ifdef CONFIG_64BIT
+	[REGSET_LAST_BREAK] = {
+		.core_note_type = NT_S390_LAST_BREAK,
+		.n = 1,
+		.size = sizeof(long),
+		.align = sizeof(long),
+		.get = s390_last_break_get,
+	},
+#endif
 };
 
 static const struct user_regset_view user_s390_view = {
@@ -948,6 +988,27 @@ static int s390_compat_regs_high_set(struct task_struct *target,
 	return rc;
 }
 
+static int s390_compat_last_break_get(struct task_struct *target,
+				      const struct user_regset *regset,
+				      unsigned int pos, unsigned int count,
+				      void *kbuf, void __user *ubuf)
+{
+	compat_ulong_t last_break;
+
+	if (count > 0) {
+		last_break = task_thread_info(target)->last_break;
+		if (kbuf) {
+			unsigned long *k = kbuf;
+			*k = last_break;
+		} else {
+			unsigned long  __user *u = ubuf;
+			if (__put_user(last_break, u))
+				return -EFAULT;
+		}
+	}
+	return 0;
+}
+
 static const struct user_regset s390_compat_regsets[] = {
 	[REGSET_GENERAL] = {
 		.core_note_type = NT_PRSTATUS,
@@ -965,6 +1026,13 @@ static const struct user_regset s390_compat_regsets[] = {
 		.get = s390_fpregs_get,
 		.set = s390_fpregs_set,
 	},
+	[REGSET_LAST_BREAK] = {
+		.core_note_type = NT_S390_LAST_BREAK,
+		.n = 1,
+		.size = sizeof(long),
+		.align = sizeof(long),
+		.get = s390_compat_last_break_get,
+	},
 	[REGSET_GENERAL_EXTENDED] = {
 		.core_note_type = NT_S390_HIGH_GPRS,
 		.n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t),
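
The ptrace.c hunks export the recorded breaking-event address both as a new PTRACE_GET_LAST_BREAK request and as an NT_S390_LAST_BREAK core-note regset. Below is a minimal tracer sketch using the new request on a stopped child; error handling is trimmed, and the fallback request value is an assumption for illustration only — take the real constant from the s390 <asm/ptrace.h> when building.

/* last_break_tracer_sketch.c - illustrative tracer, s390x only. */
#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#ifndef PTRACE_GET_LAST_BREAK
#define PTRACE_GET_LAST_BREAK 0x5006	/* assumed value; prefer the definition in <asm/ptrace.h> */
#endif

int main(void)
{
	unsigned long last_break = 0;
	pid_t child = fork();

	if (child == 0) {			/* tracee */
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);			/* stop so the parent can inspect us */
		_exit(0);
	}

	waitpid(child, NULL, 0);		/* wait for the SIGSTOP */
	if (ptrace(PTRACE_GET_LAST_BREAK, child, NULL, &last_break) == 0)
		printf("last breaking-event address: 0x%lx\n", last_break);
	else
		perror("PTRACE_GET_LAST_BREAK");

	ptrace(PTRACE_CONT, child, NULL, NULL);
	waitpid(child, NULL, 0);
	return 0;
}
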
diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c
index 59618bcd99b7..9ce641b5291f 100644
--- a/arch/s390/kernel/s390_ext.c
+++ b/arch/s390/kernel/s390_ext.c
@@ -120,7 +120,8 @@ void __irq_entry do_extint(struct pt_regs *regs, unsigned short code)
 	struct pt_regs *old_regs;
 
 	old_regs = set_irq_regs(regs);
-	s390_idle_check();
+	s390_idle_check(regs, S390_lowcore.int_clock,
+			S390_lowcore.async_enter_timer);
 	irq_enter();
 	if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
 		/* Serve timer interrupts first. */
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 91625f759ccd..7d893248d265 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -2,7 +2,7 @@
  *  arch/s390/kernel/setup.c
  *
  *  S390 version
- *    Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ *    Copyright (C) IBM Corp. 1999,2010
  *    Author(s): Hartmut Penner (hp@de.ibm.com),
  *               Martin Schwidefsky (schwidefsky@de.ibm.com)
  *
@@ -113,22 +113,6 @@ static struct resource data_resource = {
 };
 
 /*
- * cpu_init() initializes state that is per-CPU.
- */
-void __cpuinit cpu_init(void)
-{
-        /*
-         * Store processor id in lowcore (used e.g. in timer_interrupt)
-         */
-	get_cpu_id(&S390_lowcore.cpu_id);
-
-	atomic_inc(&init_mm.mm_count);
-	current->active_mm = &init_mm;
-	BUG_ON(current->mm);
-        enter_lazy_tlb(&init_mm, current);
-}
-
-/*
  * condev= and conmode= setup parameter.
  */
 
@@ -385,10 +369,6 @@ static void setup_addressing_mode(void)
 			pr_info("Address spaces switched, "
 				"mvcos not available\n");
 	}
-#ifdef CONFIG_TRACE_IRQFLAGS
-	sysc_restore_trace_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK;
-	io_restore_trace_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK;
-#endif
 }
 
 static void __init
@@ -421,6 +401,7 @@ setup_lowcore(void)
 	lc->io_new_psw.mask = psw_kernel_bits;
 	lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
 	lc->clock_comparator = -1ULL;
+	lc->cmf_hpp = -1ULL;
 	lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE;
 	lc->async_stack = (unsigned long)
 		__alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE;
@@ -695,6 +676,7 @@ static void __init setup_hwcaps(void)
 	static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 };
 	unsigned long long facility_list_extended;
 	unsigned int facility_list;
+	struct cpuid cpu_id;
 	int i;
 
 	facility_list = stfl();
@@ -756,7 +738,8 @@ static void __init setup_hwcaps(void)
 	 */
 	elf_hwcap |= HWCAP_S390_HIGH_GPRS;
 
-	switch (S390_lowcore.cpu_id.machine) {
+	get_cpu_id(&cpu_id);
+	switch (cpu_id.machine) {
 	case 0x9672:
 #if !defined(CONFIG_64BIT)
 	default:	/* Use "g5" as default for 31 bit kernels. */
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 6289945562b0..ee7ac8b11782 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -313,6 +313,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
 	   To avoid breaking binary compatibility, they are passed as args. */
 	regs->gprs[4] = current->thread.trap_no;
 	regs->gprs[5] = current->thread.prot_addr;
+	regs->gprs[6] = task_thread_info(current)->last_break;
 
 	/* Place signal number on stack to allow backtrace from handler.  */
 	if (__put_user(regs->gprs[2], (int __user *) &frame->signo))
@@ -376,6 +377,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	regs->gprs[2] = map_signal(sig);
 	regs->gprs[3] = (unsigned long) &frame->info;
 	regs->gprs[4] = (unsigned long) &frame->uc;
+	regs->gprs[5] = task_thread_info(current)->last_break;
 	return 0;
 
 give_sigsegv:
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 247b4c2d1e51..bcef00766a64 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -37,7 +37,8 @@ struct tl_cpu {
 };
 
 struct tl_container {
-	unsigned char reserved[8];
+	unsigned char reserved[7];
+	unsigned char id;
 };
 
 union tl_entry {
@@ -58,6 +59,7 @@ struct tl_info {
 
 struct core_info {
 	struct core_info *next;
+	unsigned char id;
 	cpumask_t mask;
 };
 
@@ -73,6 +75,7 @@ static DECLARE_WORK(topology_work, topology_work_fn);
 static DEFINE_SPINLOCK(topology_lock);
 
 cpumask_t cpu_core_map[NR_CPUS];
+unsigned char cpu_core_id[NR_CPUS];
 
 static cpumask_t cpu_coregroup_map(unsigned int cpu)
 {
@@ -116,6 +119,7 @@ static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core)
 		for_each_present_cpu(lcpu) {
 			if (cpu_logical_map(lcpu) == rcpu) {
 				cpu_set(lcpu, core->mask);
+				cpu_core_id[lcpu] = core->id;
 				smp_cpu_polarization[lcpu] = tl_cpu->pp;
 			}
 		}
@@ -158,6 +162,7 @@ static void tl_to_cores(struct tl_info *info)
 			break;
 		case 1:
 			core = core->next;
+			core->id = tle->container.id;
 			break;
 		case 0:
 			add_cpus_to_core(&tle->cpu, core);
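
The new cpu_core_id array records the container id from the topology information block for every CPU it contains; together with the small topology.h change listed in the diffstat this is what the commit message calls exposing the core identifier. The sketch below reads the generic sysfs attribute where core ids usually show up; that path is an assumption of the example, not something this hunk defines.

/* core_id_sketch.c - illustrative only. */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/devices/system/cpu/cpu0/topology/core_id";
	FILE *f = fopen(path, "r");
	int core_id;

	if (!f) {
		perror(path);
		return 1;
	}
	if (fscanf(f, "%d", &core_id) == 1)	/* value filled from cpu_core_id[] by generic code */
		printf("cpu0 core_id = %d\n", core_id);
	fclose(f);
	return 0;
}
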
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 6e7ad63854c0..5d8f0f3d0250 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -46,13 +46,7 @@
 
 pgm_check_handler_t *pgm_check_table[128];
 
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_PROCESS_DEBUG
-int sysctl_userprocess_debug = 1;
-#else
-int sysctl_userprocess_debug = 0;
-#endif
-#endif
+int show_unhandled_signals;
 
 extern pgm_check_handler_t do_protection_exception;
 extern pgm_check_handler_t do_dat_exception;
@@ -315,18 +309,19 @@ void die(const char * str, struct pt_regs * regs, long err)
 	do_exit(SIGSEGV);
 }
 
-static void inline
-report_user_fault(long interruption_code, struct pt_regs *regs)
+static void inline report_user_fault(struct pt_regs *regs, long int_code,
+				     int signr)
 {
-#if defined(CONFIG_SYSCTL)
-	if (!sysctl_userprocess_debug)
+	if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
 		return;
-#endif
-#if defined(CONFIG_SYSCTL) || defined(CONFIG_PROCESS_DEBUG)
-	printk("User process fault: interruption code 0x%lX\n",
-	       interruption_code);
+	if (!unhandled_signal(current, signr))
+		return;
+	if (!printk_ratelimit())
+		return;
+	printk("User process fault: interruption code 0x%lX ", int_code);
+	print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN);
+	printk("\n");
 	show_regs(regs);
-#endif
 }
 
 int is_valid_bugaddr(unsigned long addr)
@@ -354,7 +349,7 @@ static void __kprobes inline do_trap(long interruption_code, int signr,
 
                 tsk->thread.trap_no = interruption_code & 0xffff;
 		force_sig_info(signr, info, tsk);
-		report_user_fault(interruption_code, regs);
+		report_user_fault(regs, interruption_code, signr);
         } else {
                 const struct exception_table_entry *fixup;
                 fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
@@ -390,8 +385,8 @@ static void default_trap_handler(struct pt_regs * regs, long interruption_code)
 {
         if (regs->psw.mask & PSW_MASK_PSTATE) {
 		local_irq_enable();
+		report_user_fault(regs, interruption_code, SIGSEGV);
 		do_exit(SIGSEGV);
-		report_user_fault(interruption_code, regs);
 	} else
 		die("Unknown program exception", regs, interruption_code);
 }
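
traps.c drops the S390-only sysctl_userprocess_debug in favour of the generic show_unhandled_signals flag, and report_user_fault() now also requires the signal to be unhandled and a successful printk_ratelimit() before it prints the interruption code, faulting address and registers. The sketch below flips the usual generic knob for that flag at run time; the proc path is an assumption of the example, not something this patch adds.

/* exception_trace_sketch.c - illustrative only. */
#include <stdio.h>

int main(void)
{
	const char *knob = "/proc/sys/debug/exception-trace";	/* usual location of show_unhandled_signals */
	FILE *f = fopen(knob, "w");

	if (!f) {
		perror(knob);
		return 1;
	}
	fputs("1\n", f);	/* enable rate-limited reporting of faulting user processes */
	fclose(f);
	return 0;
}
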
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 6bc9c197aa91..6b83870507d5 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -102,11 +102,7 @@ static void vdso_init_per_cpu_data(int cpu, struct vdso_per_cpu_data *vpcd)
 /*
  * Allocate/free per cpu vdso data.
  */
-#ifdef CONFIG_64BIT
 #define SEGMENT_ORDER	2
-#else
-#define SEGMENT_ORDER	1
-#endif
 
 int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore)
 {
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index b59a812a010e..3479f1b0d4e0 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -121,32 +121,35 @@ void account_system_vtime(struct task_struct *tsk)
 }
 EXPORT_SYMBOL_GPL(account_system_vtime);
 
-void vtime_start_cpu(void)
+void vtime_start_cpu(__u64 int_clock, __u64 enter_timer)
 {
 	struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
 	struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
 	__u64 idle_time, expires;
 
+	if (idle->idle_enter == 0ULL)
+		return;
+
 	/* Account time spent with enabled wait psw loaded as idle time. */
-	idle_time = S390_lowcore.int_clock - idle->idle_enter;
+	idle_time = int_clock - idle->idle_enter;
 	account_idle_time(idle_time);
 	S390_lowcore.steal_timer +=
 		idle->idle_enter - S390_lowcore.last_update_clock;
-	S390_lowcore.last_update_clock = S390_lowcore.int_clock;
+	S390_lowcore.last_update_clock = int_clock;
 
 	/* Account system time spent going idle. */
 	S390_lowcore.system_timer += S390_lowcore.last_update_timer - vq->idle;
-	S390_lowcore.last_update_timer = S390_lowcore.async_enter_timer;
+	S390_lowcore.last_update_timer = enter_timer;
 
 	/* Restart vtime CPU timer */
 	if (vq->do_spt) {
 		/* Program old expire value but first save progress. */
-		expires = vq->idle - S390_lowcore.async_enter_timer;
+		expires = vq->idle - enter_timer;
 		expires += get_vtimer();
 		set_vtimer(expires);
 	} else {
 		/* Don't account the CPU timer delta while the cpu was idle. */
-		vq->elapsed -= vq->idle - S390_lowcore.async_enter_timer;
+		vq->elapsed -= vq->idle - enter_timer;
 	}
 
 	idle->sequence++;
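
vtime_start_cpu() now receives the interrupt clock and the enter timer from its caller, because machine checks use dedicated save slots (__LC_MCCK_CLOCK, __LC_MCCK_ENTER_TIMER) rather than the async ones; apart from the early exit when the CPU was not idle, the accounting arithmetic is unchanged. The stand-alone sketch below walks that arithmetic for one wakeup with made-up clock values.

/* idle_accounting_sketch.c - illustrative arithmetic only, not kernel code. */
#include <stdio.h>

int main(void)
{
	unsigned long long idle_enter        = 1000;	/* TOD clock when the CPU went idle */
	unsigned long long last_update_clock =  990;	/* last accounting checkpoint */
	unsigned long long int_clock         = 1500;	/* TOD clock of the waking interrupt */

	unsigned long long idle_time   = int_clock - idle_enter;	/* fed to account_idle_time() */
	unsigned long long steal_delta = idle_enter - last_update_clock;

	printf("idle time accounted:   %llu\n", idle_time);
	printf("steal timer grows by:  %llu\n", steal_delta);
	printf("new last_update_clock: %llu\n", int_clock);
	return 0;
}
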
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index a7251580891c..2f4b687cc7fa 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -33,6 +33,17 @@ config KVM
 
 	  If unsure, say N.
 
+config KVM_AWARE_CMF
+	depends on KVM
+	bool "KVM aware sampling"
+	---help---
+	  This option enhances the sampling data from the CPU Measurement
+	  Facility with additional information that allows distinguishing
+	  guest(s) from the host when using the kernel-based virtual machine
+	  functionality.
+
+	  If unsure, say N.
+
 # OK, it's a little counter-intuitive to do this, but it puts it neatly under
 # the virtualization menu.
 source drivers/vhost/Kconfig
diff --git a/arch/s390/kvm/sie64a.S b/arch/s390/kvm/sie64a.S
index 934fd6a885f6..31646bd0e469 100644
--- a/arch/s390/kvm/sie64a.S
+++ b/arch/s390/kvm/sie64a.S
@@ -1,20 +1,60 @@
 /*
  * sie64a.S - low level sie call
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2010
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
  * as published by the Free Software Foundation.
  *
  *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ *		 Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  */
 
 #include <linux/errno.h>
 #include <asm/asm-offsets.h>
+#include <asm/setup.h>
+#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
+#include <asm/thread_info.h>
+
+_TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING)
+
+/*
+ * offsets into stackframe
+ * SP_	= offsets into stack sie64 is called with
+ * SPI_ = offsets into irq stack
+ */
+SP_GREGS = __SF_EMPTY
+SP_HOOK  = __SF_EMPTY+8
+SP_GPP	 = __SF_EMPTY+16
+SPI_PSW  = STACK_FRAME_OVERHEAD + __PT_PSW
+
 
-SP_R5 =	5 * 8	# offset into stackframe
-SP_R6 =	6 * 8
+	.macro SPP newpp
+#ifdef CONFIG_KVM_AWARE_CMF
+	tm	__LC_MACHINE_FLAGS+6,0x20	# MACHINE_FLAG_SPP
+	jz	0f
+	.insn	s,0xb2800000,\newpp
+	0:
+#endif
+	.endm
+
+sie_irq_handler:
+	SPP	__LC_CMF_HPP			# set host id
+	larl	%r2,sie_inst
+	clg	%r2,SPI_PSW+8(0,%r15)		# intercepted sie
+	jne	1f
+	xc	__LC_SIE_HOOK(8),__LC_SIE_HOOK
+	lg	%r2,__LC_THREAD_INFO		# pointer thread_info struct
+	tm	__TI_flags+7(%r2),_TIF_EXIT_SIE
+	jz	0f
+	larl	%r2,sie_exit			# work pending, leave sie
+	stg	%r2,__LC_RETURN_PSW+8
+	br	%r14
+0:	larl	%r2,sie_reenter			# re-enter with guest id
+	stg	%r2,__LC_RETURN_PSW+8
+1:	br	%r14
 
 /*
  * sie64a calling convention:
@@ -23,23 +63,34 @@ SP_R6 =	6 * 8
  */
 	.globl	sie64a
 sie64a:
-	lgr	%r5,%r3
-	stmg	%r5,%r14,SP_R5(%r15)	# save register on entry
-	lgr	%r14,%r2		# pointer to sie control block
-	lmg	%r0,%r13,0(%r3)		# load guest gprs 0-13
+	stg	%r3,SP_GREGS(%r15)		# save guest register save area
+	stmg	%r6,%r14,__SF_GPRS(%r15)	# save registers on entry
+	lgr	%r14,%r2			# pointer to sie control block
+	larl	%r5,sie_irq_handler
+	stg	%r2,SP_GPP(%r15)
+	stg	%r5,SP_HOOK(%r15)		# save hook target
+	lmg	%r0,%r13,0(%r3)			# load guest gprs 0-13
+sie_reenter:
+	mvc	__LC_SIE_HOOK(8),SP_HOOK(%r15)
+	SPP	SP_GPP(%r15)			# set guest id
 sie_inst:
 	sie	0(%r14)
-	lg	%r14,SP_R5(%r15)
-	stmg	%r0,%r13,0(%r14)	# save guest gprs 0-13
+	xc	__LC_SIE_HOOK(8),__LC_SIE_HOOK
+	SPP	__LC_CMF_HPP			# set host id
+sie_exit:
+	lg	%r14,SP_GREGS(%r15)
+	stmg	%r0,%r13,0(%r14)		# save guest gprs 0-13
 	lghi	%r2,0
-	lmg	%r6,%r14,SP_R6(%r15)
+	lmg	%r6,%r14,__SF_GPRS(%r15)
 	br	%r14
 
 sie_err:
-	lg	%r14,SP_R5(%r15)
-	stmg	%r0,%r13,0(%r14)	# save guest gprs 0-13
+	xc	__LC_SIE_HOOK(8),__LC_SIE_HOOK
+	SPP	__LC_CMF_HPP			# set host id
+	lg	%r14,SP_GREGS(%r15)
+	stmg	%r0,%r13,0(%r14)		# save guest gprs 0-13
 	lghi	%r2,-EFAULT
-	lmg	%r6,%r14,SP_R6(%r15)
+	lmg	%r6,%r14,__SF_GPRS(%r15)
 	br	%r14
 
 	.section __ex_table,"a"
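
sie64a keeps its two-argument convention (%r2 = SIE control block, %r3 = guest register save area) but now installs sie_irq_handler as the lowcore SIE hook and brackets guest execution with SPP so CPU-measurement samples can be attributed to guest or host. The stand-alone sketch below only models the hook's decision: with any _TIF_EXIT_SIE work pending, the interrupt resumes at sie_exit and leaves the guest, otherwise it resumes at sie_reenter; the bit values are invented for the demo.

/* sie_hook_sketch.c - illustrative decision logic only, not kernel code. */
#include <stdio.h>

#define TIF_SIGPENDING    (1u << 0)	/* illustrative bit values */
#define TIF_NEED_RESCHED  (1u << 1)
#define TIF_MCCK_PENDING  (1u << 2)
#define TIF_EXIT_SIE      (TIF_SIGPENDING | TIF_NEED_RESCHED | TIF_MCCK_PENDING)

static const char *sie_hook_target(unsigned int tif_flags)
{
	if (tif_flags & TIF_EXIT_SIE)
		return "sie_exit";	/* leave the guest, let the host handle the work */
	return "sie_reenter";		/* nothing pending, go straight back into SIE */
}

int main(void)
{
	printf("no work pending -> resume at %s\n", sie_hook_target(0));
	printf("signal pending  -> resume at %s\n", sie_hook_target(TIF_SIGPENDING));
	return 0;
}
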
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 3040d7c78fe0..2505b2ea0ef1 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -48,10 +48,6 @@
 #define __PF_RES_FIELD 0x8000000000000000ULL
 #endif /* CONFIG_64BIT */
 
-#ifdef CONFIG_SYSCTL
-extern int sysctl_userprocess_debug;
-#endif
-
 #define VM_FAULT_BADCONTEXT	0x010000
 #define VM_FAULT_BADMAP		0x020000
 #define VM_FAULT_BADACCESS	0x040000
@@ -120,6 +116,22 @@ static inline int user_space_fault(unsigned long trans_exc_code)
 	return trans_exc_code != 3;
 }
 
+static inline void report_user_fault(struct pt_regs *regs, long int_code,
+				     int signr, unsigned long address)
+{
+	if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
+		return;
+	if (!unhandled_signal(current, signr))
+		return;
+	if (!printk_ratelimit())
+		return;
+	printk("User process fault: interruption code 0x%lX ", int_code);
+	print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN);
+	printk("\n");
+	printk("failing address: %lX\n", address);
+	show_regs(regs);
+}
+
 /*
  * Send SIGSEGV to task.  This is an external routine
  * to keep the stack usage of do_page_fault small.
@@ -133,17 +145,7 @@ static noinline void do_sigsegv(struct pt_regs *regs, long int_code,
 	address = trans_exc_code & __FAIL_ADDR_MASK;
 	current->thread.prot_addr = address;
 	current->thread.trap_no = int_code;
-#if defined(CONFIG_SYSCTL) || defined(CONFIG_PROCESS_DEBUG)
-#if defined(CONFIG_SYSCTL)
-	if (sysctl_userprocess_debug)
-#endif
-	{
-		printk("User process fault: interruption code 0x%lX\n",
-		       int_code);
-		printk("failing address: %lX\n", address);
-		show_regs(regs);
-	}
-#endif
+	report_user_fault(regs, int_code, SIGSEGV, address);
 	si.si_signo = SIGSEGV;
 	si.si_code = si_code;
 	si.si_addr = (void __user *) address;
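
The fault.c hunk routes unresolved user faults through the same rate-limited report_user_fault() helper as traps.c. With reporting enabled, any unhandled SIGSEGV produces one such message; the trivial sketch below forces that path by dereferencing a NULL pointer.

/* segv_sketch.c - illustrative only: force an unresolved user-space fault
 * so that (with reporting enabled) report_user_fault() above logs the
 * interruption code and failing address before SIGSEGV is delivered.
 */
int main(void)
{
	volatile int *p = 0;

	return *p;	/* NULL dereference -> do_sigsegv() -> report_user_fault() */
}
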