summary refs log tree commit diff
path: root/arch/s390
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-03-18 10:38:34 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2011-03-18 10:38:34 -0700
commit619297855aa16646246ea4b1f6e05f1b2455c808 (patch)
treea07fe1b24c372f5eabf244555db41fdf574c1205 /arch/s390
parente16b396ce314b2bcdfe6c173fe075bf8e3432368 (diff)
parent1ef1d1c2353967e2d61ecaddf76edfd058a778b4 (diff)
downloadlinux-619297855aa16646246ea4b1f6e05f1b2455c808.tar.gz
Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (30 commits)
  trace, filters: Initialize the match variable in process_ops() properly
  trace, documentation: Fix branch profiling location in debugfs
  oprofile, s390: Cleanups
  oprofile, s390: Remove hwsampler_files.c and merge it into init.c
  perf: Fix tear-down of inherited group events
  perf: Reorder & optimize perf_event_context to remove alignment padding on 64 bit builds
  perf: Handle stopped state with tracepoints
  perf: Fix the software events state check
  perf, powerpc: Handle events that raise an exception without overflowing
  perf, x86: Use INTEL_*_CONSTRAINT() for all PEBS event constraints
  perf, x86: Clean up SandyBridge PEBS events
  perf lock: Fix sorting by wait_min
  perf tools: Version incorrect with some versions of grep
  perf evlist: New command to list the names of events present in a perf.data file
  perf script: Add support for H/W and S/W events
  perf script: Add support for dumping symbols
  perf script: Support custom field selection for output
  perf script: Move printing of 'common' data from print_event and rename
  perf tracing: Remove print_graph_cpu and print_graph_proc from trace-event-parse
  perf script: Change process_event prototype
  ...
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/oprofile/Makefile2
-rw-r--r--arch/s390/oprofile/hwsampler.c1256
-rw-r--r--arch/s390/oprofile/hwsampler.h113
-rw-r--r--arch/s390/oprofile/init.c165
4 files changed, 1532 insertions, 4 deletions
diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile
index 537b2d840e69..d698cddcfbdd 100644
--- a/arch/s390/oprofile/Makefile
+++ b/arch/s390/oprofile/Makefile
@@ -6,4 +6,4 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
 		oprofilefs.o oprofile_stats.o  \
 		timer_int.o )
 
-oprofile-y				:= $(DRIVER_OBJS) init.o backtrace.o
+oprofile-y :=	$(DRIVER_OBJS) init.o backtrace.o hwsampler.o
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
new file mode 100644
index 000000000000..3d48f4db246d
--- /dev/null
+++ b/arch/s390/oprofile/hwsampler.c
@@ -0,0 +1,1256 @@
+/**
+ * arch/s390/oprofile/hwsampler.c
+ *
+ * Copyright IBM Corp. 2010
+ * Author: Heinz Graalfs <graalfs@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/semaphore.h>
+#include <linux/oom.h>
+#include <linux/oprofile.h>
+
+#include <asm/lowcore.h>
+#include <asm/s390_ext.h>
+
+#include "hwsampler.h"
+
+#define MAX_NUM_SDB 511
+#define MIN_NUM_SDB 1
+
+#define ALERT_REQ_MASK   0x4000000000000000ul
+#define BUFFER_FULL_MASK 0x8000000000000000ul
+
+#define EI_IEA      (1 << 31)	/* invalid entry address              */
+#define EI_ISE      (1 << 30)	/* incorrect SDBT entry               */
+#define EI_PRA      (1 << 29)	/* program request alert              */
+#define EI_SACA     (1 << 23)	/* sampler authorization change alert */
+#define EI_LSDA     (1 << 22)	/* loss of sample data alert          */
+
+DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer);
+
+struct hws_execute_parms {
+	void *buffer;
+	signed int rc;
+};
+
+DEFINE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer);
+EXPORT_PER_CPU_SYMBOL(sampler_cpu_buffer);
+
+static DEFINE_MUTEX(hws_sem);
+static DEFINE_MUTEX(hws_sem_oom);
+
+static unsigned char hws_flush_all;
+static unsigned int hws_oom;
+static struct workqueue_struct *hws_wq;
+
+static unsigned int hws_state;
+enum {
+	HWS_INIT = 1,
+	HWS_DEALLOCATED,
+	HWS_STOPPED,
+	HWS_STARTED,
+	HWS_STOPPING };
+
+/* set to 1 if called by kernel during memory allocation */
+static unsigned char oom_killer_was_active;
+/* size of SDBT and SDB as of allocate API */
+static unsigned long num_sdbt = 100;
+static unsigned long num_sdb = 511;
+/* sampling interval (machine cycles) */
+static unsigned long interval;
+
+static unsigned long min_sampler_rate;
+static unsigned long max_sampler_rate;
+
+static int ssctl(void *buffer)
+{
+	int cc;
+
+	/* set in order to detect a program check */
+	cc = 1;
+
+	asm volatile(
+		"0: .insn s,0xB2870000,0(%1)\n"
+		"1: ipm %0\n"
+		"   srl %0,28\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "+d" (cc), "+a" (buffer)
+		: "m" (*((struct hws_ssctl_request_block *)buffer))
+		: "cc", "memory");
+
+	return cc ? -EINVAL : 0 ;
+}
+
+static int qsi(void *buffer)
+{
+	int cc;
+	cc = 1;
+
+	asm volatile(
+		"0: .insn s,0xB2860000,0(%1)\n"
+		"1: lhi %0,0\n"
+		"2:\n"
+		EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+		: "=d" (cc), "+a" (buffer)
+		: "m" (*((struct hws_qsi_info_block *)buffer))
+		: "cc", "memory");
+
+	return cc ? -EINVAL : 0;
+}
+
+static void execute_qsi(void *parms)
+{
+	struct hws_execute_parms *ep = parms;
+
+	ep->rc = qsi(ep->buffer);
+}
+
+static void execute_ssctl(void *parms)
+{
+	struct hws_execute_parms *ep = parms;
+
+	ep->rc = ssctl(ep->buffer);
+}
+
+static int smp_ctl_ssctl_stop(int cpu)
+{
+	int rc;
+	struct hws_execute_parms ep;
+	struct hws_cpu_buffer *cb;
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+	cb->ssctl.es = 0;
+	cb->ssctl.cs = 0;
+
+	ep.buffer = &cb->ssctl;
+	smp_call_function_single(cpu, execute_ssctl, &ep, 1);
+	rc = ep.rc;
+	if (rc) {
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu);
+		dump_stack();
+	}
+
+	ep.buffer = &cb->qsi;
+	smp_call_function_single(cpu, execute_qsi, &ep, 1);
+
+	if (cb->qsi.es || cb->qsi.cs) {
+		printk(KERN_EMERG "CPUMF sampling did not stop properly.\n");
+		dump_stack();
+	}
+
+	return rc;
+}
+
+static int smp_ctl_ssctl_deactivate(int cpu)
+{
+	int rc;
+	struct hws_execute_parms ep;
+	struct hws_cpu_buffer *cb;
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+	cb->ssctl.es = 1;
+	cb->ssctl.cs = 0;
+
+	ep.buffer = &cb->ssctl;
+	smp_call_function_single(cpu, execute_ssctl, &ep, 1);
+	rc = ep.rc;
+	if (rc)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu);
+
+	ep.buffer = &cb->qsi;
+	smp_call_function_single(cpu, execute_qsi, &ep, 1);
+
+	if (cb->qsi.cs)
+		printk(KERN_EMERG "CPUMF sampling was not set inactive.\n");
+
+	return rc;
+}
+
+static int smp_ctl_ssctl_enable_activate(int cpu, unsigned long interval)
+{
+	int rc;
+	struct hws_execute_parms ep;
+	struct hws_cpu_buffer *cb;
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+	cb->ssctl.h = 1;
+	cb->ssctl.tear = cb->first_sdbt;
+	cb->ssctl.dear = *(unsigned long *) cb->first_sdbt;
+	cb->ssctl.interval = interval;
+	cb->ssctl.es = 1;
+	cb->ssctl.cs = 1;
+
+	ep.buffer = &cb->ssctl;
+	smp_call_function_single(cpu, execute_ssctl, &ep, 1);
+	rc = ep.rc;
+	if (rc)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF SSCTL failed.\n", cpu);
+
+	ep.buffer = &cb->qsi;
+	smp_call_function_single(cpu, execute_qsi, &ep, 1);
+	if (ep.rc)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF QSI failed.\n", cpu);
+
+	return rc;
+}
+
+static int smp_ctl_qsi(int cpu)
+{
+	struct hws_execute_parms ep;
+	struct hws_cpu_buffer *cb;
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+	ep.buffer = &cb->qsi;
+	smp_call_function_single(cpu, execute_qsi, &ep, 1);
+
+	return ep.rc;
+}
+
+static inline unsigned long *trailer_entry_ptr(unsigned long v)
+{
+	void *ret;
+
+	ret = (void *)v;
+	ret += PAGE_SIZE;
+	ret -= sizeof(struct hws_trailer_entry);
+
+	return (unsigned long *) ret;
+}
+
+/* prototypes for external interrupt handler and worker */
+static void hws_ext_handler(unsigned int ext_int_code,
+				unsigned int param32, unsigned long param64);
+
+static void worker(struct work_struct *work);
+
+static void add_samples_to_oprofile(unsigned cpu, unsigned long *,
+				unsigned long *dear);
+
+static void init_all_cpu_buffers(void)
+{
+	int cpu;
+	struct hws_cpu_buffer *cb;
+
+	for_each_online_cpu(cpu) {
+		cb = &per_cpu(sampler_cpu_buffer, cpu);
+		memset(cb, 0, sizeof(struct hws_cpu_buffer));
+	}
+}
+
+static int is_link_entry(unsigned long *s)
+{
+	return *s & 0x1ul ? 1 : 0;
+}
+
+static unsigned long *get_next_sdbt(unsigned long *s)
+{
+	return (unsigned long *) (*s & ~0x1ul);
+}
+
+static int prepare_cpu_buffers(void)
+{
+	int cpu;
+	int rc;
+	struct hws_cpu_buffer *cb;
+
+	rc = 0;
+	for_each_online_cpu(cpu) {
+		cb = &per_cpu(sampler_cpu_buffer, cpu);
+		atomic_set(&cb->ext_params, 0);
+		cb->worker_entry = 0;
+		cb->sample_overflow = 0;
+		cb->req_alert = 0;
+		cb->incorrect_sdbt_entry = 0;
+		cb->invalid_entry_address = 0;
+		cb->loss_of_sample_data = 0;
+		cb->sample_auth_change_alert = 0;
+		cb->finish = 0;
+		cb->oom = 0;
+		cb->stop_mode = 0;
+	}
+
+	return rc;
+}
+
+/*
+ * allocate_sdbt() - allocate sampler memory
+ * @cpu: the cpu for which sampler memory is allocated
+ *
+ * A 4K page is allocated for each requested SDBT.
+ * A maximum of 511 4K pages are allocated for the SDBs in each of the SDBTs.
+ * Set ALERT_REQ mask in each SDBs trailer.
+ * Returns zero if successful, <0 otherwise.
+ */
+static int allocate_sdbt(int cpu)
+{
+	int j, k, rc;
+	unsigned long *sdbt;
+	unsigned long  sdb;
+	unsigned long *tail;
+	unsigned long *trailer;
+	struct hws_cpu_buffer *cb;
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+	if (cb->first_sdbt)
+		return -EINVAL;
+
+	sdbt = NULL;
+	tail = sdbt;
+
+	for (j = 0; j < num_sdbt; j++) {
+		sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL);
+
+		mutex_lock(&hws_sem_oom);
+		/* OOM killer might have been activated */
+		barrier();
+		if (oom_killer_was_active || !sdbt) {
+			if (sdbt)
+				free_page((unsigned long)sdbt);
+
+			goto allocate_sdbt_error;
+		}
+		if (cb->first_sdbt == 0)
+			cb->first_sdbt = (unsigned long)sdbt;
+
+		/* link current page to tail of chain */
+		if (tail)
+			*tail = (unsigned long)(void *)sdbt + 1;
+
+		mutex_unlock(&hws_sem_oom);
+
+		for (k = 0; k < num_sdb; k++) {
+			/* get and set SDB page */
+			sdb = get_zeroed_page(GFP_KERNEL);
+
+			mutex_lock(&hws_sem_oom);
+			/* OOM killer might have been activated */
+			barrier();
+			if (oom_killer_was_active || !sdb) {
+				if (sdb)
+					free_page(sdb);
+
+				goto allocate_sdbt_error;
+			}
+			*sdbt = sdb;
+			trailer = trailer_entry_ptr(*sdbt);
+			*trailer = ALERT_REQ_MASK;
+			sdbt++;
+			mutex_unlock(&hws_sem_oom);
+		}
+		tail = sdbt;
+	}
+	mutex_lock(&hws_sem_oom);
+	if (oom_killer_was_active)
+		goto allocate_sdbt_error;
+
+	rc = 0;
+	if (tail)
+		*tail = (unsigned long)
+			((void *)cb->first_sdbt) + 1;
+
+allocate_sdbt_exit:
+	mutex_unlock(&hws_sem_oom);
+	return rc;
+
+allocate_sdbt_error:
+	rc = -ENOMEM;
+	goto allocate_sdbt_exit;
+}
+
+/*
+ * deallocate_sdbt() - deallocate all sampler memory
+ *
+ * For each online CPU all SDBT trees are deallocated.
+ * Returns the number of freed pages.
+ */
+static int deallocate_sdbt(void)
+{
+	int cpu;
+	int counter;
+
+	counter = 0;
+
+	for_each_online_cpu(cpu) {
+		unsigned long start;
+		unsigned long sdbt;
+		unsigned long *curr;
+		struct hws_cpu_buffer *cb;
+
+		cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+		if (!cb->first_sdbt)
+			continue;
+
+		sdbt = cb->first_sdbt;
+		curr = (unsigned long *) sdbt;
+		start = sdbt;
+
+		/* we'll free the SDBT after all SDBs are processed... */
+		while (1) {
+			if (!*curr || !sdbt)
+				break;
+
+			/* watch for link entry reset if found */
+			if (is_link_entry(curr)) {
+				curr = get_next_sdbt(curr);
+				if (sdbt)
+					free_page(sdbt);
+
+				/* we are done if we reach the start */
+				if ((unsigned long) curr == start)
+					break;
+				else
+					sdbt = (unsigned long) curr;
+			} else {
+				/* process SDB pointer */
+				if (*curr) {
+					free_page(*curr);
+					curr++;
+				}
+			}
+			counter++;
+		}
+		cb->first_sdbt = 0;
+	}
+	return counter;
+}
+
+static int start_sampling(int cpu)
+{
+	int rc;
+	struct hws_cpu_buffer *cb;
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+	rc = smp_ctl_ssctl_enable_activate(cpu, interval);
+	if (rc) {
+		printk(KERN_INFO "hwsampler: CPU %d ssctl failed.\n", cpu);
+		goto start_exit;
+	}
+
+	rc = -EINVAL;
+	if (!cb->qsi.es) {
+		printk(KERN_INFO "hwsampler: CPU %d ssctl not enabled.\n", cpu);
+		goto start_exit;
+	}
+
+	if (!cb->qsi.cs) {
+		printk(KERN_INFO "hwsampler: CPU %d ssctl not active.\n", cpu);
+		goto start_exit;
+	}
+
+	printk(KERN_INFO
+		"hwsampler: CPU %d, CPUMF Sampling started, interval %lu.\n",
+		cpu, interval);
+
+	rc = 0;
+
+start_exit:
+	return rc;
+}
+
+static int stop_sampling(int cpu)
+{
+	unsigned long v;
+	int rc;
+	struct hws_cpu_buffer *cb;
+
+	rc = smp_ctl_qsi(cpu);
+	WARN_ON(rc);
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+	if (!rc && !cb->qsi.es)
+		printk(KERN_INFO "hwsampler: CPU %d, already stopped.\n", cpu);
+
+	rc = smp_ctl_ssctl_stop(cpu);
+	if (rc) {
+		printk(KERN_INFO "hwsampler: CPU %d, ssctl stop error %d.\n",
+				cpu, rc);
+		goto stop_exit;
+	}
+
+	printk(KERN_INFO "hwsampler: CPU %d, CPUMF Sampling stopped.\n", cpu);
+
+stop_exit:
+	v = cb->req_alert;
+	if (v)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF Request alert,"
+				" count=%lu.\n", cpu, v);
+
+	v = cb->loss_of_sample_data;
+	if (v)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF Loss of sample data,"
+				" count=%lu.\n", cpu, v);
+
+	v = cb->invalid_entry_address;
+	if (v)
+		printk(KERN_ERR "hwsampler: CPU %d CPUMF Invalid entry address,"
+				" count=%lu.\n", cpu, v);
+
+	v = cb->incorrect_sdbt_entry;
+	if (v)
+		printk(KERN_ERR
+				"hwsampler: CPU %d CPUMF Incorrect SDBT address,"
+				" count=%lu.\n", cpu, v);
+
+	v = cb->sample_auth_change_alert;
+	if (v)
+		printk(KERN_ERR
+				"hwsampler: CPU %d CPUMF Sample authorization change,"
+				" count=%lu.\n", cpu, v);
+
+	return rc;
+}
+
+static int check_hardware_prerequisites(void)
+{
+	unsigned long long facility_bits[2];
+
+	memcpy(facility_bits, S390_lowcore.stfle_fac_list, 32);
+	if (!(facility_bits[1] & (1ULL << 59)))
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+/*
+ * hws_oom_callback() - the OOM callback function
+ *
+ * In case the callback is invoked during memory allocation for the
+ *  hw sampler, all obtained memory is deallocated and a flag is set
+ *  so main sampler memory allocation can exit with a failure code.
+ * In case the callback is invoked during sampling the hw sampler
+ *  is deactivated for all CPUs.
+ */
+static int hws_oom_callback(struct notifier_block *nfb,
+	unsigned long dummy, void *parm)
+{
+	unsigned long *freed;
+	int cpu;
+	struct hws_cpu_buffer *cb;
+
+	freed = parm;
+
+	mutex_lock(&hws_sem_oom);
+
+	if (hws_state == HWS_DEALLOCATED) {
+		/* during memory allocation */
+		if (oom_killer_was_active == 0) {
+			oom_killer_was_active = 1;
+			*freed += deallocate_sdbt();
+		}
+	} else {
+		int i;
+		cpu = get_cpu();
+		cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+		if (!cb->oom) {
+			for_each_online_cpu(i) {
+				smp_ctl_ssctl_deactivate(i);
+				cb->oom = 1;
+			}
+			cb->finish = 1;
+
+			printk(KERN_INFO
+				"hwsampler: CPU %d, OOM notify during CPUMF Sampling.\n",
+				cpu);
+		}
+	}
+
+	mutex_unlock(&hws_sem_oom);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block hws_oom_notifier = {
+	.notifier_call = hws_oom_callback
+};
+
+static int hws_cpu_callback(struct notifier_block *nfb,
+	unsigned long action, void *hcpu)
+{
+	/* We do not have sampler space available for all possible CPUs.
+	   All CPUs should be online when hw sampling is activated. */
+	return NOTIFY_BAD;
+}
+
+static struct notifier_block hws_cpu_notifier = {
+	.notifier_call = hws_cpu_callback
+};
+
+/**
+ * hwsampler_deactivate() - set hardware sampling temporarily inactive
+ * @cpu:  specifies the CPU to be set inactive.
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_deactivate(unsigned int cpu)
+{
+	/*
+	 * Deactivate hw sampling temporarily and flush the buffer
+	 * by pushing all the pending samples to oprofile buffer.
+	 *
+	 * This function can be called under one of the following conditions:
+	 *     Memory unmap, task is exiting.
+	 */
+	int rc;
+	struct hws_cpu_buffer *cb;
+
+	rc = 0;
+	mutex_lock(&hws_sem);
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+	if (hws_state == HWS_STARTED) {
+		rc = smp_ctl_qsi(cpu);
+		WARN_ON(rc);
+		if (cb->qsi.cs) {
+			rc = smp_ctl_ssctl_deactivate(cpu);
+			if (rc) {
+				printk(KERN_INFO
+				"hwsampler: CPU %d, CPUMF Deactivation failed.\n", cpu);
+				cb->finish = 1;
+				hws_state = HWS_STOPPING;
+			} else  {
+				hws_flush_all = 1;
+				/* Add work to queue to read pending samples.*/
+				queue_work_on(cpu, hws_wq, &cb->worker);
+			}
+		}
+	}
+	mutex_unlock(&hws_sem);
+
+	if (hws_wq)
+		flush_workqueue(hws_wq);
+
+	return rc;
+}
+
+/**
+ * hwsampler_activate() - activate/resume hardware sampling which was deactivated
+ * @cpu:  specifies the CPU to be set active.
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_activate(unsigned int cpu)
+{
+	/*
+	 * Re-activate hw sampling. This should be called in pair with
+	 * hwsampler_deactivate().
+	 */
+	int rc;
+	struct hws_cpu_buffer *cb;
+
+	rc = 0;
+	mutex_lock(&hws_sem);
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+	if (hws_state == HWS_STARTED) {
+		rc = smp_ctl_qsi(cpu);
+		WARN_ON(rc);
+		if (!cb->qsi.cs) {
+			hws_flush_all = 0;
+			rc = smp_ctl_ssctl_enable_activate(cpu, interval);
+			if (rc) {
+				printk(KERN_ERR
+				"CPU %d, CPUMF activate sampling failed.\n",
+					 cpu);
+			}
+		}
+	}
+
+	mutex_unlock(&hws_sem);
+
+	return rc;
+}
+
+static void hws_ext_handler(unsigned int ext_int_code,
+			    unsigned int param32, unsigned long param64)
+{
+	int cpu;
+	struct hws_cpu_buffer *cb;
+
+	cpu = smp_processor_id();
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+	atomic_xchg(
+			&cb->ext_params,
+			atomic_read(&cb->ext_params)
+				| S390_lowcore.ext_params);
+
+	if (hws_wq)
+		queue_work(hws_wq, &cb->worker);
+}
+
+static int check_qsi_on_setup(void)
+{
+	int rc;
+	unsigned int cpu;
+	struct hws_cpu_buffer *cb;
+
+	for_each_online_cpu(cpu) {
+		cb = &per_cpu(sampler_cpu_buffer, cpu);
+		rc = smp_ctl_qsi(cpu);
+		WARN_ON(rc);
+		if (rc)
+			return -EOPNOTSUPP;
+
+		if (!cb->qsi.as) {
+			printk(KERN_INFO "hwsampler: CPUMF sampling is not authorized.\n");
+			return -EINVAL;
+		}
+
+		if (cb->qsi.es) {
+			printk(KERN_WARNING "hwsampler: CPUMF is still enabled.\n");
+			rc = smp_ctl_ssctl_stop(cpu);
+			if (rc)
+				return -EINVAL;
+
+			printk(KERN_INFO
+				"CPU %d, CPUMF Sampling stopped now.\n", cpu);
+		}
+	}
+	return 0;
+}
+
+static int check_qsi_on_start(void)
+{
+	unsigned int cpu;
+	int rc;
+	struct hws_cpu_buffer *cb;
+
+	for_each_online_cpu(cpu) {
+		cb = &per_cpu(sampler_cpu_buffer, cpu);
+		rc = smp_ctl_qsi(cpu);
+		WARN_ON(rc);
+
+		if (!cb->qsi.as)
+			return -EINVAL;
+
+		if (cb->qsi.es)
+			return -EINVAL;
+
+		if (cb->qsi.cs)
+			return -EINVAL;
+	}
+	return 0;
+}
+
+static void worker_on_start(unsigned int cpu)
+{
+	struct hws_cpu_buffer *cb;
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+	cb->worker_entry = cb->first_sdbt;
+}
+
+static int worker_check_error(unsigned int cpu, int ext_params)
+{
+	int rc;
+	unsigned long *sdbt;
+	struct hws_cpu_buffer *cb;
+
+	rc = 0;
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+	sdbt = (unsigned long *) cb->worker_entry;
+
+	if (!sdbt || !*sdbt)
+		return -EINVAL;
+
+	if (ext_params & EI_IEA)
+		cb->req_alert++;
+
+	if (ext_params & EI_LSDA)
+		cb->loss_of_sample_data++;
+
+	if (ext_params & EI_IEA) {
+		cb->invalid_entry_address++;
+		rc = -EINVAL;
+	}
+
+	if (ext_params & EI_ISE) {
+		cb->incorrect_sdbt_entry++;
+		rc = -EINVAL;
+	}
+
+	if (ext_params & EI_SACA) {
+		cb->sample_auth_change_alert++;
+		rc = -EINVAL;
+	}
+
+	return rc;
+}
+
+static void worker_on_finish(unsigned int cpu)
+{
+	int rc, i;
+	struct hws_cpu_buffer *cb;
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+	if (cb->finish) {
+		rc = smp_ctl_qsi(cpu);
+		WARN_ON(rc);
+		if (cb->qsi.es) {
+			printk(KERN_INFO
+				"hwsampler: CPU %d, CPUMF Stop/Deactivate sampling.\n",
+				cpu);
+			rc = smp_ctl_ssctl_stop(cpu);
+			if (rc)
+				printk(KERN_INFO
+					"hwsampler: CPU %d, CPUMF Deactivation failed.\n",
+					cpu);
+
+			for_each_online_cpu(i) {
+				if (i == cpu)
+					continue;
+				if (!cb->finish) {
+					cb->finish = 1;
+					queue_work_on(i, hws_wq,
+						&cb->worker);
+				}
+			}
+		}
+	}
+}
+
+static void worker_on_interrupt(unsigned int cpu)
+{
+	unsigned long *sdbt;
+	unsigned char done;
+	struct hws_cpu_buffer *cb;
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+	sdbt = (unsigned long *) cb->worker_entry;
+
+	done = 0;
+	/* do not proceed if stop was entered,
+	 * forget the buffers not yet processed */
+	while (!done && !cb->stop_mode) {
+		unsigned long *trailer;
+		struct hws_trailer_entry *te;
+		unsigned long *dear = 0;
+
+		trailer = trailer_entry_ptr(*sdbt);
+		/* leave loop if no more work to do */
+		if (!(*trailer & BUFFER_FULL_MASK)) {
+			done = 1;
+			if (!hws_flush_all)
+				continue;
+		}
+
+		te = (struct hws_trailer_entry *)trailer;
+		cb->sample_overflow += te->overflow;
+
+		add_samples_to_oprofile(cpu, sdbt, dear);
+
+		/* reset trailer */
+		xchg((unsigned char *) te, 0x40);
+
+		/* advance to next sdb slot in current sdbt */
+		sdbt++;
+		/* in case link bit is set use address w/o link bit */
+		if (is_link_entry(sdbt))
+			sdbt = get_next_sdbt(sdbt);
+
+		cb->worker_entry = (unsigned long)sdbt;
+	}
+}
+
+static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
+		unsigned long *dear)
+{
+	struct hws_data_entry *sample_data_ptr;
+	unsigned long *trailer;
+
+	trailer = trailer_entry_ptr(*sdbt);
+	if (dear) {
+		if (dear > trailer)
+			return;
+		trailer = dear;
+	}
+
+	sample_data_ptr = (struct hws_data_entry *)(*sdbt);
+
+	while ((unsigned long *)sample_data_ptr < trailer) {
+		struct pt_regs *regs = NULL;
+		struct task_struct *tsk = NULL;
+
+		/*
+		 * Check sampling mode, 1 indicates basic (=customer) sampling
+		 * mode.
+		 */
+		if (sample_data_ptr->def != 1) {
+			/* sample slot is not yet written */
+			break;
+		} else {
+			/* make sure we don't use it twice,
+			 * the next time the sampler will set it again */
+			sample_data_ptr->def = 0;
+		}
+
+		/* Get pt_regs. */
+		if (sample_data_ptr->P == 1) {
+			/* userspace sample */
+			unsigned int pid = sample_data_ptr->prim_asn;
+			rcu_read_lock();
+			tsk = pid_task(find_vpid(pid), PIDTYPE_PID);
+			if (tsk)
+				regs = task_pt_regs(tsk);
+			rcu_read_unlock();
+		} else {
+			/* kernelspace sample */
+			regs = task_pt_regs(current);
+		}
+
+		mutex_lock(&hws_sem);
+		oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0,
+				!sample_data_ptr->P, tsk);
+		mutex_unlock(&hws_sem);
+
+		sample_data_ptr++;
+	}
+}
+
+static void worker(struct work_struct *work)
+{
+	unsigned int cpu;
+	int ext_params;
+	struct hws_cpu_buffer *cb;
+
+	cb = container_of(work, struct hws_cpu_buffer, worker);
+	cpu = smp_processor_id();
+	ext_params = atomic_xchg(&cb->ext_params, 0);
+
+	if (!cb->worker_entry)
+		worker_on_start(cpu);
+
+	if (worker_check_error(cpu, ext_params))
+		return;
+
+	if (!cb->finish)
+		worker_on_interrupt(cpu);
+
+	if (cb->finish)
+		worker_on_finish(cpu);
+}
+
+/**
+ * hwsampler_allocate() - allocate memory for the hardware sampler
+ * @sdbt:  number of SDBTs per online CPU (must be > 0)
+ * @sdb:   number of SDBs per SDBT (minimum 1, maximum 511)
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_allocate(unsigned long sdbt, unsigned long sdb)
+{
+	int cpu, rc;
+	mutex_lock(&hws_sem);
+
+	rc = -EINVAL;
+	if (hws_state != HWS_DEALLOCATED)
+		goto allocate_exit;
+
+	if (sdbt < 1)
+		goto allocate_exit;
+
+	if (sdb > MAX_NUM_SDB || sdb < MIN_NUM_SDB)
+		goto allocate_exit;
+
+	num_sdbt = sdbt;
+	num_sdb = sdb;
+
+	oom_killer_was_active = 0;
+	register_oom_notifier(&hws_oom_notifier);
+
+	for_each_online_cpu(cpu) {
+		if (allocate_sdbt(cpu)) {
+			unregister_oom_notifier(&hws_oom_notifier);
+			goto allocate_error;
+		}
+	}
+	unregister_oom_notifier(&hws_oom_notifier);
+	if (oom_killer_was_active)
+		goto allocate_error;
+
+	hws_state = HWS_STOPPED;
+	rc = 0;
+
+allocate_exit:
+	mutex_unlock(&hws_sem);
+	return rc;
+
+allocate_error:
+	rc = -ENOMEM;
+	printk(KERN_ERR "hwsampler: CPUMF Memory allocation failed.\n");
+	goto allocate_exit;
+}
+
+/**
+ * hwsampler_deallocate() - deallocate hardware sampler memory
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_deallocate()
+{
+	int rc;
+
+	mutex_lock(&hws_sem);
+
+	rc = -EINVAL;
+	if (hws_state != HWS_STOPPED)
+		goto deallocate_exit;
+
+	smp_ctl_clear_bit(0, 5); /* set bit 58 CR0 off */
+	deallocate_sdbt();
+
+	hws_state = HWS_DEALLOCATED;
+	rc = 0;
+
+deallocate_exit:
+	mutex_unlock(&hws_sem);
+
+	return rc;
+}
+
+long hwsampler_query_min_interval(void)
+{
+	if (min_sampler_rate)
+		return min_sampler_rate;
+	else
+		return -EINVAL;
+}
+
+long hwsampler_query_max_interval(void)
+{
+	if (max_sampler_rate)
+		return max_sampler_rate;
+	else
+		return -EINVAL;
+}
+
+unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu)
+{
+	struct hws_cpu_buffer *cb;
+
+	cb = &per_cpu(sampler_cpu_buffer, cpu);
+
+	return cb->sample_overflow;
+}
+
+int hwsampler_setup()
+{
+	int rc;
+	int cpu;
+	struct hws_cpu_buffer *cb;
+
+	mutex_lock(&hws_sem);
+
+	rc = -EINVAL;
+	if (hws_state)
+		goto setup_exit;
+
+	hws_state = HWS_INIT;
+
+	init_all_cpu_buffers();
+
+	rc = check_hardware_prerequisites();
+	if (rc)
+		goto setup_exit;
+
+	rc = check_qsi_on_setup();
+	if (rc)
+		goto setup_exit;
+
+	rc = -EINVAL;
+	hws_wq = create_workqueue("hwsampler");
+	if (!hws_wq)
+		goto setup_exit;
+
+	register_cpu_notifier(&hws_cpu_notifier);
+
+	for_each_online_cpu(cpu) {
+		cb = &per_cpu(sampler_cpu_buffer, cpu);
+		INIT_WORK(&cb->worker, worker);
+		rc = smp_ctl_qsi(cpu);
+		WARN_ON(rc);
+		if (min_sampler_rate != cb->qsi.min_sampl_rate) {
+			if (min_sampler_rate) {
+				printk(KERN_WARNING
+					"hwsampler: different min sampler rate values.\n");
+				if (min_sampler_rate < cb->qsi.min_sampl_rate)
+					min_sampler_rate =
+						cb->qsi.min_sampl_rate;
+			} else
+				min_sampler_rate = cb->qsi.min_sampl_rate;
+		}
+		if (max_sampler_rate != cb->qsi.max_sampl_rate) {
+			if (max_sampler_rate) {
+				printk(KERN_WARNING
+					"hwsampler: different max sampler rate values.\n");
+				if (max_sampler_rate > cb->qsi.max_sampl_rate)
+					max_sampler_rate =
+						cb->qsi.max_sampl_rate;
+			} else
+				max_sampler_rate = cb->qsi.max_sampl_rate;
+		}
+	}
+	register_external_interrupt(0x1407, hws_ext_handler);
+
+	hws_state = HWS_DEALLOCATED;
+	rc = 0;
+
+setup_exit:
+	mutex_unlock(&hws_sem);
+	return rc;
+}
+
+int hwsampler_shutdown()
+{
+	int rc;
+
+	mutex_lock(&hws_sem);
+
+	rc = -EINVAL;
+	if (hws_state == HWS_DEALLOCATED || hws_state == HWS_STOPPED) {
+		mutex_unlock(&hws_sem);
+
+		if (hws_wq)
+			flush_workqueue(hws_wq);
+
+		mutex_lock(&hws_sem);
+
+		if (hws_state == HWS_STOPPED) {
+			smp_ctl_clear_bit(0, 5); /* set bit 58 CR0 off */
+			deallocate_sdbt();
+		}
+		if (hws_wq) {
+			destroy_workqueue(hws_wq);
+			hws_wq = NULL;
+		}
+
+		unregister_external_interrupt(0x1407, hws_ext_handler);
+		hws_state = HWS_INIT;
+		rc = 0;
+	}
+	mutex_unlock(&hws_sem);
+
+	unregister_cpu_notifier(&hws_cpu_notifier);
+
+	return rc;
+}
+
+/**
+ * hwsampler_start_all() - start hardware sampling on all online CPUs
+ * @rate:  specifies the used interval when samples are taken
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_start_all(unsigned long rate)
+{
+	int rc, cpu;
+
+	mutex_lock(&hws_sem);
+
+	hws_oom = 0;
+
+	rc = -EINVAL;
+	if (hws_state != HWS_STOPPED)
+		goto start_all_exit;
+
+	interval = rate;
+
+	/* fail if rate is not valid */
+	if (interval < min_sampler_rate || interval > max_sampler_rate)
+		goto start_all_exit;
+
+	rc = check_qsi_on_start();
+	if (rc)
+		goto start_all_exit;
+
+	rc = prepare_cpu_buffers();
+	if (rc)
+		goto start_all_exit;
+
+	for_each_online_cpu(cpu) {
+		rc = start_sampling(cpu);
+		if (rc)
+			break;
+	}
+	if (rc) {
+		for_each_online_cpu(cpu) {
+			stop_sampling(cpu);
+		}
+		goto start_all_exit;
+	}
+	hws_state = HWS_STARTED;
+	rc = 0;
+
+start_all_exit:
+	mutex_unlock(&hws_sem);
+
+	if (rc)
+		return rc;
+
+	register_oom_notifier(&hws_oom_notifier);
+	hws_oom = 1;
+	hws_flush_all = 0;
+	/* now let them in, 1407 CPUMF external interrupts */
+	smp_ctl_set_bit(0, 5); /* set CR0 bit 58 */
+
+	return 0;
+}
+
+/**
+ * hwsampler_stop_all() - stop hardware sampling on all online CPUs
+ *
+ * Returns 0 on success, !0 on failure.
+ */
+int hwsampler_stop_all()
+{
+	int tmp_rc, rc, cpu;
+	struct hws_cpu_buffer *cb;
+
+	mutex_lock(&hws_sem);
+
+	rc = 0;
+	if (hws_state == HWS_INIT) {
+		mutex_unlock(&hws_sem);
+		return rc;
+	}
+	hws_state = HWS_STOPPING;
+	mutex_unlock(&hws_sem);
+
+	for_each_online_cpu(cpu) {
+		cb = &per_cpu(sampler_cpu_buffer, cpu);
+		cb->stop_mode = 1;
+		tmp_rc = stop_sampling(cpu);
+		if (tmp_rc)
+			rc = tmp_rc;
+	}
+
+	if (hws_wq)
+		flush_workqueue(hws_wq);
+
+	mutex_lock(&hws_sem);
+	if (hws_oom) {
+		unregister_oom_notifier(&hws_oom_notifier);
+		hws_oom = 0;
+	}
+	hws_state = HWS_STOPPED;
+	mutex_unlock(&hws_sem);
+
+	return rc;
+}
diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h
new file mode 100644
index 000000000000..8c72b59316b5
--- /dev/null
+++ b/arch/s390/oprofile/hwsampler.h
@@ -0,0 +1,113 @@
+/*
+ * CPUMF HW sampler functions and internal structures
+ *
+ *    Copyright IBM Corp. 2010
+ *    Author(s): Heinz Graalfs <graalfs@de.ibm.com>
+ */
+
+#ifndef HWSAMPLER_H_
+#define HWSAMPLER_H_
+
+#include <linux/workqueue.h>
+
+struct hws_qsi_info_block          /* QUERY SAMPLING information block  */
+{ /* Bit(s) */
+	unsigned int b0_13:14;      /* 0-13: zeros                       */
+	unsigned int as:1;          /* 14: sampling authorisation control*/
+	unsigned int b15_21:7;      /* 15-21: zeros                      */
+	unsigned int es:1;          /* 22: sampling enable control       */
+	unsigned int b23_29:7;      /* 23-29: zeros                      */
+	unsigned int cs:1;          /* 30: sampling activation control   */
+	unsigned int:1;             /* 31: reserved                      */
+	unsigned int bsdes:16;      /* 4-5: size of sampling entry       */
+	unsigned int:16;            /* 6-7: reserved                     */
+	unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */
+	unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/
+	unsigned long tear;         /* 24-31: TEAR contents              */
+	unsigned long dear;         /* 32-39: DEAR contents              */
+	unsigned int rsvrd0;        /* 40-43: reserved                   */
+	unsigned int cpu_speed;     /* 44-47: CPU speed                  */
+	unsigned long long rsvrd1;  /* 48-55: reserved                   */
+	unsigned long long rsvrd2;  /* 56-63: reserved                   */
+};
+
+struct hws_ssctl_request_block     /* SET SAMPLING CONTROLS req block   */
+{ /* bytes 0 - 7  Bit(s) */
+	unsigned int s:1;           /* 0: maximum buffer indicator       */
+	unsigned int h:1;           /* 1: part. level reserved for VM use*/
+	unsigned long b2_53:52;     /* 2-53: zeros                       */
+	unsigned int es:1;          /* 54: sampling enable control       */
+	unsigned int b55_61:7;      /* 55-61: - zeros                    */
+	unsigned int cs:1;          /* 62: sampling activation control   */
+	unsigned int b63:1;         /* 63: zero                          */
+	unsigned long interval;     /* 8-15: sampling interval           */
+	unsigned long tear;         /* 16-23: TEAR contents              */
+	unsigned long dear;         /* 24-31: DEAR contents              */
+	/* 32-63:                                                        */
+	unsigned long rsvrd1;       /* reserved                          */
+	unsigned long rsvrd2;       /* reserved                          */
+	unsigned long rsvrd3;       /* reserved                          */
+	unsigned long rsvrd4;       /* reserved                          */
+};
+
+struct hws_cpu_buffer {
+	unsigned long first_sdbt;       /* @ of 1st SDB-Table for this CP*/
+	unsigned long worker_entry;
+	unsigned long sample_overflow;  /* taken from SDB ...            */
+	struct hws_qsi_info_block qsi;
+	struct hws_ssctl_request_block ssctl;
+	struct work_struct worker;
+	atomic_t ext_params;
+	unsigned long req_alert;
+	unsigned long loss_of_sample_data;
+	unsigned long invalid_entry_address;
+	unsigned long incorrect_sdbt_entry;
+	unsigned long sample_auth_change_alert;
+	unsigned int finish:1;
+	unsigned int oom:1;
+	unsigned int stop_mode:1;
+};
+
+struct hws_data_entry {
+	unsigned int def:16;        /* 0-15  Data Entry Format           */
+	unsigned int R:4;           /* 16-19 reserved                    */
+	unsigned int U:4;           /* 20-23 Number of unique instruct.  */
+	unsigned int z:2;           /* zeros                             */
+	unsigned int T:1;           /* 26 PSW DAT mode                   */
+	unsigned int W:1;           /* 27 PSW wait state                 */
+	unsigned int P:1;           /* 28 PSW Problem state              */
+	unsigned int AS:2;          /* 29-30 PSW address-space control   */
+	unsigned int I:1;           /* 31 entry valid or invalid         */
+	unsigned int:16;
+	unsigned int prim_asn:16;   /* primary ASN                       */
+	unsigned long long ia;      /* Instruction Address               */
+	unsigned long long lpp;     /* Logical-Partition Program Param.  */
+	unsigned long long vpp;     /* Virtual-Machine Program Param.    */
+};
+
+struct hws_trailer_entry {
+	unsigned int f:1;           /* 0 - Block Full Indicator          */
+	unsigned int a:1;           /* 1 - Alert request control         */
+	unsigned long:62;           /* 2 - 63: Reserved                  */
+	unsigned long overflow;     /* 64 - sample Overflow count        */
+	unsigned long timestamp;    /* 16 - time-stamp                   */
+	unsigned long timestamp1;   /*                                   */
+	unsigned long reserved1;    /* 32 -Reserved                      */
+	unsigned long reserved2;    /*                                   */
+	unsigned long progusage1;   /* 48 - reserved for programming use */
+	unsigned long progusage2;   /*                                   */
+};
+
+int hwsampler_setup(void);
+int hwsampler_shutdown(void);
+int hwsampler_allocate(unsigned long sdbt, unsigned long sdb);
+int hwsampler_deallocate(void);
+long hwsampler_query_min_interval(void);
+long hwsampler_query_max_interval(void);
+int hwsampler_start_all(unsigned long interval);
+int hwsampler_stop_all(void);
+int hwsampler_deactivate(unsigned int cpu);
+int hwsampler_activate(unsigned int cpu);
+unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu);
+
+#endif /*HWSAMPLER_H_*/
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 7a995113b918..16c76def4a9d 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -4,23 +4,182 @@
  * S390 Version
  *   Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation
  *   Author(s): Thomas Spatzier (tspat@de.ibm.com)
+ *   Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com)
+ *   Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com)
  *
- * @remark Copyright 2002 OProfile authors
+ * @remark Copyright 2002-2011 OProfile authors
  */
 
 #include <linux/oprofile.h>
 #include <linux/init.h>
 #include <linux/errno.h>
+#include <linux/oprofile.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+
+#include "../../../drivers/oprofile/oprof.h"
+#include "hwsampler.h"
+
+#define DEFAULT_INTERVAL	4096
+
+#define DEFAULT_SDBT_BLOCKS	1
+#define DEFAULT_SDB_BLOCKS	511
+
+static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL;
+static unsigned long oprofile_min_interval;
+static unsigned long oprofile_max_interval;
+
+static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
+static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;
 
+static int hwsampler_file;
+static int hwsampler_running;	/* start_mutex must be held to change */
+
+static struct oprofile_operations timer_ops;
 
 extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
 
-int __init oprofile_arch_init(struct oprofile_operations* ops)
+static int oprofile_hwsampler_start(void)
+{
+	int retval;
+
+	hwsampler_running = hwsampler_file;
+
+	if (!hwsampler_running)
+		return timer_ops.start();
+
+	retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
+	if (retval)
+		return retval;
+
+	retval = hwsampler_start_all(oprofile_hw_interval);
+	if (retval)
+		hwsampler_deallocate();
+
+	return retval;
+}
+
+static void oprofile_hwsampler_stop(void)
+{
+	if (!hwsampler_running) {
+		timer_ops.stop();
+		return;
+	}
+
+	hwsampler_stop_all();
+	hwsampler_deallocate();
+	return;
+}
+
+static ssize_t hwsampler_read(struct file *file, char __user *buf,
+		size_t count, loff_t *offset)
+{
+	return oprofilefs_ulong_to_user(hwsampler_file, buf, count, offset);
+}
+
+static ssize_t hwsampler_write(struct file *file, char const __user *buf,
+		size_t count, loff_t *offset)
+{
+	unsigned long val;
+	int retval;
+
+	if (*offset)
+		return -EINVAL;
+
+	retval = oprofilefs_ulong_from_user(&val, buf, count);
+	if (retval)
+		return retval;
+
+	if (oprofile_started)
+		/*
+		 * save to do without locking as we set
+		 * hwsampler_running in start() when start_mutex is
+		 * held
+		 */
+		return -EBUSY;
+
+	hwsampler_file = val;
+
+	return count;
+}
+
+static const struct file_operations hwsampler_fops = {
+	.read		= hwsampler_read,
+	.write		= hwsampler_write,
+};
+
+static int oprofile_create_hwsampling_files(struct super_block *sb,
+						struct dentry *root)
+{
+	struct dentry *hw_dir;
+
+	/* reinitialize default values */
+	hwsampler_file = 1;
+
+	hw_dir = oprofilefs_mkdir(sb, root, "hwsampling");
+	if (!hw_dir)
+		return -EINVAL;
+
+	oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops);
+	oprofilefs_create_ulong(sb, hw_dir, "hw_interval",
+				&oprofile_hw_interval);
+	oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval",
+				&oprofile_min_interval);
+	oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval",
+				&oprofile_max_interval);
+	oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks",
+				&oprofile_sdbt_blocks);
+
+	return 0;
+}
+
+static int oprofile_hwsampler_init(struct oprofile_operations *ops)
+{
+	if (hwsampler_setup())
+		return -ENODEV;
+
+	/*
+	 * create hwsampler files only if hwsampler_setup() succeeds.
+	 */
+	oprofile_min_interval = hwsampler_query_min_interval();
+	if (oprofile_min_interval < 0) {
+		oprofile_min_interval = 0;
+		return -ENODEV;
+	}
+	oprofile_max_interval = hwsampler_query_max_interval();
+	if (oprofile_max_interval < 0) {
+		oprofile_max_interval = 0;
+		return -ENODEV;
+	}
+
+	if (oprofile_timer_init(ops))
+		return -ENODEV;
+
+	printk(KERN_INFO "oprofile: using hardware sampling\n");
+
+	memcpy(&timer_ops, ops, sizeof(timer_ops));
+
+	ops->start = oprofile_hwsampler_start;
+	ops->stop = oprofile_hwsampler_stop;
+	ops->create_files = oprofile_create_hwsampling_files;
+
+	return 0;
+}
+
+static void oprofile_hwsampler_exit(void)
+{
+	oprofile_timer_exit();
+	hwsampler_shutdown();
+}
+
+int __init oprofile_arch_init(struct oprofile_operations *ops)
 {
 	ops->backtrace = s390_backtrace;
-	return -ENODEV;
+
+	return oprofile_hwsampler_init(ops);
 }
 
 void oprofile_arch_exit(void)
 {
+	oprofile_hwsampler_exit();
 }