summary refs log tree commit diff
path: root/drivers/hwtracing
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-07-03 20:55:59 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-07-03 20:55:59 -0700
commitf4dd029ee0b92b77769a1ac6dce03e829e74763e (patch)
treef918cc855a8171d68746ab61a32b9e61b024845b /drivers/hwtracing
parent974668417b74ec5f68df2411f53b3d3812565059 (diff)
parentcbbdc6082917a92da0fc07cee255111de16ed64a (diff)
downloadlinux-f4dd029ee0b92b77769a1ac6dce03e829e74763e.tar.gz
Merge tag 'char-misc-4.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc
Pull char/misc updates from Greg KH:
 "Here is the "big" char/misc driver patchset for 4.13-rc1.

  Lots of stuff in here, a large thunderbolt update, w1 driver header
  reorg, the new mux driver subsystem, google firmware driver updates,
  and a raft of other smaller things. Full details in the shortlog.

  All of these have been in linux-next for a while with the only
  reported issue being a merge problem with this tree and the jc-docs
  tree in the w1 documentation area"

* tag 'char-misc-4.13-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc: (147 commits)
  misc: apds990x: Use sysfs_match_string() helper
  mei: drop unreachable code in mei_start
  mei: validate the message header only in first fragment.
  DocBook: w1: Update W1 file locations and names in DocBook
  mux: adg792a: always require I2C support
  nvmem: rockchip-efuse: add support for rk322x-efuse
  nvmem: core: add locking to nvmem_find_cell
  nvmem: core: Call put_device() in nvmem_unregister()
  nvmem: core: fix leaks on registration errors
  nvmem: correct Broadcom OTP controller driver writes
  w1: Add subsystem kernel public interface
  drivers/fsi: Add module license to core driver
  drivers/fsi: Use asynchronous slave mode
  drivers/fsi: Add hub master support
  drivers/fsi: Add SCOM FSI client device driver
  drivers/fsi/gpio: Add tracepoints for GPIO master
  drivers/fsi: Add GPIO based FSI master
  drivers/fsi: Document FSI master sysfs files in ABI
  drivers/fsi: Add error handling for slave
  drivers/fsi: Add tracepoints for low-level operations
  ...
Diffstat (limited to 'drivers/hwtracing')
-rw-r--r--drivers/hwtracing/coresight/Kconfig14
-rw-r--r--drivers/hwtracing/coresight/Makefile1
-rw-r--r--drivers/hwtracing/coresight/coresight-cpu-debug.c700
-rw-r--r--drivers/hwtracing/coresight/coresight-etb10.c7
-rw-r--r--drivers/hwtracing/coresight/coresight-etm-perf.c3
-rw-r--r--drivers/hwtracing/coresight/coresight-tmc-etf.c25
-rw-r--r--drivers/hwtracing/coresight/coresight-tmc.c7
-rw-r--r--drivers/hwtracing/coresight/coresight.c34
-rw-r--r--drivers/hwtracing/coresight/of_coresight.c47
9 files changed, 800 insertions, 38 deletions
diff --git a/drivers/hwtracing/coresight/Kconfig b/drivers/hwtracing/coresight/Kconfig
index 130cb2114059..8d55d6d79015 100644
--- a/drivers/hwtracing/coresight/Kconfig
+++ b/drivers/hwtracing/coresight/Kconfig
@@ -89,4 +89,18 @@ config CORESIGHT_STM
 	  logging useful software events or data coming from various entities
 	  in the system, possibly running different OSs
 
+config CORESIGHT_CPU_DEBUG
+	tristate "CoreSight CPU Debug driver"
+	depends on ARM || ARM64
+	depends on DEBUG_FS
+	help
+	  This driver provides support for coresight debugging module. This
+	  is primarily used to dump sample-based profiling registers when
+	  system triggers panic, the driver will parse context registers so
+	  can quickly get to know program counter (PC), secure state,
+	  exception level, etc. Before use debugging functionality, platform
+	  needs to ensure the clock domain and power domain are enabled
+	  properly, please refer Documentation/trace/coresight-cpu-debug.txt
+	  for detailed description and the example for usage.
+
 endif
diff --git a/drivers/hwtracing/coresight/Makefile b/drivers/hwtracing/coresight/Makefile
index af480d9c1441..433d59025eb6 100644
--- a/drivers/hwtracing/coresight/Makefile
+++ b/drivers/hwtracing/coresight/Makefile
@@ -16,3 +16,4 @@ obj-$(CONFIG_CORESIGHT_SOURCE_ETM4X) += coresight-etm4x.o \
 					coresight-etm4x-sysfs.o
 obj-$(CONFIG_CORESIGHT_QCOM_REPLICATOR) += coresight-replicator-qcom.o
 obj-$(CONFIG_CORESIGHT_STM) += coresight-stm.o
+obj-$(CONFIG_CORESIGHT_CPU_DEBUG) += coresight-cpu-debug.o
diff --git a/drivers/hwtracing/coresight/coresight-cpu-debug.c b/drivers/hwtracing/coresight/coresight-cpu-debug.c
new file mode 100644
index 000000000000..64a77e00eaa6
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-cpu-debug.c
@@ -0,0 +1,700 @@
+/*
+ * Copyright (c) 2017 Linaro Limited. All rights reserved.
+ *
+ * Author: Leo Yan <leo.yan@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#include <linux/amba/bus.h>
+#include <linux/coresight.h>
+#include <linux/cpu.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pm_qos.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+
+#include "coresight-priv.h"
+
+#define EDPCSR				0x0A0
+#define EDCIDSR				0x0A4
+#define EDVIDSR				0x0A8
+#define EDPCSR_HI			0x0AC
+#define EDOSLAR				0x300
+#define EDPRCR				0x310
+#define EDPRSR				0x314
+#define EDDEVID1			0xFC4
+#define EDDEVID				0xFC8
+
+#define EDPCSR_PROHIBITED		0xFFFFFFFF
+
+/* bits definition for EDPCSR */
+#define EDPCSR_THUMB			BIT(0)
+#define EDPCSR_ARM_INST_MASK		GENMASK(31, 2)
+#define EDPCSR_THUMB_INST_MASK		GENMASK(31, 1)
+
+/* bits definition for EDPRCR */
+#define EDPRCR_COREPURQ			BIT(3)
+#define EDPRCR_CORENPDRQ		BIT(0)
+
+/* bits definition for EDPRSR */
+#define EDPRSR_DLK			BIT(6)
+#define EDPRSR_PU			BIT(0)
+
+/* bits definition for EDVIDSR */
+#define EDVIDSR_NS			BIT(31)
+#define EDVIDSR_E2			BIT(30)
+#define EDVIDSR_E3			BIT(29)
+#define EDVIDSR_HV			BIT(28)
+#define EDVIDSR_VMID			GENMASK(7, 0)
+
+/*
+ * bits definition for EDDEVID1:PSCROffset
+ *
+ * NOTE: armv8 and armv7 have different definition for the register,
+ * so consolidate the bits definition as below:
+ *
+ * 0b0000 - Sample offset applies based on the instruction state, we
+ *          rely on EDDEVID to check if EDPCSR is implemented or not
+ * 0b0001 - No offset applies.
+ * 0b0010 - No offset applies, but do not use in AArch32 mode
+ *
+ */
+#define EDDEVID1_PCSR_OFFSET_MASK	GENMASK(3, 0)
+#define EDDEVID1_PCSR_OFFSET_INS_SET	(0x0)
+#define EDDEVID1_PCSR_NO_OFFSET_DIS_AARCH32	(0x2)
+
+/* bits definition for EDDEVID */
+#define EDDEVID_PCSAMPLE_MODE		GENMASK(3, 0)
+#define EDDEVID_IMPL_EDPCSR		(0x1)
+#define EDDEVID_IMPL_EDPCSR_EDCIDSR	(0x2)
+#define EDDEVID_IMPL_FULL		(0x3)
+
+#define DEBUG_WAIT_SLEEP		1000
+#define DEBUG_WAIT_TIMEOUT		32000
+
+struct debug_drvdata {
+	void __iomem	*base;
+	struct device	*dev;
+	int		cpu;
+
+	bool		edpcsr_present;
+	bool		edcidsr_present;
+	bool		edvidsr_present;
+	bool		pc_has_offset;
+
+	u32		edpcsr;
+	u32		edpcsr_hi;
+	u32		edprsr;
+	u32		edvidsr;
+	u32		edcidsr;
+};
+
+static DEFINE_MUTEX(debug_lock);
+static DEFINE_PER_CPU(struct debug_drvdata *, debug_drvdata);
+static int debug_count;
+static struct dentry *debug_debugfs_dir;
+
+static bool debug_enable;
+module_param_named(enable, debug_enable, bool, 0600);
+MODULE_PARM_DESC(enable, "Control to enable coresight CPU debug functionality");
+
+static void debug_os_unlock(struct debug_drvdata *drvdata)
+{
+	/* Unlocks the debug registers */
+	writel_relaxed(0x0, drvdata->base + EDOSLAR);
+
+	/* Make sure the registers are unlocked before accessing */
+	wmb();
+}
+
+/*
+ * According to ARM DDI 0487A.k, before access external debug
+ * registers should firstly check the access permission; if any
+ * below condition has been met then cannot access debug
+ * registers to avoid lockup issue:
+ *
+ * - CPU power domain is powered off;
+ * - The OS Double Lock is locked;
+ *
+ * By checking EDPRSR can get to know if meet these conditions.
+ */
+static bool debug_access_permitted(struct debug_drvdata *drvdata)
+{
+	/* CPU is powered off */
+	if (!(drvdata->edprsr & EDPRSR_PU))
+		return false;
+
+	/* The OS Double Lock is locked */
+	if (drvdata->edprsr & EDPRSR_DLK)
+		return false;
+
+	return true;
+}
+
+static void debug_force_cpu_powered_up(struct debug_drvdata *drvdata)
+{
+	u32 edprcr;
+
+try_again:
+
+	/*
+	 * Send request to power management controller and assert
+	 * DBGPWRUPREQ signal; if power management controller has
+	 * sane implementation, it should enable CPU power domain
+	 * in case CPU is in low power state.
+	 */
+	edprcr = readl_relaxed(drvdata->base + EDPRCR);
+	edprcr |= EDPRCR_COREPURQ;
+	writel_relaxed(edprcr, drvdata->base + EDPRCR);
+
+	/* Wait for CPU to be powered up (timeout~=32ms) */
+	if (readx_poll_timeout_atomic(readl_relaxed, drvdata->base + EDPRSR,
+			drvdata->edprsr, (drvdata->edprsr & EDPRSR_PU),
+			DEBUG_WAIT_SLEEP, DEBUG_WAIT_TIMEOUT)) {
+		/*
+		 * Unfortunately the CPU cannot be powered up, so return
+		 * back and later has no permission to access other
+		 * registers. For this case, should disable CPU low power
+		 * states to ensure CPU power domain is enabled!
+		 */
+		dev_err(drvdata->dev, "%s: power up request for CPU%d failed\n",
+			__func__, drvdata->cpu);
+		return;
+	}
+
+	/*
+	 * At this point the CPU is powered up, so set the no powerdown
+	 * request bit so we don't lose power and emulate power down.
+	 */
+	edprcr = readl_relaxed(drvdata->base + EDPRCR);
+	edprcr |= EDPRCR_COREPURQ | EDPRCR_CORENPDRQ;
+	writel_relaxed(edprcr, drvdata->base + EDPRCR);
+
+	drvdata->edprsr = readl_relaxed(drvdata->base + EDPRSR);
+
+	/* The core power domain got switched off on use, try again */
+	if (unlikely(!(drvdata->edprsr & EDPRSR_PU)))
+		goto try_again;
+}
+
+static void debug_read_regs(struct debug_drvdata *drvdata)
+{
+	u32 save_edprcr;
+
+	CS_UNLOCK(drvdata->base);
+
+	/* Unlock os lock */
+	debug_os_unlock(drvdata);
+
+	/* Save EDPRCR register */
+	save_edprcr = readl_relaxed(drvdata->base + EDPRCR);
+
+	/*
+	 * Ensure CPU power domain is enabled to let registers
+	 * are accessiable.
+	 */
+	debug_force_cpu_powered_up(drvdata);
+
+	if (!debug_access_permitted(drvdata))
+		goto out;
+
+	drvdata->edpcsr = readl_relaxed(drvdata->base + EDPCSR);
+
+	/*
+	 * As described in ARM DDI 0487A.k, if the processing
+	 * element (PE) is in debug state, or sample-based
+	 * profiling is prohibited, EDPCSR reads as 0xFFFFFFFF;
+	 * EDCIDSR, EDVIDSR and EDPCSR_HI registers also become
+	 * UNKNOWN state. So directly bail out for this case.
+	 */
+	if (drvdata->edpcsr == EDPCSR_PROHIBITED)
+		goto out;
+
+	/*
+	 * A read of the EDPCSR normally has the side-effect of
+	 * indirectly writing to EDCIDSR, EDVIDSR and EDPCSR_HI;
+	 * at this point it's safe to read value from them.
+	 */
+	if (IS_ENABLED(CONFIG_64BIT))
+		drvdata->edpcsr_hi = readl_relaxed(drvdata->base + EDPCSR_HI);
+
+	if (drvdata->edcidsr_present)
+		drvdata->edcidsr = readl_relaxed(drvdata->base + EDCIDSR);
+
+	if (drvdata->edvidsr_present)
+		drvdata->edvidsr = readl_relaxed(drvdata->base + EDVIDSR);
+
+out:
+	/* Restore EDPRCR register */
+	writel_relaxed(save_edprcr, drvdata->base + EDPRCR);
+
+	CS_LOCK(drvdata->base);
+}
+
+#ifdef CONFIG_64BIT
+static unsigned long debug_adjust_pc(struct debug_drvdata *drvdata)
+{
+	return (unsigned long)drvdata->edpcsr_hi << 32 |
+	       (unsigned long)drvdata->edpcsr;
+}
+#else
+static unsigned long debug_adjust_pc(struct debug_drvdata *drvdata)
+{
+	unsigned long arm_inst_offset = 0, thumb_inst_offset = 0;
+	unsigned long pc;
+
+	pc = (unsigned long)drvdata->edpcsr;
+
+	if (drvdata->pc_has_offset) {
+		arm_inst_offset = 8;
+		thumb_inst_offset = 4;
+	}
+
+	/* Handle thumb instruction */
+	if (pc & EDPCSR_THUMB) {
+		pc = (pc & EDPCSR_THUMB_INST_MASK) - thumb_inst_offset;
+		return pc;
+	}
+
+	/*
+	 * Handle arm instruction offset, if the arm instruction
+	 * is not 4 byte alignment then it's possible the case
+	 * for implementation defined; keep original value for this
+	 * case and print info for notice.
+	 */
+	if (pc & BIT(1))
+		dev_emerg(drvdata->dev,
+			  "Instruction offset is implementation defined\n");
+	else
+		pc = (pc & EDPCSR_ARM_INST_MASK) - arm_inst_offset;
+
+	return pc;
+}
+#endif
+
+static void debug_dump_regs(struct debug_drvdata *drvdata)
+{
+	struct device *dev = drvdata->dev;
+	unsigned long pc;
+
+	dev_emerg(dev, " EDPRSR:  %08x (Power:%s DLK:%s)\n",
+		  drvdata->edprsr,
+		  drvdata->edprsr & EDPRSR_PU ? "On" : "Off",
+		  drvdata->edprsr & EDPRSR_DLK ? "Lock" : "Unlock");
+
+	if (!debug_access_permitted(drvdata)) {
+		dev_emerg(dev, "No permission to access debug registers!\n");
+		return;
+	}
+
+	if (drvdata->edpcsr == EDPCSR_PROHIBITED) {
+		dev_emerg(dev, "CPU is in Debug state or profiling is prohibited!\n");
+		return;
+	}
+
+	pc = debug_adjust_pc(drvdata);
+	dev_emerg(dev, " EDPCSR:  [<%p>] %pS\n", (void *)pc, (void *)pc);
+
+	if (drvdata->edcidsr_present)
+		dev_emerg(dev, " EDCIDSR: %08x\n", drvdata->edcidsr);
+
+	if (drvdata->edvidsr_present)
+		dev_emerg(dev, " EDVIDSR: %08x (State:%s Mode:%s Width:%dbits VMID:%x)\n",
+			  drvdata->edvidsr,
+			  drvdata->edvidsr & EDVIDSR_NS ?
+			  "Non-secure" : "Secure",
+			  drvdata->edvidsr & EDVIDSR_E3 ? "EL3" :
+				(drvdata->edvidsr & EDVIDSR_E2 ?
+				 "EL2" : "EL1/0"),
+			  drvdata->edvidsr & EDVIDSR_HV ? 64 : 32,
+			  drvdata->edvidsr & (u32)EDVIDSR_VMID);
+}
+
+static void debug_init_arch_data(void *info)
+{
+	struct debug_drvdata *drvdata = info;
+	u32 mode, pcsr_offset;
+	u32 eddevid, eddevid1;
+
+	CS_UNLOCK(drvdata->base);
+
+	/* Read device info */
+	eddevid  = readl_relaxed(drvdata->base + EDDEVID);
+	eddevid1 = readl_relaxed(drvdata->base + EDDEVID1);
+
+	CS_LOCK(drvdata->base);
+
+	/* Parse implementation feature */
+	mode = eddevid & EDDEVID_PCSAMPLE_MODE;
+	pcsr_offset = eddevid1 & EDDEVID1_PCSR_OFFSET_MASK;
+
+	drvdata->edpcsr_present  = false;
+	drvdata->edcidsr_present = false;
+	drvdata->edvidsr_present = false;
+	drvdata->pc_has_offset   = false;
+
+	switch (mode) {
+	case EDDEVID_IMPL_FULL:
+		drvdata->edvidsr_present = true;
+		/* Fall through */
+	case EDDEVID_IMPL_EDPCSR_EDCIDSR:
+		drvdata->edcidsr_present = true;
+		/* Fall through */
+	case EDDEVID_IMPL_EDPCSR:
+		/*
+		 * In ARM DDI 0487A.k, the EDDEVID1.PCSROffset is used to
+		 * define if has the offset for PC sampling value; if read
+		 * back EDDEVID1.PCSROffset == 0x2, then this means the debug
+		 * module does not sample the instruction set state when
+		 * armv8 CPU in AArch32 state.
+		 */
+		drvdata->edpcsr_present =
+			((IS_ENABLED(CONFIG_64BIT) && pcsr_offset != 0) ||
+			 (pcsr_offset != EDDEVID1_PCSR_NO_OFFSET_DIS_AARCH32));
+
+		drvdata->pc_has_offset =
+			(pcsr_offset == EDDEVID1_PCSR_OFFSET_INS_SET);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Dump out information on panic.
+ */
+static int debug_notifier_call(struct notifier_block *self,
+			       unsigned long v, void *p)
+{
+	int cpu;
+	struct debug_drvdata *drvdata;
+
+	mutex_lock(&debug_lock);
+
+	/* Bail out if the functionality is disabled */
+	if (!debug_enable)
+		goto skip_dump;
+
+	pr_emerg("ARM external debug module:\n");
+
+	for_each_possible_cpu(cpu) {
+		drvdata = per_cpu(debug_drvdata, cpu);
+		if (!drvdata)
+			continue;
+
+		dev_emerg(drvdata->dev, "CPU[%d]:\n", drvdata->cpu);
+
+		debug_read_regs(drvdata);
+		debug_dump_regs(drvdata);
+	}
+
+skip_dump:
+	mutex_unlock(&debug_lock);
+	return 0;
+}
+
+static struct notifier_block debug_notifier = {
+	.notifier_call = debug_notifier_call,
+};
+
+static int debug_enable_func(void)
+{
+	struct debug_drvdata *drvdata;
+	int cpu, ret = 0;
+	cpumask_t mask;
+
+	/*
+	 * Use cpumask to track which debug power domains have
+	 * been powered on and use it to handle failure case.
+	 */
+	cpumask_clear(&mask);
+
+	for_each_possible_cpu(cpu) {
+		drvdata = per_cpu(debug_drvdata, cpu);
+		if (!drvdata)
+			continue;
+
+		ret = pm_runtime_get_sync(drvdata->dev);
+		if (ret < 0)
+			goto err;
+		else
+			cpumask_set_cpu(cpu, &mask);
+	}
+
+	return 0;
+
+err:
+	/*
+	 * If pm_runtime_get_sync() has failed, need rollback on
+	 * all the other CPUs that have been enabled before that.
+	 */
+	for_each_cpu(cpu, &mask) {
+		drvdata = per_cpu(debug_drvdata, cpu);
+		pm_runtime_put_noidle(drvdata->dev);
+	}
+
+	return ret;
+}
+
+static int debug_disable_func(void)
+{
+	struct debug_drvdata *drvdata;
+	int cpu, ret, err = 0;
+
+	/*
+	 * Disable debug power domains, records the error and keep
+	 * circling through all other CPUs when an error has been
+	 * encountered.
+	 */
+	for_each_possible_cpu(cpu) {
+		drvdata = per_cpu(debug_drvdata, cpu);
+		if (!drvdata)
+			continue;
+
+		ret = pm_runtime_put(drvdata->dev);
+		if (ret < 0)
+			err = ret;
+	}
+
+	return err;
+}
+
+static ssize_t debug_func_knob_write(struct file *f,
+		const char __user *buf, size_t count, loff_t *ppos)
+{
+	u8 val;
+	int ret;
+
+	ret = kstrtou8_from_user(buf, count, 2, &val);
+	if (ret)
+		return ret;
+
+	mutex_lock(&debug_lock);
+
+	if (val == debug_enable)
+		goto out;
+
+	if (val)
+		ret = debug_enable_func();
+	else
+		ret = debug_disable_func();
+
+	if (ret) {
+		pr_err("%s: unable to %s debug function: %d\n",
+		       __func__, val ? "enable" : "disable", ret);
+		goto err;
+	}
+
+	debug_enable = val;
+out:
+	ret = count;
+err:
+	mutex_unlock(&debug_lock);
+	return ret;
+}
+
+static ssize_t debug_func_knob_read(struct file *f,
+		char __user *ubuf, size_t count, loff_t *ppos)
+{
+	ssize_t ret;
+	char buf[3];
+
+	mutex_lock(&debug_lock);
+	snprintf(buf, sizeof(buf), "%d\n", debug_enable);
+	mutex_unlock(&debug_lock);
+
+	ret = simple_read_from_buffer(ubuf, count, ppos, buf, sizeof(buf));
+	return ret;
+}
+
+static const struct file_operations debug_func_knob_fops = {
+	.open	= simple_open,
+	.read	= debug_func_knob_read,
+	.write	= debug_func_knob_write,
+};
+
+static int debug_func_init(void)
+{
+	struct dentry *file;
+	int ret;
+
+	/* Create debugfs node */
+	debug_debugfs_dir = debugfs_create_dir("coresight_cpu_debug", NULL);
+	if (!debug_debugfs_dir) {
+		pr_err("%s: unable to create debugfs directory\n", __func__);
+		return -ENOMEM;
+	}
+
+	file = debugfs_create_file("enable", 0644, debug_debugfs_dir, NULL,
+				   &debug_func_knob_fops);
+	if (!file) {
+		pr_err("%s: unable to create enable knob file\n", __func__);
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/* Register function to be called for panic */
+	ret = atomic_notifier_chain_register(&panic_notifier_list,
+					     &debug_notifier);
+	if (ret) {
+		pr_err("%s: unable to register notifier: %d\n",
+		       __func__, ret);
+		goto err;
+	}
+
+	return 0;
+
+err:
+	debugfs_remove_recursive(debug_debugfs_dir);
+	return ret;
+}
+
+static void debug_func_exit(void)
+{
+	atomic_notifier_chain_unregister(&panic_notifier_list,
+					 &debug_notifier);
+	debugfs_remove_recursive(debug_debugfs_dir);
+}
+
+static int debug_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	void __iomem *base;
+	struct device *dev = &adev->dev;
+	struct debug_drvdata *drvdata;
+	struct resource *res = &adev->res;
+	struct device_node *np = adev->dev.of_node;
+	int ret;
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	drvdata->cpu = np ? of_coresight_get_cpu(np) : 0;
+	if (per_cpu(debug_drvdata, drvdata->cpu)) {
+		dev_err(dev, "CPU%d drvdata has already been initialized\n",
+			drvdata->cpu);
+		return -EBUSY;
+	}
+
+	drvdata->dev = &adev->dev;
+	amba_set_drvdata(adev, drvdata);
+
+	/* Validity for the resource is already checked by the AMBA core */
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	drvdata->base = base;
+
+	get_online_cpus();
+	per_cpu(debug_drvdata, drvdata->cpu) = drvdata;
+	ret = smp_call_function_single(drvdata->cpu, debug_init_arch_data,
+				       drvdata, 1);
+	put_online_cpus();
+
+	if (ret) {
+		dev_err(dev, "CPU%d debug arch init failed\n", drvdata->cpu);
+		goto err;
+	}
+
+	if (!drvdata->edpcsr_present) {
+		dev_err(dev, "CPU%d sample-based profiling isn't implemented\n",
+			drvdata->cpu);
+		ret = -ENXIO;
+		goto err;
+	}
+
+	if (!debug_count++) {
+		ret = debug_func_init();
+		if (ret)
+			goto err_func_init;
+	}
+
+	mutex_lock(&debug_lock);
+	/* Turn off debug power domain if debugging is disabled */
+	if (!debug_enable)
+		pm_runtime_put(dev);
+	mutex_unlock(&debug_lock);
+
+	dev_info(dev, "Coresight debug-CPU%d initialized\n", drvdata->cpu);
+	return 0;
+
+err_func_init:
+	debug_count--;
+err:
+	per_cpu(debug_drvdata, drvdata->cpu) = NULL;
+	return ret;
+}
+
+static int debug_remove(struct amba_device *adev)
+{
+	struct device *dev = &adev->dev;
+	struct debug_drvdata *drvdata = amba_get_drvdata(adev);
+
+	per_cpu(debug_drvdata, drvdata->cpu) = NULL;
+
+	mutex_lock(&debug_lock);
+	/* Turn off debug power domain before rmmod the module */
+	if (debug_enable)
+		pm_runtime_put(dev);
+	mutex_unlock(&debug_lock);
+
+	if (!--debug_count)
+		debug_func_exit();
+
+	return 0;
+}
+
+static struct amba_id debug_ids[] = {
+	{       /* Debug for Cortex-A53 */
+		.id	= 0x000bbd03,
+		.mask	= 0x000fffff,
+	},
+	{       /* Debug for Cortex-A57 */
+		.id	= 0x000bbd07,
+		.mask	= 0x000fffff,
+	},
+	{       /* Debug for Cortex-A72 */
+		.id	= 0x000bbd08,
+		.mask	= 0x000fffff,
+	},
+	{ 0, 0 },
+};
+
+static struct amba_driver debug_driver = {
+	.drv = {
+		.name   = "coresight-cpu-debug",
+		.suppress_bind_attrs = true,
+	},
+	.probe		= debug_probe,
+	.remove		= debug_remove,
+	.id_table	= debug_ids,
+};
+
+module_amba_driver(debug_driver);
+
+MODULE_AUTHOR("Leo Yan <leo.yan@linaro.org>");
+MODULE_DESCRIPTION("ARM Coresight CPU Debug Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c
index 979ea6ec7902..d5b96423e1a5 100644
--- a/drivers/hwtracing/coresight/coresight-etb10.c
+++ b/drivers/hwtracing/coresight/coresight-etb10.c
@@ -375,7 +375,7 @@ static void etb_update_buffer(struct coresight_device *csdev,
 
 	/*
 	 * Entries should be aligned to the frame size.  If they are not
-	 * go back to the last alignement point to give decoding tools a
+	 * go back to the last alignment point to give decoding tools a
 	 * chance to fix things.
 	 */
 	if (write_ptr % ETB_FRAME_SIZE_WORDS) {
@@ -675,11 +675,8 @@ static int etb_probe(struct amba_device *adev, const struct amba_id *id)
 
 	drvdata->buf = devm_kzalloc(dev,
 				    drvdata->buffer_depth * 4, GFP_KERNEL);
-	if (!drvdata->buf) {
-		dev_err(dev, "Failed to allocate %u bytes for buffer data\n",
-			drvdata->buffer_depth * 4);
+	if (!drvdata->buf)
 		return -ENOMEM;
-	}
 
 	desc.type = CORESIGHT_DEV_TYPE_SINK;
 	desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index 288a423c1b27..8f546f59a3fd 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -201,6 +201,7 @@ static void *etm_setup_aux(int event_cpu, void **pages,
 	event_data = alloc_event_data(event_cpu);
 	if (!event_data)
 		return NULL;
+	INIT_WORK(&event_data->work, free_event_data);
 
 	/*
 	 * In theory nothing prevent tracers in a trace session from being
@@ -217,8 +218,6 @@ static void *etm_setup_aux(int event_cpu, void **pages,
 	if (!sink)
 		goto err;
 
-	INIT_WORK(&event_data->work, free_event_data);
-
 	mask = &event_data->mask;
 
 	/* Setup the path for each CPU in a trace session */
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c
index aec61a6d5c63..e3b9fb82eb8d 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etf.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c
@@ -166,9 +166,6 @@ out:
 	if (!used)
 		kfree(buf);
 
-	if (!ret)
-		dev_info(drvdata->dev, "TMC-ETB/ETF enabled\n");
-
 	return ret;
 }
 
@@ -204,15 +201,27 @@ out:
 
 static int tmc_enable_etf_sink(struct coresight_device *csdev, u32 mode)
 {
+	int ret;
+	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+
 	switch (mode) {
 	case CS_MODE_SYSFS:
-		return tmc_enable_etf_sink_sysfs(csdev);
+		ret = tmc_enable_etf_sink_sysfs(csdev);
+		break;
 	case CS_MODE_PERF:
-		return tmc_enable_etf_sink_perf(csdev);
+		ret = tmc_enable_etf_sink_perf(csdev);
+		break;
+	/* We shouldn't be here */
+	default:
+		ret = -EINVAL;
+		break;
 	}
 
-	/* We shouldn't be here */
-	return -EINVAL;
+	if (ret)
+		return ret;
+
+	dev_info(drvdata->dev, "TMC-ETB/ETF enabled\n");
+	return 0;
 }
 
 static void tmc_disable_etf_sink(struct coresight_device *csdev)
@@ -273,7 +282,7 @@ static void tmc_disable_etf_link(struct coresight_device *csdev,
 	drvdata->mode = CS_MODE_DISABLED;
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
-	dev_info(drvdata->dev, "TMC disabled\n");
+	dev_info(drvdata->dev, "TMC-ETF disabled\n");
 }
 
 static void *tmc_alloc_etf_buffer(struct coresight_device *csdev, int cpu,
diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c
index d8517d2a968c..864488793f09 100644
--- a/drivers/hwtracing/coresight/coresight-tmc.c
+++ b/drivers/hwtracing/coresight/coresight-tmc.c
@@ -362,6 +362,13 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id)
 		desc.type = CORESIGHT_DEV_TYPE_SINK;
 		desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
 		desc.ops = &tmc_etr_cs_ops;
+		/*
+		 * ETR configuration uses a 40-bit AXI master in place of
+		 * the embedded SRAM of ETB/ETF.
+		 */
+		ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40));
+		if (ret)
+			goto out;
 	} else {
 		desc.type = CORESIGHT_DEV_TYPE_LINKSINK;
 		desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_FIFO;
diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
index 0c37356e417c..6a0202b7384f 100644
--- a/drivers/hwtracing/coresight/coresight.c
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -253,14 +253,22 @@ static int coresight_enable_source(struct coresight_device *csdev, u32 mode)
 	return 0;
 }
 
-static void coresight_disable_source(struct coresight_device *csdev)
+/**
+ *  coresight_disable_source - Drop the reference count by 1 and disable
+ *  the device if there are no users left.
+ *
+ *  @csdev - The coresight device to disable
+ *
+ *  Returns true if the device has been disabled.
+ */
+static bool coresight_disable_source(struct coresight_device *csdev)
 {
 	if (atomic_dec_return(csdev->refcnt) == 0) {
-		if (source_ops(csdev)->disable) {
+		if (source_ops(csdev)->disable)
 			source_ops(csdev)->disable(csdev, NULL);
-			csdev->enable = false;
-		}
+		csdev->enable = false;
 	}
+	return !csdev->enable;
 }
 
 void coresight_disable_path(struct list_head *path)
@@ -550,6 +558,9 @@ int coresight_enable(struct coresight_device *csdev)
 	int cpu, ret = 0;
 	struct coresight_device *sink;
 	struct list_head *path;
+	enum coresight_dev_subtype_source subtype;
+
+	subtype = csdev->subtype.source_subtype;
 
 	mutex_lock(&coresight_mutex);
 
@@ -557,8 +568,16 @@ int coresight_enable(struct coresight_device *csdev)
 	if (ret)
 		goto out;
 
-	if (csdev->enable)
+	if (csdev->enable) {
+		/*
+		 * There could be multiple applications driving the software
+		 * source. So keep the refcount for each such user when the
+		 * source is already enabled.
+		 */
+		if (subtype == CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE)
+			atomic_inc(csdev->refcnt);
 		goto out;
+	}
 
 	/*
 	 * Search for a valid sink for this session but don't reset the
@@ -585,7 +604,7 @@ int coresight_enable(struct coresight_device *csdev)
 	if (ret)
 		goto err_source;
 
-	switch (csdev->subtype.source_subtype) {
+	switch (subtype) {
 	case CORESIGHT_DEV_SUBTYPE_SOURCE_PROC:
 		/*
 		 * When working from sysFS it is important to keep track
@@ -629,7 +648,7 @@ void coresight_disable(struct coresight_device *csdev)
 	if (ret)
 		goto out;
 
-	if (!csdev->enable)
+	if (!csdev->enable || !coresight_disable_source(csdev))
 		goto out;
 
 	switch (csdev->subtype.source_subtype) {
@@ -647,7 +666,6 @@ void coresight_disable(struct coresight_device *csdev)
 		break;
 	}
 
-	coresight_disable_source(csdev);
 	coresight_disable_path(path);
 	coresight_release_path(path);
 
diff --git a/drivers/hwtracing/coresight/of_coresight.c b/drivers/hwtracing/coresight/of_coresight.c
index 09142e99e915..a18794128bf8 100644
--- a/drivers/hwtracing/coresight/of_coresight.c
+++ b/drivers/hwtracing/coresight/of_coresight.c
@@ -52,7 +52,7 @@ of_coresight_get_endpoint_device(struct device_node *endpoint)
 			       endpoint, of_dev_node_match);
 }
 
-static void of_coresight_get_ports(struct device_node *node,
+static void of_coresight_get_ports(const struct device_node *node,
 				   int *nr_inport, int *nr_outport)
 {
 	struct device_node *ep = NULL;
@@ -101,14 +101,40 @@ static int of_coresight_alloc_memory(struct device *dev,
 	return 0;
 }
 
-struct coresight_platform_data *of_get_coresight_platform_data(
-				struct device *dev, struct device_node *node)
+int of_coresight_get_cpu(const struct device_node *node)
 {
-	int i = 0, ret = 0, cpu;
+	int cpu;
+	bool found;
+	struct device_node *dn, *np;
+
+	dn = of_parse_phandle(node, "cpu", 0);
+
+	/* Affinity defaults to CPU0 */
+	if (!dn)
+		return 0;
+
+	for_each_possible_cpu(cpu) {
+		np = of_cpu_device_node_get(cpu);
+		found = (dn == np);
+		of_node_put(np);
+		if (found)
+			break;
+	}
+	of_node_put(dn);
+
+	/* Affinity to CPU0 if no cpu nodes are found */
+	return found ? cpu : 0;
+}
+EXPORT_SYMBOL_GPL(of_coresight_get_cpu);
+
+struct coresight_platform_data *
+of_get_coresight_platform_data(struct device *dev,
+			       const struct device_node *node)
+{
+	int i = 0, ret = 0;
 	struct coresight_platform_data *pdata;
 	struct of_endpoint endpoint, rendpoint;
 	struct device *rdev;
-	struct device_node *dn;
 	struct device_node *ep = NULL;
 	struct device_node *rparent = NULL;
 	struct device_node *rport = NULL;
@@ -175,16 +201,7 @@ struct coresight_platform_data *of_get_coresight_platform_data(
 		} while (ep);
 	}
 
-	/* Affinity defaults to CPU0 */
-	pdata->cpu = 0;
-	dn = of_parse_phandle(node, "cpu", 0);
-	for (cpu = 0; dn && cpu < nr_cpu_ids; cpu++) {
-		if (dn == of_get_cpu_node(cpu, NULL)) {
-			pdata->cpu = cpu;
-			break;
-		}
-	}
-	of_node_put(dn);
+	pdata->cpu = of_coresight_get_cpu(node);
 
 	return pdata;
 }