65 files changed, 4332 insertions, 2821 deletions
diff --git a/Documentation/cpu-freq/core.txt b/Documentation/cpu-freq/core.txt
index ba78e7c2a069..4bc7287806de 100644
--- a/Documentation/cpu-freq/core.txt
+++ b/Documentation/cpu-freq/core.txt
@@ -96,7 +96,7 @@ new	- new frequency
 For details about OPP, see Documentation/power/opp.txt
 
 dev_pm_opp_init_cpufreq_table - cpufreq framework typically is initialized with
-	cpufreq_frequency_table_cpuinfo which is provided with the list of
+	cpufreq_table_validate_and_show() which is provided with the list of
 	frequencies that are available for operation. This function provides
 	a ready to use conversion routine to translate the OPP layer's internal
 	information about the available frequencies into a format readily
@@ -110,7 +110,7 @@ dev_pm_opp_init_cpufreq_table - cpufreq framework typically is initialized with
 		/* Do things */
 		r = dev_pm_opp_init_cpufreq_table(dev, &freq_table);
 		if (!r)
-			cpufreq_frequency_table_cpuinfo(policy, freq_table);
+			cpufreq_table_validate_and_show(policy, freq_table);
 		/* Do other things */
 	 }
 
diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt
index 14f4e6336d88..772b94fde264 100644
--- a/Documentation/cpu-freq/cpu-drivers.txt
+++ b/Documentation/cpu-freq/cpu-drivers.txt
@@ -231,7 +231,7 @@ if you want to skip one entry in the table, set the frequency to
 CPUFREQ_ENTRY_INVALID. The entries don't need to be in ascending
 order.
 
-By calling cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
+By calling cpufreq_table_validate_and_show(struct cpufreq_policy *policy,
 					struct cpufreq_frequency_table *table);
 the cpuinfo.min_freq and cpuinfo.max_freq values are detected, and
 policy->min and policy->max are set to the same values. This is
@@ -244,14 +244,12 @@ policy->max, and all other criteria are met. This is helpful for the
 ->verify call.
 
 int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
-                                   struct cpufreq_frequency_table *table,
                                    unsigned int target_freq,
-                                   unsigned int relation,
-                                   unsigned int *index);
+                                   unsigned int relation);
 
 is the corresponding frequency table helper for the ->target
-stage. Just pass the values to this function, and the unsigned int
-index returns the number of the frequency table entry which contains
+stage. Just pass the values to this function, and this function
+returns the number of the frequency table entry which contains
 the frequency the CPU shall be set to.
 
 The following macros can be used as iterators over cpufreq_frequency_table:
diff --git a/Documentation/cpu-freq/pcc-cpufreq.txt b/Documentation/cpu-freq/pcc-cpufreq.txt
index 0a94224ad296..9e3c3b33514c 100644
--- a/Documentation/cpu-freq/pcc-cpufreq.txt
+++ b/Documentation/cpu-freq/pcc-cpufreq.txt
@@ -159,8 +159,8 @@ to be strictly associated with a P-state.
 
 2.2 cpuinfo_transition_latency:
 -------------------------------
-The cpuinfo_transition_latency field is CPUFREQ_ETERNAL. The PCC specification
-does not include a field to expose this value currently.
+The cpuinfo_transition_latency field is 0. The PCC specification does
+not include a field to expose this value currently.
 
 2.3 cpuinfo_cur_freq:
 ---------------------
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 724970a25666..118538b548f6 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3598,6 +3598,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 				present during boot.
 		nocompress	Don't compress/decompress hibernation images.
 		no		Disable hibernation and resume.
+		protect_image	Turn on image protection during restoration
+				(that will set all pages holding image data
+				during restoration read-only).
 
 	retain_initrd	[RAM] Keep initrd memory after extraction
 
diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
index 82607d621aca..88301e53f085 100644
--- a/arch/powerpc/platforms/cell/cpufreq_spudemand.c
+++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
@@ -85,61 +85,57 @@ static void spu_gov_cancel_work(struct spu_gov_info_struct *info)
 	cancel_delayed_work_sync(&info->work);
 }
 
-static int spu_gov_govern(struct cpufreq_policy *policy, unsigned int event)
+static int spu_gov_start(struct cpufreq_policy *policy)
 {
 	unsigned int cpu = policy->cpu;
-	struct spu_gov_info_struct *info, *affected_info;
+	struct spu_gov_info_struct *info = &per_cpu(spu_gov_info, cpu);
+	struct spu_gov_info_struct *affected_info;
 	int i;
-	int ret = 0;
 
-	info = &per_cpu(spu_gov_info, cpu);
-
-	switch (event) {
-	case CPUFREQ_GOV_START:
-		if (!cpu_online(cpu)) {
-			printk(KERN_ERR "cpu %d is not online\n", cpu);
-			ret = -EINVAL;
-			break;
-		}
+	if (!cpu_online(cpu)) {
+		printk(KERN_ERR "cpu %d is not online\n", cpu);
+		return -EINVAL;
+	}
 
-		if (!policy->cur) {
-			printk(KERN_ERR "no cpu specified in policy\n");
-			ret = -EINVAL;
-			break;
-		}
+	if (!policy->cur) {
+		printk(KERN_ERR "no cpu specified in policy\n");
+		return -EINVAL;
+	}
 
-		/* initialize spu_gov_info for all affected cpus */
-		for_each_cpu(i, policy->cpus) {
-			affected_info = &per_cpu(spu_gov_info, i);
-			affected_info->policy = policy;
-		}
+	/* initialize spu_gov_info for all affected cpus */
+	for_each_cpu(i, policy->cpus) {
+		affected_info = &per_cpu(spu_gov_info, i);
+		affected_info->policy = policy;
+	}
 
-		info->poll_int = POLL_TIME;
+	info->poll_int = POLL_TIME;
 
-		/* setup timer */
-		spu_gov_init_work(info);
+	/* setup timer */
+	spu_gov_init_work(info);
 
-		break;
+	return 0;
+}
 
-	case CPUFREQ_GOV_STOP:
-		/* cancel timer */
-		spu_gov_cancel_work(info);
+static void spu_gov_stop(struct cpufreq_policy *policy)
+{
+	unsigned int cpu = policy->cpu;
+	struct spu_gov_info_struct *info = &per_cpu(spu_gov_info, cpu);
+	int i;
 
-		/* clean spu_gov_info for all affected cpus */
-		for_each_cpu (i, policy->cpus) {
-			info = &per_cpu(spu_gov_info, i);
-			info->policy = NULL;
-		}
+	/* cancel timer */
+	spu_gov_cancel_work(info);
 
-		break;
+	/* clean spu_gov_info for all affected cpus */
+	for_each_cpu (i, policy->cpus) {
+		info = &per_cpu(spu_gov_info, i);
+		info->policy = NULL;
 	}
-
-	return ret;
 }
 
 static struct cpufreq_governor spu_governor = {
 	.name = "spudemand",
-	.governor = spu_gov_govern,
+	.start = spu_gov_start,
+	.stop = spu_gov_stop,
 	.owner = THIS_MODULE,
 };
 
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 5a73a9c62c39..56f4c6676b29 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -64,8 +64,6 @@
 
 #define MSR_OFFCORE_RSP_0		0x000001a6
 #define MSR_OFFCORE_RSP_1		0x000001a7
-#define MSR_NHM_TURBO_RATIO_LIMIT	0x000001ad
-#define MSR_IVT_TURBO_RATIO_LIMIT	0x000001ae
 #define MSR_TURBO_RATIO_LIMIT		0x000001ad
 #define MSR_TURBO_RATIO_LIMIT1		0x000001ae
 #define MSR_TURBO_RATIO_LIMIT2		0x000001af
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 0576b6157f3a..c9734dc76257 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -135,6 +135,7 @@ int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_disable(void);
 int common_cpu_die(unsigned int cpu);
 void native_cpu_die(unsigned int cpu);
+void hlt_play_dead(void);
 void native_play_dead(void);
 void play_dead_common(void);
 void wbinvd_on_cpu(int cpu);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index d0a51939c150..c93609c97406 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1644,7 +1644,7 @@ static inline void mwait_play_dead(void)
 	}
 }
 
-static inline void hlt_play_dead(void)
+void hlt_play_dead(void)
 {
 	if (__this_cpu_read(cpu_info.x86) >= 4)
 		wbinvd();
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index d5f64996394a..b12c26e2e309 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -12,6 +12,7 @@
 #include <linux/export.h>
 #include <linux/smp.h>
 #include <linux/perf_event.h>
+#include <linux/tboot.h>
 
 #include <asm/pgtable.h>
 #include <asm/proto.h>
@@ -266,6 +267,35 @@ void notrace restore_processor_state(void)
 EXPORT_SYMBOL(restore_processor_state);
 #endif
 
+#if defined(CONFIG_HIBERNATION) && defined(CONFIG_HOTPLUG_CPU)
+static void resume_play_dead(void)
+{
+	play_dead_common();
+	tboot_shutdown(TB_SHUTDOWN_WFS);
+	hlt_play_dead();
+}
+
+int hibernate_resume_nonboot_cpu_disable(void)
+{
+	void (*play_dead)(void) = smp_ops.play_dead;
+	int ret;
+
+	/*
+	 * Ensure that MONITOR/MWAIT will not be used in the "play dead" loop
+	 * during hibernate image restoration, because it is likely that the
+	 * monitored address will be actually written to at that time and then
+	 * the "dead" CPU will attempt to execute instructions again, but the
+	 * address in its instruction pointer may not be possible to resolve
+	 * any more at that point (the page tables used by it previously may
+	 * have been overwritten by hibernate image data).
+	 */
+	smp_ops.play_dead = resume_play_dead;
+	ret = disable_nonboot_cpus();
+	smp_ops.play_dead = play_dead;
+	return ret;
+}
+#endif
+
 /*
  * When bsp_check() is called in hibernate and suspend, cpu hotplug
  * is disabled already. So it's unnessary to handle race condition between
diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c
index 3657ac1cb801..8e2e4757adcb 100644
--- a/drivers/base/power/clock_ops.c
+++ b/drivers/base/power/clock_ops.c
@@ -121,6 +121,7 @@ int pm_clk_add(struct device *dev, const char *con_id)
 {
 	return __pm_clk_add(dev, con_id, NULL);
 }
+EXPORT_SYMBOL_GPL(pm_clk_add);
 
 /**
  * pm_clk_add_clk - Start using a device clock for power management.
@@ -136,9 +137,42 @@ int pm_clk_add_clk(struct device *dev, struct clk *clk)
 {
 	return __pm_clk_add(dev, NULL, clk);
 }
+EXPORT_SYMBOL_GPL(pm_clk_add_clk);
 
 
 /**
+ * of_pm_clk_add_clk - Start using a device clock for power management.
+ * @dev: Device whose clock is going to be used for power management.
+ * @name: Name of clock that is going to be used for power management.
+ *
+ * Add the clock described in the 'clocks' device-tree node that matches
+ * with the 'name' provided, to the list of clocks used for the power
+ * management of @dev. On success, returns 0. Returns a negative error
+ * code if the clock is not found or cannot be added.
+ */
+int of_pm_clk_add_clk(struct device *dev, const char *name)
+{
+	struct clk *clk;
+	int ret;
+
+	if (!dev || !dev->of_node || !name)
+		return -EINVAL;
+
+	clk = of_clk_get_by_name(dev->of_node, name);
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+
+	ret = pm_clk_add_clk(dev, clk);
+	if (ret) {
+		clk_put(clk);
+		return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(of_pm_clk_add_clk);
+
+/**
  * of_pm_clk_add_clks - Start using device clock(s) for power management.
  * @dev: Device whose clock(s) is going to be used for power management.
  *
@@ -192,6 +226,7 @@ error:
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(of_pm_clk_add_clks);
 
 /**
  * __pm_clk_remove - Destroy PM clock entry.
@@ -252,6 +287,7 @@ void pm_clk_remove(struct device *dev, const char *con_id)
 
 	__pm_clk_remove(ce);
 }
+EXPORT_SYMBOL_GPL(pm_clk_remove);
 
 /**
  * pm_clk_remove_clk - Stop using a device clock for power management.
@@ -285,6 +321,7 @@ void pm_clk_remove_clk(struct device *dev, struct clk *clk)
 
 	__pm_clk_remove(ce);
 }
+EXPORT_SYMBOL_GPL(pm_clk_remove_clk);
 
 /**
  * pm_clk_init - Initialize a device's list of power management clocks.
@@ -299,6 +336,7 @@ void pm_clk_init(struct device *dev)
 	if (psd)
 		INIT_LIST_HEAD(&psd->clock_list);
 }
+EXPORT_SYMBOL_GPL(pm_clk_init);
 
 /**
  * pm_clk_create - Create and initialize a device's list of PM clocks.
@@ -311,6 +349,7 @@ int pm_clk_create(struct device *dev)
 {
 	return dev_pm_get_subsys_data(dev);
 }
+EXPORT_SYMBOL_GPL(pm_clk_create);
 
 /**
  * pm_clk_destroy - Destroy a device's list of power management clocks.
@@ -345,6 +384,7 @@ void pm_clk_destroy(struct device *dev)
 		__pm_clk_remove(ce);
 	}
 }
+EXPORT_SYMBOL_GPL(pm_clk_destroy);
 
 /**
  * pm_clk_suspend - Disable clocks in a device's PM clock list.
@@ -375,6 +415,7 @@ int pm_clk_suspend(struct device *dev)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(pm_clk_suspend);
 
 /**
  * pm_clk_resume - Enable clocks in a device's PM clock list.
@@ -400,6 +441,7 @@ int pm_clk_resume(struct device *dev)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(pm_clk_resume);
 
 /**
  * pm_clk_notify - Notify routine for device addition and removal.
@@ -480,6 +522,7 @@ int pm_clk_runtime_suspend(struct device *dev)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(pm_clk_runtime_suspend);
 
 int pm_clk_runtime_resume(struct device *dev)
 {
@@ -495,6 +538,7 @@ int pm_clk_runtime_resume(struct device *dev)
 
 	return pm_generic_runtime_resume(dev);
 }
+EXPORT_SYMBOL_GPL(pm_clk_runtime_resume);
 
 #else /* !CONFIG_PM_CLK */
 
@@ -598,3 +642,4 @@ void pm_clk_add_notifier(struct bus_type *bus,
 	clknb->nb.notifier_call = pm_clk_notify;
 	bus_register_notifier(bus, &clknb->nb);
 }
+EXPORT_SYMBOL_GPL(pm_clk_add_notifier);
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index de23b648fce3..a1f2aff33997 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -187,8 +187,7 @@ static int genpd_poweron(struct generic_pm_domain *genpd, unsigned int depth)
 	struct gpd_link *link;
 	int ret = 0;
 
-	if (genpd->status == GPD_STATE_ACTIVE
-	    || (genpd->prepared_count > 0 && genpd->suspend_power_off))
+	if (genpd->status == GPD_STATE_ACTIVE)
 		return 0;
 
 	/*
@@ -735,82 +734,24 @@ static int pm_genpd_prepare(struct device *dev)
 
 	mutex_lock(&genpd->lock);
 
-	if (genpd->prepared_count++ == 0) {
+	if (genpd->prepared_count++ == 0)
 		genpd->suspended_count = 0;
-		genpd->suspend_power_off = genpd->status == GPD_STATE_POWER_OFF;
-	}
 
 	mutex_unlock(&genpd->lock);
 
-	if (genpd->suspend_power_off)
-		return 0;
-
-	/*
-	 * The PM domain must be in the GPD_STATE_ACTIVE state at this point,
-	 * so genpd_poweron() will return immediately, but if the device
-	 * is suspended (e.g. it's been stopped by genpd_stop_dev()), we need
-	 * to make it operational.
-	 */
-	pm_runtime_resume(dev);
-	__pm_runtime_disable(dev, false);
-
 	ret = pm_generic_prepare(dev);
 	if (ret) {
 		mutex_lock(&genpd->lock);
 
-		if (--genpd->prepared_count == 0)
-			genpd->suspend_power_off = false;
+		genpd->prepared_count--;
 
 		mutex_unlock(&genpd->lock);
-		pm_runtime_enable(dev);
 	}
 
 	return ret;
 }
 
 /**
- * pm_genpd_suspend - Suspend a device belonging to an I/O PM domain.
- * @dev: Device to suspend.
- *
- * Suspend a device under the assumption that its pm_domain field points to the
- * domain member of an object of type struct generic_pm_domain representing
- * a PM domain consisting of I/O devices.
- */
-static int pm_genpd_suspend(struct device *dev)
-{
-	struct generic_pm_domain *genpd;
-
-	dev_dbg(dev, "%s()\n", __func__);
-
-	genpd = dev_to_genpd(dev);
-	if (IS_ERR(genpd))
-		return -EINVAL;
-
-	return genpd->suspend_power_off ? 0 : pm_generic_suspend(dev);
-}
-
-/**
- * pm_genpd_suspend_late - Late suspend of a device from an I/O PM domain.
- * @dev: Device to suspend.
- *
- * Carry out a late suspend of a device under the assumption that its
- * pm_domain field points to the domain member of an object of type
- * struct generic_pm_domain representing a PM domain consisting of I/O devices.
- */
-static int pm_genpd_suspend_late(struct device *dev)
-{
-	struct generic_pm_domain *genpd;
-
-	dev_dbg(dev, "%s()\n", __func__);
-
-	genpd = dev_to_genpd(dev);
-	if (IS_ERR(genpd))
-		return -EINVAL;
-
-	return genpd->suspend_power_off ? 0 : pm_generic_suspend_late(dev);
-}
-
-/**
  * pm_genpd_suspend_noirq - Completion of suspend of device in an I/O PM domain.
  * @dev: Device to suspend.
  *
@@ -820,6 +761,7 @@ static int pm_genpd_suspend_late(struct device *dev)
 static int pm_genpd_suspend_noirq(struct device *dev)
 {
 	struct generic_pm_domain *genpd;
+	int ret;
 
 	dev_dbg(dev, "%s()\n", __func__);
 
@@ -827,11 +769,14 @@ static int pm_genpd_suspend_noirq(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
-	if (genpd->suspend_power_off
-	    || (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev)))
+	if (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev))
 		return 0;
 
-	genpd_stop_dev(genpd, dev);
+	if (genpd->dev_ops.stop && genpd->dev_ops.start) {
+		ret = pm_runtime_force_suspend(dev);
+		if (ret)
+			return ret;
+	}
 
 	/*
 	 * Since all of the "noirq" callbacks are executed sequentially, it is
@@ -853,6 +798,7 @@ static int pm_genpd_suspend_noirq(struct device *dev)
 static int pm_genpd_resume_noirq(struct device *dev)
 {
 	struct generic_pm_domain *genpd;
+	int ret = 0;
 
 	dev_dbg(dev, "%s()\n", __func__);
 
@@ -860,8 +806,7 @@ static int pm_genpd_resume_noirq(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
-	if (genpd->suspend_power_off
-	    || (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev)))
+	if (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev))
 		return 0;
 
 	/*
@@ -872,93 +817,10 @@ static int pm_genpd_resume_noirq(struct device *dev)
 	pm_genpd_sync_poweron(genpd, true);
 	genpd->suspended_count--;
 
-	return genpd_start_dev(genpd, dev);
-}
-
-/**
- * pm_genpd_resume_early - Early resume of a device in an I/O PM domain.
- * @dev: Device to resume.
- *
- * Carry out an early resume of a device under the assumption that its
- * pm_domain field points to the domain member of an object of type
- * struct generic_pm_domain representing a power domain consisting of I/O
- * devices.
- */
-static int pm_genpd_resume_early(struct device *dev)
-{
-	struct generic_pm_domain *genpd;
-
-	dev_dbg(dev, "%s()\n", __func__);
-
-	genpd = dev_to_genpd(dev);
-	if (IS_ERR(genpd))
-		return -EINVAL;
-
-	return genpd->suspend_power_off ? 0 : pm_generic_resume_early(dev);
-}
-
-/**
- * pm_genpd_resume - Resume of device in an I/O PM domain.
- * @dev: Device to resume.
- *
- * Resume a device under the assumption that its pm_domain field points to the
- * domain member of an object of type struct generic_pm_domain representing
- * a power domain consisting of I/O devices.
- */
-static int pm_genpd_resume(struct device *dev)
-{
-	struct generic_pm_domain *genpd;
-
-	dev_dbg(dev, "%s()\n", __func__);
-
-	genpd = dev_to_genpd(dev);
-	if (IS_ERR(genpd))
-		return -EINVAL;
-
-	return genpd->suspend_power_off ? 0 : pm_generic_resume(dev);
-}
-
-/**
- * pm_genpd_freeze - Freezing a device in an I/O PM domain.
- * @dev: Device to freeze.
- *
- * Freeze a device under the assumption that its pm_domain field points to the
- * domain member of an object of type struct generic_pm_domain representing
- * a power domain consisting of I/O devices.
- */
-static int pm_genpd_freeze(struct device *dev)
-{
-	struct generic_pm_domain *genpd;
-
-	dev_dbg(dev, "%s()\n", __func__);
-
-	genpd = dev_to_genpd(dev);
-	if (IS_ERR(genpd))
-		return -EINVAL;
-
-	return genpd->suspend_power_off ? 0 : pm_generic_freeze(dev);
-}
+	if (genpd->dev_ops.stop && genpd->dev_ops.start)
+		ret = pm_runtime_force_resume(dev);
 
-/**
- * pm_genpd_freeze_late - Late freeze of a device in an I/O PM domain.
- * @dev: Device to freeze.
- *
- * Carry out a late freeze of a device under the assumption that its
- * pm_domain field points to the domain member of an object of type
- * struct generic_pm_domain representing a power domain consisting of I/O
- * devices.
- */
-static int pm_genpd_freeze_late(struct device *dev)
-{
-	struct generic_pm_domain *genpd;
-
-	dev_dbg(dev, "%s()\n", __func__);
-
-	genpd = dev_to_genpd(dev);
-	if (IS_ERR(genpd))
-		return -EINVAL;
-
-	return genpd->suspend_power_off ? 0 : pm_generic_freeze_late(dev);
+	return ret;
 }
 
 /**
@@ -973,6 +835,7 @@ static int pm_genpd_freeze_late(struct device *dev)
 static int pm_genpd_freeze_noirq(struct device *dev)
 {
 	struct generic_pm_domain *genpd;
+	int ret = 0;
 
 	dev_dbg(dev, "%s()\n", __func__);
 
@@ -980,7 +843,10 @@ static int pm_genpd_freeze_noirq(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
-	return genpd->suspend_power_off ? 0 : genpd_stop_dev(genpd, dev);
+	if (genpd->dev_ops.stop && genpd->dev_ops.start)
+		ret = pm_runtime_force_suspend(dev);
+
+	return ret;
 }
 
 /**
@@ -993,6 +859,7 @@ static int pm_genpd_freeze_noirq(struct device *dev)
 static int pm_genpd_thaw_noirq(struct device *dev)
 {
 	struct generic_pm_domain *genpd;
+	int ret = 0;
 
 	dev_dbg(dev, "%s()\n", __func__);
 
@@ -1000,51 +867,10 @@ static int pm_genpd_thaw_noirq(struct device *dev)
 	if (IS_ERR(genpd))
 		return -EINVAL;
 
-	return genpd->suspend_power_off ?
-		0 : genpd_start_dev(genpd, dev);
-}
-
-/**
- * pm_genpd_thaw_early - Early thaw of device in an I/O PM domain.
- * @dev: Device to thaw.
- *
- * Carry out an early thaw of a device under the assumption that its
- * pm_domain field points to the domain member of an object of type
- * struct generic_pm_domain representing a power domain consisting of I/O
- * devices.
- */
-static int pm_genpd_thaw_early(struct device *dev)
-{
-	struct generic_pm_domain *genpd;
-
-	dev_dbg(dev, "%s()\n", __func__);
-
-	genpd = dev_to_genpd(dev);
-	if (IS_ERR(genpd))
-		return -EINVAL;
-
-	return genpd->suspend_power_off ? 0 : pm_generic_thaw_early(dev);
-}
-
-/**
- * pm_genpd_thaw - Thaw a device belonging to an I/O power domain.
- * @dev: Device to thaw.
- *
- * Thaw a device under the assumption that its pm_domain field points to the
- * domain member of an object of type struct generic_pm_domain representing
- * a power domain consisting of I/O devices.
- */
-static int pm_genpd_thaw(struct device *dev)
-{
-	struct generic_pm_domain *genpd;
-
-	dev_dbg(dev, "%s()\n", __func__);
+	if (genpd->dev_ops.stop && genpd->dev_ops.start)
+		ret = pm_runtime_force_resume(dev);
 
-	genpd = dev_to_genpd(dev);
-	if (IS_ERR(genpd))
-		return -EINVAL;
-
-	return genpd->suspend_power_off ? 0 : pm_generic_thaw(dev);
+	return ret;
 }
 
 /**
@@ -1057,6 +883,7 @@ static int pm_genpd_thaw(struct device *dev)
 static int pm_genpd_restore_noirq(struct device *dev)
 {
 	struct generic_pm_domain *genpd;
+	int ret = 0;
 
 	dev_dbg(dev, "%s()\n", __func__);
 
@@ -1072,30 +899,20 @@ static int pm_genpd_restore_noirq(struct device *dev)
 	 * At this point suspended_count == 0 means we are being run for the
 	 * first time for the given domain in the present cycle.
 	 */
-	if (genpd->suspended_count++ == 0) {
+	if (genpd->suspended_count++ == 0)
 		/*
 		 * The boot kernel might put the domain into arbitrary state,
 		 * so make it appear as powered off to pm_genpd_sync_poweron(),
 		 * so that it tries to power it on in case it was really off.
 		 */
 		genpd->status = GPD_STATE_POWER_OFF;
-		if (genpd->suspend_power_off) {
-			/*
-			 * If the domain was off before the hibernation, make
-			 * sure it will be off going forward.
-			 */
-			genpd_power_off(genpd, true);
-
-			return 0;
-		}
-	}
-
-	if (genpd->suspend_power_off)
-		return 0;
 
 	pm_genpd_sync_poweron(genpd, true);
 
-	return genpd_start_dev(genpd, dev);
+	if (genpd->dev_ops.stop && genpd->dev_ops.start)
+		ret = pm_runtime_force_resume(dev);
+
+	return ret;
 }
 
 /**
@@ -1110,7 +927,6 @@ static int pm_genpd_restore_noirq(struct device *dev)
 static void pm_genpd_complete(struct device *dev)
 {
 	struct generic_pm_domain *genpd;
-	bool run_complete;
 
 	dev_dbg(dev, "%s()\n", __func__);
 
@@ -1118,20 +934,15 @@ static void pm_genpd_complete(struct device *dev)
 	if (IS_ERR(genpd))
 		return;
 
+	pm_generic_complete(dev);
+
 	mutex_lock(&genpd->lock);
 
-	run_complete = !genpd->suspend_power_off;
-	if (--genpd->prepared_count == 0)
-		genpd->suspend_power_off = false;
+	genpd->prepared_count--;
+	if (!genpd->prepared_count)
+		genpd_queue_power_off_work(genpd);
 
 	mutex_unlock(&genpd->lock);
-
-	if (run_complete) {
-		pm_generic_complete(dev);
-		pm_runtime_set_active(dev);
-		pm_runtime_enable(dev);
-		pm_request_idle(dev);
-	}
 }
 
 /**
@@ -1173,18 +984,10 @@ EXPORT_SYMBOL_GPL(pm_genpd_syscore_poweron);
 #else /* !CONFIG_PM_SLEEP */
 
 #define pm_genpd_prepare		NULL
-#define pm_genpd_suspend		NULL
-#define pm_genpd_suspend_late		NULL
 #define pm_genpd_suspend_noirq		NULL
-#define pm_genpd_resume_early		NULL
 #define pm_genpd_resume_noirq		NULL
-#define pm_genpd_resume			NULL
-#define pm_genpd_freeze			NULL
-#define pm_genpd_freeze_late		NULL
 #define pm_genpd_freeze_noirq		NULL
-#define pm_genpd_thaw_early		NULL
 #define pm_genpd_thaw_noirq		NULL
-#define pm_genpd_thaw			NULL
 #define pm_genpd_restore_noirq		NULL
 #define pm_genpd_complete		NULL
 
@@ -1455,12 +1258,14 @@ EXPORT_SYMBOL_GPL(pm_genpd_remove_subdomain);
  * @genpd: PM domain object to initialize.
  * @gov: PM domain governor to associate with the domain (may be NULL).
  * @is_off: Initial value of the domain's power_is_off field.
+ *
+ * Returns 0 on successful initialization, else a negative error code.
  */
-void pm_genpd_init(struct generic_pm_domain *genpd,
-		   struct dev_power_governor *gov, bool is_off)
+int pm_genpd_init(struct generic_pm_domain *genpd,
+		  struct dev_power_governor *gov, bool is_off)
 {
 	if (IS_ERR_OR_NULL(genpd))
-		return;
+		return -EINVAL;
 
 	INIT_LIST_HEAD(&genpd->master_links);
 	INIT_LIST_HEAD(&genpd->slave_links);
@@ -1476,24 +1281,24 @@ void pm_genpd_init(struct generic_pm_domain *genpd,
 	genpd->domain.ops.runtime_suspend = genpd_runtime_suspend;
 	genpd->domain.ops.runtime_resume = genpd_runtime_resume;
 	genpd->domain.ops.prepare = pm_genpd_prepare;
-	genpd->domain.ops.suspend = pm_genpd_suspend;
-	genpd->domain.ops.suspend_late = pm_genpd_suspend_late;
+	genpd->domain.ops.suspend = pm_generic_suspend;
+	genpd->domain.ops.suspend_late = pm_generic_suspend_late;
 	genpd->domain.ops.suspend_noirq = pm_genpd_suspend_noirq;
 	genpd->domain.ops.resume_noirq = pm_genpd_resume_noirq;
-	genpd->domain.ops.resume_early = pm_genpd_resume_early;
-	genpd->domain.ops.resume = pm_genpd_resume;
-	genpd->domain.ops.freeze = pm_genpd_freeze;
-	genpd->domain.ops.freeze_late = pm_genpd_freeze_late;
+	genpd->domain.ops.resume_early = pm_generic_resume_early;
+	genpd->domain.ops.resume = pm_generic_resume;
+	genpd->domain.ops.freeze = pm_generic_freeze;
+	genpd->domain.ops.freeze_late = pm_generic_freeze_late;
 	genpd->domain.ops.freeze_noirq = pm_genpd_freeze_noirq;
 	genpd->domain.ops.thaw_noirq = pm_genpd_thaw_noirq;
-	genpd->domain.ops.thaw_early = pm_genpd_thaw_early;
-	genpd->domain.ops.thaw = pm_genpd_thaw;
-	genpd->domain.ops.poweroff = pm_genpd_suspend;
-	genpd->domain.ops.poweroff_late = pm_genpd_suspend_late;
+	genpd->domain.ops.thaw_early = pm_generic_thaw_early;
+	genpd->domain.ops.thaw = pm_generic_thaw;
+	genpd->domain.ops.poweroff = pm_generic_poweroff;
+	genpd->domain.ops.poweroff_late = pm_generic_poweroff_late;
 	genpd->domain.ops.poweroff_noirq = pm_genpd_suspend_noirq;
 	genpd->domain.ops.restore_noirq = pm_genpd_restore_noirq;
-	genpd->domain.ops.restore_early = pm_genpd_resume_early;
-	genpd->domain.ops.restore = pm_genpd_resume;
+	genpd->domain.ops.restore_early = pm_generic_restore_early;
+	genpd->domain.ops.restore = pm_generic_restore;
 	genpd->domain.ops.complete = pm_genpd_complete;
 
 	if (genpd->flags & GENPD_FLAG_PM_CLK) {
@@ -1518,6 +1323,8 @@ void pm_genpd_init(struct generic_pm_domain *genpd,
 	mutex_lock(&gpd_list_lock);
 	list_add(&genpd->gpd_list_node, &gpd_list);
 	mutex_unlock(&gpd_list_lock);
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(pm_genpd_init);
 
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index b74690418504..e097d355cc04 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1045,10 +1045,14 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status)
 		 */
 		if (!parent->power.disable_depth
 		    && !parent->power.ignore_children
-		    && parent->power.runtime_status != RPM_ACTIVE)
+		    && parent->power.runtime_status != RPM_ACTIVE) {
+			dev_err(dev, "runtime PM trying to activate child device %s but parent (%s) is not active\n",
+				dev_name(dev),
+				dev_name(parent));
 			error = -EBUSY;
-		else if (dev->power.runtime_status == RPM_SUSPENDED)
+		} else if (dev->power.runtime_status == RPM_SUSPENDED) {
 			atomic_inc(&parent->power.child_count);
+		}
 
 		spin_unlock(&parent->power.lock);
 
@@ -1256,7 +1260,7 @@ void pm_runtime_allow(struct device *dev)
 
 	dev->power.runtime_auto = true;
 	if (atomic_dec_and_test(&dev->power.usage_count))
-		rpm_idle(dev, RPM_AUTO);
+		rpm_idle(dev, RPM_AUTO | RPM_ASYNC);
 
  out:
 	spin_unlock_irq(&dev->power.lock);
@@ -1506,6 +1510,9 @@ int pm_runtime_force_resume(struct device *dev)
 		goto out;
 	}
 
+	if (!pm_runtime_status_suspended(dev))
+		goto out;
+
 	ret = pm_runtime_set_active(dev);
 	if (ret)
 		goto out;
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index b7445b6ae5a4..c822d72629d5 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -31,23 +31,18 @@ config CPU_FREQ_BOOST_SW
 	depends on THERMAL
 
 config CPU_FREQ_STAT
-	tristate "CPU frequency translation statistics"
+	bool "CPU frequency transition statistics"
 	default y
 	help
-	  This driver exports CPU frequency statistics information through sysfs
-	  file system.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called cpufreq_stats.
+	  Export CPU frequency statistics information through sysfs.
 
 	  If in doubt, say N.
 
 config CPU_FREQ_STAT_DETAILS
-	bool "CPU frequency translation statistics details"
+	bool "CPU frequency transition statistics details"
 	depends on CPU_FREQ_STAT
 	help
-	  This will show detail CPU frequency translation table in sysfs file
-	  system.
+	  Show detailed CPU frequency transition table in sysfs.
 
 	  If in doubt, say N.
 
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 32a15052f363..297e9128fe9f 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -468,20 +468,17 @@ unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy,
 	struct acpi_cpufreq_data *data = policy->driver_data;
 	struct acpi_processor_performance *perf;
 	struct cpufreq_frequency_table *entry;
-	unsigned int next_perf_state, next_freq, freq;
+	unsigned int next_perf_state, next_freq, index;
 
 	/*
 	 * Find the closest frequency above target_freq.
-	 *
-	 * The table is sorted in the reverse order with respect to the
-	 * frequency and all of the entries are valid (see the initialization).
 	 */
-	entry = policy->freq_table;
-	do {
-		entry++;
-		freq = entry->frequency;
-	} while (freq >= target_freq && freq != CPUFREQ_TABLE_END);
-	entry--;
+	if (policy->cached_target_freq == target_freq)
+		index = policy->cached_resolved_idx;
+	else
+		index = cpufreq_table_find_index_dl(policy, target_freq);
+
+	entry = &policy->freq_table[index];
 	next_freq = entry->frequency;
 	next_perf_state = entry->driver_data;
 
diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c
index 404360cad25c..042023bbbf62 100644
--- a/drivers/cpufreq/amd_freq_sensitivity.c
+++ b/drivers/cpufreq/amd_freq_sensitivity.c
@@ -48,9 +48,8 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy,
 	struct policy_dbs_info *policy_dbs = policy->governor_data;
 	struct dbs_data *od_data = policy_dbs->dbs_data;
 	struct od_dbs_tuners *od_tuners = od_data->tuners;
-	struct od_policy_dbs_info *od_info = to_dbs_info(policy_dbs);
 
-	if (!od_info->freq_table)
+	if (!policy->freq_table)
 		return freq_next;
 
 	rdmsr_on_cpu(policy->cpu, MSR_AMD64_FREQ_SENSITIVITY_ACTUAL,
@@ -92,10 +91,9 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy,
 		else {
 			unsigned int index;
 
-			cpufreq_frequency_table_target(policy,
-				od_info->freq_table, policy->cur - 1,
-				CPUFREQ_RELATION_H, &index);
-			freq_next = od_info->freq_table[index].frequency;
+			index = cpufreq_table_find_index_h(policy,
+							   policy->cur - 1);
+			freq_next = policy->freq_table[index].frequency;
 		}
 
 		data->freq_prev = freq_next;
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 5617c7087d77..3dd4884c6f9e 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -74,19 +74,12 @@ static inline bool has_target(void)
 }
 
 /* internal prototypes */
-static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
 static unsigned int __cpufreq_get(struct cpufreq_policy *policy);
+static int cpufreq_init_governor(struct cpufreq_policy *policy);
+static void cpufreq_exit_governor(struct cpufreq_policy *policy);
 static int cpufreq_start_governor(struct cpufreq_policy *policy);
-
-static inline void cpufreq_exit_governor(struct cpufreq_policy *policy)
-{
-	(void)cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT);
-}
-
-static inline void cpufreq_stop_governor(struct cpufreq_policy *policy)
-{
-	(void)cpufreq_governor(policy, CPUFREQ_GOV_STOP);
-}
+static void cpufreq_stop_governor(struct cpufreq_policy *policy);
+static void cpufreq_governor_limits(struct cpufreq_policy *policy);
 
 /**
  * Two notifier lists: the "policy" list is involved in the
@@ -133,15 +126,6 @@ struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy)
 }
 EXPORT_SYMBOL_GPL(get_governor_parent_kobj);
 
-struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu)
-{
-	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
-
-	return policy && !policy_is_inactive(policy) ?
-		policy->freq_table : NULL;
-}
-EXPORT_SYMBOL_GPL(cpufreq_frequency_get_table);
-
 static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall)
 {
 	u64 idle_time;
@@ -354,6 +338,7 @@ static void __cpufreq_notify_transition(struct cpufreq_policy *policy,
 		pr_debug("FREQ: %lu - CPU: %lu\n",
 			 (unsigned long)freqs->new, (unsigned long)freqs->cpu);
 		trace_cpu_frequency(freqs->new, freqs->cpu);
+		cpufreq_stats_record_transition(policy, freqs->new);
 		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
 				CPUFREQ_POSTCHANGE, freqs);
 		if (likely(policy) && likely(policy->cpu == freqs->cpu))
@@ -507,6 +492,38 @@ void cpufreq_disable_fast_switch(struct cpufreq_policy *policy)
 }
 EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch);
 
+/**
+ * cpufreq_driver_resolve_freq - Map a target frequency to a driver-supported
+ * one.
+ * @target_freq: target frequency to resolve.
+ *
+ * The target to driver frequency mapping is cached in the policy.
+ *
+ * Return: Lowest driver-supported frequency greater than or equal to the
+ * given target_freq, subject to policy (min/max) and driver limitations.
+ */
+unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy,
+					 unsigned int target_freq)
+{
+	target_freq = clamp_val(target_freq, policy->min, policy->max);
+	policy->cached_target_freq = target_freq;
+
+	if (cpufreq_driver->target_index) {
+		int idx;
+
+		idx = cpufreq_frequency_table_target(policy, target_freq,
+						     CPUFREQ_RELATION_L);
+		policy->cached_resolved_idx = idx;
+		return policy->freq_table[idx].frequency;
+	}
+
+	if (cpufreq_driver->resolve_freq)
+		return cpufreq_driver->resolve_freq(policy, target_freq);
+
+	return target_freq;
+}
+EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq);
+
 /*********************************************************************
  *                          SYSFS INTERFACE                          *
  *********************************************************************/
@@ -1115,6 +1132,7 @@ static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy, bool notify)
 					     CPUFREQ_REMOVE_POLICY, policy);
 
 	down_write(&policy->rwsem);
+	cpufreq_stats_free_table(policy);
 	cpufreq_remove_dev_symlink(policy);
 	kobj = &policy->kobj;
 	cmp = &policy->kobj_unregister;
@@ -1265,13 +1283,12 @@ static int cpufreq_online(unsigned int cpu)
 		}
 	}
 
-	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
-				     CPUFREQ_START, policy);
-
 	if (new_policy) {
 		ret = cpufreq_add_dev_interface(policy);
 		if (ret)
 			goto out_exit_policy;
+
+		cpufreq_stats_create_table(policy);
 		blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
 				CPUFREQ_CREATE_POLICY, policy);
 
@@ -1280,6 +1297,9 @@ static int cpufreq_online(unsigned int cpu)
 		write_unlock_irqrestore(&cpufreq_driver_lock, flags);
 	}
 
+	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+				     CPUFREQ_START, policy);
+
 	ret = cpufreq_init_policy(policy);
 	if (ret) {
 		pr_err("%s: Failed to initialize policy for cpu: %d (%d)\n",
@@ -1556,9 +1576,6 @@ static unsigned int cpufreq_update_current_freq(struct cpufreq_policy *policy)
 {
 	unsigned int new_freq;
 
-	if (cpufreq_suspended)
-		return 0;
-
 	new_freq = cpufreq_driver->get(policy->cpu);
 	if (!new_freq)
 		return 0;
@@ -1864,14 +1881,17 @@ static int __target_intermediate(struct cpufreq_policy *policy,
 	return ret;
 }
 
-static int __target_index(struct cpufreq_policy *policy,
-			  struct cpufreq_frequency_table *freq_table, int index)
+static int __target_index(struct cpufreq_policy *policy, int index)
 {
 	struct cpufreq_freqs freqs = {.old = policy->cur, .flags = 0};
 	unsigned int intermediate_freq = 0;
+	unsigned int newfreq = policy->freq_table[index].frequency;
 	int retval = -EINVAL;
 	bool notify;
 
+	if (newfreq == policy->cur)
+		return 0;
+
 	notify = !(cpufreq_driver->flags & CPUFREQ_ASYNC_NOTIFICATION);
 	if (notify) {
 		/* Handle switching to intermediate frequency */
@@ -1886,7 +1906,7 @@ static int __target_index(struct cpufreq_policy *policy,
 				freqs.old = freqs.new;
 		}
 
-		freqs.new = freq_table[index].frequency;
+		freqs.new = newfreq;
 		pr_debug("%s: cpu: %d, oldfreq: %u, new freq: %u\n",
 			 __func__, policy->cpu, freqs.old, freqs.new);
 
@@ -1923,17 +1943,13 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
 			    unsigned int relation)
 {
 	unsigned int old_target_freq = target_freq;
-	struct cpufreq_frequency_table *freq_table;
-	int index, retval;
+	int index;
 
 	if (cpufreq_disabled())
 		return -ENODEV;
 
 	/* Make sure that target_freq is within supported range */
-	if (target_freq > policy->max)
-		target_freq = policy->max;
-	if (target_freq < policy->min)
-		target_freq = policy->min;
+	target_freq = clamp_val(target_freq, policy->min, policy->max);
 
 	pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n",
 		 policy->cpu, target_freq, relation, old_target_freq);
@@ -1956,23 +1972,9 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
 	if (!cpufreq_driver->target_index)
 		return -EINVAL;
 
-	freq_table = cpufreq_frequency_get_table(policy->cpu);
-	if (unlikely(!freq_table)) {
-		pr_err("%s: Unable to find freq_table\n", __func__);
-		return -EINVAL;
-	}
-
-	retval = cpufreq_frequency_table_target(policy, freq_table, target_freq,
-						relation, &index);
-	if (unlikely(retval)) {
-		pr_err("%s: Unable to find matching freq\n", __func__);
-		return retval;
-	}
-
-	if (freq_table[index].frequency == policy->cur)
-		return 0;
+	index = cpufreq_frequency_table_target(policy, target_freq, relation);
 
-	return __target_index(policy, freq_table, index);
+	return __target_index(policy, index);
 }
 EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
 
@@ -1997,7 +1999,7 @@ __weak struct cpufreq_governor *cpufreq_fallback_governor(void)
 	return NULL;
 }
 
-static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event)
+static int cpufreq_init_governor(struct cpufreq_policy *policy)
 {
 	int ret;
 
@@ -2025,36 +2027,82 @@ static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event)
 		}
 	}
 
-	if (event == CPUFREQ_GOV_POLICY_INIT)
-		if (!try_module_get(policy->governor->owner))
-			return -EINVAL;
-
-	pr_debug("%s: for CPU %u, event %u\n", __func__, policy->cpu, event);
+	if (!try_module_get(policy->governor->owner))
+		return -EINVAL;
 
-	ret = policy->governor->governor(policy, event);
+	pr_debug("%s: for CPU %u\n", __func__, policy->cpu);
 
-	if (event == CPUFREQ_GOV_POLICY_INIT) {
-		if (ret)
+	if (policy->governor->init) {
+		ret = policy->governor->init(policy);
+		if (ret) {
 			module_put(policy->governor->owner);
-		else
-			policy->governor->initialized++;
-	} else if (event == CPUFREQ_GOV_POLICY_EXIT) {
-		policy->governor->initialized--;
-		module_put(policy->governor->owner);
+			return ret;
+		}
 	}
 
-	return ret;
+	return 0;
+}
+
+static void cpufreq_exit_governor(struct cpufreq_policy *policy)
+{
+	if (cpufreq_suspended || !policy->governor)
+		return;
+
+	pr_debug("%s: for CPU %u\n", __func__, policy->cpu);
+
+	if (policy->governor->exit)
+		policy->governor->exit(policy);
+
+	module_put(policy->governor->owner);
 }
 
 static int cpufreq_start_governor(struct cpufreq_policy *policy)
 {
 	int ret;
 
+	if (cpufreq_suspended)
+		return 0;
+
+	if (!policy->governor)
+		return -EINVAL;
+
+	pr_debug("%s: for CPU %u\n", __func__, policy->cpu);
+
 	if (cpufreq_driver->get && !cpufreq_driver->setpolicy)
 		cpufreq_update_current_freq(policy);
 
-	ret = cpufreq_governor(policy, CPUFREQ_GOV_START);
-	return ret ? ret : cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+	if (policy->governor->start) {
+		ret = policy->governor->start(policy);
+		if (ret)
+			return ret;
+	}
+
+	if (policy->governor->limits)
+		policy->governor->limits(policy);
+
+	return 0;
+}
+
+static void cpufreq_stop_governor(struct cpufreq_policy *policy)
+{
+	if (cpufreq_suspended || !policy->governor)
+		return;
+
+	pr_debug("%s: for CPU %u\n", __func__, policy->cpu);
+
+	if (policy->governor->stop)
+		policy->governor->stop(policy);
+}
+
+static void cpufreq_governor_limits(struct cpufreq_policy *policy)
+{
+	if (cpufreq_suspended || !policy->governor)
+		return;
+
+	pr_debug("%s: for CPU %u\n", __func__, policy->cpu);
+
+	if (policy->governor->limits)
+		policy->governor->limits(policy);
 }
 
 int cpufreq_register_governor(struct cpufreq_governor *governor)
@@ -2069,7 +2117,6 @@ int cpufreq_register_governor(struct cpufreq_governor *governor)
 
 	mutex_lock(&cpufreq_governor_mutex);
 
-	governor->initialized = 0;
 	err = -EBUSY;
 	if (!find_governor(governor->name)) {
 		err = 0;
@@ -2184,6 +2231,8 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
 	policy->min = new_policy->min;
 	policy->max = new_policy->max;
 
+	policy->cached_target_freq = UINT_MAX;
+
 	pr_debug("new min and max freqs are %u - %u kHz\n",
 		 policy->min, policy->max);
 
@@ -2195,7 +2244,8 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
 
 	if (new_policy->governor == policy->governor) {
 		pr_debug("cpufreq: governor limits update\n");
-		return cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
+		cpufreq_governor_limits(policy);
+		return 0;
 	}
 
 	pr_debug("governor switch\n");
@@ -2210,7 +2260,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
 
 	/* start new governor */
 	policy->governor = new_policy->governor;
-	ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT);
+	ret = cpufreq_init_governor(policy);
 	if (!ret) {
 		ret = cpufreq_start_governor(policy);
 		if (!ret) {
@@ -2224,7 +2274,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
 	pr_debug("starting governor %s failed\n", policy->governor->name);
 	if (old_gov) {
 		policy->governor = old_gov;
-		if (cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT))
+		if (cpufreq_init_governor(policy))
 			policy->governor = NULL;
 		else
 			cpufreq_start_governor(policy);
@@ -2309,26 +2359,25 @@ static struct notifier_block __refdata cpufreq_cpu_notifier = {
  *********************************************************************/
 static int cpufreq_boost_set_sw(int state)
 {
-	struct cpufreq_frequency_table *freq_table;
 	struct cpufreq_policy *policy;
 	int ret = -EINVAL;
 
 	for_each_active_policy(policy) {
-		freq_table = cpufreq_frequency_get_table(policy->cpu);
-		if (freq_table) {
-			ret = cpufreq_frequency_table_cpuinfo(policy,
-							freq_table);
-			if (ret) {
-				pr_err("%s: Policy frequency update failed\n",
-				       __func__);
-				break;
-			}
-
-			down_write(&policy->rwsem);
-			policy->user_policy.max = policy->max;
-			cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
-			up_write(&policy->rwsem);
+		if (!policy->freq_table)
+			continue;
+
+		ret = cpufreq_frequency_table_cpuinfo(policy,
+						      policy->freq_table);
+		if (ret) {
+			pr_err("%s: Policy frequency update failed\n",
+			       __func__);
+			break;
 		}
+
+		down_write(&policy->rwsem);
+		policy->user_policy.max = policy->max;
+		cpufreq_governor_limits(policy);
+		up_write(&policy->rwsem);
 	}
 
 	return ret;
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 316df247e00d..18da4f8051d3 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -17,7 +17,6 @@
 struct cs_policy_dbs_info {
 	struct policy_dbs_info policy_dbs;
 	unsigned int down_skip;
-	unsigned int requested_freq;
 };
 
 static inline struct cs_policy_dbs_info *to_dbs_info(struct policy_dbs_info *policy_dbs)
@@ -75,19 +74,17 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy)
 
 	/* Check for frequency increase */
 	if (load > dbs_data->up_threshold) {
+		unsigned int requested_freq = policy->cur;
+
 		dbs_info->down_skip = 0;
 
 		/* if we are already at full speed then break out early */
-		if (dbs_info->requested_freq == policy->max)
+		if (requested_freq == policy->max)
 			goto out;
 
-		dbs_info->requested_freq += get_freq_target(cs_tuners, policy);
-
-		if (dbs_info->requested_freq > policy->max)
-			dbs_info->requested_freq = policy->max;
+		requested_freq += get_freq_target(cs_tuners, policy);
 
-		__cpufreq_driver_target(policy, dbs_info->requested_freq,
-			CPUFREQ_RELATION_H);
+		__cpufreq_driver_target(policy, requested_freq, CPUFREQ_RELATION_H);
 		goto out;
 	}
 
@@ -98,36 +95,27 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy)
 
 	/* Check for frequency decrease */
 	if (load < cs_tuners->down_threshold) {
-		unsigned int freq_target;
+		unsigned int freq_target, requested_freq = policy->cur;
 		/*
 		 * if we cannot reduce the frequency anymore, break out early
 		 */
-		if (policy->cur == policy->min)
+		if (requested_freq == policy->min)
 			goto out;
 
 		freq_target = get_freq_target(cs_tuners, policy);
-		if (dbs_info->requested_freq > freq_target)
-			dbs_info->requested_freq -= freq_target;
+		if (requested_freq > freq_target)
+			requested_freq -= freq_target;
 		else
-			dbs_info->requested_freq = policy->min;
+			requested_freq = policy->min;
 
-		__cpufreq_driver_target(policy, dbs_info->requested_freq,
-				CPUFREQ_RELATION_L);
+		__cpufreq_driver_target(policy, requested_freq, CPUFREQ_RELATION_L);
 	}
 
  out:
 	return dbs_data->sampling_rate;
 }
 
-static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
-				void *data);
-
-static struct notifier_block cs_cpufreq_notifier_block = {
-	.notifier_call = dbs_cpufreq_notifier,
-};
-
 /************************** sysfs interface ************************/
-static struct dbs_governor cs_dbs_gov;
 
 static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set,
 					  const char *buf, size_t count)
@@ -268,15 +256,13 @@ static void cs_free(struct policy_dbs_info *policy_dbs)
 	kfree(to_dbs_info(policy_dbs));
 }
 
-static int cs_init(struct dbs_data *dbs_data, bool notify)
+static int cs_init(struct dbs_data *dbs_data)
 {
 	struct cs_dbs_tuners *tuners;
 
 	tuners = kzalloc(sizeof(*tuners), GFP_KERNEL);
-	if (!tuners) {
-		pr_err("%s: kzalloc failed\n", __func__);
+	if (!tuners)
 		return -ENOMEM;
-	}
 
 	tuners->down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD;
 	tuners->freq_step = DEF_FREQUENCY_STEP;
@@ -288,19 +274,11 @@ static int cs_init(struct dbs_data *dbs_data, bool notify)
 	dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO *
 		jiffies_to_usecs(10);
 
-	if (notify)
-		cpufreq_register_notifier(&cs_cpufreq_notifier_block,
-					  CPUFREQ_TRANSITION_NOTIFIER);
-
 	return 0;
 }
 
-static void cs_exit(struct dbs_data *dbs_data, bool notify)
+static void cs_exit(struct dbs_data *dbs_data)
 {
-	if (notify)
-		cpufreq_unregister_notifier(&cs_cpufreq_notifier_block,
-					    CPUFREQ_TRANSITION_NOTIFIER);
-
 	kfree(dbs_data->tuners);
 }
 
@@ -309,16 +287,10 @@ static void cs_start(struct cpufreq_policy *policy)
 	struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data);
 
 	dbs_info->down_skip = 0;
-	dbs_info->requested_freq = policy->cur;
 }
 
-static struct dbs_governor cs_dbs_gov = {
-	.gov = {
-		.name = "conservative",
-		.governor = cpufreq_governor_dbs,
-		.max_transition_latency = TRANSITION_LATENCY_LIMIT,
-		.owner = THIS_MODULE,
-	},
+static struct dbs_governor cs_governor = {
+	.gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("conservative"),
 	.kobj_type = { .default_attrs = cs_attributes },
 	.gov_dbs_timer = cs_dbs_timer,
 	.alloc = cs_alloc,
@@ -328,33 +300,7 @@ static struct dbs_governor cs_dbs_gov = {
 	.start = cs_start,
 };
 
-#define CPU_FREQ_GOV_CONSERVATIVE	(&cs_dbs_gov.gov)
-
-static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
-				void *data)
-{
-	struct cpufreq_freqs *freq = data;
-	struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu);
-	struct cs_policy_dbs_info *dbs_info;
-
-	if (!policy)
-		return 0;
-
-	/* policy isn't governed by conservative governor */
-	if (policy->governor != CPU_FREQ_GOV_CONSERVATIVE)
-		return 0;
-
-	dbs_info = to_dbs_info(policy->governor_data);
-	/*
-	 * we only care if our internally tracked freq moves outside the 'valid'
-	 * ranges of frequency available to us otherwise we do not change it
-	*/
-	if (dbs_info->requested_freq > policy->max
-			|| dbs_info->requested_freq < policy->min)
-		dbs_info->requested_freq = freq->new;
-
-	return 0;
-}
+#define CPU_FREQ_GOV_CONSERVATIVE	(&cs_governor.gov)
 
 static int __init cpufreq_gov_dbs_init(void)
 {
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index be498d56dd69..e415349ab31b 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -336,17 +336,6 @@ static inline void gov_clear_update_util(struct cpufreq_policy *policy)
 	synchronize_sched();
 }
 
-static void gov_cancel_work(struct cpufreq_policy *policy)
-{
-	struct policy_dbs_info *policy_dbs = policy->governor_data;
-
-	gov_clear_update_util(policy_dbs->policy);
-	irq_work_sync(&policy_dbs->irq_work);
-	cancel_work_sync(&policy_dbs->work);
-	atomic_set(&policy_dbs->work_count, 0);
-	policy_dbs->work_in_progress = false;
-}
-
 static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy,
 						     struct dbs_governor *gov)
 {
@@ -389,7 +378,7 @@ static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs,
 	gov->free(policy_dbs);
 }
 
-static int cpufreq_governor_init(struct cpufreq_policy *policy)
+int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
 {
 	struct dbs_governor *gov = dbs_governor_of(policy);
 	struct dbs_data *dbs_data;
@@ -429,7 +418,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy)
 
 	gov_attr_set_init(&dbs_data->attr_set, &policy_dbs->list);
 
-	ret = gov->init(dbs_data, !policy->governor->initialized);
+	ret = gov->init(dbs_data);
 	if (ret)
 		goto free_policy_dbs_info;
 
@@ -458,13 +447,13 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy)
 		goto out;
 
 	/* Failure, so roll back. */
-	pr_err("cpufreq: Governor initialization failed (dbs_data kobject init error %d)\n", ret);
+	pr_err("initialization failed (dbs_data kobject init error %d)\n", ret);
 
 	policy->governor_data = NULL;
 
 	if (!have_governor_per_policy())
 		gov->gdbs_data = NULL;
-	gov->exit(dbs_data, !policy->governor->initialized);
+	gov->exit(dbs_data);
 	kfree(dbs_data);
 
 free_policy_dbs_info:
@@ -474,8 +463,9 @@ out:
 	mutex_unlock(&gov_dbs_data_mutex);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_init);
 
-static int cpufreq_governor_exit(struct cpufreq_policy *policy)
+void cpufreq_dbs_governor_exit(struct cpufreq_policy *policy)
 {
 	struct dbs_governor *gov = dbs_governor_of(policy);
 	struct policy_dbs_info *policy_dbs = policy->governor_data;
@@ -493,17 +483,17 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy)
 		if (!have_governor_per_policy())
 			gov->gdbs_data = NULL;
 
-		gov->exit(dbs_data, policy->governor->initialized == 1);
+		gov->exit(dbs_data);
 		kfree(dbs_data);
 	}
 
 	free_policy_dbs_info(policy_dbs, gov);
 
 	mutex_unlock(&gov_dbs_data_mutex);
-	return 0;
 }
+EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_exit);
 
-static int cpufreq_governor_start(struct cpufreq_policy *policy)
+int cpufreq_dbs_governor_start(struct cpufreq_policy *policy)
 {
 	struct dbs_governor *gov = dbs_governor_of(policy);
 	struct policy_dbs_info *policy_dbs = policy->governor_data;
@@ -539,47 +529,28 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy)
 	gov_set_update_util(policy_dbs, sampling_rate);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_start);
 
-static int cpufreq_governor_stop(struct cpufreq_policy *policy)
+void cpufreq_dbs_governor_stop(struct cpufreq_policy *policy)
 {
-	gov_cancel_work(policy);
-	return 0;
+	struct policy_dbs_info *policy_dbs = policy->governor_data;
+
+	gov_clear_update_util(policy_dbs->policy);
+	irq_work_sync(&policy_dbs->irq_work);
+	cancel_work_sync(&policy_dbs->work);
+	atomic_set(&policy_dbs->work_count, 0);
+	policy_dbs->work_in_progress = false;
 }
+EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_stop);
 
-static int cpufreq_governor_limits(struct cpufreq_policy *policy)
+void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy)
 {
 	struct policy_dbs_info *policy_dbs = policy->governor_data;
 
 	mutex_lock(&policy_dbs->timer_mutex);
-
-	if (policy->max < policy->cur)
-		__cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
-	else if (policy->min > policy->cur)
-		__cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L);
-
+	cpufreq_policy_apply_limits(policy);
 	gov_update_sample_delay(policy_dbs, 0);
 
 	mutex_unlock(&policy_dbs->timer_mutex);
-
-	return 0;
-}
-
-int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event)
-{
-	if (event == CPUFREQ_GOV_POLICY_INIT) {
-		return cpufreq_governor_init(policy);
-	} else if (policy->governor_data) {
-		switch (event) {
-		case CPUFREQ_GOV_POLICY_EXIT:
-			return cpufreq_governor_exit(policy);
-		case CPUFREQ_GOV_START:
-			return cpufreq_governor_start(policy);
-		case CPUFREQ_GOV_STOP:
-			return cpufreq_governor_stop(policy);
-		case CPUFREQ_GOV_LIMITS:
-			return cpufreq_governor_limits(policy);
-		}
-	}
-	return -EINVAL;
 }
-EXPORT_SYMBOL_GPL(cpufreq_governor_dbs);
+EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_limits);
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 34eb214b6d57..ef1037e9c92b 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -138,8 +138,8 @@ struct dbs_governor {
 	unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy);
 	struct policy_dbs_info *(*alloc)(void);
 	void (*free)(struct policy_dbs_info *policy_dbs);
-	int (*init)(struct dbs_data *dbs_data, bool notify);
-	void (*exit)(struct dbs_data *dbs_data, bool notify);
+	int (*init)(struct dbs_data *dbs_data);
+	void (*exit)(struct dbs_data *dbs_data);
 	void (*start)(struct cpufreq_policy *policy);
 };
 
@@ -148,6 +148,25 @@ static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy
 	return container_of(policy->governor, struct dbs_governor, gov);
 }
 
+/* Governor callback routines */
+int cpufreq_dbs_governor_init(struct cpufreq_policy *policy);
+void cpufreq_dbs_governor_exit(struct cpufreq_policy *policy);
+int cpufreq_dbs_governor_start(struct cpufreq_policy *policy);
+void cpufreq_dbs_governor_stop(struct cpufreq_policy *policy);
+void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy);
+
+#define CPUFREQ_DBS_GOVERNOR_INITIALIZER(_name_)			\
+	{								\
+		.name = _name_,						\
+		.max_transition_latency	= TRANSITION_LATENCY_LIMIT,	\
+		.owner = THIS_MODULE,					\
+		.init = cpufreq_dbs_governor_init,			\
+		.exit = cpufreq_dbs_governor_exit,			\
+		.start = cpufreq_dbs_governor_start,			\
+		.stop = cpufreq_dbs_governor_stop,			\
+		.limits = cpufreq_dbs_governor_limits,			\
+	}
+
 /* Governor specific operations */
 struct od_ops {
 	unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy,
@@ -155,7 +174,6 @@ struct od_ops {
 };
 
 unsigned int dbs_update(struct cpufreq_policy *policy);
-int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event);
 void od_register_powersave_bias_handler(unsigned int (*f)
 		(struct cpufreq_policy *, unsigned int, unsigned int),
 		unsigned int powersave_bias);
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 300163430516..3a1f49f5f4c6 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -65,34 +65,30 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy,
 {
 	unsigned int freq_req, freq_reduc, freq_avg;
 	unsigned int freq_hi, freq_lo;
-	unsigned int index = 0;
+	unsigned int index;
 	unsigned int delay_hi_us;
 	struct policy_dbs_info *policy_dbs = policy->governor_data;
 	struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs);
 	struct dbs_data *dbs_data = policy_dbs->dbs_data;
 	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
+	struct cpufreq_frequency_table *freq_table = policy->freq_table;
 
-	if (!dbs_info->freq_table) {
+	if (!freq_table) {
 		dbs_info->freq_lo = 0;
 		dbs_info->freq_lo_delay_us = 0;
 		return freq_next;
 	}
 
-	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
-			relation, &index);
-	freq_req = dbs_info->freq_table[index].frequency;
+	index = cpufreq_frequency_table_target(policy, freq_next, relation);
+	freq_req = freq_table[index].frequency;
 	freq_reduc = freq_req * od_tuners->powersave_bias / 1000;
 	freq_avg = freq_req - freq_reduc;
 
 	/* Find freq bounds for freq_avg in freq_table */
-	index = 0;
-	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
-			CPUFREQ_RELATION_H, &index);
-	freq_lo = dbs_info->freq_table[index].frequency;
-	index = 0;
-	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
-			CPUFREQ_RELATION_L, &index);
-	freq_hi = dbs_info->freq_table[index].frequency;
+	index = cpufreq_table_find_index_h(policy, freq_avg);
+	freq_lo = freq_table[index].frequency;
+	index = cpufreq_table_find_index_l(policy, freq_avg);
+	freq_hi = freq_table[index].frequency;
 
 	/* Find out how long we have to be in hi and lo freqs */
 	if (freq_hi == freq_lo) {
@@ -113,7 +109,6 @@ static void ondemand_powersave_bias_init(struct cpufreq_policy *policy)
 {
 	struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data);
 
-	dbs_info->freq_table = cpufreq_frequency_get_table(policy->cpu);
 	dbs_info->freq_lo = 0;
 }
 
@@ -361,17 +356,15 @@ static void od_free(struct policy_dbs_info *policy_dbs)
 	kfree(to_dbs_info(policy_dbs));
 }
 
-static int od_init(struct dbs_data *dbs_data, bool notify)
+static int od_init(struct dbs_data *dbs_data)
 {
 	struct od_dbs_tuners *tuners;
 	u64 idle_time;
 	int cpu;
 
 	tuners = kzalloc(sizeof(*tuners), GFP_KERNEL);
-	if (!tuners) {
-		pr_err("%s: kzalloc failed\n", __func__);
+	if (!tuners)
 		return -ENOMEM;
-	}
 
 	cpu = get_cpu();
 	idle_time = get_cpu_idle_time_us(cpu, NULL);
@@ -402,7 +395,7 @@ static int od_init(struct dbs_data *dbs_data, bool notify)
 	return 0;
 }
 
-static void od_exit(struct dbs_data *dbs_data, bool notify)
+static void od_exit(struct dbs_data *dbs_data)
 {
 	kfree(dbs_data->tuners);
 }
@@ -420,12 +413,7 @@ static struct od_ops od_ops = {
 };
 
 static struct dbs_governor od_dbs_gov = {
-	.gov = {
-		.name = "ondemand",
-		.governor = cpufreq_governor_dbs,
-		.max_transition_latency	= TRANSITION_LATENCY_LIMIT,
-		.owner = THIS_MODULE,
-	},
+	.gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("ondemand"),
 	.kobj_type = { .default_attrs = od_attributes },
 	.gov_dbs_timer = od_dbs_timer,
 	.alloc = od_alloc,
diff --git a/drivers/cpufreq/cpufreq_ondemand.h b/drivers/cpufreq/cpufreq_ondemand.h
index f0121db3cd9e..640ea4e97106 100644
--- a/drivers/cpufreq/cpufreq_ondemand.h
+++ b/drivers/cpufreq/cpufreq_ondemand.h
@@ -13,7 +13,6 @@
 
 struct od_policy_dbs_info {
 	struct policy_dbs_info policy_dbs;
-	struct cpufreq_frequency_table *freq_table;
 	unsigned int freq_lo;
 	unsigned int freq_lo_delay_us;
 	unsigned int freq_hi_delay_us;
diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c
index af9f4b96f5a8..dafb679adc58 100644
--- a/drivers/cpufreq/cpufreq_performance.c
+++ b/drivers/cpufreq/cpufreq_performance.c
@@ -16,27 +16,16 @@
 #include <linux/init.h>
 #include <linux/module.h>
 
-static int cpufreq_governor_performance(struct cpufreq_policy *policy,
-					unsigned int event)
+static void cpufreq_gov_performance_limits(struct cpufreq_policy *policy)
 {
-	switch (event) {
-	case CPUFREQ_GOV_START:
-	case CPUFREQ_GOV_LIMITS:
-		pr_debug("setting to %u kHz because of event %u\n",
-						policy->max, event);
-		__cpufreq_driver_target(policy, policy->max,
-						CPUFREQ_RELATION_H);
-		break;
-	default:
-		break;
-	}
-	return 0;
+	pr_debug("setting to %u kHz\n", policy->max);
+	__cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
 }
 
 static struct cpufreq_governor cpufreq_gov_performance = {
 	.name		= "performance",
-	.governor	= cpufreq_governor_performance,
 	.owner		= THIS_MODULE,
+	.limits		= cpufreq_gov_performance_limits,
 };
 
 static int __init cpufreq_gov_performance_init(void)
diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c
index b8b400232a74..78a651038faf 100644
--- a/drivers/cpufreq/cpufreq_powersave.c
+++ b/drivers/cpufreq/cpufreq_powersave.c
@@ -16,26 +16,15 @@
 #include <linux/init.h>
 #include <linux/module.h>
 
-static int cpufreq_governor_powersave(struct cpufreq_policy *policy,
-					unsigned int event)
+static void cpufreq_gov_powersave_limits(struct cpufreq_policy *policy)
 {
-	switch (event) {
-	case CPUFREQ_GOV_START:
-	case CPUFREQ_GOV_LIMITS:
-		pr_debug("setting to %u kHz because of event %u\n",
-							policy->min, event);
-		__cpufreq_driver_target(policy, policy->min,
-						CPUFREQ_RELATION_L);
-		break;
-	default:
-		break;
-	}
-	return 0;
+	pr_debug("setting to %u kHz\n", policy->min);
+	__cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L);
 }
 
 static struct cpufreq_governor cpufreq_gov_powersave = {
 	.name		= "powersave",
-	.governor	= cpufreq_governor_powersave,
+	.limits		= cpufreq_gov_powersave_limits,
 	.owner		= THIS_MODULE,
 };
 
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 5e370a30a964..06d3abdffd3a 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -15,7 +15,7 @@
 #include <linux/slab.h>
 #include <linux/cputime.h>
 
-static spinlock_t cpufreq_stats_lock;
+static DEFINE_SPINLOCK(cpufreq_stats_lock);
 
 struct cpufreq_stats {
 	unsigned int total_trans;
@@ -52,6 +52,9 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf)
 	ssize_t len = 0;
 	int i;
 
+	if (policy->fast_switch_enabled)
+		return 0;
+
 	cpufreq_stats_update(stats);
 	for (i = 0; i < stats->state_num; i++) {
 		len += sprintf(buf + len, "%u %llu\n", stats->freq_table[i],
@@ -68,6 +71,9 @@ static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf)
 	ssize_t len = 0;
 	int i, j;
 
+	if (policy->fast_switch_enabled)
+		return 0;
+
 	len += snprintf(buf + len, PAGE_SIZE - len, "   From  :    To\n");
 	len += snprintf(buf + len, PAGE_SIZE - len, "         : ");
 	for (i = 0; i < stats->state_num; i++) {
@@ -130,7 +136,7 @@ static int freq_table_get_index(struct cpufreq_stats *stats, unsigned int freq)
 	return -1;
 }
 
-static void __cpufreq_stats_free_table(struct cpufreq_policy *policy)
+void cpufreq_stats_free_table(struct cpufreq_policy *policy)
 {
 	struct cpufreq_stats *stats = policy->stats;
 
@@ -146,39 +152,25 @@ static void __cpufreq_stats_free_table(struct cpufreq_policy *policy)
 	policy->stats = NULL;
 }
 
-static void cpufreq_stats_free_table(unsigned int cpu)
-{
-	struct cpufreq_policy *policy;
-
-	policy = cpufreq_cpu_get(cpu);
-	if (!policy)
-		return;
-
-	__cpufreq_stats_free_table(policy);
-
-	cpufreq_cpu_put(policy);
-}
-
-static int __cpufreq_stats_create_table(struct cpufreq_policy *policy)
+void cpufreq_stats_create_table(struct cpufreq_policy *policy)
 {
 	unsigned int i = 0, count = 0, ret = -ENOMEM;
 	struct cpufreq_stats *stats;
 	unsigned int alloc_size;
-	unsigned int cpu = policy->cpu;
 	struct cpufreq_frequency_table *pos, *table;
 
 	/* We need cpufreq table for creating stats table */
-	table = cpufreq_frequency_get_table(cpu);
+	table = policy->freq_table;
 	if (unlikely(!table))
-		return 0;
+		return;
 
 	/* stats already initialized */
 	if (policy->stats)
-		return -EEXIST;
+		return;
 
 	stats = kzalloc(sizeof(*stats), GFP_KERNEL);
 	if (!stats)
-		return -ENOMEM;
+		return;
 
 	/* Find total allocation size */
 	cpufreq_for_each_valid_entry(pos, table)
@@ -215,80 +207,32 @@ static int __cpufreq_stats_create_table(struct cpufreq_policy *policy)
 	policy->stats = stats;
 	ret = sysfs_create_group(&policy->kobj, &stats_attr_group);
 	if (!ret)
-		return 0;
+		return;
 
 	/* We failed, release resources */
 	policy->stats = NULL;
 	kfree(stats->time_in_state);
 free_stat:
 	kfree(stats);
-
-	return ret;
-}
-
-static void cpufreq_stats_create_table(unsigned int cpu)
-{
-	struct cpufreq_policy *policy;
-
-	/*
-	 * "likely(!policy)" because normally cpufreq_stats will be registered
-	 * before cpufreq driver
-	 */
-	policy = cpufreq_cpu_get(cpu);
-	if (likely(!policy))
-		return;
-
-	__cpufreq_stats_create_table(policy);
-
-	cpufreq_cpu_put(policy);
 }
 
-static int cpufreq_stat_notifier_policy(struct notifier_block *nb,
-		unsigned long val, void *data)
+void cpufreq_stats_record_transition(struct cpufreq_policy *policy,
+				     unsigned int new_freq)
 {
-	int ret = 0;
-	struct cpufreq_policy *policy = data;
-
-	if (val == CPUFREQ_CREATE_POLICY)
-		ret = __cpufreq_stats_create_table(policy);
-	else if (val == CPUFREQ_REMOVE_POLICY)
-		__cpufreq_stats_free_table(policy);
-
-	return ret;
-}
-
-static int cpufreq_stat_notifier_trans(struct notifier_block *nb,
-		unsigned long val, void *data)
-{
-	struct cpufreq_freqs *freq = data;
-	struct cpufreq_policy *policy = cpufreq_cpu_get(freq->cpu);
-	struct cpufreq_stats *stats;
+	struct cpufreq_stats *stats = policy->stats;
 	int old_index, new_index;
 
-	if (!policy) {
-		pr_err("%s: No policy found\n", __func__);
-		return 0;
-	}
-
-	if (val != CPUFREQ_POSTCHANGE)
-		goto put_policy;
-
-	if (!policy->stats) {
+	if (!stats) {
 		pr_debug("%s: No stats found\n", __func__);
-		goto put_policy;
+		return;
 	}
 
-	stats = policy->stats;
-
 	old_index = stats->last_index;
-	new_index = freq_table_get_index(stats, freq->new);
+	new_index = freq_table_get_index(stats, new_freq);
 
 	/* We can't do stats->time_in_state[-1]= .. */
-	if (old_index == -1 || new_index == -1)
-		goto put_policy;
-
-	if (old_index == new_index)
-		goto put_policy;
+	if (old_index == -1 || new_index == -1 || old_index == new_index)
+		return;
 
 	cpufreq_stats_update(stats);
 
@@ -297,61 +241,4 @@ static int cpufreq_stat_notifier_trans(struct notifier_block *nb,
 	stats->trans_table[old_index * stats->max_state + new_index]++;
 #endif
 	stats->total_trans++;
-
-put_policy:
-	cpufreq_cpu_put(policy);
-	return 0;
 }
-
-static struct notifier_block notifier_policy_block = {
-	.notifier_call = cpufreq_stat_notifier_policy
-};
-
-static struct notifier_block notifier_trans_block = {
-	.notifier_call = cpufreq_stat_notifier_trans
-};
-
-static int __init cpufreq_stats_init(void)
-{
-	int ret;
-	unsigned int cpu;
-
-	spin_lock_init(&cpufreq_stats_lock);
-	ret = cpufreq_register_notifier(&notifier_policy_block,
-				CPUFREQ_POLICY_NOTIFIER);
-	if (ret)
-		return ret;
-
-	for_each_online_cpu(cpu)
-		cpufreq_stats_create_table(cpu);
-
-	ret = cpufreq_register_notifier(&notifier_trans_block,
-				CPUFREQ_TRANSITION_NOTIFIER);
-	if (ret) {
-		cpufreq_unregister_notifier(&notifier_policy_block,
-				CPUFREQ_POLICY_NOTIFIER);
-		for_each_online_cpu(cpu)
-			cpufreq_stats_free_table(cpu);
-		return ret;
-	}
-
-	return 0;
-}
-static void __exit cpufreq_stats_exit(void)
-{
-	unsigned int cpu;
-
-	cpufreq_unregister_notifier(&notifier_policy_block,
-			CPUFREQ_POLICY_NOTIFIER);
-	cpufreq_unregister_notifier(&notifier_trans_block,
-			CPUFREQ_TRANSITION_NOTIFIER);
-	for_each_online_cpu(cpu)
-		cpufreq_stats_free_table(cpu);
-}
-
-MODULE_AUTHOR("Zou Nan hai <nanhai.zou@intel.com>");
-MODULE_DESCRIPTION("Export cpufreq stats via sysfs");
-MODULE_LICENSE("GPL");
-
-module_init(cpufreq_stats_init);
-module_exit(cpufreq_stats_exit);
diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c
index 9f3dec9a3f36..bd897e3e134d 100644
--- a/drivers/cpufreq/cpufreq_userspace.c
+++ b/drivers/cpufreq/cpufreq_userspace.c
@@ -65,66 +65,66 @@ static int cpufreq_userspace_policy_init(struct cpufreq_policy *policy)
 	return 0;
 }
 
-static int cpufreq_governor_userspace(struct cpufreq_policy *policy,
-				   unsigned int event)
+static void cpufreq_userspace_policy_exit(struct cpufreq_policy *policy)
+{
+	mutex_lock(&userspace_mutex);
+	kfree(policy->governor_data);
+	policy->governor_data = NULL;
+	mutex_unlock(&userspace_mutex);
+}
+
+static int cpufreq_userspace_policy_start(struct cpufreq_policy *policy)
 {
 	unsigned int *setspeed = policy->governor_data;
-	unsigned int cpu = policy->cpu;
-	int rc = 0;
 
-	if (event == CPUFREQ_GOV_POLICY_INIT)
-		return cpufreq_userspace_policy_init(policy);
+	BUG_ON(!policy->cur);
+	pr_debug("started managing cpu %u\n", policy->cpu);
 
-	if (!setspeed)
-		return -EINVAL;
-
-	switch (event) {
-	case CPUFREQ_GOV_POLICY_EXIT:
-		mutex_lock(&userspace_mutex);
-		policy->governor_data = NULL;
-		kfree(setspeed);
-		mutex_unlock(&userspace_mutex);
-		break;
-	case CPUFREQ_GOV_START:
-		BUG_ON(!policy->cur);
-		pr_debug("started managing cpu %u\n", cpu);
-
-		mutex_lock(&userspace_mutex);
-		per_cpu(cpu_is_managed, cpu) = 1;
-		*setspeed = policy->cur;
-		mutex_unlock(&userspace_mutex);
-		break;
-	case CPUFREQ_GOV_STOP:
-		pr_debug("managing cpu %u stopped\n", cpu);
-
-		mutex_lock(&userspace_mutex);
-		per_cpu(cpu_is_managed, cpu) = 0;
-		*setspeed = 0;
-		mutex_unlock(&userspace_mutex);
-		break;
-	case CPUFREQ_GOV_LIMITS:
-		mutex_lock(&userspace_mutex);
-		pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz, last set to %u kHz\n",
-			cpu, policy->min, policy->max, policy->cur, *setspeed);
-
-		if (policy->max < *setspeed)
-			__cpufreq_driver_target(policy, policy->max,
-						CPUFREQ_RELATION_H);
-		else if (policy->min > *setspeed)
-			__cpufreq_driver_target(policy, policy->min,
-						CPUFREQ_RELATION_L);
-		else
-			__cpufreq_driver_target(policy, *setspeed,
-						CPUFREQ_RELATION_L);
-		mutex_unlock(&userspace_mutex);
-		break;
-	}
-	return rc;
+	mutex_lock(&userspace_mutex);
+	per_cpu(cpu_is_managed, policy->cpu) = 1;
+	*setspeed = policy->cur;
+	mutex_unlock(&userspace_mutex);
+	return 0;
+}
+
+static void cpufreq_userspace_policy_stop(struct cpufreq_policy *policy)
+{
+	unsigned int *setspeed = policy->governor_data;
+
+	pr_debug("managing cpu %u stopped\n", policy->cpu);
+
+	mutex_lock(&userspace_mutex);
+	per_cpu(cpu_is_managed, policy->cpu) = 0;
+	*setspeed = 0;
+	mutex_unlock(&userspace_mutex);
+}
+
+static void cpufreq_userspace_policy_limits(struct cpufreq_policy *policy)
+{
+	unsigned int *setspeed = policy->governor_data;
+
+	mutex_lock(&userspace_mutex);
+
+	pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz, last set to %u kHz\n",
+		 policy->cpu, policy->min, policy->max, policy->cur, *setspeed);
+
+	if (policy->max < *setspeed)
+		__cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
+	else if (policy->min > *setspeed)
+		__cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L);
+	else
+		__cpufreq_driver_target(policy, *setspeed, CPUFREQ_RELATION_L);
+
+	mutex_unlock(&userspace_mutex);
 }
 
 static struct cpufreq_governor cpufreq_gov_userspace = {
 	.name		= "userspace",
-	.governor	= cpufreq_governor_userspace,
+	.init		= cpufreq_userspace_policy_init,
+	.exit		= cpufreq_userspace_policy_exit,
+	.start		= cpufreq_userspace_policy_start,
+	.stop		= cpufreq_userspace_policy_stop,
+	.limits		= cpufreq_userspace_policy_limits,
 	.store_setspeed	= cpufreq_set,
 	.show_setspeed	= show_speed,
 	.owner		= THIS_MODULE,
diff --git a/drivers/cpufreq/davinci-cpufreq.c b/drivers/cpufreq/davinci-cpufreq.c
index 7e336d20c184..b95a872800ec 100644
--- a/drivers/cpufreq/davinci-cpufreq.c
+++ b/drivers/cpufreq/davinci-cpufreq.c
@@ -38,26 +38,6 @@ struct davinci_cpufreq {
 };
 static struct davinci_cpufreq cpufreq;
 
-static int davinci_verify_speed(struct cpufreq_policy *policy)
-{
-	struct davinci_cpufreq_config *pdata = cpufreq.dev->platform_data;
-	struct cpufreq_frequency_table *freq_table = pdata->freq_table;
-	struct clk *armclk = cpufreq.armclk;
-
-	if (freq_table)
-		return cpufreq_frequency_table_verify(policy, freq_table);
-
-	if (policy->cpu)
-		return -EINVAL;
-
-	cpufreq_verify_within_cpu_limits(policy);
-	policy->min = clk_round_rate(armclk, policy->min * 1000) / 1000;
-	policy->max = clk_round_rate(armclk, policy->max * 1000) / 1000;
-	cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
-						policy->cpuinfo.max_freq);
-	return 0;
-}
-
 static int davinci_target(struct cpufreq_policy *policy, unsigned int idx)
 {
 	struct davinci_cpufreq_config *pdata = cpufreq.dev->platform_data;
@@ -121,7 +101,7 @@ static int davinci_cpu_init(struct cpufreq_policy *policy)
 
 static struct cpufreq_driver davinci_driver = {
 	.flags		= CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK,
-	.verify		= davinci_verify_speed,
+	.verify		= cpufreq_generic_frequency_table_verify,
 	.target_index	= davinci_target,
 	.get		= cpufreq_generic_get,
 	.init		= davinci_cpu_init,
diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c
index a8f1daffc9bc..3bbbf9e6960c 100644
--- a/drivers/cpufreq/freq_table.c
+++ b/drivers/cpufreq/freq_table.c
@@ -63,8 +63,6 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
 	else
 		return 0;
 }
-EXPORT_SYMBOL_GPL(cpufreq_frequency_table_cpuinfo);
-
 
 int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
 				   struct cpufreq_frequency_table *table)
@@ -108,20 +106,16 @@ EXPORT_SYMBOL_GPL(cpufreq_frequency_table_verify);
  */
 int cpufreq_generic_frequency_table_verify(struct cpufreq_policy *policy)
 {
-	struct cpufreq_frequency_table *table =
-		cpufreq_frequency_get_table(policy->cpu);
-	if (!table)
+	if (!policy->freq_table)
 		return -ENODEV;
 
-	return cpufreq_frequency_table_verify(policy, table);
+	return cpufreq_frequency_table_verify(policy, policy->freq_table);
 }
 EXPORT_SYMBOL_GPL(cpufreq_generic_frequency_table_verify);
 
-int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
-				   struct cpufreq_frequency_table *table,
-				   unsigned int target_freq,
-				   unsigned int relation,
-				   unsigned int *index)
+int cpufreq_table_index_unsorted(struct cpufreq_policy *policy,
+				 unsigned int target_freq,
+				 unsigned int relation)
 {
 	struct cpufreq_frequency_table optimal = {
 		.driver_data = ~0,
@@ -132,7 +126,9 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
 		.frequency = 0,
 	};
 	struct cpufreq_frequency_table *pos;
+	struct cpufreq_frequency_table *table = policy->freq_table;
 	unsigned int freq, diff, i = 0;
+	int index;
 
 	pr_debug("request for target %u kHz (relation: %u) for cpu %u\n",
 					target_freq, relation, policy->cpu);
@@ -196,25 +192,26 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
 		}
 	}
 	if (optimal.driver_data > i) {
-		if (suboptimal.driver_data > i)
-			return -EINVAL;
-		*index = suboptimal.driver_data;
-	} else
-		*index = optimal.driver_data;
+		if (suboptimal.driver_data > i) {
+			WARN(1, "Invalid frequency table: %d\n", policy->cpu);
+			return 0;
+		}
 
-	pr_debug("target index is %u, freq is:%u kHz\n", *index,
-		 table[*index].frequency);
+		index = suboptimal.driver_data;
+	} else
+		index = optimal.driver_data;
 
-	return 0;
+	pr_debug("target index is %u, freq is:%u kHz\n", index,
+		 table[index].frequency);
+	return index;
 }
-EXPORT_SYMBOL_GPL(cpufreq_frequency_table_target);
+EXPORT_SYMBOL_GPL(cpufreq_table_index_unsorted);
 
 int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy,
 		unsigned int freq)
 {
-	struct cpufreq_frequency_table *pos, *table;
+	struct cpufreq_frequency_table *pos, *table = policy->freq_table;
 
-	table = cpufreq_frequency_get_table(policy->cpu);
 	if (unlikely(!table)) {
 		pr_debug("%s: Unable to find frequency table\n", __func__);
 		return -ENOENT;
@@ -300,15 +297,72 @@ struct freq_attr *cpufreq_generic_attr[] = {
 };
 EXPORT_SYMBOL_GPL(cpufreq_generic_attr);
 
+static int set_freq_table_sorted(struct cpufreq_policy *policy)
+{
+	struct cpufreq_frequency_table *pos, *table = policy->freq_table;
+	struct cpufreq_frequency_table *prev = NULL;
+	int ascending = 0;
+
+	policy->freq_table_sorted = CPUFREQ_TABLE_UNSORTED;
+
+	cpufreq_for_each_valid_entry(pos, table) {
+		if (!prev) {
+			prev = pos;
+			continue;
+		}
+
+		if (pos->frequency == prev->frequency) {
+			pr_warn("Duplicate freq-table entries: %u\n",
+				pos->frequency);
+			return -EINVAL;
+		}
+
+		/* Frequency increased from prev to pos */
+		if (pos->frequency > prev->frequency) {
+			/* But frequency was decreasing earlier */
+			if (ascending < 0) {
+				pr_debug("Freq table is unsorted\n");
+				return 0;
+			}
+
+			ascending++;
+		} else {
+			/* Frequency decreased from prev to pos */
+
+			/* But frequency was increasing earlier */
+			if (ascending > 0) {
+				pr_debug("Freq table is unsorted\n");
+				return 0;
+			}
+
+			ascending--;
+		}
+
+		prev = pos;
+	}
+
+	if (ascending > 0)
+		policy->freq_table_sorted = CPUFREQ_TABLE_SORTED_ASCENDING;
+	else
+		policy->freq_table_sorted = CPUFREQ_TABLE_SORTED_DESCENDING;
+
+	pr_debug("Freq table is sorted in %s order\n",
+		 ascending > 0 ? "ascending" : "descending");
+
+	return 0;
+}
+
 int cpufreq_table_validate_and_show(struct cpufreq_policy *policy,
 				      struct cpufreq_frequency_table *table)
 {
-	int ret = cpufreq_frequency_table_cpuinfo(policy, table);
+	int ret;
 
-	if (!ret)
-		policy->freq_table = table;
+	ret = cpufreq_frequency_table_cpuinfo(policy, table);
+	if (ret)
+		return ret;
 
-	return ret;
+	policy->freq_table = table;
+	return set_freq_table_sorted(policy);
 }
 EXPORT_SYMBOL_GPL(cpufreq_table_validate_and_show);
 
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 28690b284846..9ec033b4f2d9 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -97,7 +97,6 @@ static inline u64 div_ext_fp(u64 x, u64 y)
  *			read from MPERF MSR between last and current sample
  * @tsc:		Difference of time stamp counter between last and
  *			current sample
- * @freq:		Effective frequency calculated from APERF/MPERF
  * @time:		Current time from scheduler
  *
  * This structure is used in the cpudata structure to store performance sample
@@ -109,7 +108,6 @@ struct sample {
 	u64 aperf;
 	u64 mperf;
 	u64 tsc;
-	int freq;
 	u64 time;
 };
 
@@ -282,9 +280,9 @@ struct cpu_defaults {
 static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu);
 static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu);
 
-static struct pstate_adjust_policy pid_params;
-static struct pstate_funcs pstate_funcs;
-static int hwp_active;
+static struct pstate_adjust_policy pid_params __read_mostly;
+static struct pstate_funcs pstate_funcs __read_mostly;
+static int hwp_active __read_mostly;
 
 #ifdef CONFIG_ACPI
 static bool acpi_ppc;
@@ -808,7 +806,8 @@ static void __init intel_pstate_sysfs_expose_params(void)
 static void intel_pstate_hwp_enable(struct cpudata *cpudata)
 {
 	/* First disable HWP notification interrupt as we don't process them */
-	wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
+	if (static_cpu_has(X86_FEATURE_HWP_NOTIFY))
+		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
 
 	wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
 }
@@ -945,7 +944,7 @@ static int core_get_max_pstate(void)
 			if (err)
 				goto skip_tar;
 
-			tdp_msr = MSR_CONFIG_TDP_NOMINAL + tdp_ctrl;
+			tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x3);
 			err = rdmsrl_safe(tdp_msr, &tdp_ratio);
 			if (err)
 				goto skip_tar;
@@ -973,7 +972,7 @@ static int core_get_turbo_pstate(void)
 	u64 value;
 	int nont, ret;
 
-	rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
+	rdmsrl(MSR_TURBO_RATIO_LIMIT, value);
 	nont = core_get_max_pstate();
 	ret = (value) & 255;
 	if (ret <= nont)
@@ -1002,7 +1001,7 @@ static int knl_get_turbo_pstate(void)
 	u64 value;
 	int nont, ret;
 
-	rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value);
+	rdmsrl(MSR_TURBO_RATIO_LIMIT, value);
 	nont = core_get_max_pstate();
 	ret = (((value) >> 8) & 0xFF);
 	if (ret <= nont)
@@ -1092,6 +1091,26 @@ static struct cpu_defaults knl_params = {
 	},
 };
 
+static struct cpu_defaults bxt_params = {
+	.pid_policy = {
+		.sample_rate_ms = 10,
+		.deadband = 0,
+		.setpoint = 60,
+		.p_gain_pct = 14,
+		.d_gain_pct = 0,
+		.i_gain_pct = 4,
+	},
+	.funcs = {
+		.get_max = core_get_max_pstate,
+		.get_max_physical = core_get_max_pstate_physical,
+		.get_min = core_get_min_pstate,
+		.get_turbo = core_get_turbo_pstate,
+		.get_scaling = core_get_scaling,
+		.get_val = core_get_val,
+		.get_target_pstate = get_target_pstate_use_cpu_load,
+	},
+};
+
 static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
 {
 	int max_perf = cpu->pstate.turbo_pstate;
@@ -1114,17 +1133,12 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
 	*min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
 }
 
-static inline void intel_pstate_record_pstate(struct cpudata *cpu, int pstate)
-{
-	trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
-	cpu->pstate.current_pstate = pstate;
-}
-
 static void intel_pstate_set_min_pstate(struct cpudata *cpu)
 {
 	int pstate = cpu->pstate.min_pstate;
 
-	intel_pstate_record_pstate(cpu, pstate);
+	trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
+	cpu->pstate.current_pstate = pstate;
 	/*
 	 * Generally, there is no guarantee that this code will always run on
 	 * the CPU being updated, so force the register update to run on the
@@ -1284,10 +1298,11 @@ static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
 
 	intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
 	pstate = clamp_t(int, pstate, min_perf, max_perf);
+	trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
 	if (pstate == cpu->pstate.current_pstate)
 		return;
 
-	intel_pstate_record_pstate(cpu, pstate);
+	cpu->pstate.current_pstate = pstate;
 	wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
 }
 
@@ -1352,11 +1367,12 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
 	ICPU(INTEL_FAM6_SKYLAKE_DESKTOP,	core_params),
 	ICPU(INTEL_FAM6_BROADWELL_XEON_D,	core_params),
 	ICPU(INTEL_FAM6_XEON_PHI_KNL,		knl_params),
+	ICPU(INTEL_FAM6_ATOM_GOLDMONT,		bxt_params),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
 
-static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] = {
+static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
 	ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params),
 	{}
 };
@@ -1576,12 +1592,12 @@ static struct cpufreq_driver intel_pstate_driver = {
 	.name		= "intel_pstate",
 };
 
-static int __initdata no_load;
-static int __initdata no_hwp;
-static int __initdata hwp_only;
-static unsigned int force_load;
+static int no_load __initdata;
+static int no_hwp __initdata;
+static int hwp_only __initdata;
+static unsigned int force_load __initdata;
 
-static int intel_pstate_msrs_not_valid(void)
+static int __init intel_pstate_msrs_not_valid(void)
 {
 	if (!pstate_funcs.get_max() ||
 	    !pstate_funcs.get_min() ||
@@ -1591,7 +1607,7 @@ static int intel_pstate_msrs_not_valid(void)
 	return 0;
 }
 
-static void copy_pid_params(struct pstate_adjust_policy *policy)
+static void __init copy_pid_params(struct pstate_adjust_policy *policy)
 {
 	pid_params.sample_rate_ms = policy->sample_rate_ms;
 	pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
@@ -1602,7 +1618,7 @@ static void copy_pid_params(struct pstate_adjust_policy *policy)
 	pid_params.setpoint = policy->setpoint;
 }
 
-static void copy_cpu_funcs(struct pstate_funcs *funcs)
+static void __init copy_cpu_funcs(struct pstate_funcs *funcs)
 {
 	pstate_funcs.get_max   = funcs->get_max;
 	pstate_funcs.get_max_physical = funcs->get_max_physical;
@@ -1617,7 +1633,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs)
 
 #ifdef CONFIG_ACPI
 
-static bool intel_pstate_no_acpi_pss(void)
+static bool __init intel_pstate_no_acpi_pss(void)
 {
 	int i;
 
@@ -1646,7 +1662,7 @@ static bool intel_pstate_no_acpi_pss(void)
 	return true;
 }
 
-static bool intel_pstate_has_acpi_ppc(void)
+static bool __init intel_pstate_has_acpi_ppc(void)
 {
 	int i;
 
@@ -1674,7 +1690,7 @@ struct hw_vendor_info {
 };
 
 /* Hardware vendor-specific info that has its own power management modes */
-static struct hw_vendor_info vendor_info[] = {
+static struct hw_vendor_info vendor_info[] __initdata = {
 	{1, "HP    ", "ProLiant", PSS},
 	{1, "ORACLE", "X4-2    ", PPC},
 	{1, "ORACLE", "X4-2L   ", PPC},
@@ -1693,7 +1709,7 @@ static struct hw_vendor_info vendor_info[] = {
 	{0, "", ""},
 };
 
-static bool intel_pstate_platform_pwr_mgmt_exists(void)
+static bool __init intel_pstate_platform_pwr_mgmt_exists(void)
 {
 	struct acpi_table_header hdr;
 	struct hw_vendor_info *v_info;
diff --git a/drivers/cpufreq/mvebu-cpufreq.c b/drivers/cpufreq/mvebu-cpufreq.c
index e920889b9ac2..ed915ee85dd9 100644
--- a/drivers/cpufreq/mvebu-cpufreq.c
+++ b/drivers/cpufreq/mvebu-cpufreq.c
@@ -70,7 +70,7 @@ static int __init armada_xp_pmsu_cpufreq_init(void)
 			continue;
 		}
 
-		clk = clk_get(cpu_dev, 0);
+		clk = clk_get(cpu_dev, NULL);
 		if (IS_ERR(clk)) {
 			pr_err("Cannot get clock for CPU %d\n", cpu);
 			return PTR_ERR(clk);
diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c
index a7ecb9a84c15..3f0ce2ae35ee 100644
--- a/drivers/cpufreq/pcc-cpufreq.c
+++ b/drivers/cpufreq/pcc-cpufreq.c
@@ -555,8 +555,6 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	policy->min = policy->cpuinfo.min_freq =
 		ioread32(&pcch_hdr->minimum_frequency) * 1000;
 
-	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
-
 	pr_debug("init: policy->max is %d, policy->min is %d\n",
 		policy->max, policy->min);
 out:
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 6bd715b7f11c..87796e0864e9 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -64,12 +64,14 @@
 /**
  * struct global_pstate_info -	Per policy data structure to maintain history of
  *				global pstates
- * @highest_lpstate:		The local pstate from which we are ramping down
+ * @highest_lpstate_idx:	The local pstate index from which we are
+ *				ramping down
  * @elapsed_time:		Time in ms spent in ramping down from
- *				highest_lpstate
+ *				highest_lpstate_idx
  * @last_sampled_time:		Time from boot in ms when global pstates were
  *				last set
- * @last_lpstate,last_gpstate:	Last set values for local and global pstates
+ * @last_lpstate_idx,		Last set value of local pstate and global
+ * last_gpstate_idx		pstate in terms of cpufreq table index
  * @timer:			Is used for ramping down if cpu goes idle for
  *				a long time with global pstate held high
  * @gpstate_lock:		A spinlock to maintain synchronization between
@@ -77,11 +79,11 @@
  *				governer's target_index calls
  */
 struct global_pstate_info {
-	int highest_lpstate;
+	int highest_lpstate_idx;
 	unsigned int elapsed_time;
 	unsigned int last_sampled_time;
-	int last_lpstate;
-	int last_gpstate;
+	int last_lpstate_idx;
+	int last_gpstate_idx;
 	spinlock_t gpstate_lock;
 	struct timer_list timer;
 };
@@ -124,29 +126,47 @@ static int nr_chips;
 static DEFINE_PER_CPU(struct chip *, chip_info);
 
 /*
- * Note: The set of pstates consists of contiguous integers, the
- * smallest of which is indicated by powernv_pstate_info.min, the
- * largest of which is indicated by powernv_pstate_info.max.
+ * Note:
+ * The set of pstates consists of contiguous integers.
+ * powernv_pstate_info stores the index of the frequency table for
+ * max, min and nominal frequencies. It also stores number of
+ * available frequencies.
  *
- * The nominal pstate is the highest non-turbo pstate in this
- * platform. This is indicated by powernv_pstate_info.nominal.
+ * powernv_pstate_info.nominal indicates the index to the highest
+ * non-turbo frequency.
  */
 static struct powernv_pstate_info {
-	int min;
-	int max;
-	int nominal;
-	int nr_pstates;
+	unsigned int min;
+	unsigned int max;
+	unsigned int nominal;
+	unsigned int nr_pstates;
 } powernv_pstate_info;
 
+/* Use following macros for conversions between pstate_id and index */
+static inline int idx_to_pstate(unsigned int i)
+{
+	return powernv_freqs[i].driver_data;
+}
+
+static inline unsigned int pstate_to_idx(int pstate)
+{
+	/*
+	 * abs() is deliberately used so that is works with
+	 * both monotonically increasing and decreasing
+	 * pstate values
+	 */
+	return abs(pstate - idx_to_pstate(powernv_pstate_info.max));
+}
+
 static inline void reset_gpstates(struct cpufreq_policy *policy)
 {
 	struct global_pstate_info *gpstates = policy->driver_data;
 
-	gpstates->highest_lpstate = 0;
+	gpstates->highest_lpstate_idx = 0;
 	gpstates->elapsed_time = 0;
 	gpstates->last_sampled_time = 0;
-	gpstates->last_lpstate = 0;
-	gpstates->last_gpstate = 0;
+	gpstates->last_lpstate_idx = 0;
+	gpstates->last_gpstate_idx = 0;
 }
 
 /*
@@ -156,9 +176,10 @@ static inline void reset_gpstates(struct cpufreq_policy *policy)
 static int init_powernv_pstates(void)
 {
 	struct device_node *power_mgt;
-	int i, pstate_min, pstate_max, pstate_nominal, nr_pstates = 0;
+	int i, nr_pstates = 0;
 	const __be32 *pstate_ids, *pstate_freqs;
 	u32 len_ids, len_freqs;
+	u32 pstate_min, pstate_max, pstate_nominal;
 
 	power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
 	if (!power_mgt) {
@@ -208,6 +229,7 @@ static int init_powernv_pstates(void)
 		return -ENODEV;
 	}
 
+	powernv_pstate_info.nr_pstates = nr_pstates;
 	pr_debug("NR PStates %d\n", nr_pstates);
 	for (i = 0; i < nr_pstates; i++) {
 		u32 id = be32_to_cpu(pstate_ids[i]);
@@ -216,15 +238,17 @@ static int init_powernv_pstates(void)
 		pr_debug("PState id %d freq %d MHz\n", id, freq);
 		powernv_freqs[i].frequency = freq * 1000; /* kHz */
 		powernv_freqs[i].driver_data = id;
+
+		if (id == pstate_max)
+			powernv_pstate_info.max = i;
+		else if (id == pstate_nominal)
+			powernv_pstate_info.nominal = i;
+		else if (id == pstate_min)
+			powernv_pstate_info.min = i;
 	}
+
 	/* End of list marker entry */
 	powernv_freqs[i].frequency = CPUFREQ_TABLE_END;
-
-	powernv_pstate_info.min = pstate_min;
-	powernv_pstate_info.max = pstate_max;
-	powernv_pstate_info.nominal = pstate_nominal;
-	powernv_pstate_info.nr_pstates = nr_pstates;
-
 	return 0;
 }
 
@@ -233,12 +257,12 @@ static unsigned int pstate_id_to_freq(int pstate_id)
 {
 	int i;
 
-	i = powernv_pstate_info.max - pstate_id;
+	i = pstate_to_idx(pstate_id);
 	if (i >= powernv_pstate_info.nr_pstates || i < 0) {
 		pr_warn("PState id %d outside of PState table, "
 			"reporting nominal id %d instead\n",
-			pstate_id, powernv_pstate_info.nominal);
-		i = powernv_pstate_info.max - powernv_pstate_info.nominal;
+			pstate_id, idx_to_pstate(powernv_pstate_info.nominal));
+		i = powernv_pstate_info.nominal;
 	}
 
 	return powernv_freqs[i].frequency;
@@ -252,7 +276,7 @@ static ssize_t cpuinfo_nominal_freq_show(struct cpufreq_policy *policy,
 					char *buf)
 {
 	return sprintf(buf, "%u\n",
-		pstate_id_to_freq(powernv_pstate_info.nominal));
+		powernv_freqs[powernv_pstate_info.nominal].frequency);
 }
 
 struct freq_attr cpufreq_freq_attr_cpuinfo_nominal_freq =
@@ -426,7 +450,7 @@ static void set_pstate(void *data)
  */
 static inline unsigned int get_nominal_index(void)
 {
-	return powernv_pstate_info.max - powernv_pstate_info.nominal;
+	return powernv_pstate_info.nominal;
 }
 
 static void powernv_cpufreq_throttle_check(void *data)
@@ -435,20 +459,22 @@ static void powernv_cpufreq_throttle_check(void *data)
 	unsigned int cpu = smp_processor_id();
 	unsigned long pmsr;
 	int pmsr_pmax;
+	unsigned int pmsr_pmax_idx;
 
 	pmsr = get_pmspr(SPRN_PMSR);
 	chip = this_cpu_read(chip_info);
 
 	/* Check for Pmax Capping */
 	pmsr_pmax = (s8)PMSR_MAX(pmsr);
-	if (pmsr_pmax != powernv_pstate_info.max) {
+	pmsr_pmax_idx = pstate_to_idx(pmsr_pmax);
+	if (pmsr_pmax_idx != powernv_pstate_info.max) {
 		if (chip->throttled)
 			goto next;
 		chip->throttled = true;
-		if (pmsr_pmax < powernv_pstate_info.nominal) {
-			pr_warn_once("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n",
+		if (pmsr_pmax_idx > powernv_pstate_info.nominal) {
+			pr_warn_once("CPU %d on Chip %u has Pmax(%d) reduced below nominal frequency(%d)\n",
 				     cpu, chip->id, pmsr_pmax,
-				     powernv_pstate_info.nominal);
+				     idx_to_pstate(powernv_pstate_info.nominal));
 			chip->throttle_sub_turbo++;
 		} else {
 			chip->throttle_turbo++;
@@ -484,34 +510,35 @@ next:
 
 /**
  * calc_global_pstate - Calculate global pstate
- * @elapsed_time:	Elapsed time in milliseconds
- * @local_pstate:	New local pstate
- * @highest_lpstate:	pstate from which its ramping down
+ * @elapsed_time:		Elapsed time in milliseconds
+ * @local_pstate_idx:		New local pstate
+ * @highest_lpstate_idx:	pstate from which its ramping down
  *
  * Finds the appropriate global pstate based on the pstate from which its
  * ramping down and the time elapsed in ramping down. It follows a quadratic
  * equation which ensures that it reaches ramping down to pmin in 5sec.
  */
 static inline int calc_global_pstate(unsigned int elapsed_time,
-				     int highest_lpstate, int local_pstate)
+				     int highest_lpstate_idx,
+				     int local_pstate_idx)
 {
-	int pstate_diff;
+	int index_diff;
 
 	/*
 	 * Using ramp_down_percent we get the percentage of rampdown
 	 * that we are expecting to be dropping. Difference between
-	 * highest_lpstate and powernv_pstate_info.min will give a absolute
+	 * highest_lpstate_idx and powernv_pstate_info.min will give a absolute
 	 * number of how many pstates we will drop eventually by the end of
 	 * 5 seconds, then just scale it get the number pstates to be dropped.
 	 */
-	pstate_diff =  ((int)ramp_down_percent(elapsed_time) *
-			(highest_lpstate - powernv_pstate_info.min)) / 100;
+	index_diff =  ((int)ramp_down_percent(elapsed_time) *
+			(powernv_pstate_info.min - highest_lpstate_idx)) / 100;
 
 	/* Ensure that global pstate is >= to local pstate */
-	if (highest_lpstate - pstate_diff < local_pstate)
-		return local_pstate;
+	if (highest_lpstate_idx + index_diff >= local_pstate_idx)
+		return local_pstate_idx;
 	else
-		return highest_lpstate - pstate_diff;
+		return highest_lpstate_idx + index_diff;
 }
 
 static inline void  queue_gpstate_timer(struct global_pstate_info *gpstates)
@@ -546,7 +573,7 @@ void gpstate_timer_handler(unsigned long data)
 {
 	struct cpufreq_policy *policy = (struct cpufreq_policy *)data;
 	struct global_pstate_info *gpstates = policy->driver_data;
-	int gpstate_id;
+	int gpstate_idx;
 	unsigned int time_diff = jiffies_to_msecs(jiffies)
 					- gpstates->last_sampled_time;
 	struct powernv_smp_call_data freq_data;
@@ -556,29 +583,29 @@ void gpstate_timer_handler(unsigned long data)
 
 	gpstates->last_sampled_time += time_diff;
 	gpstates->elapsed_time += time_diff;
-	freq_data.pstate_id = gpstates->last_lpstate;
+	freq_data.pstate_id = idx_to_pstate(gpstates->last_lpstate_idx);
 
-	if ((gpstates->last_gpstate == freq_data.pstate_id) ||
+	if ((gpstates->last_gpstate_idx == gpstates->last_lpstate_idx) ||
 	    (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME)) {
-		gpstate_id = freq_data.pstate_id;
+		gpstate_idx = pstate_to_idx(freq_data.pstate_id);
 		reset_gpstates(policy);
-		gpstates->highest_lpstate = freq_data.pstate_id;
+		gpstates->highest_lpstate_idx = gpstate_idx;
 	} else {
-		gpstate_id = calc_global_pstate(gpstates->elapsed_time,
-						gpstates->highest_lpstate,
-						freq_data.pstate_id);
+		gpstate_idx = calc_global_pstate(gpstates->elapsed_time,
+						 gpstates->highest_lpstate_idx,
+						 freq_data.pstate_id);
 	}
 
 	/*
 	 * If local pstate is equal to global pstate, rampdown is over
 	 * So timer is not required to be queued.
 	 */
-	if (gpstate_id != freq_data.pstate_id)
+	if (gpstate_idx != gpstates->last_lpstate_idx)
 		queue_gpstate_timer(gpstates);
 
-	freq_data.gpstate_id = gpstate_id;
-	gpstates->last_gpstate = freq_data.gpstate_id;
-	gpstates->last_lpstate = freq_data.pstate_id;
+	freq_data.gpstate_id = idx_to_pstate(gpstate_idx);
+	gpstates->last_gpstate_idx = pstate_to_idx(freq_data.gpstate_id);
+	gpstates->last_lpstate_idx = pstate_to_idx(freq_data.pstate_id);
 
 	spin_unlock(&gpstates->gpstate_lock);
 
@@ -595,7 +622,7 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy,
 					unsigned int new_index)
 {
 	struct powernv_smp_call_data freq_data;
-	unsigned int cur_msec, gpstate_id;
+	unsigned int cur_msec, gpstate_idx;
 	struct global_pstate_info *gpstates = policy->driver_data;
 
 	if (unlikely(rebooting) && new_index != get_nominal_index())
@@ -607,15 +634,15 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy,
 	cur_msec = jiffies_to_msecs(get_jiffies_64());
 
 	spin_lock(&gpstates->gpstate_lock);
-	freq_data.pstate_id = powernv_freqs[new_index].driver_data;
+	freq_data.pstate_id = idx_to_pstate(new_index);
 
 	if (!gpstates->last_sampled_time) {
-		gpstate_id = freq_data.pstate_id;
-		gpstates->highest_lpstate = freq_data.pstate_id;
+		gpstate_idx = new_index;
+		gpstates->highest_lpstate_idx = new_index;
 		goto gpstates_done;
 	}
 
-	if (gpstates->last_gpstate > freq_data.pstate_id) {
+	if (gpstates->last_gpstate_idx < new_index) {
 		gpstates->elapsed_time += cur_msec -
 						 gpstates->last_sampled_time;
 
@@ -626,34 +653,34 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy,
 		 */
 		if (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME) {
 			reset_gpstates(policy);
-			gpstates->highest_lpstate = freq_data.pstate_id;
-			gpstate_id = freq_data.pstate_id;
+			gpstates->highest_lpstate_idx = new_index;
+			gpstate_idx = new_index;
 		} else {
 		/* Elaspsed_time is less than 5 seconds, continue to rampdown */
-			gpstate_id = calc_global_pstate(gpstates->elapsed_time,
-							gpstates->highest_lpstate,
-							freq_data.pstate_id);
+			gpstate_idx = calc_global_pstate(gpstates->elapsed_time,
+							 gpstates->highest_lpstate_idx,
+							 new_index);
 		}
 	} else {
 		reset_gpstates(policy);
-		gpstates->highest_lpstate = freq_data.pstate_id;
-		gpstate_id = freq_data.pstate_id;
+		gpstates->highest_lpstate_idx = new_index;
+		gpstate_idx = new_index;
 	}
 
 	/*
 	 * If local pstate is equal to global pstate, rampdown is over
 	 * So timer is not required to be queued.
 	 */
-	if (gpstate_id != freq_data.pstate_id)
+	if (gpstate_idx != new_index)
 		queue_gpstate_timer(gpstates);
 	else
 		del_timer_sync(&gpstates->timer);
 
 gpstates_done:
-	freq_data.gpstate_id = gpstate_id;
+	freq_data.gpstate_id = idx_to_pstate(gpstate_idx);
 	gpstates->last_sampled_time = cur_msec;
-	gpstates->last_gpstate = freq_data.gpstate_id;
-	gpstates->last_lpstate = freq_data.pstate_id;
+	gpstates->last_gpstate_idx = gpstate_idx;
+	gpstates->last_lpstate_idx = new_index;
 
 	spin_unlock(&gpstates->gpstate_lock);
 
@@ -759,9 +786,7 @@ void powernv_cpufreq_work_fn(struct work_struct *work)
 		struct cpufreq_policy policy;
 
 		cpufreq_get_policy(&policy, cpu);
-		cpufreq_frequency_table_target(&policy, policy.freq_table,
-					       policy.cur,
-					       CPUFREQ_RELATION_C, &index);
+		index = cpufreq_table_find_index_c(&policy, policy.cur);
 		powernv_cpufreq_target_index(&policy, index);
 		cpumask_andnot(&mask, &mask, policy.cpus);
 	}
@@ -847,8 +872,8 @@ static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy)
 	struct powernv_smp_call_data freq_data;
 	struct global_pstate_info *gpstates = policy->driver_data;
 
-	freq_data.pstate_id = powernv_pstate_info.min;
-	freq_data.gpstate_id = powernv_pstate_info.min;
+	freq_data.pstate_id = idx_to_pstate(powernv_pstate_info.min);
+	freq_data.gpstate_id = idx_to_pstate(powernv_pstate_info.min);
 	smp_call_function_single(policy->cpu, set_pstate, &freq_data, 1);
 	del_timer_sync(&gpstates->timer);
 }
diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
index 7c4cd5c634f2..dc112481a408 100644
--- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
@@ -94,7 +94,7 @@ static int pmi_notifier(struct notifier_block *nb,
 				       unsigned long event, void *data)
 {
 	struct cpufreq_policy *policy = data;
-	struct cpufreq_frequency_table *cbe_freqs;
+	struct cpufreq_frequency_table *cbe_freqs = policy->freq_table;
 	u8 node;
 
 	/* Should this really be called for CPUFREQ_ADJUST and CPUFREQ_NOTIFY
@@ -103,7 +103,6 @@ static int pmi_notifier(struct notifier_block *nb,
 	if (event == CPUFREQ_START)
 		return 0;
 
-	cbe_freqs = cpufreq_frequency_get_table(policy->cpu);
 	node = cbe_cpu_to_node(policy->cpu);
 
 	pr_debug("got notified, event=%lu, node=%u\n", event, node);
diff --git a/drivers/cpufreq/s3c24xx-cpufreq.c b/drivers/cpufreq/s3c24xx-cpufreq.c
index ae8eaed77b70..7b596fa38ad2 100644
--- a/drivers/cpufreq/s3c24xx-cpufreq.c
+++ b/drivers/cpufreq/s3c24xx-cpufreq.c
@@ -293,12 +293,8 @@ static int s3c_cpufreq_target(struct cpufreq_policy *policy,
 		     __func__, policy, target_freq, relation);
 
 	if (ftab) {
-		if (cpufreq_frequency_table_target(policy, ftab,
-						   target_freq, relation,
-						   &index)) {
-			s3c_freq_dbg("%s: table failed\n", __func__);
-			return -EINVAL;
-		}
+		index = cpufreq_frequency_table_target(policy, target_freq,
+						       relation);
 
 		s3c_freq_dbg("%s: adjust %d to entry %d (%u)\n", __func__,
 			     target_freq, index, ftab[index].frequency);
@@ -315,7 +311,6 @@ static int s3c_cpufreq_target(struct cpufreq_policy *policy,
 		pll = NULL;
 	} else {
 		struct cpufreq_policy tmp_policy;
-		int ret;
 
 		/* we keep the cpu pll table in Hz, to ensure we get an
 		 * accurate value for the PLL output. */
@@ -323,20 +318,14 @@ static int s3c_cpufreq_target(struct cpufreq_policy *policy,
 		tmp_policy.min = policy->min * 1000;
 		tmp_policy.max = policy->max * 1000;
 		tmp_policy.cpu = policy->cpu;
+		tmp_policy.freq_table = pll_reg;
 
-		/* cpufreq_frequency_table_target uses a pointer to 'index'
-		 * which is the number of the table entry, not the value of
+		/* cpufreq_frequency_table_target returns the index
+		 * of the table entry, not the value of
 		 * the table entry's index field. */
 
-		ret = cpufreq_frequency_table_target(&tmp_policy, pll_reg,
-						     target_freq, relation,
-						     &index);
-
-		if (ret < 0) {
-			pr_err("%s: no PLL available\n", __func__);
-			goto err_notpossible;
-		}
-
+		index = cpufreq_frequency_table_target(&tmp_policy, target_freq,
+						       relation);
 		pll = pll_reg + index;
 
 		s3c_freq_dbg("%s: target %u => %u\n",
@@ -346,10 +335,6 @@ static int s3c_cpufreq_target(struct cpufreq_policy *policy,
 	}
 
 	return s3c_cpufreq_settarget(policy, target_freq, pll);
-
- err_notpossible:
-	pr_err("no compatible settings for %d\n", target_freq);
-	return -EINVAL;
 }
 
 struct clk *s3c_cpufreq_clk_get(struct device *dev, const char *name)
@@ -571,11 +556,7 @@ static int s3c_cpufreq_build_freq(void)
 {
 	int size, ret;
 
-	if (!cpu_cur.info->calc_freqtable)
-		return -EINVAL;
-
 	kfree(ftab);
-	ftab = NULL;
 
 	size = cpu_cur.info->calc_freqtable(&cpu_cur, NULL, 0);
 	size++;
diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c
index 06d85917b6d5..9e07588ea9f5 100644
--- a/drivers/cpufreq/s5pv210-cpufreq.c
+++ b/drivers/cpufreq/s5pv210-cpufreq.c
@@ -246,12 +246,7 @@ static int s5pv210_target(struct cpufreq_policy *policy, unsigned int index)
 	new_freq = s5pv210_freq_table[index].frequency;
 
 	/* Finding current running level index */
-	if (cpufreq_frequency_table_target(policy, s5pv210_freq_table,
-					   old_freq, CPUFREQ_RELATION_H,
-					   &priv_index)) {
-		ret = -EINVAL;
-		goto exit;
-	}
+	priv_index = cpufreq_table_find_index_h(policy, old_freq);
 
 	arm_volt = dvs_conf[index].arm_volt;
 	int_volt = dvs_conf[index].int_volt;
diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
index 78dac0e9da11..a5be56ec57f2 100644
--- a/drivers/devfreq/Kconfig
+++ b/drivers/devfreq/Kconfig
@@ -75,7 +75,7 @@ config DEVFREQ_GOV_PASSIVE
 comment "DEVFREQ Drivers"
 
 config ARM_EXYNOS_BUS_DEVFREQ
-	bool "ARM EXYNOS Generic Memory Bus DEVFREQ Driver"
+	tristate "ARM EXYNOS Generic Memory Bus DEVFREQ Driver"
 	depends on ARCH_EXYNOS
 	select DEVFREQ_GOV_SIMPLE_ONDEMAND
 	select DEVFREQ_GOV_PASSIVE
diff --git a/drivers/devfreq/devfreq-event.c b/drivers/devfreq/devfreq-event.c
index 39b048eda2ce..9aea2c7ecbe6 100644
--- a/drivers/devfreq/devfreq-event.c
+++ b/drivers/devfreq/devfreq-event.c
@@ -15,7 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/err.h>
 #include <linux/init.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/of.h>
@@ -481,13 +481,3 @@ static int __init devfreq_event_init(void)
 	return 0;
 }
 subsys_initcall(devfreq_event_init);
-
-static void __exit devfreq_event_exit(void)
-{
-	class_destroy(devfreq_event_class);
-}
-module_exit(devfreq_event_exit);
-
-MODULE_AUTHOR("Chanwoo Choi <cw00.choi@samsung.com>");
-MODULE_DESCRIPTION("DEVFREQ-Event class support");
-MODULE_LICENSE("GPL");
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index e92418facc92..478006b7764a 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -15,7 +15,7 @@
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/init.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/pm_opp.h>
@@ -707,10 +707,12 @@ struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index)
 		if (devfreq->dev.parent
 			&& devfreq->dev.parent->of_node == node) {
 			mutex_unlock(&devfreq_list_lock);
+			of_node_put(node);
 			return devfreq;
 		}
 	}
 	mutex_unlock(&devfreq_list_lock);
+	of_node_put(node);
 
 	return ERR_PTR(-EPROBE_DEFER);
 }
@@ -1199,13 +1201,6 @@ static int __init devfreq_init(void)
 }
 subsys_initcall(devfreq_init);
 
-static void __exit devfreq_exit(void)
-{
-	class_destroy(devfreq_class);
-	destroy_workqueue(devfreq_wq);
-}
-module_exit(devfreq_exit);
-
 /*
  * The followings are helper functions for devfreq user device drivers with
  * OPP framework.
@@ -1471,7 +1466,3 @@ void devm_devfreq_unregister_notifier(struct device *dev,
 			       devm_devfreq_dev_match, devfreq));
 }
 EXPORT_SYMBOL(devm_devfreq_unregister_notifier);
-
-MODULE_AUTHOR("MyungJoo Ham <myungjoo.ham@samsung.com>");
-MODULE_DESCRIPTION("devfreq class support");
-MODULE_LICENSE("GPL");
diff --git a/drivers/devfreq/event/Kconfig b/drivers/devfreq/event/Kconfig
index 1e8b4f469f38..eb6f74a2b6b9 100644
--- a/drivers/devfreq/event/Kconfig
+++ b/drivers/devfreq/event/Kconfig
@@ -14,7 +14,7 @@ menuconfig PM_DEVFREQ_EVENT
 if PM_DEVFREQ_EVENT
 
 config DEVFREQ_EVENT_EXYNOS_NOCP
-	bool "EXYNOS NoC (Network On Chip) Probe DEVFREQ event Driver"
+	tristate "EXYNOS NoC (Network On Chip) Probe DEVFREQ event Driver"
 	depends on ARCH_EXYNOS
 	select PM_OPP
 	help
@@ -22,7 +22,7 @@ config DEVFREQ_EVENT_EXYNOS_NOCP
 	  (Network on Chip) Probe counters to measure the bandwidth of AXI bus.
 
 config DEVFREQ_EVENT_EXYNOS_PPMU
-	bool "EXYNOS PPMU (Platform Performance Monitoring Unit) DEVFREQ event Driver"
+	tristate "EXYNOS PPMU (Platform Performance Monitoring Unit) DEVFREQ event Driver"
 	depends on ARCH_EXYNOS
 	select PM_OPP
 	help
diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c
index f312485f1451..845bf25fb9fb 100644
--- a/drivers/devfreq/event/exynos-ppmu.c
+++ b/drivers/devfreq/event/exynos-ppmu.c
@@ -482,7 +482,8 @@ static int exynos_ppmu_probe(struct platform_device *pdev)
 	if (!info->edev) {
 		dev_err(&pdev->dev,
 			"failed to allocate memory devfreq-event devices\n");
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto err;
 	}
 	edev = info->edev;
 	platform_set_drvdata(pdev, info);
diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c
index 2363d0a189b7..29866f7e6d7e 100644
--- a/drivers/devfreq/exynos-bus.c
+++ b/drivers/devfreq/exynos-bus.c
@@ -383,7 +383,7 @@ err_clk:
 static int exynos_bus_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct device_node *np = dev->of_node;
+	struct device_node *np = dev->of_node, *node;
 	struct devfreq_dev_profile *profile;
 	struct devfreq_simple_ondemand_data *ondemand_data;
 	struct devfreq_passive_data *passive_data;
@@ -407,7 +407,7 @@ static int exynos_bus_probe(struct platform_device *pdev)
 	/* Parse the device-tree to get the resource information */
 	ret = exynos_bus_parse_of(np, bus);
 	if (ret < 0)
-		goto err;
+		return ret;
 
 	profile = devm_kzalloc(dev, sizeof(*profile), GFP_KERNEL);
 	if (!profile) {
@@ -415,10 +415,13 @@ static int exynos_bus_probe(struct platform_device *pdev)
 		goto err;
 	}
 
-	if (of_parse_phandle(dev->of_node, "devfreq", 0))
+	node = of_parse_phandle(dev->of_node, "devfreq", 0);
+	if (node) {
+		of_node_put(node);
 		goto passive;
-	else
+	} else {
 		ret = exynos_bus_parent_parse_of(np, bus);
+	}
 
 	if (ret < 0)
 		goto err;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index 252edba16e36..892d60fb225b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -421,29 +421,6 @@ static int acp_suspend(void *handle)
 
 static int acp_resume(void *handle)
 {
-	int i, ret;
-	struct acp_pm_domain *apd;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	/* return early if no ACP */
-	if (!adev->acp.acp_genpd)
-		return 0;
-
-	/* SMU block will power on ACP irrespective of ACP runtime status.
-	 * Power off explicitly based on genpd ACP runtime status so that ACP
-	 * hw and ACP-genpd status are in sync.
-	 * 'suspend_power_off' represents "Power status before system suspend"
-	*/
-	if (adev->acp.acp_genpd->gpd.suspend_power_off == true) {
-		apd = container_of(&adev->acp.acp_genpd->gpd,
-					struct acp_pm_domain, gpd);
-
-		for (i = 4; i >= 0 ; i--) {
-			ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_P1 + i);
-			if (ret)
-				pr_err("ACP tile %d tile suspend failed\n", i);
-		}
-	}
 	return 0;
 }
 
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index b5dd41d13d3d..9b2ef248788d 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -46,8 +46,6 @@
  * to avoid complications with the lapic timer workaround.
  * Have not seen issues with suspend, but may need same workaround here.
  *
- * There is currently no kernel-based automatic probing/loading mechanism
- * if the driver is built as a module.
  */
 
 /* un-comment DEBUG to enable pr_debug() statements */
@@ -60,7 +58,7 @@
 #include <linux/sched.h>
 #include <linux/notifier.h>
 #include <linux/cpu.h>
-#include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <asm/cpu_device_id.h>
 #include <asm/intel-family.h>
 #include <asm/mwait.h>
@@ -828,6 +826,35 @@ static struct cpuidle_state bxt_cstates[] = {
 		.enter = NULL }
 };
 
+static struct cpuidle_state dnv_cstates[] = {
+	{
+		.name = "C1-DNV",
+		.desc = "MWAIT 0x00",
+		.flags = MWAIT2flg(0x00),
+		.exit_latency = 2,
+		.target_residency = 2,
+		.enter = &intel_idle,
+		.enter_freeze = intel_idle_freeze, },
+	{
+		.name = "C1E-DNV",
+		.desc = "MWAIT 0x01",
+		.flags = MWAIT2flg(0x01),
+		.exit_latency = 10,
+		.target_residency = 20,
+		.enter = &intel_idle,
+		.enter_freeze = intel_idle_freeze, },
+	{
+		.name = "C6-DNV",
+		.desc = "MWAIT 0x20",
+		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 50,
+		.target_residency = 500,
+		.enter = &intel_idle,
+		.enter_freeze = intel_idle_freeze, },
+	{
+		.enter = NULL }
+};
+
 /**
  * intel_idle
  * @dev: cpuidle_device
@@ -1017,6 +1044,11 @@ static const struct idle_cpu idle_cpu_bxt = {
 	.disable_promotion_to_c1e = true,
 };
 
+static const struct idle_cpu idle_cpu_dnv = {
+	.state_table = dnv_cstates,
+	.disable_promotion_to_c1e = true,
+};
+
 #define ICPU(model, cpu) \
 	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu }
 
@@ -1053,9 +1085,9 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
 	ICPU(INTEL_FAM6_SKYLAKE_X,		idle_cpu_skx),
 	ICPU(INTEL_FAM6_XEON_PHI_KNL,		idle_cpu_knl),
 	ICPU(INTEL_FAM6_ATOM_GOLDMONT,		idle_cpu_bxt),
+	ICPU(INTEL_FAM6_ATOM_DENVERTON,		idle_cpu_dnv),
 	{}
 };
-MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids);
 
 /*
  * intel_idle_probe()
@@ -1155,7 +1187,10 @@ static unsigned long long irtl_2_usec(unsigned long long irtl)
 {
 	unsigned long long ns;
 
-	ns = irtl_ns_units[(irtl >> 10) & 0x3];
+	if (!irtl)
+		return 0;
+
+	ns = irtl_ns_units[(irtl >> 10) & 0x7];
 
 	return div64_u64((irtl & 0x3FF) * ns, 1000);
 }
@@ -1168,43 +1203,39 @@ static unsigned long long irtl_2_usec(unsigned long long irtl)
 static void bxt_idle_state_table_update(void)
 {
 	unsigned long long msr;
+	unsigned int usec;
 
 	rdmsrl(MSR_PKGC6_IRTL, msr);
-	if (msr) {
-		unsigned int usec = irtl_2_usec(msr);
-
+	usec = irtl_2_usec(msr);
+	if (usec) {
 		bxt_cstates[2].exit_latency = usec;
 		bxt_cstates[2].target_residency = usec;
 	}
 
 	rdmsrl(MSR_PKGC7_IRTL, msr);
-	if (msr) {
-		unsigned int usec = irtl_2_usec(msr);
-
+	usec = irtl_2_usec(msr);
+	if (usec) {
 		bxt_cstates[3].exit_latency = usec;
 		bxt_cstates[3].target_residency = usec;
 	}
 
 	rdmsrl(MSR_PKGC8_IRTL, msr);
-	if (msr) {
-		unsigned int usec = irtl_2_usec(msr);
-
+	usec = irtl_2_usec(msr);
+	if (usec) {
 		bxt_cstates[4].exit_latency = usec;
 		bxt_cstates[4].target_residency = usec;
 	}
 
 	rdmsrl(MSR_PKGC9_IRTL, msr);
-	if (msr) {
-		unsigned int usec = irtl_2_usec(msr);
-
+	usec = irtl_2_usec(msr);
+	if (usec) {
 		bxt_cstates[5].exit_latency = usec;
 		bxt_cstates[5].target_residency = usec;
 	}
 
 	rdmsrl(MSR_PKGC10_IRTL, msr);
-	if (msr) {
-		unsigned int usec = irtl_2_usec(msr);
-
+	usec = irtl_2_usec(msr);
+	if (usec) {
 		bxt_cstates[6].exit_latency = usec;
 		bxt_cstates[6].target_residency = usec;
 	}
@@ -1416,34 +1447,12 @@ static int __init intel_idle_init(void)
 
 	return 0;
 }
+device_initcall(intel_idle_init);
 
-static void __exit intel_idle_exit(void)
-{
-	struct cpuidle_device *dev;
-	int i;
-
-	cpu_notifier_register_begin();
-
-	if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE)
-		on_each_cpu(__setup_broadcast_timer, (void *)false, 1);
-	__unregister_cpu_notifier(&cpu_hotplug_notifier);
-
-	for_each_possible_cpu(i) {
-		dev = per_cpu_ptr(intel_idle_cpuidle_devices, i);
-		cpuidle_unregister_device(dev);
-	}
-
-	cpu_notifier_register_done();
-
-	cpuidle_unregister_driver(&intel_idle_driver);
-	free_percpu(intel_idle_cpuidle_devices);
-}
-
-module_init(intel_idle_init);
-module_exit(intel_idle_exit);
-
+/*
+ * We are not really modular, but we used to support that.  Meaning we also
+ * support "intel_idle.max_cstate=..." at boot and also a read-only export of
+ * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
+ * is the easiest way (currently) to continue doing that.
+ */
 module_param(max_cstate, int, 0444);
-
-MODULE_AUTHOR("Len Brown <len.brown@intel.com>");
-MODULE_DESCRIPTION("Cpuidle driver for Intel Hardware v" INTEL_IDLE_VERSION);
-MODULE_LICENSE("GPL");
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index c8b4dbdd1bdd..badbddc683f0 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -530,8 +530,8 @@ static const struct pci_platform_pm_ops *pci_platform_pm;
 
 int pci_set_platform_pm(const struct pci_platform_pm_ops *ops)
 {
-	if (!ops->is_manageable || !ops->set_state || !ops->choose_state
-	    || !ops->sleep_wake)
+	if (!ops->is_manageable || !ops->set_state || !ops->choose_state ||
+	    !ops->sleep_wake || !ops->run_wake || !ops->need_resume)
 		return -EINVAL;
 	pci_platform_pm = ops;
 	return 0;
diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index 2e8f2be5b6f9..fbab29dfa793 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -336,14 +336,14 @@ static int release_zone(struct powercap_zone *power_zone)
 
 static int find_nr_power_limit(struct rapl_domain *rd)
 {
-	int i;
+	int i, nr_pl = 0;
 
 	for (i = 0; i < NR_POWER_LIMITS; i++) {
-		if (rd->rpl[i].name == NULL)
-			break;
+		if (rd->rpl[i].name)
+			nr_pl++;
 	}
 
-	return i;
+	return nr_pl;
 }
 
 static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
@@ -426,15 +426,38 @@ static const struct powercap_zone_ops zone_ops[] = {
 	},
 };
 
-static int set_power_limit(struct powercap_zone *power_zone, int id,
+
+/*
+ * Constraint index used by powercap can be different than power limit (PL)
+ * index in that some  PLs maybe missing due to non-existant MSRs. So we
+ * need to convert here by finding the valid PLs only (name populated).
+ */
+static int contraint_to_pl(struct rapl_domain *rd, int cid)
+{
+	int i, j;
+
+	for (i = 0, j = 0; i < NR_POWER_LIMITS; i++) {
+		if ((rd->rpl[i].name) && j++ == cid) {
+			pr_debug("%s: index %d\n", __func__, i);
+			return i;
+		}
+	}
+
+	return -EINVAL;
+}
+
+static int set_power_limit(struct powercap_zone *power_zone, int cid,
 			u64 power_limit)
 {
 	struct rapl_domain *rd;
 	struct rapl_package *rp;
 	int ret = 0;
+	int id;
 
 	get_online_cpus();
 	rd = power_zone_to_rapl_domain(power_zone);
+	id = contraint_to_pl(rd, cid);
+
 	rp = rd->rp;
 
 	if (rd->state & DOMAIN_STATE_BIOS_LOCKED) {
@@ -461,16 +484,18 @@ set_exit:
 	return ret;
 }
 
-static int get_current_power_limit(struct powercap_zone *power_zone, int id,
+static int get_current_power_limit(struct powercap_zone *power_zone, int cid,
 					u64 *data)
 {
 	struct rapl_domain *rd;
 	u64 val;
 	int prim;
 	int ret = 0;
+	int id;
 
 	get_online_cpus();
 	rd = power_zone_to_rapl_domain(power_zone);
+	id = contraint_to_pl(rd, cid);
 	switch (rd->rpl[id].prim_id) {
 	case PL1_ENABLE:
 		prim = POWER_LIMIT1;
@@ -492,14 +517,17 @@ static int get_current_power_limit(struct powercap_zone *power_zone, int id,
 	return ret;
 }
 
-static int set_time_window(struct powercap_zone *power_zone, int id,
+static int set_time_window(struct powercap_zone *power_zone, int cid,
 								u64 window)
 {
 	struct rapl_domain *rd;
 	int ret = 0;
+	int id;
 
 	get_online_cpus();
 	rd = power_zone_to_rapl_domain(power_zone);
+	id = contraint_to_pl(rd, cid);
+
 	switch (rd->rpl[id].prim_id) {
 	case PL1_ENABLE:
 		rapl_write_data_raw(rd, TIME_WINDOW1, window);
@@ -514,14 +542,17 @@ static int set_time_window(struct powercap_zone *power_zone, int id,
 	return ret;
 }
 
-static int get_time_window(struct powercap_zone *power_zone, int id, u64 *data)
+static int get_time_window(struct powercap_zone *power_zone, int cid, u64 *data)
 {
 	struct rapl_domain *rd;
 	u64 val;
 	int ret = 0;
+	int id;
 
 	get_online_cpus();
 	rd = power_zone_to_rapl_domain(power_zone);
+	id = contraint_to_pl(rd, cid);
+
 	switch (rd->rpl[id].prim_id) {
 	case PL1_ENABLE:
 		ret = rapl_read_data_raw(rd, TIME_WINDOW1, true, &val);
@@ -540,15 +571,17 @@ static int get_time_window(struct powercap_zone *power_zone, int id, u64 *data)
 	return ret;
 }
 
-static const char *get_constraint_name(struct powercap_zone *power_zone, int id)
+static const char *get_constraint_name(struct powercap_zone *power_zone, int cid)
 {
-	struct rapl_power_limit *rpl;
 	struct rapl_domain *rd;
+	int id;
 
 	rd = power_zone_to_rapl_domain(power_zone);
-	rpl = (struct rapl_power_limit *) &rd->rpl[id];
+	id = contraint_to_pl(rd, cid);
+	if (id >= 0)
+		return rd->rpl[id].name;
 
-	return rpl->name;
+	return NULL;
 }
 
 
@@ -1101,6 +1134,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
 	RAPL_CPU(INTEL_FAM6_SANDYBRIDGE_X,	rapl_defaults_core),
 
 	RAPL_CPU(INTEL_FAM6_IVYBRIDGE,		rapl_defaults_core),
+	RAPL_CPU(INTEL_FAM6_IVYBRIDGE_X,	rapl_defaults_core),
 
 	RAPL_CPU(INTEL_FAM6_HASWELL_CORE,	rapl_defaults_core),
 	RAPL_CPU(INTEL_FAM6_HASWELL_ULT,	rapl_defaults_core),
@@ -1123,6 +1157,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
 	RAPL_CPU(INTEL_FAM6_ATOM_MERRIFIELD1,	rapl_defaults_tng),
 	RAPL_CPU(INTEL_FAM6_ATOM_MERRIFIELD2,	rapl_defaults_ann),
 	RAPL_CPU(INTEL_FAM6_ATOM_GOLDMONT,	rapl_defaults_core),
+	RAPL_CPU(INTEL_FAM6_ATOM_DENVERTON,	rapl_defaults_core),
 
 	RAPL_CPU(INTEL_FAM6_XEON_PHI_KNL,	rapl_defaults_hsw_server),
 	{}
@@ -1381,6 +1416,37 @@ static int rapl_check_domain(int cpu, int domain)
 	return 0;
 }
 
+
+/*
+ * Check if power limits are available. Two cases when they are not available:
+ * 1. Locked by BIOS, in this case we still provide read-only access so that
+ *    users can see what limit is set by the BIOS.
+ * 2. Some CPUs make some domains monitoring only which means PLx MSRs may not
+ *    exist at all. In this case, we do not show the contraints in powercap.
+ *
+ * Called after domains are detected and initialized.
+ */
+static void rapl_detect_powerlimit(struct rapl_domain *rd)
+{
+	u64 val64;
+	int i;
+
+	/* check if the domain is locked by BIOS, ignore if MSR doesn't exist */
+	if (!rapl_read_data_raw(rd, FW_LOCK, false, &val64)) {
+		if (val64) {
+			pr_info("RAPL package %d domain %s locked by BIOS\n",
+				rd->rp->id, rd->name);
+			rd->state |= DOMAIN_STATE_BIOS_LOCKED;
+		}
+	}
+	/* check if power limit MSRs exists, otherwise domain is monitoring only */
+	for (i = 0; i < NR_POWER_LIMITS; i++) {
+		int prim = rd->rpl[i].prim_id;
+		if (rapl_read_data_raw(rd, prim, false, &val64))
+			rd->rpl[i].name = NULL;
+	}
+}
+
 /* Detect active and valid domains for the given CPU, caller must
  * ensure the CPU belongs to the targeted package and CPU hotlug is disabled.
  */
@@ -1389,7 +1455,6 @@ static int rapl_detect_domains(struct rapl_package *rp, int cpu)
 	int i;
 	int ret = 0;
 	struct rapl_domain *rd;
-	u64 locked;
 
 	for (i = 0; i < RAPL_DOMAIN_MAX; i++) {
 		/* use physical package id to read counters */
@@ -1400,7 +1465,7 @@ static int rapl_detect_domains(struct rapl_package *rp, int cpu)
 	}
 	rp->nr_domains = bitmap_weight(&rp->domain_map,	RAPL_DOMAIN_MAX);
 	if (!rp->nr_domains) {
-		pr_err("no valid rapl domains found in package %d\n", rp->id);
+		pr_debug("no valid rapl domains found in package %d\n", rp->id);
 		ret = -ENODEV;
 		goto done;
 	}
@@ -1414,17 +1479,9 @@ static int rapl_detect_domains(struct rapl_package *rp, int cpu)
 	}
 	rapl_init_domains(rp);
 
-	for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
-		/* check if the domain is locked by BIOS */
-		ret = rapl_read_data_raw(rd, FW_LOCK, false, &locked);
-		if (ret)
-			return ret;
-		if (locked) {
-			pr_info("RAPL package %d domain %s locked by BIOS\n",
-				rp->id, rd->name);
-			rd->state |= DOMAIN_STATE_BIOS_LOCKED;
-		}
-	}
+	for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++)
+		rapl_detect_powerlimit(rd);
+
 
 
 done:
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 5b4b47ed948b..3788ed74c9ab 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -787,22 +787,34 @@ __cpufreq_cooling_register(struct device_node *np,
 			const struct cpumask *clip_cpus, u32 capacitance,
 			get_static_t plat_static_func)
 {
+	struct cpufreq_policy *policy;
 	struct thermal_cooling_device *cool_dev;
 	struct cpufreq_cooling_device *cpufreq_dev;
 	char dev_name[THERMAL_NAME_LENGTH];
 	struct cpufreq_frequency_table *pos, *table;
+	struct cpumask temp_mask;
 	unsigned int freq, i, num_cpus;
 	int ret;
 
-	table = cpufreq_frequency_get_table(cpumask_first(clip_cpus));
+	cpumask_and(&temp_mask, clip_cpus, cpu_online_mask);
+	policy = cpufreq_cpu_get(cpumask_first(&temp_mask));
+	if (!policy) {
+		pr_debug("%s: CPUFreq policy not found\n", __func__);
+		return ERR_PTR(-EPROBE_DEFER);
+	}
+
+	table = policy->freq_table;
 	if (!table) {
 		pr_debug("%s: CPUFreq table not found\n", __func__);
-		return ERR_PTR(-EPROBE_DEFER);
+		cool_dev = ERR_PTR(-ENODEV);
+		goto put_policy;
 	}
 
 	cpufreq_dev = kzalloc(sizeof(*cpufreq_dev), GFP_KERNEL);
-	if (!cpufreq_dev)
-		return ERR_PTR(-ENOMEM);
+	if (!cpufreq_dev) {
+		cool_dev = ERR_PTR(-ENOMEM);
+		goto put_policy;
+	}
 
 	num_cpus = cpumask_weight(clip_cpus);
 	cpufreq_dev->time_in_idle = kcalloc(num_cpus,
@@ -892,7 +904,7 @@ __cpufreq_cooling_register(struct device_node *np,
 					  CPUFREQ_POLICY_NOTIFIER);
 	mutex_unlock(&cooling_cpufreq_lock);
 
-	return cool_dev;
+	goto put_policy;
 
 remove_idr:
 	release_idr(&cpufreq_idr, cpufreq_dev->id);
@@ -906,6 +918,8 @@ free_time_in_idle:
 	kfree(cpufreq_dev->time_in_idle);
 free_cdev:
 	kfree(cpufreq_dev);
+put_policy:
+	cpufreq_cpu_put(policy);
 
 	return cool_dev;
 }
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 4e81e08db752..631ba33bbe9f 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -36,6 +36,12 @@
 
 struct cpufreq_governor;
 
+enum cpufreq_table_sorting {
+	CPUFREQ_TABLE_UNSORTED,
+	CPUFREQ_TABLE_SORTED_ASCENDING,
+	CPUFREQ_TABLE_SORTED_DESCENDING
+};
+
 struct cpufreq_freqs {
 	unsigned int cpu;	/* cpu nr */
 	unsigned int old;
@@ -87,6 +93,7 @@ struct cpufreq_policy {
 
 	struct cpufreq_user_policy user_policy;
 	struct cpufreq_frequency_table	*freq_table;
+	enum cpufreq_table_sorting freq_table_sorted;
 
 	struct list_head        policy_list;
 	struct kobject		kobj;
@@ -113,6 +120,10 @@ struct cpufreq_policy {
 	bool			fast_switch_possible;
 	bool			fast_switch_enabled;
 
+	 /* Cached frequency lookup from cpufreq_driver_resolve_freq. */
+	unsigned int cached_target_freq;
+	int cached_resolved_idx;
+
 	/* Synchronization for frequency transitions */
 	bool			transition_ongoing; /* Tracks transition status */
 	spinlock_t		transition_lock;
@@ -185,6 +196,18 @@ static inline unsigned int cpufreq_quick_get_max(unsigned int cpu)
 static inline void disable_cpufreq(void) { }
 #endif
 
+#ifdef CONFIG_CPU_FREQ_STAT
+void cpufreq_stats_create_table(struct cpufreq_policy *policy);
+void cpufreq_stats_free_table(struct cpufreq_policy *policy);
+void cpufreq_stats_record_transition(struct cpufreq_policy *policy,
+				     unsigned int new_freq);
+#else
+static inline void cpufreq_stats_create_table(struct cpufreq_policy *policy) { }
+static inline void cpufreq_stats_free_table(struct cpufreq_policy *policy) { }
+static inline void cpufreq_stats_record_transition(struct cpufreq_policy *policy,
+						   unsigned int new_freq) { }
+#endif /* CONFIG_CPU_FREQ_STAT */
+
 /*********************************************************************
  *                      CPUFREQ DRIVER INTERFACE                     *
  *********************************************************************/
@@ -251,6 +274,16 @@ struct cpufreq_driver {
 					unsigned int index);
 	unsigned int	(*fast_switch)(struct cpufreq_policy *policy,
 				       unsigned int target_freq);
+
+	/*
+	 * Caches and returns the lowest driver-supported frequency greater than
+	 * or equal to the target frequency, subject to any driver limitations.
+	 * Does not set the frequency. Only to be implemented for drivers with
+	 * target().
+	 */
+	unsigned int	(*resolve_freq)(struct cpufreq_policy *policy,
+					unsigned int target_freq);
+
 	/*
 	 * Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION
 	 * unset.
@@ -455,18 +488,13 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div,
 #define MIN_LATENCY_MULTIPLIER		(20)
 #define TRANSITION_LATENCY_LIMIT	(10 * 1000 * 1000)
 
-/* Governor Events */
-#define CPUFREQ_GOV_START	1
-#define CPUFREQ_GOV_STOP	2
-#define CPUFREQ_GOV_LIMITS	3
-#define CPUFREQ_GOV_POLICY_INIT	4
-#define CPUFREQ_GOV_POLICY_EXIT	5
-
 struct cpufreq_governor {
 	char	name[CPUFREQ_NAME_LEN];
-	int	initialized;
-	int	(*governor)	(struct cpufreq_policy *policy,
-				 unsigned int event);
+	int	(*init)(struct cpufreq_policy *policy);
+	void	(*exit)(struct cpufreq_policy *policy);
+	int	(*start)(struct cpufreq_policy *policy);
+	void	(*stop)(struct cpufreq_policy *policy);
+	void	(*limits)(struct cpufreq_policy *policy);
 	ssize_t	(*show_setspeed)	(struct cpufreq_policy *policy,
 					 char *buf);
 	int	(*store_setspeed)	(struct cpufreq_policy *policy,
@@ -487,12 +515,22 @@ int cpufreq_driver_target(struct cpufreq_policy *policy,
 int __cpufreq_driver_target(struct cpufreq_policy *policy,
 				   unsigned int target_freq,
 				   unsigned int relation);
+unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy,
+					 unsigned int target_freq);
 int cpufreq_register_governor(struct cpufreq_governor *governor);
 void cpufreq_unregister_governor(struct cpufreq_governor *governor);
 
 struct cpufreq_governor *cpufreq_default_governor(void);
 struct cpufreq_governor *cpufreq_fallback_governor(void);
 
+static inline void cpufreq_policy_apply_limits(struct cpufreq_policy *policy)
+{
+	if (policy->max < policy->cur)
+		__cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
+	else if (policy->min > policy->cur)
+		__cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L);
+}
+
 /* Governor attribute set */
 struct gov_attr_set {
 	struct kobject kobj;
@@ -582,11 +620,9 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
 				   struct cpufreq_frequency_table *table);
 int cpufreq_generic_frequency_table_verify(struct cpufreq_policy *policy);
 
-int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
-				   struct cpufreq_frequency_table *table,
-				   unsigned int target_freq,
-				   unsigned int relation,
-				   unsigned int *index);
+int cpufreq_table_index_unsorted(struct cpufreq_policy *policy,
+				 unsigned int target_freq,
+				 unsigned int relation);
 int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy,
 		unsigned int freq);
 
@@ -597,6 +633,227 @@ int cpufreq_boost_trigger_state(int state);
 int cpufreq_boost_enabled(void);
 int cpufreq_enable_boost_support(void);
 bool policy_has_boost_freq(struct cpufreq_policy *policy);
+
+/* Find lowest freq at or above target in a table in ascending order */
+static inline int cpufreq_table_find_index_al(struct cpufreq_policy *policy,
+					      unsigned int target_freq)
+{
+	struct cpufreq_frequency_table *table = policy->freq_table;
+	unsigned int freq;
+	int i, best = -1;
+
+	for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
+		freq = table[i].frequency;
+
+		if (freq >= target_freq)
+			return i;
+
+		best = i;
+	}
+
+	return best;
+}
+
+/* Find lowest freq at or above target in a table in descending order */
+static inline int cpufreq_table_find_index_dl(struct cpufreq_policy *policy,
+					      unsigned int target_freq)
+{
+	struct cpufreq_frequency_table *table = policy->freq_table;
+	unsigned int freq;
+	int i, best = -1;
+
+	for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
+		freq = table[i].frequency;
+
+		if (freq == target_freq)
+			return i;
+
+		if (freq > target_freq) {
+			best = i;
+			continue;
+		}
+
+		/* No freq found above target_freq */
+		if (best == -1)
+			return i;
+
+		return best;
+	}
+
+	return best;
+}
+
+/* Works only on sorted freq-tables */
+static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy,
+					     unsigned int target_freq)
+{
+	target_freq = clamp_val(target_freq, policy->min, policy->max);
+
+	if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING)
+		return cpufreq_table_find_index_al(policy, target_freq);
+	else
+		return cpufreq_table_find_index_dl(policy, target_freq);
+}
+
+/* Find highest freq at or below target in a table in ascending order */
+static inline int cpufreq_table_find_index_ah(struct cpufreq_policy *policy,
+					      unsigned int target_freq)
+{
+	struct cpufreq_frequency_table *table = policy->freq_table;
+	unsigned int freq;
+	int i, best = -1;
+
+	for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
+		freq = table[i].frequency;
+
+		if (freq == target_freq)
+			return i;
+
+		if (freq < target_freq) {
+			best = i;
+			continue;
+		}
+
+		/* No freq found below target_freq */
+		if (best == -1)
+			return i;
+
+		return best;
+	}
+
+	return best;
+}
+
+/* Find highest freq at or below target in a table in descending order */
+static inline int cpufreq_table_find_index_dh(struct cpufreq_policy *policy,
+					      unsigned int target_freq)
+{
+	struct cpufreq_frequency_table *table = policy->freq_table;
+	unsigned int freq;
+	int i, best = -1;
+
+	for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
+		freq = table[i].frequency;
+
+		if (freq <= target_freq)
+			return i;
+
+		best = i;
+	}
+
+	return best;
+}
+
+/* Works only on sorted freq-tables */
+static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy,
+					     unsigned int target_freq)
+{
+	target_freq = clamp_val(target_freq, policy->min, policy->max);
+
+	if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING)
+		return cpufreq_table_find_index_ah(policy, target_freq);
+	else
+		return cpufreq_table_find_index_dh(policy, target_freq);
+}
+
+/* Find closest freq to target in a table in ascending order */
+static inline int cpufreq_table_find_index_ac(struct cpufreq_policy *policy,
+					      unsigned int target_freq)
+{
+	struct cpufreq_frequency_table *table = policy->freq_table;
+	unsigned int freq;
+	int i, best = -1;
+
+	for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
+		freq = table[i].frequency;
+
+		if (freq == target_freq)
+			return i;
+
+		if (freq < target_freq) {
+			best = i;
+			continue;
+		}
+
+		/* No freq found below target_freq */
+		if (best == -1)
+			return i;
+
+		/* Choose the closest freq */
+		if (target_freq - table[best].frequency > freq - target_freq)
+			return i;
+
+		return best;
+	}
+
+	return best;
+}
+
+/* Find closest freq to target in a table in descending order */
+static inline int cpufreq_table_find_index_dc(struct cpufreq_policy *policy,
+					      unsigned int target_freq)
+{
+	struct cpufreq_frequency_table *table = policy->freq_table;
+	unsigned int freq;
+	int i, best = -1;
+
+	for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
+		freq = table[i].frequency;
+
+		if (freq == target_freq)
+			return i;
+
+		if (freq > target_freq) {
+			best = i;
+			continue;
+		}
+
+		/* No freq found above target_freq */
+		if (best == -1)
+			return i;
+
+		/* Choose the closest freq */
+		if (table[best].frequency - target_freq > target_freq - freq)
+			return i;
+
+		return best;
+	}
+
+	return best;
+}
+
+/* Works only on sorted freq-tables */
+static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy,
+					     unsigned int target_freq)
+{
+	target_freq = clamp_val(target_freq, policy->min, policy->max);
+
+	if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING)
+		return cpufreq_table_find_index_ac(policy, target_freq);
+	else
+		return cpufreq_table_find_index_dc(policy, target_freq);
+}
+
+static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
+						 unsigned int target_freq,
+						 unsigned int relation)
+{
+	if (unlikely(policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED))
+		return cpufreq_table_index_unsorted(policy, target_freq,
+						    relation);
+
+	switch (relation) {
+	case CPUFREQ_RELATION_L:
+		return cpufreq_table_find_index_l(policy, target_freq);
+	case CPUFREQ_RELATION_H:
+		return cpufreq_table_find_index_h(policy, target_freq);
+	case CPUFREQ_RELATION_C:
+		return cpufreq_table_find_index_c(policy, target_freq);
+	default:
+		pr_err("%s: Invalid relation: %d\n", __func__, relation);
+		return -EINVAL;
+	}
+}
 #else
 static inline int cpufreq_boost_trigger_state(int state)
 {
@@ -617,8 +874,6 @@ static inline bool policy_has_boost_freq(struct cpufreq_policy *policy)
 	return false;
 }
 #endif
-/* the following funtion is for cpufreq core use only */
-struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu);
 
 /* the following are really really optional */
 extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs;
diff --git a/include/linux/pm_clock.h b/include/linux/pm_clock.h
index 308d6044f153..09779b0ae720 100644
--- a/include/linux/pm_clock.h
+++ b/include/linux/pm_clock.h
@@ -42,6 +42,7 @@ extern int pm_clk_create(struct device *dev);
 extern void pm_clk_destroy(struct device *dev);
 extern int pm_clk_add(struct device *dev, const char *con_id);
 extern int pm_clk_add_clk(struct device *dev, struct clk *clk);
+extern int of_pm_clk_add_clk(struct device *dev, const char *name);
 extern int of_pm_clk_add_clks(struct device *dev);
 extern void pm_clk_remove(struct device *dev, const char *con_id);
 extern void pm_clk_remove_clk(struct device *dev, struct clk *clk);
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 39285c7bd3f5..31fec858088c 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -57,7 +57,6 @@ struct generic_pm_domain {
 	unsigned int device_count;	/* Number of devices */
 	unsigned int suspended_count;	/* System suspend device counter */
 	unsigned int prepared_count;	/* Suspend counter of prepared devices */
-	bool suspend_power_off;	/* Power status before system suspend */
 	int (*power_off)(struct generic_pm_domain *domain);
 	int (*power_on)(struct generic_pm_domain *domain);
 	struct gpd_dev_ops dev_ops;
@@ -128,8 +127,8 @@ extern int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
 				  struct generic_pm_domain *new_subdomain);
 extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
 				     struct generic_pm_domain *target);
-extern void pm_genpd_init(struct generic_pm_domain *genpd,
-			  struct dev_power_governor *gov, bool is_off);
+extern int pm_genpd_init(struct generic_pm_domain *genpd,
+			 struct dev_power_governor *gov, bool is_off);
 
 extern struct dev_power_governor simple_qos_governor;
 extern struct dev_power_governor pm_domain_always_on_gov;
@@ -164,9 +163,10 @@ static inline int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
 {
 	return -ENOSYS;
 }
-static inline void pm_genpd_init(struct generic_pm_domain *genpd,
-				 struct dev_power_governor *gov, bool is_off)
+static inline int pm_genpd_init(struct generic_pm_domain *genpd,
+				struct dev_power_governor *gov, bool is_off)
 {
+	return -ENOSYS;
 }
 #endif
 
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 8b6ec7ef0854..7693e39b14fe 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -18,12 +18,11 @@ static inline void pm_set_vt_switch(int do_switch)
 #endif
 
 #ifdef CONFIG_VT_CONSOLE_SLEEP
-extern int pm_prepare_console(void);
+extern void pm_prepare_console(void);
 extern void pm_restore_console(void);
 #else
-static inline int pm_prepare_console(void)
+static inline void pm_prepare_console(void)
 {
-	return 0;
 }
 
 static inline void pm_restore_console(void)
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index cb880a14cc39..eb4f717705ba 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -1,6 +1,8 @@
 
 ccflags-$(CONFIG_PM_DEBUG)	:= -DDEBUG
 
+KASAN_SANITIZE_snapshot.o	:= n
+
 obj-y				+= qos.o
 obj-$(CONFIG_PM)		+= main.o
 obj-$(CONFIG_VT_CONSOLE_SLEEP)	+= console.o
diff --git a/kernel/power/console.c b/kernel/power/console.c
index aba9c545a0e3..0e781798b0b3 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -126,17 +126,17 @@ out:
 	return ret;
 }
 
-int pm_prepare_console(void)
+void pm_prepare_console(void)
 {
 	if (!pm_vt_switch())
-		return 0;
+		return;
 
 	orig_fgconsole = vt_move_to_console(SUSPEND_CONSOLE, 1);
 	if (orig_fgconsole < 0)
-		return 1;
+		return;
 
 	orig_kmsg = vt_kmsg_redirect(SUSPEND_CONSOLE);
-	return 0;
+	return;
 }
 
 void pm_restore_console(void)
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 9021387c6ff4..a881c6a7ba74 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -52,6 +52,7 @@ enum {
 #ifdef CONFIG_SUSPEND
 	HIBERNATION_SUSPEND,
 #endif
+	HIBERNATION_TEST_RESUME,
 	/* keep last */
 	__HIBERNATION_AFTER_LAST
 };
@@ -409,6 +410,11 @@ int hibernation_snapshot(int platform_mode)
 	goto Close;
 }
 
+int __weak hibernate_resume_nonboot_cpu_disable(void)
+{
+	return disable_nonboot_cpus();
+}
+
 /**
  * resume_target_kernel - Restore system state from a hibernation image.
  * @platform_mode: Whether or not to use the platform driver.
@@ -433,7 +439,7 @@ static int resume_target_kernel(bool platform_mode)
 	if (error)
 		goto Cleanup;
 
-	error = disable_nonboot_cpus();
+	error = hibernate_resume_nonboot_cpu_disable();
 	if (error)
 		goto Enable_cpus;
 
@@ -642,12 +648,39 @@ static void power_down(void)
 		cpu_relax();
 }
 
+static int load_image_and_restore(void)
+{
+	int error;
+	unsigned int flags;
+
+	pr_debug("PM: Loading hibernation image.\n");
+
+	lock_device_hotplug();
+	error = create_basic_memory_bitmaps();
+	if (error)
+		goto Unlock;
+
+	error = swsusp_read(&flags);
+	swsusp_close(FMODE_READ);
+	if (!error)
+		hibernation_restore(flags & SF_PLATFORM_MODE);
+
+	printk(KERN_ERR "PM: Failed to load hibernation image, recovering.\n");
+	swsusp_free();
+	free_basic_memory_bitmaps();
+ Unlock:
+	unlock_device_hotplug();
+
+	return error;
+}
+
 /**
  * hibernate - Carry out system hibernation, including saving the image.
  */
 int hibernate(void)
 {
-	int error;
+	int error, nr_calls = 0;
+	bool snapshot_test = false;
 
 	if (!hibernation_available()) {
 		pr_debug("PM: Hibernation not available.\n");
@@ -662,9 +695,11 @@ int hibernate(void)
 	}
 
 	pm_prepare_console();
-	error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
-	if (error)
+	error = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &nr_calls);
+	if (error) {
+		nr_calls--;
 		goto Exit;
+	}
 
 	printk(KERN_INFO "PM: Syncing filesystems ... ");
 	sys_sync();
@@ -697,8 +732,12 @@ int hibernate(void)
 		pr_debug("PM: writing image.\n");
 		error = swsusp_write(flags);
 		swsusp_free();
-		if (!error)
-			power_down();
+		if (!error) {
+			if (hibernation_mode == HIBERNATION_TEST_RESUME)
+				snapshot_test = true;
+			else
+				power_down();
+		}
 		in_suspend = 0;
 		pm_restore_gfp_mask();
 	} else {
@@ -709,12 +748,18 @@ int hibernate(void)
 	free_basic_memory_bitmaps();
  Thaw:
 	unlock_device_hotplug();
+	if (snapshot_test) {
+		pr_debug("PM: Checking hibernation image\n");
+		error = swsusp_check();
+		if (!error)
+			error = load_image_and_restore();
+	}
 	thaw_processes();
 
 	/* Don't bother checking whether freezer_test_done is true */
 	freezer_test_done = false;
  Exit:
-	pm_notifier_call_chain(PM_POST_HIBERNATION);
+	__pm_notifier_call_chain(PM_POST_HIBERNATION, nr_calls, NULL);
 	pm_restore_console();
 	atomic_inc(&snapshot_device_available);
  Unlock:
@@ -740,8 +785,7 @@ int hibernate(void)
  */
 static int software_resume(void)
 {
-	int error;
-	unsigned int flags;
+	int error, nr_calls = 0;
 
 	/*
 	 * If the user said "noresume".. bail out early.
@@ -827,35 +871,20 @@ static int software_resume(void)
 	}
 
 	pm_prepare_console();
-	error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
-	if (error)
+	error = __pm_notifier_call_chain(PM_RESTORE_PREPARE, -1, &nr_calls);
+	if (error) {
+		nr_calls--;
 		goto Close_Finish;
+	}
 
 	pr_debug("PM: Preparing processes for restore.\n");
 	error = freeze_processes();
 	if (error)
 		goto Close_Finish;
-
-	pr_debug("PM: Loading hibernation image.\n");
-
-	lock_device_hotplug();
-	error = create_basic_memory_bitmaps();
-	if (error)
-		goto Thaw;
-
-	error = swsusp_read(&flags);
-	swsusp_close(FMODE_READ);
-	if (!error)
-		hibernation_restore(flags & SF_PLATFORM_MODE);
-
-	printk(KERN_ERR "PM: Failed to load hibernation image, recovering.\n");
-	swsusp_free();
-	free_basic_memory_bitmaps();
- Thaw:
-	unlock_device_hotplug();
+	error = load_image_and_restore();
 	thaw_processes();
  Finish:
-	pm_notifier_call_chain(PM_POST_RESTORE);
+	__pm_notifier_call_chain(PM_POST_RESTORE, nr_calls, NULL);
 	pm_restore_console();
 	atomic_inc(&snapshot_device_available);
 	/* For success case, the suspend path will release the lock */
@@ -878,6 +907,7 @@ static const char * const hibernation_modes[] = {
 #ifdef CONFIG_SUSPEND
 	[HIBERNATION_SUSPEND]	= "suspend",
 #endif
+	[HIBERNATION_TEST_RESUME]	= "test_resume",
 };
 
 /*
@@ -924,6 +954,7 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,
 #ifdef CONFIG_SUSPEND
 		case HIBERNATION_SUSPEND:
 #endif
+		case HIBERNATION_TEST_RESUME:
 			break;
 		case HIBERNATION_PLATFORM:
 			if (hibernation_ops)
@@ -970,6 +1001,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
 #ifdef CONFIG_SUSPEND
 		case HIBERNATION_SUSPEND:
 #endif
+		case HIBERNATION_TEST_RESUME:
 			hibernation_mode = mode;
 			break;
 		case HIBERNATION_PLATFORM:
@@ -1115,13 +1147,16 @@ static int __init resume_offset_setup(char *str)
 
 static int __init hibernate_setup(char *str)
 {
-	if (!strncmp(str, "noresume", 8))
+	if (!strncmp(str, "noresume", 8)) {
 		noresume = 1;
-	else if (!strncmp(str, "nocompress", 10))
+	} else if (!strncmp(str, "nocompress", 10)) {
 		nocompress = 1;
-	else if (!strncmp(str, "no", 2)) {
+	} else if (!strncmp(str, "no", 2)) {
 		noresume = 1;
 		nohibernate = 1;
+	} else if (IS_ENABLED(CONFIG_DEBUG_RODATA)
+		   && !strncmp(str, "protect_image", 13)) {
+		enable_restore_image_protection();
 	}
 	return 1;
 }
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 27946975eff0..5ea50b1b7595 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -38,12 +38,19 @@ int unregister_pm_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(unregister_pm_notifier);
 
-int pm_notifier_call_chain(unsigned long val)
+int __pm_notifier_call_chain(unsigned long val, int nr_to_call, int *nr_calls)
 {
-	int ret = blocking_notifier_call_chain(&pm_chain_head, val, NULL);
+	int ret;
+
+	ret = __blocking_notifier_call_chain(&pm_chain_head, val, NULL,
+						nr_to_call, nr_calls);
 
 	return notifier_to_errno(ret);
 }
+int pm_notifier_call_chain(unsigned long val)
+{
+	return __pm_notifier_call_chain(val, -1, NULL);
+}
 
 /* If set, devices may be suspended and resumed asynchronously. */
 int pm_async_enabled = 1;
diff --git a/kernel/power/power.h b/kernel/power/power.h
index efe1b3b17c88..242d8b827dd5 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -38,6 +38,8 @@ static inline char *check_image_kernel(struct swsusp_info *info)
 }
 #endif /* CONFIG_ARCH_HIBERNATION_HEADER */
 
+extern int hibernate_resume_nonboot_cpu_disable(void);
+
 /*
  * Keep some memory free so that I/O operations can succeed without paging
  * [Might this be more than 4 MB?]
@@ -59,6 +61,13 @@ extern int hibernation_snapshot(int platform_mode);
 extern int hibernation_restore(int platform_mode);
 extern int hibernation_platform_enter(void);
 
+#ifdef CONFIG_DEBUG_RODATA
+/* kernel/power/snapshot.c */
+extern void enable_restore_image_protection(void);
+#else
+static inline void enable_restore_image_protection(void) {}
+#endif /* CONFIG_DEBUG_RODATA */
+
 #else /* !CONFIG_HIBERNATION */
 
 static inline void hibernate_reserved_size_init(void) {}
@@ -200,6 +209,8 @@ static inline void suspend_test_finish(const char *label) {}
 
 #ifdef CONFIG_PM_SLEEP
 /* kernel/power/main.c */
+extern int __pm_notifier_call_chain(unsigned long val, int nr_to_call,
+				    int *nr_calls);
 extern int pm_notifier_call_chain(unsigned long val);
 #endif
 
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 0c2ee9761d57..8f27d5a8adf6 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -89,6 +89,9 @@ static int try_to_freeze_tasks(bool user_only)
 		       elapsed_msecs / 1000, elapsed_msecs % 1000,
 		       todo - wq_busy, wq_busy);
 
+		if (wq_busy)
+			show_workqueue_state();
+
 		if (!wakeup) {
 			read_lock(&tasklist_lock);
 			for_each_process_thread(g, p) {
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 3a970604308f..d90df926b59f 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -38,6 +38,43 @@
 
 #include "power.h"
 
+#ifdef CONFIG_DEBUG_RODATA
+static bool hibernate_restore_protection;
+static bool hibernate_restore_protection_active;
+
+void enable_restore_image_protection(void)
+{
+	hibernate_restore_protection = true;
+}
+
+static inline void hibernate_restore_protection_begin(void)
+{
+	hibernate_restore_protection_active = hibernate_restore_protection;
+}
+
+static inline void hibernate_restore_protection_end(void)
+{
+	hibernate_restore_protection_active = false;
+}
+
+static inline void hibernate_restore_protect_page(void *page_address)
+{
+	if (hibernate_restore_protection_active)
+		set_memory_ro((unsigned long)page_address, 1);
+}
+
+static inline void hibernate_restore_unprotect_page(void *page_address)
+{
+	if (hibernate_restore_protection_active)
+		set_memory_rw((unsigned long)page_address, 1);
+}
+#else
+static inline void hibernate_restore_protection_begin(void) {}
+static inline void hibernate_restore_protection_end(void) {}
+static inline void hibernate_restore_protect_page(void *page_address) {}
+static inline void hibernate_restore_unprotect_page(void *page_address) {}
+#endif /* CONFIG_DEBUG_RODATA */
+
 static int swsusp_page_is_free(struct page *);
 static void swsusp_set_page_forbidden(struct page *);
 static void swsusp_unset_page_forbidden(struct page *);
@@ -67,25 +104,32 @@ void __init hibernate_image_size_init(void)
 	image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE;
 }
 
-/* List of PBEs needed for restoring the pages that were allocated before
+/*
+ * List of PBEs needed for restoring the pages that were allocated before
  * the suspend and included in the suspend image, but have also been
  * allocated by the "resume" kernel, so their contents cannot be written
  * directly to their "original" page frames.
  */
 struct pbe *restore_pblist;
 
-/* Pointer to an auxiliary buffer (1 page) */
-static void *buffer;
+/* struct linked_page is used to build chains of pages */
 
-/**
- *	@safe_needed - on resume, for storing the PBE list and the image,
- *	we can only use memory pages that do not conflict with the pages
- *	used before suspend.  The unsafe pages have PageNosaveFree set
- *	and we count them using unsafe_pages.
- *
- *	Each allocated image page is marked as PageNosave and PageNosaveFree
- *	so that swsusp_free() can release it.
+#define LINKED_PAGE_DATA_SIZE	(PAGE_SIZE - sizeof(void *))
+
+struct linked_page {
+	struct linked_page *next;
+	char data[LINKED_PAGE_DATA_SIZE];
+} __packed;
+
+/*
+ * List of "safe" pages (ie. pages that were not used by the image kernel
+ * before hibernation) that may be used as temporary storage for image kernel
+ * memory contents.
  */
+static struct linked_page *safe_pages_list;
+
+/* Pointer to an auxiliary buffer (1 page) */
+static void *buffer;
 
 #define PG_ANY		0
 #define PG_SAFE		1
@@ -94,6 +138,19 @@ static void *buffer;
 
 static unsigned int allocated_unsafe_pages;
 
+/**
+ * get_image_page - Allocate a page for a hibernation image.
+ * @gfp_mask: GFP mask for the allocation.
+ * @safe_needed: Get pages that were not used before hibernation (restore only)
+ *
+ * During image restoration, for storing the PBE list and the image data, we can
+ * only use memory pages that do not conflict with the pages used before
+ * hibernation.  The "unsafe" pages have PageNosaveFree set and we count them
+ * using allocated_unsafe_pages.
+ *
+ * Each allocated image page is marked as PageNosave and PageNosaveFree so that
+ * swsusp_free() can release it.
+ */
 static void *get_image_page(gfp_t gfp_mask, int safe_needed)
 {
 	void *res;
@@ -113,9 +170,21 @@ static void *get_image_page(gfp_t gfp_mask, int safe_needed)
 	return res;
 }
 
+static void *__get_safe_page(gfp_t gfp_mask)
+{
+	if (safe_pages_list) {
+		void *ret = safe_pages_list;
+
+		safe_pages_list = safe_pages_list->next;
+		memset(ret, 0, PAGE_SIZE);
+		return ret;
+	}
+	return get_image_page(gfp_mask, PG_SAFE);
+}
+
 unsigned long get_safe_page(gfp_t gfp_mask)
 {
-	return (unsigned long)get_image_page(gfp_mask, PG_SAFE);
+	return (unsigned long)__get_safe_page(gfp_mask);
 }
 
 static struct page *alloc_image_page(gfp_t gfp_mask)
@@ -130,11 +199,22 @@ static struct page *alloc_image_page(gfp_t gfp_mask)
 	return page;
 }
 
+static void recycle_safe_page(void *page_address)
+{
+	struct linked_page *lp = page_address;
+
+	lp->next = safe_pages_list;
+	safe_pages_list = lp;
+}
+
 /**
- *	free_image_page - free page represented by @addr, allocated with
- *	get_image_page (page flags set by it must be cleared)
+ * free_image_page - Free a page allocated for hibernation image.
+ * @addr: Address of the page to free.
+ * @clear_nosave_free: If set, clear the PageNosaveFree bit for the page.
+ *
+ * The page to free should have been allocated by get_image_page() (page flags
+ * set by it are affected).
  */
-
 static inline void free_image_page(void *addr, int clear_nosave_free)
 {
 	struct page *page;
@@ -150,17 +230,8 @@ static inline void free_image_page(void *addr, int clear_nosave_free)
 	__free_page(page);
 }
 
-/* struct linked_page is used to build chains of pages */
-
-#define LINKED_PAGE_DATA_SIZE	(PAGE_SIZE - sizeof(void *))
-
-struct linked_page {
-	struct linked_page *next;
-	char data[LINKED_PAGE_DATA_SIZE];
-} __packed;
-
-static inline void
-free_list_of_pages(struct linked_page *list, int clear_page_nosave)
+static inline void free_list_of_pages(struct linked_page *list,
+				      int clear_page_nosave)
 {
 	while (list) {
 		struct linked_page *lp = list->next;
@@ -170,30 +241,28 @@ free_list_of_pages(struct linked_page *list, int clear_page_nosave)
 	}
 }
 
-/**
-  *	struct chain_allocator is used for allocating small objects out of
-  *	a linked list of pages called 'the chain'.
-  *
-  *	The chain grows each time when there is no room for a new object in
-  *	the current page.  The allocated objects cannot be freed individually.
-  *	It is only possible to free them all at once, by freeing the entire
-  *	chain.
-  *
-  *	NOTE: The chain allocator may be inefficient if the allocated objects
-  *	are not much smaller than PAGE_SIZE.
-  */
-
+/*
+ * struct chain_allocator is used for allocating small objects out of
+ * a linked list of pages called 'the chain'.
+ *
+ * The chain grows each time when there is no room for a new object in
+ * the current page.  The allocated objects cannot be freed individually.
+ * It is only possible to free them all at once, by freeing the entire
+ * chain.
+ *
+ * NOTE: The chain allocator may be inefficient if the allocated objects
+ * are not much smaller than PAGE_SIZE.
+ */
 struct chain_allocator {
 	struct linked_page *chain;	/* the chain */
 	unsigned int used_space;	/* total size of objects allocated out
-					 * of the current page
-					 */
+					   of the current page */
 	gfp_t gfp_mask;		/* mask for allocating pages */
 	int safe_needed;	/* if set, only "safe" pages are allocated */
 };
 
-static void
-chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed)
+static void chain_init(struct chain_allocator *ca, gfp_t gfp_mask,
+		       int safe_needed)
 {
 	ca->chain = NULL;
 	ca->used_space = LINKED_PAGE_DATA_SIZE;
@@ -208,7 +277,8 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
 	if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
 		struct linked_page *lp;
 
-		lp = get_image_page(ca->gfp_mask, ca->safe_needed);
+		lp = ca->safe_needed ? __get_safe_page(ca->gfp_mask) :
+					get_image_page(ca->gfp_mask, PG_ANY);
 		if (!lp)
 			return NULL;
 
@@ -222,44 +292,44 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
 }
 
 /**
- *	Data types related to memory bitmaps.
+ * Data types related to memory bitmaps.
  *
- *	Memory bitmap is a structure consiting of many linked lists of
- *	objects.  The main list's elements are of type struct zone_bitmap
- *	and each of them corresonds to one zone.  For each zone bitmap
- *	object there is a list of objects of type struct bm_block that
- *	represent each blocks of bitmap in which information is stored.
+ * Memory bitmap is a structure consiting of many linked lists of
+ * objects.  The main list's elements are of type struct zone_bitmap
+ * and each of them corresonds to one zone.  For each zone bitmap
+ * object there is a list of objects of type struct bm_block that
+ * represent each blocks of bitmap in which information is stored.
  *
- *	struct memory_bitmap contains a pointer to the main list of zone
- *	bitmap objects, a struct bm_position used for browsing the bitmap,
- *	and a pointer to the list of pages used for allocating all of the
- *	zone bitmap objects and bitmap block objects.
+ * struct memory_bitmap contains a pointer to the main list of zone
+ * bitmap objects, a struct bm_position used for browsing the bitmap,
+ * and a pointer to the list of pages used for allocating all of the
+ * zone bitmap objects and bitmap block objects.
  *
- *	NOTE: It has to be possible to lay out the bitmap in memory
- *	using only allocations of order 0.  Additionally, the bitmap is
- *	designed to work with arbitrary number of zones (this is over the
- *	top for now, but let's avoid making unnecessary assumptions ;-).
+ * NOTE: It has to be possible to lay out the bitmap in memory
+ * using only allocations of order 0.  Additionally, the bitmap is
+ * designed to work with arbitrary number of zones (this is over the
+ * top for now, but let's avoid making unnecessary assumptions ;-).
  *
- *	struct zone_bitmap contains a pointer to a list of bitmap block
- *	objects and a pointer to the bitmap block object that has been
- *	most recently used for setting bits.  Additionally, it contains the
- *	pfns that correspond to the start and end of the represented zone.
+ * struct zone_bitmap contains a pointer to a list of bitmap block
+ * objects and a pointer to the bitmap block object that has been
+ * most recently used for setting bits.  Additionally, it contains the
+ * PFNs that correspond to the start and end of the represented zone.
  *
- *	struct bm_block contains a pointer to the memory page in which
- *	information is stored (in the form of a block of bitmap)
- *	It also contains the pfns that correspond to the start and end of
- *	the represented memory area.
+ * struct bm_block contains a pointer to the memory page in which
+ * information is stored (in the form of a block of bitmap)
+ * It also contains the pfns that correspond to the start and end of
+ * the represented memory area.
  *
- *	The memory bitmap is organized as a radix tree to guarantee fast random
- *	access to the bits. There is one radix tree for each zone (as returned
- *	from create_mem_extents).
+ * The memory bitmap is organized as a radix tree to guarantee fast random
+ * access to the bits. There is one radix tree for each zone (as returned
+ * from create_mem_extents).
  *
- *	One radix tree is represented by one struct mem_zone_bm_rtree. There are
- *	two linked lists for the nodes of the tree, one for the inner nodes and
- *	one for the leave nodes. The linked leave nodes are used for fast linear
- *	access of the memory bitmap.
+ * One radix tree is represented by one struct mem_zone_bm_rtree. There are
+ * two linked lists for the nodes of the tree, one for the inner nodes and
+ * one for the leave nodes. The linked leave nodes are used for fast linear
+ * access of the memory bitmap.
  *
- *	The struct rtree_node represents one node of the radix tree.
+ * The struct rtree_node represents one node of the radix tree.
  */
 
 #define BM_END_OF_MAP	(~0UL)
@@ -305,9 +375,8 @@ struct bm_position {
 struct memory_bitmap {
 	struct list_head zones;
 	struct linked_page *p_list;	/* list of pages used to store zone
-					 * bitmap objects and bitmap block
-					 * objects
-					 */
+					   bitmap objects and bitmap block
+					   objects */
 	struct bm_position cur;	/* most recently used bit position */
 };
 
@@ -321,12 +390,12 @@ struct memory_bitmap {
 #endif
 #define BM_RTREE_LEVEL_MASK	((1UL << BM_RTREE_LEVEL_SHIFT) - 1)
 
-/*
- *	alloc_rtree_node - Allocate a new node and add it to the radix tree.
+/**
+ * alloc_rtree_node - Allocate a new node and add it to the radix tree.
  *
- *	This function is used to allocate inner nodes as well as the
- *	leave nodes of the radix tree. It also adds the node to the
- *	corresponding linked list passed in by the *list parameter.
+ * This function is used to allocate inner nodes as well as the
+ * leave nodes of the radix tree. It also adds the node to the
+ * corresponding linked list passed in by the *list parameter.
  */
 static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed,
 					   struct chain_allocator *ca,
@@ -347,12 +416,12 @@ static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed,
 	return node;
 }
 
-/*
- *	add_rtree_block - Add a new leave node to the radix tree
+/**
+ * add_rtree_block - Add a new leave node to the radix tree.
  *
- *	The leave nodes need to be allocated in order to keep the leaves
- *	linked list in order. This is guaranteed by the zone->blocks
- *	counter.
+ * The leave nodes need to be allocated in order to keep the leaves
+ * linked list in order. This is guaranteed by the zone->blocks
+ * counter.
  */
 static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask,
 			   int safe_needed, struct chain_allocator *ca)
@@ -417,17 +486,18 @@ static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask,
 static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone,
 			       int clear_nosave_free);
 
-/*
- *	create_zone_bm_rtree - create a radix tree for one zone
+/**
+ * create_zone_bm_rtree - Create a radix tree for one zone.
  *
- *	Allocated the mem_zone_bm_rtree structure and initializes it.
- *	This function also allocated and builds the radix tree for the
- *	zone.
+ * Allocated the mem_zone_bm_rtree structure and initializes it.
+ * This function also allocated and builds the radix tree for the
+ * zone.
  */
-static struct mem_zone_bm_rtree *
-create_zone_bm_rtree(gfp_t gfp_mask, int safe_needed,
-		     struct chain_allocator *ca,
-		     unsigned long start, unsigned long end)
+static struct mem_zone_bm_rtree *create_zone_bm_rtree(gfp_t gfp_mask,
+						      int safe_needed,
+						      struct chain_allocator *ca,
+						      unsigned long start,
+						      unsigned long end)
 {
 	struct mem_zone_bm_rtree *zone;
 	unsigned int i, nr_blocks;
@@ -454,12 +524,12 @@ create_zone_bm_rtree(gfp_t gfp_mask, int safe_needed,
 	return zone;
 }
 
-/*
- *	free_zone_bm_rtree - Free the memory of the radix tree
+/**
+ * free_zone_bm_rtree - Free the memory of the radix tree.
  *
- *	Free all node pages of the radix tree. The mem_zone_bm_rtree
- *	structure itself is not freed here nor are the rtree_node
- *	structs.
+ * Free all node pages of the radix tree. The mem_zone_bm_rtree
+ * structure itself is not freed here nor are the rtree_node
+ * structs.
  */
 static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone,
 			       int clear_nosave_free)
@@ -492,8 +562,8 @@ struct mem_extent {
 };
 
 /**
- *	free_mem_extents - free a list of memory extents
- *	@list - list of extents to empty
+ * free_mem_extents - Free a list of memory extents.
+ * @list: List of extents to free.
  */
 static void free_mem_extents(struct list_head *list)
 {
@@ -506,10 +576,11 @@ static void free_mem_extents(struct list_head *list)
 }
 
 /**
- *	create_mem_extents - create a list of memory extents representing
- *	                     contiguous ranges of PFNs
- *	@list - list to put the extents into
- *	@gfp_mask - mask to use for memory allocations
+ * create_mem_extents - Create a list of memory extents.
+ * @list: List to put the extents into.
+ * @gfp_mask: Mask to use for memory allocations.
+ *
+ * The extents represent contiguous ranges of PFNs.
  */
 static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
 {
@@ -565,10 +636,10 @@ static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
 }
 
 /**
-  *	memory_bm_create - allocate memory for a memory bitmap
-  */
-static int
-memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
+ * memory_bm_create - Allocate memory for a memory bitmap.
+ */
+static int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask,
+			    int safe_needed)
 {
 	struct chain_allocator ca;
 	struct list_head mem_extents;
@@ -607,8 +678,9 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
 }
 
 /**
-  *	memory_bm_free - free memory occupied by the memory bitmap @bm
-  */
+ * memory_bm_free - Free memory occupied by the memory bitmap.
+ * @bm: Memory bitmap.
+ */
 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
 {
 	struct mem_zone_bm_rtree *zone;
@@ -622,14 +694,13 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
 }
 
 /**
- *	memory_bm_find_bit - Find the bit for pfn in the memory
- *			     bitmap
+ * memory_bm_find_bit - Find the bit for a given PFN in a memory bitmap.
  *
- *	Find the bit in the bitmap @bm that corresponds to given pfn.
- *	The cur.zone, cur.block and cur.node_pfn member of @bm are
- *	updated.
- *	It walks the radix tree to find the page which contains the bit for
- *	pfn and returns the bit position in **addr and *bit_nr.
+ * Find the bit in memory bitmap @bm that corresponds to the given PFN.
+ * The cur.zone, cur.block and cur.node_pfn members of @bm are updated.
+ *
+ * Walk the radix tree to find the page containing the bit that represents @pfn
+ * and return the position of the bit in @addr and @bit_nr.
  */
 static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
 			      void **addr, unsigned int *bit_nr)
@@ -658,10 +729,9 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
 
 zone_found:
 	/*
-	 * We have a zone. Now walk the radix tree to find the leave
-	 * node for our pfn.
+	 * We have found the zone. Now walk the radix tree to find the leaf node
+	 * for our PFN.
 	 */
-
 	node = bm->cur.node;
 	if (((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn)
 		goto node_found;
@@ -754,14 +824,14 @@ static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
 }
 
 /*
- *	rtree_next_node - Jumps to the next leave node
+ * rtree_next_node - Jump to the next leaf node.
  *
- *	Sets the position to the beginning of the next node in the
- *	memory bitmap. This is either the next node in the current
- *	zone's radix tree or the first node in the radix tree of the
- *	next zone.
+ * Set the position to the beginning of the next node in the
+ * memory bitmap. This is either the next node in the current
+ * zone's radix tree or the first node in the radix tree of the
+ * next zone.
  *
- *	Returns true if there is a next node, false otherwise.
+ * Return true if there is a next node, false otherwise.
  */
 static bool rtree_next_node(struct memory_bitmap *bm)
 {
@@ -790,14 +860,15 @@ static bool rtree_next_node(struct memory_bitmap *bm)
 }
 
 /**
- *	memory_bm_rtree_next_pfn - Find the next set bit in the bitmap @bm
+ * memory_bm_rtree_next_pfn - Find the next set bit in a memory bitmap.
+ * @bm: Memory bitmap.
  *
- *	Starting from the last returned position this function searches
- *	for the next set bit in the memory bitmap and returns its
- *	number. If no more bit is set BM_END_OF_MAP is returned.
+ * Starting from the last returned position this function searches for the next
+ * set bit in @bm and returns the PFN represented by it.  If no more bits are
+ * set, BM_END_OF_MAP is returned.
  *
- *	It is required to run memory_bm_position_reset() before the
- *	first call to this function.
+ * It is required to run memory_bm_position_reset() before the first call to
+ * this function for the given memory bitmap.
  */
 static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
 {
@@ -819,11 +890,10 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
 	return BM_END_OF_MAP;
 }
 
-/**
- *	This structure represents a range of page frames the contents of which
- *	should not be saved during the suspend.
+/*
+ * This structure represents a range of page frames the contents of which
+ * should not be saved during hibernation.
  */
-
 struct nosave_region {
 	struct list_head list;
 	unsigned long start_pfn;
@@ -832,15 +902,42 @@ struct nosave_region {
 
 static LIST_HEAD(nosave_regions);
 
+static void recycle_zone_bm_rtree(struct mem_zone_bm_rtree *zone)
+{
+	struct rtree_node *node;
+
+	list_for_each_entry(node, &zone->nodes, list)
+		recycle_safe_page(node->data);
+
+	list_for_each_entry(node, &zone->leaves, list)
+		recycle_safe_page(node->data);
+}
+
+static void memory_bm_recycle(struct memory_bitmap *bm)
+{
+	struct mem_zone_bm_rtree *zone;
+	struct linked_page *p_list;
+
+	list_for_each_entry(zone, &bm->zones, list)
+		recycle_zone_bm_rtree(zone);
+
+	p_list = bm->p_list;
+	while (p_list) {
+		struct linked_page *lp = p_list;
+
+		p_list = lp->next;
+		recycle_safe_page(lp);
+	}
+}
+
 /**
- *	register_nosave_region - register a range of page frames the contents
- *	of which should not be saved during the suspend (to be used in the early
- *	initialization code)
+ * register_nosave_region - Register a region of unsaveable memory.
+ *
+ * Register a range of page frames the contents of which should not be saved
+ * during hibernation (to be used in the early initialization code).
  */
-
-void __init
-__register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
-			 int use_kmalloc)
+void __init __register_nosave_region(unsigned long start_pfn,
+				     unsigned long end_pfn, int use_kmalloc)
 {
 	struct nosave_region *region;
 
@@ -857,12 +954,13 @@ __register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
 		}
 	}
 	if (use_kmalloc) {
-		/* during init, this shouldn't fail */
+		/* During init, this shouldn't fail */
 		region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
 		BUG_ON(!region);
-	} else
+	} else {
 		/* This allocation cannot fail */
 		region = memblock_virt_alloc(sizeof(struct nosave_region), 0);
+	}
 	region->start_pfn = start_pfn;
 	region->end_pfn = end_pfn;
 	list_add_tail(&region->list, &nosave_regions);
@@ -923,10 +1021,12 @@ static void swsusp_unset_page_forbidden(struct page *page)
 }
 
 /**
- *	mark_nosave_pages - set bits corresponding to the page frames the
- *	contents of which should not be saved in a given bitmap.
+ * mark_nosave_pages - Mark pages that should not be saved.
+ * @bm: Memory bitmap.
+ *
+ * Set the bits in @bm that correspond to the page frames the contents of which
+ * should not be saved.
  */
-
 static void mark_nosave_pages(struct memory_bitmap *bm)
 {
 	struct nosave_region *region;
@@ -956,13 +1056,13 @@ static void mark_nosave_pages(struct memory_bitmap *bm)
 }
 
 /**
- *	create_basic_memory_bitmaps - create bitmaps needed for marking page
- *	frames that should not be saved and free page frames.  The pointers
- *	forbidden_pages_map and free_pages_map are only modified if everything
- *	goes well, because we don't want the bits to be used before both bitmaps
- *	are set up.
+ * create_basic_memory_bitmaps - Create bitmaps to hold basic page information.
+ *
+ * Create bitmaps needed for marking page frames that should not be saved and
+ * free page frames.  The forbidden_pages_map and free_pages_map pointers are
+ * only modified if everything goes well, because we don't want the bits to be
+ * touched before both bitmaps are set up.
  */
-
 int create_basic_memory_bitmaps(void)
 {
 	struct memory_bitmap *bm1, *bm2;
@@ -1007,12 +1107,12 @@ int create_basic_memory_bitmaps(void)
 }
 
 /**
- *	free_basic_memory_bitmaps - free memory bitmaps allocated by
- *	create_basic_memory_bitmaps().  The auxiliary pointers are necessary
- *	so that the bitmaps themselves are not referred to while they are being
- *	freed.
+ * free_basic_memory_bitmaps - Free memory bitmaps holding basic information.
+ *
+ * Free memory bitmaps allocated by create_basic_memory_bitmaps().  The
+ * auxiliary pointers are necessary so that the bitmaps themselves are not
+ * referred to while they are being freed.
  */
-
 void free_basic_memory_bitmaps(void)
 {
 	struct memory_bitmap *bm1, *bm2;
@@ -1033,11 +1133,13 @@ void free_basic_memory_bitmaps(void)
 }
 
 /**
- *	snapshot_additional_pages - estimate the number of additional pages
- *	be needed for setting up the suspend image data structures for given
- *	zone (usually the returned value is greater than the exact number)
+ * snapshot_additional_pages - Estimate the number of extra pages needed.
+ * @zone: Memory zone to carry out the computation for.
+ *
+ * Estimate the number of additional pages needed for setting up a hibernation
+ * image data structures for @zone (usually, the returned value is greater than
+ * the exact number).
  */
-
 unsigned int snapshot_additional_pages(struct zone *zone)
 {
 	unsigned int rtree, nodes;
@@ -1055,10 +1157,10 @@ unsigned int snapshot_additional_pages(struct zone *zone)
 
 #ifdef CONFIG_HIGHMEM
 /**
- *	count_free_highmem_pages - compute the total number of free highmem
- *	pages, system-wide.
+ * count_free_highmem_pages - Compute the total number of free highmem pages.
+ *
+ * The returned number is system-wide.
  */
-
 static unsigned int count_free_highmem_pages(void)
 {
 	struct zone *zone;
@@ -1072,11 +1174,12 @@ static unsigned int count_free_highmem_pages(void)
 }
 
 /**
- *	saveable_highmem_page - Determine whether a highmem page should be
- *	included in the suspend image.
+ * saveable_highmem_page - Check if a highmem page is saveable.
  *
- *	We should save the page if it isn't Nosave or NosaveFree, or Reserved,
- *	and it isn't a part of a free chunk of pages.
+ * Determine whether a highmem page should be included in a hibernation image.
+ *
+ * We should save the page if it isn't Nosave or NosaveFree, or Reserved,
+ * and it isn't part of a free chunk of pages.
  */
 static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
 {
@@ -1102,10 +1205,8 @@ static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
 }
 
 /**
- *	count_highmem_pages - compute the total number of saveable highmem
- *	pages.
+ * count_highmem_pages - Compute the total number of saveable highmem pages.
  */
-
 static unsigned int count_highmem_pages(void)
 {
 	struct zone *zone;
@@ -1133,12 +1234,14 @@ static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
 #endif /* CONFIG_HIGHMEM */
 
 /**
- *	saveable_page - Determine whether a non-highmem page should be included
- *	in the suspend image.
+ * saveable_page - Check if the given page is saveable.
  *
- *	We should save the page if it isn't Nosave, and is not in the range
- *	of pages statically defined as 'unsaveable', and it isn't a part of
- *	a free chunk of pages.
+ * Determine whether a non-highmem page should be included in a hibernation
+ * image.
+ *
+ * We should save the page if it isn't Nosave, and is not in the range
+ * of pages statically defined as 'unsaveable', and it isn't part of
+ * a free chunk of pages.
  */
 static struct page *saveable_page(struct zone *zone, unsigned long pfn)
 {
@@ -1167,10 +1270,8 @@ static struct page *saveable_page(struct zone *zone, unsigned long pfn)
 }
 
 /**
- *	count_data_pages - compute the total number of saveable non-highmem
- *	pages.
+ * count_data_pages - Compute the total number of saveable non-highmem pages.
  */
-
 static unsigned int count_data_pages(void)
 {
 	struct zone *zone;
@@ -1190,7 +1291,8 @@ static unsigned int count_data_pages(void)
 	return n;
 }
 
-/* This is needed, because copy_page and memcpy are not usable for copying
+/*
+ * This is needed, because copy_page and memcpy are not usable for copying
  * task structs.
  */
 static inline void do_copy_page(long *dst, long *src)
@@ -1201,12 +1303,12 @@ static inline void do_copy_page(long *dst, long *src)
 		*dst++ = *src++;
 }
 
-
 /**
- *	safe_copy_page - check if the page we are going to copy is marked as
- *		present in the kernel page tables (this always is the case if
- *		CONFIG_DEBUG_PAGEALLOC is not set and in that case
- *		kernel_page_present() always returns 'true').
+ * safe_copy_page - Copy a page in a safe way.
+ *
+ * Check if the page we are going to copy is marked as present in the kernel
+ * page tables (this always is the case if CONFIG_DEBUG_PAGEALLOC is not set
+ * and in that case kernel_page_present() always returns 'true').
  */
 static void safe_copy_page(void *dst, struct page *s_page)
 {
@@ -1219,10 +1321,8 @@ static void safe_copy_page(void *dst, struct page *s_page)
 	}
 }
 
-
 #ifdef CONFIG_HIGHMEM
-static inline struct page *
-page_is_saveable(struct zone *zone, unsigned long pfn)
+static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn)
 {
 	return is_highmem(zone) ?
 		saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
@@ -1243,7 +1343,8 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 		kunmap_atomic(src);
 	} else {
 		if (PageHighMem(d_page)) {
-			/* Page pointed to by src may contain some kernel
+			/*
+			 * The page pointed to by src may contain some kernel
 			 * data modified by kmap_atomic()
 			 */
 			safe_copy_page(buffer, s_page);
@@ -1265,8 +1366,8 @@ static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 }
 #endif /* CONFIG_HIGHMEM */
 
-static void
-copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
+static void copy_data_pages(struct memory_bitmap *copy_bm,
+			    struct memory_bitmap *orig_bm)
 {
 	struct zone *zone;
 	unsigned long pfn;
@@ -1315,12 +1416,11 @@ static struct memory_bitmap orig_bm;
 static struct memory_bitmap copy_bm;
 
 /**
- *	swsusp_free - free pages allocated for the suspend.
+ * swsusp_free - Free pages allocated for hibernation image.
  *
- *	Suspend pages are alocated before the atomic copy is made, so we
- *	need to release them after the resume.
+ * Image pages are alocated before snapshot creation, so they need to be
+ * released after resume.
  */
-
 void swsusp_free(void)
 {
 	unsigned long fb_pfn, fr_pfn;
@@ -1351,6 +1451,7 @@ loop:
 
 		memory_bm_clear_current(forbidden_pages_map);
 		memory_bm_clear_current(free_pages_map);
+		hibernate_restore_unprotect_page(page_address(page));
 		__free_page(page);
 		goto loop;
 	}
@@ -1362,6 +1463,7 @@ out:
 	buffer = NULL;
 	alloc_normal = 0;
 	alloc_highmem = 0;
+	hibernate_restore_protection_end();
 }
 
 /* Helper functions used for the shrinking of memory. */
@@ -1369,7 +1471,7 @@ out:
 #define GFP_IMAGE	(GFP_KERNEL | __GFP_NOWARN)
 
 /**
- * preallocate_image_pages - Allocate a number of pages for hibernation image
+ * preallocate_image_pages - Allocate a number of pages for hibernation image.
  * @nr_pages: Number of page frames to allocate.
  * @mask: GFP flags to use for the allocation.
  *
@@ -1419,7 +1521,7 @@ static unsigned long preallocate_image_highmem(unsigned long nr_pages)
 }
 
 /**
- *  __fraction - Compute (an approximation of) x * (multiplier / base)
+ *  __fraction - Compute (an approximation of) x * (multiplier / base).
  */
 static unsigned long __fraction(u64 x, u64 multiplier, u64 base)
 {
@@ -1429,8 +1531,8 @@ static unsigned long __fraction(u64 x, u64 multiplier, u64 base)
 }
 
 static unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
-						unsigned long highmem,
-						unsigned long total)
+						  unsigned long highmem,
+						  unsigned long total)
 {
 	unsigned long alloc = __fraction(nr_pages, highmem, total);
 
@@ -1443,15 +1545,15 @@ static inline unsigned long preallocate_image_highmem(unsigned long nr_pages)
 }
 
 static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
-						unsigned long highmem,
-						unsigned long total)
+							 unsigned long highmem,
+							 unsigned long total)
 {
 	return 0;
 }
 #endif /* CONFIG_HIGHMEM */
 
 /**
- * free_unnecessary_pages - Release preallocated pages not needed for the image
+ * free_unnecessary_pages - Release preallocated pages not needed for the image.
  */
 static unsigned long free_unnecessary_pages(void)
 {
@@ -1505,7 +1607,7 @@ static unsigned long free_unnecessary_pages(void)
 }
 
 /**
- * minimum_image_size - Estimate the minimum acceptable size of an image
+ * minimum_image_size - Estimate the minimum acceptable size of an image.
  * @saveable: Number of saveable pages in the system.
  *
  * We want to avoid attempting to free too much memory too hard, so estimate the
@@ -1535,7 +1637,7 @@ static unsigned long minimum_image_size(unsigned long saveable)
 }
 
 /**
- * hibernate_preallocate_memory - Preallocate memory for hibernation image
+ * hibernate_preallocate_memory - Preallocate memory for hibernation image.
  *
  * To create a hibernation image it is necessary to make a copy of every page
  * frame in use.  We also need a number of page frames to be free during
@@ -1708,10 +1810,11 @@ int hibernate_preallocate_memory(void)
 
 #ifdef CONFIG_HIGHMEM
 /**
-  *	count_pages_for_highmem - compute the number of non-highmem pages
-  *	that will be necessary for creating copies of highmem pages.
-  */
-
+ * count_pages_for_highmem - Count non-highmem pages needed for copying highmem.
+ *
+ * Compute the number of non-highmem pages that will be necessary for creating
+ * copies of highmem pages.
+ */
 static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
 {
 	unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem;
@@ -1724,15 +1827,12 @@ static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
 	return nr_highmem;
 }
 #else
-static unsigned int
-count_pages_for_highmem(unsigned int nr_highmem) { return 0; }
+static unsigned int count_pages_for_highmem(unsigned int nr_highmem) { return 0; }
 #endif /* CONFIG_HIGHMEM */
 
 /**
- *	enough_free_mem - Make sure we have enough free memory for the
- *	snapshot image.
+ * enough_free_mem - Check if there is enough free memory for the image.
  */
-
 static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
 {
 	struct zone *zone;
@@ -1751,10 +1851,11 @@ static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
 
 #ifdef CONFIG_HIGHMEM
 /**
- *	get_highmem_buffer - if there are some highmem pages in the suspend
- *	image, we may need the buffer to copy them and/or load their data.
+ * get_highmem_buffer - Allocate a buffer for highmem pages.
+ *
+ * If there are some highmem pages in the hibernation image, we may need a
+ * buffer to copy them and/or load their data.
  */
-
 static inline int get_highmem_buffer(int safe_needed)
 {
 	buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed);
@@ -1762,13 +1863,13 @@ static inline int get_highmem_buffer(int safe_needed)
 }
 
 /**
- *	alloc_highmem_image_pages - allocate some highmem pages for the image.
- *	Try to allocate as many pages as needed, but if the number of free
- *	highmem pages is lesser than that, allocate them all.
+ * alloc_highmem_image_pages - Allocate some highmem pages for the image.
+ *
+ * Try to allocate as many pages as needed, but if the number of free highmem
+ * pages is less than that, allocate them all.
  */
-
-static inline unsigned int
-alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
+static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm,
+					       unsigned int nr_highmem)
 {
 	unsigned int to_alloc = count_free_highmem_pages();
 
@@ -1787,25 +1888,24 @@ alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
 #else
 static inline int get_highmem_buffer(int safe_needed) { return 0; }
 
-static inline unsigned int
-alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n) { return 0; }
+static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm,
+					       unsigned int n) { return 0; }
 #endif /* CONFIG_HIGHMEM */
 
 /**
- *	swsusp_alloc - allocate memory for the suspend image
+ * swsusp_alloc - Allocate memory for hibernation image.
  *
- *	We first try to allocate as many highmem pages as there are
- *	saveable highmem pages in the system.  If that fails, we allocate
- *	non-highmem pages for the copies of the remaining highmem ones.
+ * We first try to allocate as many highmem pages as there are
+ * saveable highmem pages in the system.  If that fails, we allocate
+ * non-highmem pages for the copies of the remaining highmem ones.
  *
- *	In this approach it is likely that the copies of highmem pages will
- *	also be located in the high memory, because of the way in which
- *	copy_data_pages() works.
+ * In this approach it is likely that the copies of highmem pages will
+ * also be located in the high memory, because of the way in which
+ * copy_data_pages() works.
  */
-
-static int
-swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
-		unsigned int nr_pages, unsigned int nr_highmem)
+static int swsusp_alloc(struct memory_bitmap *orig_bm,
+			struct memory_bitmap *copy_bm,
+			unsigned int nr_pages, unsigned int nr_highmem)
 {
 	if (nr_highmem > 0) {
 		if (get_highmem_buffer(PG_ANY))
@@ -1855,7 +1955,8 @@ asmlinkage __visible int swsusp_save(void)
 		return -ENOMEM;
 	}
 
-	/* During allocating of suspend pagedir, new cold pages may appear.
+	/*
+	 * During allocating of suspend pagedir, new cold pages may appear.
 	 * Kill them.
 	 */
 	drain_local_pages(NULL);
@@ -1918,12 +2019,14 @@ static int init_header(struct swsusp_info *info)
 }
 
 /**
- *	pack_pfns - pfns corresponding to the set bits found in the bitmap @bm
- *	are stored in the array @buf[] (1 page at a time)
+ * pack_pfns - Prepare PFNs for saving.
+ * @bm: Memory bitmap.
+ * @buf: Memory buffer to store the PFNs in.
+ *
+ * PFNs corresponding to set bits in @bm are stored in the area of memory
+ * pointed to by @buf (1 page at a time).
  */
-
-static inline void
-pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
+static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
 {
 	int j;
 
@@ -1937,22 +2040,21 @@ pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
 }
 
 /**
- *	snapshot_read_next - used for reading the system memory snapshot.
+ * snapshot_read_next - Get the address to read the next image page from.
+ * @handle: Snapshot handle to be used for the reading.
  *
- *	On the first call to it @handle should point to a zeroed
- *	snapshot_handle structure.  The structure gets updated and a pointer
- *	to it should be passed to this function every next time.
+ * On the first call, @handle should point to a zeroed snapshot_handle
+ * structure.  The structure gets populated then and a pointer to it should be
+ * passed to this function every next time.
  *
- *	On success the function returns a positive number.  Then, the caller
- *	is allowed to read up to the returned number of bytes from the memory
- *	location computed by the data_of() macro.
+ * On success, the function returns a positive number.  Then, the caller
+ * is allowed to read up to the returned number of bytes from the memory
+ * location computed by the data_of() macro.
  *
- *	The function returns 0 to indicate the end of data stream condition,
- *	and a negative number is returned on error.  In such cases the
- *	structure pointed to by @handle is not updated and should not be used
- *	any more.
+ * The function returns 0 to indicate the end of the data stream condition,
+ * and negative numbers are returned on errors.  If that happens, the structure
+ * pointed to by @handle is not updated and should not be used any more.
  */
-
 int snapshot_read_next(struct snapshot_handle *handle)
 {
 	if (handle->cur > nr_meta_pages + nr_copy_pages)
@@ -1981,7 +2083,8 @@ int snapshot_read_next(struct snapshot_handle *handle)
 
 		page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
 		if (PageHighMem(page)) {
-			/* Highmem pages are copied to the buffer,
+			/*
+			 * Highmem pages are copied to the buffer,
 			 * because we can't return with a kmapped
 			 * highmem page (we may not be called again).
 			 */
@@ -1999,53 +2102,41 @@ int snapshot_read_next(struct snapshot_handle *handle)
 	return PAGE_SIZE;
 }
 
-/**
- *	mark_unsafe_pages - mark the pages that cannot be used for storing
- *	the image during resume, because they conflict with the pages that
- *	had been used before suspend
- */
-
-static int mark_unsafe_pages(struct memory_bitmap *bm)
+static void duplicate_memory_bitmap(struct memory_bitmap *dst,
+				    struct memory_bitmap *src)
 {
-	struct zone *zone;
-	unsigned long pfn, max_zone_pfn;
+	unsigned long pfn;
 
-	/* Clear page flags */
-	for_each_populated_zone(zone) {
-		max_zone_pfn = zone_end_pfn(zone);
-		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
-			if (pfn_valid(pfn))
-				swsusp_unset_page_free(pfn_to_page(pfn));
+	memory_bm_position_reset(src);
+	pfn = memory_bm_next_pfn(src);
+	while (pfn != BM_END_OF_MAP) {
+		memory_bm_set_bit(dst, pfn);
+		pfn = memory_bm_next_pfn(src);
 	}
-
-	/* Mark pages that correspond to the "original" pfns as "unsafe" */
-	memory_bm_position_reset(bm);
-	do {
-		pfn = memory_bm_next_pfn(bm);
-		if (likely(pfn != BM_END_OF_MAP)) {
-			if (likely(pfn_valid(pfn)))
-				swsusp_set_page_free(pfn_to_page(pfn));
-			else
-				return -EFAULT;
-		}
-	} while (pfn != BM_END_OF_MAP);
-
-	allocated_unsafe_pages = 0;
-
-	return 0;
 }
 
-static void
-duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src)
+/**
+ * mark_unsafe_pages - Mark pages that were used before hibernation.
+ *
+ * Mark the pages that cannot be used for storing the image during restoration,
+ * because they conflict with the pages that had been used before hibernation.
+ */
+static void mark_unsafe_pages(struct memory_bitmap *bm)
 {
 	unsigned long pfn;
 
-	memory_bm_position_reset(src);
-	pfn = memory_bm_next_pfn(src);
+	/* Clear the "free"/"unsafe" bit for all PFNs */
+	memory_bm_position_reset(free_pages_map);
+	pfn = memory_bm_next_pfn(free_pages_map);
 	while (pfn != BM_END_OF_MAP) {
-		memory_bm_set_bit(dst, pfn);
-		pfn = memory_bm_next_pfn(src);
+		memory_bm_clear_current(free_pages_map);
+		pfn = memory_bm_next_pfn(free_pages_map);
 	}
+
+	/* Mark pages that correspond to the "original" PFNs as "unsafe" */
+	duplicate_memory_bitmap(free_pages_map, bm);
+
+	allocated_unsafe_pages = 0;
 }
 
 static int check_header(struct swsusp_info *info)
@@ -2063,11 +2154,9 @@ static int check_header(struct swsusp_info *info)
 }
 
 /**
- *	load header - check the image header and copy data from it
+ * load header - Check the image header and copy the data from it.
  */
-
-static int
-load_header(struct swsusp_info *info)
+static int load_header(struct swsusp_info *info)
 {
 	int error;
 
@@ -2081,8 +2170,12 @@ load_header(struct swsusp_info *info)
 }
 
 /**
- *	unpack_orig_pfns - for each element of @buf[] (1 page at a time) set
- *	the corresponding bit in the memory bitmap @bm
+ * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
+ * @bm: Memory bitmap.
+ * @buf: Area of memory containing the PFNs.
+ *
+ * For each element of the array pointed to by @buf (1 page at a time), set the
+ * corresponding bit in @bm.
  */
 static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
 {
@@ -2095,7 +2188,7 @@ static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
 		/* Extract and buffer page key for data page (s390 only). */
 		page_key_memorize(buf + j);
 
-		if (memory_bm_pfn_present(bm, buf[j]))
+		if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j]))
 			memory_bm_set_bit(bm, buf[j]);
 		else
 			return -EFAULT;
@@ -2104,13 +2197,9 @@ static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
 	return 0;
 }
 
-/* List of "safe" pages that may be used to store data loaded from the suspend
- * image
- */
-static struct linked_page *safe_pages_list;
-
 #ifdef CONFIG_HIGHMEM
-/* struct highmem_pbe is used for creating the list of highmem pages that
+/*
+ * struct highmem_pbe is used for creating the list of highmem pages that
  * should be restored atomically during the resume from disk, because the page
  * frames they have occupied before the suspend are in use.
  */
@@ -2120,7 +2209,8 @@ struct highmem_pbe {
 	struct highmem_pbe *next;
 };
 
-/* List of highmem PBEs needed for restoring the highmem pages that were
+/*
+ * List of highmem PBEs needed for restoring the highmem pages that were
  * allocated before the suspend and included in the suspend image, but have
  * also been allocated by the "resume" kernel, so their contents cannot be
  * written directly to their "original" page frames.
@@ -2128,11 +2218,11 @@ struct highmem_pbe {
 static struct highmem_pbe *highmem_pblist;
 
 /**
- *	count_highmem_image_pages - compute the number of highmem pages in the
- *	suspend image.  The bits in the memory bitmap @bm that correspond to the
- *	image pages are assumed to be set.
+ * count_highmem_image_pages - Compute the number of highmem pages in the image.
+ * @bm: Memory bitmap.
+ *
+ * The bits in @bm that correspond to image pages are assumed to be set.
  */
-
 static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
 {
 	unsigned long pfn;
@@ -2149,24 +2239,25 @@ static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
 	return cnt;
 }
 
-/**
- *	prepare_highmem_image - try to allocate as many highmem pages as
- *	there are highmem image pages (@nr_highmem_p points to the variable
- *	containing the number of highmem image pages).  The pages that are
- *	"safe" (ie. will not be overwritten when the suspend image is
- *	restored) have the corresponding bits set in @bm (it must be
- *	unitialized).
- *
- *	NOTE: This function should not be called if there are no highmem
- *	image pages.
- */
-
 static unsigned int safe_highmem_pages;
 
 static struct memory_bitmap *safe_highmem_bm;
 
-static int
-prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
+/**
+ * prepare_highmem_image - Allocate memory for loading highmem data from image.
+ * @bm: Pointer to an uninitialized memory bitmap structure.
+ * @nr_highmem_p: Pointer to the number of highmem image pages.
+ *
+ * Try to allocate as many highmem pages as there are highmem image pages
+ * (@nr_highmem_p points to the variable containing the number of highmem image
+ * pages).  The pages that are "safe" (ie. will not be overwritten when the
+ * hibernation image is restored entirely) have the corresponding bits set in
+ * @bm (it must be unitialized).
+ *
+ * NOTE: This function should not be called if there are no highmem image pages.
+ */
+static int prepare_highmem_image(struct memory_bitmap *bm,
+				 unsigned int *nr_highmem_p)
 {
 	unsigned int to_alloc;
 
@@ -2201,39 +2292,42 @@ prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
 	return 0;
 }
 
+static struct page *last_highmem_page;
+
 /**
- *	get_highmem_page_buffer - for given highmem image page find the buffer
- *	that suspend_write_next() should set for its caller to write to.
+ * get_highmem_page_buffer - Prepare a buffer to store a highmem image page.
  *
- *	If the page is to be saved to its "original" page frame or a copy of
- *	the page is to be made in the highmem, @buffer is returned.  Otherwise,
- *	the copy of the page is to be made in normal memory, so the address of
- *	the copy is returned.
+ * For a given highmem image page get a buffer that suspend_write_next() should
+ * return to its caller to write to.
  *
- *	If @buffer is returned, the caller of suspend_write_next() will write
- *	the page's contents to @buffer, so they will have to be copied to the
- *	right location on the next call to suspend_write_next() and it is done
- *	with the help of copy_last_highmem_page().  For this purpose, if
- *	@buffer is returned, @last_highmem page is set to the page to which
- *	the data will have to be copied from @buffer.
+ * If the page is to be saved to its "original" page frame or a copy of
+ * the page is to be made in the highmem, @buffer is returned.  Otherwise,
+ * the copy of the page is to be made in normal memory, so the address of
+ * the copy is returned.
+ *
+ * If @buffer is returned, the caller of suspend_write_next() will write
+ * the page's contents to @buffer, so they will have to be copied to the
+ * right location on the next call to suspend_write_next() and it is done
+ * with the help of copy_last_highmem_page().  For this purpose, if
+ * @buffer is returned, @last_highmem_page is set to the page to which
+ * the data will have to be copied from @buffer.
  */
-
-static struct page *last_highmem_page;
-
-static void *
-get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
+static void *get_highmem_page_buffer(struct page *page,
+				     struct chain_allocator *ca)
 {
 	struct highmem_pbe *pbe;
 	void *kaddr;
 
 	if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
-		/* We have allocated the "original" page frame and we can
+		/*
+		 * We have allocated the "original" page frame and we can
 		 * use it directly to store the loaded page.
 		 */
 		last_highmem_page = page;
 		return buffer;
 	}
-	/* The "original" page frame has not been allocated and we have to
+	/*
+	 * The "original" page frame has not been allocated and we have to
 	 * use a "safe" page frame to store the loaded page.
 	 */
 	pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
@@ -2263,11 +2357,12 @@ get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
 }
 
 /**
- *	copy_last_highmem_page - copy the contents of a highmem image from
- *	@buffer, where the caller of snapshot_write_next() has place them,
- *	to the right location represented by @last_highmem_page .
+ * copy_last_highmem_page - Copy most the most recent highmem image page.
+ *
+ * Copy the contents of a highmem image from @buffer, where the caller of
+ * snapshot_write_next() has stored them, to the right location represented by
+ * @last_highmem_page .
  */
-
 static void copy_last_highmem_page(void)
 {
 	if (last_highmem_page) {
@@ -2294,17 +2389,13 @@ static inline void free_highmem_data(void)
 		free_image_page(buffer, PG_UNSAFE_CLEAR);
 }
 #else
-static unsigned int
-count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }
+static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }
 
-static inline int
-prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
-{
-	return 0;
-}
+static inline int prepare_highmem_image(struct memory_bitmap *bm,
+					unsigned int *nr_highmem_p) { return 0; }
 
-static inline void *
-get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
+static inline void *get_highmem_page_buffer(struct page *page,
+					    struct chain_allocator *ca)
 {
 	return ERR_PTR(-EINVAL);
 }
@@ -2314,27 +2405,27 @@ static inline int last_highmem_page_copied(void) { return 1; }
 static inline void free_highmem_data(void) {}
 #endif /* CONFIG_HIGHMEM */
 
+#define PBES_PER_LINKED_PAGE	(LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
+
 /**
- *	prepare_image - use the memory bitmap @bm to mark the pages that will
- *	be overwritten in the process of restoring the system memory state
- *	from the suspend image ("unsafe" pages) and allocate memory for the
- *	image.
+ * prepare_image - Make room for loading hibernation image.
+ * @new_bm: Unitialized memory bitmap structure.
+ * @bm: Memory bitmap with unsafe pages marked.
+ *
+ * Use @bm to mark the pages that will be overwritten in the process of
+ * restoring the system memory state from the suspend image ("unsafe" pages)
+ * and allocate memory for the image.
  *
- *	The idea is to allocate a new memory bitmap first and then allocate
- *	as many pages as needed for the image data, but not to assign these
- *	pages to specific tasks initially.  Instead, we just mark them as
- *	allocated and create a lists of "safe" pages that will be used
- *	later.  On systems with high memory a list of "safe" highmem pages is
- *	also created.
+ * The idea is to allocate a new memory bitmap first and then allocate
+ * as many pages as needed for image data, but without specifying what those
+ * pages will be used for just yet.  Instead, we mark them all as allocated and
+ * create a lists of "safe" pages to be used later.  On systems with high
+ * memory a list of "safe" highmem pages is created too.
  */
-
-#define PBES_PER_LINKED_PAGE	(LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
-
-static int
-prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
+static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 {
 	unsigned int nr_pages, nr_highmem;
-	struct linked_page *sp_list, *lp;
+	struct linked_page *lp;
 	int error;
 
 	/* If there is no highmem, the buffer will not be necessary */
@@ -2342,9 +2433,7 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 	buffer = NULL;
 
 	nr_highmem = count_highmem_image_pages(bm);
-	error = mark_unsafe_pages(bm);
-	if (error)
-		goto Free;
+	mark_unsafe_pages(bm);
 
 	error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
 	if (error)
@@ -2357,14 +2446,15 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 		if (error)
 			goto Free;
 	}
-	/* Reserve some safe pages for potential later use.
+	/*
+	 * Reserve some safe pages for potential later use.
 	 *
 	 * NOTE: This way we make sure there will be enough safe pages for the
 	 * chain_alloc() in get_buffer().  It is a bit wasteful, but
 	 * nr_copy_pages cannot be greater than 50% of the memory anyway.
+	 *
+	 * nr_copy_pages cannot be less than allocated_unsafe_pages too.
 	 */
-	sp_list = NULL;
-	/* nr_copy_pages cannot be lesser than allocated_unsafe_pages */
 	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
 	nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
 	while (nr_pages > 0) {
@@ -2373,12 +2463,11 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 			error = -ENOMEM;
 			goto Free;
 		}
-		lp->next = sp_list;
-		sp_list = lp;
+		lp->next = safe_pages_list;
+		safe_pages_list = lp;
 		nr_pages--;
 	}
 	/* Preallocate memory for the image */
-	safe_pages_list = NULL;
 	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
 	while (nr_pages > 0) {
 		lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
@@ -2396,12 +2485,6 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 		swsusp_set_page_free(virt_to_page(lp));
 		nr_pages--;
 	}
-	/* Free the reserved safe pages so that chain_alloc() can use them */
-	while (sp_list) {
-		lp = sp_list->next;
-		free_image_page(sp_list, PG_UNSAFE_CLEAR);
-		sp_list = lp;
-	}
 	return 0;
 
  Free:
@@ -2410,10 +2493,11 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 }
 
 /**
- *	get_buffer - compute the address that snapshot_write_next() should
- *	set for its caller to write to.
+ * get_buffer - Get the address to store the next image data page.
+ *
+ * Get the address that snapshot_write_next() should return to its caller to
+ * write to.
  */
-
 static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
 {
 	struct pbe *pbe;
@@ -2428,12 +2512,14 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
 		return get_highmem_page_buffer(page, ca);
 
 	if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
-		/* We have allocated the "original" page frame and we can
+		/*
+		 * We have allocated the "original" page frame and we can
 		 * use it directly to store the loaded page.
 		 */
 		return page_address(page);
 
-	/* The "original" page frame has not been allocated and we have to
+	/*
+	 * The "original" page frame has not been allocated and we have to
 	 * use a "safe" page frame to store the loaded page.
 	 */
 	pbe = chain_alloc(ca, sizeof(struct pbe));
@@ -2450,22 +2536,21 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
 }
 
 /**
- *	snapshot_write_next - used for writing the system memory snapshot.
+ * snapshot_write_next - Get the address to store the next image page.
+ * @handle: Snapshot handle structure to guide the writing.
  *
- *	On the first call to it @handle should point to a zeroed
- *	snapshot_handle structure.  The structure gets updated and a pointer
- *	to it should be passed to this function every next time.
+ * On the first call, @handle should point to a zeroed snapshot_handle
+ * structure.  The structure gets populated then and a pointer to it should be
+ * passed to this function every next time.
  *
- *	On success the function returns a positive number.  Then, the caller
- *	is allowed to write up to the returned number of bytes to the memory
- *	location computed by the data_of() macro.
+ * On success, the function returns a positive number.  Then, the caller
+ * is allowed to write up to the returned number of bytes to the memory
+ * location computed by the data_of() macro.
  *
- *	The function returns 0 to indicate the "end of file" condition,
- *	and a negative number is returned on error.  In such cases the
- *	structure pointed to by @handle is not updated and should not be used
- *	any more.
+ * The function returns 0 to indicate the "end of file" condition.  Negative
+ * numbers are returned on errors, in which cases the structure pointed to by
+ * @handle is not updated and should not be used any more.
  */
-
 int snapshot_write_next(struct snapshot_handle *handle)
 {
 	static struct chain_allocator ca;
@@ -2491,6 +2576,8 @@ int snapshot_write_next(struct snapshot_handle *handle)
 		if (error)
 			return error;
 
+		safe_pages_list = NULL;
+
 		error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
 		if (error)
 			return error;
@@ -2500,6 +2587,7 @@ int snapshot_write_next(struct snapshot_handle *handle)
 		if (error)
 			return error;
 
+		hibernate_restore_protection_begin();
 	} else if (handle->cur <= nr_meta_pages + 1) {
 		error = unpack_orig_pfns(buffer, &copy_bm);
 		if (error)
@@ -2522,6 +2610,7 @@ int snapshot_write_next(struct snapshot_handle *handle)
 		copy_last_highmem_page();
 		/* Restore page key for data page (s390 only). */
 		page_key_write(handle->buffer);
+		hibernate_restore_protect_page(handle->buffer);
 		handle->buffer = get_buffer(&orig_bm, &ca);
 		if (IS_ERR(handle->buffer))
 			return PTR_ERR(handle->buffer);
@@ -2533,22 +2622,23 @@ int snapshot_write_next(struct snapshot_handle *handle)
 }
 
 /**
- *	snapshot_write_finalize - must be called after the last call to
- *	snapshot_write_next() in case the last page in the image happens
- *	to be a highmem page and its contents should be stored in the
- *	highmem.  Additionally, it releases the memory that will not be
- *	used any more.
+ * snapshot_write_finalize - Complete the loading of a hibernation image.
+ *
+ * Must be called after the last call to snapshot_write_next() in case the last
+ * page in the image happens to be a highmem page and its contents should be
+ * stored in highmem.  Additionally, it recycles bitmap memory that's not
+ * necessary any more.
  */
-
 void snapshot_write_finalize(struct snapshot_handle *handle)
 {
 	copy_last_highmem_page();
 	/* Restore page key for data page (s390 only). */
 	page_key_write(handle->buffer);
 	page_key_free();
-	/* Free only if we have loaded the image entirely */
+	hibernate_restore_protect_page(handle->buffer);
+	/* Do that only if we have loaded the image entirely */
 	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
-		memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
+		memory_bm_recycle(&orig_bm);
 		free_highmem_data();
 	}
 }
@@ -2561,8 +2651,8 @@ int snapshot_image_loaded(struct snapshot_handle *handle)
 
 #ifdef CONFIG_HIGHMEM
 /* Assumes that @buf is ready and points to a "safe" page */
-static inline void
-swap_two_pages_data(struct page *p1, struct page *p2, void *buf)
+static inline void swap_two_pages_data(struct page *p1, struct page *p2,
+				       void *buf)
 {
 	void *kaddr1, *kaddr2;
 
@@ -2576,15 +2666,15 @@ swap_two_pages_data(struct page *p1, struct page *p2, void *buf)
 }
 
 /**
- *	restore_highmem - for each highmem page that was allocated before
- *	the suspend and included in the suspend image, and also has been
- *	allocated by the "resume" kernel swap its current (ie. "before
- *	resume") contents with the previous (ie. "before suspend") one.
+ * restore_highmem - Put highmem image pages into their original locations.
+ *
+ * For each highmem page that was in use before hibernation and is included in
+ * the image, and also has been allocated by the "restore" kernel, swap its
+ * current contents with the previous (ie. "before hibernation") ones.
  *
- *	If the resume eventually fails, we can call this function once
- *	again and restore the "before resume" highmem state.
+ * If the restore eventually fails, we can call this function once again and
+ * restore the highmem state as seen by the restore kernel.
  */
-
 int restore_highmem(void)
 {
 	struct highmem_pbe *pbe = highmem_pblist;
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 5b70d64b871e..0acab9d7f96f 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -266,16 +266,18 @@ static int suspend_test(int level)
  */
 static int suspend_prepare(suspend_state_t state)
 {
-	int error;
+	int error, nr_calls = 0;
 
 	if (!sleep_state_supported(state))
 		return -EPERM;
 
 	pm_prepare_console();
 
-	error = pm_notifier_call_chain(PM_SUSPEND_PREPARE);
-	if (error)
+	error = __pm_notifier_call_chain(PM_SUSPEND_PREPARE, -1, &nr_calls);
+	if (error) {
+		nr_calls--;
 		goto Finish;
+	}
 
 	trace_suspend_resume(TPS("freeze_processes"), 0, true);
 	error = suspend_freeze_processes();
@@ -286,7 +288,7 @@ static int suspend_prepare(suspend_state_t state)
 	suspend_stats.failed_freeze++;
 	dpm_save_failed_step(SUSPEND_FREEZE);
  Finish:
-	pm_notifier_call_chain(PM_POST_SUSPEND);
+	__pm_notifier_call_chain(PM_POST_SUSPEND, nr_calls, NULL);
 	pm_restore_console();
 	return error;
 }
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index c1aaac431055..a3b1e617bcdc 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -350,6 +350,12 @@ static int swsusp_swap_check(void)
 	if (res < 0)
 		blkdev_put(hib_resume_bdev, FMODE_WRITE);
 
+	/*
+	 * Update the resume device to the one actually used,
+	 * so the test_resume mode can use it in case it is
+	 * invoked from hibernate() to test the snapshot.
+	 */
+	swsusp_resume_device = hib_resume_bdev->bd_dev;
 	return res;
 }
 
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 526e8911460a..35310b627388 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -47,7 +47,7 @@ atomic_t snapshot_device_available = ATOMIC_INIT(1);
 static int snapshot_open(struct inode *inode, struct file *filp)
 {
 	struct snapshot_data *data;
-	int error;
+	int error, nr_calls = 0;
 
 	if (!hibernation_available())
 		return -EPERM;
@@ -74,9 +74,9 @@ static int snapshot_open(struct inode *inode, struct file *filp)
 			swap_type_of(swsusp_resume_device, 0, NULL) : -1;
 		data->mode = O_RDONLY;
 		data->free_bitmaps = false;
-		error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
+		error = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &nr_calls);
 		if (error)
-			pm_notifier_call_chain(PM_POST_HIBERNATION);
+			__pm_notifier_call_chain(PM_POST_HIBERNATION, --nr_calls, NULL);
 	} else {
 		/*
 		 * Resuming.  We may need to wait for the image device to
@@ -86,13 +86,15 @@ static int snapshot_open(struct inode *inode, struct file *filp)
 
 		data->swap = -1;
 		data->mode = O_WRONLY;
-		error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
+		error = __pm_notifier_call_chain(PM_RESTORE_PREPARE, -1, &nr_calls);
 		if (!error) {
 			error = create_basic_memory_bitmaps();
 			data->free_bitmaps = !error;
-		}
+		} else
+			nr_calls--;
+
 		if (error)
-			pm_notifier_call_chain(PM_POST_RESTORE);
+			__pm_notifier_call_chain(PM_POST_RESTORE, nr_calls, NULL);
 	}
 	if (error)
 		atomic_inc(&snapshot_device_available);
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 14c4aa25cc45..a84641b222c1 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -47,6 +47,8 @@ struct sugov_cpu {
 	struct update_util_data update_util;
 	struct sugov_policy *sg_policy;
 
+	unsigned int cached_raw_freq;
+
 	/* The fields below are only needed when sharing a policy. */
 	unsigned long util;
 	unsigned long max;
@@ -106,7 +108,7 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
 
 /**
  * get_next_freq - Compute a new frequency for a given cpufreq policy.
- * @policy: cpufreq policy object to compute the new frequency for.
+ * @sg_cpu: schedutil cpu object to compute the new frequency for.
  * @util: Current CPU utilization.
  * @max: CPU capacity.
  *
@@ -121,14 +123,25 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
  * next_freq = C * curr_freq * util_raw / max
  *
  * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
+ *
+ * The lowest driver-supported frequency which is equal or greater than the raw
+ * next_freq (as calculated above) is returned, subject to policy min/max and
+ * cpufreq driver limitations.
  */
-static unsigned int get_next_freq(struct cpufreq_policy *policy,
-				  unsigned long util, unsigned long max)
+static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util,
+				  unsigned long max)
 {
+	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
+	struct cpufreq_policy *policy = sg_policy->policy;
 	unsigned int freq = arch_scale_freq_invariant() ?
 				policy->cpuinfo.max_freq : policy->cur;
 
-	return (freq + (freq >> 2)) * util / max;
+	freq = (freq + (freq >> 2)) * util / max;
+
+	if (freq == sg_cpu->cached_raw_freq && sg_policy->next_freq != UINT_MAX)
+		return sg_policy->next_freq;
+	sg_cpu->cached_raw_freq = freq;
+	return cpufreq_driver_resolve_freq(policy, freq);
 }
 
 static void sugov_update_single(struct update_util_data *hook, u64 time,
@@ -143,13 +156,14 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 		return;
 
 	next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq :
-			get_next_freq(policy, util, max);
+			get_next_freq(sg_cpu, util, max);
 	sugov_update_commit(sg_policy, time, next_f);
 }
 
-static unsigned int sugov_next_freq_shared(struct sugov_policy *sg_policy,
+static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
 					   unsigned long util, unsigned long max)
 {
+	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	struct cpufreq_policy *policy = sg_policy->policy;
 	unsigned int max_f = policy->cpuinfo.max_freq;
 	u64 last_freq_update_time = sg_policy->last_freq_update_time;
@@ -189,7 +203,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_policy *sg_policy,
 		}
 	}
 
-	return get_next_freq(policy, util, max);
+	return get_next_freq(sg_cpu, util, max);
 }
 
 static void sugov_update_shared(struct update_util_data *hook, u64 time,
@@ -206,7 +220,7 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time,
 	sg_cpu->last_update = time;
 
 	if (sugov_should_update_freq(sg_policy, time)) {
-		next_f = sugov_next_freq_shared(sg_policy, util, max);
+		next_f = sugov_next_freq_shared(sg_cpu, util, max);
 		sugov_update_commit(sg_policy, time, next_f);
 	}
 
@@ -394,7 +408,7 @@ static int sugov_init(struct cpufreq_policy *policy)
 	return ret;
 }
 
-static int sugov_exit(struct cpufreq_policy *policy)
+static void sugov_exit(struct cpufreq_policy *policy)
 {
 	struct sugov_policy *sg_policy = policy->governor_data;
 	struct sugov_tunables *tunables = sg_policy->tunables;
@@ -412,7 +426,6 @@ static int sugov_exit(struct cpufreq_policy *policy)
 	mutex_unlock(&global_tunables_lock);
 
 	sugov_policy_free(sg_policy);
-	return 0;
 }
 
 static int sugov_start(struct cpufreq_policy *policy)
@@ -434,6 +447,7 @@ static int sugov_start(struct cpufreq_policy *policy)
 			sg_cpu->util = ULONG_MAX;
 			sg_cpu->max = 0;
 			sg_cpu->last_update = 0;
+			sg_cpu->cached_raw_freq = 0;
 			cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
 						     sugov_update_shared);
 		} else {
@@ -444,7 +458,7 @@ static int sugov_start(struct cpufreq_policy *policy)
 	return 0;
 }
 
-static int sugov_stop(struct cpufreq_policy *policy)
+static void sugov_stop(struct cpufreq_policy *policy)
 {
 	struct sugov_policy *sg_policy = policy->governor_data;
 	unsigned int cpu;
@@ -456,53 +470,29 @@ static int sugov_stop(struct cpufreq_policy *policy)
 
 	irq_work_sync(&sg_policy->irq_work);
 	cancel_work_sync(&sg_policy->work);
-	return 0;
 }
 
-static int sugov_limits(struct cpufreq_policy *policy)
+static void sugov_limits(struct cpufreq_policy *policy)
 {
 	struct sugov_policy *sg_policy = policy->governor_data;
 
 	if (!policy->fast_switch_enabled) {
 		mutex_lock(&sg_policy->work_lock);
-
-		if (policy->max < policy->cur)
-			__cpufreq_driver_target(policy, policy->max,
-						CPUFREQ_RELATION_H);
-		else if (policy->min > policy->cur)
-			__cpufreq_driver_target(policy, policy->min,
-						CPUFREQ_RELATION_L);
-
+		cpufreq_policy_apply_limits(policy);
 		mutex_unlock(&sg_policy->work_lock);
 	}
 
 	sg_policy->need_freq_update = true;
-	return 0;
-}
-
-int sugov_governor(struct cpufreq_policy *policy, unsigned int event)
-{
-	if (event == CPUFREQ_GOV_POLICY_INIT) {
-		return sugov_init(policy);
-	} else if (policy->governor_data) {
-		switch (event) {
-		case CPUFREQ_GOV_POLICY_EXIT:
-			return sugov_exit(policy);
-		case CPUFREQ_GOV_START:
-			return sugov_start(policy);
-		case CPUFREQ_GOV_STOP:
-			return sugov_stop(policy);
-		case CPUFREQ_GOV_LIMITS:
-			return sugov_limits(policy);
-		}
-	}
-	return -EINVAL;
 }
 
 static struct cpufreq_governor schedutil_gov = {
 	.name = "schedutil",
-	.governor = sugov_governor,
 	.owner = THIS_MODULE,
+	.init = sugov_init,
+	.exit = sugov_exit,
+	.start = sugov_start,
+	.stop = sugov_stop,
+	.limits = sugov_limits,
 };
 
 static int __init sugov_module_init(void)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 97e7b793df35..d12bd958077e 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4369,8 +4369,8 @@ static void show_pwq(struct pool_workqueue *pwq)
 /**
  * show_workqueue_state - dump workqueue state
  *
- * Called from a sysrq handler and prints out all busy workqueues and
- * pools.
+ * Called from a sysrq handler or try_to_freeze_tasks() and prints out
+ * all busy workqueues and pools.
  */
 void show_workqueue_state(void)
 {
diff --git a/scripts/analyze_suspend.py b/scripts/analyze_suspend.py
index 93e1fd40f430..a0ba48fa2c5e 100755
--- a/scripts/analyze_suspend.py
+++ b/scripts/analyze_suspend.py
@@ -19,6 +19,17 @@
 # Authors:
 #	 Todd Brandt <todd.e.brandt@linux.intel.com>
 #
+# Links:
+#	 Home Page
+#	   https://01.org/suspendresume
+#	 Source repo
+#	   https://github.com/01org/suspendresume
+#	 Documentation
+#	   Getting Started
+#	     https://01.org/suspendresume/documentation/getting-started
+#	   Command List:
+#	     https://01.org/suspendresume/documentation/command-list
+#
 # Description:
 #	 This tool is designed to assist kernel and OS developers in optimizing
 #	 their linux stack's suspend/resume time. Using a kernel image built
@@ -35,6 +46,8 @@
 #		 CONFIG_FTRACE=y
 #		 CONFIG_FUNCTION_TRACER=y
 #		 CONFIG_FUNCTION_GRAPH_TRACER=y
+#		 CONFIG_KPROBES=y
+#		 CONFIG_KPROBES_ON_FTRACE=y
 #
 #	 For kernel versions older than 3.15:
 #	 The following additional kernel parameters are required:
@@ -52,6 +65,7 @@ import re
 import platform
 from datetime import datetime
 import struct
+import ConfigParser
 
 # ----------------- CLASSES --------------------
 
@@ -60,8 +74,15 @@ import struct
 #	 A global, single-instance container used to
 #	 store system values and test parameters
 class SystemValues:
-	version = 3.0
+	ansi = False
+	version = '4.2'
 	verbose = False
+	addlogs = False
+	mindevlen = 0.001
+	mincglen = 1.0
+	srgap = 0
+	cgexp = False
+	outdir = ''
 	testdir = '.'
 	tpath = '/sys/kernel/debug/tracing/'
 	fpdtpath = '/sys/firmware/acpi/tables/FPDT'
@@ -71,26 +92,21 @@ class SystemValues:
 		'device_pm_callback_end',
 		'device_pm_callback_start'
 	]
-	modename = {
-		'freeze': 'Suspend-To-Idle (S0)',
-		'standby': 'Power-On Suspend (S1)',
-		'mem': 'Suspend-to-RAM (S3)',
-		'disk': 'Suspend-to-disk (S4)'
-	}
+	testcommand = ''
 	mempath = '/dev/mem'
 	powerfile = '/sys/power/state'
 	suspendmode = 'mem'
 	hostname = 'localhost'
 	prefix = 'test'
 	teststamp = ''
+	dmesgstart = 0.0
 	dmesgfile = ''
 	ftracefile = ''
 	htmlfile = ''
+	embedded = False
 	rtcwake = False
 	rtcwaketime = 10
 	rtcpath = ''
-	android = False
-	adb = 'adb'
 	devicefilter = []
 	stamp = 0
 	execcount = 1
@@ -98,16 +114,90 @@ class SystemValues:
 	usecallgraph = False
 	usetraceevents = False
 	usetraceeventsonly = False
+	usetracemarkers = True
+	usekprobes = True
+	usedevsrc = False
 	notestrun = False
-	altdevname = dict()
+	devprops = dict()
 	postresumetime = 0
+	devpropfmt = '# Device Properties: .*'
 	tracertypefmt = '# tracer: (?P<t>.*)'
 	firmwarefmt = '# fwsuspend (?P<s>[0-9]*) fwresume (?P<r>[0-9]*)$'
 	postresumefmt = '# post resume time (?P<t>[0-9]*)$'
 	stampfmt = '# suspend-(?P<m>[0-9]{2})(?P<d>[0-9]{2})(?P<y>[0-9]{2})-'+\
 				'(?P<H>[0-9]{2})(?P<M>[0-9]{2})(?P<S>[0-9]{2})'+\
 				' (?P<host>.*) (?P<mode>.*) (?P<kernel>.*)$'
+	kprobecolor = 'rgba(204,204,204,0.5)'
+	synccolor = 'rgba(204,204,204,0.5)'
+	debugfuncs = []
+	tracefuncs = {
+		'sys_sync': dict(),
+		'pm_prepare_console': dict(),
+		'pm_notifier_call_chain': dict(),
+		'freeze_processes': dict(),
+		'freeze_kernel_threads': dict(),
+		'pm_restrict_gfp_mask': dict(),
+		'acpi_suspend_begin': dict(),
+		'suspend_console': dict(),
+		'acpi_pm_prepare': dict(),
+		'syscore_suspend': dict(),
+		'arch_enable_nonboot_cpus_end': dict(),
+		'syscore_resume': dict(),
+		'acpi_pm_finish': dict(),
+		'resume_console': dict(),
+		'acpi_pm_end': dict(),
+		'pm_restore_gfp_mask': dict(),
+		'thaw_processes': dict(),
+		'pm_restore_console': dict(),
+		'CPU_OFF': {
+			'func':'_cpu_down',
+			'args_x86_64': {'cpu':'%di:s32'},
+			'format': 'CPU_OFF[{cpu}]',
+			'mask': 'CPU_.*_DOWN'
+		},
+		'CPU_ON': {
+			'func':'_cpu_up',
+			'args_x86_64': {'cpu':'%di:s32'},
+			'format': 'CPU_ON[{cpu}]',
+			'mask': 'CPU_.*_UP'
+		},
+	}
+	dev_tracefuncs = {
+		# general wait/delay/sleep
+		'msleep': { 'args_x86_64': {'time':'%di:s32'} },
+		'udelay': { 'func':'__const_udelay', 'args_x86_64': {'loops':'%di:s32'} },
+		'acpi_os_stall': dict(),
+		# ACPI
+		'acpi_resume_power_resources': dict(),
+		'acpi_ps_parse_aml': dict(),
+		# filesystem
+		'ext4_sync_fs': dict(),
+		# ATA
+		'ata_eh_recover': { 'args_x86_64': {'port':'+36(%di):s32'} },
+		# i915
+		'i915_gem_restore_gtt_mappings': dict(),
+		'intel_opregion_setup': dict(),
+		'intel_dp_detect': dict(),
+		'intel_hdmi_detect': dict(),
+		'intel_opregion_init': dict(),
+	}
+	kprobes_postresume = [
+		{
+			'name': 'ataportrst',
+			'func': 'ata_eh_recover',
+			'args': {'port':'+36(%di):s32'},
+			'format': 'ata{port}_port_reset',
+			'mask': 'ata.*_port_reset'
+		}
+	]
+	kprobes = dict()
+	timeformat = '%.3f'
 	def __init__(self):
+		# if this is a phoronix test run, set some default options
+		if('LOG_FILE' in os.environ and 'TEST_RESULTS_IDENTIFIER' in os.environ):
+			self.embedded = True
+			self.addlogs = True
+			self.htmlfile = os.environ['LOG_FILE']
 		self.hostname = platform.node()
 		if(self.hostname == ''):
 			self.hostname = 'localhost'
@@ -118,6 +208,12 @@ class SystemValues:
 		if os.path.exists(rtc) and os.path.exists(rtc+'/date') and \
 			os.path.exists(rtc+'/time') and os.path.exists(rtc+'/wakealarm'):
 			self.rtcpath = rtc
+		if (hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()):
+			self.ansi = True
+	def setPrecision(self, num):
+		if num < 0 or num > 6:
+			return
+		self.timeformat = '%.{0}f'.format(num)
 	def setOutputFile(self):
 		if((self.htmlfile == '') and (self.dmesgfile != '')):
 			m = re.match('(?P<name>.*)_dmesg\.txt$', self.dmesgfile)
@@ -129,32 +225,37 @@ class SystemValues:
 				self.htmlfile = m.group('name')+'.html'
 		if(self.htmlfile == ''):
 			self.htmlfile = 'output.html'
-	def initTestOutput(self, subdir):
-		if(not self.android):
-			self.prefix = self.hostname
-			v = open('/proc/version', 'r').read().strip()
-			kver = string.split(v)[2]
-		else:
-			self.prefix = 'android'
-			v = os.popen(self.adb+' shell cat /proc/version').read().strip()
-			kver = string.split(v)[2]
-		testtime = datetime.now().strftime('suspend-%m%d%y-%H%M%S')
+	def initTestOutput(self, subdir, testpath=''):
+		self.prefix = self.hostname
+		v = open('/proc/version', 'r').read().strip()
+		kver = string.split(v)[2]
+		n = datetime.now()
+		testtime = n.strftime('suspend-%m%d%y-%H%M%S')
+		if not testpath:
+			testpath = n.strftime('suspend-%y%m%d-%H%M%S')
 		if(subdir != "."):
-			self.testdir = subdir+"/"+testtime
+			self.testdir = subdir+"/"+testpath
 		else:
-			self.testdir = testtime
+			self.testdir = testpath
 		self.teststamp = \
 			'# '+testtime+' '+self.prefix+' '+self.suspendmode+' '+kver
+		if(self.embedded):
+			self.dmesgfile = \
+				'/tmp/'+testtime+'_'+self.suspendmode+'_dmesg.txt'
+			self.ftracefile = \
+				'/tmp/'+testtime+'_'+self.suspendmode+'_ftrace.txt'
+			return
 		self.dmesgfile = \
 			self.testdir+'/'+self.prefix+'_'+self.suspendmode+'_dmesg.txt'
 		self.ftracefile = \
 			self.testdir+'/'+self.prefix+'_'+self.suspendmode+'_ftrace.txt'
 		self.htmlfile = \
 			self.testdir+'/'+self.prefix+'_'+self.suspendmode+'.html'
-		os.mkdir(self.testdir)
+		if not os.path.isdir(self.testdir):
+			os.mkdir(self.testdir)
 	def setDeviceFilter(self, devnames):
 		self.devicefilter = string.split(devnames)
-	def rtcWakeAlarm(self):
+	def rtcWakeAlarmOn(self):
 		os.system('echo 0 > '+self.rtcpath+'/wakealarm')
 		outD = open(self.rtcpath+'/date', 'r').read().strip()
 		outT = open(self.rtcpath+'/time', 'r').read().strip()
@@ -172,9 +273,361 @@ class SystemValues:
 			nowtime = int(datetime.now().strftime('%s'))
 		alarm = nowtime + self.rtcwaketime
 		os.system('echo %d > %s/wakealarm' % (alarm, self.rtcpath))
+	def rtcWakeAlarmOff(self):
+		os.system('echo 0 > %s/wakealarm' % self.rtcpath)
+	def initdmesg(self):
+		# get the latest time stamp from the dmesg log
+		fp = os.popen('dmesg')
+		ktime = '0'
+		for line in fp:
+			line = line.replace('\r\n', '')
+			idx = line.find('[')
+			if idx > 1:
+				line = line[idx:]
+			m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line)
+			if(m):
+				ktime = m.group('ktime')
+		fp.close()
+		self.dmesgstart = float(ktime)
+	def getdmesg(self):
+		# store all new dmesg lines since initdmesg was called
+		fp = os.popen('dmesg')
+		op = open(self.dmesgfile, 'a')
+		for line in fp:
+			line = line.replace('\r\n', '')
+			idx = line.find('[')
+			if idx > 1:
+				line = line[idx:]
+			m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line)
+			if(not m):
+				continue
+			ktime = float(m.group('ktime'))
+			if ktime > self.dmesgstart:
+				op.write(line)
+		fp.close()
+		op.close()
+	def addFtraceFilterFunctions(self, file):
+		fp = open(file)
+		list = fp.read().split('\n')
+		fp.close()
+		for i in list:
+			if len(i) < 2:
+				continue
+			self.tracefuncs[i] = dict()
+	def getFtraceFilterFunctions(self, current):
+		rootCheck(True)
+		if not current:
+			os.system('cat '+self.tpath+'available_filter_functions')
+			return
+		fp = open(self.tpath+'available_filter_functions')
+		master = fp.read().split('\n')
+		fp.close()
+		if len(self.debugfuncs) > 0:
+			for i in self.debugfuncs:
+				if i in master:
+					print i
+				else:
+					print self.colorText(i)
+		else:
+			for i in self.tracefuncs:
+				if 'func' in self.tracefuncs[i]:
+					i = self.tracefuncs[i]['func']
+				if i in master:
+					print i
+				else:
+					print self.colorText(i)
+	def setFtraceFilterFunctions(self, list):
+		fp = open(self.tpath+'available_filter_functions')
+		master = fp.read().split('\n')
+		fp.close()
+		flist = ''
+		for i in list:
+			if i not in master:
+				continue
+			if ' [' in i:
+				flist += i.split(' ')[0]+'\n'
+			else:
+				flist += i+'\n'
+		fp = open(self.tpath+'set_graph_function', 'w')
+		fp.write(flist)
+		fp.close()
+	def kprobeMatch(self, name, target):
+		if name not in self.kprobes:
+			return False
+		if re.match(self.kprobes[name]['mask'], target):
+			return True
+		return False
+	def basicKprobe(self, name):
+		self.kprobes[name] = {'name': name,'func': name,'args': dict(),'format': name,'mask': name}
+	def defaultKprobe(self, name, kdata):
+		k = kdata
+		for field in ['name', 'format', 'mask', 'func']:
+			if field not in k:
+				k[field] = name
+		archargs = 'args_'+platform.machine()
+		if archargs in k:
+			k['args'] = k[archargs]
+		else:
+			k['args'] = dict()
+			k['format'] = name
+		self.kprobes[name] = k
+	def kprobeColor(self, name):
+		if name not in self.kprobes or 'color' not in self.kprobes[name]:
+			return ''
+		return self.kprobes[name]['color']
+	def kprobeDisplayName(self, name, dataraw):
+		if name not in self.kprobes:
+			self.basicKprobe(name)
+		data = ''
+		quote=0
+		# first remvoe any spaces inside quotes, and the quotes
+		for c in dataraw:
+			if c == '"':
+				quote = (quote + 1) % 2
+			if quote and c == ' ':
+				data += '_'
+			elif c != '"':
+				data += c
+		fmt, args = self.kprobes[name]['format'], self.kprobes[name]['args']
+		arglist = dict()
+		# now process the args
+		for arg in sorted(args):
+			arglist[arg] = ''
+			m = re.match('.* '+arg+'=(?P<arg>.*) ', data);
+			if m:
+				arglist[arg] = m.group('arg')
+			else:
+				m = re.match('.* '+arg+'=(?P<arg>.*)', data);
+				if m:
+					arglist[arg] = m.group('arg')
+		out = fmt.format(**arglist)
+		out = out.replace(' ', '_').replace('"', '')
+		return out
+	def kprobeText(self, kprobe):
+		name, fmt, func, args = kprobe['name'], kprobe['format'], kprobe['func'], kprobe['args']
+		if re.findall('{(?P<n>[a-z,A-Z,0-9]*)}', func):
+			doError('Kprobe "%s" has format info in the function name "%s"' % (name, func), False)
+		for arg in re.findall('{(?P<n>[a-z,A-Z,0-9]*)}', fmt):
+			if arg not in args:
+				doError('Kprobe "%s" is missing argument "%s"' % (name, arg), False)
+		val = 'p:%s_cal %s' % (name, func)
+		for i in sorted(args):
+			val += ' %s=%s' % (i, args[i])
+		val += '\nr:%s_ret %s $retval\n' % (name, func)
+		return val
+	def addKprobes(self):
+		# first test each kprobe
+		print('INITIALIZING KPROBES...')
+		rejects = []
+		for name in sorted(self.kprobes):
+			if not self.testKprobe(self.kprobes[name]):
+				rejects.append(name)
+		# remove all failed ones from the list
+		for name in rejects:
+			vprint('Skipping KPROBE: %s' % name)
+			self.kprobes.pop(name)
+		self.fsetVal('', 'kprobe_events')
+		kprobeevents = ''
+		# set the kprobes all at once
+		for kp in self.kprobes:
+			val = self.kprobeText(self.kprobes[kp])
+			vprint('Adding KPROBE: %s\n%s' % (kp, val.strip()))
+			kprobeevents += self.kprobeText(self.kprobes[kp])
+		self.fsetVal(kprobeevents, 'kprobe_events')
+		# verify that the kprobes were set as ordered
+		check = self.fgetVal('kprobe_events')
+		linesout = len(kprobeevents.split('\n'))
+		linesack = len(check.split('\n'))
+		if linesack < linesout:
+			# if not, try appending the kprobes 1 by 1
+			for kp in self.kprobes:
+				kprobeevents = self.kprobeText(self.kprobes[kp])
+				self.fsetVal(kprobeevents, 'kprobe_events', 'a')
+		self.fsetVal('1', 'events/kprobes/enable')
+	def testKprobe(self, kprobe):
+		kprobeevents = self.kprobeText(kprobe)
+		if not kprobeevents:
+			return False
+		try:
+			self.fsetVal(kprobeevents, 'kprobe_events')
+			check = self.fgetVal('kprobe_events')
+		except:
+			return False
+		linesout = len(kprobeevents.split('\n'))
+		linesack = len(check.split('\n'))
+		if linesack < linesout:
+			return False
+		return True
+	def fsetVal(self, val, path, mode='w'):
+		file = self.tpath+path
+		if not os.path.exists(file):
+			return False
+		try:
+			fp = open(file, mode)
+			fp.write(val)
+			fp.close()
+		except:
+			pass
+		return True
+	def fgetVal(self, path):
+		file = self.tpath+path
+		res = ''
+		if not os.path.exists(file):
+			return res
+		try:
+			fp = open(file, 'r')
+			res = fp.read()
+			fp.close()
+		except:
+			pass
+		return res
+	def cleanupFtrace(self):
+		if(self.usecallgraph or self.usetraceevents):
+			self.fsetVal('0', 'events/kprobes/enable')
+			self.fsetVal('', 'kprobe_events')
+	def setupAllKprobes(self):
+		for name in self.tracefuncs:
+			self.defaultKprobe(name, self.tracefuncs[name])
+		for name in self.dev_tracefuncs:
+			self.defaultKprobe(name, self.dev_tracefuncs[name])
+	def isCallgraphFunc(self, name):
+		if len(self.debugfuncs) < 1 and self.suspendmode == 'command':
+			return True
+		if name in self.debugfuncs:
+			return True
+		funclist = []
+		for i in self.tracefuncs:
+			if 'func' in self.tracefuncs[i]:
+				funclist.append(self.tracefuncs[i]['func'])
+			else:
+				funclist.append(i)
+		if name in funclist:
+			return True
+		return False
+	def initFtrace(self, testing=False):
+		tp = self.tpath
+		print('INITIALIZING FTRACE...')
+		# turn trace off
+		self.fsetVal('0', 'tracing_on')
+		self.cleanupFtrace()
+		# set the trace clock to global
+		self.fsetVal('global', 'trace_clock')
+		# set trace buffer to a huge value
+		self.fsetVal('nop', 'current_tracer')
+		self.fsetVal('100000', 'buffer_size_kb')
+		# go no further if this is just a status check
+		if testing:
+			return
+		if self.usekprobes:
+			# add tracefunc kprobes so long as were not using full callgraph
+			if(not self.usecallgraph or len(self.debugfuncs) > 0):
+				for name in self.tracefuncs:
+					self.defaultKprobe(name, self.tracefuncs[name])
+				if self.usedevsrc:
+					for name in self.dev_tracefuncs:
+						self.defaultKprobe(name, self.dev_tracefuncs[name])
+			else:
+				self.usedevsrc = False
+			self.addKprobes()
+		# initialize the callgraph trace, unless this is an x2 run
+		if(self.usecallgraph):
+			# set trace type
+			self.fsetVal('function_graph', 'current_tracer')
+			self.fsetVal('', 'set_ftrace_filter')
+			# set trace format options
+			self.fsetVal('print-parent', 'trace_options')
+			self.fsetVal('funcgraph-abstime', 'trace_options')
+			self.fsetVal('funcgraph-cpu', 'trace_options')
+			self.fsetVal('funcgraph-duration', 'trace_options')
+			self.fsetVal('funcgraph-proc', 'trace_options')
+			self.fsetVal('funcgraph-tail', 'trace_options')
+			self.fsetVal('nofuncgraph-overhead', 'trace_options')
+			self.fsetVal('context-info', 'trace_options')
+			self.fsetVal('graph-time', 'trace_options')
+			self.fsetVal('0', 'max_graph_depth')
+			if len(self.debugfuncs) > 0:
+				self.setFtraceFilterFunctions(self.debugfuncs)
+			elif self.suspendmode == 'command':
+				self.fsetVal('', 'set_graph_function')
+			else:
+				cf = ['dpm_run_callback']
+				if(self.usetraceeventsonly):
+					cf += ['dpm_prepare', 'dpm_complete']
+				for fn in self.tracefuncs:
+					if 'func' in self.tracefuncs[fn]:
+						cf.append(self.tracefuncs[fn]['func'])
+					else:
+						cf.append(fn)
+				self.setFtraceFilterFunctions(cf)
+		if(self.usetraceevents):
+			# turn trace events on
+			events = iter(self.traceevents)
+			for e in events:
+				self.fsetVal('1', 'events/power/'+e+'/enable')
+		# clear the trace buffer
+		self.fsetVal('', 'trace')
+	def verifyFtrace(self):
+		# files needed for any trace data
+		files = ['buffer_size_kb', 'current_tracer', 'trace', 'trace_clock',
+				 'trace_marker', 'trace_options', 'tracing_on']
+		# files needed for callgraph trace data
+		tp = self.tpath
+		if(self.usecallgraph):
+			files += [
+				'available_filter_functions',
+				'set_ftrace_filter',
+				'set_graph_function'
+			]
+		for f in files:
+			if(os.path.exists(tp+f) == False):
+				return False
+		return True
+	def verifyKprobes(self):
+		# files needed for kprobes to work
+		files = ['kprobe_events', 'events']
+		tp = self.tpath
+		for f in files:
+			if(os.path.exists(tp+f) == False):
+				return False
+		return True
+	def colorText(self, str):
+		if not self.ansi:
+			return str
+		return '\x1B[31;40m'+str+'\x1B[m'
 
 sysvals = SystemValues()
 
+# Class: DevProps
+# Description:
+#	 Simple class which holds property values collected
+#	 for all the devices used in the timeline.
+class DevProps:
+	syspath = ''
+	altname = ''
+	async = True
+	xtraclass = ''
+	xtrainfo = ''
+	def out(self, dev):
+		return '%s,%s,%d;' % (dev, self.altname, self.async)
+	def debug(self, dev):
+		print '%s:\n\taltname = %s\n\t  async = %s' % (dev, self.altname, self.async)
+	def altName(self, dev):
+		if not self.altname or self.altname == dev:
+			return dev
+		return '%s [%s]' % (self.altname, dev)
+	def xtraClass(self):
+		if self.xtraclass:
+			return ' '+self.xtraclass
+		if not self.async:
+			return ' sync'
+		return ''
+	def xtraInfo(self):
+		if self.xtraclass:
+			return ' '+self.xtraclass
+		if self.async:
+			return ' async'
+		return ' sync'
+
 # Class: DeviceNode
 # Description:
 #	 A container used to create a device hierachy, with a single root node
@@ -228,6 +681,7 @@ class Data:
 	html_device_id = 0
 	stamp = 0
 	outfile = ''
+	dev_ubiquitous = ['msleep', 'udelay']
 	def __init__(self, num):
 		idchar = 'abcdefghijklmnopqrstuvwxyz'
 		self.testnumber = num
@@ -257,6 +711,9 @@ class Data:
 								'row': 0, 'color': '#FFFFCC', 'order': 9}
 		}
 		self.phases = self.sortedPhases()
+		self.devicegroups = []
+		for phase in self.phases:
+			self.devicegroups.append([phase])
 	def getStart(self):
 		return self.dmesg[self.phases[0]]['start']
 	def setStart(self, time):
@@ -273,51 +730,61 @@ class Data:
 			for dev in list:
 				d = list[dev]
 				if(d['pid'] == pid and time >= d['start'] and
-					time <= d['end']):
+					time < d['end']):
 					return False
 		return True
-	def addIntraDevTraceEvent(self, action, name, pid, time):
-		if(action == 'mutex_lock_try'):
-			color = 'red'
-		elif(action == 'mutex_lock_pass'):
-			color = 'green'
-		elif(action == 'mutex_unlock'):
-			color = 'blue'
-		else:
-			# create separate colors based on the name
-			v1 = len(name)*10 % 256
-			v2 = string.count(name, 'e')*100 % 256
-			v3 = ord(name[0])*20 % 256
-			color = '#%06X' % ((v1*0x10000) + (v2*0x100) + v3)
-		for phase in self.phases:
-			list = self.dmesg[phase]['list']
-			for dev in list:
-				d = list[dev]
-				if(d['pid'] == pid and time >= d['start'] and
-					time <= d['end']):
-					e = TraceEvent(action, name, color, time)
-					if('traceevents' not in d):
-						d['traceevents'] = []
-					d['traceevents'].append(e)
-					return d
-					break
-		return 0
-	def capIntraDevTraceEvent(self, action, name, pid, time):
-		for phase in self.phases:
+	def targetDevice(self, phaselist, start, end, pid=-1):
+		tgtdev = ''
+		for phase in phaselist:
 			list = self.dmesg[phase]['list']
-			for dev in list:
-				d = list[dev]
-				if(d['pid'] == pid and time >= d['start'] and
-					time <= d['end']):
-					if('traceevents' not in d):
-						return
-					for e in d['traceevents']:
-						if(e.action == action and
-							e.name == name and not e.ready):
-							e.length = time - e.time
-							e.ready = True
-							break
-					return
+			for devname in list:
+				dev = list[devname]
+				if(pid >= 0 and dev['pid'] != pid):
+					continue
+				devS = dev['start']
+				devE = dev['end']
+				if(start < devS or start >= devE or end <= devS or end > devE):
+					continue
+				tgtdev = dev
+				break
+		return tgtdev
+	def addDeviceFunctionCall(self, displayname, kprobename, proc, pid, start, end, cdata, rdata):
+		machstart = self.dmesg['suspend_machine']['start']
+		machend = self.dmesg['resume_machine']['end']
+		tgtdev = self.targetDevice(self.phases, start, end, pid)
+		if not tgtdev and start >= machstart and end < machend:
+			# device calls in machine phases should be serial
+			tgtdev = self.targetDevice(['suspend_machine', 'resume_machine'], start, end)
+		if not tgtdev:
+			if 'scsi_eh' in proc:
+				self.newActionGlobal(proc, start, end, pid)
+				self.addDeviceFunctionCall(displayname, kprobename, proc, pid, start, end, cdata, rdata)
+			else:
+				vprint('IGNORE: %s[%s](%d) [%f - %f] | %s | %s | %s' % (displayname, kprobename,
+					pid, start, end, cdata, rdata, proc))
+			return False
+		# detail block fits within tgtdev
+		if('src' not in tgtdev):
+			tgtdev['src'] = []
+		title = cdata+' '+rdata
+		mstr = '\(.*\) *(?P<args>.*) *\((?P<caller>.*)\+.* arg1=(?P<ret>.*)'
+		m = re.match(mstr, title)
+		if m:
+			c = m.group('caller')
+			a = m.group('args').strip()
+			r = m.group('ret')
+			if len(r) > 6:
+				r = ''
+			else:
+				r = 'ret=%s ' % r
+			l = '%0.3fms' % ((end - start) * 1000)
+			if kprobename in self.dev_ubiquitous:
+				title = '%s(%s) <- %s, %s(%s)' % (displayname, a, c, r, l)
+			else:
+				title = '%s(%s) %s(%s)' % (displayname, a, r, l)
+		e = TraceEvent(title, kprobename, start, end - start)
+		tgtdev['src'].append(e)
+		return True
 	def trimTimeVal(self, t, t0, dT, left):
 		if left:
 			if(t > t0):
@@ -353,11 +820,11 @@ class Data:
 					cg.end = self.trimTimeVal(cg.end, t0, dT, left)
 					for line in cg.list:
 						line.time = self.trimTimeVal(line.time, t0, dT, left)
-				if('traceevents' in d):
-					for e in d['traceevents']:
+				if('src' in d):
+					for e in d['src']:
 						e.time = self.trimTimeVal(e.time, t0, dT, left)
 	def normalizeTime(self, tZero):
-		# first trim out any standby or freeze clock time
+		# trim out any standby or freeze clock time
 		if(self.tSuspended != self.tResumed):
 			if(self.tResumed > tZero):
 				self.trimTime(self.tSuspended, \
@@ -365,29 +832,6 @@ class Data:
 			else:
 				self.trimTime(self.tSuspended, \
 					self.tResumed-self.tSuspended, False)
-		# shift the timeline so that tZero is the new 0
-		self.tSuspended -= tZero
-		self.tResumed -= tZero
-		self.start -= tZero
-		self.end -= tZero
-		for phase in self.phases:
-			p = self.dmesg[phase]
-			p['start'] -= tZero
-			p['end'] -= tZero
-			list = p['list']
-			for name in list:
-				d = list[name]
-				d['start'] -= tZero
-				d['end'] -= tZero
-				if('ftrace' in d):
-					cg = d['ftrace']
-					cg.start -= tZero
-					cg.end -= tZero
-					for line in cg.list:
-						line.time -= tZero
-				if('traceevents' in d):
-					for e in d['traceevents']:
-						e.time -= tZero
 	def newPhaseWithSingleAction(self, phasename, devname, start, end, color):
 		for phase in self.phases:
 			self.dmesg[phase]['order'] += 1
@@ -417,6 +861,7 @@ class Data:
 			{'list': list, 'start': start, 'end': end,
 			'row': 0, 'color': color, 'order': order}
 		self.phases = self.sortedPhases()
+		self.devicegroups.append([phasename])
 	def setPhase(self, phase, ktime, isbegin):
 		if(isbegin):
 			self.dmesg[phase]['start'] = ktime
@@ -442,7 +887,10 @@ class Data:
 		for devname in phaselist:
 			dev = phaselist[devname]
 			if(dev['end'] < 0):
-				dev['end'] = end
+				for p in self.phases:
+					if self.dmesg[p]['end'] > dev['start']:
+						dev['end'] = self.dmesg[p]['end']
+						break
 				vprint('%s (%s): callback didnt return' % (devname, phase))
 	def deviceFilter(self, devicefilter):
 		# remove all by the relatives of the filter devnames
@@ -472,22 +920,58 @@ class Data:
 		# if any calls never returned, clip them at system resume end
 		for phase in self.phases:
 			self.fixupInitcalls(phase, self.getEnd())
-	def newActionGlobal(self, name, start, end):
+	def isInsideTimeline(self, start, end):
+		if(self.start <= start and self.end > start):
+			return True
+		return False
+	def phaseOverlap(self, phases):
+		rmgroups = []
+		newgroup = []
+		for group in self.devicegroups:
+			for phase in phases:
+				if phase not in group:
+					continue
+				for p in group:
+					if p not in newgroup:
+						newgroup.append(p)
+				if group not in rmgroups:
+					rmgroups.append(group)
+		for group in rmgroups:
+			self.devicegroups.remove(group)
+		self.devicegroups.append(newgroup)
+	def newActionGlobal(self, name, start, end, pid=-1, color=''):
+		# if event starts before timeline start, expand timeline
+		if(start < self.start):
+			self.setStart(start)
+		# if event ends after timeline end, expand the timeline
+		if(end > self.end):
+			self.setEnd(end)
 		# which phase is this device callback or action "in"
 		targetphase = "none"
+		htmlclass = ''
 		overlap = 0.0
+		phases = []
 		for phase in self.phases:
 			pstart = self.dmesg[phase]['start']
 			pend = self.dmesg[phase]['end']
 			o = max(0, min(end, pend) - max(start, pstart))
-			if(o > overlap):
+			if o > 0:
+				phases.append(phase)
+			if o > overlap:
+				if overlap > 0 and phase == 'post_resume':
+					continue
 				targetphase = phase
 				overlap = o
+		if pid == -2:
+			htmlclass = ' bg'
+		if len(phases) > 1:
+			htmlclass = ' bg'
+			self.phaseOverlap(phases)
 		if targetphase in self.phases:
-			self.newAction(targetphase, name, -1, '', start, end, '')
-			return True
+			newname = self.newAction(targetphase, name, pid, '', start, end, '', htmlclass, color)
+			return (targetphase, newname)
 		return False
-	def newAction(self, phase, name, pid, parent, start, end, drv):
+	def newAction(self, phase, name, pid, parent, start, end, drv, htmlclass='', color=''):
 		# new device callback for a specific phase
 		self.html_device_id += 1
 		devid = '%s%d' % (self.idstr, self.html_device_id)
@@ -495,8 +979,19 @@ class Data:
 		length = -1.0
 		if(start >= 0 and end >= 0):
 			length = end - start
+		if pid == -2:
+			i = 2
+			origname = name
+			while(name in list):
+				name = '%s[%d]' % (origname, i)
+				i += 1
 		list[name] = {'start': start, 'end': end, 'pid': pid, 'par': parent,
 					  'length': length, 'row': 0, 'id': devid, 'drv': drv }
+		if htmlclass:
+			list[name]['htmlclass'] = htmlclass
+		if color:
+			list[name]['color'] = color
+		return name
 	def deviceIDs(self, devlist, phase):
 		idlist = []
 		list = self.dmesg[phase]['list']
@@ -536,10 +1031,21 @@ class Data:
 			vprint('    %16s: %f - %f (%d devices)' % (phase, \
 				self.dmesg[phase]['start'], self.dmesg[phase]['end'], dc))
 		vprint('            test end: %f' % self.end)
+	def deviceChildrenAllPhases(self, devname):
+		devlist = []
+		for phase in self.phases:
+			list = self.deviceChildren(devname, phase)
+			for dev in list:
+				if dev not in devlist:
+					devlist.append(dev)
+		return devlist
 	def masterTopology(self, name, list, depth):
 		node = DeviceNode(name, depth)
 		for cname in list:
-			clist = self.deviceChildren(cname, 'resume')
+			# avoid recursions
+			if name == cname:
+				continue
+			clist = self.deviceChildrenAllPhases(cname)
 			cnode = self.masterTopology(cname, clist, depth+1)
 			node.children.append(cnode)
 		return node
@@ -580,7 +1086,8 @@ class Data:
 			list = self.dmesg[phase]['list']
 			for dev in list:
 				pdev = list[dev]['par']
-				if(re.match('[0-9]*-[0-9]*\.[0-9]*[\.0-9]*\:[\.0-9]*$', pdev)):
+				pid = list[dev]['pid']
+				if(pid < 0 or re.match('[0-9]*-[0-9]*\.[0-9]*[\.0-9]*\:[\.0-9]*$', pdev)):
 					continue
 				if pdev and pdev not in real and pdev not in rootlist:
 					rootlist.append(pdev)
@@ -589,22 +1096,33 @@ class Data:
 		rootlist = self.rootDeviceList()
 		master = self.masterTopology('', rootlist, 0)
 		return self.printTopology(master)
+	def selectTimelineDevices(self, widfmt, tTotal, mindevlen):
+		# only select devices that will actually show up in html
+		self.tdevlist = dict()
+		for phase in self.dmesg:
+			devlist = []
+			list = self.dmesg[phase]['list']
+			for dev in list:
+				length = (list[dev]['end'] - list[dev]['start']) * 1000
+				width = widfmt % (((list[dev]['end']-list[dev]['start'])*100)/tTotal)
+				if width != '0.000000' and length >= mindevlen:
+					devlist.append(dev)
+			self.tdevlist[phase] = devlist
 
 # Class: TraceEvent
 # Description:
 #	 A container for trace event data found in the ftrace file
 class TraceEvent:
-	ready = False
-	name = ''
+	text = ''
 	time = 0.0
-	color = '#FFFFFF'
 	length = 0.0
-	action = ''
-	def __init__(self, a, n, c, t):
-		self.action = a
-		self.name = n
-		self.color = c
+	title = ''
+	row = 0
+	def __init__(self, a, n, t, l):
+		self.title = a
+		self.text = n
 		self.time = t
+		self.length = l
 
 # Class: FTraceLine
 # Description:
@@ -623,11 +1141,14 @@ class FTraceLine:
 	fcall = False
 	freturn = False
 	fevent = False
+	fkprobe = False
 	depth = 0
 	name = ''
 	type = ''
-	def __init__(self, t, m, d):
+	def __init__(self, t, m='', d=''):
 		self.time = float(t)
+		if not m and not d:
+			return
 		# is this a trace event
 		if(d == 'traceevent' or re.match('^ *\/\* *(?P<msg>.*) \*\/ *$', m)):
 			if(d == 'traceevent'):
@@ -644,6 +1165,18 @@ class FTraceLine:
 				self.type = emm.group('call')
 			else:
 				self.name = msg
+			km = re.match('^(?P<n>.*)_cal$', self.type)
+			if km:
+				self.fcall = True
+				self.fkprobe = True
+				self.type = km.group('n')
+				return
+			km = re.match('^(?P<n>.*)_ret$', self.type)
+			if km:
+				self.freturn = True
+				self.fkprobe = True
+				self.type = km.group('n')
+				return
 			self.fevent = True
 			return
 		# convert the duration to seconds
@@ -662,7 +1195,7 @@ class FTraceLine:
 				# includes comment with function name
 				match = re.match('^} *\/\* *(?P<n>.*) *\*\/$', m)
 				if(match):
-					self.name = match.group('n')
+					self.name = match.group('n').strip()
 		# function call
 		else:
 			self.fcall = True
@@ -670,19 +1203,19 @@ class FTraceLine:
 			if(m[-1] == '{'):
 				match = re.match('^(?P<n>.*) *\(.*', m)
 				if(match):
-					self.name = match.group('n')
+					self.name = match.group('n').strip()
 			# function call with no children (leaf)
 			elif(m[-1] == ';'):
 				self.freturn = True
 				match = re.match('^(?P<n>.*) *\(.*', m)
 				if(match):
-					self.name = match.group('n')
+					self.name = match.group('n').strip()
 			# something else (possibly a trace marker)
 			else:
 				self.name = m
 	def getDepth(self, str):
 		return len(str)/2
-	def debugPrint(self, dev):
+	def debugPrint(self, dev=''):
 		if(self.freturn and self.fcall):
 			print('%s -- %f (%02d): %s(); (%.3f us)' % (dev, self.time, \
 				self.depth, self.name, self.length*1000000))
@@ -692,6 +1225,33 @@ class FTraceLine:
 		else:
 			print('%s -- %f (%02d): %s() { (%.3f us)' % (dev, self.time, \
 				self.depth, self.name, self.length*1000000))
+	def startMarker(self):
+		global sysvals
+		# Is this the starting line of a suspend?
+		if not self.fevent:
+			return False
+		if sysvals.usetracemarkers:
+			if(self.name == 'SUSPEND START'):
+				return True
+			return False
+		else:
+			if(self.type == 'suspend_resume' and
+				re.match('suspend_enter\[.*\] begin', self.name)):
+				return True
+			return False
+	def endMarker(self):
+		# Is this the ending line of a resume?
+		if not self.fevent:
+			return False
+		if sysvals.usetracemarkers:
+			if(self.name == 'RESUME COMPLETE'):
+				return True
+			return False
+		else:
+			if(self.type == 'suspend_resume' and
+				re.match('thaw_processes\[.*\] end', self.name)):
+				return True
+			return False
 
 # Class: FTraceCallGraph
 # Description:
@@ -705,54 +1265,124 @@ class FTraceCallGraph:
 	list = []
 	invalid = False
 	depth = 0
-	def __init__(self):
+	pid = 0
+	def __init__(self, pid):
 		self.start = -1.0
 		self.end = -1.0
 		self.list = []
 		self.depth = 0
-	def setDepth(self, line):
+		self.pid = pid
+	def addLine(self, line, debug=False):
+		# if this is already invalid, just leave
+		if(self.invalid):
+			return False
+		# invalidate on too much data or bad depth
+		if(len(self.list) >= 1000000 or self.depth < 0):
+			self.invalidate(line)
+			return False
+		# compare current depth with this lines pre-call depth
+		prelinedep = line.depth
+		if(line.freturn and not line.fcall):
+			prelinedep += 1
+		last = 0
+		lasttime = line.time
+		virtualfname = 'execution_misalignment'
+		if len(self.list) > 0:
+			last = self.list[-1]
+			lasttime = last.time
+		# handle low misalignments by inserting returns
+		if prelinedep < self.depth:
+			if debug and last:
+				print '-------- task %d --------' % self.pid
+				last.debugPrint()
+			idx = 0
+			# add return calls to get the depth down
+			while prelinedep < self.depth:
+				if debug:
+					print 'MISALIGN LOW (add returns): C%d - eC%d' % (self.depth, prelinedep)
+				self.depth -= 1
+				if idx == 0 and last and last.fcall and not last.freturn:
+					# special case, turn last call into a leaf
+					last.depth = self.depth
+					last.freturn = True
+					last.length = line.time - last.time
+					if debug:
+						last.debugPrint()
+				else:
+					vline = FTraceLine(lasttime)
+					vline.depth = self.depth
+					vline.name = virtualfname
+					vline.freturn = True
+					self.list.append(vline)
+					if debug:
+						vline.debugPrint()
+				idx += 1
+			if debug:
+				line.debugPrint()
+				print ''
+		# handle high misalignments by inserting calls
+		elif prelinedep > self.depth:
+			if debug and last:
+				print '-------- task %d --------' % self.pid
+				last.debugPrint()
+			idx = 0
+			# add calls to get the depth up
+			while prelinedep > self.depth:
+				if debug:
+					print 'MISALIGN HIGH (add calls): C%d - eC%d' % (self.depth, prelinedep)
+				if idx == 0 and line.freturn and not line.fcall:
+					# special case, turn this return into a leaf
+					line.fcall = True
+					prelinedep -= 1
+				else:
+					vline = FTraceLine(lasttime)
+					vline.depth = self.depth
+					vline.name = virtualfname
+					vline.fcall = True
+					if debug:
+						vline.debugPrint()
+					self.list.append(vline)
+					self.depth += 1
+					if not last:
+						self.start = vline.time
+				idx += 1
+			if debug:
+				line.debugPrint()
+				print ''
+		# process the call and set the new depth
 		if(line.fcall and not line.freturn):
-			line.depth = self.depth
 			self.depth += 1
 		elif(line.freturn and not line.fcall):
 			self.depth -= 1
-			line.depth = self.depth
-		else:
-			line.depth = self.depth
-	def addLine(self, line, match):
-		if(not self.invalid):
-			self.setDepth(line)
+		if len(self.list) < 1:
+			self.start = line.time
+		self.list.append(line)
 		if(line.depth == 0 and line.freturn):
 			if(self.start < 0):
 				self.start = line.time
 			self.end = line.time
-			self.list.append(line)
+			if line.fcall:
+				self.end += line.length
+			if self.list[0].name == virtualfname:
+				self.invalid = True
 			return True
-		if(self.invalid):
-			return False
-		if(len(self.list) >= 1000000 or self.depth < 0):
-			if(len(self.list) > 0):
-				first = self.list[0]
-				self.list = []
-				self.list.append(first)
-			self.invalid = True
-			if(not match):
-				return False
-			id = 'task %s cpu %s' % (match.group('pid'), match.group('cpu'))
-			window = '(%f - %f)' % (self.start, line.time)
-			if(self.depth < 0):
-				print('Too much data for '+id+\
-					' (buffer overflow), ignoring this callback')
-			else:
-				print('Too much data for '+id+\
-					' '+window+', ignoring this callback')
-			return False
-		self.list.append(line)
-		if(self.start < 0):
-			self.start = line.time
 		return False
+	def invalidate(self, line):
+		if(len(self.list) > 0):
+			first = self.list[0]
+			self.list = []
+			self.list.append(first)
+		self.invalid = True
+		id = 'task %s' % (self.pid)
+		window = '(%f - %f)' % (self.start, line.time)
+		if(self.depth < 0):
+			vprint('Too much data for '+id+\
+				' (buffer overflow), ignoring this callback')
+		else:
+			vprint('Too much data for '+id+\
+				' '+window+', ignoring this callback')
 	def slice(self, t0, tN):
-		minicg = FTraceCallGraph()
+		minicg = FTraceCallGraph(0)
 		count = -1
 		firstdepth = 0
 		for l in self.list:
@@ -764,13 +1394,26 @@ class FTraceCallGraph:
 				firstdepth = l.depth
 				count = 0
 			l.depth -= firstdepth
-			minicg.addLine(l, 0)
+			minicg.addLine(l)
 			if((count == 0 and l.freturn and l.fcall) or
 				(count > 0 and l.depth <= 0)):
 				break
 			count += 1
 		return minicg
-	def sanityCheck(self):
+	def repair(self, enddepth):
+		# bring the depth back to 0 with additional returns
+		fixed = False
+		last = self.list[-1]
+		for i in reversed(range(enddepth)):
+			t = FTraceLine(last.time)
+			t.depth = i
+			t.freturn = True
+			fixed = self.addLine(t)
+			if fixed:
+				self.end = last.time
+				return True
+		return False
+	def postProcess(self, debug=False):
 		stack = dict()
 		cnt = 0
 		for l in self.list:
@@ -779,98 +1422,317 @@ class FTraceCallGraph:
 				cnt += 1
 			elif(l.freturn and not l.fcall):
 				if(l.depth not in stack):
+					if debug:
+						print 'Post Process Error: Depth missing'
+						l.debugPrint()
 					return False
+				# transfer total time from return line to call line
 				stack[l.depth].length = l.length
-				stack[l.depth] = 0
+				stack.pop(l.depth)
 				l.length = 0
 				cnt -= 1
 		if(cnt == 0):
+			# trace caught the whole call tree
 			return True
-		return False
-	def debugPrint(self, filename):
-		if(filename == 'stdout'):
-			print('[%f - %f]') % (self.start, self.end)
-			for l in self.list:
-				if(l.freturn and l.fcall):
-					print('%f (%02d): %s(); (%.3f us)' % (l.time, \
-						l.depth, l.name, l.length*1000000))
-				elif(l.freturn):
-					print('%f (%02d): %s} (%.3f us)' % (l.time, \
-						l.depth, l.name, l.length*1000000))
-				else:
-					print('%f (%02d): %s() { (%.3f us)' % (l.time, \
-						l.depth, l.name, l.length*1000000))
-			print(' ')
-		else:
-			fp = open(filename, 'w')
-			print(filename)
-			for l in self.list:
-				if(l.freturn and l.fcall):
-					fp.write('%f (%02d): %s(); (%.3f us)\n' % (l.time, \
-						l.depth, l.name, l.length*1000000))
-				elif(l.freturn):
-					fp.write('%f (%02d): %s} (%.3f us)\n' % (l.time, \
-						l.depth, l.name, l.length*1000000))
-				else:
-					fp.write('%f (%02d): %s() { (%.3f us)\n' % (l.time, \
-						l.depth, l.name, l.length*1000000))
-			fp.close()
+		elif(cnt < 0):
+			if debug:
+				print 'Post Process Error: Depth is less than 0'
+			return False
+		# trace ended before call tree finished
+		return self.repair(cnt)
+	def deviceMatch(self, pid, data):
+		found = False
+		# add the callgraph data to the device hierarchy
+		borderphase = {
+			'dpm_prepare': 'suspend_prepare',
+			'dpm_complete': 'resume_complete'
+		}
+		if(self.list[0].name in borderphase):
+			p = borderphase[self.list[0].name]
+			list = data.dmesg[p]['list']
+			for devname in list:
+				dev = list[devname]
+				if(pid == dev['pid'] and
+					self.start <= dev['start'] and
+					self.end >= dev['end']):
+					dev['ftrace'] = self.slice(dev['start'], dev['end'])
+					found = True
+			return found
+		for p in data.phases:
+			if(data.dmesg[p]['start'] <= self.start and
+				self.start <= data.dmesg[p]['end']):
+				list = data.dmesg[p]['list']
+				for devname in list:
+					dev = list[devname]
+					if(pid == dev['pid'] and
+						self.start <= dev['start'] and
+						self.end >= dev['end']):
+						dev['ftrace'] = self
+						found = True
+						break
+				break
+		return found
+	def newActionFromFunction(self, data):
+		name = self.list[0].name
+		if name in ['dpm_run_callback', 'dpm_prepare', 'dpm_complete']:
+			return
+		fs = self.start
+		fe = self.end
+		if fs < data.start or fe > data.end:
+			return
+		phase = ''
+		for p in data.phases:
+			if(data.dmesg[p]['start'] <= self.start and
+				self.start < data.dmesg[p]['end']):
+				phase = p
+				break
+		if not phase:
+			return
+		out = data.newActionGlobal(name, fs, fe, -2)
+		if out:
+			phase, myname = out
+			data.dmesg[phase]['list'][myname]['ftrace'] = self
+	def debugPrint(self):
+		print('[%f - %f] %s (%d)') % (self.start, self.end, self.list[0].name, self.pid)
+		for l in self.list:
+			if(l.freturn and l.fcall):
+				print('%f (%02d): %s(); (%.3f us)' % (l.time, \
+					l.depth, l.name, l.length*1000000))
+			elif(l.freturn):
+				print('%f (%02d): %s} (%.3f us)' % (l.time, \
+					l.depth, l.name, l.length*1000000))
+			else:
+				print('%f (%02d): %s() { (%.3f us)' % (l.time, \
+					l.depth, l.name, l.length*1000000))
+		print(' ')
 
 # Class: Timeline
 # Description:
-#	 A container for a suspend/resume html timeline. In older versions
-#	 of the script there were multiple timelines, but in the latest
-#	 there is only one.
+#	 A container for a device timeline which calculates
+#	 all the html properties to display it correctly
 class Timeline:
 	html = {}
-	scaleH = 0.0 # height of the row as a percent of the timeline height
-	rowH = 0.0 # height of each row in percent of the timeline height
-	row_height_pixels = 30
-	maxrows = 0
-	height = 0
-	def __init__(self):
+	height = 0	# total timeline height
+	scaleH = 20	# timescale (top) row height
+	rowH = 30	# device row height
+	bodyH = 0	# body height
+	rows = 0	# total timeline rows
+	phases = []
+	rowmaxlines = dict()
+	rowcount = dict()
+	rowheight = dict()
+	def __init__(self, rowheight):
+		self.rowH = rowheight
 		self.html = {
+			'header': '',
 			'timeline': '',
 			'legend': '',
-			'scale': ''
 		}
-	def setRows(self, rows):
-		self.maxrows = int(rows)
-		self.scaleH = 100.0/float(self.maxrows)
-		self.height = self.maxrows*self.row_height_pixels
-		r = float(self.maxrows - 1)
-		if(r < 1.0):
-			r = 1.0
-		self.rowH = (100.0 - self.scaleH)/r
+	# Function: getDeviceRows
+	# Description:
+	#    determine how may rows the device funcs will take
+	# Arguments:
+	#	 rawlist: the list of devices/actions for a single phase
+	# Output:
+	#	 The total number of rows needed to display this phase of the timeline
+	def getDeviceRows(self, rawlist):
+		# clear all rows and set them to undefined
+		lendict = dict()
+		for item in rawlist:
+			item.row = -1
+			lendict[item] = item.length
+		list = []
+		for i in sorted(lendict, key=lendict.get, reverse=True):
+			list.append(i)
+		remaining = len(list)
+		rowdata = dict()
+		row = 1
+		# try to pack each row with as many ranges as possible
+		while(remaining > 0):
+			if(row not in rowdata):
+				rowdata[row] = []
+			for i in list:
+				if(i.row >= 0):
+					continue
+				s = i.time
+				e = i.time + i.length
+				valid = True
+				for ritem in rowdata[row]:
+					rs = ritem.time
+					re = ritem.time + ritem.length
+					if(not (((s <= rs) and (e <= rs)) or
+						((s >= re) and (e >= re)))):
+						valid = False
+						break
+				if(valid):
+					rowdata[row].append(i)
+					i.row = row
+					remaining -= 1
+			row += 1
+		return row
+	# Function: getPhaseRows
+	# Description:
+	#	 Organize the timeline entries into the smallest
+	#	 number of rows possible, with no entry overlapping
+	# Arguments:
+	#	 list: the list of devices/actions for a single phase
+	#	 devlist: string list of device names to use
+	# Output:
+	#	 The total number of rows needed to display this phase of the timeline
+	def getPhaseRows(self, dmesg, devlist):
+		# clear all rows and set them to undefined
+		remaining = len(devlist)
+		rowdata = dict()
+		row = 0
+		lendict = dict()
+		myphases = []
+		for item in devlist:
+			if item[0] not in self.phases:
+				self.phases.append(item[0])
+			if item[0] not in myphases:
+				myphases.append(item[0])
+				self.rowmaxlines[item[0]] = dict()
+				self.rowheight[item[0]] = dict()
+			dev = dmesg[item[0]]['list'][item[1]]
+			dev['row'] = -1
+			lendict[item] = float(dev['end']) - float(dev['start'])
+			if 'src' in dev:
+				dev['devrows'] = self.getDeviceRows(dev['src'])
+		lenlist = []
+		for i in sorted(lendict, key=lendict.get, reverse=True):
+			lenlist.append(i)
+		orderedlist = []
+		for item in lenlist:
+			dev = dmesg[item[0]]['list'][item[1]]
+			if dev['pid'] == -2:
+				orderedlist.append(item)
+		for item in lenlist:
+			if item not in orderedlist:
+				orderedlist.append(item)
+		# try to pack each row with as many ranges as possible
+		while(remaining > 0):
+			rowheight = 1
+			if(row not in rowdata):
+				rowdata[row] = []
+			for item in orderedlist:
+				dev = dmesg[item[0]]['list'][item[1]]
+				if(dev['row'] < 0):
+					s = dev['start']
+					e = dev['end']
+					valid = True
+					for ritem in rowdata[row]:
+						rs = ritem['start']
+						re = ritem['end']
+						if(not (((s <= rs) and (e <= rs)) or
+							((s >= re) and (e >= re)))):
+							valid = False
+							break
+					if(valid):
+						rowdata[row].append(dev)
+						dev['row'] = row
+						remaining -= 1
+						if 'devrows' in dev and dev['devrows'] > rowheight:
+							rowheight = dev['devrows']
+			for phase in myphases:
+				self.rowmaxlines[phase][row] = rowheight
+				self.rowheight[phase][row] = rowheight * self.rowH
+			row += 1
+		if(row > self.rows):
+			self.rows = int(row)
+		for phase in myphases:
+			self.rowcount[phase] = row
+		return row
+	def phaseRowHeight(self, phase, row):
+		return self.rowheight[phase][row]
+	def phaseRowTop(self, phase, row):
+		top = 0
+		for i in sorted(self.rowheight[phase]):
+			if i >= row:
+				break
+			top += self.rowheight[phase][i]
+		return top
+	# Function: calcTotalRows
+	# Description:
+	#	 Calculate the heights and offsets for the header and rows
+	def calcTotalRows(self):
+		maxrows = 0
+		standardphases = []
+		for phase in self.phases:
+			total = 0
+			for i in sorted(self.rowmaxlines[phase]):
+				total += self.rowmaxlines[phase][i]
+			if total > maxrows:
+				maxrows = total
+			if total == self.rowcount[phase]:
+				standardphases.append(phase)
+		self.height = self.scaleH + (maxrows*self.rowH)
+		self.bodyH = self.height - self.scaleH
+		for phase in standardphases:
+			for i in sorted(self.rowheight[phase]):
+				self.rowheight[phase][i] = self.bodyH/self.rowcount[phase]
+	# Function: createTimeScale
+	# Description:
+	#	 Create the timescale for a timeline block
+	# Arguments:
+	#	 m0: start time (mode begin)
+	#	 mMax: end time (mode end)
+	#	 tTotal: total timeline time
+	#	 mode: suspend or resume
+	# Output:
+	#	 The html code needed to display the time scale
+	def createTimeScale(self, m0, mMax, tTotal, mode):
+		timescale = '<div class="t" style="right:{0}%">{1}</div>\n'
+		rline = '<div class="t" style="left:0;border-left:1px solid black;border-right:0;">Resume</div>\n'
+		output = '<div class="timescale">\n'
+		# set scale for timeline
+		mTotal = mMax - m0
+		tS = 0.1
+		if(tTotal <= 0):
+			return output+'</div>\n'
+		if(tTotal > 4):
+			tS = 1
+		divTotal = int(mTotal/tS) + 1
+		divEdge = (mTotal - tS*(divTotal-1))*100/mTotal
+		for i in range(divTotal):
+			htmlline = ''
+			if(mode == 'resume'):
+				pos = '%0.3f' % (100 - ((float(i)*tS*100)/mTotal))
+				val = '%0.fms' % (float(i)*tS*1000)
+				htmlline = timescale.format(pos, val)
+				if(i == 0):
+					htmlline = rline
+			else:
+				pos = '%0.3f' % (100 - ((float(i)*tS*100)/mTotal) - divEdge)
+				val = '%0.fms' % (float(i-divTotal+1)*tS*1000)
+				if(i == divTotal - 1):
+					val = 'Suspend'
+				htmlline = timescale.format(pos, val)
+			output += htmlline
+		output += '</div>\n'
+		return output
 
-# Class: TestRun
+# Class: TestProps
 # Description:
-#	 A container for a suspend/resume test run. This is necessary as
-#	 there could be more than one, and they need to be separate.
-class TestRun:
+#	 A list of values describing the properties of these test runs
+class TestProps:
+	stamp = ''
+	tracertype = ''
+	S0i3 = False
+	fwdata = []
 	ftrace_line_fmt_fg = \
 		'^ *(?P<time>[0-9\.]*) *\| *(?P<cpu>[0-9]*)\)'+\
 		' *(?P<proc>.*)-(?P<pid>[0-9]*) *\|'+\
-		'[ +!]*(?P<dur>[0-9\.]*) .*\|  (?P<msg>.*)'
+		'[ +!#\*@$]*(?P<dur>[0-9\.]*) .*\|  (?P<msg>.*)'
 	ftrace_line_fmt_nop = \
 		' *(?P<proc>.*)-(?P<pid>[0-9]*) *\[(?P<cpu>[0-9]*)\] *'+\
 		'(?P<flags>.{4}) *(?P<time>[0-9\.]*): *'+\
 		'(?P<msg>.*)'
 	ftrace_line_fmt = ftrace_line_fmt_nop
 	cgformat = False
-	ftemp = dict()
-	ttemp = dict()
-	inthepipe = False
-	tracertype = ''
 	data = 0
-	def __init__(self, dataobj):
-		self.data = dataobj
-		self.ftemp = dict()
-		self.ttemp = dict()
-	def isReady(self):
-		if(tracertype == '' or not data):
-			return False
-		return True
+	ktemp = dict()
+	def __init__(self):
+		self.ktemp = dict()
 	def setTracerType(self, tracer):
 		self.tracertype = tracer
 		if(tracer == 'function_graph'):
@@ -881,6 +1743,19 @@ class TestRun:
 		else:
 			doError('Invalid tracer format: [%s]' % tracer, False)
 
+# Class: TestRun
+# Description:
+#	 A container for a suspend/resume test run. This is necessary as
+#	 there could be more than one, and they need to be separate.
+class TestRun:
+	ftemp = dict()
+	ttemp = dict()
+	data = 0
+	def __init__(self, dataobj):
+		self.data = dataobj
+		self.ftemp = dict()
+		self.ttemp = dict()
+
 # ----------------- FUNCTIONS --------------------
 
 # Function: vprint
@@ -893,104 +1768,16 @@ def vprint(msg):
 	if(sysvals.verbose):
 		print(msg)
 
-# Function: initFtrace
-# Description:
-#	 Configure ftrace to use trace events and/or a callgraph
-def initFtrace():
-	global sysvals
-
-	tp = sysvals.tpath
-	cf = 'dpm_run_callback'
-	if(sysvals.usetraceeventsonly):
-		cf = '-e dpm_prepare -e dpm_complete -e dpm_run_callback'
-	if(sysvals.usecallgraph or sysvals.usetraceevents):
-		print('INITIALIZING FTRACE...')
-		# turn trace off
-		os.system('echo 0 > '+tp+'tracing_on')
-		# set the trace clock to global
-		os.system('echo global > '+tp+'trace_clock')
-		# set trace buffer to a huge value
-		os.system('echo nop > '+tp+'current_tracer')
-		os.system('echo 100000 > '+tp+'buffer_size_kb')
-		# initialize the callgraph trace, unless this is an x2 run
-		if(sysvals.usecallgraph and sysvals.execcount == 1):
-			# set trace type
-			os.system('echo function_graph > '+tp+'current_tracer')
-			os.system('echo "" > '+tp+'set_ftrace_filter')
-			# set trace format options
-			os.system('echo funcgraph-abstime > '+tp+'trace_options')
-			os.system('echo funcgraph-proc > '+tp+'trace_options')
-			# focus only on device suspend and resume
-			os.system('cat '+tp+'available_filter_functions | grep '+\
-				cf+' > '+tp+'set_graph_function')
-		if(sysvals.usetraceevents):
-			# turn trace events on
-			events = iter(sysvals.traceevents)
-			for e in events:
-				os.system('echo 1 > '+sysvals.epath+e+'/enable')
-		# clear the trace buffer
-		os.system('echo "" > '+tp+'trace')
-
-# Function: initFtraceAndroid
-# Description:
-#	 Configure ftrace to capture trace events
-def initFtraceAndroid():
-	global sysvals
-
-	tp = sysvals.tpath
-	if(sysvals.usetraceevents):
-		print('INITIALIZING FTRACE...')
-		# turn trace off
-		os.system(sysvals.adb+" shell 'echo 0 > "+tp+"tracing_on'")
-		# set the trace clock to global
-		os.system(sysvals.adb+" shell 'echo global > "+tp+"trace_clock'")
-		# set trace buffer to a huge value
-		os.system(sysvals.adb+" shell 'echo nop > "+tp+"current_tracer'")
-		os.system(sysvals.adb+" shell 'echo 10000 > "+tp+"buffer_size_kb'")
-		# turn trace events on
-		events = iter(sysvals.traceevents)
-		for e in events:
-			os.system(sysvals.adb+" shell 'echo 1 > "+\
-				sysvals.epath+e+"/enable'")
-		# clear the trace buffer
-		os.system(sysvals.adb+" shell 'echo \"\" > "+tp+"trace'")
-
-# Function: verifyFtrace
-# Description:
-#	 Check that ftrace is working on the system
-# Output:
-#	 True or False
-def verifyFtrace():
-	global sysvals
-	# files needed for any trace data
-	files = ['buffer_size_kb', 'current_tracer', 'trace', 'trace_clock',
-			 'trace_marker', 'trace_options', 'tracing_on']
-	# files needed for callgraph trace data
-	tp = sysvals.tpath
-	if(sysvals.usecallgraph):
-		files += [
-			'available_filter_functions',
-			'set_ftrace_filter',
-			'set_graph_function'
-		]
-	for f in files:
-		if(sysvals.android):
-			out = os.popen(sysvals.adb+' shell ls '+tp+f).read().strip()
-			if(out != tp+f):
-				return False
-		else:
-			if(os.path.exists(tp+f) == False):
-				return False
-	return True
-
 # Function: parseStamp
 # Description:
 #	 Pull in the stamp comment line from the data file(s),
 #	 create the stamp, and add it to the global sysvals object
 # Arguments:
 #	 m: the valid re.match output for the stamp line
-def parseStamp(m, data):
+def parseStamp(line, data):
 	global sysvals
+
+	m = re.match(sysvals.stampfmt, line)
 	data.stamp = {'time': '', 'host': '', 'mode': ''}
 	dt = datetime(int(m.group('y'))+2000, int(m.group('m')),
 		int(m.group('d')), int(m.group('H')), int(m.group('M')),
@@ -999,6 +1786,7 @@ def parseStamp(m, data):
 	data.stamp['host'] = m.group('host')
 	data.stamp['mode'] = m.group('mode')
 	data.stamp['kernel'] = m.group('kernel')
+	sysvals.hostname = data.stamp['host']
 	sysvals.suspendmode = data.stamp['mode']
 	if not sysvals.stamp:
 		sysvals.stamp = data.stamp
@@ -1031,14 +1819,35 @@ def diffStamp(stamp1, stamp2):
 def doesTraceLogHaveTraceEvents():
 	global sysvals
 
+	# check for kprobes
+	sysvals.usekprobes = False
+	out = os.system('grep -q "_cal: (" '+sysvals.ftracefile)
+	if(out == 0):
+		sysvals.usekprobes = True
+	# check for callgraph data on trace event blocks
+	out = os.system('grep -q "_cpu_down()" '+sysvals.ftracefile)
+	if(out == 0):
+		sysvals.usekprobes = True
+	out = os.popen('head -1 '+sysvals.ftracefile).read().replace('\n', '')
+	m = re.match(sysvals.stampfmt, out)
+	if m and m.group('mode') == 'command':
+		sysvals.usetraceeventsonly = True
+		sysvals.usetraceevents = True
+		return
+	# figure out what level of trace events are supported
 	sysvals.usetraceeventsonly = True
 	sysvals.usetraceevents = False
 	for e in sysvals.traceevents:
-		out = os.popen('cat '+sysvals.ftracefile+' | grep "'+e+': "').read()
-		if(not out):
+		out = os.system('grep -q "'+e+': " '+sysvals.ftracefile)
+		if(out != 0):
 			sysvals.usetraceeventsonly = False
-		if(e == 'suspend_resume' and out):
+		if(e == 'suspend_resume' and out == 0):
 			sysvals.usetraceevents = True
+	# determine is this log is properly formatted
+	for e in ['SUSPEND START', 'RESUME COMPLETE']:
+		out = os.system('grep -q "'+e+'" '+sysvals.ftracefile)
+		if(out != 0):
+			sysvals.usetracemarkers = False
 
 # Function: appendIncompleteTraceLog
 # Description:
@@ -1055,44 +1864,42 @@ def appendIncompleteTraceLog(testruns):
 
 	# create TestRun vessels for ftrace parsing
 	testcnt = len(testruns)
-	testidx = -1
+	testidx = 0
 	testrun = []
 	for data in testruns:
 		testrun.append(TestRun(data))
 
 	# extract the callgraph and traceevent data
 	vprint('Analyzing the ftrace data...')
+	tp = TestProps()
 	tf = open(sysvals.ftracefile, 'r')
+	data = 0
 	for line in tf:
 		# remove any latent carriage returns
 		line = line.replace('\r\n', '')
-		# grab the time stamp first (signifies the start of the test run)
+		# grab the time stamp
 		m = re.match(sysvals.stampfmt, line)
 		if(m):
-			testidx += 1
-			parseStamp(m, testrun[testidx].data)
-			continue
-		# pull out any firmware data
-		if(re.match(sysvals.firmwarefmt, line)):
-			continue
-		# if we havent found a test time stamp yet keep spinning til we do
-		if(testidx < 0):
+			tp.stamp = line
 			continue
 		# determine the trace data type (required for further parsing)
 		m = re.match(sysvals.tracertypefmt, line)
 		if(m):
-			tracer = m.group('t')
-			testrun[testidx].setTracerType(tracer)
+			tp.setTracerType(m.group('t'))
+			continue
+		# device properties line
+		if(re.match(sysvals.devpropfmt, line)):
+			devProps(line)
 			continue
-		# parse only valid lines, if this isnt one move on
-		m = re.match(testrun[testidx].ftrace_line_fmt, line)
+		# parse only valid lines, if this is not one move on
+		m = re.match(tp.ftrace_line_fmt, line)
 		if(not m):
 			continue
 		# gather the basic message data from the line
 		m_time = m.group('time')
 		m_pid = m.group('pid')
 		m_msg = m.group('msg')
-		if(testrun[testidx].cgformat):
+		if(tp.cgformat):
 			m_param3 = m.group('dur')
 		else:
 			m_param3 = 'traceevent'
@@ -1104,119 +1911,114 @@ def appendIncompleteTraceLog(testruns):
 		# the line should be a call, return, or event
 		if(not t.fcall and not t.freturn and not t.fevent):
 			continue
-		# only parse the ftrace data during suspend/resume
-		data = testrun[testidx].data
-		if(not testrun[testidx].inthepipe):
-			# look for the suspend start marker
-			if(t.fevent):
-				if(t.name == 'SUSPEND START'):
-					testrun[testidx].inthepipe = True
-					data.setStart(t.time)
+		# look for the suspend start marker
+		if(t.startMarker()):
+			data = testrun[testidx].data
+			parseStamp(tp.stamp, data)
+			data.setStart(t.time)
+			continue
+		if(not data):
+			continue
+		# find the end of resume
+		if(t.endMarker()):
+			data.setEnd(t.time)
+			testidx += 1
+			if(testidx >= testcnt):
+				break
+			continue
+		# trace event processing
+		if(t.fevent):
+			# general trace events have two types, begin and end
+			if(re.match('(?P<name>.*) begin$', t.name)):
+				isbegin = True
+			elif(re.match('(?P<name>.*) end$', t.name)):
+				isbegin = False
+			else:
 				continue
-		else:
-			# trace event processing
-			if(t.fevent):
-				if(t.name == 'RESUME COMPLETE'):
-					testrun[testidx].inthepipe = False
-					data.setEnd(t.time)
-					if(testidx == testcnt - 1):
-						break
-					continue
-				# general trace events have two types, begin and end
-				if(re.match('(?P<name>.*) begin$', t.name)):
-					isbegin = True
-				elif(re.match('(?P<name>.*) end$', t.name)):
-					isbegin = False
+			m = re.match('(?P<name>.*)\[(?P<val>[0-9]*)\] .*', t.name)
+			if(m):
+				val = m.group('val')
+				if val == '0':
+					name = m.group('name')
 				else:
-					continue
-				m = re.match('(?P<name>.*)\[(?P<val>[0-9]*)\] .*', t.name)
-				if(m):
-					val = m.group('val')
-					if val == '0':
-						name = m.group('name')
-					else:
-						name = m.group('name')+'['+val+']'
+					name = m.group('name')+'['+val+']'
+			else:
+				m = re.match('(?P<name>.*) .*', t.name)
+				name = m.group('name')
+			# special processing for trace events
+			if re.match('dpm_prepare\[.*', name):
+				continue
+			elif re.match('machine_suspend.*', name):
+				continue
+			elif re.match('suspend_enter\[.*', name):
+				if(not isbegin):
+					data.dmesg['suspend_prepare']['end'] = t.time
+				continue
+			elif re.match('dpm_suspend\[.*', name):
+				if(not isbegin):
+					data.dmesg['suspend']['end'] = t.time
+				continue
+			elif re.match('dpm_suspend_late\[.*', name):
+				if(isbegin):
+					data.dmesg['suspend_late']['start'] = t.time
 				else:
-					m = re.match('(?P<name>.*) .*', t.name)
-					name = m.group('name')
-				# special processing for trace events
-				if re.match('dpm_prepare\[.*', name):
-					continue
-				elif re.match('machine_suspend.*', name):
-					continue
-				elif re.match('suspend_enter\[.*', name):
-					if(not isbegin):
-						data.dmesg['suspend_prepare']['end'] = t.time
-					continue
-				elif re.match('dpm_suspend\[.*', name):
-					if(not isbegin):
-						data.dmesg['suspend']['end'] = t.time
-					continue
-				elif re.match('dpm_suspend_late\[.*', name):
-					if(isbegin):
-						data.dmesg['suspend_late']['start'] = t.time
-					else:
-						data.dmesg['suspend_late']['end'] = t.time
-					continue
-				elif re.match('dpm_suspend_noirq\[.*', name):
-					if(isbegin):
-						data.dmesg['suspend_noirq']['start'] = t.time
-					else:
-						data.dmesg['suspend_noirq']['end'] = t.time
-					continue
-				elif re.match('dpm_resume_noirq\[.*', name):
-					if(isbegin):
-						data.dmesg['resume_machine']['end'] = t.time
-						data.dmesg['resume_noirq']['start'] = t.time
-					else:
-						data.dmesg['resume_noirq']['end'] = t.time
-					continue
-				elif re.match('dpm_resume_early\[.*', name):
-					if(isbegin):
-						data.dmesg['resume_early']['start'] = t.time
-					else:
-						data.dmesg['resume_early']['end'] = t.time
-					continue
-				elif re.match('dpm_resume\[.*', name):
-					if(isbegin):
-						data.dmesg['resume']['start'] = t.time
-					else:
-						data.dmesg['resume']['end'] = t.time
-					continue
-				elif re.match('dpm_complete\[.*', name):
-					if(isbegin):
-						data.dmesg['resume_complete']['start'] = t.time
-					else:
-						data.dmesg['resume_complete']['end'] = t.time
-					continue
-				# is this trace event outside of the devices calls
-				if(data.isTraceEventOutsideDeviceCalls(pid, t.time)):
-					# global events (outside device calls) are simply graphed
-					if(isbegin):
-						# store each trace event in ttemp
-						if(name not in testrun[testidx].ttemp):
-							testrun[testidx].ttemp[name] = []
-						testrun[testidx].ttemp[name].append(\
-							{'begin': t.time, 'end': t.time})
-					else:
-						# finish off matching trace event in ttemp
-						if(name in testrun[testidx].ttemp):
-							testrun[testidx].ttemp[name][-1]['end'] = t.time
+					data.dmesg['suspend_late']['end'] = t.time
+				continue
+			elif re.match('dpm_suspend_noirq\[.*', name):
+				if(isbegin):
+					data.dmesg['suspend_noirq']['start'] = t.time
 				else:
-					if(isbegin):
-						data.addIntraDevTraceEvent('', name, pid, t.time)
-					else:
-						data.capIntraDevTraceEvent('', name, pid, t.time)
-			# call/return processing
-			elif sysvals.usecallgraph:
-				# create a callgraph object for the data
-				if(pid not in testrun[testidx].ftemp):
-					testrun[testidx].ftemp[pid] = []
-					testrun[testidx].ftemp[pid].append(FTraceCallGraph())
-				# when the call is finished, see which device matches it
-				cg = testrun[testidx].ftemp[pid][-1]
-				if(cg.addLine(t, m)):
-					testrun[testidx].ftemp[pid].append(FTraceCallGraph())
+					data.dmesg['suspend_noirq']['end'] = t.time
+				continue
+			elif re.match('dpm_resume_noirq\[.*', name):
+				if(isbegin):
+					data.dmesg['resume_machine']['end'] = t.time
+					data.dmesg['resume_noirq']['start'] = t.time
+				else:
+					data.dmesg['resume_noirq']['end'] = t.time
+				continue
+			elif re.match('dpm_resume_early\[.*', name):
+				if(isbegin):
+					data.dmesg['resume_early']['start'] = t.time
+				else:
+					data.dmesg['resume_early']['end'] = t.time
+				continue
+			elif re.match('dpm_resume\[.*', name):
+				if(isbegin):
+					data.dmesg['resume']['start'] = t.time
+				else:
+					data.dmesg['resume']['end'] = t.time
+				continue
+			elif re.match('dpm_complete\[.*', name):
+				if(isbegin):
+					data.dmesg['resume_complete']['start'] = t.time
+				else:
+					data.dmesg['resume_complete']['end'] = t.time
+				continue
+			# skip trace events inside devices calls
+			if(not data.isTraceEventOutsideDeviceCalls(pid, t.time)):
+				continue
+			# global events (outside device calls) are simply graphed
+			if(isbegin):
+				# store each trace event in ttemp
+				if(name not in testrun[testidx].ttemp):
+					testrun[testidx].ttemp[name] = []
+				testrun[testidx].ttemp[name].append(\
+					{'begin': t.time, 'end': t.time})
+			else:
+				# finish off matching trace event in ttemp
+				if(name in testrun[testidx].ttemp):
+					testrun[testidx].ttemp[name][-1]['end'] = t.time
+		# call/return processing
+		elif sysvals.usecallgraph:
+			# create a callgraph object for the data
+			if(pid not in testrun[testidx].ftemp):
+				testrun[testidx].ftemp[pid] = []
+				testrun[testidx].ftemp[pid].append(FTraceCallGraph(pid))
+			# when the call is finished, see which device matches it
+			cg = testrun[testidx].ftemp[pid][-1]
+			if(cg.addLine(t)):
+				testrun[testidx].ftemp[pid].append(FTraceCallGraph(pid))
 	tf.close()
 
 	for test in testrun:
@@ -1224,20 +2026,14 @@ def appendIncompleteTraceLog(testruns):
 		if(sysvals.usetraceevents):
 			for name in test.ttemp:
 				for event in test.ttemp[name]:
-					begin = event['begin']
-					end = event['end']
-					# if event starts before timeline start, expand timeline
-					if(begin < test.data.start):
-						test.data.setStart(begin)
-					# if event ends after timeline end, expand the timeline
-					if(end > test.data.end):
-						test.data.setEnd(end)
-					test.data.newActionGlobal(name, begin, end)
+					test.data.newActionGlobal(name, event['begin'], event['end'])
 
 		# add the callgraph data to the device hierarchy
 		for pid in test.ftemp:
 			for cg in test.ftemp[pid]:
-				if(not cg.sanityCheck()):
+				if len(cg.list) < 1 or cg.invalid:
+					continue
+				if(not cg.postProcess()):
 					id = 'task %s cpu %s' % (pid, m.group('cpu'))
 					vprint('Sanity check failed for '+\
 						id+', ignoring this callback')
@@ -1259,14 +2055,6 @@ def appendIncompleteTraceLog(testruns):
 		if(sysvals.verbose):
 			test.data.printDetails()
 
-
-	# add the time in between the tests as a new phase so we can see it
-	if(len(testruns) > 1):
-		t1e = testruns[0].getEnd()
-		t2s = testruns[-1].getStart()
-		testruns[-1].newPhaseWithSingleAction('user mode', \
-			'user mode', t1e, t2s, '#FF9966')
-
 # Function: parseTraceLog
 # Description:
 #	 Analyze an ftrace log output file generated from this app during
@@ -1280,9 +2068,16 @@ def parseTraceLog():
 
 	vprint('Analyzing the ftrace data...')
 	if(os.path.exists(sysvals.ftracefile) == False):
-		doError('%s doesnt exist' % sysvals.ftracefile, False)
+		doError('%s does not exist' % sysvals.ftracefile, False)
+
+	sysvals.setupAllKprobes()
+	tracewatch = ['suspend_enter']
+	if sysvals.usekprobes:
+		tracewatch += ['sync_filesystems', 'freeze_processes', 'syscore_suspend',
+			'syscore_resume', 'resume_console', 'thaw_processes', 'CPU_ON', 'CPU_OFF']
 
 	# extract the callgraph and traceevent data
+	tp = TestProps()
 	testruns = []
 	testdata = []
 	testrun = 0
@@ -1295,27 +2090,17 @@ def parseTraceLog():
 		# stamp line: each stamp means a new test run
 		m = re.match(sysvals.stampfmt, line)
 		if(m):
-			data = Data(len(testdata))
-			testdata.append(data)
-			testrun = TestRun(data)
-			testruns.append(testrun)
-			parseStamp(m, data)
-			continue
-		if(not data):
+			tp.stamp = line
 			continue
 		# firmware line: pull out any firmware data
 		m = re.match(sysvals.firmwarefmt, line)
 		if(m):
-			data.fwSuspend = int(m.group('s'))
-			data.fwResume = int(m.group('r'))
-			if(data.fwSuspend > 0 or data.fwResume > 0):
-				data.fwValid = True
+			tp.fwdata.append((int(m.group('s')), int(m.group('r'))))
 			continue
 		# tracer type line: determine the trace data type
 		m = re.match(sysvals.tracertypefmt, line)
 		if(m):
-			tracer = m.group('t')
-			testrun.setTracerType(tracer)
+			tp.setTracerType(m.group('t'))
 			continue
 		# post resume time line: did this test run include post-resume data
 		m = re.match(sysvals.postresumefmt, line)
@@ -1324,15 +2109,20 @@ def parseTraceLog():
 			if(t > 0):
 				sysvals.postresumetime = t
 			continue
+		# device properties line
+		if(re.match(sysvals.devpropfmt, line)):
+			devProps(line)
+			continue
 		# ftrace line: parse only valid lines
-		m = re.match(testrun.ftrace_line_fmt, line)
+		m = re.match(tp.ftrace_line_fmt, line)
 		if(not m):
 			continue
 		# gather the basic message data from the line
 		m_time = m.group('time')
+		m_proc = m.group('proc')
 		m_pid = m.group('pid')
 		m_msg = m.group('msg')
-		if(testrun.cgformat):
+		if(tp.cgformat):
 			m_param3 = m.group('dur')
 		else:
 			m_param3 = 'traceevent'
@@ -1344,24 +2134,38 @@ def parseTraceLog():
 		# the line should be a call, return, or event
 		if(not t.fcall and not t.freturn and not t.fevent):
 			continue
-		# only parse the ftrace data during suspend/resume
-		if(not testrun.inthepipe):
-			# look for the suspend start marker
-			if(t.fevent):
-				if(t.name == 'SUSPEND START'):
-					testrun.inthepipe = True
-					data.setStart(t.time)
+		# find the start of suspend
+		if(t.startMarker()):
+			phase = 'suspend_prepare'
+			data = Data(len(testdata))
+			testdata.append(data)
+			testrun = TestRun(data)
+			testruns.append(testrun)
+			parseStamp(tp.stamp, data)
+			if len(tp.fwdata) > data.testnumber:
+				data.fwSuspend, data.fwResume = tp.fwdata[data.testnumber]
+				if(data.fwSuspend > 0 or data.fwResume > 0):
+					data.fwValid = True
+			data.setStart(t.time)
+			continue
+		if(not data):
+			continue
+		# find the end of resume
+		if(t.endMarker()):
+			if(sysvals.usetracemarkers and sysvals.postresumetime > 0):
+				phase = 'post_resume'
+				data.newPhase(phase, t.time, t.time, '#F0F0F0', -1)
+			data.setEnd(t.time)
+			if(not sysvals.usetracemarkers):
+				# no trace markers? then quit and be sure to finish recording
+				# the event we used to trigger resume end
+				if(len(testrun.ttemp['thaw_processes']) > 0):
+					# if an entry exists, assume this is its end
+					testrun.ttemp['thaw_processes'][-1]['end'] = t.time
+				break
 			continue
 		# trace event processing
 		if(t.fevent):
-			if(t.name == 'RESUME COMPLETE'):
-				if(sysvals.postresumetime > 0):
-					phase = 'post_resume'
-					data.newPhase(phase, t.time, t.time, '#FF9966', -1)
-				else:
-					testrun.inthepipe = False
-				data.setEnd(t.time)
-				continue
 			if(phase == 'post_resume'):
 				data.setEnd(t.time)
 			if(t.type == 'suspend_resume'):
@@ -1383,8 +2187,7 @@ def parseTraceLog():
 					m = re.match('(?P<name>.*) .*', t.name)
 					name = m.group('name')
 				# ignore these events
-				if(re.match('acpi_suspend\[.*', t.name) or
-					re.match('suspend_enter\[.*', name)):
+				if(name.split('[')[0] in tracewatch):
 					continue
 				# -- phase changes --
 				# suspend_prepare start
@@ -1418,7 +2221,7 @@ def parseTraceLog():
 						data.dmesg[phase]['end'] = t.time
 						data.tSuspended = t.time
 					else:
-						if(sysvals.suspendmode in ['mem', 'disk']):
+						if(sysvals.suspendmode in ['mem', 'disk'] and not tp.S0i3):
 							data.dmesg['suspend_machine']['end'] = t.time
 							data.tSuspended = t.time
 						phase = 'resume_machine'
@@ -1426,6 +2229,15 @@ def parseTraceLog():
 						data.tResumed = t.time
 						data.tLow = data.tResumed - data.tSuspended
 					continue
+				# acpi_suspend
+				elif(re.match('acpi_suspend\[.*', t.name)):
+					# acpi_suspend[0] S0i3
+					if(re.match('acpi_suspend\[0\] begin', t.name)):
+						if(sysvals.suspendmode == 'mem'):
+							tp.S0i3 = True
+							data.dmesg['suspend_machine']['end'] = t.time
+							data.tSuspended = t.time
+					continue
 				# resume_noirq start
 				elif(re.match('dpm_resume_noirq\[.*', t.name)):
 					phase = 'resume_noirq'
@@ -1449,30 +2261,25 @@ def parseTraceLog():
 					if(isbegin):
 						data.dmesg[phase]['start'] = t.time
 					continue
-
-				# is this trace event outside of the devices calls
-				if(data.isTraceEventOutsideDeviceCalls(pid, t.time)):
-					# global events (outside device calls) are simply graphed
-					if(name not in testrun.ttemp):
-						testrun.ttemp[name] = []
-					if(isbegin):
-						# create a new list entry
-						testrun.ttemp[name].append(\
-							{'begin': t.time, 'end': t.time})
-					else:
-						if(len(testrun.ttemp[name]) > 0):
-							# if an antry exists, assume this is its end
-							testrun.ttemp[name][-1]['end'] = t.time
-						elif(phase == 'post_resume'):
-							# post resume events can just have ends
-							testrun.ttemp[name].append({
-								'begin': data.dmesg[phase]['start'],
-								'end': t.time})
+				# skip trace events inside devices calls
+				if(not data.isTraceEventOutsideDeviceCalls(pid, t.time)):
+					continue
+				# global events (outside device calls) are graphed
+				if(name not in testrun.ttemp):
+					testrun.ttemp[name] = []
+				if(isbegin):
+					# create a new list entry
+					testrun.ttemp[name].append(\
+						{'begin': t.time, 'end': t.time, 'pid': pid})
 				else:
-					if(isbegin):
-						data.addIntraDevTraceEvent('', name, pid, t.time)
-					else:
-						data.capIntraDevTraceEvent('', name, pid, t.time)
+					if(len(testrun.ttemp[name]) > 0):
+						# if an entry exists, assume this is its end
+						testrun.ttemp[name][-1]['end'] = t.time
+					elif(phase == 'post_resume'):
+						# post resume events can just have ends
+						testrun.ttemp[name].append({
+							'begin': data.dmesg[phase]['start'],
+							'end': t.time})
 			# device callback start
 			elif(t.type == 'device_pm_callback_start'):
 				m = re.match('(?P<drv>.*) (?P<d>.*), parent: *(?P<p>.*), .*',\
@@ -1495,75 +2302,127 @@ def parseTraceLog():
 					dev = list[n]
 					dev['length'] = t.time - dev['start']
 					dev['end'] = t.time
+		# kprobe event processing
+		elif(t.fkprobe):
+			kprobename = t.type
+			kprobedata = t.name
+			key = (kprobename, pid)
+			# displayname is generated from kprobe data
+			displayname = ''
+			if(t.fcall):
+				displayname = sysvals.kprobeDisplayName(kprobename, kprobedata)
+				if not displayname:
+					continue
+				if(key not in tp.ktemp):
+					tp.ktemp[key] = []
+				tp.ktemp[key].append({
+					'pid': pid,
+					'begin': t.time,
+					'end': t.time,
+					'name': displayname,
+					'cdata': kprobedata,
+					'proc': m_proc,
+				})
+			elif(t.freturn):
+				if(key not in tp.ktemp) or len(tp.ktemp[key]) < 1:
+					continue
+				e = tp.ktemp[key][-1]
+				if e['begin'] < 0.0 or t.time - e['begin'] < 0.000001:
+					tp.ktemp[key].pop()
+				else:
+					e['end'] = t.time
+					e['rdata'] = kprobedata
 		# callgraph processing
 		elif sysvals.usecallgraph:
-			# this shouldn't happen, but JIC, ignore callgraph data post-res
-			if(phase == 'post_resume'):
-				continue
 			# create a callgraph object for the data
-			if(pid not in testrun.ftemp):
-				testrun.ftemp[pid] = []
-				testrun.ftemp[pid].append(FTraceCallGraph())
+			key = (m_proc, pid)
+			if(key not in testrun.ftemp):
+				testrun.ftemp[key] = []
+				testrun.ftemp[key].append(FTraceCallGraph(pid))
 			# when the call is finished, see which device matches it
-			cg = testrun.ftemp[pid][-1]
-			if(cg.addLine(t, m)):
-				testrun.ftemp[pid].append(FTraceCallGraph())
+			cg = testrun.ftemp[key][-1]
+			if(cg.addLine(t)):
+				testrun.ftemp[key].append(FTraceCallGraph(pid))
 	tf.close()
 
+	if sysvals.suspendmode == 'command':
+		for test in testruns:
+			for p in test.data.phases:
+				if p == 'resume_complete':
+					test.data.dmesg[p]['start'] = test.data.start
+					test.data.dmesg[p]['end'] = test.data.end
+				else:
+					test.data.dmesg[p]['start'] = test.data.start
+					test.data.dmesg[p]['end'] = test.data.start
+			test.data.tSuspended = test.data.start
+			test.data.tResumed = test.data.start
+			test.data.tLow = 0
+			test.data.fwValid = False
+
 	for test in testruns:
 		# add the traceevent data to the device hierarchy
 		if(sysvals.usetraceevents):
+			# add actual trace funcs
 			for name in test.ttemp:
 				for event in test.ttemp[name]:
-					begin = event['begin']
-					end = event['end']
-					# if event starts before timeline start, expand timeline
-					if(begin < test.data.start):
-						test.data.setStart(begin)
-					# if event ends after timeline end, expand the timeline
-					if(end > test.data.end):
-						test.data.setEnd(end)
-					test.data.newActionGlobal(name, begin, end)
-
-		# add the callgraph data to the device hierarchy
-		borderphase = {
-			'dpm_prepare': 'suspend_prepare',
-			'dpm_complete': 'resume_complete'
-		}
-		for pid in test.ftemp:
-			for cg in test.ftemp[pid]:
-				if len(cg.list) < 2:
+					test.data.newActionGlobal(name, event['begin'], event['end'], event['pid'])
+			# add the kprobe based virtual tracefuncs as actual devices
+			for key in tp.ktemp:
+				name, pid = key
+				if name not in sysvals.tracefuncs:
 					continue
-				if(not cg.sanityCheck()):
-					id = 'task %s cpu %s' % (pid, m.group('cpu'))
-					vprint('Sanity check failed for '+\
-						id+', ignoring this callback')
+				for e in tp.ktemp[key]:
+					kb, ke = e['begin'], e['end']
+					if kb == ke or not test.data.isInsideTimeline(kb, ke):
+						continue
+					test.data.newActionGlobal(e['name'], kb, ke, pid)
+			# add config base kprobes and dev kprobes
+			for key in tp.ktemp:
+				name, pid = key
+				if name in sysvals.tracefuncs:
 					continue
-				callstart = cg.start
-				callend = cg.end
-				if(cg.list[0].name in borderphase):
-					p = borderphase[cg.list[0].name]
-					list = test.data.dmesg[p]['list']
-					for devname in list:
-						dev = list[devname]
-						if(pid == dev['pid'] and
-							callstart <= dev['start'] and
-							callend >= dev['end']):
-							dev['ftrace'] = cg.slice(dev['start'], dev['end'])
-					continue
-				if(cg.list[0].name != 'dpm_run_callback'):
-					continue
-				for p in test.data.phases:
-					if(test.data.dmesg[p]['start'] <= callstart and
-						callstart <= test.data.dmesg[p]['end']):
-						list = test.data.dmesg[p]['list']
-						for devname in list:
-							dev = list[devname]
-							if(pid == dev['pid'] and
-								callstart <= dev['start'] and
-								callend >= dev['end']):
-								dev['ftrace'] = cg
-						break
+				for e in tp.ktemp[key]:
+					kb, ke = e['begin'], e['end']
+					if kb == ke or not test.data.isInsideTimeline(kb, ke):
+						continue
+					color = sysvals.kprobeColor(e['name'])
+					if name not in sysvals.dev_tracefuncs:
+						# config base kprobe
+						test.data.newActionGlobal(e['name'], kb, ke, -2, color)
+					elif sysvals.usedevsrc:
+						# dev kprobe
+						data.addDeviceFunctionCall(e['name'], name, e['proc'], pid, kb,
+							ke, e['cdata'], e['rdata'])
+		if sysvals.usecallgraph:
+			# add the callgraph data to the device hierarchy
+			sortlist = dict()
+			for key in test.ftemp:
+				proc, pid = key
+				for cg in test.ftemp[key]:
+					if len(cg.list) < 1 or cg.invalid:
+						continue
+					if(not cg.postProcess()):
+						id = 'task %s' % (pid)
+						vprint('Sanity check failed for '+\
+							id+', ignoring this callback')
+						continue
+					# match cg data to devices
+					if sysvals.suspendmode == 'command' or not cg.deviceMatch(pid, test.data):
+						sortkey = '%f%f%d' % (cg.start, cg.end, pid)
+						sortlist[sortkey] = cg
+			# create blocks for orphan cg data
+			for sortkey in sorted(sortlist):
+				cg = sortlist[sortkey]
+				name = cg.list[0].name
+				if sysvals.isCallgraphFunc(name):
+					vprint('Callgraph found for task %d: %.3fms, %s' % (cg.pid, (cg.end - cg.start)*1000, name))
+					cg.newActionFromFunction(test.data)
+
+	if sysvals.suspendmode == 'command':
+		if(sysvals.verbose):
+			for data in testdata:
+				data.printDetails()
+		return testdata
 
 	# fill in any missing phases
 	for data in testdata:
@@ -1587,14 +2446,52 @@ def parseTraceLog():
 		if(sysvals.verbose):
 			data.printDetails()
 
-	# add the time in between the tests as a new phase so we can see it
-	if(len(testdata) > 1):
-		t1e = testdata[0].getEnd()
-		t2s = testdata[-1].getStart()
-		testdata[-1].newPhaseWithSingleAction('user mode', \
-			'user mode', t1e, t2s, '#FF9966')
 	return testdata
 
+# Function: loadRawKernelLog
+# Description:
+#	 Load a raw kernel log that wasn't created by this tool, it might be
+#	 possible to extract a valid suspend/resume log
+def loadRawKernelLog(dmesgfile):
+	global sysvals
+
+	stamp = {'time': '', 'host': '', 'mode': 'mem', 'kernel': ''}
+	stamp['time'] = datetime.now().strftime('%B %d %Y, %I:%M:%S %p')
+	stamp['host'] = sysvals.hostname
+
+	testruns = []
+	data = 0
+	lf = open(dmesgfile, 'r')
+	for line in lf:
+		line = line.replace('\r\n', '')
+		idx = line.find('[')
+		if idx > 1:
+			line = line[idx:]
+		m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line)
+		if(not m):
+			continue
+		msg = m.group("msg")
+		m = re.match('PM: Syncing filesystems.*', msg)
+		if(m):
+			if(data):
+				testruns.append(data)
+			data = Data(len(testruns))
+			data.stamp = stamp
+		if(data):
+			m = re.match('.* *(?P<k>[0-9]\.[0-9]{2}\.[0-9]-.*) .*', msg)
+			if(m):
+				stamp['kernel'] = m.group('k')
+			m = re.match('PM: Preparing system for (?P<m>.*) sleep', msg)
+			if(m):
+				stamp['mode'] = m.group('m')
+			data.dmesgtext.append(line)
+	if(data):
+		testruns.append(data)
+		sysvals.stamp = stamp
+		sysvals.suspendmode = stamp['mode']
+	lf.close()
+	return testruns
+
 # Function: loadKernelLog
 # Description:
 #	 [deprecated for kernel 3.15.0 or newer]
@@ -1607,9 +2504,10 @@ def loadKernelLog():
 
 	vprint('Analyzing the dmesg data...')
 	if(os.path.exists(sysvals.dmesgfile) == False):
-		doError('%s doesnt exist' % sysvals.dmesgfile, False)
+		doError('%s does not exist' % sysvals.dmesgfile, False)
 
-	# there can be multiple test runs in a single file delineated by stamps
+	# there can be multiple test runs in a single file
+	tp = TestProps()
 	testruns = []
 	data = 0
 	lf = open(sysvals.dmesgfile, 'r')
@@ -1620,35 +2518,43 @@ def loadKernelLog():
 			line = line[idx:]
 		m = re.match(sysvals.stampfmt, line)
 		if(m):
-			if(data):
-				testruns.append(data)
-			data = Data(len(testruns))
-			parseStamp(m, data)
-			continue
-		if(not data):
+			tp.stamp = line
 			continue
 		m = re.match(sysvals.firmwarefmt, line)
 		if(m):
-			data.fwSuspend = int(m.group('s'))
-			data.fwResume = int(m.group('r'))
-			if(data.fwSuspend > 0 or data.fwResume > 0):
-				data.fwValid = True
+			tp.fwdata.append((int(m.group('s')), int(m.group('r'))))
 			continue
 		m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line)
-		if(m):
-			data.dmesgtext.append(line)
-			if(re.match('ACPI: resume from mwait', m.group('msg'))):
-				print('NOTE: This suspend appears to be freeze rather than'+\
-					' %s, it will be treated as such' % sysvals.suspendmode)
-				sysvals.suspendmode = 'freeze'
-		else:
-			vprint('ignoring dmesg line: %s' % line.replace('\n', ''))
-	testruns.append(data)
+		if(not m):
+			continue
+		msg = m.group("msg")
+		if(re.match('PM: Syncing filesystems.*', msg)):
+			if(data):
+				testruns.append(data)
+			data = Data(len(testruns))
+			parseStamp(tp.stamp, data)
+			if len(tp.fwdata) > data.testnumber:
+				data.fwSuspend, data.fwResume = tp.fwdata[data.testnumber]
+				if(data.fwSuspend > 0 or data.fwResume > 0):
+					data.fwValid = True
+		if(re.match('ACPI: resume from mwait', msg)):
+			print('NOTE: This suspend appears to be freeze rather than'+\
+				' %s, it will be treated as such' % sysvals.suspendmode)
+			sysvals.suspendmode = 'freeze'
+		if(not data):
+			continue
+		data.dmesgtext.append(line)
+	if(data):
+		testruns.append(data)
 	lf.close()
 
-	if(not data):
-		print('ERROR: analyze_suspend header missing from dmesg log')
-		sys.exit()
+	if(len(testruns) < 1):
+		# bad log, but see if you can extract something meaningful anyway
+		testruns = loadRawKernelLog(sysvals.dmesgfile)
+
+	if(len(testruns) < 1):
+		doError(' dmesg log is completely unreadable: %s' \
+			% sysvals.dmesgfile, False)
 
 	# fix lines with same timestamp/function with the call and return swapped
 	for data in testruns:
@@ -1865,7 +2771,8 @@ def parseKernelLog(data):
 						actions[a] = []
 					actions[a].append({'begin': ktime, 'end': ktime})
 				if(re.match(at[a]['emsg'], msg)):
-					actions[a][-1]['end'] = ktime
+					if(a in actions):
+						actions[a][-1]['end'] = ktime
 			# now look for CPU on/off events
 			if(re.match('Disabling non-boot CPUs .*', msg)):
 				# start of first cpu suspend
@@ -1912,15 +2819,7 @@ def parseKernelLog(data):
 	# fill in any actions we've found
 	for name in actions:
 		for event in actions[name]:
-			begin = event['begin']
-			end = event['end']
-			# if event starts before timeline start, expand timeline
-			if(begin < data.start):
-				data.setStart(begin)
-			# if event ends after timeline end, expand the timeline
-			if(end > data.end):
-				data.setEnd(end)
-			data.newActionGlobal(name, begin, end)
+			data.newActionGlobal(name, event['begin'], event['end'])
 
 	if(sysvals.verbose):
 		data.printDetails()
@@ -1929,92 +2828,6 @@ def parseKernelLog(data):
 	data.fixupInitcallsThatDidntReturn()
 	return True
 
-# Function: setTimelineRows
-# Description:
-#	 Organize the timeline entries into the smallest
-#	 number of rows possible, with no entry overlapping
-# Arguments:
-#	 list: the list of devices/actions for a single phase
-#	 sortedkeys: cronologically sorted key list to use
-# Output:
-#	 The total number of rows needed to display this phase of the timeline
-def setTimelineRows(list, sortedkeys):
-
-	# clear all rows and set them to undefined
-	remaining = len(list)
-	rowdata = dict()
-	row = 0
-	for item in list:
-		list[item]['row'] = -1
-
-	# try to pack each row with as many ranges as possible
-	while(remaining > 0):
-		if(row not in rowdata):
-			rowdata[row] = []
-		for item in sortedkeys:
-			if(list[item]['row'] < 0):
-				s = list[item]['start']
-				e = list[item]['end']
-				valid = True
-				for ritem in rowdata[row]:
-					rs = ritem['start']
-					re = ritem['end']
-					if(not (((s <= rs) and (e <= rs)) or
-						((s >= re) and (e >= re)))):
-						valid = False
-						break
-				if(valid):
-					rowdata[row].append(list[item])
-					list[item]['row'] = row
-					remaining -= 1
-		row += 1
-	return row
-
-# Function: createTimeScale
-# Description:
-#	 Create the timescale header for the html timeline
-# Arguments:
-#	 t0: start time (suspend begin)
-#	 tMax: end time (resume end)
-#	 tSuspend: time when suspend occurs, i.e. the zero time
-# Output:
-#	 The html code needed to display the time scale
-def createTimeScale(t0, tMax, tSuspended):
-	timescale = '<div class="t" style="right:{0}%">{1}</div>\n'
-	output = '<div id="timescale">\n'
-
-	# set scale for timeline
-	tTotal = tMax - t0
-	tS = 0.1
-	if(tTotal <= 0):
-		return output
-	if(tTotal > 4):
-		tS = 1
-	if(tSuspended < 0):
-		for i in range(int(tTotal/tS)+1):
-			pos = '%0.3f' % (100 - ((float(i)*tS*100)/tTotal))
-			if(i > 0):
-				val = '%0.fms' % (float(i)*tS*1000)
-			else:
-				val = ''
-			output += timescale.format(pos, val)
-	else:
-		tSuspend = tSuspended - t0
-		divTotal = int(tTotal/tS) + 1
-		divSuspend = int(tSuspend/tS)
-		s0 = (tSuspend - tS*divSuspend)*100/tTotal
-		for i in range(divTotal):
-			pos = '%0.3f' % (100 - ((float(i)*tS*100)/tTotal) - s0)
-			if((i == 0) and (s0 < 3)):
-				val = ''
-			elif(i == divSuspend):
-				val = 'S/R'
-			else:
-				val = '%0.fms' % (float(i-divSuspend)*tS*1000)
-			output += timescale.format(pos, val)
-	output += '</div>\n'
-	return output
-
 # Function: createHTMLSummarySimple
 # Description:
 #	 Create summary html file for a series of tests
@@ -2146,6 +2959,32 @@ def createHTMLSummarySimple(testruns, htmlfile):
 	hf.write('</body>\n</html>\n')
 	hf.close()
 
+def htmlTitle():
+	global sysvals
+	modename = {
+		'freeze': 'Freeze (S0)',
+		'standby': 'Standby (S1)',
+		'mem': 'Suspend (S3)',
+		'disk': 'Hibernate (S4)'
+	}
+	kernel = sysvals.stamp['kernel']
+	host = sysvals.hostname[0].upper()+sysvals.hostname[1:]
+	mode = sysvals.suspendmode
+	if sysvals.suspendmode in modename:
+		mode = modename[sysvals.suspendmode]
+	return host+' '+mode+' '+kernel
+
+def ordinal(value):
+	suffix = 'th'
+	if value < 10 or value > 19:
+		if value % 10 == 1:
+			suffix = 'st'
+		elif value % 10 == 2:
+			suffix = 'nd'
+		elif value % 10 == 3:
+			suffix = 'rd'
+	return '%d%s' % (value, suffix)
+
 # Function: createHTML
 # Description:
 #	 Create the output html file from the resident test data
@@ -2156,6 +2995,10 @@ def createHTMLSummarySimple(testruns, htmlfile):
 def createHTML(testruns):
 	global sysvals
 
+	if len(testruns) < 1:
+		print('ERROR: Not enough test data to build a timeline')
+		return
+
 	for data in testruns:
 		data.normalizeTime(testruns[-1].tSuspended)
 
@@ -2163,16 +3006,18 @@ def createHTML(testruns):
 	if len(testruns) > 1:
 		x2changes = ['1', 'relative']
 	# html function templates
+	headline_version = '<div class="version"><a href="https://01.org/suspendresume">AnalyzeSuspend v%s</a></div>' % sysvals.version
 	headline_stamp = '<div class="stamp">{0} {1} {2} {3}</div>\n'
 	html_devlist1 = '<button id="devlist1" class="devlist" style="float:left;">Device Detail%s</button>' % x2changes[0]
 	html_zoombox = '<center><button id="zoomin">ZOOM IN</button><button id="zoomout">ZOOM OUT</button><button id="zoomdef">ZOOM 1:1</button></center>\n'
 	html_devlist2 = '<button id="devlist2" class="devlist" style="float:right;">Device Detail2</button>\n'
 	html_timeline = '<div id="dmesgzoombox" class="zoombox">\n<div id="{0}" class="timeline" style="height:{1}px">\n'
-	html_device = '<div id="{0}" title="{1}" class="thread" style="left:{2}%;top:{3}%;height:{4}%;width:{5}%;">{6}</div>\n'
-	html_traceevent = '<div title="{0}" class="traceevent" style="left:{1}%;top:{2}%;height:{3}%;width:{4}%;border:1px solid {5};background-color:{5}">{6}</div>\n'
-	html_phase = '<div class="phase" style="left:{0}%;width:{1}%;top:{2}%;height:{3}%;background-color:{4}">{5}</div>\n'
+	html_tblock = '<div id="block{0}" class="tblock" style="left:{1}%;width:{2}%;">\n'
+	html_device = '<div id="{0}" title="{1}" class="thread{7}" style="left:{2}%;top:{3}px;height:{4}px;width:{5}%;{8}">{6}</div>\n'
+	html_traceevent = '<div title="{0}" class="traceevent" style="left:{1}%;top:{2}px;height:{3}px;width:{4}%;line-height:{3}px;">{5}</div>\n'
+	html_phase = '<div class="phase" style="left:{0}%;width:{1}%;top:{2}px;height:{3}px;background-color:{4}">{5}</div>\n'
 	html_phaselet = '<div id="{0}" class="phaselet" style="left:{1}%;width:{2}%;background-color:{3}"></div>\n'
-	html_legend = '<div class="square" style="left:{0}%;background-color:{1}">&nbsp;{2}</div>\n'
+	html_legend = '<div id="p{3}" class="square" style="left:{0}%;background-color:{1}">&nbsp;{2}</div>\n'
 	html_timetotal = '<table class="time1">\n<tr>'\
 		'<td class="green">{2} Suspend Time: <b>{0} ms</b></td>'\
 		'<td class="yellow">{2} Resume Time: <b>{1} ms</b></td>'\
@@ -2182,6 +3027,10 @@ def createHTML(testruns):
 		'<td class="gray">'+sysvals.suspendmode+' time: <b>{1} ms</b></td>'\
 		'<td class="yellow">{3} Resume Time: <b>{2} ms</b></td>'\
 		'</tr>\n</table>\n'
+	html_timetotal3 = '<table class="time1">\n<tr>'\
+		'<td class="green">Execution Time: <b>{0} ms</b></td>'\
+		'<td class="yellow">Command: <b>{1}</b></td>'\
+		'</tr>\n</table>\n'
 	html_timegroups = '<table class="time2">\n<tr>'\
 		'<td class="green">{4}Kernel Suspend: {0} ms</td>'\
 		'<td class="purple">{4}Firmware Suspend: {1} ms</td>'\
@@ -2189,12 +3038,21 @@ def createHTML(testruns):
 		'<td class="yellow">{4}Kernel Resume: {3} ms</td>'\
 		'</tr>\n</table>\n'
 
+	# html format variables
+	rowheight = 30
+	devtextS = '14px'
+	devtextH = '30px'
+	hoverZ = 'z-index:10;'
+
+	if sysvals.usedevsrc:
+		hoverZ = ''
+
 	# device timeline
 	vprint('Creating Device Timeline...')
-	devtl = Timeline()
+
+	devtl = Timeline(rowheight)
 
 	# Generate the header for this timeline
-	textnum = ['First', 'Second']
 	for data in testruns:
 		tTotal = data.end - data.start
 		tEnd = data.dmesg['resume_complete']['end']
@@ -2203,7 +3061,17 @@ def createHTML(testruns):
 			sys.exit()
 		if(data.tLow > 0):
 			low_time = '%.0f'%(data.tLow*1000)
-		if data.fwValid:
+		if sysvals.suspendmode == 'command':
+			run_time = '%.0f'%((data.end-data.start)*1000)
+			if sysvals.testcommand:
+				testdesc = sysvals.testcommand
+			else:
+				testdesc = 'unknown'
+			if(len(testruns) > 1):
+				testdesc = ordinal(data.testnumber+1)+' '+testdesc
+			thtml = html_timetotal3.format(run_time, testdesc)
+			devtl.html['header'] += thtml
+		elif data.fwValid:
 			suspend_time = '%.0f'%((data.tSuspended-data.start)*1000 + \
 				(data.fwSuspend/1000000.0))
 			resume_time = '%.0f'%((tEnd-data.tSuspended)*1000 + \
@@ -2211,7 +3079,7 @@ def createHTML(testruns):
 			testdesc1 = 'Total'
 			testdesc2 = ''
 			if(len(testruns) > 1):
-				testdesc1 = testdesc2 = textnum[data.testnumber]
+				testdesc1 = testdesc2 = ordinal(data.testnumber+1)
 				testdesc2 += ' '
 			if(data.tLow == 0):
 				thtml = html_timetotal.format(suspend_time, \
@@ -2219,28 +3087,28 @@ def createHTML(testruns):
 			else:
 				thtml = html_timetotal2.format(suspend_time, low_time, \
 					resume_time, testdesc1)
-			devtl.html['timeline'] += thtml
+			devtl.html['header'] += thtml
 			sktime = '%.3f'%((data.dmesg['suspend_machine']['end'] - \
 				data.getStart())*1000)
 			sftime = '%.3f'%(data.fwSuspend / 1000000.0)
 			rftime = '%.3f'%(data.fwResume / 1000000.0)
-			rktime = '%.3f'%((data.getEnd() - \
+			rktime = '%.3f'%((data.dmesg['resume_complete']['end'] - \
 				data.dmesg['resume_machine']['start'])*1000)
-			devtl.html['timeline'] += html_timegroups.format(sktime, \
+			devtl.html['header'] += html_timegroups.format(sktime, \
 				sftime, rftime, rktime, testdesc2)
 		else:
 			suspend_time = '%.0f'%((data.tSuspended-data.start)*1000)
 			resume_time = '%.0f'%((tEnd-data.tSuspended)*1000)
 			testdesc = 'Kernel'
 			if(len(testruns) > 1):
-				testdesc = textnum[data.testnumber]+' '+testdesc
+				testdesc = ordinal(data.testnumber+1)+' '+testdesc
 			if(data.tLow == 0):
 				thtml = html_timetotal.format(suspend_time, \
 					resume_time, testdesc)
 			else:
 				thtml = html_timetotal2.format(suspend_time, low_time, \
 					resume_time, testdesc)
-			devtl.html['timeline'] += thtml
+			devtl.html['header'] += thtml
 
 	# time scale for potentially multiple datasets
 	t0 = testruns[0].start
@@ -2249,153 +3117,241 @@ def createHTML(testruns):
 	tTotal = tMax - t0
 
 	# determine the maximum number of rows we need to draw
-	timelinerows = 0
 	for data in testruns:
-		for phase in data.dmesg:
-			list = data.dmesg[phase]['list']
-			rows = setTimelineRows(list, list)
-			data.dmesg[phase]['row'] = rows
-			if(rows > timelinerows):
-				timelinerows = rows
-
-	# calculate the timeline height and create bounding box, add buttons
-	devtl.setRows(timelinerows + 1)
-	devtl.html['timeline'] += html_devlist1
-	if len(testruns) > 1:
-		devtl.html['timeline'] += html_devlist2
+		data.selectTimelineDevices('%f', tTotal, sysvals.mindevlen)
+		for group in data.devicegroups:
+			devlist = []
+			for phase in group:
+				for devname in data.tdevlist[phase]:
+					devlist.append((phase,devname))
+			devtl.getPhaseRows(data.dmesg, devlist)
+	devtl.calcTotalRows()
+
+	# create bounding box, add buttons
+	if sysvals.suspendmode != 'command':
+		devtl.html['timeline'] += html_devlist1
+		if len(testruns) > 1:
+			devtl.html['timeline'] += html_devlist2
 	devtl.html['timeline'] += html_zoombox
 	devtl.html['timeline'] += html_timeline.format('dmesg', devtl.height)
 
-	# draw the colored boxes for each of the phases
-	for data in testruns:
-		for b in data.dmesg:
-			phase = data.dmesg[b]
-			length = phase['end']-phase['start']
-			left = '%.3f' % (((phase['start']-t0)*100.0)/tTotal)
-			width = '%.3f' % ((length*100.0)/tTotal)
-			devtl.html['timeline'] += html_phase.format(left, width, \
-				'%.3f'%devtl.scaleH, '%.3f'%(100-devtl.scaleH), \
-				data.dmesg[b]['color'], '')
+	# draw the full timeline
+	phases = {'suspend':[],'resume':[]}
+	for phase in data.dmesg:
+		if 'resume' in phase:
+			phases['resume'].append(phase)
+		else:
+			phases['suspend'].append(phase)
 
-	# draw the time scale, try to make the number of labels readable
-	devtl.html['scale'] = createTimeScale(t0, tMax, tSuspended)
-	devtl.html['timeline'] += devtl.html['scale']
+	# draw each test run chronologically
 	for data in testruns:
-		for b in data.dmesg:
-			phaselist = data.dmesg[b]['list']
-			for d in phaselist:
-				name = d
-				drv = ''
-				dev = phaselist[d]
-				if(d in sysvals.altdevname):
-					name = sysvals.altdevname[d]
-				if('drv' in dev and dev['drv']):
-					drv = ' {%s}' % dev['drv']
-				height = (100.0 - devtl.scaleH)/data.dmesg[b]['row']
-				top = '%.3f' % ((dev['row']*height) + devtl.scaleH)
-				left = '%.3f' % (((dev['start']-t0)*100)/tTotal)
-				width = '%.3f' % (((dev['end']-dev['start'])*100)/tTotal)
-				length = ' (%0.3f ms) ' % ((dev['end']-dev['start'])*1000)
-				color = 'rgba(204,204,204,0.5)'
-				devtl.html['timeline'] += html_device.format(dev['id'], \
-					d+drv+length+b, left, top, '%.3f'%height, width, name+drv)
-
-	# draw any trace events found
-	for data in testruns:
-		for b in data.dmesg:
-			phaselist = data.dmesg[b]['list']
-			for name in phaselist:
-				dev = phaselist[name]
-				if('traceevents' in dev):
-					vprint('Debug trace events found for device %s' % name)
-					vprint('%20s %20s %10s %8s' % ('action', \
+		# if nore than one test, draw a block to represent user mode
+		if(data.testnumber > 0):
+			m0 = testruns[data.testnumber-1].end
+			mMax = testruns[data.testnumber].start
+			mTotal = mMax - m0
+			name = 'usermode%d' % data.testnumber
+			top = '%d' % devtl.scaleH
+			left = '%f' % (((m0-t0)*100.0)/tTotal)
+			width = '%f' % ((mTotal*100.0)/tTotal)
+			title = 'user mode (%0.3f ms) ' % (mTotal*1000)
+			devtl.html['timeline'] += html_device.format(name, \
+				title, left, top, '%d'%devtl.bodyH, width, '', '', '')
+		# now draw the actual timeline blocks
+		for dir in phases:
+			# draw suspend and resume blocks separately
+			bname = '%s%d' % (dir[0], data.testnumber)
+			if dir == 'suspend':
+				m0 = testruns[data.testnumber].start
+				mMax = testruns[data.testnumber].tSuspended
+				mTotal = mMax - m0
+				left = '%f' % (((m0-t0)*100.0)/tTotal)
+			else:
+				m0 = testruns[data.testnumber].tSuspended
+				mMax = testruns[data.testnumber].end
+				mTotal = mMax - m0
+				left = '%f' % ((((m0-t0)*100.0)+sysvals.srgap/2)/tTotal)
+			# if a timeline block is 0 length, skip altogether
+			if mTotal == 0:
+				continue
+			width = '%f' % (((mTotal*100.0)-sysvals.srgap/2)/tTotal)
+			devtl.html['timeline'] += html_tblock.format(bname, left, width)
+			for b in sorted(phases[dir]):
+				# draw the phase color background
+				phase = data.dmesg[b]
+				length = phase['end']-phase['start']
+				left = '%f' % (((phase['start']-m0)*100.0)/mTotal)
+				width = '%f' % ((length*100.0)/mTotal)
+				devtl.html['timeline'] += html_phase.format(left, width, \
+					'%.3f'%devtl.scaleH, '%.3f'%devtl.bodyH, \
+					data.dmesg[b]['color'], '')
+				# draw the devices for this phase
+				phaselist = data.dmesg[b]['list']
+				for d in data.tdevlist[b]:
+					name = d
+					drv = ''
+					dev = phaselist[d]
+					xtraclass = ''
+					xtrainfo = ''
+					xtrastyle = ''
+					if 'htmlclass' in dev:
+						xtraclass = dev['htmlclass']
+						xtrainfo = dev['htmlclass']
+					if 'color' in dev:
+						xtrastyle = 'background-color:%s;' % dev['color']
+					if(d in sysvals.devprops):
+						name = sysvals.devprops[d].altName(d)
+						xtraclass = sysvals.devprops[d].xtraClass()
+						xtrainfo = sysvals.devprops[d].xtraInfo()
+					if('drv' in dev and dev['drv']):
+						drv = ' {%s}' % dev['drv']
+					rowheight = devtl.phaseRowHeight(b, dev['row'])
+					rowtop = devtl.phaseRowTop(b, dev['row'])
+					top = '%.3f' % (rowtop + devtl.scaleH)
+					left = '%f' % (((dev['start']-m0)*100)/mTotal)
+					width = '%f' % (((dev['end']-dev['start'])*100)/mTotal)
+					length = ' (%0.3f ms) ' % ((dev['end']-dev['start'])*1000)
+					if sysvals.suspendmode == 'command':
+						title = name+drv+xtrainfo+length+'cmdexec'
+					else:
+						title = name+drv+xtrainfo+length+b
+					devtl.html['timeline'] += html_device.format(dev['id'], \
+						title, left, top, '%.3f'%rowheight, width, \
+						d+drv, xtraclass, xtrastyle)
+					if('src' not in dev):
+						continue
+					# draw any trace events for this device
+					vprint('Debug trace events found for device %s' % d)
+					vprint('%20s %20s %10s %8s' % ('title', \
 						'name', 'time(ms)', 'length(ms)'))
-					for e in dev['traceevents']:
-						vprint('%20s %20s %10.3f %8.3f' % (e.action, \
-							e.name, e.time*1000, e.length*1000))
-						height = (100.0 - devtl.scaleH)/data.dmesg[b]['row']
-						top = '%.3f' % ((dev['row']*height) + devtl.scaleH)
-						left = '%.3f' % (((e.time-t0)*100)/tTotal)
-						width = '%.3f' % (e.length*100/tTotal)
+					for e in dev['src']:
+						vprint('%20s %20s %10.3f %8.3f' % (e.title, \
+							e.text, e.time*1000, e.length*1000))
+						height = devtl.rowH
+						top = '%.3f' % (rowtop + devtl.scaleH + (e.row*devtl.rowH))
+						left = '%f' % (((e.time-m0)*100)/mTotal)
+						width = '%f' % (e.length*100/mTotal)
 						color = 'rgba(204,204,204,0.5)'
 						devtl.html['timeline'] += \
-							html_traceevent.format(e.action+' '+e.name, \
+							html_traceevent.format(e.title, \
 								left, top, '%.3f'%height, \
-								width, e.color, '')
+								width, e.text)
+			# draw the time scale, try to make the number of labels readable
+			devtl.html['timeline'] += devtl.createTimeScale(m0, mMax, tTotal, dir)
+			devtl.html['timeline'] += '</div>\n'
 
 	# timeline is finished
 	devtl.html['timeline'] += '</div>\n</div>\n'
 
 	# draw a legend which describes the phases by color
-	data = testruns[-1]
-	devtl.html['legend'] = '<div class="legend">\n'
-	pdelta = 100.0/len(data.phases)
-	pmargin = pdelta / 4.0
-	for phase in data.phases:
-		order = '%.2f' % ((data.dmesg[phase]['order'] * pdelta) + pmargin)
-		name = string.replace(phase, '_', ' &nbsp;')
-		devtl.html['legend'] += html_legend.format(order, \
-			data.dmesg[phase]['color'], name)
-	devtl.html['legend'] += '</div>\n'
+	if sysvals.suspendmode != 'command':
+		data = testruns[-1]
+		devtl.html['legend'] = '<div class="legend">\n'
+		pdelta = 100.0/len(data.phases)
+		pmargin = pdelta / 4.0
+		for phase in data.phases:
+			tmp = phase.split('_')
+			id = tmp[0][0]
+			if(len(tmp) > 1):
+				id += tmp[1][0]
+			order = '%.2f' % ((data.dmesg[phase]['order'] * pdelta) + pmargin)
+			name = string.replace(phase, '_', ' &nbsp;')
+			devtl.html['legend'] += html_legend.format(order, \
+				data.dmesg[phase]['color'], name, id)
+		devtl.html['legend'] += '</div>\n'
 
 	hf = open(sysvals.htmlfile, 'w')
-	thread_height = 0
+
+	if not sysvals.cgexp:
+		cgchk = 'checked'
+		cgnchk = 'not(:checked)'
+	else:
+		cgchk = 'not(:checked)'
+		cgnchk = 'checked'
 
 	# write the html header first (html head, css code, up to body start)
 	html_header = '<!DOCTYPE html>\n<html>\n<head>\n\
 	<meta http-equiv="content-type" content="text/html; charset=UTF-8">\n\
-	<title>AnalyzeSuspend</title>\n\
+	<title>'+htmlTitle()+'</title>\n\
 	<style type=\'text/css\'>\n\
-		body {overflow-y: scroll;}\n\
-		.stamp {width: 100%;text-align:center;background-color:gray;line-height:30px;color:white;font: 25px Arial;}\n\
-		.callgraph {margin-top: 30px;box-shadow: 5px 5px 20px black;}\n\
-		.callgraph article * {padding-left: 28px;}\n\
-		h1 {color:black;font: bold 30px Times;}\n\
-		t0 {color:black;font: bold 30px Times;}\n\
-		t1 {color:black;font: 30px Times;}\n\
-		t2 {color:black;font: 25px Times;}\n\
-		t3 {color:black;font: 20px Times;white-space:nowrap;}\n\
-		t4 {color:black;font: bold 30px Times;line-height:60px;white-space:nowrap;}\n\
+		body {overflow-y:scroll;}\n\
+		.stamp {width:100%;text-align:center;background-color:gray;line-height:30px;color:white;font:25px Arial;}\n\
+		.callgraph {margin-top:30px;box-shadow:5px 5px 20px black;}\n\
+		.callgraph article * {padding-left:28px;}\n\
+		h1 {color:black;font:bold 30px Times;}\n\
+		t0 {color:black;font:bold 30px Times;}\n\
+		t1 {color:black;font:30px Times;}\n\
+		t2 {color:black;font:25px Times;}\n\
+		t3 {color:black;font:20px Times;white-space:nowrap;}\n\
+		t4 {color:black;font:bold 30px Times;line-height:60px;white-space:nowrap;}\n\
+		cS {color:blue;font:bold 11px Times;}\n\
+		cR {color:red;font:bold 11px Times;}\n\
 		table {width:100%;}\n\
 		.gray {background-color:rgba(80,80,80,0.1);}\n\
 		.green {background-color:rgba(204,255,204,0.4);}\n\
 		.purple {background-color:rgba(128,0,128,0.2);}\n\
 		.yellow {background-color:rgba(255,255,204,0.4);}\n\
-		.time1 {font: 22px Arial;border:1px solid;}\n\
-		.time2 {font: 15px Arial;border-bottom:1px solid;border-left:1px solid;border-right:1px solid;}\n\
-		td {text-align: center;}\n\
+		.time1 {font:22px Arial;border:1px solid;}\n\
+		.time2 {font:15px Arial;border-bottom:1px solid;border-left:1px solid;border-right:1px solid;}\n\
+		td {text-align:center;}\n\
 		r {color:#500000;font:15px Tahoma;}\n\
 		n {color:#505050;font:15px Tahoma;}\n\
-		.tdhl {color: red;}\n\
-		.hide {display: none;}\n\
-		.pf {display: none;}\n\
-		.pf:checked + label {background: url(\'data:image/svg+xml;utf,<?xml version="1.0" standalone="no"?><svg xmlns="http://www.w3.org/2000/svg" height="18" width="18" version="1.1"><circle cx="9" cy="9" r="8" stroke="black" stroke-width="1" fill="white"/><rect x="4" y="8" width="10" height="2" style="fill:black;stroke-width:0"/><rect x="8" y="4" width="2" height="10" style="fill:black;stroke-width:0"/></svg>\') no-repeat left center;}\n\
-		.pf:not(:checked) ~ label {background: url(\'data:image/svg+xml;utf,<?xml version="1.0" standalone="no"?><svg xmlns="http://www.w3.org/2000/svg" height="18" width="18" version="1.1"><circle cx="9" cy="9" r="8" stroke="black" stroke-width="1" fill="white"/><rect x="4" y="8" width="10" height="2" style="fill:black;stroke-width:0"/></svg>\') no-repeat left center;}\n\
-		.pf:checked ~ *:not(:nth-child(2)) {display: none;}\n\
-		.zoombox {position: relative; width: 100%; overflow-x: scroll;}\n\
-		.timeline {position: relative; font-size: 14px;cursor: pointer;width: 100%; overflow: hidden; background-color:#dddddd;}\n\
-		.thread {position: absolute; height: '+'%.3f'%thread_height+'%; overflow: hidden; line-height: 30px; border:1px solid;text-align:center;white-space:nowrap;background-color:rgba(204,204,204,0.5);}\n\
-		.thread:hover {background-color:white;border:1px solid red;z-index:10;}\n\
-		.hover {background-color:white;border:1px solid red;z-index:10;}\n\
-		.traceevent {position: absolute;opacity: 0.3;height: '+'%.3f'%thread_height+'%;width:0;overflow:hidden;line-height:30px;text-align:center;white-space:nowrap;}\n\
-		.phase {position: absolute;overflow: hidden;border:0px;text-align:center;}\n\
+		.tdhl {color:red;}\n\
+		.hide {display:none;}\n\
+		.pf {display:none;}\n\
+		.pf:'+cgchk+' + label {background:url(\'data:image/svg+xml;utf,<?xml version="1.0" standalone="no"?><svg xmlns="http://www.w3.org/2000/svg" height="18" width="18" version="1.1"><circle cx="9" cy="9" r="8" stroke="black" stroke-width="1" fill="white"/><rect x="4" y="8" width="10" height="2" style="fill:black;stroke-width:0"/><rect x="8" y="4" width="2" height="10" style="fill:black;stroke-width:0"/></svg>\') no-repeat left center;}\n\
+		.pf:'+cgnchk+' ~ label {background:url(\'data:image/svg+xml;utf,<?xml version="1.0" standalone="no"?><svg xmlns="http://www.w3.org/2000/svg" height="18" width="18" version="1.1"><circle cx="9" cy="9" r="8" stroke="black" stroke-width="1" fill="white"/><rect x="4" y="8" width="10" height="2" style="fill:black;stroke-width:0"/></svg>\') no-repeat left center;}\n\
+		.pf:'+cgchk+' ~ *:not(:nth-child(2)) {display:none;}\n\
+		.zoombox {position:relative;width:100%;overflow-x:scroll;}\n\
+		.timeline {position:relative;font-size:14px;cursor:pointer;width:100%; overflow:hidden;background:linear-gradient(#cccccc, white);}\n\
+		.thread {position:absolute;height:0%;overflow:hidden;line-height:'+devtextH+';font-size:'+devtextS+';border:1px solid;text-align:center;white-space:nowrap;background-color:rgba(204,204,204,0.5);}\n\
+		.thread.sync {background-color:'+sysvals.synccolor+';}\n\
+		.thread.bg {background-color:'+sysvals.kprobecolor+';}\n\
+		.thread:hover {background-color:white;border:1px solid red;'+hoverZ+'}\n\
+		.hover {background-color:white;border:1px solid red;'+hoverZ+'}\n\
+		.hover.sync {background-color:white;}\n\
+		.hover.bg {background-color:white;}\n\
+		.traceevent {position:absolute;font-size:10px;overflow:hidden;color:black;text-align:center;white-space:nowrap;border-radius:5px;border:1px solid black;background:linear-gradient(to bottom right,rgba(204,204,204,1),rgba(150,150,150,1));}\n\
+		.traceevent:hover {background:white;}\n\
+		.phase {position:absolute;overflow:hidden;border:0px;text-align:center;}\n\
 		.phaselet {position:absolute;overflow:hidden;border:0px;text-align:center;height:100px;font-size:24px;}\n\
-		.t {position:absolute;top:0%;height:100%;border-right:1px solid black;}\n\
-		.legend {position: relative; width: 100%; height: 40px; text-align: center;margin-bottom:20px}\n\
-		.legend .square {position:absolute;top:10px; width: 0px;height: 20px;border:1px solid;padding-left:20px;}\n\
+		.t {z-index:2;position:absolute;pointer-events:none;top:0%;height:100%;border-right:1px solid black;}\n\
+		.legend {position:relative; width:100%; height:40px; text-align:center;margin-bottom:20px}\n\
+		.legend .square {position:absolute;cursor:pointer;top:10px; width:0px;height:20px;border:1px solid;padding-left:20px;}\n\
 		button {height:40px;width:200px;margin-bottom:20px;margin-top:20px;font-size:24px;}\n\
+		.logbtn {position:relative;float:right;height:25px;width:50px;margin-top:3px;margin-bottom:0;font-size:10px;text-align:center;}\n\
 		.devlist {position:'+x2changes[1]+';width:190px;}\n\
-		#devicedetail {height:100px;box-shadow: 5px 5px 20px black;}\n\
+		a:link {color:white;text-decoration:none;}\n\
+		a:visited {color:white;}\n\
+		a:hover {color:white;}\n\
+		a:active {color:white;}\n\
+		.version {position:relative;float:left;color:white;font-size:10px;line-height:30px;margin-left:10px;}\n\
+		#devicedetail {height:100px;box-shadow:5px 5px 20px black;}\n\
+		.tblock {position:absolute;height:100%;}\n\
+		.bg {z-index:1;}\n\
 	</style>\n</head>\n<body>\n'
-	hf.write(html_header)
+
+	# no header or css if its embedded
+	if(sysvals.embedded):
+		hf.write('pass True tSus %.3f tRes %.3f tLow %.3f fwvalid %s tSus %.3f tRes %.3f\n' %
+			(data.tSuspended-data.start, data.end-data.tSuspended, data.tLow, data.fwValid, \
+				data.fwSuspend/1000000, data.fwResume/1000000))
+	else:
+		hf.write(html_header)
 
 	# write the test title and general info header
 	if(sysvals.stamp['time'] != ""):
+		hf.write(headline_version)
+		if sysvals.addlogs and sysvals.dmesgfile:
+			hf.write('<button id="showdmesg" class="logbtn">dmesg</button>')
+		if sysvals.addlogs and sysvals.ftracefile:
+			hf.write('<button id="showftrace" class="logbtn">ftrace</button>')
 		hf.write(headline_stamp.format(sysvals.stamp['host'],
 			sysvals.stamp['kernel'], sysvals.stamp['mode'], \
 				sysvals.stamp['time']))
 
 	# write the device timeline
+	hf.write(devtl.html['header'])
 	hf.write(devtl.html['timeline'])
 	hf.write(devtl.html['legend'])
 	hf.write('<div id="devicedetailtitle"></div>\n')
@@ -2410,12 +3366,15 @@ def createHTML(testruns):
 			width = '%.3f' % ((length*100.0)/tTotal)
 			hf.write(html_phaselet.format(b, left, width, \
 				data.dmesg[b]['color']))
+		if sysvals.suspendmode == 'command':
+			hf.write(html_phaselet.format('cmdexec', '0', '0', \
+				data.dmesg['resume_complete']['color']))
 		hf.write('</div>\n')
 	hf.write('</div>\n')
 
 	# write the ftrace data (callgraph)
 	data = testruns[-1]
-	if(sysvals.usecallgraph):
+	if(sysvals.usecallgraph and not sysvals.embedded):
 		hf.write('<section id="callgraphs" class="callgraph">\n')
 		# write out the ftrace data converted to html
 		html_func_top = '<article id="{0}" class="atop" style="background-color:{1}">\n<input type="checkbox" class="pf" id="f{2}" checked/><label for="f{2}">{3} {4}</label>\n'
@@ -2428,22 +3387,29 @@ def createHTML(testruns):
 			for devname in data.sortedDevices(p):
 				if('ftrace' not in list[devname]):
 					continue
-				name = devname
-				if(devname in sysvals.altdevname):
-					name = sysvals.altdevname[devname]
 				devid = list[devname]['id']
 				cg = list[devname]['ftrace']
-				flen = '<r>(%.3f ms @ %.3f to %.3f)</r>' % \
-					((cg.end - cg.start)*1000, cg.start*1000, cg.end*1000)
+				clen = (cg.end - cg.start) * 1000
+				if clen < sysvals.mincglen:
+					continue
+				fmt = '<r>(%.3f ms @ '+sysvals.timeformat+' to '+sysvals.timeformat+')</r>'
+				flen = fmt % (clen, cg.start, cg.end)
+				name = devname
+				if(devname in sysvals.devprops):
+					name = sysvals.devprops[devname].altName(devname)
+				if sysvals.suspendmode == 'command':
+					ftitle = name
+				else:
+					ftitle = name+' '+p
 				hf.write(html_func_top.format(devid, data.dmesg[p]['color'], \
-					num, name+' '+p, flen))
+					num, ftitle, flen))
 				num += 1
 				for line in cg.list:
 					if(line.length < 0.000000001):
 						flen = ''
 					else:
-						flen = '<n>(%.3f ms @ %.3f)</n>' % (line.length*1000, \
-							line.time*1000)
+						fmt = '<n>(%.3f ms @ '+sysvals.timeformat+')</n>'
+						flen = fmt % (line.length*1000, line.time)
 					if(line.freturn and line.fcall):
 						hf.write(html_func_leaf.format(line.name, flen))
 					elif(line.freturn):
@@ -2453,9 +3419,40 @@ def createHTML(testruns):
 						num += 1
 				hf.write(html_func_end)
 		hf.write('\n\n    </section>\n')
-	# write the footer and close
-	addScriptCode(hf, testruns)
-	hf.write('</body>\n</html>\n')
+
+	# add the dmesg log as a hidden div
+	if sysvals.addlogs and sysvals.dmesgfile:
+		hf.write('<div id="dmesglog" style="display:none;">\n')
+		lf = open(sysvals.dmesgfile, 'r')
+		for line in lf:
+			hf.write(line)
+		lf.close()
+		hf.write('</div>\n')
+	# add the ftrace log as a hidden div
+	if sysvals.addlogs and sysvals.ftracefile:
+		hf.write('<div id="ftracelog" style="display:none;">\n')
+		lf = open(sysvals.ftracefile, 'r')
+		for line in lf:
+			hf.write(line)
+		lf.close()
+		hf.write('</div>\n')
+
+	if(not sysvals.embedded):
+		# write the footer and close
+		addScriptCode(hf, testruns)
+		hf.write('</body>\n</html>\n')
+	else:
+		# embedded out will be loaded in a page, skip the js
+		t0 = (testruns[0].start - testruns[-1].tSuspended) * 1000
+		tMax = (testruns[-1].end - testruns[-1].tSuspended) * 1000
+		# add js code in a div entry for later evaluation
+		detail = 'var bounds = [%f,%f];\n' % (t0, tMax)
+		detail += 'var devtable = [\n'
+		for data in testruns:
+			topo = data.deviceTopology()
+			detail += '\t"%s",\n' % (topo)
+		detail += '];\n'
+		hf.write('<div id=customcode style=display:none>\n'+detail+'</div>\n')
 	hf.close()
 	return True
 
@@ -2466,8 +3463,8 @@ def createHTML(testruns):
 #	 hf: the open html file pointer
 #	 testruns: array of Data objects from parseKernelLog or parseTraceLog
 def addScriptCode(hf, testruns):
-	t0 = (testruns[0].start - testruns[-1].tSuspended) * 1000
-	tMax = (testruns[-1].end - testruns[-1].tSuspended) * 1000
+	t0 = testruns[0].start * 1000
+	tMax = testruns[-1].end * 1000
 	# create an array in javascript memory with the device details
 	detail = '	var devtable = [];\n'
 	for data in testruns:
@@ -2477,8 +3474,43 @@ def addScriptCode(hf, testruns):
 	# add the code which will manipulate the data in the browser
 	script_code = \
 	'<script type="text/javascript">\n'+detail+\
+	'	var resolution = -1;\n'\
+	'	function redrawTimescale(t0, tMax, tS) {\n'\
+	'		var rline = \'<div class="t" style="left:0;border-left:1px solid black;border-right:0;"><cR><-R</cR></div>\';\n'\
+	'		var tTotal = tMax - t0;\n'\
+	'		var list = document.getElementsByClassName("tblock");\n'\
+	'		for (var i = 0; i < list.length; i++) {\n'\
+	'			var timescale = list[i].getElementsByClassName("timescale")[0];\n'\
+	'			var m0 = t0 + (tTotal*parseFloat(list[i].style.left)/100);\n'\
+	'			var mTotal = tTotal*parseFloat(list[i].style.width)/100;\n'\
+	'			var mMax = m0 + mTotal;\n'\
+	'			var html = "";\n'\
+	'			var divTotal = Math.floor(mTotal/tS) + 1;\n'\
+	'			if(divTotal > 1000) continue;\n'\
+	'			var divEdge = (mTotal - tS*(divTotal-1))*100/mTotal;\n'\
+	'			var pos = 0.0, val = 0.0;\n'\
+	'			for (var j = 0; j < divTotal; j++) {\n'\
+	'				var htmlline = "";\n'\
+	'				if(list[i].id[5] == "r") {\n'\
+	'					pos = 100 - (((j)*tS*100)/mTotal);\n'\
+	'					val = (j)*tS;\n'\
+	'					htmlline = \'<div class="t" style="right:\'+pos+\'%">\'+val+\'ms</div>\';\n'\
+	'					if(j == 0)\n'\
+	'						htmlline = rline;\n'\
+	'				} else {\n'\
+	'					pos = 100 - (((j)*tS*100)/mTotal) - divEdge;\n'\
+	'					val = (j-divTotal+1)*tS;\n'\
+	'					if(j == divTotal - 1)\n'\
+	'						htmlline = \'<div class="t" style="right:\'+pos+\'%"><cS>S-></cS></div>\';\n'\
+	'					else\n'\
+	'						htmlline = \'<div class="t" style="right:\'+pos+\'%">\'+val+\'ms</div>\';\n'\
+	'				}\n'\
+	'				html += htmlline;\n'\
+	'			}\n'\
+	'			timescale.innerHTML = html;\n'\
+	'		}\n'\
+	'	}\n'\
 	'	function zoomTimeline() {\n'\
-	'		var timescale = document.getElementById("timescale");\n'\
 	'		var dmesg = document.getElementById("dmesg");\n'\
 	'		var zoombox = document.getElementById("dmesgzoombox");\n'\
 	'		var val = parseFloat(dmesg.style.width);\n'\
@@ -2486,7 +3518,7 @@ def addScriptCode(hf, testruns):
 	'		var sh = window.outerWidth / 2;\n'\
 	'		if(this.id == "zoomin") {\n'\
 	'			newval = val * 1.2;\n'\
-	'			if(newval > 40000) newval = 40000;\n'\
+	'			if(newval > 910034) newval = 910034;\n'\
 	'			dmesg.style.width = newval+"%";\n'\
 	'			zoombox.scrollLeft = ((zoombox.scrollLeft + sh) * newval / val) - sh;\n'\
 	'		} else if (this.id == "zoomout") {\n'\
@@ -2498,19 +3530,17 @@ def addScriptCode(hf, testruns):
 	'			zoombox.scrollLeft = 0;\n'\
 	'			dmesg.style.width = "100%";\n'\
 	'		}\n'\
-	'		var html = "";\n'\
+	'		var tS = [10000, 5000, 2000, 1000, 500, 200, 100, 50, 20, 10, 5, 2, 1];\n'\
 	'		var t0 = bounds[0];\n'\
 	'		var tMax = bounds[1];\n'\
 	'		var tTotal = tMax - t0;\n'\
 	'		var wTotal = tTotal * 100.0 / newval;\n'\
-	'		for(var tS = 1000; (wTotal / tS) < 3; tS /= 10);\n'\
-	'		if(tS < 1) tS = 1;\n'\
-	'		for(var s = ((t0 / tS)|0) * tS; s < tMax; s += tS) {\n'\
-	'			var pos = (tMax - s) * 100.0 / tTotal;\n'\
-	'			var name = (s == 0)?"S/R":(s+"ms");\n'\
-	'			html += "<div class=\\"t\\" style=\\"right:"+pos+"%\\">"+name+"</div>";\n'\
-	'		}\n'\
-	'		timescale.innerHTML = html;\n'\
+	'		var idx = 7*window.innerWidth/1100;\n'\
+	'		for(var i = 0; (i < tS.length)&&((wTotal / tS[i]) < idx); i++);\n'\
+	'		if(i >= tS.length) i = tS.length - 1;\n'\
+	'		if(tS[i] == resolution) return;\n'\
+	'		resolution = tS[i];\n'\
+	'		redrawTimescale(t0, tMax, tS[i]);\n'\
 	'	}\n'\
 	'	function deviceHover() {\n'\
 	'		var name = this.title.slice(0, this.title.indexOf(" ("));\n'\
@@ -2523,12 +3553,13 @@ def addScriptCode(hf, testruns):
 	'			cpu = parseInt(name.slice(8));\n'\
 	'		for (var i = 0; i < dev.length; i++) {\n'\
 	'			dname = dev[i].title.slice(0, dev[i].title.indexOf(" ("));\n'\
+	'			var cname = dev[i].className.slice(dev[i].className.indexOf("thread"));\n'\
 	'			if((cpu >= 0 && dname.match("CPU_O[NF]*\\\[*"+cpu+"\\\]")) ||\n'\
 	'				(name == dname))\n'\
 	'			{\n'\
-	'				dev[i].className = "thread hover";\n'\
+	'				dev[i].className = "hover "+cname;\n'\
 	'			} else {\n'\
-	'				dev[i].className = "thread";\n'\
+	'				dev[i].className = cname;\n'\
 	'			}\n'\
 	'		}\n'\
 	'	}\n'\
@@ -2536,7 +3567,7 @@ def addScriptCode(hf, testruns):
 	'		var dmesg = document.getElementById("dmesg");\n'\
 	'		var dev = dmesg.getElementsByClassName("thread");\n'\
 	'		for (var i = 0; i < dev.length; i++) {\n'\
-	'			dev[i].className = "thread";\n'\
+	'			dev[i].className = dev[i].className.slice(dev[i].className.indexOf("thread"));\n'\
 	'		}\n'\
 	'	}\n'\
 	'	function deviceTitle(title, total, cpu) {\n'\
@@ -2547,7 +3578,7 @@ def addScriptCode(hf, testruns):
 	'			total[2] = (total[2]+total[4])/2;\n'\
 	'		}\n'\
 	'		var devtitle = document.getElementById("devicedetailtitle");\n'\
-	'		var name = title.slice(0, title.indexOf(" "));\n'\
+	'		var name = title.slice(0, title.indexOf(" ("));\n'\
 	'		if(cpu >= 0) name = "CPU"+cpu;\n'\
 	'		var driver = "";\n'\
 	'		var tS = "<t2>(</t2>";\n'\
@@ -2579,6 +3610,8 @@ def addScriptCode(hf, testruns):
 	'		var dev = dmesg.getElementsByClassName("thread");\n'\
 	'		var idlist = [];\n'\
 	'		var pdata = [[]];\n'\
+	'		if(document.getElementById("devicedetail1"))\n'\
+	'			pdata = [[], []];\n'\
 	'		var pd = pdata[0];\n'\
 	'		var total = [0.0, 0.0, 0.0];\n'\
 	'		for (var i = 0; i < dev.length; i++) {\n'\
@@ -2634,6 +3667,7 @@ def addScriptCode(hf, testruns):
 	'		var cglist = document.getElementById("callgraphs");\n'\
 	'		if(!cglist) return;\n'\
 	'		var cg = cglist.getElementsByClassName("atop");\n'\
+	'		if(cg.length < 10) return;\n'\
 	'		for (var i = 0; i < cg.length; i++) {\n'\
 	'			if(idlist.indexOf(cg[i].id) >= 0) {\n'\
 	'				cg[i].style.display = "block";\n'\
@@ -2658,15 +3692,32 @@ def addScriptCode(hf, testruns):
 	'			dt = devtable[1];\n'\
 	'		win.document.write(html+dt);\n'\
 	'	}\n'\
+	'	function logWindow(e) {\n'\
+	'		var name = e.target.id.slice(4);\n'\
+	'		var win = window.open();\n'\
+	'		var log = document.getElementById(name+"log");\n'\
+	'		var title = "<title>"+document.title.split(" ")[0]+" "+name+" log</title>";\n'\
+	'		win.document.write(title+"<pre>"+log.innerHTML+"</pre>");\n'\
+	'		win.document.close();\n'\
+	'	}\n'\
+	'	function onClickPhase(e) {\n'\
+	'	}\n'\
+	'	window.addEventListener("resize", function () {zoomTimeline();});\n'\
 	'	window.addEventListener("load", function () {\n'\
 	'		var dmesg = document.getElementById("dmesg");\n'\
 	'		dmesg.style.width = "100%"\n'\
 	'		document.getElementById("zoomin").onclick = zoomTimeline;\n'\
 	'		document.getElementById("zoomout").onclick = zoomTimeline;\n'\
 	'		document.getElementById("zoomdef").onclick = zoomTimeline;\n'\
-	'		var devlist = document.getElementsByClassName("devlist");\n'\
-	'		for (var i = 0; i < devlist.length; i++)\n'\
-	'			devlist[i].onclick = devListWindow;\n'\
+	'		var list = document.getElementsByClassName("square");\n'\
+	'		for (var i = 0; i < list.length; i++)\n'\
+	'			list[i].onclick = onClickPhase;\n'\
+	'		var list = document.getElementsByClassName("logbtn");\n'\
+	'		for (var i = 0; i < list.length; i++)\n'\
+	'			list[i].onclick = logWindow;\n'\
+	'		list = document.getElementsByClassName("devlist");\n'\
+	'		for (var i = 0; i < list.length; i++)\n'\
+	'			list[i].onclick = devListWindow;\n'\
 	'		var dev = dmesg.getElementsByClassName("thread");\n'\
 	'		for (var i = 0; i < dev.length; i++) {\n'\
 	'			dev[i].onclick = deviceDetail;\n'\
@@ -2685,141 +3736,87 @@ def addScriptCode(hf, testruns):
 def executeSuspend():
 	global sysvals
 
-	detectUSB(False)
 	t0 = time.time()*1000
 	tp = sysvals.tpath
+	fwdata = []
+	# mark the start point in the kernel ring buffer just as we start
+	sysvals.initdmesg()
+	# start ftrace
+	if(sysvals.usecallgraph or sysvals.usetraceevents):
+		print('START TRACING')
+		sysvals.fsetVal('1', 'tracing_on')
 	# execute however many s/r runs requested
 	for count in range(1,sysvals.execcount+1):
-		# clear the kernel ring buffer just as we start
-		os.system('dmesg -C')
-		# enable callgraph ftrace only for the second run
-		if(sysvals.usecallgraph and count == 2):
-			# set trace type
-			os.system('echo function_graph > '+tp+'current_tracer')
-			os.system('echo "" > '+tp+'set_ftrace_filter')
-			# set trace format options
-			os.system('echo funcgraph-abstime > '+tp+'trace_options')
-			os.system('echo funcgraph-proc > '+tp+'trace_options')
-			# focus only on device suspend and resume
-			os.system('cat '+tp+'available_filter_functions | '+\
-				'grep dpm_run_callback > '+tp+'set_graph_function')
 		# if this is test2 and there's a delay, start here
 		if(count > 1 and sysvals.x2delay > 0):
 			tN = time.time()*1000
 			while (tN - t0) < sysvals.x2delay:
 				tN = time.time()*1000
 				time.sleep(0.001)
-		# start ftrace
-		if(sysvals.usecallgraph or sysvals.usetraceevents):
-			print('START TRACING')
-			os.system('echo 1 > '+tp+'tracing_on')
 		# initiate suspend
 		if(sysvals.usecallgraph or sysvals.usetraceevents):
-			os.system('echo SUSPEND START > '+tp+'trace_marker')
-		if(sysvals.rtcwake):
-			print('SUSPEND START')
-			print('will autoresume in %d seconds' % sysvals.rtcwaketime)
-			sysvals.rtcWakeAlarm()
+			sysvals.fsetVal('SUSPEND START', 'trace_marker')
+		if sysvals.suspendmode == 'command':
+			print('COMMAND START')
+			if(sysvals.rtcwake):
+				print('will issue an rtcwake in %d seconds' % sysvals.rtcwaketime)
+				sysvals.rtcWakeAlarmOn()
+			os.system(sysvals.testcommand)
 		else:
-			print('SUSPEND START (press a key to resume)')
-		pf = open(sysvals.powerfile, 'w')
-		pf.write(sysvals.suspendmode)
-		# execution will pause here
-		pf.close()
+			if(sysvals.rtcwake):
+				print('SUSPEND START')
+				print('will autoresume in %d seconds' % sysvals.rtcwaketime)
+				sysvals.rtcWakeAlarmOn()
+			else:
+				print('SUSPEND START (press a key to resume)')
+			pf = open(sysvals.powerfile, 'w')
+			pf.write(sysvals.suspendmode)
+			# execution will pause here
+			try:
+				pf.close()
+			except:
+				pass
 		t0 = time.time()*1000
+		if(sysvals.rtcwake):
+			sysvals.rtcWakeAlarmOff()
 		# return from suspend
 		print('RESUME COMPLETE')
 		if(sysvals.usecallgraph or sysvals.usetraceevents):
-			os.system('echo RESUME COMPLETE > '+tp+'trace_marker')
-		# see if there's firmware timing data to be had
-		t = sysvals.postresumetime
-		if(t > 0):
-			print('Waiting %d seconds for POST-RESUME trace events...' % t)
-			time.sleep(t)
-		# stop ftrace
-		if(sysvals.usecallgraph or sysvals.usetraceevents):
-			os.system('echo 0 > '+tp+'tracing_on')
-			print('CAPTURING TRACE')
-			writeDatafileHeader(sysvals.ftracefile)
-			os.system('cat '+tp+'trace >> '+sysvals.ftracefile)
-			os.system('echo "" > '+tp+'trace')
-		# grab a copy of the dmesg output
-		print('CAPTURING DMESG')
-		writeDatafileHeader(sysvals.dmesgfile)
-		os.system('dmesg -c >> '+sysvals.dmesgfile)
-
-def writeDatafileHeader(filename):
+			sysvals.fsetVal('RESUME COMPLETE', 'trace_marker')
+		if(sysvals.suspendmode == 'mem'):
+			fwdata.append(getFPDT(False))
+	# look for post resume events after the last test run
+	t = sysvals.postresumetime
+	if(t > 0):
+		print('Waiting %d seconds for POST-RESUME trace events...' % t)
+		time.sleep(t)
+	# stop ftrace
+	if(sysvals.usecallgraph or sysvals.usetraceevents):
+		sysvals.fsetVal('0', 'tracing_on')
+		print('CAPTURING TRACE')
+		writeDatafileHeader(sysvals.ftracefile, fwdata)
+		os.system('cat '+tp+'trace >> '+sysvals.ftracefile)
+		sysvals.fsetVal('', 'trace')
+		devProps()
+	# grab a copy of the dmesg output
+	print('CAPTURING DMESG')
+	writeDatafileHeader(sysvals.dmesgfile, fwdata)
+	sysvals.getdmesg()
+
+def writeDatafileHeader(filename, fwdata):
 	global sysvals
 
-	fw = getFPDT(False)
 	prt = sysvals.postresumetime
 	fp = open(filename, 'a')
 	fp.write(sysvals.teststamp+'\n')
-	if(fw):
-		fp.write('# fwsuspend %u fwresume %u\n' % (fw[0], fw[1]))
+	if(sysvals.suspendmode == 'mem'):
+		for fw in fwdata:
+			if(fw):
+				fp.write('# fwsuspend %u fwresume %u\n' % (fw[0], fw[1]))
 	if(prt > 0):
 		fp.write('# post resume time %u\n' % prt)
 	fp.close()
 
-# Function: executeAndroidSuspend
-# Description:
-#	 Execute system suspend through the sysfs interface
-#	 on a remote android device, then transfer the output
-#	 dmesg and ftrace files to the local output directory.
-def executeAndroidSuspend():
-	global sysvals
-
-	# check to see if the display is currently off
-	tp = sysvals.tpath
-	out = os.popen(sysvals.adb+\
-		' shell dumpsys power | grep mScreenOn').read().strip()
-	# if so we need to turn it on so we can issue a new suspend
-	if(out.endswith('false')):
-		print('Waking the device up for the test...')
-		# send the KEYPAD_POWER keyevent to wake it up
-		os.system(sysvals.adb+' shell input keyevent 26')
-		# wait a few seconds so the user can see the device wake up
-		time.sleep(3)
-	# execute however many s/r runs requested
-	for count in range(1,sysvals.execcount+1):
-		# clear the kernel ring buffer just as we start
-		os.system(sysvals.adb+' shell dmesg -c > /dev/null 2>&1')
-		# start ftrace
-		if(sysvals.usetraceevents):
-			print('START TRACING')
-			os.system(sysvals.adb+" shell 'echo 1 > "+tp+"tracing_on'")
-		# initiate suspend
-		for count in range(1,sysvals.execcount+1):
-			if(sysvals.usetraceevents):
-				os.system(sysvals.adb+\
-					" shell 'echo SUSPEND START > "+tp+"trace_marker'")
-			print('SUSPEND START (press a key on the device to resume)')
-			os.system(sysvals.adb+" shell 'echo "+sysvals.suspendmode+\
-				" > "+sysvals.powerfile+"'")
-			# execution will pause here, then adb will exit
-			while(True):
-				check = os.popen(sysvals.adb+\
-					' shell pwd 2>/dev/null').read().strip()
-				if(len(check) > 0):
-					break
-				time.sleep(1)
-			if(sysvals.usetraceevents):
-				os.system(sysvals.adb+" shell 'echo RESUME COMPLETE > "+tp+\
-					"trace_marker'")
-		# return from suspend
-		print('RESUME COMPLETE')
-		# stop ftrace
-		if(sysvals.usetraceevents):
-			os.system(sysvals.adb+" shell 'echo 0 > "+tp+"tracing_on'")
-			print('CAPTURING TRACE')
-			os.system('echo "'+sysvals.teststamp+'" > '+sysvals.ftracefile)
-			os.system(sysvals.adb+' shell cat '+tp+\
-				'trace >> '+sysvals.ftracefile)
-		# grab a copy of the dmesg output
-		print('CAPTURING DMESG')
-		os.system('echo "'+sysvals.teststamp+'" > '+sysvals.dmesgfile)
-		os.system(sysvals.adb+' shell dmesg >> '+sysvals.dmesgfile)
-
 # Function: setUSBDevicesAuto
 # Description:
 #	 Set the autosuspend control parameter of all USB devices to auto
@@ -2829,7 +3826,7 @@ def executeAndroidSuspend():
 def setUSBDevicesAuto():
 	global sysvals
 
-	rootCheck()
+	rootCheck(True)
 	for dirname, dirnames, filenames in os.walk('/sys/devices'):
 		if(re.match('.*/usb[0-9]*.*', dirname) and
 			'idVendor' in filenames and 'idProduct' in filenames):
@@ -2874,9 +3871,7 @@ def ms2nice(val):
 # Description:
 #	 Detect all the USB hosts and devices currently connected and add
 #	 a list of USB device names to sysvals for better timeline readability
-# Arguments:
-#	 output: True to output the info to stdout, False otherwise
-def detectUSB(output):
+def detectUSB():
 	global sysvals
 
 	field = {'idVendor':'', 'idProduct':'', 'product':'', 'speed':''}
@@ -2887,18 +3882,18 @@ def detectUSB(output):
 			'runtime_suspended_time':'',
 			'active_duration':'',
 			'connected_duration':''}
-	if(output):
-		print('LEGEND')
-		print('---------------------------------------------------------------------------------------------')
-		print('  A = async/sync PM queue Y/N                       D = autosuspend delay (seconds)')
-		print('  S = autosuspend Y/N                         rACTIVE = runtime active (min/sec)')
-		print('  P = persist across suspend Y/N              rSUSPEN = runtime suspend (min/sec)')
-		print('  E = runtime suspend enabled/forbidden Y/N    ACTIVE = active duration (min/sec)')
-		print('  R = runtime status active/suspended Y/N     CONNECT = connected duration (min/sec)')
-		print('  U = runtime usage count')
-		print('---------------------------------------------------------------------------------------------')
-		print('  NAME       ID      DESCRIPTION         SPEED A S P E R U D rACTIVE rSUSPEN  ACTIVE CONNECT')
-		print('---------------------------------------------------------------------------------------------')
+
+	print('LEGEND')
+	print('---------------------------------------------------------------------------------------------')
+	print('  A = async/sync PM queue Y/N                       D = autosuspend delay (seconds)')
+	print('  S = autosuspend Y/N                         rACTIVE = runtime active (min/sec)')
+	print('  P = persist across suspend Y/N              rSUSPEN = runtime suspend (min/sec)')
+	print('  E = runtime suspend enabled/forbidden Y/N    ACTIVE = active duration (min/sec)')
+	print('  R = runtime status active/suspended Y/N     CONNECT = connected duration (min/sec)')
+	print('  U = runtime usage count')
+	print('---------------------------------------------------------------------------------------------')
+	print('  NAME       ID      DESCRIPTION         SPEED A S P E R U D rACTIVE rSUSPEN  ACTIVE CONNECT')
+	print('---------------------------------------------------------------------------------------------')
 
 	for dirname, dirnames, filenames in os.walk('/sys/devices'):
 		if(re.match('.*/usb[0-9]*.*', dirname) and
@@ -2907,35 +3902,149 @@ def detectUSB(output):
 				field[i] = os.popen('cat %s/%s 2>/dev/null' % \
 					(dirname, i)).read().replace('\n', '')
 			name = dirname.split('/')[-1]
-			if(len(field['product']) > 0):
-				sysvals.altdevname[name] = \
-					'%s [%s]' % (field['product'], name)
+			for i in power:
+				power[i] = os.popen('cat %s/power/%s 2>/dev/null' % \
+					(dirname, i)).read().replace('\n', '')
+			if(re.match('usb[0-9]*', name)):
+				first = '%-8s' % name
 			else:
-				sysvals.altdevname[name] = \
-					'%s:%s [%s]' % (field['idVendor'], \
-						field['idProduct'], name)
-			if(output):
-				for i in power:
-					power[i] = os.popen('cat %s/power/%s 2>/dev/null' % \
-						(dirname, i)).read().replace('\n', '')
-				if(re.match('usb[0-9]*', name)):
-					first = '%-8s' % name
-				else:
-					first = '%8s' % name
-				print('%s [%s:%s] %-20s %-4s %1s %1s %1s %1s %1s %1s %1s %s %s %s %s' % \
-					(first, field['idVendor'], field['idProduct'], \
-					field['product'][0:20], field['speed'], \
-					yesno(power['async']), \
-					yesno(power['control']), \
-					yesno(power['persist']), \
-					yesno(power['runtime_enabled']), \
-					yesno(power['runtime_status']), \
-					power['runtime_usage'], \
-					power['autosuspend'], \
-					ms2nice(power['runtime_active_time']), \
-					ms2nice(power['runtime_suspended_time']), \
-					ms2nice(power['active_duration']), \
-					ms2nice(power['connected_duration'])))
+				first = '%8s' % name
+			print('%s [%s:%s] %-20s %-4s %1s %1s %1s %1s %1s %1s %1s %s %s %s %s' % \
+				(first, field['idVendor'], field['idProduct'], \
+				field['product'][0:20], field['speed'], \
+				yesno(power['async']), \
+				yesno(power['control']), \
+				yesno(power['persist']), \
+				yesno(power['runtime_enabled']), \
+				yesno(power['runtime_status']), \
+				power['runtime_usage'], \
+				power['autosuspend'], \
+				ms2nice(power['runtime_active_time']), \
+				ms2nice(power['runtime_suspended_time']), \
+				ms2nice(power['active_duration']), \
+				ms2nice(power['connected_duration'])))
+
+# Function: devProps
+# Description:
+#	 Retrieve a list of properties for all devices in the trace log
+def devProps(data=0):
+	global sysvals
+	props = dict()
+
+	if data:
+		idx = data.index(': ') + 2
+		if idx >= len(data):
+			return
+		devlist = data[idx:].split(';')
+		for dev in devlist:
+			f = dev.split(',')
+			if len(f) < 3:
+				continue
+			dev = f[0]
+			props[dev] = DevProps()
+			props[dev].altname = f[1]
+			if int(f[2]):
+				props[dev].async = True
+			else:
+				props[dev].async = False
+			sysvals.devprops = props
+		if sysvals.suspendmode == 'command' and 'testcommandstring' in props:
+			sysvals.testcommand = props['testcommandstring'].altname
+		return
+
+	if(os.path.exists(sysvals.ftracefile) == False):
+		doError('%s does not exist' % sysvals.ftracefile, False)
+
+	# first get the list of devices we need properties for
+	msghead = 'Additional data added by AnalyzeSuspend'
+	alreadystamped = False
+	tp = TestProps()
+	tf = open(sysvals.ftracefile, 'r')
+	for line in tf:
+		if msghead in line:
+			alreadystamped = True
+			continue
+		# determine the trace data type (required for further parsing)
+		m = re.match(sysvals.tracertypefmt, line)
+		if(m):
+			tp.setTracerType(m.group('t'))
+			continue
+		# parse only valid lines, if this is not one move on
+		m = re.match(tp.ftrace_line_fmt, line)
+		if(not m or 'device_pm_callback_start' not in line):
+			continue
+		m = re.match('.*: (?P<drv>.*) (?P<d>.*), parent: *(?P<p>.*), .*', m.group('msg'));
+		if(not m):
+			continue
+		drv, dev, par = m.group('drv'), m.group('d'), m.group('p')
+		if dev not in props:
+			props[dev] = DevProps()
+	tf.close()
+
+	if not alreadystamped and sysvals.suspendmode == 'command':
+		out = '#\n# '+msghead+'\n# Device Properties: '
+		out += 'testcommandstring,%s,0;' % (sysvals.testcommand)
+		with open(sysvals.ftracefile, 'a') as fp:
+			fp.write(out+'\n')
+		sysvals.devprops = props
+		return
+
+	# now get the syspath for each of our target devices
+	for dirname, dirnames, filenames in os.walk('/sys/devices'):
+		if(re.match('.*/power', dirname) and 'async' in filenames):
+			dev = dirname.split('/')[-2]
+			if dev in props and (not props[dev].syspath or len(dirname) < len(props[dev].syspath)):
+				props[dev].syspath = dirname[:-6]
+
+	# now fill in the properties for our target devices
+	for dev in props:
+		dirname = props[dev].syspath
+		if not dirname or not os.path.exists(dirname):
+			continue
+		with open(dirname+'/power/async') as fp:
+			text = fp.read()
+			props[dev].async = False
+			if 'enabled' in text:
+				props[dev].async = True
+		fields = os.listdir(dirname)
+		if 'product' in fields:
+			with open(dirname+'/product') as fp:
+				props[dev].altname = fp.read()
+		elif 'name' in fields:
+			with open(dirname+'/name') as fp:
+				props[dev].altname = fp.read()
+		elif 'model' in fields:
+			with open(dirname+'/model') as fp:
+				props[dev].altname = fp.read()
+		elif 'description' in fields:
+			with open(dirname+'/description') as fp:
+				props[dev].altname = fp.read()
+		elif 'id' in fields:
+			with open(dirname+'/id') as fp:
+				props[dev].altname = fp.read()
+		elif 'idVendor' in fields and 'idProduct' in fields:
+			idv, idp = '', ''
+			with open(dirname+'/idVendor') as fp:
+				idv = fp.read().strip()
+			with open(dirname+'/idProduct') as fp:
+				idp = fp.read().strip()
+			props[dev].altname = '%s:%s' % (idv, idp)
+
+		if props[dev].altname:
+			out = props[dev].altname.strip().replace('\n', ' ')
+			out = out.replace(',', ' ')
+			out = out.replace(';', ' ')
+			props[dev].altname = out
+
+	# and now write the data to the ftrace file
+	if not alreadystamped:
+		out = '#\n# '+msghead+'\n# Device Properties: '
+		for dev in sorted(props):
+			out += props[dev].out(dev)
+		with open(sysvals.ftracefile, 'a') as fp:
+			fp.write(out+'\n')
+
+	sysvals.devprops = props
 
 # Function: getModes
 # Description:
@@ -2945,15 +4054,10 @@ def detectUSB(output):
 def getModes():
 	global sysvals
 	modes = ''
-	if(not sysvals.android):
-		if(os.path.exists(sysvals.powerfile)):
-			fp = open(sysvals.powerfile, 'r')
-			modes = string.split(fp.read())
-			fp.close()
-	else:
-		line = os.popen(sysvals.adb+' shell cat '+\
-			sysvals.powerfile).read().strip()
-		modes = string.split(line)
+	if(os.path.exists(sysvals.powerfile)):
+		fp = open(sysvals.powerfile, 'r')
+		modes = string.split(fp.read())
+		fp.close()
 	return modes
 
 # Function: getFPDT
@@ -2971,22 +4075,22 @@ def getFPDT(output):
 	prectype[0] = 'Basic S3 Resume Performance Record'
 	prectype[1] = 'Basic S3 Suspend Performance Record'
 
-	rootCheck()
+	rootCheck(True)
 	if(not os.path.exists(sysvals.fpdtpath)):
 		if(output):
-			doError('file doesnt exist: %s' % sysvals.fpdtpath, False)
+			doError('file does not exist: %s' % sysvals.fpdtpath, False)
 		return False
 	if(not os.access(sysvals.fpdtpath, os.R_OK)):
 		if(output):
-			doError('file isnt readable: %s' % sysvals.fpdtpath, False)
+			doError('file is not readable: %s' % sysvals.fpdtpath, False)
 		return False
 	if(not os.path.exists(sysvals.mempath)):
 		if(output):
-			doError('file doesnt exist: %s' % sysvals.mempath, False)
+			doError('file does not exist: %s' % sysvals.mempath, False)
 		return False
 	if(not os.access(sysvals.mempath, os.R_OK)):
 		if(output):
-			doError('file isnt readable: %s' % sysvals.mempath, False)
+			doError('file is not readable: %s' % sysvals.mempath, False)
 		return False
 
 	fp = open(sysvals.fpdtpath, 'rb')
@@ -3027,15 +4131,19 @@ def getFPDT(output):
 	while(i < len(records)):
 		header = struct.unpack('HBB', records[i:i+4])
 		if(header[0] not in rectype):
+			i += header[1]
 			continue
 		if(header[1] != 16):
+			i += header[1]
 			continue
 		addr = struct.unpack('Q', records[i+8:i+16])[0]
 		try:
 			fp.seek(addr)
 			first = fp.read(8)
 		except:
-			doError('Bad address 0x%x in %s' % (addr, sysvals.mempath), False)
+			if(output):
+				print('Bad address 0x%x in %s' % (addr, sysvals.mempath))
+			return [0, 0]
 		rechead = struct.unpack('4sI', first)
 		recdata = fp.read(rechead[1]-8)
 		if(rechead[0] == 'FBPT'):
@@ -3090,89 +4198,60 @@ def getFPDT(output):
 #	 print the results to the terminal
 # Output:
 #	 True if the test will work, False if not
-def statusCheck():
+def statusCheck(probecheck=False):
 	global sysvals
 	status = True
 
-	if(sysvals.android):
-		print('Checking the android system ...')
-	else:
-		print('Checking this system (%s)...' % platform.node())
-
-	# check if adb is connected to a device
-	if(sysvals.android):
-		res = 'NO'
-		out = os.popen(sysvals.adb+' get-state').read().strip()
-		if(out == 'device'):
-			res = 'YES'
-		print('    is android device connected: %s' % res)
-		if(res != 'YES'):
-			print('    Please connect the device before using this tool')
-			return False
+	print('Checking this system (%s)...' % platform.node())
 
 	# check we have root access
-	res = 'NO (No features of this tool will work!)'
-	if(sysvals.android):
-		out = os.popen(sysvals.adb+' shell id').read().strip()
-		if('root' in out):
-			res = 'YES'
-	else:
-		if(os.environ['USER'] == 'root'):
-			res = 'YES'
+	res = sysvals.colorText('NO (No features of this tool will work!)')
+	if(rootCheck(False)):
+		res = 'YES'
 	print('    have root access: %s' % res)
 	if(res != 'YES'):
-		if(sysvals.android):
-			print('    Try running "adb root" to restart the daemon as root')
-		else:
-			print('    Try running this script with sudo')
+		print('    Try running this script with sudo')
 		return False
 
 	# check sysfs is mounted
-	res = 'NO (No features of this tool will work!)'
-	if(sysvals.android):
-		out = os.popen(sysvals.adb+' shell ls '+\
-			sysvals.powerfile).read().strip()
-		if(out == sysvals.powerfile):
-			res = 'YES'
-	else:
-		if(os.path.exists(sysvals.powerfile)):
-			res = 'YES'
+	res = sysvals.colorText('NO (No features of this tool will work!)')
+	if(os.path.exists(sysvals.powerfile)):
+		res = 'YES'
 	print('    is sysfs mounted: %s' % res)
 	if(res != 'YES'):
 		return False
 
 	# check target mode is a valid mode
-	res = 'NO'
-	modes = getModes()
-	if(sysvals.suspendmode in modes):
-		res = 'YES'
-	else:
-		status = False
-	print('    is "%s" a valid power mode: %s' % (sysvals.suspendmode, res))
-	if(res == 'NO'):
-		print('      valid power modes are: %s' % modes)
-		print('      please choose one with -m')
-
-	# check if the tool can unlock the device
-	if(sysvals.android):
-		res = 'YES'
-		out1 = os.popen(sysvals.adb+\
-			' shell dumpsys power | grep mScreenOn').read().strip()
-		out2 = os.popen(sysvals.adb+\
-			' shell input').read().strip()
-		if(not out1.startswith('mScreenOn') or not out2.startswith('usage')):
-			res = 'NO (wake the android device up before running the test)'
-		print('    can I unlock the screen: %s' % res)
+	if sysvals.suspendmode != 'command':
+		res = sysvals.colorText('NO')
+		modes = getModes()
+		if(sysvals.suspendmode in modes):
+			res = 'YES'
+		else:
+			status = False
+		print('    is "%s" a valid power mode: %s' % (sysvals.suspendmode, res))
+		if(res == 'NO'):
+			print('      valid power modes are: %s' % modes)
+			print('      please choose one with -m')
 
 	# check if ftrace is available
-	res = 'NO'
-	ftgood = verifyFtrace()
+	res = sysvals.colorText('NO')
+	ftgood = sysvals.verifyFtrace()
 	if(ftgood):
 		res = 'YES'
 	elif(sysvals.usecallgraph):
 		status = False
 	print('    is ftrace supported: %s' % res)
 
+	# check if kprobes are available
+	res = sysvals.colorText('NO')
+	sysvals.usekprobes = sysvals.verifyKprobes()
+	if(sysvals.usekprobes):
+		res = 'YES'
+	else:
+		sysvals.usedevsrc = False
+	print('    are kprobes supported: %s' % res)
+
 	# what data source are we using
 	res = 'DMESG'
 	if(ftgood):
@@ -3180,14 +4259,8 @@ def statusCheck():
 		sysvals.usetraceevents = False
 		for e in sysvals.traceevents:
 			check = False
-			if(sysvals.android):
-				out = os.popen(sysvals.adb+' shell ls -d '+\
-					sysvals.epath+e).read().strip()
-				if(out == sysvals.epath+e):
-					check = True
-			else:
-				if(os.path.exists(sysvals.epath+e)):
-					check = True
+			if(os.path.exists(sysvals.epath+e)):
+				check = True
 			if(not check):
 				sysvals.usetraceeventsonly = False
 			if(e == 'suspend_resume' and check):
@@ -3199,13 +4272,48 @@ def statusCheck():
 	print('    timeline data source: %s' % res)
 
 	# check if rtcwake
-	res = 'NO'
+	res = sysvals.colorText('NO')
 	if(sysvals.rtcpath != ''):
 		res = 'YES'
 	elif(sysvals.rtcwake):
 		status = False
 	print('    is rtcwake supported: %s' % res)
 
+	if not probecheck:
+		return status
+
+	if (sysvals.usecallgraph and len(sysvals.debugfuncs) > 0) or len(sysvals.kprobes) > 0:
+		sysvals.initFtrace(True)
+
+	# verify callgraph debugfuncs
+	if sysvals.usecallgraph and len(sysvals.debugfuncs) > 0:
+		print('    verifying these ftrace callgraph functions work:')
+		sysvals.setFtraceFilterFunctions(sysvals.debugfuncs)
+		fp = open(sysvals.tpath+'set_graph_function', 'r')
+		flist = fp.read().split('\n')
+		fp.close()
+		for func in sysvals.debugfuncs:
+			res = sysvals.colorText('NO')
+			if func in flist:
+				res = 'YES'
+			else:
+				for i in flist:
+					if ' [' in i and func == i.split(' ')[0]:
+						res = 'YES'
+						break
+			print('         %s: %s' % (func, res))
+
+	# verify kprobes
+	if len(sysvals.kprobes) > 0:
+		print('    verifying these kprobes work:')
+		for name in sorted(sysvals.kprobes):
+			if name in sysvals.tracefuncs:
+				continue
+			res = sysvals.colorText('NO')
+			if sysvals.testKprobe(sysvals.kprobes[name]):
+				res = 'YES'
+			print('         %s: %s' % (name, res))
+
 	return status
 
 # Function: doError
@@ -3226,7 +4334,7 @@ def doError(msg, help):
 # Arguments:
 #	 msg: the warning message to print
 #	 file: If not empty, a filename to request be sent to the owner for debug
-def doWarning(msg, file):
+def doWarning(msg, file=''):
 	print('/* %s */') % msg
 	if(file):
 		print('/* For a fix, please send this'+\
@@ -3235,18 +4343,25 @@ def doWarning(msg, file):
 # Function: rootCheck
 # Description:
 #	 quick check to see if we have root access
-def rootCheck():
-	if(os.environ['USER'] != 'root'):
-		doError('This script must be run as root', False)
+def rootCheck(fatal):
+	global sysvals
+	if(os.access(sysvals.powerfile, os.W_OK)):
+		return True
+	if fatal:
+		doError('This command must be run as root', False)
+	return False
 
 # Function: getArgInt
 # Description:
 #	 pull out an integer argument from the command line with checks
-def getArgInt(name, args, min, max):
-	try:
-		arg = args.next()
-	except:
-		doError(name+': no argument supplied', True)
+def getArgInt(name, args, min, max, main=True):
+	if main:
+		try:
+			arg = args.next()
+		except:
+			doError(name+': no argument supplied', True)
+	else:
+		arg = args
 	try:
 		val = int(arg)
 	except:
@@ -3255,6 +4370,25 @@ def getArgInt(name, args, min, max):
 		doError(name+': value should be between %d and %d' % (min, max), True)
 	return val
 
+# Function: getArgFloat
+# Description:
+#	 pull out a float argument from the command line with checks
+def getArgFloat(name, args, min, max, main=True):
+	if main:
+		try:
+			arg = args.next()
+		except:
+			doError(name+': no argument supplied', True)
+	else:
+		arg = args
+	try:
+		val = float(arg)
+	except:
+		doError(name+': non-numerical value given', True)
+	if(val < min or val > max):
+		doError(name+': value should be between %f and %f' % (min, max), True)
+	return val
+
 # Function: rerunTest
 # Description:
 #	 generate an output from an existing set of ftrace/dmesg logs
@@ -3282,15 +4416,12 @@ def rerunTest():
 # Function: runTest
 # Description:
 #	 execute a suspend/resume, gather the logs, and generate the output
-def runTest(subdir):
+def runTest(subdir, testpath=''):
 	global sysvals
 
 	# prepare for the test
-	if(not sysvals.android):
-		initFtrace()
-	else:
-		initFtraceAndroid()
-	sysvals.initTestOutput(subdir)
+	sysvals.initFtrace()
+	sysvals.initTestOutput(subdir, testpath)
 
 	vprint('Output files:\n    %s' % sysvals.dmesgfile)
 	if(sysvals.usecallgraph or
@@ -3300,10 +4431,8 @@ def runTest(subdir):
 	vprint('    %s' % sysvals.htmlfile)
 
 	# execute the test
-	if(not sysvals.android):
-		executeSuspend()
-	else:
-		executeAndroidSuspend()
+	executeSuspend()
+	sysvals.cleanupFtrace()
 
 	# analyze the data and create the html output
 	print('PROCESSING DATA')
@@ -3367,6 +4496,153 @@ def runSummary(subdir, output):
 
 	createHTMLSummarySimple(testruns, subdir+'/summary.html')
 
+# Function: checkArgBool
+# Description:
+#	 check if a boolean string value is true or false
+def checkArgBool(value):
+	yes = ['1', 'true', 'yes', 'on']
+	if value.lower() in yes:
+		return True
+	return False
+
+# Function: configFromFile
+# Description:
+#	 Configure the script via the info in a config file
+def configFromFile(file):
+	global sysvals
+	Config = ConfigParser.ConfigParser()
+
+	ignorekprobes = False
+	Config.read(file)
+	sections = Config.sections()
+	if 'Settings' in sections:
+		for opt in Config.options('Settings'):
+			value = Config.get('Settings', opt).lower()
+			if(opt.lower() == 'verbose'):
+				sysvals.verbose = checkArgBool(value)
+			elif(opt.lower() == 'addlogs'):
+				sysvals.addlogs = checkArgBool(value)
+			elif(opt.lower() == 'dev'):
+				sysvals.usedevsrc = checkArgBool(value)
+			elif(opt.lower() == 'ignorekprobes'):
+				ignorekprobes = checkArgBool(value)
+			elif(opt.lower() == 'x2'):
+				if checkArgBool(value):
+					sysvals.execcount = 2
+			elif(opt.lower() == 'callgraph'):
+				sysvals.usecallgraph = checkArgBool(value)
+			elif(opt.lower() == 'callgraphfunc'):
+				sysvals.debugfuncs = []
+				if value:
+					value = value.split(',')
+				for i in value:
+					sysvals.debugfuncs.append(i.strip())
+			elif(opt.lower() == 'expandcg'):
+				sysvals.cgexp = checkArgBool(value)
+			elif(opt.lower() == 'srgap'):
+				if checkArgBool(value):
+					sysvals.srgap = 5
+			elif(opt.lower() == 'mode'):
+				sysvals.suspendmode = value
+			elif(opt.lower() == 'command'):
+				sysvals.testcommand = value
+			elif(opt.lower() == 'x2delay'):
+				sysvals.x2delay = getArgInt('-x2delay', value, 0, 60000, False)
+			elif(opt.lower() == 'postres'):
+				sysvals.postresumetime = getArgInt('-postres', value, 0, 3600, False)
+			elif(opt.lower() == 'rtcwake'):
+				sysvals.rtcwake = True
+				sysvals.rtcwaketime = getArgInt('-rtcwake', value, 0, 3600, False)
+			elif(opt.lower() == 'timeprec'):
+				sysvals.setPrecision(getArgInt('-timeprec', value, 0, 6, False))
+			elif(opt.lower() == 'mindev'):
+				sysvals.mindevlen = getArgFloat('-mindev', value, 0.0, 10000.0, False)
+			elif(opt.lower() == 'mincg'):
+				sysvals.mincglen = getArgFloat('-mincg', value, 0.0, 10000.0, False)
+			elif(opt.lower() == 'kprobecolor'):
+				try:
+					val = int(value, 16)
+					sysvals.kprobecolor = '#'+value
+				except:
+					sysvals.kprobecolor = value
+			elif(opt.lower() == 'synccolor'):
+				try:
+					val = int(value, 16)
+					sysvals.synccolor = '#'+value
+				except:
+					sysvals.synccolor = value
+			elif(opt.lower() == 'output-dir'):
+				args = dict()
+				n = datetime.now()
+				args['date'] = n.strftime('%y%m%d')
+				args['time'] = n.strftime('%H%M%S')
+				args['hostname'] = sysvals.hostname
+				sysvals.outdir = value.format(**args)
+
+	if sysvals.suspendmode == 'command' and not sysvals.testcommand:
+		doError('No command supplied for mode "command"', False)
+	if sysvals.usedevsrc and sysvals.usecallgraph:
+		doError('dev and callgraph cannot both be true', False)
+	if sysvals.usecallgraph and sysvals.execcount > 1:
+		doError('-x2 is not compatible with -f', False)
+
+	if ignorekprobes:
+		return
+
+	kprobes = dict()
+	archkprobe = 'Kprobe_'+platform.machine()
+	if archkprobe in sections:
+		for name in Config.options(archkprobe):
+			kprobes[name] = Config.get(archkprobe, name)
+	if 'Kprobe' in sections:
+		for name in Config.options('Kprobe'):
+			kprobes[name] = Config.get('Kprobe', name)
+
+	for name in kprobes:
+		function = name
+		format = name
+		color = ''
+		args = dict()
+		data = kprobes[name].split()
+		i = 0
+		for val in data:
+			# bracketted strings are special formatting, read them separately
+			if val[0] == '[' and val[-1] == ']':
+				for prop in val[1:-1].split(','):
+					p = prop.split('=')
+					if p[0] == 'color':
+						try:
+							color = int(p[1], 16)
+							color = '#'+p[1]
+						except:
+							color = p[1]
+				continue
+			# first real arg should be the format string
+			if i == 0:
+				format = val
+			# all other args are actual function args
+			else:
+				d = val.split('=')
+				args[d[0]] = d[1]
+			i += 1
+		if not function or not format:
+			doError('Invalid kprobe: %s' % name, False)
+		for arg in re.findall('{(?P<n>[a-z,A-Z,0-9]*)}', format):
+			if arg not in args:
+				doError('Kprobe "%s" is missing argument "%s"' % (name, arg), False)
+		if name in sysvals.kprobes:
+			doError('Duplicate kprobe found "%s"' % (name), False)
+		vprint('Adding KPROBE: %s %s %s %s' % (name, function, format, args))
+		sysvals.kprobes[name] = {
+			'name': name,
+			'func': function,
+			'format': format,
+			'args': args,
+			'mask': re.sub('{(?P<n>[a-z,A-Z,0-9]*)}', '.*', format)
+		}
+		if color:
+			sysvals.kprobes[name]['color'] = color
+
 # Function: printHelp
 # Description:
 #	 print out the help text
@@ -3375,7 +4651,7 @@ def printHelp():
 	modes = getModes()
 
 	print('')
-	print('AnalyzeSuspend v%.1f' % sysvals.version)
+	print('AnalyzeSuspend v%s' % sysvals.version)
 	print('Usage: sudo analyze_suspend.py <options>')
 	print('')
 	print('Description:')
@@ -3396,27 +4672,38 @@ def printHelp():
 	print('  [general]')
 	print('    -h          Print this help text')
 	print('    -v          Print the current tool version')
+	print('    -config file Pull arguments and config options from a file')
 	print('    -verbose    Print extra information during execution and analysis')
 	print('    -status     Test to see if the system is enabled to run this tool')
 	print('    -modes      List available suspend modes')
 	print('    -m mode     Mode to initiate for suspend %s (default: %s)') % (modes, sysvals.suspendmode)
-	print('    -rtcwake t  Use rtcwake to autoresume after <t> seconds (default: disabled)')
+	print('    -o subdir   Override the output subdirectory')
 	print('  [advanced]')
+	print('    -rtcwake t  Use rtcwake to autoresume after <t> seconds (default: disabled)')
+	print('    -addlogs    Add the dmesg and ftrace logs to the html output')
+	print('    -multi n d  Execute <n> consecutive tests at <d> seconds intervals. The outputs will')
+	print('                be created in a new subdirectory with a summary page.')
+	print('    -srgap      Add a visible gap in the timeline between sus/res (default: disabled)')
+	print('    -cmd {s}    Instead of suspend/resume, run a command, e.g. "sync -d"')
+	print('    -mindev ms  Discard all device blocks shorter than ms milliseconds (e.g. 0.001 for us)')
+	print('    -mincg  ms  Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)')
+	print('    -timeprec N Number of significant digits in timestamps (0:S, [3:ms], 6:us)')
+	print('  [debug]')
 	print('    -f          Use ftrace to create device callgraphs (default: disabled)')
-	print('    -filter "d1 d2 ..." Filter out all but this list of dev names')
+	print('    -expandcg   pre-expand the callgraph data in the html output (default: disabled)')
+	print('    -flist      Print the list of functions currently being captured in ftrace')
+	print('    -flistall   Print all functions capable of being captured in ftrace')
+	print('    -fadd file  Add functions to be graphed in the timeline from a list in a text file')
+	print('    -filter "d1 d2 ..." Filter out all but this list of device names')
+	print('    -dev        Display common low level functions in the timeline')
+	print('  [post-resume task analysis]')
 	print('    -x2         Run two suspend/resumes back to back (default: disabled)')
 	print('    -x2delay t  Minimum millisecond delay <t> between the two test runs (default: 0 ms)')
 	print('    -postres t  Time after resume completion to wait for post-resume events (default: 0 S)')
-	print('    -multi n d  Execute <n> consecutive tests at <d> seconds intervals. The outputs will')
-	print('                be created in a new subdirectory with a summary page.')
 	print('  [utilities]')
 	print('    -fpdt       Print out the contents of the ACPI Firmware Performance Data Table')
 	print('    -usbtopo    Print out the current USB topology with power info')
 	print('    -usbauto    Enable autosuspend for all connected USB devices')
-	print('  [android testing]')
-	print('    -adb binary Use the given adb binary to run the test on an android device.')
-	print('                The device should already be connected and with root access.')
-	print('                Commands will be executed on the device using "adb shell"')
 	print('  [re-analyze data from previous runs]')
 	print('    -ftrace ftracefile  Create HTML output using ftrace input')
 	print('    -dmesg dmesgfile    Create HTML output using dmesg (not needed for kernel >= 3.15)')
@@ -3430,6 +4717,7 @@ if __name__ == '__main__':
 	cmd = ''
 	cmdarg = ''
 	multitest = {'run': False, 'count': 0, 'delay': 0}
+	simplecmds = ['-modes', '-fpdt', '-flist', '-flistall', '-usbtopo', '-usbauto', '-status']
 	# loop through the command line arguments
 	args = iter(sys.argv[1:])
 	for arg in args:
@@ -3438,58 +4726,85 @@ if __name__ == '__main__':
 				val = args.next()
 			except:
 				doError('No mode supplied', True)
+			if val == 'command' and not sysvals.testcommand:
+				doError('No command supplied for mode "command"', True)
 			sysvals.suspendmode = val
-		elif(arg == '-adb'):
-			try:
-				val = args.next()
-			except:
-				doError('No adb binary supplied', True)
-			if(not os.path.exists(val)):
-				doError('file doesnt exist: %s' % val, False)
-			if(not os.access(val, os.X_OK)):
-				doError('file isnt executable: %s' % val, False)
-			try:
-				check = os.popen(val+' version').read().strip()
-			except:
-				doError('adb version failed to execute', False)
-			if(not re.match('Android Debug Bridge .*', check)):
-				doError('adb version failed to execute', False)
-			sysvals.adb = val
-			sysvals.android = True
+		elif(arg in simplecmds):
+			cmd = arg[1:]
+		elif(arg == '-h'):
+			printHelp()
+			sys.exit()
+		elif(arg == '-v'):
+			print("Version %s" % sysvals.version)
+			sys.exit()
 		elif(arg == '-x2'):
-			if(sysvals.postresumetime > 0):
-				doError('-x2 is not compatible with -postres', False)
 			sysvals.execcount = 2
+			if(sysvals.usecallgraph):
+				doError('-x2 is not compatible with -f', False)
 		elif(arg == '-x2delay'):
 			sysvals.x2delay = getArgInt('-x2delay', args, 0, 60000)
 		elif(arg == '-postres'):
-			if(sysvals.execcount != 1):
-				doError('-x2 is not compatible with -postres', False)
 			sysvals.postresumetime = getArgInt('-postres', args, 0, 3600)
 		elif(arg == '-f'):
 			sysvals.usecallgraph = True
-		elif(arg == '-modes'):
-			cmd = 'modes'
-		elif(arg == '-fpdt'):
-			cmd = 'fpdt'
-		elif(arg == '-usbtopo'):
-			cmd = 'usbtopo'
-		elif(arg == '-usbauto'):
-			cmd = 'usbauto'
-		elif(arg == '-status'):
-			cmd = 'status'
+			if(sysvals.execcount > 1):
+				doError('-x2 is not compatible with -f', False)
+			if(sysvals.usedevsrc):
+				doError('-dev is not compatible with -f', False)
+		elif(arg == '-addlogs'):
+			sysvals.addlogs = True
 		elif(arg == '-verbose'):
 			sysvals.verbose = True
-		elif(arg == '-v'):
-			print("Version %.1f" % sysvals.version)
-			sys.exit()
+		elif(arg == '-dev'):
+			sysvals.usedevsrc = True
+			if(sysvals.usecallgraph):
+				doError('-dev is not compatible with -f', False)
 		elif(arg == '-rtcwake'):
 			sysvals.rtcwake = True
 			sysvals.rtcwaketime = getArgInt('-rtcwake', args, 0, 3600)
+		elif(arg == '-timeprec'):
+			sysvals.setPrecision(getArgInt('-timeprec', args, 0, 6))
+		elif(arg == '-mindev'):
+			sysvals.mindevlen = getArgFloat('-mindev', args, 0.0, 10000.0)
+		elif(arg == '-mincg'):
+			sysvals.mincglen = getArgFloat('-mincg', args, 0.0, 10000.0)
+		elif(arg == '-cmd'):
+			try:
+				val = args.next()
+			except:
+				doError('No command string supplied', True)
+			sysvals.testcommand = val
+			sysvals.suspendmode = 'command'
+		elif(arg == '-expandcg'):
+			sysvals.cgexp = True
+		elif(arg == '-srgap'):
+			sysvals.srgap = 5
 		elif(arg == '-multi'):
 			multitest['run'] = True
 			multitest['count'] = getArgInt('-multi n (exec count)', args, 2, 1000000)
 			multitest['delay'] = getArgInt('-multi d (delay between tests)', args, 0, 3600)
+		elif(arg == '-o'):
+			try:
+				val = args.next()
+			except:
+				doError('No subdirectory name supplied', True)
+			sysvals.outdir = val
+		elif(arg == '-config'):
+			try:
+				val = args.next()
+			except:
+				doError('No text file supplied', True)
+			if(os.path.exists(val) == False):
+				doError('%s does not exist' % val, False)
+			configFromFile(val)
+		elif(arg == '-fadd'):
+			try:
+				val = args.next()
+			except:
+				doError('No text file supplied', True)
+			if(os.path.exists(val) == False):
+				doError('%s does not exist' % val, False)
+			sysvals.addFtraceFilterFunctions(val)
 		elif(arg == '-dmesg'):
 			try:
 				val = args.next()
@@ -3498,17 +4813,16 @@ if __name__ == '__main__':
 			sysvals.notestrun = True
 			sysvals.dmesgfile = val
 			if(os.path.exists(sysvals.dmesgfile) == False):
-				doError('%s doesnt exist' % sysvals.dmesgfile, False)
+				doError('%s does not exist' % sysvals.dmesgfile, False)
 		elif(arg == '-ftrace'):
 			try:
 				val = args.next()
 			except:
 				doError('No ftrace file supplied', True)
 			sysvals.notestrun = True
-			sysvals.usecallgraph = True
 			sysvals.ftracefile = val
 			if(os.path.exists(sysvals.ftracefile) == False):
-				doError('%s doesnt exist' % sysvals.ftracefile, False)
+				doError('%s does not exist' % sysvals.ftracefile, False)
 		elif(arg == '-summary'):
 			try:
 				val = args.next()
@@ -3518,35 +4832,35 @@ if __name__ == '__main__':
 			cmdarg = val
 			sysvals.notestrun = True
 			if(os.path.isdir(val) == False):
-				doError('%s isnt accesible' % val, False)
+				doError('%s is not accesible' % val, False)
 		elif(arg == '-filter'):
 			try:
 				val = args.next()
 			except:
 				doError('No devnames supplied', True)
 			sysvals.setDeviceFilter(val)
-		elif(arg == '-h'):
-			printHelp()
-			sys.exit()
 		else:
 			doError('Invalid argument: '+arg, True)
 
+	# callgraph size cannot exceed device size
+	if sysvals.mincglen < sysvals.mindevlen:
+		sysvals.mincglen = sysvals.mindevlen
+
 	# just run a utility command and exit
 	if(cmd != ''):
 		if(cmd == 'status'):
-			statusCheck()
+			statusCheck(True)
 		elif(cmd == 'fpdt'):
-			if(sysvals.android):
-				doError('cannot read FPDT on android device', False)
 			getFPDT(True)
 		elif(cmd == 'usbtopo'):
-			if(sysvals.android):
-				doError('cannot read USB topology '+\
-					'on an android device', False)
-			detectUSB(True)
+			detectUSB()
 		elif(cmd == 'modes'):
 			modes = getModes()
 			print modes
+		elif(cmd == 'flist'):
+			sysvals.getFtraceFilterFunctions(True)
+		elif(cmd == 'flistall'):
+			sysvals.getFtraceFilterFunctions(False)
 		elif(cmd == 'usbauto'):
 			setUSBDevicesAuto()
 		elif(cmd == 'summary'):
@@ -3554,15 +4868,6 @@ if __name__ == '__main__':
 			runSummary(cmdarg, True)
 		sys.exit()
 
-	# run test on android device
-	if(sysvals.android):
-		if(sysvals.usecallgraph):
-			doError('ftrace (-f) is not yet supported '+\
-				'in the android kernel', False)
-		if(sysvals.notestrun):
-			doError('cannot analyze test files on the '+\
-				'android device', False)
-
 	# if instructed, re-analyze existing data files
 	if(sysvals.notestrun):
 		rerunTest()
@@ -3574,18 +4879,20 @@ if __name__ == '__main__':
 		sys.exit()
 
 	if multitest['run']:
-		# run multiple tests in a separte subdirectory
+		# run multiple tests in a separate subdirectory
 		s = 'x%d' % multitest['count']
-		subdir = datetime.now().strftime('suspend-'+s+'-%m%d%y-%H%M%S')
-		os.mkdir(subdir)
+		if not sysvals.outdir:
+			sysvals.outdir = datetime.now().strftime('suspend-'+s+'-%m%d%y-%H%M%S')
+		if not os.path.isdir(sysvals.outdir):
+			os.mkdir(sysvals.outdir)
 		for i in range(multitest['count']):
 			if(i != 0):
 				print('Waiting %d seconds...' % (multitest['delay']))
 				time.sleep(multitest['delay'])
 			print('TEST (%d/%d) START' % (i+1, multitest['count']))
-			runTest(subdir)
+			runTest(sysvals.outdir)
 			print('TEST (%d/%d) COMPLETE' % (i+1, multitest['count']))
-		runSummary(subdir, False)
+		runSummary(sysvals.outdir, False)
 	else:
 		# run the test in the current directory
-		runTest(".")
+		runTest('.', sysvals.outdir)
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index e367b1a85d70..8561e7ddca59 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -1,7 +1,7 @@
 CC		= $(CROSS_COMPILE)gcc
 BUILD_OUTPUT	:= $(CURDIR)
-PREFIX		:= /usr
-DESTDIR		:=
+PREFIX		?= /usr
+DESTDIR		?=
 
 ifeq ("$(origin O)", "command line")
 	BUILD_OUTPUT := $(O)
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 89a55d5e32f3..492e84fbebfa 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -123,7 +123,7 @@ cpu0: MSR_NHM_PLATFORM_INFO: 0x80838f3012300
 35 * 100 = 3500 MHz TSC frequency
 cpu0: MSR_IA32_POWER_CTL: 0x0004005d (C1E auto-promotion: DISabled)
 cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e000400 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, UNlocked: pkg-cstate-limit=0: pc0)
-cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x25262727
+cpu0: MSR_TURBO_RATIO_LIMIT: 0x25262727
 37 * 100 = 3700 MHz max turbo 4 active cores
 38 * 100 = 3800 MHz max turbo 3 active cores
 39 * 100 = 3900 MHz max turbo 2 active cores
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index acbf7ff2ee6e..3e199b508a96 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1480,7 +1480,7 @@ dump_knl_turbo_ratio_limits(void)
 	unsigned int cores[buckets_no];
 	unsigned int ratio[buckets_no];
 
-	get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
+	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
 
 	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
 		base_cpu, msr);