author		Linus Torvalds <torvalds@linux-foundation.org>	2010-10-21 14:53:17 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-10-21 14:53:17 -0700
commit		a8cbf22559ceefdcdfac00701e8e6da7518b7e8e (patch)
tree		63ebd5779a37f809f7daed77dbf27aa3f1e1110c /drivers
parent		e36f561a2c88394ef2708f1ab300fe8a79e9f651 (diff)
parent		9c034392533f3e9f00656d5c58478cff2560ef81 (diff)
download	linux-a8cbf22559ceefdcdfac00701e8e6da7518b7e8e.tar.gz
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/suspend-2.6
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/suspend-2.6: (26 commits)
  PM / Wakeup: Show wakeup sources statistics in debugfs
  PM: Introduce library for device-specific OPPs (v7)
  PM: Add sysfs attr for rechecking dev hash from PM trace
  PM: Lock PM device list mutex in show_dev_hash()
  PM / Runtime: Remove idle notification after failing suspend
  PM / Hibernate: Modify signature used to mark swap
  PM / Runtime: Reduce code duplication in core helper functions
  PM: Allow wakeup events to abort freezing of tasks
  PM: runtime: add missed pm_request_autosuspend
  PM / Hibernate: Make some boot messages look less scary
  PM / Runtime: Implement autosuspend support
  PM / Runtime: Add no_callbacks flag
  PM / Runtime: Combine runtime PM entry points
  PM / Runtime: Merge synchronous and async runtime routines
  PM / Runtime: Replace boolean arguments with bitflags
  PM / Runtime: Move code in drivers/base/power/runtime.c
  sysfs: Add sysfs_merge_group() and sysfs_unmerge_group()
  PM: Fix potential issue with failing asynchronous suspend
  PM / Wakeup: Introduce wakeup source objects and event statistics (v3)
  PM: Fix signed/unsigned warning in dpm_show_time()
  ...
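Of the changes above, the autosuspend work ("PM / Runtime: Implement autosuspend support") is the most visible driver-facing API. As a rough sketch (not part of this merge; the foo_* names and the 5000 ms delay are illustrative, and the helpers are assumed to be the ones this series adds to include/linux/pm_runtime.h), a driver would opt in like this:

	#include <linux/pm_runtime.h>

	static int foo_probe(struct device *dev)
	{
		/* Keep the device powered for 5 s after its last use. */
		pm_runtime_set_autosuspend_delay(dev, 5000);
		pm_runtime_use_autosuspend(dev);
		pm_runtime_enable(dev);
		return 0;
	}

	static void foo_io_done(struct device *dev)
	{
		/* Record activity, then drop the usage count; the core
		 * queues an autosuspend once the delay has expired. */
		pm_runtime_mark_last_busy(dev);
		pm_runtime_put_autosuspend(dev);
	}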
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/base/power/Makefile       |    1
-rw-r--r--  drivers/base/power/generic_ops.c  |    4
-rw-r--r--  drivers/base/power/main.c         |   21
-rw-r--r--  drivers/base/power/opp.c          |  628
-rw-r--r--  drivers/base/power/power.h        |    2
-rw-r--r--  drivers/base/power/runtime.c      |  944
-rw-r--r--  drivers/base/power/sysfs.c        |  217
-rw-r--r--  drivers/base/power/trace.c        |   36
-rw-r--r--  drivers/base/power/wakeup.c       |  613
9 files changed, 1927 insertions(+), 539 deletions(-)
diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile
index cbccf9a3cee4..abe46edfe5b4 100644
--- a/drivers/base/power/Makefile
+++ b/drivers/base/power/Makefile
@@ -3,6 +3,7 @@ obj-$(CONFIG_PM_SLEEP)	+= main.o wakeup.o
 obj-$(CONFIG_PM_RUNTIME)	+= runtime.o
 obj-$(CONFIG_PM_OPS)	+= generic_ops.o
 obj-$(CONFIG_PM_TRACE_RTC)	+= trace.o
+obj-$(CONFIG_PM_OPP)	+= opp.o
 
 ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG
 ccflags-$(CONFIG_PM_VERBOSE)   += -DDEBUG
diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c
index 4b29d4981253..81f2c84697f4 100644
--- a/drivers/base/power/generic_ops.c
+++ b/drivers/base/power/generic_ops.c
@@ -46,7 +46,7 @@ int pm_generic_runtime_suspend(struct device *dev)
 	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
 	int ret;
 
-	ret = pm && pm->runtime_suspend ? pm->runtime_suspend(dev) : -EINVAL;
+	ret = pm && pm->runtime_suspend ? pm->runtime_suspend(dev) : 0;
 
 	return ret;
 }
@@ -65,7 +65,7 @@ int pm_generic_runtime_resume(struct device *dev)
 	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
 	int ret;
 
-	ret = pm && pm->runtime_resume ? pm->runtime_resume(dev) : -EINVAL;
+	ret = pm && pm->runtime_resume ? pm->runtime_resume(dev) : 0;
 
 	return ret;
 }
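With this change a missing callback counts as success, so a device whose driver provides no runtime PM handlers is treated as suspendable instead of failing with -EINVAL. A minimal sketch of how a bus type might wire the generic helpers into its dev_pm_ops (foo_bus is hypothetical; pm_generic_runtime_idle is assumed to live alongside the two helpers shown here):

	static const struct dev_pm_ops foo_bus_pm_ops = {
		.runtime_suspend = pm_generic_runtime_suspend,
		.runtime_resume  = pm_generic_runtime_resume,
		.runtime_idle    = pm_generic_runtime_idle,
	};

	struct bus_type foo_bus_type = {
		.name = "foo",
		.pm   = &foo_bus_pm_ops,
	};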
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 276d5a701dc3..31b526661ec4 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -51,6 +51,8 @@ static pm_message_t pm_transition;
  */
 static bool transition_started;
 
+static int async_error;
+
 /**
  * device_pm_init - Initialize the PM-related part of a device object.
  * @dev: Device object being initialized.
@@ -60,7 +62,8 @@ void device_pm_init(struct device *dev)
 	dev->power.status = DPM_ON;
 	init_completion(&dev->power.completion);
 	complete_all(&dev->power.completion);
-	dev->power.wakeup_count = 0;
+	dev->power.wakeup = NULL;
+	spin_lock_init(&dev->power.lock);
 	pm_runtime_init(dev);
 }
 
@@ -120,6 +123,7 @@ void device_pm_remove(struct device *dev)
 	mutex_lock(&dpm_list_mtx);
 	list_del_init(&dev->power.entry);
 	mutex_unlock(&dpm_list_mtx);
+	device_wakeup_disable(dev);
 	pm_runtime_remove(dev);
 }
 
@@ -407,7 +411,7 @@ static void pm_dev_err(struct device *dev, pm_message_t state, char *info,
 static void dpm_show_time(ktime_t starttime, pm_message_t state, char *info)
 {
 	ktime_t calltime;
-	s64 usecs64;
+	u64 usecs64;
 	int usecs;
 
 	calltime = ktime_get();
@@ -600,6 +604,7 @@ static void dpm_resume(pm_message_t state)
 	INIT_LIST_HEAD(&list);
 	mutex_lock(&dpm_list_mtx);
 	pm_transition = state;
+	async_error = 0;
 
 	list_for_each_entry(dev, &dpm_list, power.entry) {
 		if (dev->power.status < DPM_OFF)
@@ -829,8 +834,6 @@ static int legacy_suspend(struct device *dev, pm_message_t state,
 	return error;
 }
 
-static int async_error;
-
 /**
  * device_suspend - Execute "suspend" callbacks for given device.
  * @dev: Device to handle.
@@ -885,6 +888,9 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 	device_unlock(dev);
 	complete_all(&dev->power.completion);
 
+	if (error)
+		async_error = error;
+
 	return error;
 }
 
@@ -894,10 +900,8 @@ static void async_suspend(void *data, async_cookie_t cookie)
 	int error;
 
 	error = __device_suspend(dev, pm_transition, true);
-	if (error) {
+	if (error)
 		pm_dev_err(dev, pm_transition, " async", error);
-		async_error = error;
-	}
 
 	put_device(dev);
 }
@@ -1085,8 +1089,9 @@ EXPORT_SYMBOL_GPL(__suspend_report_result);
  * @dev: Device to wait for.
  * @subordinate: Device that needs to wait for @dev.
  */
-void device_pm_wait_for_dev(struct device *subordinate, struct device *dev)
+int device_pm_wait_for_dev(struct device *subordinate, struct device *dev)
 {
 	dpm_wait(dev, subordinate->power.async_suspend);
+	return async_error;
 }
 EXPORT_SYMBOL_GPL(device_pm_wait_for_dev);
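device_pm_wait_for_dev() now propagates async_error, so a suspend callback that must wait for another device can bail out as soon as any asynchronous suspend has failed. A hedged sketch (foo_suspend, foo_get_master and foo_do_suspend are illustrative, not part of this patch):

	static int foo_suspend(struct device *dev)
	{
		struct device *master = foo_get_master(dev);	/* hypothetical lookup */
		int error;

		/* Wait for the device we depend on; a nonzero return means
		 * some async suspend already failed, so give up early. */
		error = device_pm_wait_for_dev(dev, master);
		if (error)
			return error;

		return foo_do_suspend(dev);	/* hypothetical */
	}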
diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c
new file mode 100644
index 000000000000..2bb9b4cf59d7
--- /dev/null
+++ b/drivers/base/power/opp.c
@@ -0,0 +1,628 @@
+/*
+ * Generic OPP Interface
+ *
+ * Copyright (C) 2009-2010 Texas Instruments Incorporated.
+ *	Nishanth Menon
+ *	Romit Dasgupta
+ *	Kevin Hilman
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/cpufreq.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/opp.h>
+
+/*
+ * Internal data structure organization with the OPP layer library is as
+ * follows:
+ * dev_opp_list (root)
+ *	|- device 1 (represents voltage domain 1)
+ *	|	|- opp 1 (availability, freq, voltage)
+ *	|	|- opp 2 ..
+ *	...	...
+ *	|	`- opp n ..
+ *	|- device 2 (represents the next voltage domain)
+ *	...
+ *	`- device m (represents mth voltage domain)
+ * device 1, 2.. are represented by dev_opp structure while each opp
+ * is represented by the opp structure.
+ */
+
+/**
+ * struct opp - Generic OPP description structure
+ * @node:	opp list node. The nodes are maintained throughout the lifetime
+ *		of boot. It is expected that only an optimal set of OPPs is
+ *		added to the library by the SoC framework.
+ *		RCU usage: the opp list is traversed with RCU locks. Node
+ *		modification is possible at runtime, hence the modifications
+ *		are protected by the dev_opp_list_lock for integrity.
+ *		IMPORTANT: the opp nodes should be maintained in increasing
+ *		order.
+ * @available:	true/false - marks if this OPP is available or not
+ * @rate:	Frequency in hertz
+ * @u_volt:	Nominal voltage in microvolts corresponding to this OPP
+ * @dev_opp:	points back to the device_opp struct this opp belongs to
+ *
+ * This structure stores the OPP information for a given device.
+ */
+struct opp {
+	struct list_head node;
+
+	bool available;
+	unsigned long rate;
+	unsigned long u_volt;
+
+	struct device_opp *dev_opp;
+};
+
+/**
+ * struct device_opp - Device opp structure
+ * @node:	list node - contains the devices with OPPs that
+ *		have been registered. Nodes once added are not modified in this
+ *		list.
+ *		RCU usage: nodes are not modified in the list of device_opp,
+ *		however addition is possible and is secured by dev_opp_list_lock
+ * @dev:	device pointer
+ * @opp_list:	list of opps
+ *
+ * This is an internal data structure maintaining the link to opps attached to
+ * a device. This structure is not meant to be shared with users as it is
+ * meant for bookkeeping and is private to the OPP library.
+ */
+struct device_opp {
+	struct list_head node;
+
+	struct device *dev;
+	struct list_head opp_list;
+};
+
+/*
+ * The root of the list of all devices. All device_opp structures branch off
+ * from here, with each device_opp containing the list of opp it supports in
+ * various states of availability.
+ */
+static LIST_HEAD(dev_opp_list);
+/* Lock to allow exclusive modification to the device and opp lists */
+static DEFINE_MUTEX(dev_opp_list_lock);
+
+/**
+ * find_device_opp() - find device_opp struct using device pointer
+ * @dev:	device pointer used to lookup device OPPs
+ *
+ * Search the list of device OPPs for the one containing the matching device.
+ * Does an RCU reader operation to grab the pointer needed.
+ *
+ * Returns pointer to 'struct device_opp' if found, otherwise -ENODEV or
+ * -EINVAL based on type of error.
+ *
+ * Locking: This function must be called under rcu_read_lock(). device_opp
+ * is a RCU protected pointer. This means that device_opp is valid as long
+ * as we are under RCU lock.
+ */
+static struct device_opp *find_device_opp(struct device *dev)
+{
+	struct device_opp *tmp_dev_opp, *dev_opp = ERR_PTR(-ENODEV);
+
+	if (unlikely(IS_ERR_OR_NULL(dev))) {
+		pr_err("%s: Invalid parameters\n", __func__);
+		return ERR_PTR(-EINVAL);
+	}
+
+	list_for_each_entry_rcu(tmp_dev_opp, &dev_opp_list, node) {
+		if (tmp_dev_opp->dev == dev) {
+			dev_opp = tmp_dev_opp;
+			break;
+		}
+	}
+
+	return dev_opp;
+}
+
+/**
+ * opp_get_voltage() - Gets the voltage corresponding to an available opp
+ * @opp:	opp for which the voltage has to be returned
+ *
+ * Return the voltage in microvolts corresponding to the opp, else
+ * return 0
+ *
+ * Locking: This function must be called under rcu_read_lock(). opp is a rcu
+ * protected pointer. This means that opp which could have been fetched by
+ * opp_find_freq_{exact,ceil,floor} functions is valid as long as we are
+ * under RCU lock. The pointer returned by the opp_find_freq family must be
+ * used in the same RCU read-side critical section as the call that fetched
+ * it, prior to unlocking with rcu_read_unlock(), to maintain the integrity
+ * of the pointer.
+ */
+unsigned long opp_get_voltage(struct opp *opp)
+{
+	struct opp *tmp_opp;
+	unsigned long v = 0;
+
+	tmp_opp = rcu_dereference(opp);
+	if (unlikely(IS_ERR_OR_NULL(tmp_opp)) || !tmp_opp->available)
+		pr_err("%s: Invalid parameters\n", __func__);
+	else
+		v = tmp_opp->u_volt;
+
+	return v;
+}
+
+/**
+ * opp_get_freq() - Gets the frequency corresponding to an available opp
+ * @opp:	opp for which the frequency has to be returned
+ *
+ * Return the frequency in hertz corresponding to the opp, else
+ * return 0
+ *
+ * Locking: This function must be called under rcu_read_lock(). opp is a rcu
+ * protected pointer. This means that opp which could have been fetched by
+ * opp_find_freq_{exact,ceil,floor} functions is valid as long as we are
+ * under RCU lock. The pointer returned by the opp_find_freq family must be
+ * used in the same RCU read-side critical section as the call that fetched
+ * it, prior to unlocking with rcu_read_unlock(), to maintain the integrity
+ * of the pointer.
+ */
+unsigned long opp_get_freq(struct opp *opp)
+{
+	struct opp *tmp_opp;
+	unsigned long f = 0;
+
+	tmp_opp = rcu_dereference(opp);
+	if (unlikely(IS_ERR_OR_NULL(tmp_opp)) || !tmp_opp->available)
+		pr_err("%s: Invalid parameters\n", __func__);
+	else
+		f = tmp_opp->rate;
+
+	return f;
+}
+
+/**
+ * opp_get_opp_count() - Get number of opps available in the opp list
+ * @dev:	device for which we do this operation
+ *
+ * This function returns the number of available opps if there are any,
+ * 0 if there are none, or the corresponding error value otherwise.
+ *
+ * Locking: This function must be called under rcu_read_lock(). This function
+ * internally references two RCU protected structures: device_opp and opp which
+ * are safe as long as we are under a common RCU locked section.
+ */
+int opp_get_opp_count(struct device *dev)
+{
+	struct device_opp *dev_opp;
+	struct opp *temp_opp;
+	int count = 0;
+
+	dev_opp = find_device_opp(dev);
+	if (IS_ERR(dev_opp)) {
+		int r = PTR_ERR(dev_opp);
+		dev_err(dev, "%s: device OPP not found (%d)\n", __func__, r);
+		return r;
+	}
+
+	list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
+		if (temp_opp->available)
+			count++;
+	}
+
+	return count;
+}
+
+/**
+ * opp_find_freq_exact() - search for an exact frequency
+ * @dev:		device for which we do this operation
+ * @freq:		frequency to search for
+ * @is_available:	true/false - match for available opp
+ *
+ * Searches for exact match in the opp list and returns pointer to the matching
+ * opp if found, else returns ERR_PTR in case of error and should be handled
+ * using IS_ERR.
+ *
+ * Note: available is a modifier for the search. If available=true, then the
+ * match is for an exact matching frequency which is available in the stored
+ * OPP table. If false, the match is for an exact frequency which is not available.
+ *
+ * This provides a mechanism to enable an opp which is not available currently
+ * or the opposite as well.
+ *
+ * Locking: This function must be called under rcu_read_lock(). opp is a rcu
+ * protected pointer. The reason is that the opp pointer which is
+ * returned will remain valid for use with opp_get_{voltage, freq} only while
+ * under the locked area. The pointer returned must be used prior to unlocking
+ * with rcu_read_unlock() to maintain the integrity of the pointer.
+ */
+struct opp *opp_find_freq_exact(struct device *dev, unsigned long freq,
+				bool available)
+{
+	struct device_opp *dev_opp;
+	struct opp *temp_opp, *opp = ERR_PTR(-ENODEV);
+
+	dev_opp = find_device_opp(dev);
+	if (IS_ERR(dev_opp)) {
+		int r = PTR_ERR(dev_opp);
+		dev_err(dev, "%s: device OPP not found (%d)\n", __func__, r);
+		return ERR_PTR(r);
+	}
+
+	list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
+		if (temp_opp->available == available &&
+				temp_opp->rate == freq) {
+			opp = temp_opp;
+			break;
+		}
+	}
+
+	return opp;
+}
+
+/**
+ * opp_find_freq_ceil() - Search for a rounded ceil freq
+ * @dev:	device for which we do this operation
+ * @freq:	Start frequency
+ *
+ * Search for the matching ceil *available* OPP from a starting freq
+ * for a device.
+ *
+ * Returns matching *opp and refreshes *freq accordingly, else returns
+ * ERR_PTR in case of error and should be handled using IS_ERR.
+ *
+ * Locking: This function must be called under rcu_read_lock(). opp is a rcu
+ * protected pointer. The reason is that the opp pointer which is
+ * returned will remain valid for use with opp_get_{voltage, freq} only while
+ * under the locked area. The pointer returned must be used prior to unlocking
+ * with rcu_read_unlock() to maintain the integrity of the pointer.
+ */
+struct opp *opp_find_freq_ceil(struct device *dev, unsigned long *freq)
+{
+	struct device_opp *dev_opp;
+	struct opp *temp_opp, *opp = ERR_PTR(-ENODEV);
+
+	if (!dev || !freq) {
+		dev_err(dev, "%s: Invalid argument freq=%p\n", __func__, freq);
+		return ERR_PTR(-EINVAL);
+	}
+
+	dev_opp = find_device_opp(dev);
+	if (IS_ERR(dev_opp))
+		return opp;
+
+	list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
+		if (temp_opp->available && temp_opp->rate >= *freq) {
+			opp = temp_opp;
+			*freq = opp->rate;
+			break;
+		}
+	}
+
+	return opp;
+}
+
+/**
+ * opp_find_freq_floor() - Search for a rounded floor freq
+ * @dev:	device for which we do this operation
+ * @freq:	Start frequency
+ *
+ * Search for the matching floor *available* OPP from a starting freq
+ * for a device.
+ *
+ * Returns matching *opp and refreshes *freq accordingly, else returns
+ * ERR_PTR in case of error and should be handled using IS_ERR.
+ *
+ * Locking: This function must be called under rcu_read_lock(). opp is a rcu
+ * protected pointer. The reason is that the opp pointer which is
+ * returned will remain valid for use with opp_get_{voltage, freq} only while
+ * under the locked area. The pointer returned must be used prior to unlocking
+ * with rcu_read_unlock() to maintain the integrity of the pointer.
+ */
+struct opp *opp_find_freq_floor(struct device *dev, unsigned long *freq)
+{
+	struct device_opp *dev_opp;
+	struct opp *temp_opp, *opp = ERR_PTR(-ENODEV);
+
+	if (!dev || !freq) {
+		dev_err(dev, "%s: Invalid argument freq=%p\n", __func__, freq);
+		return ERR_PTR(-EINVAL);
+	}
+
+	dev_opp = find_device_opp(dev);
+	if (IS_ERR(dev_opp))
+		return opp;
+
+	list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
+		if (temp_opp->available) {
+			/* go to the next node, before choosing prev */
+			if (temp_opp->rate > *freq)
+				break;
+			else
+				opp = temp_opp;
+		}
+	}
+	if (!IS_ERR(opp))
+		*freq = opp->rate;
+
+	return opp;
+}
+
+/**
+ * opp_add() - Add an OPP entry from a table definition
+ * @dev:	device for which we do this operation
+ * @freq:	Frequency in Hz for this OPP
+ * @u_volt:	Voltage in uVolts for this OPP
+ *
+ * This function adds an opp definition to the opp list and returns status.
+ * The opp is made available by default and it can be controlled using
+ * opp_enable/disable functions.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks
+ * to keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex cannot be locked.
+ */
+int opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
+{
+	struct device_opp *dev_opp = NULL;
+	struct opp *opp, *new_opp;
+	struct list_head *head;
+
+	/* allocate new OPP node */
+	new_opp = kzalloc(sizeof(struct opp), GFP_KERNEL);
+	if (!new_opp) {
+		dev_warn(dev, "%s: Unable to create new OPP node\n", __func__);
+		return -ENOMEM;
+	}
+
+	/* Hold our list modification lock here */
+	mutex_lock(&dev_opp_list_lock);
+
+	/* Check for existing list for 'dev' */
+	dev_opp = find_device_opp(dev);
+	if (IS_ERR(dev_opp)) {
+		/*
+		 * Allocate a new device OPP table. In the infrequent case
+		 * where a new device needs to be added, we pay this
+		 * penalty.
+		 */
+		dev_opp = kzalloc(sizeof(struct device_opp), GFP_KERNEL);
+		if (!dev_opp) {
+			mutex_unlock(&dev_opp_list_lock);
+			kfree(new_opp);
+			dev_warn(dev,
+				"%s: Unable to create device OPP structure\n",
+				__func__);
+			return -ENOMEM;
+		}
+
+		dev_opp->dev = dev;
+		INIT_LIST_HEAD(&dev_opp->opp_list);
+
+		/* Secure the device list modification */
+		list_add_rcu(&dev_opp->node, &dev_opp_list);
+	}
+
+	/* populate the opp table */
+	new_opp->dev_opp = dev_opp;
+	new_opp->rate = freq;
+	new_opp->u_volt = u_volt;
+	new_opp->available = true;
+
+	/* Insert new OPP in order of increasing frequency */
+	head = &dev_opp->opp_list;
+	list_for_each_entry_rcu(opp, &dev_opp->opp_list, node) {
+		if (new_opp->rate < opp->rate)
+			break;
+		else
+			head = &opp->node;
+	}
+
+	list_add_rcu(&new_opp->node, head);
+	mutex_unlock(&dev_opp_list_lock);
+
+	return 0;
+}
+
+/**
+ * opp_set_availability() - helper to set the availability of an opp
+ * @dev:		device for which we do this operation
+ * @freq:		OPP frequency to modify availability
+ * @availability_req:	availability status requested for this opp
+ *
+ * Set the availability of an OPP with an RCU operation; opp_{enable,disable}
+ * share a common logic which is isolated here.
+ *
+ * Returns -EINVAL for bad pointers, -ENOMEM if no memory available for the
+ * copy operation, returns 0 if no modification was done OR modification was
+ * successful.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function internally uses RCU updater strategy with mutex locks to
+ * keep the integrity of the internal data structures. Callers should ensure
+ * that this function is *NOT* called under RCU protection or in contexts where
+ * mutex locking or synchronize_rcu() blocking calls cannot be used.
+ */
+static int opp_set_availability(struct device *dev, unsigned long freq,
+		bool availability_req)
+{
+	struct device_opp *tmp_dev_opp, *dev_opp = NULL;
+	struct opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV);
+	int r = 0;
+
+	/* keep the node allocated */
+	new_opp = kmalloc(sizeof(struct opp), GFP_KERNEL);
+	if (!new_opp) {
+		dev_warn(dev, "%s: Unable to create OPP\n", __func__);
+		return -ENOMEM;
+	}
+
+	mutex_lock(&dev_opp_list_lock);
+
+	/* Find the device_opp */
+	list_for_each_entry(tmp_dev_opp, &dev_opp_list, node) {
+		if (dev == tmp_dev_opp->dev) {
+			dev_opp = tmp_dev_opp;
+			break;
+		}
+	}
+	if (IS_ERR(dev_opp)) {
+		r = PTR_ERR(dev_opp);
+		dev_warn(dev, "%s: Device OPP not found (%d)\n", __func__, r);
+		goto unlock;
+	}
+
+	/* Do we have the frequency? */
+	list_for_each_entry(tmp_opp, &dev_opp->opp_list, node) {
+		if (tmp_opp->rate == freq) {
+			opp = tmp_opp;
+			break;
+		}
+	}
+	if (IS_ERR(opp)) {
+		r = PTR_ERR(opp);
+		goto unlock;
+	}
+
+	/* Is update really needed? */
+	if (opp->available == availability_req)
+		goto unlock;
+	/* copy the old data over */
+	*new_opp = *opp;
+
+	/* plug in new node */
+	new_opp->available = availability_req;
+
+	list_replace_rcu(&opp->node, &new_opp->node);
+	mutex_unlock(&dev_opp_list_lock);
+	synchronize_rcu();
+
+	/* clean up old opp */
+	new_opp = opp;
+	goto out;
+
+unlock:
+	mutex_unlock(&dev_opp_list_lock);
+out:
+	kfree(new_opp);
+	return r;
+}
+
+/**
+ * opp_enable() - Enable a specific OPP
+ * @dev:	device for which we do this operation
+ * @freq:	OPP frequency to enable
+ *
+ * Enables a provided opp. If the operation is valid, this returns 0, else the
+ * corresponding error value. It is meant to be used by users to make an OPP
+ * available again after it was temporarily made unavailable with opp_disable.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function indirectly uses RCU and mutex locks to keep the
+ * integrity of the internal data structures. Callers should ensure that
+ * this function is *NOT* called under RCU protection or in contexts where
+ * mutex locking or synchronize_rcu() blocking calls cannot be used.
+ */
+int opp_enable(struct device *dev, unsigned long freq)
+{
+	return opp_set_availability(dev, freq, true);
+}
+
+/**
+ * opp_disable() - Disable a specific OPP
+ * @dev:	device for which we do this operation
+ * @freq:	OPP frequency to disable
+ *
+ * Disables a provided opp. If the operation is valid, this returns
+ * 0, else the corresponding error value. It is meant to be a temporary
+ * control by users to make this OPP not available until the circumstances are
+ * right to make it available again (with a call to opp_enable).
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Hence this function indirectly uses RCU and mutex locks to keep the
+ * integrity of the internal data structures. Callers should ensure that
+ * this function is *NOT* called under RCU protection or in contexts where
+ * mutex locking or synchronize_rcu() blocking calls cannot be used.
+ */
+int opp_disable(struct device *dev, unsigned long freq)
+{
+	return opp_set_availability(dev, freq, false);
+}
+
+#ifdef CONFIG_CPU_FREQ
+/**
+ * opp_init_cpufreq_table() - create a cpufreq table for a device
+ * @dev:	device for which we do this operation
+ * @table:	Cpufreq table returned back to caller
+ *
+ * Generate a cpufreq table for a provided device - this assumes that the
+ * opp list is already initialized and ready for usage.
+ *
+ * This function allocates required memory for the cpufreq table. It is
+ * expected that the caller does the required maintenance, such as freeing
+ * the table once it is no longer needed.
+ *
+ * Returns -EINVAL for bad pointers, -ENODEV if the device is not found, -ENOMEM
+ * if no memory available for the operation (table is not populated), returns 0
+ * if successful and table is populated.
+ *
+ * WARNING: Callers must ensure they refresh their copy of the table if any
+ * of the opp_{add,enable,disable} functions have been invoked in the interim.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * To simplify the logic, we pretend we are updater and hold relevant mutex here
+ * Callers should ensure that this function is *NOT* called under RCU protection
+ * or in contexts where mutex locking cannot be used.
+ */
+int opp_init_cpufreq_table(struct device *dev,
+			    struct cpufreq_frequency_table **table)
+{
+	struct device_opp *dev_opp;
+	struct opp *opp;
+	struct cpufreq_frequency_table *freq_table;
+	int i = 0;
+
+	/* Pretend as if I am an updater */
+	mutex_lock(&dev_opp_list_lock);
+
+	dev_opp = find_device_opp(dev);
+	if (IS_ERR(dev_opp)) {
+		int r = PTR_ERR(dev_opp);
+		mutex_unlock(&dev_opp_list_lock);
+		dev_err(dev, "%s: Device OPP not found (%d)\n", __func__, r);
+		return r;
+	}
+
+	freq_table = kzalloc(sizeof(struct cpufreq_frequency_table) *
+			     (opp_get_opp_count(dev) + 1), GFP_KERNEL);
+	if (!freq_table) {
+		mutex_unlock(&dev_opp_list_lock);
+		dev_warn(dev, "%s: Unable to allocate frequency table\n",
+			__func__);
+		return -ENOMEM;
+	}
+
+	list_for_each_entry(opp, &dev_opp->opp_list, node) {
+		if (opp->available) {
+			freq_table[i].index = i;
+			freq_table[i].frequency = opp->rate / 1000;
+			i++;
+		}
+	}
+	mutex_unlock(&dev_opp_list_lock);
+
+	freq_table[i].index = i;
+	freq_table[i].frequency = CPUFREQ_TABLE_END;
+
+	*table = &freq_table[0];
+
+	return 0;
+}
+#endif		/* CONFIG_CPU_FREQ */
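Taken together, a consumer of this new library registers OPPs at init time and looks them up under RCU when scaling. A sketch using only the functions introduced above; foo_dvfs_init and the frequencies and voltages are made-up board data:

	#include <linux/device.h>
	#include <linux/err.h>
	#include <linux/opp.h>
	#include <linux/rcupdate.h>

	static int foo_dvfs_init(struct device *dev)
	{
		struct opp *opp;
		unsigned long freq = 500000000;	/* want >= 500 MHz */
		unsigned long u_volt;
		int ret;

		ret = opp_add(dev, 300000000, 1100000);	/* 300 MHz @ 1.10 V */
		if (!ret)
			ret = opp_add(dev, 600000000, 1200000);	/* 600 MHz @ 1.20 V */
		if (ret)
			return ret;

		rcu_read_lock();
		opp = opp_find_freq_ceil(dev, &freq);	/* refreshes freq to 600 MHz */
		if (IS_ERR(opp)) {
			rcu_read_unlock();
			return PTR_ERR(opp);
		}
		/* opp_get_voltage() must run in the same RCU read section. */
		u_volt = opp_get_voltage(opp);
		rcu_read_unlock();

		dev_info(dev, "selected %lu Hz at %lu uV\n", freq, u_volt);
		return 0;
	}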
diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h
index c0bd03c83b9c..698dde742587 100644
--- a/drivers/base/power/power.h
+++ b/drivers/base/power/power.h
@@ -34,6 +34,7 @@ extern void device_pm_move_last(struct device *);
 
 static inline void device_pm_init(struct device *dev)
 {
+	spin_lock_init(&dev->power.lock);
 	pm_runtime_init(dev);
 }
 
@@ -59,6 +60,7 @@ static inline void device_pm_move_last(struct device *dev) {}
 
 extern int dpm_sysfs_add(struct device *);
 extern void dpm_sysfs_remove(struct device *);
+extern void rpm_sysfs_remove(struct device *);
 
 #else /* CONFIG_PM */
 
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index b78c401ffa73..1dd8676d7f55 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -2,17 +2,55 @@
  * drivers/base/power/runtime.c - Helper functions for device run-time PM
  *
  * Copyright (c) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+ * Copyright (C) 2010 Alan Stern <stern@rowland.harvard.edu>
  *
  * This file is released under the GPLv2.
  */
 
 #include <linux/sched.h>
 #include <linux/pm_runtime.h>
-#include <linux/jiffies.h>
+#include "power.h"
 
-static int __pm_runtime_resume(struct device *dev, bool from_wq);
-static int __pm_request_idle(struct device *dev);
-static int __pm_request_resume(struct device *dev);
+static int rpm_resume(struct device *dev, int rpmflags);
+static int rpm_suspend(struct device *dev, int rpmflags);
+
+/**
+ * update_pm_runtime_accounting - Update the time accounting of power states
+ * @dev: Device to update the accounting for
+ *
+ * In order to be able to have time accounting of the various power states
+ * (as used by programs such as PowerTOP to show the effectiveness of runtime
+ * PM), we need to track the time spent in each state.
+ * update_pm_runtime_accounting must be called each time before the
+ * runtime_status field is updated, to account the time in the old state
+ * correctly.
+ */
+void update_pm_runtime_accounting(struct device *dev)
+{
+	unsigned long now = jiffies;
+	int delta;
+
+	delta = now - dev->power.accounting_timestamp;
+
+	if (delta < 0)
+		delta = 0;
+
+	dev->power.accounting_timestamp = now;
+
+	if (dev->power.disable_depth > 0)
+		return;
+
+	if (dev->power.runtime_status == RPM_SUSPENDED)
+		dev->power.suspended_jiffies += delta;
+	else
+		dev->power.active_jiffies += delta;
+}
+
+static void __update_runtime_status(struct device *dev, enum rpm_status status)
+{
+	update_pm_runtime_accounting(dev);
+	dev->power.runtime_status = status;
+}
 
 /**
  * pm_runtime_deactivate_timer - Deactivate given device's suspend timer.
@@ -40,62 +78,154 @@ static void pm_runtime_cancel_pending(struct device *dev)
 	dev->power.request = RPM_REQ_NONE;
 }
 
-/**
- * __pm_runtime_idle - Notify device bus type if the device can be suspended.
- * @dev: Device to notify the bus type about.
+/*
+ * pm_runtime_autosuspend_expiration - Get a device's autosuspend-delay expiration time.
+ * @dev: Device to handle.
  *
- * This function must be called under dev->power.lock with interrupts disabled.
+ * Compute the autosuspend-delay expiration time based on the device's
+ * power.last_busy time.  If the delay has already expired or is disabled
+ * (negative) or the power.use_autosuspend flag isn't set, return 0.
+ * Otherwise return the expiration time in jiffies (adjusted to be nonzero).
+ *
+ * This function may be called either with or without dev->power.lock held.
+ * Either way it can be racy, since power.last_busy may be updated at any time.
  */
-static int __pm_runtime_idle(struct device *dev)
-	__releases(&dev->power.lock) __acquires(&dev->power.lock)
+unsigned long pm_runtime_autosuspend_expiration(struct device *dev)
+{
+	int autosuspend_delay;
+	long elapsed;
+	unsigned long last_busy;
+	unsigned long expires = 0;
+
+	if (!dev->power.use_autosuspend)
+		goto out;
+
+	autosuspend_delay = ACCESS_ONCE(dev->power.autosuspend_delay);
+	if (autosuspend_delay < 0)
+		goto out;
+
+	last_busy = ACCESS_ONCE(dev->power.last_busy);
+	elapsed = jiffies - last_busy;
+	if (elapsed < 0)
+		goto out;	/* jiffies has wrapped around. */
+
+	/*
+	 * If the autosuspend_delay is >= 1 second, align the timer by rounding
+	 * up to the nearest second.
+	 */
+	expires = last_busy + msecs_to_jiffies(autosuspend_delay);
+	if (autosuspend_delay >= 1000)
+		expires = round_jiffies(expires);
+	expires += !expires;
+	if (elapsed >= expires - last_busy)
+		expires = 0;	/* Already expired. */
+
+ out:
+	return expires;
+}
+EXPORT_SYMBOL_GPL(pm_runtime_autosuspend_expiration);
+
+/**
+ * rpm_check_suspend_allowed - Test whether a device may be suspended.
+ * @dev: Device to test.
+ */
+static int rpm_check_suspend_allowed(struct device *dev)
 {
 	int retval = 0;
 
 	if (dev->power.runtime_error)
 		retval = -EINVAL;
-	else if (dev->power.idle_notification)
-		retval = -EINPROGRESS;
 	else if (atomic_read(&dev->power.usage_count) > 0
-	    || dev->power.disable_depth > 0
-	    || dev->power.runtime_status != RPM_ACTIVE)
+	    || dev->power.disable_depth > 0)
 		retval = -EAGAIN;
 	else if (!pm_children_suspended(dev))
 		retval = -EBUSY;
+
+	/* Pending resume requests take precedence over suspends. */
+	else if ((dev->power.deferred_resume
+			&& dev->power.runtime_status == RPM_SUSPENDING)
+	    || (dev->power.request_pending
+			&& dev->power.request == RPM_REQ_RESUME))
+		retval = -EAGAIN;
+	else if (dev->power.runtime_status == RPM_SUSPENDED)
+		retval = 1;
+
+	return retval;
+}
+
+/**
+ * rpm_idle - Notify device bus type if the device can be suspended.
+ * @dev: Device to notify the bus type about.
+ * @rpmflags: Flag bits.
+ *
+ * Check if the device's run-time PM status allows it to be suspended.  If
+ * another idle notification has been started earlier, return immediately.  If
+ * the RPM_ASYNC flag is set then queue an idle-notification request; otherwise
+ * run the ->runtime_idle() callback directly.
+ *
+ * This function must be called under dev->power.lock with interrupts disabled.
+ */
+static int rpm_idle(struct device *dev, int rpmflags)
+{
+	int (*callback)(struct device *);
+	int retval;
+
+	retval = rpm_check_suspend_allowed(dev);
+	if (retval < 0)
+		;	/* Conditions are wrong. */
+
+	/* Idle notifications are allowed only in the RPM_ACTIVE state. */
+	else if (dev->power.runtime_status != RPM_ACTIVE)
+		retval = -EAGAIN;
+
+	/*
+	 * Any pending request other than an idle notification takes
+	 * precedence over us, except that the timer may be running.
+	 */
+	else if (dev->power.request_pending &&
+	    dev->power.request > RPM_REQ_IDLE)
+		retval = -EAGAIN;
+
+	/* Act as though RPM_NOWAIT is always set. */
+	else if (dev->power.idle_notification)
+		retval = -EINPROGRESS;
 	if (retval)
 		goto out;
 
-	if (dev->power.request_pending) {
-		/*
-		 * If an idle notification request is pending, cancel it.  Any
-		 * other pending request takes precedence over us.
-		 */
-		if (dev->power.request == RPM_REQ_IDLE) {
-			dev->power.request = RPM_REQ_NONE;
-		} else if (dev->power.request != RPM_REQ_NONE) {
-			retval = -EAGAIN;
-			goto out;
+	/* Pending requests need to be canceled. */
+	dev->power.request = RPM_REQ_NONE;
+
+	if (dev->power.no_callbacks) {
+		/* Assume ->runtime_idle() callback would have suspended. */
+		retval = rpm_suspend(dev, rpmflags);
+		goto out;
+	}
+
+	/* Carry out an asynchronous or a synchronous idle notification. */
+	if (rpmflags & RPM_ASYNC) {
+		dev->power.request = RPM_REQ_IDLE;
+		if (!dev->power.request_pending) {
+			dev->power.request_pending = true;
+			queue_work(pm_wq, &dev->power.work);
 		}
+		goto out;
 	}
 
 	dev->power.idle_notification = true;
 
-	if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_idle) {
-		spin_unlock_irq(&dev->power.lock);
-
-		dev->bus->pm->runtime_idle(dev);
-
-		spin_lock_irq(&dev->power.lock);
-	} else if (dev->type && dev->type->pm && dev->type->pm->runtime_idle) {
-		spin_unlock_irq(&dev->power.lock);
-
-		dev->type->pm->runtime_idle(dev);
+	if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_idle)
+		callback = dev->bus->pm->runtime_idle;
+	else if (dev->type && dev->type->pm && dev->type->pm->runtime_idle)
+		callback = dev->type->pm->runtime_idle;
+	else if (dev->class && dev->class->pm)
+		callback = dev->class->pm->runtime_idle;
+	else
+		callback = NULL;
 
-		spin_lock_irq(&dev->power.lock);
-	} else if (dev->class && dev->class->pm
-	    && dev->class->pm->runtime_idle) {
+	if (callback) {
 		spin_unlock_irq(&dev->power.lock);
 
-		dev->class->pm->runtime_idle(dev);
+		callback(dev);
 
 		spin_lock_irq(&dev->power.lock);
 	}
@@ -108,113 +238,99 @@ static int __pm_runtime_idle(struct device *dev)
 }
 
 /**
- * pm_runtime_idle - Notify device bus type if the device can be suspended.
- * @dev: Device to notify the bus type about.
+ * rpm_callback - Run a given runtime PM callback for a given device.
+ * @cb: Runtime PM callback to run.
+ * @dev: Device to run the callback for.
  */
-int pm_runtime_idle(struct device *dev)
+static int rpm_callback(int (*cb)(struct device *), struct device *dev)
+	__releases(&dev->power.lock) __acquires(&dev->power.lock)
 {
 	int retval;
 
-	spin_lock_irq(&dev->power.lock);
-	retval = __pm_runtime_idle(dev);
-	spin_unlock_irq(&dev->power.lock);
+	if (!cb)
+		return -ENOSYS;
 
-	return retval;
-}
-EXPORT_SYMBOL_GPL(pm_runtime_idle);
-
-
-/**
- * update_pm_runtime_accounting - Update the time accounting of power states
- * @dev: Device to update the accounting for
- *
- * In order to be able to have time accounting of the various power states
- * (as used by programs such as PowerTOP to show the effectiveness of runtime
- * PM), we need to track the time spent in each state.
- * update_pm_runtime_accounting must be called each time before the
- * runtime_status field is updated, to account the time in the old state
- * correctly.
- */
-void update_pm_runtime_accounting(struct device *dev)
-{
-	unsigned long now = jiffies;
-	int delta;
-
-	delta = now - dev->power.accounting_timestamp;
-
-	if (delta < 0)
-		delta = 0;
+	spin_unlock_irq(&dev->power.lock);
 
-	dev->power.accounting_timestamp = now;
+	retval = cb(dev);
 
-	if (dev->power.disable_depth > 0)
-		return;
-
-	if (dev->power.runtime_status == RPM_SUSPENDED)
-		dev->power.suspended_jiffies += delta;
-	else
-		dev->power.active_jiffies += delta;
-}
+	spin_lock_irq(&dev->power.lock);
+	dev->power.runtime_error = retval;
 
-static void __update_runtime_status(struct device *dev, enum rpm_status status)
-{
-	update_pm_runtime_accounting(dev);
-	dev->power.runtime_status = status;
+	return retval;
 }
 
 /**
- * __pm_runtime_suspend - Carry out run-time suspend of given device.
+ * rpm_suspend - Carry out run-time suspend of given device.
  * @dev: Device to suspend.
- * @from_wq: If set, the function has been called via pm_wq.
+ * @rpmflags: Flag bits.
  *
- * Check if the device can be suspended and run the ->runtime_suspend() callback
- * provided by its bus type.  If another suspend has been started earlier, wait
- * for it to finish.  If an idle notification or suspend request is pending or
- * scheduled, cancel it.
+ * Check if the device's run-time PM status allows it to be suspended.  If
+ * another suspend has been started earlier, either return immediately or wait
+ * for it to finish, depending on the RPM_NOWAIT and RPM_ASYNC flags.  Cancel a
+ * pending idle notification.  If the RPM_ASYNC flag is set then queue a
+ * suspend request; otherwise run the ->runtime_suspend() callback directly.
+ * If a deferred resume was requested while the callback was running then carry
+ * it out; otherwise send an idle notification for the device (if the suspend
+ * failed) or for its parent (if the suspend succeeded).
  *
  * This function must be called under dev->power.lock with interrupts disabled.
  */
-int __pm_runtime_suspend(struct device *dev, bool from_wq)
+static int rpm_suspend(struct device *dev, int rpmflags)
 	__releases(&dev->power.lock) __acquires(&dev->power.lock)
 {
+	int (*callback)(struct device *);
 	struct device *parent = NULL;
-	bool notify = false;
-	int retval = 0;
+	int retval;
 
-	dev_dbg(dev, "__pm_runtime_suspend()%s!\n",
-		from_wq ? " from workqueue" : "");
+	dev_dbg(dev, "%s flags 0x%x\n", __func__, rpmflags);
 
  repeat:
-	if (dev->power.runtime_error) {
-		retval = -EINVAL;
-		goto out;
-	}
+	retval = rpm_check_suspend_allowed(dev);
 
-	/* Pending resume requests take precedence over us. */
-	if (dev->power.request_pending
-	    && dev->power.request == RPM_REQ_RESUME) {
+	if (retval < 0)
+		;	/* Conditions are wrong. */
+
+	/* Synchronous suspends are not allowed in the RPM_RESUMING state. */
+	else if (dev->power.runtime_status == RPM_RESUMING &&
+	    !(rpmflags & RPM_ASYNC))
 		retval = -EAGAIN;
+	if (retval)
 		goto out;
+
+	/* If the autosuspend_delay time hasn't expired yet, reschedule. */
+	if ((rpmflags & RPM_AUTO)
+	    && dev->power.runtime_status != RPM_SUSPENDING) {
+		unsigned long expires = pm_runtime_autosuspend_expiration(dev);
+
+		if (expires != 0) {
+			/* Pending requests need to be canceled. */
+			dev->power.request = RPM_REQ_NONE;
+
+			/*
+			 * Optimization: If the timer is already running and is
+			 * set to expire at or before the autosuspend delay,
+			 * avoid the overhead of resetting it.  Just let it
+			 * expire; pm_suspend_timer_fn() will take care of the
+			 * rest.
+			 */
+			if (!(dev->power.timer_expires && time_before_eq(
+			    dev->power.timer_expires, expires))) {
+				dev->power.timer_expires = expires;
+				mod_timer(&dev->power.suspend_timer, expires);
+			}
+			dev->power.timer_autosuspends = 1;
+			goto out;
+		}
 	}
 
 	/* Other scheduled or pending requests need to be canceled. */
 	pm_runtime_cancel_pending(dev);
 
-	if (dev->power.runtime_status == RPM_SUSPENDED)
-		retval = 1;
-	else if (dev->power.runtime_status == RPM_RESUMING
-	    || dev->power.disable_depth > 0
-	    || atomic_read(&dev->power.usage_count) > 0)
-		retval = -EAGAIN;
-	else if (!pm_children_suspended(dev))
-		retval = -EBUSY;
-	if (retval)
-		goto out;
-
 	if (dev->power.runtime_status == RPM_SUSPENDING) {
 		DEFINE_WAIT(wait);
 
-		if (from_wq) {
+		if (rpmflags & (RPM_ASYNC | RPM_NOWAIT)) {
 			retval = -EINPROGRESS;
 			goto out;
 		}
@@ -236,46 +352,42 @@ int __pm_runtime_suspend(struct device *dev, bool from_wq)
 		goto repeat;
 	}
 
-	__update_runtime_status(dev, RPM_SUSPENDING);
 	dev->power.deferred_resume = false;
+	if (dev->power.no_callbacks)
+		goto no_callback;	/* Assume success. */
+
+	/* Carry out an asynchronous or a synchronous suspend. */
+	if (rpmflags & RPM_ASYNC) {
+		dev->power.request = (rpmflags & RPM_AUTO) ?
+		    RPM_REQ_AUTOSUSPEND : RPM_REQ_SUSPEND;
+		if (!dev->power.request_pending) {
+			dev->power.request_pending = true;
+			queue_work(pm_wq, &dev->power.work);
+		}
+		goto out;
+	}
 
-	if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend) {
-		spin_unlock_irq(&dev->power.lock);
-
-		retval = dev->bus->pm->runtime_suspend(dev);
-
-		spin_lock_irq(&dev->power.lock);
-		dev->power.runtime_error = retval;
-	} else if (dev->type && dev->type->pm
-	    && dev->type->pm->runtime_suspend) {
-		spin_unlock_irq(&dev->power.lock);
-
-		retval = dev->type->pm->runtime_suspend(dev);
-
-		spin_lock_irq(&dev->power.lock);
-		dev->power.runtime_error = retval;
-	} else if (dev->class && dev->class->pm
-	    && dev->class->pm->runtime_suspend) {
-		spin_unlock_irq(&dev->power.lock);
-
-		retval = dev->class->pm->runtime_suspend(dev);
+	__update_runtime_status(dev, RPM_SUSPENDING);
 
-		spin_lock_irq(&dev->power.lock);
-		dev->power.runtime_error = retval;
-	} else {
-		retval = -ENOSYS;
-	}
+	if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend)
+		callback = dev->bus->pm->runtime_suspend;
+	else if (dev->type && dev->type->pm && dev->type->pm->runtime_suspend)
+		callback = dev->type->pm->runtime_suspend;
+	else if (dev->class && dev->class->pm)
+		callback = dev->class->pm->runtime_suspend;
+	else
+		callback = NULL;
 
+	retval = rpm_callback(callback, dev);
 	if (retval) {
 		__update_runtime_status(dev, RPM_ACTIVE);
-		if (retval == -EAGAIN || retval == -EBUSY) {
-			if (dev->power.timer_expires == 0)
-				notify = true;
+		dev->power.deferred_resume = 0;
+		if (retval == -EAGAIN || retval == -EBUSY)
 			dev->power.runtime_error = 0;
-		} else {
+		else
 			pm_runtime_cancel_pending(dev);
-		}
 	} else {
+ no_callback:
 		__update_runtime_status(dev, RPM_SUSPENDED);
 		pm_runtime_deactivate_timer(dev);
 
@@ -287,14 +399,11 @@ int __pm_runtime_suspend(struct device *dev, bool from_wq)
 	wake_up_all(&dev->power.wait_queue);
 
 	if (dev->power.deferred_resume) {
-		__pm_runtime_resume(dev, false);
+		rpm_resume(dev, 0);
 		retval = -EAGAIN;
 		goto out;
 	}
 
-	if (notify)
-		__pm_runtime_idle(dev);
-
 	if (parent && !parent->power.ignore_children) {
 		spin_unlock_irq(&dev->power.lock);
 
@@ -304,72 +413,69 @@ int __pm_runtime_suspend(struct device *dev, bool from_wq)
 	}
 
  out:
-	dev_dbg(dev, "__pm_runtime_suspend() returns %d!\n", retval);
-
-	return retval;
-}
-
-/**
- * pm_runtime_suspend - Carry out run-time suspend of given device.
- * @dev: Device to suspend.
- */
-int pm_runtime_suspend(struct device *dev)
-{
-	int retval;
-
-	spin_lock_irq(&dev->power.lock);
-	retval = __pm_runtime_suspend(dev, false);
-	spin_unlock_irq(&dev->power.lock);
+	dev_dbg(dev, "%s returns %d\n", __func__, retval);
 
 	return retval;
 }
-EXPORT_SYMBOL_GPL(pm_runtime_suspend);
 
 /**
- * __pm_runtime_resume - Carry out run-time resume of given device.
+ * rpm_resume - Carry out run-time resume of given device.
  * @dev: Device to resume.
- * @from_wq: If set, the function has been called via pm_wq.
+ * @rpmflags: Flag bits.
  *
- * Check if the device can be woken up and run the ->runtime_resume() callback
- * provided by its bus type.  If another resume has been started earlier, wait
- * for it to finish.  If there's a suspend running in parallel with this
- * function, wait for it to finish and resume the device.  Cancel any scheduled
- * or pending requests.
+ * Check if the device's run-time PM status allows it to be resumed.  Cancel
+ * any scheduled or pending requests.  If another resume has been started
+ * earlier, either return immediately or wait for it to finish, depending on the
+ * RPM_NOWAIT and RPM_ASYNC flags.  Similarly, if there's a suspend running in
+ * parallel with this function, either tell the other process to resume after
+ * suspending (deferred_resume) or wait for it to finish.  If the RPM_ASYNC
+ * flag is set then queue a resume request; otherwise run the
+ * ->runtime_resume() callback directly.  Queue an idle notification for the
+ * device if the resume succeeded.
  *
  * This function must be called under dev->power.lock with interrupts disabled.
  */
-int __pm_runtime_resume(struct device *dev, bool from_wq)
+static int rpm_resume(struct device *dev, int rpmflags)
 	__releases(&dev->power.lock) __acquires(&dev->power.lock)
 {
+	int (*callback)(struct device *);
 	struct device *parent = NULL;
 	int retval = 0;
 
-	dev_dbg(dev, "__pm_runtime_resume()%s!\n",
-		from_wq ? " from workqueue" : "");
+	dev_dbg(dev, "%s flags 0x%x\n", __func__, rpmflags);
 
  repeat:
-	if (dev->power.runtime_error) {
+	if (dev->power.runtime_error)
 		retval = -EINVAL;
+	else if (dev->power.disable_depth > 0)
+		retval = -EAGAIN;
+	if (retval)
 		goto out;
-	}
 
-	pm_runtime_cancel_pending(dev);
+	/*
+	 * Other scheduled or pending requests need to be canceled.  Small
+	 * optimization: If an autosuspend timer is running, leave it running
+	 * rather than cancelling it now only to restart it again in the near
+	 * future.
+	 */
+	dev->power.request = RPM_REQ_NONE;
+	if (!dev->power.timer_autosuspends)
+		pm_runtime_deactivate_timer(dev);
 
-	if (dev->power.runtime_status == RPM_ACTIVE)
+	if (dev->power.runtime_status == RPM_ACTIVE) {
 		retval = 1;
-	else if (dev->power.disable_depth > 0)
-		retval = -EAGAIN;
-	if (retval)
 		goto out;
+	}
 
 	if (dev->power.runtime_status == RPM_RESUMING
 	    || dev->power.runtime_status == RPM_SUSPENDING) {
 		DEFINE_WAIT(wait);
 
-		if (from_wq) {
+		if (rpmflags & (RPM_ASYNC | RPM_NOWAIT)) {
 			if (dev->power.runtime_status == RPM_SUSPENDING)
 				dev->power.deferred_resume = true;
-			retval = -EINPROGRESS;
+			else
+				retval = -EINPROGRESS;
 			goto out;
 		}
 
@@ -391,6 +497,34 @@ int __pm_runtime_resume(struct device *dev, bool from_wq)
 		goto repeat;
 	}
 
+	/*
+	 * See if we can skip waking up the parent.  This is safe only if
+	 * power.no_callbacks is set, because otherwise we don't know whether
+	 * the resume will actually succeed.
+	 */
+	if (dev->power.no_callbacks && !parent && dev->parent) {
+		spin_lock(&dev->parent->power.lock);
+		if (dev->parent->power.disable_depth > 0
+		    || dev->parent->power.ignore_children
+		    || dev->parent->power.runtime_status == RPM_ACTIVE) {
+			atomic_inc(&dev->parent->power.child_count);
+			spin_unlock(&dev->parent->power.lock);
+			goto no_callback;	/* Assume success. */
+		}
+		spin_unlock(&dev->parent->power.lock);
+	}
+
+	/* Carry out an asynchronous or a synchronous resume. */
+	if (rpmflags & RPM_ASYNC) {
+		dev->power.request = RPM_REQ_RESUME;
+		if (!dev->power.request_pending) {
+			dev->power.request_pending = true;
+			queue_work(pm_wq, &dev->power.work);
+		}
+		retval = 0;
+		goto out;
+	}
+
 	if (!parent && dev->parent) {
 		/*
 		 * Increment the parent's resume counter and resume it if
@@ -408,7 +542,7 @@ int __pm_runtime_resume(struct device *dev, bool from_wq)
 		 */
 		if (!parent->power.disable_depth
 		    && !parent->power.ignore_children) {
-			__pm_runtime_resume(parent, false);
+			rpm_resume(parent, 0);
 			if (parent->power.runtime_status != RPM_ACTIVE)
 				retval = -EBUSY;
 		}
@@ -420,39 +554,26 @@ int __pm_runtime_resume(struct device *dev, bool from_wq)
 		goto repeat;
 	}
 
-	__update_runtime_status(dev, RPM_RESUMING);
-
-	if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume) {
-		spin_unlock_irq(&dev->power.lock);
-
-		retval = dev->bus->pm->runtime_resume(dev);
-
-		spin_lock_irq(&dev->power.lock);
-		dev->power.runtime_error = retval;
-	} else if (dev->type && dev->type->pm
-	    && dev->type->pm->runtime_resume) {
-		spin_unlock_irq(&dev->power.lock);
-
-		retval = dev->type->pm->runtime_resume(dev);
+	if (dev->power.no_callbacks)
+		goto no_callback;	/* Assume success. */
 
-		spin_lock_irq(&dev->power.lock);
-		dev->power.runtime_error = retval;
-	} else if (dev->class && dev->class->pm
-	    && dev->class->pm->runtime_resume) {
-		spin_unlock_irq(&dev->power.lock);
-
-		retval = dev->class->pm->runtime_resume(dev);
+	__update_runtime_status(dev, RPM_RESUMING);
 
-		spin_lock_irq(&dev->power.lock);
-		dev->power.runtime_error = retval;
-	} else {
-		retval = -ENOSYS;
-	}
+	if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume)
+		callback = dev->bus->pm->runtime_resume;
+	else if (dev->type && dev->type->pm && dev->type->pm->runtime_resume)
+		callback = dev->type->pm->runtime_resume;
+	else if (dev->class && dev->class->pm)
+		callback = dev->class->pm->runtime_resume;
+	else
+		callback = NULL;
 
+	retval = rpm_callback(callback, dev);
 	if (retval) {
 		__update_runtime_status(dev, RPM_SUSPENDED);
 		pm_runtime_cancel_pending(dev);
 	} else {
+ no_callback:
 		__update_runtime_status(dev, RPM_ACTIVE);
 		if (parent)
 			atomic_inc(&parent->power.child_count);
@@ -460,7 +581,7 @@ int __pm_runtime_resume(struct device *dev, bool from_wq)
 	wake_up_all(&dev->power.wait_queue);
 
 	if (!retval)
-		__pm_request_idle(dev);
+		rpm_idle(dev, RPM_ASYNC);
 
  out:
 	if (parent) {
@@ -471,28 +592,12 @@ int __pm_runtime_resume(struct device *dev, bool from_wq)
 		spin_lock_irq(&dev->power.lock);
 	}
 
-	dev_dbg(dev, "__pm_runtime_resume() returns %d!\n", retval);
+	dev_dbg(dev, "%s returns %d\n", __func__, retval);
 
 	return retval;
 }
 
 /**
- * pm_runtime_resume - Carry out run-time resume of given device.
- * @dev: Device to suspend.
- */
-int pm_runtime_resume(struct device *dev)
-{
-	int retval;
-
-	spin_lock_irq(&dev->power.lock);
-	retval = __pm_runtime_resume(dev, false);
-	spin_unlock_irq(&dev->power.lock);
-
-	return retval;
-}
-EXPORT_SYMBOL_GPL(pm_runtime_resume);
-
-/**
  * pm_runtime_work - Universal run-time PM work function.
  * @work: Work structure used for scheduling the execution of this function.
  *
@@ -517,13 +622,16 @@ static void pm_runtime_work(struct work_struct *work)
 	case RPM_REQ_NONE:
 		break;
 	case RPM_REQ_IDLE:
-		__pm_runtime_idle(dev);
+		rpm_idle(dev, RPM_NOWAIT);
 		break;
 	case RPM_REQ_SUSPEND:
-		__pm_runtime_suspend(dev, true);
+		rpm_suspend(dev, RPM_NOWAIT);
+		break;
+	case RPM_REQ_AUTOSUSPEND:
+		rpm_suspend(dev, RPM_NOWAIT | RPM_AUTO);
 		break;
 	case RPM_REQ_RESUME:
-		__pm_runtime_resume(dev, true);
+		rpm_resume(dev, RPM_NOWAIT);
 		break;
 	}
 
@@ -532,117 +640,10 @@ static void pm_runtime_work(struct work_struct *work)
 }
 
 /**
- * __pm_request_idle - Submit an idle notification request for given device.
- * @dev: Device to handle.
- *
- * Check if the device's run-time PM status is correct for suspending the device
- * and queue up a request to run __pm_runtime_idle() for it.
- *
- * This function must be called under dev->power.lock with interrupts disabled.
- */
-static int __pm_request_idle(struct device *dev)
-{
-	int retval = 0;
-
-	if (dev->power.runtime_error)
-		retval = -EINVAL;
-	else if (atomic_read(&dev->power.usage_count) > 0
-	    || dev->power.disable_depth > 0
-	    || dev->power.runtime_status == RPM_SUSPENDED
-	    || dev->power.runtime_status == RPM_SUSPENDING)
-		retval = -EAGAIN;
-	else if (!pm_children_suspended(dev))
-		retval = -EBUSY;
-	if (retval)
-		return retval;
-
-	if (dev->power.request_pending) {
-		/* Any requests other then RPM_REQ_IDLE take precedence. */
-		if (dev->power.request == RPM_REQ_NONE)
-			dev->power.request = RPM_REQ_IDLE;
-		else if (dev->power.request != RPM_REQ_IDLE)
-			retval = -EAGAIN;
-		return retval;
-	}
-
-	dev->power.request = RPM_REQ_IDLE;
-	dev->power.request_pending = true;
-	queue_work(pm_wq, &dev->power.work);
-
-	return retval;
-}
-
-/**
- * pm_request_idle - Submit an idle notification request for given device.
- * @dev: Device to handle.
- */
-int pm_request_idle(struct device *dev)
-{
-	unsigned long flags;
-	int retval;
-
-	spin_lock_irqsave(&dev->power.lock, flags);
-	retval = __pm_request_idle(dev);
-	spin_unlock_irqrestore(&dev->power.lock, flags);
-
-	return retval;
-}
-EXPORT_SYMBOL_GPL(pm_request_idle);
-
-/**
- * __pm_request_suspend - Submit a suspend request for given device.
- * @dev: Device to suspend.
- *
- * This function must be called under dev->power.lock with interrupts disabled.
- */
-static int __pm_request_suspend(struct device *dev)
-{
-	int retval = 0;
-
-	if (dev->power.runtime_error)
-		return -EINVAL;
-
-	if (dev->power.runtime_status == RPM_SUSPENDED)
-		retval = 1;
-	else if (atomic_read(&dev->power.usage_count) > 0
-	    || dev->power.disable_depth > 0)
-		retval = -EAGAIN;
-	else if (dev->power.runtime_status == RPM_SUSPENDING)
-		retval = -EINPROGRESS;
-	else if (!pm_children_suspended(dev))
-		retval = -EBUSY;
-	if (retval < 0)
-		return retval;
-
-	pm_runtime_deactivate_timer(dev);
-
-	if (dev->power.request_pending) {
-		/*
-		 * Pending resume requests take precedence over us, but we can
-		 * overtake any other pending request.
-		 */
-		if (dev->power.request == RPM_REQ_RESUME)
-			retval = -EAGAIN;
-		else if (dev->power.request != RPM_REQ_SUSPEND)
-			dev->power.request = retval ?
-						RPM_REQ_NONE : RPM_REQ_SUSPEND;
-		return retval;
-	} else if (retval) {
-		return retval;
-	}
-
-	dev->power.request = RPM_REQ_SUSPEND;
-	dev->power.request_pending = true;
-	queue_work(pm_wq, &dev->power.work);
-
-	return 0;
-}
-
-/**
  * pm_suspend_timer_fn - Timer function for pm_schedule_suspend().
  * @data: Device pointer passed by pm_schedule_suspend().
  *
- * Check if the time is right and execute __pm_request_suspend() in that case.
+ * Check if the time is right and queue a suspend request.
  */
 static void pm_suspend_timer_fn(unsigned long data)
 {
@@ -656,7 +657,8 @@ static void pm_suspend_timer_fn(unsigned long data)
 	/* If 'expire' is after 'jiffies' we've been called too early. */
 	if (expires > 0 && !time_after(expires, jiffies)) {
 		dev->power.timer_expires = 0;
-		__pm_request_suspend(dev);
+		rpm_suspend(dev, dev->power.timer_autosuspends ?
+		    (RPM_ASYNC | RPM_AUTO) : RPM_ASYNC);
 	}
 
 	spin_unlock_irqrestore(&dev->power.lock, flags);
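The timer path above hands rpm_suspend() RPM_ASYNC, plus RPM_AUTO when the timer was armed for autosuspend. From a driver's point of view the scheduling side is unchanged; a sketch (foo_* names are illustrative, and pm_request_resume() is assumed to remain as the async resume wrapper in pm_runtime.h):

	#include <linux/interrupt.h>
	#include <linux/pm_runtime.h>

	static void foo_idle_hint(struct device *dev)
	{
		/* One-shot: arm the suspend timer to fire in 100 ms. */
		pm_schedule_suspend(dev, 100);
	}

	static irqreturn_t foo_irq(int irq, void *data)
	{
		struct device *dev = data;

		/* Asynchronous resume request; safe in atomic context. */
		pm_request_resume(dev);
		return IRQ_HANDLED;
	}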
@@ -670,47 +672,25 @@ static void pm_suspend_timer_fn(unsigned long data)
 int pm_schedule_suspend(struct device *dev, unsigned int delay)
 {
 	unsigned long flags;
-	int retval = 0;
+	int retval;
 
 	spin_lock_irqsave(&dev->power.lock, flags);
 
-	if (dev->power.runtime_error) {
-		retval = -EINVAL;
-		goto out;
-	}
-
 	if (!delay) {
-		retval = __pm_request_suspend(dev);
+		retval = rpm_suspend(dev, RPM_ASYNC);
 		goto out;
 	}
 
-	pm_runtime_deactivate_timer(dev);
-
-	if (dev->power.request_pending) {
-		/*
-		 * Pending resume requests take precedence over us, but any
-		 * other pending requests have to be canceled.
-		 */
-		if (dev->power.request == RPM_REQ_RESUME) {
-			retval = -EAGAIN;
-			goto out;
-		}
-		dev->power.request = RPM_REQ_NONE;
-	}
-
-	if (dev->power.runtime_status == RPM_SUSPENDED)
-		retval = 1;
-	else if (atomic_read(&dev->power.usage_count) > 0
-	    || dev->power.disable_depth > 0)
-		retval = -EAGAIN;
-	else if (!pm_children_suspended(dev))
-		retval = -EBUSY;
+	retval = rpm_check_suspend_allowed(dev);
 	if (retval)
 		goto out;
 
+	/* Other scheduled or pending requests need to be canceled. */
+	pm_runtime_cancel_pending(dev);
+
 	dev->power.timer_expires = jiffies + msecs_to_jiffies(delay);
-	if (!dev->power.timer_expires)
-		dev->power.timer_expires = 1;
+	dev->power.timer_expires += !dev->power.timer_expires;
+	dev->power.timer_autosuspends = 0;
 	mod_timer(&dev->power.suspend_timer, dev->power.timer_expires);
 
  out:
@@ -721,103 +701,88 @@ int pm_schedule_suspend(struct device *dev, unsigned int delay)
 EXPORT_SYMBOL_GPL(pm_schedule_suspend);
 
 /**
- * pm_request_resume - Submit a resume request for given device.
- * @dev: Device to resume.
+ * __pm_runtime_idle - Entry point for run-time idle operations.
+ * @dev: Device to send idle notification for.
+ * @rpmflags: Flag bits.
  *
- * This function must be called under dev->power.lock with interrupts disabled.
+ * If the RPM_GET_PUT flag is set, decrement the device's usage count and
+ * return immediately if it is larger than zero.  Then carry out an idle
+ * notification, either synchronous or asynchronous.
+ *
+ * This routine may be called in atomic context if the RPM_ASYNC flag is set.
  */
-static int __pm_request_resume(struct device *dev)
+int __pm_runtime_idle(struct device *dev, int rpmflags)
 {
-	int retval = 0;
-
-	if (dev->power.runtime_error)
-		return -EINVAL;
-
-	if (dev->power.runtime_status == RPM_ACTIVE)
-		retval = 1;
-	else if (dev->power.runtime_status == RPM_RESUMING)
-		retval = -EINPROGRESS;
-	else if (dev->power.disable_depth > 0)
-		retval = -EAGAIN;
-	if (retval < 0)
-		return retval;
-
-	pm_runtime_deactivate_timer(dev);
+	unsigned long flags;
+	int retval;
 
-	if (dev->power.runtime_status == RPM_SUSPENDING) {
-		dev->power.deferred_resume = true;
-		return retval;
+	if (rpmflags & RPM_GET_PUT) {
+		if (!atomic_dec_and_test(&dev->power.usage_count))
+			return 0;
 	}
-	if (dev->power.request_pending) {
-		/* If non-resume request is pending, we can overtake it. */
-		dev->power.request = retval ? RPM_REQ_NONE : RPM_REQ_RESUME;
-		return retval;
-	}
-	if (retval)
-		return retval;
 
-	dev->power.request = RPM_REQ_RESUME;
-	dev->power.request_pending = true;
-	queue_work(pm_wq, &dev->power.work);
+	spin_lock_irqsave(&dev->power.lock, flags);
+	retval = rpm_idle(dev, rpmflags);
+	spin_unlock_irqrestore(&dev->power.lock, flags);
 
 	return retval;
 }
+EXPORT_SYMBOL_GPL(__pm_runtime_idle);
 
 /**
- * pm_request_resume - Submit a resume request for given device.
- * @dev: Device to resume.
+ * __pm_runtime_suspend - Entry point for run-time put/suspend operations.
+ * @dev: Device to suspend.
+ * @rpmflags: Flag bits.
+ *
+ * If the RPM_GET_PUT flag is set, decrement the device's usage count and
+ * return immediately if it is larger than zero.  Then carry out a suspend,
+ * either synchronous or asynchronous.
+ *
+ * This routine may be called in atomic context if the RPM_ASYNC flag is set.
  */
-int pm_request_resume(struct device *dev)
+int __pm_runtime_suspend(struct device *dev, int rpmflags)
 {
 	unsigned long flags;
 	int retval;
 
+	if (rpmflags & RPM_GET_PUT) {
+		if (!atomic_dec_and_test(&dev->power.usage_count))
+			return 0;
+	}
+
 	spin_lock_irqsave(&dev->power.lock, flags);
-	retval = __pm_request_resume(dev);
+	retval = rpm_suspend(dev, rpmflags);
 	spin_unlock_irqrestore(&dev->power.lock, flags);
 
 	return retval;
 }
-EXPORT_SYMBOL_GPL(pm_request_resume);
+EXPORT_SYMBOL_GPL(__pm_runtime_suspend);
 
 /**
- * __pm_runtime_get - Reference count a device and wake it up, if necessary.
- * @dev: Device to handle.
- * @sync: If set and the device is suspended, resume it synchronously.
+ * __pm_runtime_resume - Entry point for run-time resume operations.
+ * @dev: Device to resume.
+ * @rpmflags: Flag bits.
+ *
+ * If the RPM_GET_PUT flag is set, increment the device's usage count.  Then
+ * carry out a resume, either synchronous or asynchronous.
  *
- * Increment the usage count of the device and resume it or submit a resume
- * request for it, depending on the value of @sync.
+ * This routine may be called in atomic context if the RPM_ASYNC flag is set.
  */
-int __pm_runtime_get(struct device *dev, bool sync)
+int __pm_runtime_resume(struct device *dev, int rpmflags)
 {
+	unsigned long flags;
 	int retval;
 
-	atomic_inc(&dev->power.usage_count);
-	retval = sync ? pm_runtime_resume(dev) : pm_request_resume(dev);
+	if (rpmflags & RPM_GET_PUT)
+		atomic_inc(&dev->power.usage_count);
 
-	return retval;
-}
-EXPORT_SYMBOL_GPL(__pm_runtime_get);
-
-/**
- * __pm_runtime_put - Decrement the device's usage counter and notify its bus.
- * @dev: Device to handle.
- * @sync: If the device's bus type is to be notified, do that synchronously.
- *
- * Decrement the usage count of the device and if it reaches zero, carry out a
- * synchronous idle notification or submit an idle notification request for it,
- * depending on the value of @sync.
- */
-int __pm_runtime_put(struct device *dev, bool sync)
-{
-	int retval = 0;
-
-	if (atomic_dec_and_test(&dev->power.usage_count))
-		retval = sync ? pm_runtime_idle(dev) : pm_request_idle(dev);
+	spin_lock_irqsave(&dev->power.lock, flags);
+	retval = rpm_resume(dev, rpmflags);
+	spin_unlock_irqrestore(&dev->power.lock, flags);
 
 	return retval;
 }
-EXPORT_SYMBOL_GPL(__pm_runtime_put);
+EXPORT_SYMBOL_GPL(__pm_runtime_resume);
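
/*
 * For reference, not part of this hunk: with the three entry points above,
 * the helpers drivers call become thin wrappers that only differ in the
 * rpmflags they pass.  Approximate mapping, following the reworked
 * include/linux/pm_runtime.h in this series:
 */
static inline int pm_runtime_suspend(struct device *dev)
{
	return __pm_runtime_suspend(dev, 0);		/* synchronous */
}

static inline int pm_request_resume(struct device *dev)
{
	return __pm_runtime_resume(dev, RPM_ASYNC);	/* queued on pm_wq */
}

static inline int pm_runtime_get_sync(struct device *dev)
{
	return __pm_runtime_resume(dev, RPM_GET_PUT);	/* count, then resume */
}

static inline int pm_runtime_put(struct device *dev)
{
	return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC);
}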
 
 /**
  * __pm_runtime_set_status - Set run-time PM status of a device.
@@ -968,7 +933,7 @@ int pm_runtime_barrier(struct device *dev)
 
 	if (dev->power.request_pending
 	    && dev->power.request == RPM_REQ_RESUME) {
-		__pm_runtime_resume(dev, false);
+		rpm_resume(dev, 0);
 		retval = 1;
 	}
 
@@ -1017,7 +982,7 @@ void __pm_runtime_disable(struct device *dev, bool check_resume)
 		 */
 		pm_runtime_get_noresume(dev);
 
-		__pm_runtime_resume(dev, false);
+		rpm_resume(dev, 0);
 
 		pm_runtime_put_noidle(dev);
 	}
@@ -1065,7 +1030,7 @@ void pm_runtime_forbid(struct device *dev)
 
 	dev->power.runtime_auto = false;
 	atomic_inc(&dev->power.usage_count);
-	__pm_runtime_resume(dev, false);
+	rpm_resume(dev, 0);
 
  out:
 	spin_unlock_irq(&dev->power.lock);
@@ -1086,7 +1051,7 @@ void pm_runtime_allow(struct device *dev)
 
 	dev->power.runtime_auto = true;
 	if (atomic_dec_and_test(&dev->power.usage_count))
-		__pm_runtime_idle(dev);
+		rpm_idle(dev, RPM_AUTO);
 
  out:
 	spin_unlock_irq(&dev->power.lock);
@@ -1094,13 +1059,110 @@ void pm_runtime_allow(struct device *dev)
 EXPORT_SYMBOL_GPL(pm_runtime_allow);
 
 /**
+ * pm_runtime_no_callbacks - Ignore run-time PM callbacks for a device.
+ * @dev: Device to handle.
+ *
+ * Set the power.no_callbacks flag, which tells the PM core that this
+ * device is power-managed through its parent and has no run-time PM
+ * callbacks of its own.  The run-time sysfs attributes will be removed.
+ */
+void pm_runtime_no_callbacks(struct device *dev)
+{
+	spin_lock_irq(&dev->power.lock);
+	dev->power.no_callbacks = 1;
+	spin_unlock_irq(&dev->power.lock);
+	if (device_is_registered(dev))
+		rpm_sysfs_remove(dev);
+}
+EXPORT_SYMBOL_GPL(pm_runtime_no_callbacks);
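
/*
 * Illustrative sketch, not part of this patch: a parent driver registering
 * a child device that is powered entirely through the parent, so the
 * child's own callbacks can be skipped.  foo_add_child() is hypothetical.
 */
static int foo_add_child(struct device *child)
{
	int error;

	error = device_register(child);
	if (error)
		return error;

	pm_runtime_no_callbacks(child);	/* power-managed via the parent */
	pm_runtime_enable(child);
	return 0;
}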
+
+/**
+ * update_autosuspend - Handle a change to a device's autosuspend settings.
+ * @dev: Device to handle.
+ * @old_delay: The former autosuspend_delay value.
+ * @old_use: The former use_autosuspend value.
+ *
+ * Prevent runtime suspend if the new delay is negative and use_autosuspend is
+ * set; otherwise allow it.  Send an idle notification if suspends are allowed.
+ *
+ * This function must be called under dev->power.lock with interrupts disabled.
+ */
+static void update_autosuspend(struct device *dev, int old_delay, int old_use)
+{
+	int delay = dev->power.autosuspend_delay;
+
+	/* Should runtime suspend be prevented now? */
+	if (dev->power.use_autosuspend && delay < 0) {
+
+		/* If it used to be allowed then prevent it. */
+		if (!old_use || old_delay >= 0) {
+			atomic_inc(&dev->power.usage_count);
+			rpm_resume(dev, 0);
+		}
+	}
+
+	/* Runtime suspend should be allowed now. */
+	else {
+
+		/* If it used to be prevented then allow it. */
+		if (old_use && old_delay < 0)
+			atomic_dec(&dev->power.usage_count);
+
+		/* Maybe we can autosuspend now. */
+		rpm_idle(dev, RPM_AUTO);
+	}
+}
+
+/**
+ * pm_runtime_set_autosuspend_delay - Set a device's autosuspend_delay value.
+ * @dev: Device to handle.
+ * @delay: Value of the new delay in milliseconds.
+ *
+ * Set the device's power.autosuspend_delay value.  If it changes to negative
+ * and the power.use_autosuspend flag is set, prevent run-time suspends.  If it
+ * changes the other way, allow run-time suspends.
+ */
+void pm_runtime_set_autosuspend_delay(struct device *dev, int delay)
+{
+	int old_delay, old_use;
+
+	spin_lock_irq(&dev->power.lock);
+	old_delay = dev->power.autosuspend_delay;
+	old_use = dev->power.use_autosuspend;
+	dev->power.autosuspend_delay = delay;
+	update_autosuspend(dev, old_delay, old_use);
+	spin_unlock_irq(&dev->power.lock);
+}
+EXPORT_SYMBOL_GPL(pm_runtime_set_autosuspend_delay);
+
+/**
+ * __pm_runtime_use_autosuspend - Set a device's use_autosuspend flag.
+ * @dev: Device to handle.
+ * @use: New value for use_autosuspend.
+ *
+ * Set the device's power.use_autosuspend flag, and allow or prevent run-time
+ * suspends as needed.
+ */
+void __pm_runtime_use_autosuspend(struct device *dev, bool use)
+{
+	int old_delay, old_use;
+
+	spin_lock_irq(&dev->power.lock);
+	old_delay = dev->power.autosuspend_delay;
+	old_use = dev->power.use_autosuspend;
+	dev->power.use_autosuspend = use;
+	update_autosuspend(dev, old_delay, old_use);
+	spin_unlock_irq(&dev->power.lock);
+}
+EXPORT_SYMBOL_GPL(__pm_runtime_use_autosuspend);
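
/*
 * Illustrative sketch, not part of this patch: a probe routine enabling
 * autosuspend with a two-second idle window, plus an I/O completion path
 * that refreshes the idle timestamp before dropping its usage count.
 * foo_* names are hypothetical; pm_runtime_mark_last_busy() and
 * pm_runtime_put_autosuspend() are helpers added elsewhere in this series.
 */
static int foo_probe(struct device *dev)
{
	pm_runtime_set_autosuspend_delay(dev, 2000);
	pm_runtime_use_autosuspend(dev);
	pm_runtime_enable(dev);
	return 0;
}

static void foo_io_done(struct device *dev)
{
	pm_runtime_mark_last_busy(dev);		/* restart the idle window */
	pm_runtime_put_autosuspend(dev);	/* may suspend after the delay */
}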
+
+/**
  * pm_runtime_init - Initialize run-time PM fields in given device object.
  * @dev: Device object to initialize.
  */
 void pm_runtime_init(struct device *dev)
 {
-	spin_lock_init(&dev->power.lock);
-
 	dev->power.runtime_status = RPM_SUSPENDED;
 	dev->power.idle_notification = false;
 
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index e56b4388fe61..0b1e46bf3e56 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -75,12 +75,27 @@
  *	attribute is set to "enabled" by bus type code or device drivers and in
 *	that case it should be safe to leave the default value.
  *
+ *	autosuspend_delay_ms - Report/change a device's autosuspend_delay value
+ *
+ *	Some drivers don't want to carry out a runtime suspend as soon as a
+ *	device becomes idle; they want it to remain idle for a minimum period
+ *	of time before suspending it.  This period is the autosuspend_delay
+ *	value (expressed in milliseconds) and it can be controlled by the user.
+ *	If the value is negative then the device will never be runtime
+ *	suspended.
+ *
+ *	NOTE: The autosuspend_delay_ms attribute and the autosuspend_delay
+ *	value are used only if the driver calls pm_runtime_use_autosuspend().
+ *
  *	wakeup_count - Report the number of wakeup events related to the device
  */
 
 static const char enabled[] = "enabled";
 static const char disabled[] = "disabled";
 
+const char power_group_name[] = "power";
+EXPORT_SYMBOL_GPL(power_group_name);
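
/*
 * Illustrative sketch, not part of this patch: power_group_name is exported
 * so other code can merge extra attributes into the existing "power"
 * directory with the newly added sysfs_merge_group().  foo_latency and its
 * show routine are hypothetical.
 */
static ssize_t foo_latency_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", 0);		/* placeholder value */
}
static DEVICE_ATTR(foo_latency, 0444, foo_latency_show, NULL);

static struct attribute *foo_power_attrs[] = {
	&dev_attr_foo_latency.attr,
	NULL,
};
static const struct attribute_group foo_power_group = {
	.name	= power_group_name,
	.attrs	= foo_power_attrs,
};

/* In the driver: sysfs_merge_group(&dev->kobj, &foo_power_group); */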
+
 #ifdef CONFIG_PM_RUNTIME
 static const char ctrl_auto[] = "auto";
 static const char ctrl_on[] = "on";
@@ -170,6 +185,33 @@ static ssize_t rtpm_status_show(struct device *dev,
 }
 
 static DEVICE_ATTR(runtime_status, 0444, rtpm_status_show, NULL);
+
+static ssize_t autosuspend_delay_ms_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	if (!dev->power.use_autosuspend)
+		return -EIO;
+	return sprintf(buf, "%d\n", dev->power.autosuspend_delay);
+}
+
+static ssize_t autosuspend_delay_ms_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t n)
+{
+	long delay;
+
+	if (!dev->power.use_autosuspend)
+		return -EIO;
+
+	if (strict_strtol(buf, 10, &delay) != 0 || delay != (int) delay)
+		return -EINVAL;
+
+	pm_runtime_set_autosuspend_delay(dev, delay);
+	return n;
+}
+
+static DEVICE_ATTR(autosuspend_delay_ms, 0644, autosuspend_delay_ms_show,
+		autosuspend_delay_ms_store);
+
 #endif
 
 static ssize_t
@@ -210,11 +252,122 @@ static DEVICE_ATTR(wakeup, 0644, wake_show, wake_store);
 static ssize_t wakeup_count_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
-	return sprintf(buf, "%lu\n", dev->power.wakeup_count);
+	unsigned long count = 0;
+	bool enabled = false;
+
+	spin_lock_irq(&dev->power.lock);
+	if (dev->power.wakeup) {
+		count = dev->power.wakeup->event_count;
+		enabled = true;
+	}
+	spin_unlock_irq(&dev->power.lock);
+	return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n");
 }
 
 static DEVICE_ATTR(wakeup_count, 0444, wakeup_count_show, NULL);
-#endif
+
+static ssize_t wakeup_active_count_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	unsigned long count = 0;
+	bool enabled = false;
+
+	spin_lock_irq(&dev->power.lock);
+	if (dev->power.wakeup) {
+		count = dev->power.wakeup->active_count;
+		enabled = true;
+	}
+	spin_unlock_irq(&dev->power.lock);
+	return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n");
+}
+
+static DEVICE_ATTR(wakeup_active_count, 0444, wakeup_active_count_show, NULL);
+
+static ssize_t wakeup_hit_count_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	unsigned long count = 0;
+	bool enabled = false;
+
+	spin_lock_irq(&dev->power.lock);
+	if (dev->power.wakeup) {
+		count = dev->power.wakeup->hit_count;
+		enabled = true;
+	}
+	spin_unlock_irq(&dev->power.lock);
+	return enabled ? sprintf(buf, "%lu\n", count) : sprintf(buf, "\n");
+}
+
+static DEVICE_ATTR(wakeup_hit_count, 0444, wakeup_hit_count_show, NULL);
+
+static ssize_t wakeup_active_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	unsigned int active = 0;
+	bool enabled = false;
+
+	spin_lock_irq(&dev->power.lock);
+	if (dev->power.wakeup) {
+		active = dev->power.wakeup->active;
+		enabled = true;
+	}
+	spin_unlock_irq(&dev->power.lock);
+	return enabled ? sprintf(buf, "%u\n", active) : sprintf(buf, "\n");
+}
+
+static DEVICE_ATTR(wakeup_active, 0444, wakeup_active_show, NULL);
+
+static ssize_t wakeup_total_time_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	s64 msec = 0;
+	bool enabled = false;
+
+	spin_lock_irq(&dev->power.lock);
+	if (dev->power.wakeup) {
+		msec = ktime_to_ms(dev->power.wakeup->total_time);
+		enabled = true;
+	}
+	spin_unlock_irq(&dev->power.lock);
+	return enabled ? sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n");
+}
+
+static DEVICE_ATTR(wakeup_total_time_ms, 0444, wakeup_total_time_show, NULL);
+
+static ssize_t wakeup_max_time_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	s64 msec = 0;
+	bool enabled = false;
+
+	spin_lock_irq(&dev->power.lock);
+	if (dev->power.wakeup) {
+		msec = ktime_to_ms(dev->power.wakeup->max_time);
+		enabled = true;
+	}
+	spin_unlock_irq(&dev->power.lock);
+	return enabled ? sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n");
+}
+
+static DEVICE_ATTR(wakeup_max_time_ms, 0444, wakeup_max_time_show, NULL);
+
+static ssize_t wakeup_last_time_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	s64 msec = 0;
+	bool enabled = false;
+
+	spin_lock_irq(&dev->power.lock);
+	if (dev->power.wakeup) {
+		msec = ktime_to_ms(dev->power.wakeup->last_time);
+		enabled = true;
+	}
+	spin_unlock_irq(&dev->power.lock);
+	return enabled ? sprintf(buf, "%lld\n", msec) : sprintf(buf, "\n");
+}
+
+static DEVICE_ATTR(wakeup_last_time_ms, 0444, wakeup_last_time_show, NULL);
+#endif /* CONFIG_PM_SLEEP */
 
 #ifdef CONFIG_PM_ADVANCED_DEBUG
 #ifdef CONFIG_PM_RUNTIME
@@ -279,19 +432,20 @@ static DEVICE_ATTR(async, 0644, async_show, async_store);
 #endif /* CONFIG_PM_ADVANCED_DEBUG */
 
 static struct attribute * power_attrs[] = {
-#ifdef CONFIG_PM_RUNTIME
-	&dev_attr_control.attr,
-	&dev_attr_runtime_status.attr,
-	&dev_attr_runtime_suspended_time.attr,
-	&dev_attr_runtime_active_time.attr,
-#endif
 	&dev_attr_wakeup.attr,
 #ifdef CONFIG_PM_SLEEP
 	&dev_attr_wakeup_count.attr,
+	&dev_attr_wakeup_active_count.attr,
+	&dev_attr_wakeup_hit_count.attr,
+	&dev_attr_wakeup_active.attr,
+	&dev_attr_wakeup_total_time_ms.attr,
+	&dev_attr_wakeup_max_time_ms.attr,
+	&dev_attr_wakeup_last_time_ms.attr,
 #endif
 #ifdef CONFIG_PM_ADVANCED_DEBUG
 	&dev_attr_async.attr,
 #ifdef CONFIG_PM_RUNTIME
+	&dev_attr_runtime_status.attr,
 	&dev_attr_runtime_usage.attr,
 	&dev_attr_runtime_active_kids.attr,
 	&dev_attr_runtime_enabled.attr,
@@ -300,10 +454,53 @@ static struct attribute * power_attrs[] = {
 	NULL,
 };
 static struct attribute_group pm_attr_group = {
-	.name	= "power",
+	.name	= power_group_name,
 	.attrs	= power_attrs,
 };
 
+#ifdef CONFIG_PM_RUNTIME
+
+static struct attribute *runtime_attrs[] = {
+#ifndef CONFIG_PM_ADVANCED_DEBUG
+	&dev_attr_runtime_status.attr,
+#endif
+	&dev_attr_control.attr,
+	&dev_attr_runtime_suspended_time.attr,
+	&dev_attr_runtime_active_time.attr,
+	&dev_attr_autosuspend_delay_ms.attr,
+	NULL,
+};
+static struct attribute_group pm_runtime_attr_group = {
+	.name	= power_group_name,
+	.attrs	= runtime_attrs,
+};
+
+int dpm_sysfs_add(struct device *dev)
+{
+	int rc;
+
+	rc = sysfs_create_group(&dev->kobj, &pm_attr_group);
+	if (rc == 0 && !dev->power.no_callbacks) {
+		rc = sysfs_merge_group(&dev->kobj, &pm_runtime_attr_group);
+		if (rc)
+			sysfs_remove_group(&dev->kobj, &pm_attr_group);
+	}
+	return rc;
+}
+
+void rpm_sysfs_remove(struct device *dev)
+{
+	sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group);
+}
+
+void dpm_sysfs_remove(struct device *dev)
+{
+	rpm_sysfs_remove(dev);
+	sysfs_remove_group(&dev->kobj, &pm_attr_group);
+}
+
+#else /* CONFIG_PM_RUNTIME */
+
 int dpm_sysfs_add(struct device * dev)
 {
 	return sysfs_create_group(&dev->kobj, &pm_attr_group);
@@ -313,3 +510,5 @@ void dpm_sysfs_remove(struct device * dev)
 {
 	sysfs_remove_group(&dev->kobj, &pm_attr_group);
 }
+
+#endif
diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c
index 0a1a2c4dbc6e..9f4258df4cfd 100644
--- a/drivers/base/power/trace.c
+++ b/drivers/base/power/trace.c
@@ -188,8 +188,10 @@ static int show_file_hash(unsigned int value)
 static int show_dev_hash(unsigned int value)
 {
 	int match = 0;
-	struct list_head *entry = dpm_list.prev;
+	struct list_head *entry;
 
+	device_pm_lock();
+	entry = dpm_list.prev;
 	while (entry != &dpm_list) {
 		struct device * dev = to_device(entry);
 		unsigned int hash = hash_string(DEVSEED, dev_name(dev), DEVHASH);
@@ -199,11 +201,43 @@ static int show_dev_hash(unsigned int value)
 		}
 		entry = entry->prev;
 	}
+	device_pm_unlock();
 	return match;
 }
 
 static unsigned int hash_value_early_read;
 
+int show_trace_dev_match(char *buf, size_t size)
+{
+	unsigned int value = hash_value_early_read / (USERHASH * FILEHASH);
+	int ret = 0;
+	struct list_head *entry;
+
+	/*
+	 * It's possible that multiple devices will match the hash and we can't
+	 * tell which is the culprit, so it's best to output them all.
+	 */
+	device_pm_lock();
+	entry = dpm_list.prev;
+	while (size && entry != &dpm_list) {
+		struct device *dev = to_device(entry);
+		unsigned int hash = hash_string(DEVSEED, dev_name(dev),
+						DEVHASH);
+		if (hash == value) {
+			int len = snprintf(buf, size, "%s\n",
+					    dev_driver_string(dev));
+			if (len > size)
+				len = size;
+			buf += len;
+			ret += len;
+			size -= len;
+		}
+		entry = entry->prev;
+	}
+	device_pm_unlock();
+	return ret;
+}
+
 static int early_resume_init(void)
 {
 	hash_value_early_read = read_magic_time();
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index eb594facfc3f..71c5528e1c35 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -11,7 +11,12 @@
 #include <linux/sched.h>
 #include <linux/capability.h>
 #include <linux/suspend.h>
-#include <linux/pm.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+
+#include "power.h"
+
+#define TIMEOUT		100
 
 /*
  * If set, the suspend/hibernate code will abort transitions to a sleep state
@@ -20,18 +25,244 @@
 bool events_check_enabled;
 
 /* The counter of registered wakeup events. */
-static unsigned long event_count;
+static atomic_t event_count = ATOMIC_INIT(0);
 /* A preserved old value of event_count. */
-static unsigned long saved_event_count;
+static unsigned int saved_count;
 /* The counter of wakeup events being processed. */
-static unsigned long events_in_progress;
+static atomic_t events_in_progress = ATOMIC_INIT(0);
 
 static DEFINE_SPINLOCK(events_lock);
 
 static void pm_wakeup_timer_fn(unsigned long data);
 
-static DEFINE_TIMER(events_timer, pm_wakeup_timer_fn, 0, 0);
-static unsigned long events_timer_expires;
+static LIST_HEAD(wakeup_sources);
+
+/**
+ * wakeup_source_create - Create a struct wakeup_source object.
+ * @name: Name of the new wakeup source.
+ */
+struct wakeup_source *wakeup_source_create(const char *name)
+{
+	struct wakeup_source *ws;
+
+	ws = kzalloc(sizeof(*ws), GFP_KERNEL);
+	if (!ws)
+		return NULL;
+
+	spin_lock_init(&ws->lock);
+	if (name)
+		ws->name = kstrdup(name, GFP_KERNEL);
+
+	return ws;
+}
+EXPORT_SYMBOL_GPL(wakeup_source_create);
+
+/**
+ * wakeup_source_destroy - Destroy a struct wakeup_source object.
+ * @ws: Wakeup source to destroy.
+ */
+void wakeup_source_destroy(struct wakeup_source *ws)
+{
+	if (!ws)
+		return;
+
+	spin_lock_irq(&ws->lock);
+	while (ws->active) {
+		spin_unlock_irq(&ws->lock);
+
+		schedule_timeout_interruptible(msecs_to_jiffies(TIMEOUT));
+
+		spin_lock_irq(&ws->lock);
+	}
+	spin_unlock_irq(&ws->lock);
+
+	kfree(ws->name);
+	kfree(ws);
+}
+EXPORT_SYMBOL_GPL(wakeup_source_destroy);
+
+/**
+ * wakeup_source_add - Add given object to the list of wakeup sources.
+ * @ws: Wakeup source object to add to the list.
+ */
+void wakeup_source_add(struct wakeup_source *ws)
+{
+	if (WARN_ON(!ws))
+		return;
+
+	setup_timer(&ws->timer, pm_wakeup_timer_fn, (unsigned long)ws);
+	ws->active = false;
+
+	spin_lock_irq(&events_lock);
+	list_add_rcu(&ws->entry, &wakeup_sources);
+	spin_unlock_irq(&events_lock);
+	synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(wakeup_source_add);
+
+/**
+ * wakeup_source_remove - Remove given object from the wakeup sources list.
+ * @ws: Wakeup source object to remove from the list.
+ */
+void wakeup_source_remove(struct wakeup_source *ws)
+{
+	if (WARN_ON(!ws))
+		return;
+
+	spin_lock_irq(&events_lock);
+	list_del_rcu(&ws->entry);
+	spin_unlock_irq(&events_lock);
+	synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(wakeup_source_remove);
+
+/**
+ * wakeup_source_register - Create wakeup source and add it to the list.
+ * @name: Name of the wakeup source to register.
+ */
+struct wakeup_source *wakeup_source_register(const char *name)
+{
+	struct wakeup_source *ws;
+
+	ws = wakeup_source_create(name);
+	if (ws)
+		wakeup_source_add(ws);
+
+	return ws;
+}
+EXPORT_SYMBOL_GPL(wakeup_source_register);
+
+/**
+ * wakeup_source_unregister - Remove wakeup source from the list and destroy it.
+ * @ws: Wakeup source object to unregister.
+ */
+void wakeup_source_unregister(struct wakeup_source *ws)
+{
+	wakeup_source_remove(ws);
+	wakeup_source_destroy(ws);
+}
+EXPORT_SYMBOL_GPL(wakeup_source_unregister);
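
/*
 * Illustrative sketch, not part of this patch: a subsystem without a
 * struct device of its own keeping the system awake while it handles an
 * event, using __pm_stay_awake()/__pm_relax() defined below.  The foo_*
 * names and the "foo_events" source name are hypothetical.
 */
static struct wakeup_source *foo_ws;

static int foo_init(void)
{
	foo_ws = wakeup_source_register("foo_events");
	return foo_ws ? 0 : -ENOMEM;
}

static void foo_handle_event(void)
{
	__pm_stay_awake(foo_ws);	/* block system suspend */
	/* ... process the event ... */
	__pm_relax(foo_ws);		/* suspend may proceed again */
}

static void foo_exit(void)
{
	wakeup_source_unregister(foo_ws);
}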
+
+/**
+ * device_wakeup_attach - Attach a wakeup source object to a device object.
+ * @dev: Device to handle.
+ * @ws: Wakeup source object to attach to @dev.
+ *
+ * This causes @dev to be treated as a wakeup device.
+ */
+static int device_wakeup_attach(struct device *dev, struct wakeup_source *ws)
+{
+	spin_lock_irq(&dev->power.lock);
+	if (dev->power.wakeup) {
+		spin_unlock_irq(&dev->power.lock);
+		return -EEXIST;
+	}
+	dev->power.wakeup = ws;
+	spin_unlock_irq(&dev->power.lock);
+	return 0;
+}
+
+/**
+ * device_wakeup_enable - Enable given device to be a wakeup source.
+ * @dev: Device to handle.
+ *
+ * Create a wakeup source object, register it and attach it to @dev.
+ */
+int device_wakeup_enable(struct device *dev)
+{
+	struct wakeup_source *ws;
+	int ret;
+
+	if (!dev || !dev->power.can_wakeup)
+		return -EINVAL;
+
+	ws = wakeup_source_register(dev_name(dev));
+	if (!ws)
+		return -ENOMEM;
+
+	ret = device_wakeup_attach(dev, ws);
+	if (ret)
+		wakeup_source_unregister(ws);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(device_wakeup_enable);
+
+/**
+ * device_wakeup_detach - Detach a device's wakeup source object from it.
+ * @dev: Device to detach the wakeup source object from.
+ *
+ * After it returns, @dev will not be treated as a wakeup device any more.
+ */
+static struct wakeup_source *device_wakeup_detach(struct device *dev)
+{
+	struct wakeup_source *ws;
+
+	spin_lock_irq(&dev->power.lock);
+	ws = dev->power.wakeup;
+	dev->power.wakeup = NULL;
+	spin_unlock_irq(&dev->power.lock);
+	return ws;
+}
+
+/**
+ * device_wakeup_disable - Do not regard a device as a wakeup source any more.
+ * @dev: Device to handle.
+ *
+ * Detach the @dev's wakeup source object from it, unregister this wakeup source
+ * object and destroy it.
+ */
+int device_wakeup_disable(struct device *dev)
+{
+	struct wakeup_source *ws;
+
+	if (!dev || !dev->power.can_wakeup)
+		return -EINVAL;
+
+	ws = device_wakeup_detach(dev);
+	if (ws)
+		wakeup_source_unregister(ws);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(device_wakeup_disable);
+
+/**
+ * device_init_wakeup - Device wakeup initialization.
+ * @dev: Device to handle.
+ * @enable: Whether or not to enable @dev as a wakeup device.
+ *
+ * By default, most devices should leave wakeup disabled.  The exceptions are
+ * devices that everyone expects to be wakeup sources: keyboards, power buttons,
+ * possibly network interfaces, etc.
+ */
+int device_init_wakeup(struct device *dev, bool enable)
+{
+	int ret = 0;
+
+	if (enable) {
+		device_set_wakeup_capable(dev, true);
+		ret = device_wakeup_enable(dev);
+	} else {
+		device_set_wakeup_capable(dev, false);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(device_init_wakeup);
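
/*
 * Illustrative sketch, not part of this patch: a power-button style driver
 * that everyone expects to be a wakeup source, so it enables wakeup
 * unconditionally at probe time.  foo_button_probe() is hypothetical.
 */
static int foo_button_probe(struct device *dev)
{
	/* Mark the device wakeup-capable and attach a wakeup source to it. */
	return device_init_wakeup(dev, true);
}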
+
+/**
+ * device_set_wakeup_enable - Enable or disable a device to wake up the system.
+ * @dev: Device to handle.
+ */
+int device_set_wakeup_enable(struct device *dev, bool enable)
+{
+	if (!dev || !dev->power.can_wakeup)
+		return -EINVAL;
+
+	return enable ? device_wakeup_enable(dev) : device_wakeup_disable(dev);
+}
+EXPORT_SYMBOL_GPL(device_set_wakeup_enable);
 
 /*
  * The functions below use the observation that each wakeup event starts a
@@ -55,118 +286,259 @@ static unsigned long events_timer_expires;
  * knowledge, however, may not be available to it, so it can simply specify time
  * to wait before the system can be suspended and pass it as the second
  * argument of pm_wakeup_event().
+ *
+ * It is valid to call pm_relax() after pm_wakeup_event(), in which case the
+ * "no suspend" period will be ended either by the pm_relax(), or by the timer
+ * function executed when the timer expires, whichever comes first.
  */
 
 /**
+ * wakeup_source_activate - Mark given wakeup source as active.
+ * @ws: Wakeup source to handle.
+ *
+ * Update the @ws' statistics and, if @ws has just been activated, notify the PM
+ * core of the event by incrementing the counter of wakeup events being
+ * processed.
+ */
+static void wakeup_source_activate(struct wakeup_source *ws)
+{
+	ws->active = true;
+	ws->active_count++;
+	ws->timer_expires = jiffies;
+	ws->last_time = ktime_get();
+
+	atomic_inc(&events_in_progress);
+}
+
+/**
+ * __pm_stay_awake - Notify the PM core of a wakeup event.
+ * @ws: Wakeup source object associated with the source of the event.
+ *
+ * It is safe to call this function from interrupt context.
+ */
+void __pm_stay_awake(struct wakeup_source *ws)
+{
+	unsigned long flags;
+
+	if (!ws)
+		return;
+
+	spin_lock_irqsave(&ws->lock, flags);
+	ws->event_count++;
+	if (!ws->active)
+		wakeup_source_activate(ws);
+	spin_unlock_irqrestore(&ws->lock, flags);
+}
+EXPORT_SYMBOL_GPL(__pm_stay_awake);
+
+/**
  * pm_stay_awake - Notify the PM core that a wakeup event is being processed.
  * @dev: Device the wakeup event is related to.
  *
- * Notify the PM core of a wakeup event (signaled by @dev) by incrementing the
- * counter of wakeup events being processed.  If @dev is not NULL, the counter
- * of wakeup events related to @dev is incremented too.
+ * Notify the PM core of a wakeup event (signaled by @dev) by calling
+ * __pm_stay_awake for the @dev's wakeup source object.
  *
  * Call this function after detecting of a wakeup event if pm_relax() is going
  * to be called directly after processing the event (and possibly passing it to
  * user space for further processing).
- *
- * It is safe to call this function from interrupt context.
  */
 void pm_stay_awake(struct device *dev)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&events_lock, flags);
-	if (dev)
-		dev->power.wakeup_count++;
+	if (!dev)
+		return;
 
-	events_in_progress++;
-	spin_unlock_irqrestore(&events_lock, flags);
+	spin_lock_irqsave(&dev->power.lock, flags);
+	__pm_stay_awake(dev->power.wakeup);
+	spin_unlock_irqrestore(&dev->power.lock, flags);
 }
+EXPORT_SYMBOL_GPL(pm_stay_awake);
 
 /**
- * pm_relax - Notify the PM core that processing of a wakeup event has ended.
+ * wakeup_source_deactivate - Mark given wakeup source as inactive.
+ * @ws: Wakeup source to handle.
  *
- * Notify the PM core that a wakeup event has been processed by decrementing
- * the counter of wakeup events being processed and incrementing the counter
- * of registered wakeup events.
+ * Update the @ws' statistics and notify the PM core that the wakeup source has
+ * become inactive by decrementing the counter of wakeup events being processed
+ * and incrementing the counter of registered wakeup events.
+ */
+static void wakeup_source_deactivate(struct wakeup_source *ws)
+{
+	ktime_t duration;
+	ktime_t now;
+
+	ws->relax_count++;
+	/*
+	 * __pm_relax() may be called directly or from a timer function.
+	 * If it is called directly right after the timer function has been
+	 * started, but before the timer function calls __pm_relax(), it is
+	 * possible that __pm_stay_awake() will be called in the meantime and
+	 * will set ws->active.  Then, ws->active may be cleared immediately
+	 * by the __pm_relax() called from the timer function, but in such a
+	 * case ws->relax_count will be different from ws->active_count.
+	 */
+	if (ws->relax_count != ws->active_count) {
+		ws->relax_count--;
+		return;
+	}
+
+	ws->active = false;
+
+	now = ktime_get();
+	duration = ktime_sub(now, ws->last_time);
+	ws->total_time = ktime_add(ws->total_time, duration);
+	if (ktime_to_ns(duration) > ktime_to_ns(ws->max_time))
+		ws->max_time = duration;
+
+	del_timer(&ws->timer);
+
+	/*
+	 * event_count has to be incremented before events_in_progress is
+	 * modified, so that the callers of pm_check_wakeup_events() and
+	 * pm_save_wakeup_count() don't see the old value of event_count and
+	 * events_in_progress equal to zero at the same time.
+	 */
+	atomic_inc(&event_count);
+	smp_mb__before_atomic_dec();
+	atomic_dec(&events_in_progress);
+}
+
+/**
+ * __pm_relax - Notify the PM core that processing of a wakeup event has ended.
+ * @ws: Wakeup source object associated with the source of the event.
  *
  * Call this function for wakeup events whose processing started with calling
- * pm_stay_awake().
+ * __pm_stay_awake().
  *
  * It is safe to call it from interrupt context.
  */
-void pm_relax(void)
+void __pm_relax(struct wakeup_source *ws)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&events_lock, flags);
-	if (events_in_progress) {
-		events_in_progress--;
-		event_count++;
-	}
-	spin_unlock_irqrestore(&events_lock, flags);
+	if (!ws)
+		return;
+
+	spin_lock_irqsave(&ws->lock, flags);
+	if (ws->active)
+		wakeup_source_deactivate(ws);
+	spin_unlock_irqrestore(&ws->lock, flags);
+}
+EXPORT_SYMBOL_GPL(__pm_relax);
+
+/**
+ * pm_relax - Notify the PM core that processing of a wakeup event has ended.
+ * @dev: Device that signaled the event.
+ *
+ * Execute __pm_relax() for the @dev's wakeup source object.
+ */
+void pm_relax(struct device *dev)
+{
+	unsigned long flags;
+
+	if (!dev)
+		return;
+
+	spin_lock_irqsave(&dev->power.lock, flags);
+	__pm_relax(dev->power.wakeup);
+	spin_unlock_irqrestore(&dev->power.lock, flags);
 }
+EXPORT_SYMBOL_GPL(pm_relax);
 
 /**
  * pm_wakeup_timer_fn - Delayed finalization of a wakeup event.
+ * @data: Address of the wakeup source object associated with the event source.
  *
- * Decrease the counter of wakeup events being processed after it was increased
- * by pm_wakeup_event().
+ * Call __pm_relax() for the wakeup source whose address is stored in @data.
  */
 static void pm_wakeup_timer_fn(unsigned long data)
 {
+	__pm_relax((struct wakeup_source *)data);
+}
+
+/**
+ * __pm_wakeup_event - Notify the PM core of a wakeup event.
+ * @ws: Wakeup source object associated with the event source.
+ * @msec: Anticipated event processing time (in milliseconds).
+ *
+ * Notify the PM core of a wakeup event whose source is @ws that will take
+ * approximately @msec milliseconds to be processed by the kernel.  If @ws is
+ * not active, activate it.  If @msec is nonzero, set up the @ws' timer to
+ * execute pm_wakeup_timer_fn() in the future.
+ *
+ * It is safe to call this function from interrupt context.
+ */
+void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec)
+{
 	unsigned long flags;
+	unsigned long expires;
 
-	spin_lock_irqsave(&events_lock, flags);
-	if (events_timer_expires
-	    && time_before_eq(events_timer_expires, jiffies)) {
-		events_in_progress--;
-		events_timer_expires = 0;
+	if (!ws)
+		return;
+
+	spin_lock_irqsave(&ws->lock, flags);
+
+	ws->event_count++;
+	if (!ws->active)
+		wakeup_source_activate(ws);
+
+	if (!msec) {
+		wakeup_source_deactivate(ws);
+		goto unlock;
 	}
-	spin_unlock_irqrestore(&events_lock, flags);
+
+	expires = jiffies + msecs_to_jiffies(msec);
+	if (!expires)
+		expires = 1;
+
+	if (time_after(expires, ws->timer_expires)) {
+		mod_timer(&ws->timer, expires);
+		ws->timer_expires = expires;
+	}
+
+ unlock:
+	spin_unlock_irqrestore(&ws->lock, flags);
 }
+EXPORT_SYMBOL_GPL(__pm_wakeup_event);
 
 /**
  * pm_wakeup_event - Notify the PM core of a wakeup event.
  * @dev: Device the wakeup event is related to.
  * @msec: Anticipated event processing time (in milliseconds).
  *
- * Notify the PM core of a wakeup event (signaled by @dev) that will take
- * approximately @msec milliseconds to be processed by the kernel.  Increment
- * the counter of registered wakeup events and (if @msec is nonzero) set up
- * the wakeup events timer to execute pm_wakeup_timer_fn() in future (if the
- * timer has not been set up already, increment the counter of wakeup events
- * being processed).  If @dev is not NULL, the counter of wakeup events related
- * to @dev is incremented too.
- *
- * It is safe to call this function from interrupt context.
+ * Call __pm_wakeup_event() for the @dev's wakeup source object.
  */
 void pm_wakeup_event(struct device *dev, unsigned int msec)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&events_lock, flags);
-	event_count++;
-	if (dev)
-		dev->power.wakeup_count++;
-
-	if (msec) {
-		unsigned long expires;
+	if (!dev)
+		return;
 
-		expires = jiffies + msecs_to_jiffies(msec);
-		if (!expires)
-			expires = 1;
+	spin_lock_irqsave(&dev->power.lock, flags);
+	__pm_wakeup_event(dev->power.wakeup, msec);
+	spin_unlock_irqrestore(&dev->power.lock, flags);
+}
+EXPORT_SYMBOL_GPL(pm_wakeup_event);
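
/*
 * Illustrative sketch, not part of this patch: an interrupt handler that
 * hands the event off to user space and never learns when processing ends,
 * so it requests a fixed grace period instead of a matching pm_relax()
 * call.  foo_irq() and FOO_GRACE_MS are hypothetical.
 */
#define FOO_GRACE_MS	250

static irqreturn_t foo_irq(int irq, void *data)
{
	struct device *dev = data;

	/* Hold off suspend; the timer ends the period via __pm_relax(). */
	pm_wakeup_event(dev, FOO_GRACE_MS);
	return IRQ_HANDLED;
}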
 
-		if (!events_timer_expires
-		    || time_after(expires, events_timer_expires)) {
-			if (!events_timer_expires)
-				events_in_progress++;
+/**
+ * pm_wakeup_update_hit_counts - Update hit counts of all active wakeup sources.
+ */
+static void pm_wakeup_update_hit_counts(void)
+{
+	unsigned long flags;
+	struct wakeup_source *ws;
 
-			mod_timer(&events_timer, expires);
-			events_timer_expires = expires;
-		}
+	rcu_read_lock();
+	list_for_each_entry_rcu(ws, &wakeup_sources, entry) {
+		spin_lock_irqsave(&ws->lock, flags);
+		if (ws->active)
+			ws->hit_count++;
+		spin_unlock_irqrestore(&ws->lock, flags);
 	}
-	spin_unlock_irqrestore(&events_lock, flags);
+	rcu_read_unlock();
 }
 
 /**
@@ -184,10 +556,13 @@ bool pm_check_wakeup_events(void)
 
 	spin_lock_irqsave(&events_lock, flags);
 	if (events_check_enabled) {
-		ret = (event_count == saved_event_count) && !events_in_progress;
+		ret = ((unsigned int)atomic_read(&event_count) == saved_count)
+			&& !atomic_read(&events_in_progress);
 		events_check_enabled = ret;
 	}
 	spin_unlock_irqrestore(&events_lock, flags);
+	if (!ret)
+		pm_wakeup_update_hit_counts();
 	return ret;
 }
 
@@ -202,24 +577,20 @@ bool pm_check_wakeup_events(void)
  * drop down to zero has been interrupted by a signal (and the current number
  * of wakeup events being processed is still nonzero).  Otherwise return true.
  */
-bool pm_get_wakeup_count(unsigned long *count)
+bool pm_get_wakeup_count(unsigned int *count)
 {
 	bool ret;
 
-	spin_lock_irq(&events_lock);
 	if (capable(CAP_SYS_ADMIN))
 		events_check_enabled = false;
 
-	while (events_in_progress && !signal_pending(current)) {
-		spin_unlock_irq(&events_lock);
-
-		schedule_timeout_interruptible(msecs_to_jiffies(100));
-
-		spin_lock_irq(&events_lock);
+	while (atomic_read(&events_in_progress) && !signal_pending(current)) {
+		pm_wakeup_update_hit_counts();
+		schedule_timeout_interruptible(msecs_to_jiffies(TIMEOUT));
 	}
-	*count = event_count;
-	ret = !events_in_progress;
-	spin_unlock_irq(&events_lock);
+
+	ret = !atomic_read(&events_in_progress);
+	*count = atomic_read(&event_count);
 	return ret;
 }
 
@@ -232,16 +603,102 @@ bool pm_get_wakeup_count(unsigned long *count)
  * old number of registered wakeup events to be used by pm_check_wakeup_events()
  * and return true.  Otherwise return false.
  */
-bool pm_save_wakeup_count(unsigned long count)
+bool pm_save_wakeup_count(unsigned int count)
 {
 	bool ret = false;
 
 	spin_lock_irq(&events_lock);
-	if (count == event_count && !events_in_progress) {
-		saved_event_count = count;
+	if (count == (unsigned int)atomic_read(&event_count)
+	    && !atomic_read(&events_in_progress)) {
+		saved_count = count;
 		events_check_enabled = true;
 		ret = true;
 	}
 	spin_unlock_irq(&events_lock);
+	if (!ret)
+		pm_wakeup_update_hit_counts();
+	return ret;
+}
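
/*
 * Illustrative user-space sketch of the wakeup_count handshake the two
 * functions above implement: read the count, write it back (the write
 * fails if wakeup events arrived in between), then initiate suspend.
 * Paths are the standard sysfs ones; error handling is trimmed.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int suspend_with_wakeup_check(void)
{
	char buf[32];
	ssize_t n;
	int fd;

	fd = open("/sys/power/wakeup_count", O_RDWR);
	n = read(fd, buf, sizeof(buf) - 1);
	if (n <= 0) {
		close(fd);
		return -1;
	}
	buf[n] = '\0';

	if (write(fd, buf, strlen(buf)) < 0) {
		close(fd);
		return -1;	/* a wakeup event intervened; retry later */
	}
	close(fd);

	fd = open("/sys/power/state", O_WRONLY);
	write(fd, "mem", 3);	/* aborted if more wakeup events show up */
	close(fd);
	return 0;
}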
+
+static struct dentry *wakeup_sources_stats_dentry;
+
+/**
+ * print_wakeup_source_stats - Print wakeup source statistics information.
+ * @m: seq_file to print the statistics into.
+ * @ws: Wakeup source object to print the statistics for.
+ */
+static int print_wakeup_source_stats(struct seq_file *m,
+				     struct wakeup_source *ws)
+{
+	unsigned long flags;
+	ktime_t total_time;
+	ktime_t max_time;
+	unsigned long active_count;
+	ktime_t active_time;
+	int ret;
+
+	spin_lock_irqsave(&ws->lock, flags);
+
+	total_time = ws->total_time;
+	max_time = ws->max_time;
+	active_count = ws->active_count;
+	if (ws->active) {
+		active_time = ktime_sub(ktime_get(), ws->last_time);
+		total_time = ktime_add(total_time, active_time);
+		if (active_time.tv64 > max_time.tv64)
+			max_time = active_time;
+	} else {
+		active_time = ktime_set(0, 0);
+	}
+
+	ret = seq_printf(m, "%-12s\t%lu\t\t%lu\t\t%lu\t\t"
+			"%lld\t\t%lld\t\t%lld\t\t%lld\n",
+			ws->name, active_count, ws->event_count, ws->hit_count,
+			ktime_to_ms(active_time), ktime_to_ms(total_time),
+			ktime_to_ms(max_time), ktime_to_ms(ws->last_time));
+
+	spin_unlock_irqrestore(&ws->lock, flags);
+
 	return ret;
 }
+
+/**
+ * wakeup_sources_stats_show - Print wakeup sources statistics information.
+ * @m: seq_file to print the statistics into.
+ */
+static int wakeup_sources_stats_show(struct seq_file *m, void *unused)
+{
+	struct wakeup_source *ws;
+
+	seq_puts(m, "name\t\tactive_count\tevent_count\thit_count\t"
+		"active_since\ttotal_time\tmax_time\tlast_change\n");
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ws, &wakeup_sources, entry)
+		print_wakeup_source_stats(m, ws);
+	rcu_read_unlock();
+
+	return 0;
+}
+
+static int wakeup_sources_stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, wakeup_sources_stats_show, NULL);
+}
+
+static const struct file_operations wakeup_sources_stats_fops = {
+	.owner = THIS_MODULE,
+	.open = wakeup_sources_stats_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int __init wakeup_sources_debugfs_init(void)
+{
+	wakeup_sources_stats_dentry = debugfs_create_file("wakeup_sources",
+			S_IRUGO, NULL, NULL, &wakeup_sources_stats_fops);
+	return 0;
+}
+
+postcore_initcall(wakeup_sources_debugfs_init);