From e172662d113ceb22db727a979bb35b9c02f703b5 Mon Sep 17 00:00:00 2001
From: Wu Fengguang
Date: Thu, 2 Dec 2010 14:31:13 -0800
Subject: vmstat: fix dirty threshold ordering

The nr_dirty_[background_]threshold fields are misplaced before the
numa_* fields, and users will read strange values.

This is the right order; before the patch, nr_dirty_background_threshold
would read as 0 (the value from numa_miss):

	numa_hit 128501
	numa_miss 0
	numa_foreign 0
	numa_interleave 7388
	numa_local 128501
	numa_other 0
	nr_dirty_threshold 144291
	nr_dirty_background_threshold 72145

Signed-off-by: Wu Fengguang
Cc: Michael Rubin
Reviewed-by: KOSAKI Motohiro
Reviewed-by: Minchan Kim
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/vmstat.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/vmstat.c b/mm/vmstat.c
index 42eac4d33216..8f62f17ee1c7 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -750,8 +750,6 @@ static const char * const vmstat_text[] = {
 	"nr_shmem",
 	"nr_dirtied",
 	"nr_written",
-	"nr_dirty_threshold",
-	"nr_dirty_background_threshold",
 
 #ifdef CONFIG_NUMA
 	"numa_hit",
@@ -761,6 +759,8 @@ static const char * const vmstat_text[] = {
 	"numa_local",
 	"numa_other",
 #endif
+	"nr_dirty_threshold",
+	"nr_dirty_background_threshold",
 
 #ifdef CONFIG_VM_EVENT_COUNTERS
 	"pgpgin",
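The ordering bug above generalizes: /proc/vmstat prints the i-th string in
vmstat_text against the i-th counter, so the name table must mirror the
counter layout exactly. A minimal userspace sketch (hypothetical names, not
kernel code) of how one misplaced entry mislabels every value after it:

#include <stdio.h>

/* The enum defines the counter layout; the name table must match it. */
enum stat_item { NR_DIRTIED, NUMA_HIT, NUMA_MISS, NR_DIRTY_THRESHOLD, NR_ITEMS };

static const char * const stat_text[] = {
	"nr_dirtied",
	"nr_dirty_threshold",	/* misplaced: belongs after numa_miss */
	"numa_hit",
	"numa_miss",
};

int main(void)
{
	unsigned long stats[NR_ITEMS] = { 42, 128501, 0, 144291 };
	int i;

	/* Every name from the misplaced entry onward reports a neighbour's
	 * value, e.g. "nr_dirty_threshold 128501" (really numa_hit). */
	for (i = 0; i < NR_ITEMS; i++)
		printf("%s %lu\n", stat_text[i], stats[i]);
	return 0;
}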
Bruce Fields" Cc: Neil Brown Cc: Alex Elder Cc: xfs-masters@oss.sgi.com Cc: Christoph Lameter Cc: Pekka Enberg Cc: Andrew Morton Cc: netfilter-devel@vger.kernel.org Cc: Trond Myklebust Cc: linux-nfs@vger.kernel.org --- drivers/ata/libata-core.c | 2 +- drivers/ata/libata-sff.c | 2 +- drivers/macintosh/rack-meter.c | 4 ++-- drivers/media/dvb/dvb-usb/dvb-usb-remote.c | 2 +- drivers/media/video/em28xx/em28xx-input.c | 2 +- drivers/net/chelsio/my3126.c | 2 +- drivers/net/ibm_newemac/core.c | 4 ++-- drivers/net/wireless/zd1211rw/zd_mac.c | 3 +-- drivers/power/ds2760_battery.c | 6 ++---- drivers/power/intel_mid_battery.c | 6 ++---- drivers/staging/pohmelfs/inode.c | 4 ++-- drivers/usb/atm/cxacru.c | 2 +- drivers/video/fb_defio.c | 2 +- drivers/video/omap/lcd_mipid.c | 2 +- fs/nfsd/nfs4state.c | 2 +- fs/xfs/xfs_mru_cache.c | 2 +- mm/slab.c | 2 +- mm/vmstat.c | 2 +- net/atm/lec.c | 2 +- net/core/netpoll.c | 2 +- net/netfilter/ipvs/ip_vs_ctl.c | 2 +- net/sunrpc/xprtsock.c | 2 +- 22 files changed, 27 insertions(+), 32 deletions(-) (limited to 'mm/vmstat.c') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 7f77c67d267c..6669b44044fb 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6122,7 +6122,7 @@ static void ata_port_detach(struct ata_port *ap) /* it better be dead now */ WARN_ON(!(ap->pflags & ATA_PFLAG_UNLOADED)); - cancel_rearming_delayed_work(&ap->hotplug_task); + cancel_delayed_work_sync(&ap->hotplug_task); skip_eh: if (ap->pmp_link) { diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index d05387d1e14b..8660a70f164a 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -1320,7 +1320,7 @@ void ata_sff_flush_pio_task(struct ata_port *ap) { DPRINTK("ENTER\n"); - cancel_rearming_delayed_work(&ap->sff_pio_task); + cancel_delayed_work_sync(&ap->sff_pio_task); ap->hsm_task_state = HSM_ST_IDLE; if (ata_msg_ctl(ap)) diff --git a/drivers/macintosh/rack-meter.c b/drivers/macintosh/rack-meter.c index 53cce3a5da23..39f660b2a60d 100644 --- a/drivers/macintosh/rack-meter.c +++ b/drivers/macintosh/rack-meter.c @@ -285,8 +285,8 @@ static void __devinit rackmeter_init_cpu_sniffer(struct rackmeter *rm) static void __devexit rackmeter_stop_cpu_sniffer(struct rackmeter *rm) { - cancel_rearming_delayed_work(&rm->cpu[0].sniffer); - cancel_rearming_delayed_work(&rm->cpu[1].sniffer); + cancel_delayed_work_sync(&rm->cpu[0].sniffer); + cancel_delayed_work_sync(&rm->cpu[1].sniffer); } static int __devinit rackmeter_setup(struct rackmeter *rm) diff --git a/drivers/media/dvb/dvb-usb/dvb-usb-remote.c b/drivers/media/dvb/dvb-usb/dvb-usb-remote.c index b579fed3ab3f..0831469af69f 100644 --- a/drivers/media/dvb/dvb-usb/dvb-usb-remote.c +++ b/drivers/media/dvb/dvb-usb/dvb-usb-remote.c @@ -298,7 +298,7 @@ int dvb_usb_remote_init(struct dvb_usb_device *d) int dvb_usb_remote_exit(struct dvb_usb_device *d) { if (d->state & DVB_USB_STATE_REMOTE) { - cancel_rearming_delayed_work(&d->rc_query_work); + cancel_delayed_work_sync(&d->rc_query_work); flush_scheduled_work(); if (d->props.rc.mode == DVB_RC_LEGACY) input_unregister_device(d->rc_input_dev); diff --git a/drivers/media/video/em28xx/em28xx-input.c b/drivers/media/video/em28xx/em28xx-input.c index 6759cd5570dd..99403c720e3a 100644 --- a/drivers/media/video/em28xx/em28xx-input.c +++ b/drivers/media/video/em28xx/em28xx-input.c @@ -557,7 +557,7 @@ void em28xx_deregister_snapshot_button(struct em28xx *dev) { if (dev->sbutton_input_dev != NULL) { em28xx_info("Deregistering snapshot button\n"); - 
cancel_rearming_delayed_work(&dev->sbutton_query_work); + cancel_delayed_work_sync(&dev->sbutton_query_work); input_unregister_device(dev->sbutton_input_dev); dev->sbutton_input_dev = NULL; } diff --git a/drivers/net/chelsio/my3126.c b/drivers/net/chelsio/my3126.c index 4c6028512d10..a683fd3bb624 100644 --- a/drivers/net/chelsio/my3126.c +++ b/drivers/net/chelsio/my3126.c @@ -22,7 +22,7 @@ static int my3126_interrupt_enable(struct cphy *cphy) static int my3126_interrupt_disable(struct cphy *cphy) { - cancel_rearming_delayed_work(&cphy->phy_update); + cancel_delayed_work_sync(&cphy->phy_update); return 0; } diff --git a/drivers/net/ibm_newemac/core.c b/drivers/net/ibm_newemac/core.c index 06bb9b799458..e209efaa01b9 100644 --- a/drivers/net/ibm_newemac/core.c +++ b/drivers/net/ibm_newemac/core.c @@ -1279,7 +1279,7 @@ static void emac_force_link_update(struct emac_instance *dev) netif_carrier_off(dev->ndev); smp_rmb(); if (dev->link_polling) { - cancel_rearming_delayed_work(&dev->link_work); + cancel_delayed_work_sync(&dev->link_work); if (dev->link_polling) schedule_delayed_work(&dev->link_work, PHY_POLL_LINK_OFF); } @@ -1294,7 +1294,7 @@ static int emac_close(struct net_device *ndev) if (dev->phy.address >= 0) { dev->link_polling = 0; - cancel_rearming_delayed_work(&dev->link_work); + cancel_delayed_work_sync(&dev->link_work); } mutex_lock(&dev->link_lock); emac_netif_stop(dev); diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index 43307bd42a69..6107304cb94c 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -1207,7 +1207,6 @@ static void housekeeping_enable(struct zd_mac *mac) static void housekeeping_disable(struct zd_mac *mac) { dev_dbg_f(zd_mac_dev(mac), "\n"); - cancel_rearming_delayed_workqueue(zd_workqueue, - &mac->housekeeping.link_led_work); + cancel_delayed_work_sync(&mac->housekeeping.link_led_work); zd_chip_control_leds(&mac->chip, ZD_LED_OFF); } diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c index b3c01c16a164..e7f89785beef 100644 --- a/drivers/power/ds2760_battery.c +++ b/drivers/power/ds2760_battery.c @@ -580,10 +580,8 @@ static int ds2760_battery_remove(struct platform_device *pdev) { struct ds2760_device_info *di = platform_get_drvdata(pdev); - cancel_rearming_delayed_workqueue(di->monitor_wqueue, - &di->monitor_work); - cancel_rearming_delayed_workqueue(di->monitor_wqueue, - &di->set_charged_work); + cancel_delayed_work_sync(&di->monitor_work); + cancel_delayed_work_sync(&di->set_charged_work); destroy_workqueue(di->monitor_wqueue); power_supply_unregister(&di->bat); kfree(di); diff --git a/drivers/power/intel_mid_battery.c b/drivers/power/intel_mid_battery.c index 2a10cd361181..36cf402c0677 100644 --- a/drivers/power/intel_mid_battery.c +++ b/drivers/power/intel_mid_battery.c @@ -730,8 +730,7 @@ static __devinit int probe(int irq, struct device *dev) power_reg_failed_1: power_supply_unregister(&pbi->batt); power_reg_failed: - cancel_rearming_delayed_workqueue(pbi->monitor_wqueue, - &pbi->monitor_battery); + cancel_delayed_work_sync(&pbi->monitor_battery); requestirq_failed: destroy_workqueue(pbi->monitor_wqueue); wqueue_failed: @@ -760,8 +759,7 @@ static int __devexit platform_pmic_battery_remove(struct platform_device *pdev) struct pmic_power_module_info *pbi = dev_get_drvdata(&pdev->dev); free_irq(pbi->irq, pbi); - cancel_rearming_delayed_workqueue(pbi->monitor_wqueue, - &pbi->monitor_battery); + cancel_delayed_work_sync(&pbi->monitor_battery); 
destroy_workqueue(pbi->monitor_wqueue); power_supply_unregister(&pbi->usb); diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c index 61685ccceda8..d4a1f204b9d5 100644 --- a/drivers/staging/pohmelfs/inode.c +++ b/drivers/staging/pohmelfs/inode.c @@ -1318,8 +1318,8 @@ static void pohmelfs_put_super(struct super_block *sb) } psb->trans_scan_timeout = psb->drop_scan_timeout = 0; - cancel_rearming_delayed_work(&psb->dwork); - cancel_rearming_delayed_work(&psb->drop_dwork); + cancel_delayed_work_sync(&psb->dwork); + cancel_delayed_work_sync(&psb->drop_dwork); flush_scheduled_work(); dprintk("%s: stopped workqueues.\n", __func__); diff --git a/drivers/usb/atm/cxacru.c b/drivers/usb/atm/cxacru.c index f383cb42b1d7..a845f8b8382f 100644 --- a/drivers/usb/atm/cxacru.c +++ b/drivers/usb/atm/cxacru.c @@ -1247,7 +1247,7 @@ static void cxacru_unbind(struct usbatm_data *usbatm_instance, mutex_unlock(&instance->poll_state_serialize); if (is_polling) - cancel_rearming_delayed_work(&instance->poll_work); + cancel_delayed_work_sync(&instance->poll_work); usb_kill_urb(instance->snd_urb); usb_kill_urb(instance->rcv_urb); diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c index 6b93ef93cb12..804000183c5e 100644 --- a/drivers/video/fb_defio.c +++ b/drivers/video/fb_defio.c @@ -75,7 +75,7 @@ int fb_deferred_io_fsync(struct file *file, int datasync) return 0; /* Kill off the delayed work */ - cancel_rearming_delayed_work(&info->deferred_work); + cancel_delayed_work_sync(&info->deferred_work); /* Run it immediately */ return schedule_delayed_work(&info->deferred_work, 0); diff --git a/drivers/video/omap/lcd_mipid.c b/drivers/video/omap/lcd_mipid.c index 64dcc7439c99..90e3bdd1b7ab 100644 --- a/drivers/video/omap/lcd_mipid.c +++ b/drivers/video/omap/lcd_mipid.c @@ -396,7 +396,7 @@ static void mipid_esd_start_check(struct mipid_device *md) static void mipid_esd_stop_check(struct mipid_device *md) { if (md->esd_check != NULL) - cancel_rearming_delayed_workqueue(md->esd_wq, &md->esd_work); + cancel_delayed_work_sync(&md->esd_work); } static void mipid_esd_work(struct work_struct *work) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 116cab970e0f..fbd18c3074bb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4336,7 +4336,7 @@ __nfs4_state_shutdown(void) void nfs4_state_shutdown(void) { - cancel_rearming_delayed_workqueue(laundry_wq, &laundromat_work); + cancel_delayed_work_sync(&laundromat_work); destroy_workqueue(laundry_wq); locks_end_grace(&nfsd4_manager); nfs4_lock_state(); diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 45ce15dc5b2b..edfa178bafb6 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -408,7 +408,7 @@ xfs_mru_cache_flush( spin_lock(&mru->lock); if (mru->queued) { spin_unlock(&mru->lock); - cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work); + cancel_delayed_work_sync(&mru->work); spin_lock(&mru->lock); } diff --git a/mm/slab.c b/mm/slab.c index b1e40dafbab3..dc983867682b 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1293,7 +1293,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, * anything expensive but will only modify reap_work * and reschedule the timer. */ - cancel_rearming_delayed_work(&per_cpu(slab_reap_work, cpu)); + cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu)); /* Now the cache_reaper is guaranteed to be not running. 
*/ per_cpu(slab_reap_work, cpu).work.func = NULL; break; diff --git a/mm/vmstat.c b/mm/vmstat.c index 42eac4d33216..d1f3cb63a8a9 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1033,7 +1033,7 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb, break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: - cancel_rearming_delayed_work(&per_cpu(vmstat_work, cpu)); + cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu)); per_cpu(vmstat_work, cpu).work.func = NULL; break; case CPU_DOWN_FAILED: diff --git a/net/atm/lec.c b/net/atm/lec.c index 181d70c73d70..96a4a4bd2304 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -1608,7 +1608,7 @@ static void lec_arp_destroy(struct lec_priv *priv) struct lec_arp_table *entry; int i; - cancel_rearming_delayed_work(&priv->lec_arp_work); + cancel_delayed_work_sync(&priv->lec_arp_work); /* * Remove all entries diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 4e98ffac3af0..d2910947a3ac 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -925,7 +925,7 @@ void __netpoll_cleanup(struct netpoll *np) skb_queue_purge(&npinfo->arp_tx); skb_queue_purge(&npinfo->txq); - cancel_rearming_delayed_work(&npinfo->tx_work); + cancel_delayed_work_sync(&npinfo->tx_work); /* clean after last, unfinished work */ __skb_queue_purge(&npinfo->txq); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 5f5daa30b0af..96334e0fd04e 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3432,7 +3432,7 @@ void ip_vs_control_cleanup(void) { EnterFunction(2); ip_vs_trash_cleanup(); - cancel_rearming_delayed_work(&defense_work); + cancel_delayed_work_sync(&defense_work); cancel_work_sync(&defense_work.work); ip_vs_kill_estimator(&ip_vs_stats); unregister_sysctl_table(sysctl_header); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index dfcab5ac65af..96549df836ee 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -770,7 +770,7 @@ static void xs_destroy(struct rpc_xprt *xprt) dprintk("RPC: xs_destroy xprt %p\n", xprt); - cancel_rearming_delayed_work(&transport->connect_worker); + cancel_delayed_work_sync(&transport->connect_worker); xs_close(xprt); xs_free_peer_addresses(xprt); -- cgit 1.4.1 From 12938a9220a38d555e38dc9b40021e664b99a1f1 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 6 Dec 2010 11:16:20 -0600 Subject: vmstat: Optimize zone counter modifications through the use of this cpu operations this cpu operations can be used to slightly optimize the function. The changes will avoid some address calculations and replace them with the use of the percpu segment register. If one would have this_cpu_inc_return and this_cpu_dec_return then it would be possible to optimize inc_zone_page_state and dec_zone_page_state even more. V1->V2: - Fix __dec_zone_state overflow handling - Use s8 variables for temporary storage. V2->V3: - Put __percpu annotations in correct places. Reviewed-by: Pekka Enberg Acked-by: H. 
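All of these call sites share one shape: a delayed work that re-arms itself
from its own handler, torn down at teardown time. The conversion is safe
because cancel_delayed_work_sync() both waits for a running instance and
prevents it from re-queueing itself. A self-contained sketch of the pattern
as a hypothetical module (illustrative only, not taken from the patch):

#include <linux/module.h>
#include <linux/workqueue.h>

static void poll_fn(struct work_struct *work);
static DECLARE_DELAYED_WORK(poll_work, poll_fn);

static void poll_fn(struct work_struct *work)
{
	pr_info("polling\n");
	/* Self-rearming: this is what made plain cancel_delayed_work() racy. */
	schedule_delayed_work(&poll_work, HZ);
}

static int __init demo_init(void)
{
	schedule_delayed_work(&poll_work, HZ);
	return 0;
}

static void __exit demo_exit(void)
{
	/* One call suffices even though the handler keeps rescheduling. */
	cancel_delayed_work_sync(&poll_work);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");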
From 12938a9220a38d555e38dc9b40021e664b99a1f1 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Mon, 6 Dec 2010 11:16:20 -0600
Subject: vmstat: Optimize zone counter modifications through the use of this
 cpu operations

this_cpu operations can be used to slightly optimize these functions. The
changes avoid some address calculations and replace them with uses of the
percpu segment register.

If one would have this_cpu_inc_return and this_cpu_dec_return then it
would be possible to optimize inc_zone_page_state and dec_zone_page_state
even more.

V1->V2:
- Fix __dec_zone_state overflow handling
- Use s8 variables for temporary storage.

V2->V3:
- Put __percpu annotations in correct places.

Reviewed-by: Pekka Enberg
Acked-by: H. Peter Anvin
Signed-off-by: Christoph Lameter
Signed-off-by: Tejun Heo
---
 mm/vmstat.c | 48 ++++++++++++++++++++++++++++--------------------
 1 file changed, 28 insertions(+), 20 deletions(-)

diff --git a/mm/vmstat.c b/mm/vmstat.c
index 8f62f17ee1c7..3ad909d9600f 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -167,18 +167,20 @@ static void refresh_zone_stat_thresholds(void)
 void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
 				int delta)
 {
-	struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
-
-	s8 *p = pcp->vm_stat_diff + item;
+	struct per_cpu_pageset __percpu *pcp = zone->pageset;
+	s8 __percpu *p = pcp->vm_stat_diff + item;
 	long x;
+	long t;
+
+	x = delta + __this_cpu_read(*p);
 
-	x = delta + *p;
+	t = __this_cpu_read(pcp->stat_threshold);
 
-	if (unlikely(x > pcp->stat_threshold || x < -pcp->stat_threshold)) {
+	if (unlikely(x > t || x < -t)) {
 		zone_page_state_add(x, zone, item);
 		x = 0;
 	}
-	*p = x;
+	__this_cpu_write(*p, x);
 }
 EXPORT_SYMBOL(__mod_zone_page_state);
 
@@ -221,16 +223,19 @@ EXPORT_SYMBOL(mod_zone_page_state);
  */
 void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
 {
-	struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
-	s8 *p = pcp->vm_stat_diff + item;
+	struct per_cpu_pageset __percpu *pcp = zone->pageset;
+	s8 __percpu *p = pcp->vm_stat_diff + item;
+	s8 v, t;
 
-	(*p)++;
+	__this_cpu_inc(*p);
 
-	if (unlikely(*p > pcp->stat_threshold)) {
-		int overstep = pcp->stat_threshold / 2;
+	v = __this_cpu_read(*p);
+	t = __this_cpu_read(pcp->stat_threshold);
+	if (unlikely(v > t)) {
+		s8 overstep = t >> 1;
 
-		zone_page_state_add(*p + overstep, zone, item);
-		*p = -overstep;
+		zone_page_state_add(v + overstep, zone, item);
+		__this_cpu_write(*p, -overstep);
 	}
 }
 
@@ -242,16 +247,19 @@ EXPORT_SYMBOL(__inc_zone_page_state);
 
 void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
 {
-	struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset);
-	s8 *p = pcp->vm_stat_diff + item;
+	struct per_cpu_pageset __percpu *pcp = zone->pageset;
+	s8 __percpu *p = pcp->vm_stat_diff + item;
+	s8 v, t;
 
-	(*p)--;
+	__this_cpu_dec(*p);
 
-	if (unlikely(*p < - pcp->stat_threshold)) {
-		int overstep = pcp->stat_threshold / 2;
+	v = __this_cpu_read(*p);
+	t = __this_cpu_read(pcp->stat_threshold);
+	if (unlikely(v < - t)) {
+		s8 overstep = t >> 1;
 
-		zone_page_state_add(*p - overstep, zone, item);
-		*p = overstep;
+		zone_page_state_add(v - overstep, zone, item);
+		__this_cpu_write(*p, overstep);
 	}
 }
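The pattern in these functions is counter folding: each CPU accumulates
small deltas in a signed byte and spills into the global zone counter only
when a threshold is crossed. A standalone sketch of that logic (hypothetical
userspace stand-ins for the percpu accessors; single-threaded, so no
concurrency concerns):

#include <stdio.h>

static long zone_count;				/* stands in for the global zone counter */
static signed char diff;			/* stands in for this CPU's vm_stat_diff slot */
static const signed char threshold = 32;	/* stands in for pcp->stat_threshold */

static void mod_state_sketch(int delta)
{
	long x = delta + diff;			/* delta + __this_cpu_read(*p) */

	if (x > threshold || x < -threshold) {
		zone_count += x;		/* zone_page_state_add() */
		x = 0;
	}
	diff = (signed char)x;			/* __this_cpu_write(*p, x) */
}

int main(void)
{
	int i;

	for (i = 0; i < 100; i++)
		mod_state_sketch(1);
	/* Most increments stayed CPU-local; only 3 spills hit zone_count. */
	printf("zone=%ld diff=%d\n", zone_count, diff);
	return 0;
}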
From 908ee0f122bf2a67414854af5b90c6621d186a71 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Mon, 6 Dec 2010 11:40:02 -0600
Subject: vmstat: Use this_cpu_inc_return for vm statistics

this_cpu_inc_return() saves us a memory access there. Code size does not
change.

V1->V2:
- Fixed the location of the __per_cpu pointer attributes
- Sparse checked

V2->V3:
- Move fixes to __percpu attribute usage to earlier patch

Reviewed-by: Pekka Enberg
Acked-by: H. Peter Anvin
Signed-off-by: Christoph Lameter
Signed-off-by: Tejun Heo
---
 mm/vmstat.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/mm/vmstat.c b/mm/vmstat.c
index 3ad909d9600f..f9a7bc89fd10 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -227,9 +227,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
 	s8 __percpu *p = pcp->vm_stat_diff + item;
 	s8 v, t;
 
-	__this_cpu_inc(*p);
-
-	v = __this_cpu_read(*p);
+	v = __this_cpu_inc_return(*p);
 	t = __this_cpu_read(pcp->stat_threshold);
 	if (unlikely(v > t)) {
 		s8 overstep = t >> 1;
@@ -251,9 +249,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
 	s8 __percpu *p = pcp->vm_stat_diff + item;
 	s8 v, t;
 
-	__this_cpu_dec(*p);
-
-	v = __this_cpu_read(*p);
+	v = __this_cpu_dec_return(*p);
 	t = __this_cpu_read(pcp->stat_threshold);
 	if (unlikely(v < - t)) {
 		s8 overstep = t >> 1;
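The follow-up micro-optimization: __this_cpu_inc() followed by
__this_cpu_read() touches the percpu slot twice, while
__this_cpu_inc_return() does the increment and hands back the new value in
one operation. A plain-C stand-in (illustrative only, not the percpu
implementation):

#include <stdio.h>

static int counter;

/* ++*p yields the freshly incremented value in a single access, the way
 * __this_cpu_inc_return(*p) replaces __this_cpu_inc() + __this_cpu_read(). */
static int inc_return_sketch(int *p)
{
	return ++*p;
}

int main(void)
{
	printf("%d\n", inc_return_sketch(&counter));	/* prints 1 */
	return 0;
}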
From 7c83912062c801738d7d19acaf8f7fec25ea663c Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Tue, 14 Dec 2010 10:28:46 -0600
Subject: vmstat: Use per cpu atomics to avoid interrupt disable / enable

Currently the operations to increment vm counters must disable interrupts
in order to not mess up their housekeeping of counters. So use
this_cpu_cmpxchg() to avoid the overhead.

Since we can no longer count on preemption being disabled we still have
some minor issues. The fetching of the counter thresholds is racy. A
threshold from another cpu may be applied if we happen to be rescheduled
on another cpu. However, the following vmstat operation will then bring
the counter again under the threshold limit.

The operations for __xxx_zone_state are not changed since the caller has
taken care of the synchronization needs (and therefore the cycle count is
even less than the optimized version for the irq disable case provided
here).

The optimization using this_cpu_cmpxchg will only be used if the arch
supports efficient this_cpu_ops (must have CONFIG_CMPXCHG_LOCAL set!)

The use of this_cpu_cmpxchg reduces the cycle count for the counter
operations by 80% (inc_zone_page_state goes from 170 cycles to 32).

Signed-off-by: Christoph Lameter
---
 mm/vmstat.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 87 insertions(+), 14 deletions(-)

diff --git a/mm/vmstat.c b/mm/vmstat.c
index f9a7bc89fd10..7329eb8a08aa 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -184,20 +184,6 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
 }
 EXPORT_SYMBOL(__mod_zone_page_state);
 
-/*
- * For an unknown interrupt state
- */
-void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
-					int delta)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	__mod_zone_page_state(zone, item, delta);
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL(mod_zone_page_state);
-
 /*
  * Optimized increment and decrement functions.
  *
@@ -265,6 +251,92 @@ void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
 }
 EXPORT_SYMBOL(__dec_zone_page_state);
 
+#ifdef CONFIG_CMPXCHG_LOCAL
+/*
+ * If we have cmpxchg_local support then we do not need to incur the overhead
+ * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.
+ *
+ * mod_state() modifies the zone counter state through atomic per cpu
+ * operations.
+ *
+ * Overstep mode specifies how overstep should be handled:
+ *        0       No overstepping
+ *        1       Overstepping half of threshold
+ *        -1      Overstepping minus half of threshold
+ */
+static inline void mod_state(struct zone *zone,
+	enum zone_stat_item item, int delta, int overstep_mode)
+{
+	struct per_cpu_pageset __percpu *pcp = zone->pageset;
+	s8 __percpu *p = pcp->vm_stat_diff + item;
+	long o, n, t, z;
+
+	do {
+		z = 0;  /* overflow to zone counters */
+
+		/*
+		 * The fetching of the stat_threshold is racy. We may apply
+		 * a counter threshold to the wrong cpu if we get
+		 * rescheduled while executing here. However, the following
+		 * will apply the threshold again and therefore bring the
+		 * counter under the threshold.
+		 */
+		t = this_cpu_read(pcp->stat_threshold);
+
+		o = this_cpu_read(*p);
+		n = delta + o;
+
+		if (n > t || n < -t) {
+			int os = overstep_mode * (t >> 1);
+
+			/* Overflow must be added to zone counters */
+			z = n + os;
+			n = -os;
+		}
+	} while (this_cpu_cmpxchg(*p, o, n) != o);
+
+	if (z)
+		zone_page_state_add(z, zone, item);
+}
+
+void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
+					int delta)
+{
+	mod_state(zone, item, delta, 0);
+}
+EXPORT_SYMBOL(mod_zone_page_state);
+
+void inc_zone_state(struct zone *zone, enum zone_stat_item item)
+{
+	mod_state(zone, item, 1, 1);
+}
+
+void inc_zone_page_state(struct page *page, enum zone_stat_item item)
+{
+	mod_state(page_zone(page), item, 1, 1);
+}
+EXPORT_SYMBOL(inc_zone_page_state);
+
+void dec_zone_page_state(struct page *page, enum zone_stat_item item)
+{
+	mod_state(page_zone(page), item, -1, -1);
+}
+EXPORT_SYMBOL(dec_zone_page_state);
+#else
+/*
+ * Use interrupt disable to serialize counter updates
+ */
+void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
+					int delta)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__mod_zone_page_state(zone, item, delta);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(mod_zone_page_state);
+
 void inc_zone_state(struct zone *zone, enum zone_stat_item item)
 {
 	unsigned long flags;
@@ -295,6 +367,7 @@ void dec_zone_page_state(struct page *page, enum zone_stat_item item)
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(dec_zone_page_state);
+#endif
 
 /*
  * Update the zone counters for one cpu.
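This commit replaces irq disable/enable with an optimistic cmpxchg loop:
read the per-cpu delta, compute the folded update, and retry if another
context changed the slot in the meantime. A userspace analogue using C11
atomics (illustrative names; a single atomic byte plays the role of this
CPU's vm_stat_diff slot):

#include <stdatomic.h>
#include <stdio.h>

static _Atomic long zone_count;
static _Atomic signed char diff;
static const signed char threshold = 32;

static void mod_state_sketch(int delta, int overstep_mode)
{
	signed char o, n;
	long z;

	do {
		z = 0;			/* overflow destined for zone_count */
		o = atomic_load(&diff);
		n = (signed char)(delta + o);
		if (n > threshold || n < -threshold) {
			int os = overstep_mode * (threshold >> 1);

			z = n + os;	/* fold overflow into the global counter */
			n = (signed char)-os;
		}
		/* Retry if someone else updated the slot since we read it. */
	} while (!atomic_compare_exchange_weak(&diff, &o, n));

	if (z)
		atomic_fetch_add(&zone_count, z);
}

int main(void)
{
	int i;

	for (i = 0; i < 1000; i++)
		mod_state_sketch(1, 1);	/* like inc_zone_page_state() */
	printf("zone=%ld diff=%d\n", (long)atomic_load(&zone_count),
	       (int)atomic_load(&diff));
	return 0;
}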