author		Tony Luck <tony.luck@intel.com>	2008-04-17 10:12:27 -0700
committer	Tony Luck <tony.luck@intel.com>	2008-04-17 10:12:27 -0700
commit		14d0647c98f52e2d76113d44de6b771ec87a934f (patch)
tree		4b9407b00ad0b5baf5ee6245a4157a07062680b5
parent		2a467d5f7d6bdc90c365db167a10022dd8351894 (diff)
parent		b64f34cdfe5bef9dfed1304c513220b0f2862eca (diff)
download	linux-14d0647c98f52e2d76113d44de6b771ec87a934f.tar.gz
Pull virt-cpu-accounting into release branch
-rw-r--r--	arch/ia64/Kconfig	11
-rw-r--r--	arch/ia64/ia32/elfcore32.h	14
-rw-r--r--	arch/ia64/kernel/asm-offsets.c	6
-rw-r--r--	arch/ia64/kernel/entry.S	65
-rw-r--r--	arch/ia64/kernel/fsys.S	26
-rw-r--r--	arch/ia64/kernel/head.S	20
-rw-r--r--	arch/ia64/kernel/ivt.S	69
-rw-r--r--	arch/ia64/kernel/minstate.h	14
-rw-r--r--	arch/ia64/kernel/time.c	78
-rw-r--r--	include/asm-ia64/cputime.h	104
-rw-r--r--	include/asm-ia64/system.h	12
-rw-r--r--	include/asm-ia64/thread_info.h	14
12 files changed, 432 insertions(+), 1 deletion(-)
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 8fa3faf5ef1b..eef457fda08f 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -283,6 +283,17 @@ config FORCE_MAX_ZONEORDER
 	default "17" if HUGETLB_PAGE
 	default "11"
 
+config VIRT_CPU_ACCOUNTING
+	bool "Deterministic task and CPU time accounting"
+	default n
+	help
+	  Select this option to enable more accurate task and CPU time
+	  accounting.  This is done by reading a CPU counter on each
+	  kernel entry and exit and on transitions within the kernel
+	  between system, softirq and hardirq state, so there is a
+	  small performance impact.
+	  If in doubt, say N here.
+
 config SMP
 	bool "Symmetric multi-processing support"
 	help
diff --git a/arch/ia64/ia32/elfcore32.h b/arch/ia64/ia32/elfcore32.h
index 446c9aac924d..9a3abf58cea3 100644
--- a/arch/ia64/ia32/elfcore32.h
+++ b/arch/ia64/ia32/elfcore32.h
@@ -30,7 +30,19 @@ struct elf_siginfo
 	int	si_errno;			/* errno */
 };
 
-#define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0)
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+/*
+ * Hack: compat_timeval (a pair of s32) is not type-compatible with the
+ * ia64-native timeval (a pair of s64), so arch/ia64/ia32/../../../fs/binfmt_elf.c
+ * gets compiler warnings on its use of cputime_to_timeval(), which is
+ * usually an alias of jiffies_to_timeval().
+ */
+#define cputime_to_timeval(a,b) \
+	do { (b)->tv_usec = 0; (b)->tv_sec = (a)/NSEC_PER_SEC; } while(0)
+#else
+#define jiffies_to_timeval(a,b) \
+	do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; } while(0)
+#endif
 
 struct elf_prstatus
 {
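For reference, the nanosecond-based replacement above keeps the old whole-second behaviour: tv_usec is still forced to 0, only the divisor changes from HZ to NSEC_PER_SEC. A standalone C sketch (compat_timeval reduced to the two fields that matter here):

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

struct compat_timeval { int32_t tv_sec; int32_t tv_usec; };

/* sketch of the CONFIG_VIRT_CPU_ACCOUNTING cputime_to_timeval() variant:
 * cputime is nanoseconds, so whole seconds are ct / NSEC_PER_SEC and the
 * sub-second part is deliberately dropped, as in the jiffies version */
static void cputime_to_timeval_sketch(uint64_t ct, struct compat_timeval *tv)
{
	tv->tv_usec = 0;
	tv->tv_sec  = ct / NSEC_PER_SEC;
}

int main(void)
{
	struct compat_timeval tv;
	cputime_to_timeval_sketch(3500000000ULL, &tv);	/* 3.5 s of cputime */
	printf("%d.%06d\n", tv.tv_sec, tv.tv_usec);	/* prints 3.000000 */
	return 0;
}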
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index 0aebc6f79e95..5865130b0a92 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -39,6 +39,12 @@ void foo(void)
 	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
 	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
 	DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
+	DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
+	DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));
+	DEFINE(TI_AC_UTIME, offsetof(struct thread_info, ac_utime));
+#endif
 
 	BLANK();
 
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 3c331c464b40..b0be4a280174 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -710,6 +710,16 @@ ENTRY(ia64_leave_syscall)
 (pUStk)	cmp.eq.unc p6,p0=r0,r0		// p6 <- pUStk
 #endif
 .work_processed_syscall:
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	adds r2=PT(LOADRS)+16,r12
+(pUStk)	mov.m r22=ar.itc			// fetch time at leave
+	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
+	;;
+(p6)	ld4 r31=[r18]				// load current_thread_info()->flags
+	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
+	adds r3=PT(AR_BSPSTORE)+16,r12		// deferred
+	;;
+#else
 	adds r2=PT(LOADRS)+16,r12
 	adds r3=PT(AR_BSPSTORE)+16,r12
 	adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
@@ -718,6 +728,7 @@ ENTRY(ia64_leave_syscall)
 	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
 	nop.i 0
 	;;
+#endif
 	mov r16=ar.bsp				// M2  get existing backing store pointer
 	ld8 r18=[r2],PT(R9)-PT(B6)		// load b6
 (p6)	and r15=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
@@ -737,12 +748,21 @@ ENTRY(ia64_leave_syscall)
 
 	ld8 r29=[r2],16		// M0|1 load cr.ipsr
 	ld8 r28=[r3],16		// M0|1 load cr.iip
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13
+	;;
+	ld8 r30=[r2],16		// M0|1 load cr.ifs
+	ld8 r25=[r3],16		// M0|1 load ar.unat
+(pUStk) add r15=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
+	;;
+#else
 	mov r22=r0		// A    clear r22
 	;;
 	ld8 r30=[r2],16		// M0|1 load cr.ifs
 	ld8 r25=[r3],16		// M0|1 load ar.unat
 (pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
 	;;
+#endif
 	ld8 r26=[r2],PT(B0)-PT(AR_PFS)	// M0|1 load ar.pfs
 (pKStk)	mov r22=psr			// M2   read PSR now that interrupts are disabled
 	nop 0
@@ -759,7 +779,11 @@ ENTRY(ia64_leave_syscall)
 	ld8.fill r1=[r3],16			// M0|1 load r1
 (pUStk) mov r17=1				// A
 	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk) st1 [r15]=r17				// M2|3
+#else
 (pUStk) st1 [r14]=r17				// M2|3
+#endif
 	ld8.fill r13=[r3],16			// M0|1
 	mov f8=f0				// F    clear f8
 	;;
@@ -775,12 +799,22 @@ ENTRY(ia64_leave_syscall)
 	shr.u r18=r19,16		// I0|1 get byte size of existing "dirty" partition
 	cover				// B    add current frame into dirty partition & set cr.ifs
 	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	mov r19=ar.bsp			// M2   get new backing store pointer
+	st8 [r14]=r22			// M	save time at leave
+	mov f10=f0			// F    clear f10
+
+	mov r22=r0			// A	clear r22
+	movl r14=__kernel_syscall_via_epc // X
+	;;
+#else
 	mov r19=ar.bsp			// M2   get new backing store pointer
 	mov f10=f0			// F    clear f10
 
 	nop.m 0
 	movl r14=__kernel_syscall_via_epc // X
 	;;
+#endif
 	mov.m ar.csd=r0			// M2   clear ar.csd
 	mov.m ar.ccv=r0			// M2   clear ar.ccv
 	mov b7=r14			// I0   clear b7 (hint with __kernel_syscall_via_epc)
@@ -913,10 +947,18 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	adds r16=PT(CR_IPSR)+16,r12
 	adds r17=PT(CR_IIP)+16,r12
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	.pred.rel.mutex pUStk,pKStk
+(pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
+(pUStk)	mov.m r22=ar.itc	// M  fetch time at leave
+	nop.i 0
+	;;
+#else
 (pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
 	nop.i 0
 	nop.i 0
 	;;
+#endif
 	ld8 r29=[r16],16	// load cr.ipsr
 	ld8 r28=[r17],16	// load cr.iip
 	;;
@@ -938,15 +980,37 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	;;
 	ld8.fill r12=[r16],16
 	ld8.fill r13=[r17],16
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk)	adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18
+#else
 (pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
+#endif
 	;;
 	ld8 r20=[r16],16	// ar.fpsr
 	ld8.fill r15=[r17],16
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk)	adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18	// deferred
+#endif
 	;;
 	ld8.fill r14=[r16],16
 	ld8.fill r2=[r17]
 (pUStk)	mov r17=1
 	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	//  mmi_ :  ld8 st1 shr;;         mmi_ : st8 st1 shr;;
+	//  mib  :  mov add br        ->  mib  : ld8 add br
+	//  bbb_ :  br  nop cover;;       mbb_ : mov br  cover;;
+	//
+	//  nothing requires bsp in r16 if the (pKStk) branch is taken.
+(pUStk)	st8 [r3]=r22		// save time at leave
+(pUStk)	st1 [r18]=r17		// restore current->thread.on_ustack
+	shr.u r18=r19,16	// get byte size of existing "dirty" partition
+	;;
+	ld8.fill r3=[r16]	// deferred
+	LOAD_PHYS_STACK_REG_SIZE(r17)
+(pKStk)	br.cond.dpnt skip_rbs_switch
+	mov r16=ar.bsp		// get existing backing store pointer
+#else
 	ld8.fill r3=[r16]
 (pUStk)	st1 [r18]=r17		// restore current->thread.on_ustack
 	shr.u r18=r19,16	// get byte size of existing "dirty" partition
@@ -954,6 +1018,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
 	mov r16=ar.bsp		// get existing backing store pointer
 	LOAD_PHYS_STACK_REG_SIZE(r17)
 (pKStk)	br.cond.dpnt skip_rbs_switch
+#endif
 
 	/*
 	 * Restore user backing store.
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index 6a72db7ddecc..357b7e2adc63 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -656,7 +656,11 @@ GLOBAL_ENTRY(fsys_bubble_down)
 	nop.i 0
 	;;
 	mov ar.rsc=0				// M2   set enforced lazy mode, pl 0, LE, loadrs=0
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	mov.m r30=ar.itc			// M    get cycle for accounting
+#else
 	nop.m 0
+#endif
 	nop.i 0
 	;;
 	mov r23=ar.bspstore			// M2 (12 cyc) save ar.bspstore
@@ -678,6 +682,28 @@ GLOBAL_ENTRY(fsys_bubble_down)
 	cmp.ne pKStk,pUStk=r0,r0		// A    set pKStk <- 0, pUStk <- 1
 	br.call.sptk.many b7=ia64_syscall_setup	// B
 	;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	// mov.m r30=ar.itc was issued in advance
+	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
+	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
+	;;
+	ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP	// time at last check in kernel
+	ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE	// time at leave kernel
+	;;
+	ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME	// cumulated stime
+	ld8 r21=[r17]				// cumulated utime
+	sub r22=r19,r18				// stime before leave kernel
+	;;
+	st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP	// update stamp
+	sub r18=r30,r19				// elapsed time in user mode
+	;;
+	add r20=r20,r22				// sum stime
+	add r21=r21,r18				// sum utime
+	;;
+	st8 [r16]=r20				// update stime
+	st8 [r17]=r21				// update utime
+	;;
+#endif
 	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
 	mov rp=r14				// I0   set the real return addr
 	and r3=_TIF_SYSCALL_TRACEAUDIT,r3	// A
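The accounting block above (and the near-identical one in break_fault below) splits the ITC cycles that passed since the last kernel exit: everything between ac_stamp and ac_leave was system time still unaccounted when the kernel was left, everything between ac_leave and the freshly read ITC ran in user mode. In rough C, a sketch of what the bundles compute, using the thread_info fields this patch adds:

#include <stdint.h>

struct ti_acct {		/* the four fields this patch adds to thread_info */
	uint64_t ac_stamp;	/* ITC at the last accounting point */
	uint64_t ac_leave;	/* ITC recorded on the last kernel exit */
	uint64_t ac_stime;	/* accumulated system-mode cycles */
	uint64_t ac_utime;	/* accumulated user-mode cycles */
};

/* sketch of the kernel-entry accounting: "now" is ar.itc read early (r30),
 * the stamps are loaded into r18/r19 and the running sums into r20/r21 */
static void account_sys_enter_sketch(struct ti_acct *ti, uint64_t now)
{
	ti->ac_stime += ti->ac_leave - ti->ac_stamp;	/* stime before leave */
	ti->ac_utime += now - ti->ac_leave;		/* elapsed user time  */
	ti->ac_stamp  = now;				/* restart the stamp  */
}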
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index d3a41d5f8d12..ddeab4e36fd5 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -1002,6 +1002,26 @@ GLOBAL_ENTRY(sched_clock)
 	br.ret.sptk.many rp
 END(sched_clock)
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+GLOBAL_ENTRY(cycle_to_cputime)
+	alloc r16=ar.pfs,1,0,0,0
+	addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
+	;;
+	ldf8 f8=[r8]
+	;;
+	setf.sig f9=r32
+	;;
+	xmpy.lu f10=f9,f8	// calculate low 64 bits of 128-bit product	(4 cyc)
+	xmpy.hu f11=f9,f8	// calculate high 64 bits of 128-bit product
+	;;
+	getf.sig r8=f10		//						(5 cyc)
+	getf.sig r9=f11
+	;;
+	shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
+	br.ret.sptk.many rp
+END(cycle_to_cputime)
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+
 GLOBAL_ENTRY(start_kernel_thread)
 	.prologue
 	.save rp, r0				// this is the end of the call-chain
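cycle_to_cputime() scales a cycle count by the per-CPU nsec_per_cyc fixed-point factor: a 64x64->128-bit multiply (xmpy.lu/xmpy.hu) followed by a funnel shift (shrp) that extracts bits [shift+63:shift] of the product. An equivalent C sketch, assuming GCC's unsigned __int128; the shift value here is illustrative:

#include <stdint.h>

/* nsec_per_cyc is precomputed per CPU roughly as
 * (NSEC_PER_SEC << IA64_NSEC_PER_CYC_SHIFT) / itc_freq, so the product
 * below is nanoseconds scaled up by 2^IA64_NSEC_PER_CYC_SHIFT; the shift
 * brings it back down without losing the high bits to overflow */
#define IA64_NSEC_PER_CYC_SHIFT	30	/* illustrative value */

static uint64_t cycle_to_cputime_sketch(uint64_t cyc, uint64_t nsec_per_cyc)
{
	unsigned __int128 prod = (unsigned __int128)cyc * nsec_per_cyc;
	return (uint64_t)(prod >> IA64_NSEC_PER_CYC_SHIFT);
}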
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 34f44d8be00d..6678c49daba3 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -805,8 +805,13 @@ ENTRY(break_fault)
 
 (p8)	adds r28=16,r28				// A    switch cr.iip to next bundle
 (p9)	adds r8=1,r8				// A    increment ei to next slot
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	;;
+	mov b6=r30				// I0   setup syscall handler branch reg early
+#else
 	nop.i 0
 	;;
+#endif
 
 	mov.m r25=ar.unat			// M2 (5 cyc)
 	dep r29=r8,r29,41,2			// I0   insert new ei into cr.ipsr
@@ -817,7 +822,11 @@ ENTRY(break_fault)
 	//
 ///////////////////////////////////////////////////////////////////////
 	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	mov.m r30=ar.itc			// M    get cycle for accounting
+#else
 	mov b6=r30				// I0   setup syscall handler branch reg early
+#endif
 	cmp.eq pKStk,pUStk=r0,r17		// A    were we on kernel stacks already?
 
 	and r9=_TIF_SYSCALL_TRACEAUDIT,r9	// A    mask trace or audit
@@ -829,6 +838,30 @@ ENTRY(break_fault)
 	cmp.eq p14,p0=r9,r0			// A    are syscalls being traced/audited?
 	br.call.sptk.many b7=ia64_syscall_setup	// B
 1:
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	// mov.m r30=ar.itc was issued in advance, and r13 is current
+	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13	// A
+	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13	// A
+(pKStk)	br.cond.spnt .skip_accounting		// B	unlikely skip
+	;;
+	ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP	// M  get last stamp
+	ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE	// M  time at leave
+	;;
+	ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME	// M  cumulated stime
+	ld8 r21=[r17]				// M  cumulated utime
+	sub r22=r19,r18				// A  stime before leave
+	;;
+	st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP	// M  update stamp
+	sub r18=r30,r19				// A  elapsed time in user
+	;;
+	add r20=r20,r22				// A  sum stime
+	add r21=r21,r18				// A  sum utime
+	;;
+	st8 [r16]=r20				// M  update stime
+	st8 [r17]=r21				// M  update utime
+	;;
+.skip_accounting:
+#endif
 	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
 	nop 0
 	bsw.1					// B (6 cyc) regs are saved, switch to bank 1
@@ -928,6 +961,7 @@ END(interrupt)
 	 *	- r27: saved ar.rsc
 	 *	- r28: saved cr.iip
 	 *	- r29: saved cr.ipsr
+	 *	- r30: ar.itc for accounting (don't touch)
 	 *	- r31: saved pr
 	 *	-  b0: original contents (to be saved)
 	 * On exit:
@@ -1090,6 +1124,41 @@ END(dispatch_illegal_op_fault)
 	DBG_FAULT(16)
 	FAULT(16)
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	/*
+	 * There is no particular reason for this code to be here, other than
+	 * that there happens to be space here that would go unused otherwise.
+	 * If this fault ever gets "unreserved", simply move the following
+	 * code to a more suitable spot...
+	 *
+	 * account_sys_enter is called from SAVE_MIN* macros if accounting is
+	 * enabled and if the macro is entered from user mode.
+	 */
+ENTRY(account_sys_enter)
+	// mov.m r20=ar.itc was issued in advance, and r13 is current
+	add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13
+	add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13
+	;;
+	ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP	// time at last check in kernel
+	ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE	// time at leave from kernel
+	;;
+	ld8 r23=[r16],TI_AC_STAMP-TI_AC_STIME	// cumulated stime
+	ld8 r21=[r17]				// cumulated utime
+	sub r22=r19,r18				// stime before leave kernel
+	;;
+	st8 [r16]=r20,TI_AC_STIME-TI_AC_STAMP	// update stamp
+	sub r18=r20,r19				// elapsed time in user mode
+	;;
+	add r23=r23,r22				// sum stime
+	add r21=r21,r18				// sum utime
+	;;
+	st8 [r16]=r23				// update stime
+	st8 [r17]=r21				// update utime
+	;;
+	br.ret.sptk.many rp
+END(account_sys_enter)
+#endif
+
 	.org ia64_ivt+0x4400
 /////////////////////////////////////////////////////////////////////////////////////////
 // 0x4400 Entry 17 (size 64 bundles) Reserved
diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h
index c9ac8bada786..7c548ac52bbc 100644
--- a/arch/ia64/kernel/minstate.h
+++ b/arch/ia64/kernel/minstate.h
@@ -3,6 +3,18 @@
 
 #include "entry.h"
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+/* read ar.itc in advance, and use it before leaving bank 0 */
+#define ACCOUNT_GET_STAMP				\
+(pUStk) mov.m r20=ar.itc;
+#define ACCOUNT_SYS_ENTER				\
+(pUStk) br.call.spnt rp=account_sys_enter		\
+	;;
+#else
+#define ACCOUNT_GET_STAMP
+#define ACCOUNT_SYS_ENTER
+#endif
+
 /*
  * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
  * the minimum state necessary that allows us to turn psr.ic back
@@ -122,11 +134,13 @@
 	;;											\
 .mem.offset 0,0; st8.spill [r16]=r2,16;								\
 .mem.offset 8,0; st8.spill [r17]=r3,16;								\
+	ACCOUNT_GET_STAMP									\
 	adds r2=IA64_PT_REGS_R16_OFFSET,r1;							\
 	;;											\
 	EXTRA;											\
 	movl r1=__gp;		/* establish kernel global pointer */				\
 	;;											\
+	ACCOUNT_SYS_ENTER									\
 	bsw.1;			/* switch back to bank 1 (must be last in insn group) */	\
 	;;
 
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 17fda5293c67..48e15a51782f 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -59,6 +59,84 @@ static struct clocksource clocksource_itc = {
 };
 static struct clocksource *itc_clocksource;
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+
+#include <linux/kernel_stat.h>
+
+extern cputime_t cycle_to_cputime(u64 cyc);
+
+/*
+ * Called from the context switch with interrupts disabled, to charge all
+ * accumulated times to the current process, and to prepare accounting on
+ * the next process.
+ */
+void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
+{
+	struct thread_info *pi = task_thread_info(prev);
+	struct thread_info *ni = task_thread_info(next);
+	cputime_t delta_stime, delta_utime;
+	__u64 now;
+
+	now = ia64_get_itc();
+
+	delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp));
+	account_system_time(prev, 0, delta_stime);
+	account_system_time_scaled(prev, delta_stime);
+
+	if (pi->ac_utime) {
+		delta_utime = cycle_to_cputime(pi->ac_utime);
+		account_user_time(prev, delta_utime);
+		account_user_time_scaled(prev, delta_utime);
+	}
+
+	pi->ac_stamp = ni->ac_stamp = now;
+	ni->ac_stime = ni->ac_utime = 0;
+}
+
+/*
+ * Account time for a transition between system, hard irq or soft irq state.
+ * Note that this function is called with interrupts enabled.
+ */
+void account_system_vtime(struct task_struct *tsk)
+{
+	struct thread_info *ti = task_thread_info(tsk);
+	unsigned long flags;
+	cputime_t delta_stime;
+	__u64 now;
+
+	local_irq_save(flags);
+
+	now = ia64_get_itc();
+
+	delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
+	account_system_time(tsk, 0, delta_stime);
+	account_system_time_scaled(tsk, delta_stime);
+	ti->ac_stime = 0;
+
+	ti->ac_stamp = now;
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Called from the timer interrupt handler to charge accumulated user time
+ * to the current process.  Must be called with interrupts disabled.
+ */
+void account_process_tick(struct task_struct *p, int user_tick)
+{
+	struct thread_info *ti = task_thread_info(p);
+	cputime_t delta_utime;
+
+	if (ti->ac_utime) {
+		delta_utime = cycle_to_cputime(ti->ac_utime);
+		account_user_time(p, delta_utime);
+		account_user_time_scaled(p, delta_utime);
+		ti->ac_utime = 0;
+	}
+}
+
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+
 static irqreturn_t
 timer_interrupt (int irq, void *dev_id)
 {
diff --git a/include/asm-ia64/cputime.h b/include/asm-ia64/cputime.h
index 72400a78002a..f9abdec6577a 100644
--- a/include/asm-ia64/cputime.h
+++ b/include/asm-ia64/cputime.h
@@ -1,6 +1,110 @@
+/*
+ * include/asm-ia64/cputime.h:
+ *		Definitions for measuring cputime on ia64 machines.
+ *
+ * Based on <asm-powerpc/cputime.h>.
+ *
+ * Copyright (C) 2007 FUJITSU LIMITED
+ * Copyright (C) 2007 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in nsec.
+ * Otherwise we measure cpu time in jiffies using the generic definitions.
+ */
+
 #ifndef __IA64_CPUTIME_H
 #define __IA64_CPUTIME_H
 
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
 #include <asm-generic/cputime.h>
+#else
+
+#include <linux/time.h>
+#include <linux/jiffies.h>
+#include <asm/processor.h>
+
+typedef u64 cputime_t;
+typedef u64 cputime64_t;
+
+#define cputime_zero			((cputime_t)0)
+#define cputime_max			((~((cputime_t)0) >> 1) - 1)
+#define cputime_add(__a, __b)		((__a) +  (__b))
+#define cputime_sub(__a, __b)		((__a) -  (__b))
+#define cputime_div(__a, __n)		((__a) /  (__n))
+#define cputime_halve(__a)		((__a) >> 1)
+#define cputime_eq(__a, __b)		((__a) == (__b))
+#define cputime_gt(__a, __b)		((__a) >  (__b))
+#define cputime_ge(__a, __b)		((__a) >= (__b))
+#define cputime_lt(__a, __b)		((__a) <  (__b))
+#define cputime_le(__a, __b)		((__a) <= (__b))
+
+#define cputime64_zero			((cputime64_t)0)
+#define cputime64_add(__a, __b)		((__a) + (__b))
+#define cputime64_sub(__a, __b)		((__a) - (__b))
+#define cputime_to_cputime64(__ct)	(__ct)
+
+/*
+ * Convert cputime <-> jiffies (HZ)
+ */
+#define cputime_to_jiffies(__ct)	((__ct) / (NSEC_PER_SEC / HZ))
+#define jiffies_to_cputime(__jif)	((__jif) * (NSEC_PER_SEC / HZ))
+#define cputime64_to_jiffies64(__ct)	((__ct) / (NSEC_PER_SEC / HZ))
+#define jiffies64_to_cputime64(__jif)	((__jif) * (NSEC_PER_SEC / HZ))
+
+/*
+ * Convert cputime <-> milliseconds
+ */
+#define cputime_to_msecs(__ct)		((__ct) / NSEC_PER_MSEC)
+#define msecs_to_cputime(__msecs)	((__msecs) * NSEC_PER_MSEC)
+
+/*
+ * Convert cputime <-> seconds
+ */
+#define cputime_to_secs(__ct)		((__ct) / NSEC_PER_SEC)
+#define secs_to_cputime(__secs)		((__secs) * NSEC_PER_SEC)
+
+/*
+ * Convert cputime <-> timespec (nsec)
+ */
+static inline cputime_t timespec_to_cputime(const struct timespec *val)
+{
+	cputime_t ret = val->tv_sec * NSEC_PER_SEC;
+	return (ret + val->tv_nsec);
+}
+static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
+{
+	val->tv_sec  = ct / NSEC_PER_SEC;
+	val->tv_nsec = ct % NSEC_PER_SEC;
+}
+
+/*
+ * Convert cputime <-> timeval (msec)
+ */
+static inline cputime_t timeval_to_cputime(struct timeval *val)
+{
+	cputime_t ret = val->tv_sec * NSEC_PER_SEC;
+	return (ret + val->tv_usec * NSEC_PER_USEC);
+}
+static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
+{
+	val->tv_sec = ct / NSEC_PER_SEC;
+	val->tv_usec = (ct % NSEC_PER_SEC) / NSEC_PER_USEC;
+}
+
+/*
+ * Convert cputime <-> clock (USER_HZ)
+ */
+#define cputime_to_clock_t(__ct)	((__ct) / (NSEC_PER_SEC / USER_HZ))
+#define clock_t_to_cputime(__x)		((__x) * (NSEC_PER_SEC / USER_HZ))
+
+/*
+ * Convert cputime64 to clock.
+ */
+#define cputime64_to_clock_t(__ct)	cputime_to_clock_t((cputime_t)__ct)
 
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 #endif /* __IA64_CPUTIME_H */
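With CONFIG_VIRT_CPU_ACCOUNTING, cputime_t is plain nanoseconds, so every conversion above is a multiply or divide by a NSEC_PER_* constant; only the jiffies and clock_t conversions fold in HZ/USER_HZ. A small usage sketch (the HZ value is illustrative):

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC	1000000000ULL
#define NSEC_PER_MSEC	1000000ULL
#define HZ		1024ULL		/* illustrative */

typedef uint64_t cputime_t;		/* nanoseconds in this config */

int main(void)
{
	cputime_t ct = 2500000000ULL;	/* 2.5 s of cputime */
	printf("msecs:   %llu\n",
	       (unsigned long long)(ct / NSEC_PER_MSEC));	/* 2500 */
	printf("jiffies: %llu\n",
	       (unsigned long long)(ct / (NSEC_PER_SEC / HZ)));
	return 0;
}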
diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h
index 595112bca3cc..dff8128fa58e 100644
--- a/include/asm-ia64/system.h
+++ b/include/asm-ia64/system.h
@@ -210,6 +210,13 @@ struct task_struct;
 extern void ia64_save_extra (struct task_struct *task);
 extern void ia64_load_extra (struct task_struct *task);
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct *next);
+# define IA64_ACCOUNT_ON_SWITCH(p,n) ia64_account_on_switch(p,n)
+#else
+# define IA64_ACCOUNT_ON_SWITCH(p,n)
+#endif
+
 #ifdef CONFIG_PERFMON
   DECLARE_PER_CPU(unsigned long, pfm_syst_info);
 # define PERFMON_IS_SYSWIDE() (__get_cpu_var(pfm_syst_info) & 0x1)
@@ -222,6 +229,7 @@ extern void ia64_load_extra (struct task_struct *task);
 	 || IS_IA32_PROCESS(task_pt_regs(t)) || PERFMON_IS_SYSWIDE())
 
 #define __switch_to(prev,next,last) do {							 \
+	IA64_ACCOUNT_ON_SWITCH(prev, next);							 \
 	if (IA64_HAS_EXTRA_STATE(prev))								 \
 		ia64_save_extra(prev);								 \
 	if (IA64_HAS_EXTRA_STATE(next))								 \
@@ -266,6 +274,10 @@ void cpu_idle_wait(void);
 
 void default_idle(void);
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+extern void account_system_vtime(struct task_struct *);
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif /* __ASSEMBLY__ */
diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h
index 93d83cbe0c8c..6da8069a0f77 100644
--- a/include/asm-ia64/thread_info.h
+++ b/include/asm-ia64/thread_info.h
@@ -31,6 +31,12 @@ struct thread_info {
 	mm_segment_t addr_limit;	/* user-level address space limit */
 	int preempt_count;		/* 0=premptable, <0=BUG; will also serve as bh-counter */
 	struct restart_block restart_block;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+	__u64 ac_stamp;
+	__u64 ac_leave;
+	__u64 ac_stime;
+	__u64 ac_utime;
+#endif
 };
 
 #define THREAD_SIZE			KERNEL_STACK_SIZE
@@ -62,9 +68,17 @@ struct thread_info {
 #define task_stack_page(tsk)	((void *)(tsk))
 
 #define __HAVE_THREAD_FUNCTIONS
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#define setup_thread_stack(p, org)			\
+	*task_thread_info(p) = *task_thread_info(org);	\
+	task_thread_info(p)->ac_stime = 0;		\
+	task_thread_info(p)->ac_utime = 0;		\
+	task_thread_info(p)->task = (p);
+#else
 #define setup_thread_stack(p, org) \
 	*task_thread_info(p) = *task_thread_info(org); \
 	task_thread_info(p)->task = (p);
+#endif
 #define end_of_stack(p) (unsigned long *)((void *)(p) + IA64_RBS_OFFSET)
 
 #define __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
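The CONFIG_VIRT_CPU_ACCOUNTING variant of setup_thread_stack() copies the parent's thread_info but zeroes the accumulators, so a freshly forked task starts with no inherited system/user cycles; ac_stamp is rewritten by ia64_account_on_switch() the first time the child is switched in. In rough C, a sketch with the struct reduced to the relevant fields:

#include <stdint.h>

struct ti_fork_sketch {
	void *task;
	uint64_t ac_stamp, ac_leave, ac_stime, ac_utime;
};

/* sketch of the setup_thread_stack() variant above */
static void setup_thread_stack_sketch(struct ti_fork_sketch *p,
				      const struct ti_fork_sketch *org,
				      void *task)
{
	*p = *org;		/* inherit the parent's thread_info */
	p->ac_stime = 0;	/* ...but not its accumulated stime  */
	p->ac_utime = 0;	/* ...nor its accumulated utime      */
	p->task = task;		/* point back at the new task        */
}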