From 6c260d586343f7f78239d90aa9e2cfed02f74ff3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 28 Feb 2012 19:46:04 +0000 Subject: x86: vdso: Remove bogus locking in update_vsyscall_tz() Changing the sequence count in update_vsyscall_tz() is completely pointless. The vdso code copies the data unprotected. There is no point to change this as sys_tz is nowhere protected at all. See sys_gettimeofday(). Reviewed-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- arch/x86/kernel/vsyscall_64.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index b07ba9393564..33385c18e5d3 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -80,12 +80,7 @@ early_param("vsyscall", vsyscall_setup); void update_vsyscall_tz(void) { - unsigned long flags; - - write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); - /* sys_tz has changed */ vsyscall_gtod_data.sys_tz = sys_tz; - write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, -- cgit 1.4.1 From 2ab516575f2f273b19d95140d02c54612201e80a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 28 Feb 2012 19:46:04 +0000 Subject: x86: vdso: Use seqcount instead of seqlock The update of the vdso data happens under xtime_lock, so adding a nested lock is pointless. Just use a seqcount to sync the readers. Reviewed-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- arch/x86/include/asm/vgtod.h | 2 +- arch/x86/kernel/vsyscall_64.c | 11 +++-------- arch/x86/vdso/vclock_gettime.c | 16 ++++++++-------- 3 files changed, 12 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 815285bcaceb..1f007178c813 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -5,7 +5,7 @@ #include struct vsyscall_gtod_data { - seqlock_t lock; + seqcount_t seq; /* open coded 'struct timespec' */ time_t wall_time_sec; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 33385c18e5d3..cdc95a707cd1 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -52,10 +52,7 @@ #include "vsyscall_trace.h" DEFINE_VVAR(int, vgetcpu_mode); -DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = -{ - .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), -}; +DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data); static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; @@ -86,9 +83,7 @@ void update_vsyscall_tz(void) void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, struct clocksource *clock, u32 mult) { - unsigned long flags; - - write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); + write_seqcount_begin(&vsyscall_gtod_data.seq); /* copy vsyscall data */ vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode; @@ -101,7 +96,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, vsyscall_gtod_data.wall_to_monotonic = *wtm; vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); - write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); + write_seqcount_end(&vsyscall_gtod_data.seq); } static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 7eeb1f6188ee..944c5e5d6b6a 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -100,12 +100,12 @@ notrace static noinline int do_realtime(struct timespec *ts) int mode; do { - seq = read_seqbegin(>od->lock); + seq = read_seqcount_begin(>od->seq); mode = gtod->clock.vclock_mode; ts->tv_sec = gtod->wall_time_sec; ts->tv_nsec = gtod->wall_time_nsec; ns = vgetns(); - } while (unlikely(read_seqretry(>od->lock, seq))); + } while (unlikely(read_seqcount_retry(>od->seq, seq))); timespec_add_ns(ts, ns); return mode; @@ -117,13 +117,13 @@ notrace static noinline int do_monotonic(struct timespec *ts) int mode; do { - seq = read_seqbegin(>od->lock); + seq = read_seqcount_begin(>od->seq); mode = gtod->clock.vclock_mode; secs = gtod->wall_time_sec; ns = gtod->wall_time_nsec + vgetns(); secs += gtod->wall_to_monotonic.tv_sec; ns += gtod->wall_to_monotonic.tv_nsec; - } while (unlikely(read_seqretry(>od->lock, seq))); + } while (unlikely(read_seqcount_retry(>od->seq, seq))); /* wall_time_nsec, vgetns(), and wall_to_monotonic.tv_nsec * are all guaranteed to be nonnegative. @@ -142,10 +142,10 @@ notrace static noinline int do_realtime_coarse(struct timespec *ts) { unsigned long seq; do { - seq = read_seqbegin(>od->lock); + seq = read_seqcount_begin(>od->seq); ts->tv_sec = gtod->wall_time_coarse.tv_sec; ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; - } while (unlikely(read_seqretry(>od->lock, seq))); + } while (unlikely(read_seqcount_retry(>od->seq, seq))); return 0; } @@ -153,12 +153,12 @@ notrace static noinline int do_monotonic_coarse(struct timespec *ts) { unsigned long seq, ns, secs; do { - seq = read_seqbegin(>od->lock); + seq = read_seqcount_begin(>od->seq); secs = gtod->wall_time_coarse.tv_sec; ns = gtod->wall_time_coarse.tv_nsec; secs += gtod->wall_to_monotonic.tv_sec; ns += gtod->wall_to_monotonic.tv_nsec; - } while (unlikely(read_seqretry(>od->lock, seq))); + } while (unlikely(read_seqcount_retry(>od->seq, seq))); /* wall_time_nsec and wall_to_monotonic.tv_nsec are * guaranteed to be between 0 and NSEC_PER_SEC. -- cgit 1.4.1 From 57779dc2b3b75bee05ef5d1ada47f615f7a13932 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Tue, 21 Feb 2012 18:19:55 -0800 Subject: x86, tsc: Skip refined tsc calibration on systems with reliable TSC While running the latest Linux as guest under VMware in highly over-committed situations, we have seen cases when the refined TSC algorithm fails to get a valid tsc_start value in tsc_refine_calibration_work from multiple attempts. As a result the kernel keeps on scheduling the tsc_irqwork task for later. Subsequently after several attempts when it gets a valid start value it goes through the refined calibration and either bails out or uses the new results. Given that the kernel originally read the TSC frequency from the platform, which is the best it can get, I don't think there is much value in refining it. So for systems which get the TSC frequency from the platform we should skip the refined tsc algorithm. We can use the TSC_RELIABLE cpu cap flag to detect this, right now it is set only on VMware and for Moorestown Penwell both of which have there own TSC calibration methods. Signed-off-by: Alok N Kataria Cc: John Stultz Cc: Dirk Brandewie Cc: Alan Cox Cc: stable@kernel.org [jstultz: Reworked to simply not schedule the refining work, rather then scheduling the work and bombing out later] Signed-off-by: John Stultz --- arch/x86/kernel/tsc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index a62c201c97ec..6fcfcb3865c2 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -932,6 +932,16 @@ static int __init init_tsc_clocksource(void) clocksource_tsc.rating = 0; clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; } + + /* + * Trust the results of the earlier calibration on systems + * exporting a reliable TSC. + */ + if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { + clocksource_register_khz(&clocksource_tsc, tsc_khz); + return 0; + } + schedule_delayed_work(&tsc_irqwork, 0); return 0; } -- cgit 1.4.1 From 91ec87d57fc38c529034e853687dfb7756de5406 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 22 Mar 2012 21:15:51 -0700 Subject: x86-64: Simplify and optimize vdso clock_gettime monotonic variants We used to store the wall-to-monotonic offset and the realtime base. It's faster to precompute the monotonic base. This is about a 3% speedup on Sandy Bridge for CLOCK_MONOTONIC. It's much more impressive for CLOCK_MONOTONIC_COARSE. Signed-off-by: Andy Lutomirski Signed-off-by: John Stultz --- arch/x86/include/asm/vgtod.h | 15 +++++++++------ arch/x86/kernel/vsyscall_64.c | 10 +++++++++- arch/x86/vdso/vclock_gettime.c | 38 ++++++++------------------------------ 3 files changed, 26 insertions(+), 37 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 1f007178c813..8b38be2de9e1 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -7,11 +7,6 @@ struct vsyscall_gtod_data { seqcount_t seq; - /* open coded 'struct timespec' */ - time_t wall_time_sec; - u32 wall_time_nsec; - - struct timezone sys_tz; struct { /* extract of a clocksource struct */ int vclock_mode; cycle_t cycle_last; @@ -19,8 +14,16 @@ struct vsyscall_gtod_data { u32 mult; u32 shift; } clock; - struct timespec wall_to_monotonic; + + /* open coded 'struct timespec' */ + time_t wall_time_sec; + u32 wall_time_nsec; + u32 monotonic_time_nsec; + time_t monotonic_time_sec; + + struct timezone sys_tz; struct timespec wall_time_coarse; + struct timespec monotonic_time_coarse; }; extern struct vsyscall_gtod_data vsyscall_gtod_data; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index cdc95a707cd1..4285f1f404c2 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -84,6 +84,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, struct clocksource *clock, u32 mult) { write_seqcount_begin(&vsyscall_gtod_data.seq); + struct timespec monotonic; /* copy vsyscall data */ vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode; @@ -91,10 +92,17 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, vsyscall_gtod_data.clock.mask = clock->mask; vsyscall_gtod_data.clock.mult = mult; vsyscall_gtod_data.clock.shift = clock->shift; + vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; - vsyscall_gtod_data.wall_to_monotonic = *wtm; + + monotonic = timespec_add(*wall_time, *wtm); + vsyscall_gtod_data.monotonic_time_sec = monotonic.tv_sec; + vsyscall_gtod_data.monotonic_time_nsec = monotonic.tv_nsec; + vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); + vsyscall_gtod_data.monotonic_time_coarse = + timespec_add(vsyscall_gtod_data.wall_time_coarse, *wtm); write_seqcount_end(&vsyscall_gtod_data.seq); } diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 944c5e5d6b6a..6eea70b8f384 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -113,27 +113,17 @@ notrace static noinline int do_realtime(struct timespec *ts) notrace static noinline int do_monotonic(struct timespec *ts) { - unsigned long seq, ns, secs; + unsigned long seq, ns; int mode; do { seq = read_seqcount_begin(>od->seq); mode = gtod->clock.vclock_mode; - secs = gtod->wall_time_sec; - ns = gtod->wall_time_nsec + vgetns(); - secs += gtod->wall_to_monotonic.tv_sec; - ns += gtod->wall_to_monotonic.tv_nsec; + ts->tv_sec = gtod->monotonic_time_sec; + ts->tv_nsec = gtod->monotonic_time_nsec; + ns = vgetns(); } while (unlikely(read_seqcount_retry(>od->seq, seq))); - - /* wall_time_nsec, vgetns(), and wall_to_monotonic.tv_nsec - * are all guaranteed to be nonnegative. - */ - while (ns >= NSEC_PER_SEC) { - ns -= NSEC_PER_SEC; - ++secs; - } - ts->tv_sec = secs; - ts->tv_nsec = ns; + timespec_add_ns(ts, ns); return mode; } @@ -151,25 +141,13 @@ notrace static noinline int do_realtime_coarse(struct timespec *ts) notrace static noinline int do_monotonic_coarse(struct timespec *ts) { - unsigned long seq, ns, secs; + unsigned long seq; do { seq = read_seqcount_begin(>od->seq); - secs = gtod->wall_time_coarse.tv_sec; - ns = gtod->wall_time_coarse.tv_nsec; - secs += gtod->wall_to_monotonic.tv_sec; - ns += gtod->wall_to_monotonic.tv_nsec; + ts->tv_sec = gtod->monotonic_time_coarse.tv_sec; + ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec; } while (unlikely(read_seqcount_retry(>od->seq, seq))); - /* wall_time_nsec and wall_to_monotonic.tv_nsec are - * guaranteed to be between 0 and NSEC_PER_SEC. - */ - if (ns >= NSEC_PER_SEC) { - ns -= NSEC_PER_SEC; - ++secs; - } - ts->tv_sec = secs; - ts->tv_nsec = ns; - return 0; } -- cgit 1.4.1 From 68fe7b23d559763a2e19e5fc1cf7036e4aaecb10 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 24 Mar 2012 09:29:22 +0100 Subject: x86: vdso: Put declaration before code Sigh, warnings are there for a reason. Signed-off-by: Thomas Gleixner Cc: Andy Lutomirski Cc: John Stultz --- arch/x86/kernel/vsyscall_64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 4285f1f404c2..d5c69860b524 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -83,9 +83,10 @@ void update_vsyscall_tz(void) void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, struct clocksource *clock, u32 mult) { - write_seqcount_begin(&vsyscall_gtod_data.seq); struct timespec monotonic; + write_seqcount_begin(&vsyscall_gtod_data.seq); + /* copy vsyscall data */ vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode; vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; -- cgit 1.4.1