From eccaf52fee8305d5207ff110950a82c100e459bc Mon Sep 17 00:00:00 2001 From: "Lee, Chun-Yi" Date: Thu, 2 May 2013 22:07:01 +0800 Subject: x86, efi: initialize the local variable DataSize to zero It is better to initialize the value of DataSize to zero for the input of GetVariable(); otherwise we feed it a random value. The debug log of the input DataSize looks like this: ... [ 195.915612] EFI Variables Facility v0.08 2004-May-17 [ 195.915819] efi: size: 18446744071581821342 [ 195.915969] efi: size': 18446744071581821342 [ 195.916324] efi: size: 18446612150714306560 [ 195.916632] efi: size': 18446612150714306560 [ 195.917159] efi: size: 18446612150714306560 [ 195.917453] efi: size': 18446612150714306560 ... size' is the value that was returned by the BIOS. After applying this patch: [ 82.442042] EFI Variables Facility v0.08 2004-May-17 [ 82.442202] efi: size: 0 [ 82.442360] efi: size': 1039 [ 82.443828] efi: size: 0 [ 82.444127] efi: size': 2616 [ 82.447057] efi: size: 0 [ 82.447356] efi: size': 5832 ... Found on an Acer Aspire V3: the BIOS will not return the size of the data if we pass in a non-zero DataSize. Cc: Matthew Garrett Cc: H. Peter Anvin Signed-off-by: Lee, Chun-Yi Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 55856b2310d3..82089d8b1954 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -206,7 +206,7 @@ static efi_status_t virt_efi_get_next_variable(unsigned long *name_size, } if (boot_used_size && !finished) { - unsigned long size; + unsigned long size = 0; u32 attr; efi_status_t s; void *tmp; -- cgit 1.4.1 From de614e561b9c633073caae8f86399aa8923ef85d Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Tue, 21 May 2013 17:09:41 +0300 Subject: crypto: sha256_ssse3 - fix stack corruption with SSSE3 and AVX implementations The _XFER stack element size was set too small, 8 bytes, when it needs to be 16 bytes. As _XFER is the last stack element used by these implementations, the 16-byte stores with 'movdqa' corrupt the stack where the value of register %r12 is temporarily stored. As these implementations align the stack pointer to 16 bytes, this corruption did not happen every time. This patch corrects the issue.
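[ Editor's note: a minimal sketch of the failure mode; names and offsets are illustrative, not the exact ones from the files. ]

	_XFER      = 0                  # last slot in the frame
	_XFER_SIZE = 8                  # the bug: movdqa stores 16 bytes
	STACK_SIZE = _XFER + _XFER_SIZE

	sub    $STACK_SIZE, %rsp
	and    $~15, %rsp               # 16-byte alignment: depending on the
	                                # incoming %rsp, the saved %r12 can sit
	                                # directly above the frame
	movdqa %xmm4, _XFER(%rsp)       # 16-byte store into an 8-byte slot: the
	                                # high 8 bytes clobber the saved %r12

Reserving 16 bytes for _XFER keeps the store inside the frame, which is what the patch below does.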
Reported-by: Julian Wollrath Signed-off-by: Jussi Kivilinna Tested-by: Julian Wollrath Acked-by: Tim Chen Signed-off-by: Herbert Xu --- arch/x86/crypto/sha256-avx-asm.S | 2 +- arch/x86/crypto/sha256-ssse3-asm.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index 56610c4bf31b..642f15687a0a 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S @@ -118,7 +118,7 @@ y2 = %r15d _INP_END_SIZE = 8 _INP_SIZE = 8 -_XFER_SIZE = 8 +_XFER_SIZE = 16 _XMM_SAVE_SIZE = 0 _INP_END = 0 diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S index 98d3c391da81..f833b74d902b 100644 --- a/arch/x86/crypto/sha256-ssse3-asm.S +++ b/arch/x86/crypto/sha256-ssse3-asm.S @@ -111,7 +111,7 @@ y2 = %r15d _INP_END_SIZE = 8 _INP_SIZE = 8 -_XFER_SIZE = 8 +_XFER_SIZE = 16 _XMM_SAVE_SIZE = 0 _INP_END = 0 -- cgit 1.4.1 From e9d0626ed43a41a3fc526d1df06122b0d4eac174 Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Tue, 14 May 2013 14:48:58 +0800 Subject: x86-64, init: Fix a possible wraparound bug in switchover in head_64.S In head_64.S, a switchover is used to handle the kernel crossing the 1G and 512G boundaries. And commit 8170e6bed465b4b0c7687f93e9948aca4358a33b ("x86, 64bit: Use a #PF handler to materialize early mappings on demand") said: During the switchover in head_64.S, before #PF handler is available, we use three pages to handle kernel crossing 1G, 512G boundaries with sharing page by playing games with page aliasing: the same page is mapped twice in the higher-level tables with appropriate wraparound. But from the switchover code, when we set up the PUD table: 114 addq $4096, %rdx 115 movq %rdi, %rax 116 shrq $PUD_SHIFT, %rax 117 andl $(PTRS_PER_PUD-1), %eax 118 movq %rdx, (4096+0)(%rbx,%rax,8) 119 movq %rdx, (4096+8)(%rbx,%rax,8) It seems line 119 has a potential bug there. For example, if the kernel is loaded at physical address 511G+1008M, that is 000000000 111111111 111111000 000000000000000000000 and the kernel _end is 512G+2M, that is 000000001 000000000 000000001 000000000000000000000 So in this example, when using the 2nd page to set up the PUD (lines 114~119), rax is 511. In line 118, we put rdx, which is the address of the PMD page (the 3rd page), into entry 511 of the PUD table. But in line 119, the entry we calculate from (4096+8)(%rbx,%rax,8) has exceeded the PUD page. IMO, the entry in line 119 should wrap around into entry 0 of the PUD table. The patch fixes the bug. Signed-off-by: Zhang Yanfei Link: http://lkml.kernel.org/r/5191DE5A.3020302@cn.fujitsu.com Signed-off-by: Yinghai Lu Cc: v3.9 Signed-off-by: H.
Peter Anvin --- arch/x86/kernel/head_64.S | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 08f7e8039099..321d65ebaffe 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -115,8 +115,10 @@ startup_64: movq %rdi, %rax shrq $PUD_SHIFT, %rax andl $(PTRS_PER_PUD-1), %eax - movq %rdx, (4096+0)(%rbx,%rax,8) - movq %rdx, (4096+8)(%rbx,%rax,8) + movq %rdx, 4096(%rbx,%rax,8) + incl %eax + andl $(PTRS_PER_PUD-1), %eax + movq %rdx, 4096(%rbx,%rax,8) addq $8192, %rbx movq %rdi, %rax -- cgit 1.4.1 From 1db01b4903639fcfaec213701a494fe3fb2c490b Mon Sep 17 00:00:00 2001 From: Stefan Bader Date: Wed, 8 May 2013 16:37:35 +0200 Subject: xen: Clean up apic ipi interface Commit f447d56d36af18c5104ff29dcb1327c0c0ac3634 introduced the implementation of the PV apic ipi interface. But there were some odd things (none of which seem to cause any real issue, but maybe they should be cleaned up anyway): - xen_send_IPI_mask_allbutself (and by that xen_send_IPI_allbutself) ignore the passed in vector and only use the CALL_FUNCTION_SINGLE vector. While xen_send_IPI_all and xen_send_IPI_mask use the vector. - physflat_send_IPI_allbutself is declared unnecessarily. It is never used. This patch tries to clean up those things. Signed-off-by: Stefan Bader Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/smp.c | 10 ++++------ arch/x86/xen/smp.h | 1 - 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 8ff37995d54e..fb44426fe931 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -576,24 +576,22 @@ void xen_send_IPI_mask_allbutself(const struct cpumask *mask, { unsigned cpu; unsigned int this_cpu = smp_processor_id(); + int xen_vector = xen_map_vector(vector); - if (!(num_online_cpus() > 1)) + if (!(num_online_cpus() > 1) || (xen_vector < 0)) return; for_each_cpu_and(cpu, mask, cpu_online_mask) { if (this_cpu == cpu) continue; - xen_smp_send_call_function_single_ipi(cpu); + xen_send_IPI_one(cpu, xen_vector); } } void xen_send_IPI_allbutself(int vector) { - int xen_vector = xen_map_vector(vector); - - if (xen_vector >= 0) - xen_send_IPI_mask_allbutself(cpu_online_mask, xen_vector); + xen_send_IPI_mask_allbutself(cpu_online_mask, vector); } static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h index 8981a76d081a..c7c2d89efd76 100644 --- a/arch/x86/xen/smp.h +++ b/arch/x86/xen/smp.h @@ -5,7 +5,6 @@ extern void xen_send_IPI_mask(const struct cpumask *mask, extern void xen_send_IPI_mask_allbutself(const struct cpumask *mask, int vector); extern void xen_send_IPI_allbutself(int vector); -extern void physflat_send_IPI_allbutself(int vector); extern void xen_send_IPI_all(int vector); extern void xen_send_IPI_self(int vector); -- cgit 1.4.1 From 2baad6121e2b2fa3428ee6cb2298107be11ab23a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 29 May 2013 13:43:54 +0100 Subject: x86, crc32-pclmul: Fix build with older binutils binutils prior to 2.18 (e.g. the ones found on SLE10) don't support assembling PEXTRD, so a macro-based approach like the one for PCLMULQDQ in the same file should be used. This requires making the helper macros capable of recognizing 32-bit general purpose register operands.
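[ Editor's note: a hedged sketch of the byte-emission technique, simplified to a fixed operand pair; the real <asm/inst.h> macros added below resolve arbitrary registers. ]

	# pextrd $imm8, %xmm1, %eax encodes as 66 0F 3A 16 /r ib,
	# here with ModRM 0xc8 (mod=11, reg=xmm1, rm=eax)
	.macro PEXTRD_XMM1_EAX imm8
	.byte 0x66, 0x0f, 0x3a, 0x16, 0xc8
	.byte \imm8
	.endm

The real macros instead match operand names with .ifc to compute the prefix and ModRM bytes at assembly time, which is why they must now be taught the %eax-style 32-bit register names as well.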
[ hpa: tagging for stable as it is a low risk build fix ] Signed-off-by: Jan Beulich Link: http://lkml.kernel.org/r/51A6142A02000078000D99D8@nat28.tlf.novell.com Cc: Alexander Boyko Cc: Herbert Xu Cc: Huang Ying Cc: v3.9 Signed-off-by: H. Peter Anvin --- arch/x86/crypto/crc32-pclmul_asm.S | 2 +- arch/x86/include/asm/inst.h | 74 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 73 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S index 94c27df8a549..f247304299a2 100644 --- a/arch/x86/crypto/crc32-pclmul_asm.S +++ b/arch/x86/crypto/crc32-pclmul_asm.S @@ -240,7 +240,7 @@ fold_64: pand %xmm3, %xmm1 PCLMULQDQ 0x00, CONSTANT, %xmm1 pxor %xmm2, %xmm1 - pextrd $0x01, %xmm1, %eax + PEXTRD 0x01, %xmm1, %eax ret ENDPROC(crc32_pclmul_le_16) diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h index 280bf7fb6aba..3e115273ed88 100644 --- a/arch/x86/include/asm/inst.h +++ b/arch/x86/include/asm/inst.h @@ -9,12 +9,68 @@ #define REG_NUM_INVALID 100 -#define REG_TYPE_R64 0 -#define REG_TYPE_XMM 1 +#define REG_TYPE_R32 0 +#define REG_TYPE_R64 1 +#define REG_TYPE_XMM 2 #define REG_TYPE_INVALID 100 + .macro R32_NUM opd r32 + \opd = REG_NUM_INVALID + .ifc \r32,%eax + \opd = 0 + .endif + .ifc \r32,%ecx + \opd = 1 + .endif + .ifc \r32,%edx + \opd = 2 + .endif + .ifc \r32,%ebx + \opd = 3 + .endif + .ifc \r32,%esp + \opd = 4 + .endif + .ifc \r32,%ebp + \opd = 5 + .endif + .ifc \r32,%esi + \opd = 6 + .endif + .ifc \r32,%edi + \opd = 7 + .endif +#ifdef CONFIG_X86_64 + .ifc \r32,%r8d + \opd = 8 + .endif + .ifc \r32,%r9d + \opd = 9 + .endif + .ifc \r32,%r10d + \opd = 10 + .endif + .ifc \r32,%r11d + \opd = 11 + .endif + .ifc \r32,%r12d + \opd = 12 + .endif + .ifc \r32,%r13d + \opd = 13 + .endif + .ifc \r32,%r14d + \opd = 14 + .endif + .ifc \r32,%r15d + \opd = 15 + .endif +#endif + .endm + .macro R64_NUM opd r64 \opd = REG_NUM_INVALID +#ifdef CONFIG_X86_64 .ifc \r64,%rax \opd = 0 .endif @@ -63,6 +119,7 @@ .ifc \r64,%r15 \opd = 15 .endif +#endif .endm .macro XMM_NUM opd xmm @@ -118,10 +175,13 @@ .endm .macro REG_TYPE type reg + R32_NUM reg_type_r32 \reg R64_NUM reg_type_r64 \reg XMM_NUM reg_type_xmm \reg .if reg_type_r64 <> REG_NUM_INVALID \type = REG_TYPE_R64 + .elseif reg_type_r32 <> REG_NUM_INVALID + \type = REG_TYPE_R32 .elseif reg_type_xmm <> REG_NUM_INVALID \type = REG_TYPE_XMM .else @@ -162,6 +222,16 @@ .byte \imm8 .endm + .macro PEXTRD imm8 xmm gpr + R32_NUM extrd_opd1 \gpr + XMM_NUM extrd_opd2 \xmm + PFX_OPD_SIZE + PFX_REX extrd_opd1 extrd_opd2 + .byte 0x0f, 0x3a, 0x16 + MODRM 0xc0 extrd_opd1 extrd_opd2 + .byte \imm8 + .endm + .macro AESKEYGENASSIST rcon xmm1 xmm2 XMM_NUM aeskeygen_opd1 \xmm1 XMM_NUM aeskeygen_opd2 \xmm2 -- cgit 1.4.1 From 5187b28ff08249ab8a162e802209ed04e271ca02 Mon Sep 17 00:00:00 2001 From: Pekka Riikonen Date: Mon, 13 May 2013 14:32:07 +0200 Subject: x86: Allow FPU to be used at interrupt time even with eagerfpu With the addition of eagerfpu the irq_fpu_usable() now returns false negatives especially in the case of ksoftirqd and interrupted idle task, two common cases for FPU use for example in networking/crypto. With eagerfpu=off FPU use is possible in those contexts. This is because of the eagerfpu check in interrupted_kernel_fpu_idle(): ... * For now, with eagerfpu we will return interrupted kernel FPU * state as not-idle. TBD: Ideally we can change the return value * to something like __thread_has_fpu(current). 
But we need to * be careful of doing __thread_clear_has_fpu() before saving * the FPU etc for supporting nested uses etc. For now, take * the simple route! ... if (use_eager_fpu()) return 0; As eagerfpu is automatically "on" on those CPUs that also have features like AES-NI, this patch changes the eagerfpu check to return 1 if kernel_fpu_begin() has not been called yet. Once it has been called, __thread_has_fpu() will start returning 0. Notice that with eagerfpu, __thread_has_fpu() is always true initially. FPU use is thus always possible no matter what task is under us, unless the state has already been saved with kernel_fpu_begin(). [ hpa: this is a performance regression, not a correctness regression, but since it can be quite serious on CPUs which need encryption at interrupt time I am marking this for urgent/stable. ] Signed-off-by: Pekka Riikonen Link: http://lkml.kernel.org/r/alpine.GSO.2.00.1305131356320.18@git.silcnet.org Cc: v3.7+ Signed-off-by: H. Peter Anvin --- arch/x86/kernel/i387.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 245a71db401a..cb339097b9ea 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -22,23 +22,19 @@ /* * Were we in an interrupt that interrupted kernel mode? * - * For now, with eagerfpu we will return interrupted kernel FPU - * state as not-idle. TBD: Ideally we can change the return value - * to something like __thread_has_fpu(current). But we need to - * be careful of doing __thread_clear_has_fpu() before saving - * the FPU etc for supporting nested uses etc. For now, take - * the simple route! - * * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that * pair does nothing at all: the thread must not have fpu (so * that we don't try to save the FPU state), and TS must * be set (so that the clts/stts pair does nothing that is * visible in the interrupted kernel thread). + * + * Except for the eagerfpu case when we return 1 unless we've already + * been eager and saved the state in kernel_fpu_begin(). */ static inline bool interrupted_kernel_fpu_idle(void) { if (use_eager_fpu()) - return 0; + return __thread_has_fpu(current); return !__thread_has_fpu(current) && (read_cr0() & X86_CR0_TS); @@ -78,8 +74,8 @@ void __kernel_fpu_begin(void) struct task_struct *me = current; if (__thread_has_fpu(me)) { - __save_init_fpu(me); __thread_clear_has_fpu(me); + __save_init_fpu(me); /* We do 'stts()' in __kernel_fpu_end() */ } else if (!use_eager_fpu()) { this_cpu_write(fpu_owner_task, NULL); -- cgit 1.4.1 From b0bc225d0e5de887340d4d92a8c594ef0f60d412 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 29 May 2013 14:48:15 +0200 Subject: sched/x86: Construct all sibling maps if smt Commit 316ad248307fb ("sched/x86: Rewrite set_cpu_sibling_map()") broke the construction of sibling maps, which also broke the booted_cores accounting. Before the rewrite, if smt was present, then each map was updated for each smt sibling. After the rewrite only cpu_sibling_mask gets updated, as the llc and core maps depend on 'has_mc = x86_max_cores > 1' instead.
This leads to problems with topologies like the following (qemu -smp sockets=2,cores=1,threads=2) processor : 0 physical id : 0 siblings : 1 <= should be 2 core id : 0 cpu cores : 1 processor : 1 physical id : 0 siblings : 1 <= should be 2 core id : 0 cpu cores : 0 <= should be 1 processor : 2 physical id : 1 siblings : 1 <= should be 2 core id : 0 cpu cores : 1 processor : 3 physical id : 1 siblings : 1 <= should be 2 core id : 0 cpu cores : 0 <= should be 1 This patch restores the former construction by defining has_mc as (has_smt || x86_max_cores > 1). This should be fine as there were no (has_smt && !has_mc) conditions in the context. Also rename has_mc to has_mp now that it's not just for cores. Signed-off-by: Andrew Jones Acked-by: Peter Zijlstra Cc: a.p.zijlstra@chello.nl Cc: fenghua.yu@intel.com Link: http://lkml.kernel.org/r/1369831695-11970-1-git-send-email-drjones@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9c73b51817e4..bfd348e99369 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -372,15 +372,15 @@ static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) void __cpuinit set_cpu_sibling_map(int cpu) { - bool has_mc = boot_cpu_data.x86_max_cores > 1; bool has_smt = smp_num_siblings > 1; + bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; struct cpuinfo_x86 *c = &cpu_data(cpu); struct cpuinfo_x86 *o; int i; cpumask_set_cpu(cpu, cpu_sibling_setup_mask); - if (!has_smt && !has_mc) { + if (!has_mp) { cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu)); cpumask_set_cpu(cpu, cpu_core_mask(cpu)); @@ -394,7 +394,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) if ((i == cpu) || (has_smt && match_smt(c, o))) link_mask(sibling, cpu, i); - if ((i == cpu) || (has_mc && match_llc(c, o))) + if ((i == cpu) || (has_mp && match_llc(c, o))) link_mask(llc_shared, cpu, i); } @@ -406,7 +406,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) for_each_cpu(i, cpu_sibling_setup_mask) { o = &cpu_data(i); - if ((i == cpu) || (has_mc && match_mc(c, o))) { + if ((i == cpu) || (has_mp && match_mc(c, o))) { link_mask(core, cpu, i); /* -- cgit 1.4.1 From 7de3d66b1387ddf5a37d9689e5eb8510fb75c765 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 31 May 2013 08:53:07 -0700 Subject: x86: Fix adjust_range_size_mask calling position Commit 8d57470d ("x86, mm: setup page table in top-down") causes a kernel panic when setting mem=2G. [mem 0x00000000-0x000fffff] page 4k [mem 0x7fe00000-0x7fffffff] page 1G [mem 0x7c000000-0x7fdfffff] page 1G [mem 0x00100000-0x001fffff] page 4k [mem 0x00200000-0x7bffffff] page 2M The last entry is not what we want; we should have [mem 0x00200000-0x3fffffff] page 2M [mem 0x40000000-0x7bffffff] page 1G Actually, we merge the continuous ranges with the same page size too early. In this case, before merging we have [mem 0x00200000-0x3fffffff] page 2M [mem 0x40000000-0x7bffffff] page 2M and after merging them, we will get [mem 0x00200000-0x7bffffff] page 2M even though we could use a 1G page to map [mem 0x40000000-0x7bffffff]. That causes a problem, because we already map [mem 0x7fe00000-0x7fffffff] page 1G [mem 0x7c000000-0x7fdfffff] page 1G with 1G pages, i.e. [0x40000000-0x7fffffff] is mapped with 1G pages already.
During phys_pud_init() for [0x40000000-0x7bffffff], it will not reuse that existing pud page; it allocates a new one and then tries to use 2M pages to map the range instead, as page_size_mask does not include PG_LEVEL_1G. At the end, [0x7c000000-0x7fffffff] is left unmapped; the loop in phys_pmd_init() stops mapping at 0x7bffffff. That is the right behavior: it maps the exact range with the exact page size that we ask for, and we would have to explicitly call it to map [0x7c000000-0x7fffffff] before or after mapping [0x40000000-0x7bffffff]. Anyway, we need to make sure each range's page_size_mask is correct and consistent after split_mem_range(). Fix that by calling adjust_range_size_mask() before merging ranges with the same page size. -v2: update change log. -v3: add more explanation of why [0x7c000000-0x7fffffff] is not mapped and causes the panic. Bisected-by: "Xie, ChanglongX" Bisected-by: Yuanhan Liu Reported-and-tested-by: Yuanhan Liu Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1370015587-20835-1-git-send-email-yinghai@kernel.org Cc: v3.9 Signed-off-by: H. Peter Anvin --- arch/x86/mm/init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index eaac1743def7..1f34e9219775 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -277,6 +277,9 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, end_pfn = limit_pfn; nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); + if (!after_bootmem) + adjust_range_page_size_mask(mr, nr_range); + /* try to merge same page size and continuous */ for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { unsigned long old_start; @@ -291,9 +294,6 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, nr_range--; } - if (!after_bootmem) - adjust_range_page_size_mask(mr, nr_range); - for (i = 0; i < nr_range; i++) printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", mr[i].start, mr[i].end - 1, -- cgit 1.4.1 From 103f98ea64a1b0a67d8a1b23070b4db3533db2b8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 30 May 2013 13:22:39 +0200 Subject: KVM: Emulate multibyte NOP This is encountered when booting RHEL5.9 64-bit. There is another bug after this one that is not a simple emulation failure, but this one lets the boot proceed a bit.
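[ Editor's note: for reference, the instruction added to the twobyte table below is the 0F 1F /0 multibyte NOP family; the standard recommended encodings (from the architecture manuals, shown for illustration) include: ]

	0F 1F 00                    3 bytes   nopl   (%rax)
	0F 1F 40 00                 4 bytes   nopl   0x0(%rax)
	0F 1F 44 00 00              5 bytes   nopl   0x0(%rax,%rax,1)
	66 0F 1F 44 00 00           6 bytes   nopw   0x0(%rax,%rax,1)
	0F 1F 84 00 00 00 00 00     8 bytes   nopl   0x0(%rax,%rax,1)

Like the existing GrpP/Grp16 prefetch cases, the new case 0x1f decodes the ModRM byte and then does nothing.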
Cc: # 3.9 Signed-off-by: Paolo Bonzini Signed-off-by: Gleb Natapov --- arch/x86/kvm/emulate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8db0010ed150..0f42c2a48166 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3997,7 +3997,8 @@ static const struct opcode twobyte_table[256] = { DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, N, D(ImplicitOps | ModRM), N, N, /* 0x10 - 0x1F */ - N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N, + N, N, N, N, N, N, N, N, + D(ImplicitOps | ModRM), N, N, N, N, N, N, D(ImplicitOps | ModRM), /* 0x20 - 0x2F */ DIP(ModRM | DstMem | Priv | Op3264, cr_read, check_cr_read), DIP(ModRM | DstMem | Priv | Op3264, dr_read, check_dr_read), @@ -4836,6 +4837,7 @@ twobyte_insn: case 0x08: /* invd */ case 0x0d: /* GrpP (prefetch) */ case 0x18: /* Grp16 (prefetch/nop) */ + case 0x1f: /* nop */ break; case 0x20: /* mov cr, reg */ ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg); -- cgit 1.4.1 From 8acb42070ec4c87a9baab5c7bac626030d5bef28 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 30 May 2013 16:35:55 +0200 Subject: KVM: fix sil/dil/bpl/spl in the mod/rm fields The x86-64 extended low-byte registers were fetched correctly from reg, but not from mod/rm. This fixes another bug in the boot of RHEL5.9 64-bit, but it is still not enough. Cc: # 3.9 Signed-off-by: Paolo Bonzini Signed-off-by: Gleb Natapov --- arch/x86/kvm/emulate.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0f42c2a48166..5953dcea752d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1240,9 +1240,12 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, ctxt->modrm_seg = VCPU_SREG_DS; if (ctxt->modrm_mod == 3) { + int highbyte_regs = ctxt->rex_prefix == 0; + op->type = OP_REG; op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; - op->addr.reg = decode_register(ctxt, ctxt->modrm_rm, ctxt->d & ByteOp); + op->addr.reg = decode_register(ctxt, ctxt->modrm_rm, + highbyte_regs && (ctxt->d & ByteOp)); if (ctxt->d & Sse) { op->type = OP_XMM; op->bytes = 16; -- cgit 1.4.1 From 299018f44ac553dce3caf84df1d14c4764faa279 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 3 Jun 2013 11:30:02 +0300 Subject: KVM: Fix race in apic->pending_events processing apic->pending_events processing has a race that may cause INIT and SIPI processing to be reordered: vcpu0: vcpu1: set INIT test_and_clear_bit(KVM_APIC_INIT) process INIT set INIT set SIPI test_and_clear_bit(KVM_APIC_SIPI) process SIPI At the end, INIT is left pending in pending_events. The following patch fixes this by latching pending events before processing them.
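[ Editor's note: a hedged sketch of the latching pattern; process_init() and process_sipi() are hypothetical stand-ins for the real handlers. ]

	/* Racy: each flag is consumed separately, so a flag set between
	 * the two tests can be processed against stale ordering: */
	if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events))
		process_init();
	if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events))
		process_sipi();

	/* Latched: atomically snapshot all pending events and act only on
	 * the snapshot; later arrivals are handled on the next invocation: */
	unsigned long pe = xchg(&apic->pending_events, 0);
	if (test_bit(KVM_APIC_INIT, &pe))
		process_init();
	if (test_bit(KVM_APIC_SIPI, &pe))
		process_sipi();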
Signed-off-by: Gleb Natapov --- arch/x86/kvm/lapic.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e1adbb4aca75..0eee2c8b64d1 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1861,11 +1861,14 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; unsigned int sipi_vector; + unsigned long pe; - if (!kvm_vcpu_has_lapic(vcpu)) + if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events) return; - if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) { + pe = xchg(&apic->pending_events, 0); + + if (test_bit(KVM_APIC_INIT, &pe)) { kvm_lapic_reset(vcpu); kvm_vcpu_reset(vcpu); if (kvm_vcpu_is_bsp(apic->vcpu)) @@ -1873,7 +1876,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) else vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; } - if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events) && + if (test_bit(KVM_APIC_SIPI, &pe) && vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { /* evaluate pending_events before reading the vector */ smp_rmb(); -- cgit 1.4.1 From 466318a87f28cb3ba0d08a3b7ef1a37ae73d5aa7 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 3 Jun 2013 10:33:55 -0400 Subject: xen/smp: Fixup NOHZ per cpu data when onlining an offline CPU. xen_play_dead() is an undead function. When the vCPU is told to go offline it ends up calling xen_play_dead(), wherein it calls the VCPUOP_down hypercall which offlines the vCPU. However, when the vCPU is onlined back, it resumes execution right after the VCPUOP_down hypercall. That was OK (albeit the API for play_dead assumes that the CPU stays dead and never returns), but with commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down) it is no longer safe, as said commit resets ts->inidle, which was set at the start of the cpu_idle loop. The net effect is that we get this warn: Broke affinity for irq 16 installing Xen timer for CPU 1 cpu 1 spinlock event irq 48 ------------[ cut here ]------------ WARNING: at /home/konrad/linux-linus/kernel/time/tick-sched.c:935 tick_nohz_idle_exit+0x195/0x1b0() Modules linked in: dm_multipath dm_mod xen_evtchn iscsi_boot_sysfs CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.10.0-rc3upstream-00068-gdcdbe33 #1 Hardware name: BIOSTAR Group N61PB-M2S/N61PB-M2S, BIOS 6.00 PG 09/03/2009 ffffffff8193b448 ffff880039da5e60 ffffffff816707c8 ffff880039da5ea0 ffffffff8108ce8b ffff880039da4010 ffff88003fa8e500 ffff880039da4010 0000000000000001 ffff880039da4000 ffff880039da4010 ffff880039da5eb0 Call Trace: [] dump_stack+0x19/0x1b [] warn_slowpath_common+0x6b/0xa0 [] warn_slowpath_null+0x15/0x20 [] tick_nohz_idle_exit+0x195/0x1b0 [] cpu_startup_entry+0x205/0x250 [] cpu_bringup_and_idle+0x13/0x15 ---[ end trace 915c8c486004dda1 ]--- because ts->inidle is set to zero. Thomas suggested that we just add a workaround to call tick_nohz_idle_enter() before returning from xen_play_dead(), and that is what this patch does; it fixes the issue. We also add the stable tag because git commit 4b0c0f294 is on the stable tree.
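[ Editor's note: a hedged sketch of why the warning fires; simplified control flow, not the literal call chain. ]

	tick_nohz_idle_enter();   /* idle loop sets ts->inidle */
	...
	xen_play_dead();          /* VCPUOP_down; per commit 4b0c0f294 the NOHZ
	                           * per-cpu data, including ts->inidle, is
	                           * cleared while the vCPU is offline */
	...                       /* vCPU onlined again: execution resumes here */
	tick_nohz_idle_exit();    /* WARN_ON(!ts->inidle) triggers */

Calling tick_nohz_idle_enter() before returning from xen_play_dead() re-establishes the state the idle loop expects, which is the workaround applied below.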
CC: stable@vger.kernel.org Suggested-by: Thomas Gleixner Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/smp.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index fb44426fe931..d99cae8147d1 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -447,6 +448,13 @@ static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */ play_dead_common(); HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); cpu_bringup(); + /* + * commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down) + * clears certain data that the cpu_idle loop (which called us + * and that we return from) expects. The only way to get that + * data back is to call: + */ + tick_nohz_idle_enter(); } #else /* !CONFIG_HOTPLUG_CPU */ -- cgit 1.4.1 From 65694c5aaddfedd9da082e4e150cafc6b3fc8a6a Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 5 Jun 2013 15:15:41 +0100 Subject: x86/PCI: Map PCI setup data with ioremap() so it can be in highmem f9a37be0f0 ("x86: Use PCI setup data") added support for using PCI ROM images from setup_data. This used phys_to_virt(), which is not valid for highmem addresses, and can cause a crash when booting a 32-bit kernel via the EFI boot stub. pcibios_add_device() assumes that the physical addresses stored in setup_data are accessible via the direct kernel mapping, and that calling phys_to_virt() is valid. This isn't guaranteed to be true on x86 where the direct mapping range is much smaller than on x86-64. Calling phys_to_virt() on a highmem address results in the following: BUG: unable to handle kernel paging request at 39a3c198 IP: [] pcibios_add_device+0x2f/0x90 ... Call Trace: [] pci_device_add+0xe3/0x130 [] pci_scan_single_device+0x8b/0xb0 [] pci_scan_slot+0x48/0x100 [] pci_scan_child_bus+0x24/0xc0 [] pci_acpi_scan_root+0x2c0/0x490 [] acpi_pci_root_add+0x312/0x42f ... The solution is to use ioremap() instead of phys_to_virt() to map the setup data into the kernel address space. [bhelgaas: changelog] Tested-by: Jani Nikula Signed-off-by: Matt Fleming Signed-off-by: Bjorn Helgaas Cc: Matthew Garrett Cc: Seth Forshee Cc: Jesse Barnes Cc: stable@vger.kernel.org # v3.8+ --- arch/x86/pci/common.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 305c68b8d538..981c2dbd72cc 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -628,7 +628,9 @@ int pcibios_add_device(struct pci_dev *dev) pa_data = boot_params.hdr.setup_data; while (pa_data) { - data = phys_to_virt(pa_data); + data = ioremap(pa_data, sizeof(*rom)); + if (!data) + return -ENOMEM; if (data->type == SETUP_PCI) { rom = (struct pci_setup_rom *)data; @@ -645,6 +647,7 @@ int pcibios_add_device(struct pci_dev *dev) } } pa_data = data->next; + iounmap(data); } return 0; } -- cgit 1.4.1 From f8b8404337de4e2466e2e1139ea68b1f8295974f Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Sat, 1 Jun 2013 16:06:20 -0400 Subject: Modify UEFI anti-bricking code This patch reworks the UEFI anti-bricking code, including an effective reversion of cc5a080c and 31ff2f20. It turns out that calling QueryVariableInfo() from boot services results in some firmware implementations jumping to physical addresses even after entering virtual mode, so until we have 1:1 mappings for UEFI runtime space this isn't going to work so well. 
Reverting these gets us back to the situation where we'd refuse to create variables on some systems because they classify deleted variables as "used" until the firmware triggers a garbage collection run, which they won't do until they reach a lower threshold. This results in it being impossible to install a bootloader, which is unhelpful. Feedback from Samsung indicates that the firmware doesn't need more than 5KB of storage space for its own purposes, so that seems like a reasonable threshold. However, there's still no guarantee that a platform will attempt garbage collection merely because it drops below this threshold. It seems that this is often only triggered if an attempt to write generates a genuine EFI_OUT_OF_RESOURCES error. We can force that by attempting to create a variable larger than the remaining space. This should fail, but if it somehow succeeds we can then immediately delete it. I've tested this on the UEFI machines I have available, but I don't have a Samsung and so can't verify that it avoids the bricking problem. Signed-off-by: Matthew Garrett Signed-off-by: Lee, Chun-Y [ dummy variable cleanup ] Cc: Signed-off-by: Matt Fleming --- arch/x86/boot/compressed/eboot.c | 47 --------- arch/x86/include/asm/efi.h | 7 -- arch/x86/include/uapi/asm/bootparam.h | 1 - arch/x86/platform/efi/efi.c | 188 ++++++++++++---------------------- 4 files changed, 65 insertions(+), 178 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 35ee62fccf98..c205035a6b96 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -251,51 +251,6 @@ static void find_bits(unsigned long mask, u8 *pos, u8 *size) *size = len; } -static efi_status_t setup_efi_vars(struct boot_params *params) -{ - struct setup_data *data; - struct efi_var_bootdata *efidata; - u64 store_size, remaining_size, var_size; - efi_status_t status; - - if (sys_table->runtime->hdr.revision < EFI_2_00_SYSTEM_TABLE_REVISION) - return EFI_UNSUPPORTED; - - data = (struct setup_data *)(unsigned long)params->hdr.setup_data; - - while (data && data->next) - data = (struct setup_data *)(unsigned long)data->next; - - status = efi_call_phys4((void *)sys_table->runtime->query_variable_info, - EFI_VARIABLE_NON_VOLATILE | - EFI_VARIABLE_BOOTSERVICE_ACCESS | - EFI_VARIABLE_RUNTIME_ACCESS, &store_size, - &remaining_size, &var_size); - - if (status != EFI_SUCCESS) - return status; - - status = efi_call_phys3(sys_table->boottime->allocate_pool, - EFI_LOADER_DATA, sizeof(*efidata), &efidata); - - if (status != EFI_SUCCESS) - return status; - - efidata->data.type = SETUP_EFI_VARS; - efidata->data.len = sizeof(struct efi_var_bootdata) - - sizeof(struct setup_data); - efidata->data.next = 0; - efidata->store_size = store_size; - efidata->remaining_size = remaining_size; - efidata->max_var_size = var_size; - - if (data) - data->next = (unsigned long)efidata; - else - params->hdr.setup_data = (unsigned long)efidata; - -} - static efi_status_t setup_efi_pci(struct boot_params *params) { efi_pci_io_protocol *pci; @@ -1202,8 +1157,6 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table, setup_graphics(boot_params); - setup_efi_vars(boot_params); - setup_efi_pci(boot_params); status = efi_call_phys3(sys_table->boottime->allocate_pool, diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 2fb5d5884e23..60c89f30c727 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -102,13 +102,6 @@ extern void 
efi_call_phys_epilog(void); extern void efi_unmap_memmap(void); extern void efi_memory_uc(u64 addr, unsigned long size); -struct efi_var_bootdata { - struct setup_data data; - u64 store_size; - u64 remaining_size; - u64 max_var_size; -}; - #ifdef CONFIG_EFI static inline bool efi_is_native(void) diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 08744242b8d2..c15ddaf90710 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -6,7 +6,6 @@ #define SETUP_E820_EXT 1 #define SETUP_DTB 2 #define SETUP_PCI 3 -#define SETUP_EFI_VARS 4 /* ram_size flags */ #define RAMDISK_IMAGE_START_MASK 0x07FF diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 82089d8b1954..5ae2eb09419e 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include @@ -54,12 +53,12 @@ #define EFI_DEBUG 1 -/* - * There's some additional metadata associated with each - * variable. Intel's reference implementation is 60 bytes - bump that - * to account for potential alignment constraints - */ -#define VAR_METADATA_SIZE 64 +#define EFI_MIN_RESERVE 5120 + +#define EFI_DUMMY_GUID \ + EFI_GUID(0x4424ac57, 0xbe4b, 0x47dd, 0x9e, 0x97, 0xed, 0x50, 0xf0, 0x9f, 0x92, 0xa9) + +static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 }; struct efi __read_mostly efi = { .mps = EFI_INVALID_TABLE_ADDR, @@ -79,13 +78,6 @@ struct efi_memory_map memmap; static struct efi efi_phys __initdata; static efi_system_table_t efi_systab __initdata; -static u64 efi_var_store_size; -static u64 efi_var_remaining_size; -static u64 efi_var_max_var_size; -static u64 boot_used_size; -static u64 boot_var_size; -static u64 active_size; - unsigned long x86_efi_facility; /* @@ -188,53 +180,8 @@ static efi_status_t virt_efi_get_next_variable(unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor) { - efi_status_t status; - static bool finished = false; - static u64 var_size; - - status = efi_call_virt3(get_next_variable, - name_size, name, vendor); - - if (status == EFI_NOT_FOUND) { - finished = true; - if (var_size < boot_used_size) { - boot_var_size = boot_used_size - var_size; - active_size += boot_var_size; - } else { - printk(KERN_WARNING FW_BUG "efi: Inconsistent initial sizes\n"); - } - } - - if (boot_used_size && !finished) { - unsigned long size = 0; - u32 attr; - efi_status_t s; - void *tmp; - - s = virt_efi_get_variable(name, vendor, &attr, &size, NULL); - - if (s != EFI_BUFFER_TOO_SMALL || !size) - return status; - - tmp = kmalloc(size, GFP_ATOMIC); - - if (!tmp) - return status; - - s = virt_efi_get_variable(name, vendor, &attr, &size, tmp); - - if (s == EFI_SUCCESS && (attr & EFI_VARIABLE_NON_VOLATILE)) { - var_size += size; - var_size += ucs2_strsize(name, 1024); - active_size += size; - active_size += VAR_METADATA_SIZE; - active_size += ucs2_strsize(name, 1024); - } - - kfree(tmp); - } - - return status; + return efi_call_virt3(get_next_variable, + name_size, name, vendor); } static efi_status_t virt_efi_set_variable(efi_char16_t *name, @@ -243,34 +190,9 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name, unsigned long data_size, void *data) { - efi_status_t status; - u32 orig_attr = 0; - unsigned long orig_size = 0; - - status = virt_efi_get_variable(name, vendor, &orig_attr, &orig_size, - NULL); - - if (status != EFI_BUFFER_TOO_SMALL) - orig_size = 0; - - status = efi_call_virt5(set_variable, - name, vendor, attr, - 
data_size, data); - - if (status == EFI_SUCCESS) { - if (orig_size) { - active_size -= orig_size; - active_size -= ucs2_strsize(name, 1024); - active_size -= VAR_METADATA_SIZE; - } - if (data_size) { - active_size += data_size; - active_size += ucs2_strsize(name, 1024); - active_size += VAR_METADATA_SIZE; - } - } - - return status; + return efi_call_virt5(set_variable, + name, vendor, attr, + data_size, data); } static efi_status_t virt_efi_query_variable_info(u32 attr, @@ -786,9 +708,6 @@ void __init efi_init(void) char vendor[100] = "unknown"; int i = 0; void *tmp; - struct setup_data *data; - struct efi_var_bootdata *efi_var_data; - u64 pa_data; #ifdef CONFIG_X86_32 if (boot_params.efi_info.efi_systab_hi || @@ -806,22 +725,6 @@ void __init efi_init(void) if (efi_systab_init(efi_phys.systab)) return; - pa_data = boot_params.hdr.setup_data; - while (pa_data) { - data = early_ioremap(pa_data, sizeof(*efi_var_data)); - if (data->type == SETUP_EFI_VARS) { - efi_var_data = (struct efi_var_bootdata *)data; - - efi_var_store_size = efi_var_data->store_size; - efi_var_remaining_size = efi_var_data->remaining_size; - efi_var_max_var_size = efi_var_data->max_var_size; - } - pa_data = data->next; - early_iounmap(data, sizeof(*efi_var_data)); - } - - boot_used_size = efi_var_store_size - efi_var_remaining_size; - set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); /* @@ -1085,6 +988,13 @@ void __init efi_enter_virtual_mode(void) runtime_code_page_mkexec(); kfree(new_memmap); + + /* clean DUMMY object */ + efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, + EFI_VARIABLE_NON_VOLATILE | + EFI_VARIABLE_BOOTSERVICE_ACCESS | + EFI_VARIABLE_RUNTIME_ACCESS, + 0, NULL); } /* @@ -1136,33 +1046,65 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) efi_status_t status; u64 storage_size, remaining_size, max_size; + if (!(attributes & EFI_VARIABLE_NON_VOLATILE)) + return 0; + status = efi.query_variable_info(attributes, &storage_size, &remaining_size, &max_size); if (status != EFI_SUCCESS) return status; - if (!max_size && remaining_size > size) - printk_once(KERN_ERR FW_BUG "Broken EFI implementation" - " is returning MaxVariableSize=0\n"); /* * Some firmware implementations refuse to boot if there's insufficient * space in the variable store. We account for that by refusing the * write if permitting it would reduce the available space to under - * 50%. However, some firmware won't reclaim variable space until - * after the used (not merely the actively used) space drops below - * a threshold. We can approximate that case with the value calculated - * above. If both the firmware and our calculations indicate that the - * available space would drop below 50%, refuse the write. + * 5KB. This figure was provided by Samsung, so should be safe. */ + if ((remaining_size - size < EFI_MIN_RESERVE) && + !efi_no_storage_paranoia) { + + /* + * Triggering garbage collection may require that the firmware + * generate a real EFI_OUT_OF_RESOURCES error. We can force + * that by attempting to use more space than is available. + */ + unsigned long dummy_size = remaining_size + 1024; + void *dummy = kmalloc(dummy_size, GFP_ATOMIC); + + status = efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, + EFI_VARIABLE_NON_VOLATILE | + EFI_VARIABLE_BOOTSERVICE_ACCESS | + EFI_VARIABLE_RUNTIME_ACCESS, + dummy_size, dummy); + + if (status == EFI_SUCCESS) { + /* + * This should have failed, so if it didn't make sure + * that we delete it... 
+ */ + efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, + EFI_VARIABLE_NON_VOLATILE | + EFI_VARIABLE_BOOTSERVICE_ACCESS | + EFI_VARIABLE_RUNTIME_ACCESS, + 0, dummy); + } - if (!storage_size || size > remaining_size || - (max_size && size > max_size)) - return EFI_OUT_OF_RESOURCES; + /* + * The runtime code may now have triggered a garbage collection + * run, so check the variable info again + */ + status = efi.query_variable_info(attributes, &storage_size, + &remaining_size, &max_size); - if (!efi_no_storage_paranoia && - ((active_size + size + VAR_METADATA_SIZE > storage_size / 2) && - (remaining_size - size < storage_size / 2))) - return EFI_OUT_OF_RESOURCES; + if (status != EFI_SUCCESS) + return status; + + /* + * There still isn't enough room, so return an error + */ + if (remaining_size - size < EFI_MIN_RESERVE) + return EFI_OUT_OF_RESOURCES; + } return EFI_SUCCESS; } -- cgit 1.4.1 From d7880812b3594d3c6dcbe3cfd71dabb17347d082 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 10 Jun 2013 16:52:03 +0200 Subject: idle: Add the stack canary init to cpu_startup_entry() Moving x86 to the generic idle implementation (commit 7d1a9417 "x86: Use generic idle loop") wreckaged the stack protector. I stupidly missed that boot_init_stack_canary() must be inlined from a function which never returns, but I put that call into arch_cpu_idle_prepare() which of course returns. I pondered to play tricks with arch_cpu_idle_prepare() first, but then I noticed, that the other archs which have implemented the stackprotector (ARM and SH) do not initialize the canary for the non-boot cpus. So I decided to move the boot_init_stack_canary() call into cpu_startup_entry() ifdeffed with an CONFIG_X86 for now. This #ifdef is just a temporary measure as I don't want to inflict the boot_init_stack_canary() call on ARM and SH that late in the cycle. I'll queue a patch for 3.11 which removes the #ifdef if the ARM/SH maintainers have no objection. Reported-by: Wouter van Kesteren Cc: x86@kernel.org Cc: Russell King Cc: Paul Mundt Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process.c | 12 ------------ kernel/cpu/idle.c | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 4e7a37ff03ab..81a5f5e8f142 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -277,18 +277,6 @@ void exit_idle(void) } #endif -void arch_cpu_idle_prepare(void) -{ - /* - * If we're the non-boot CPU, nothing set the stack canary up - * for us. CPU0 already has it initialized but no harm in - * doing it again. This is a good place for updating it, as - * we wont ever return from this function (so the invalid - * canaries already on the stack wont ever trigger). - */ - boot_init_stack_canary(); -} - void arch_cpu_idle_enter(void) { local_touch_nmi(); diff --git a/kernel/cpu/idle.c b/kernel/cpu/idle.c index d5585f5e038e..bf2ee1aafa0e 100644 --- a/kernel/cpu/idle.c +++ b/kernel/cpu/idle.c @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -112,6 +113,21 @@ static void cpu_idle_loop(void) void cpu_startup_entry(enum cpuhp_state state) { + /* + * This #ifdef needs to die, but it's too late in the cycle to + * make this generic (arm and sh have never invoked the canary + * init for the non boot cpus!). Will be fixed in 3.11 + */ +#ifdef CONFIG_X86 + /* + * If we're the non-boot CPU, nothing set the stack canary up + * for us. 
The boot CPU already has it initialized but no harm + * in doing it again. This is a good place for updating it, as + * we wont ever return from this function (so the invalid + * canaries already on the stack wont ever trigger). + */ + boot_init_stack_canary(); +#endif current_set_polling(); arch_cpu_idle_prepare(); cpu_idle_loop(); -- cgit 1.4.1 From b1983b0a7578de09211a696802edab83fd253303 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 11 Jun 2013 11:56:52 -0700 Subject: x86, relocs: Move __vvar_page from S_ABS to S_REL The __vvar_page relocation should actually be listed in S_REL instead of S_ABS. Oddly, this didn't always cause things to break, presumably because there are no users for relocation information on 64 bits yet. [ hpa: Not for stable - new code in 3.10 ] Signed-off-by: Kees Cook Link: http://lkml.kernel.org/r/20130611185652.GA23674@www.outflux.net Reported-by: Michael Davidson Signed-off-by: H. Peter Anvin --- arch/x86/tools/relocs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 590be1090892..f7bab68a4b83 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -42,9 +42,6 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = { "^(xen_irq_disable_direct_reloc$|" "xen_save_fl_direct_reloc$|" "VDSO|" -#if ELF_BITS == 64 - "__vvar_page|" -#endif "__crc_)", /* @@ -72,6 +69,7 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = { "__per_cpu_load|" "init_per_cpu__.*|" "__end_rodata_hpage_align|" + "__vvar_page|" #endif "_end)$" }; -- cgit 1.4.1 From c8a22d19dd238ede87aa0ac4f7dbea8da039b9c1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 5 Jun 2013 11:47:18 -0700 Subject: x86: Fix typo in kexec register clearing Fixes a typo in register clearing code. Thanks to PaX Team for fixing this originally, and James Troup for pointing it out. Signed-off-by: Kees Cook Link: http://lkml.kernel.org/r/20130605184718.GA8396@www.outflux.net Cc: v2.6.30+ Cc: PaX Team Signed-off-by: H. Peter Anvin --- arch/x86/kernel/relocate_kernel_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 7a6f3b3be3cf..f2bb9c96720a 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -160,7 +160,7 @@ identity_mapped: xorq %rbp, %rbp xorq %r8, %r8 xorq %r9, %r9 - xorq %r10, %r9 + xorq %r10, %r10 xorq %r11, %r11 xorq %r12, %r12 xorq %r13, %r13 -- cgit 1.4.1 From fe6510b5d6349a8999b83ef7c5671e5a561b803a Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Tue, 11 Jun 2013 22:25:22 +0300 Subject: crypto: aesni_intel - fix accessing of unaligned memory The new XTS code for aesni_intel uses input buffers directly as memory operands for pxor instructions, which causes crash if those buffers are not aligned to 16 bytes. Patch changes XTS code to handle unaligned memory correctly, by loading memory with movdqu instead. 
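[ Editor's note: a minimal illustration of the alignment semantics involved, not the kernel code itself; INP, INC and STATE1 are the register macros used by the file. ]

	pxor   0x00(INP), STATE1      # legacy-SSE memory operand must be
	                              # 16-byte aligned; #GP fault otherwise
	movdqu 0x00(INP), INC         # unaligned-safe load into a scratch reg
	pxor   INC, STATE1            # register form has no alignment constraint

Hence the patch below loads each input block with movdqu into the scratch register INC and only ever feeds pxor register operands.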
Reported-by: Dave Jones Tested-by: Dave Jones Signed-off-by: Jussi Kivilinna Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 48 ++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 62fe22cd4cba..477e9d75149b 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -2681,56 +2681,68 @@ ENTRY(aesni_xts_crypt8) addq %rcx, KEYP movdqa IV, STATE1 - pxor 0x00(INP), STATE1 + movdqu 0x00(INP), INC + pxor INC, STATE1 movdqu IV, 0x00(OUTP) _aesni_gf128mul_x_ble() movdqa IV, STATE2 - pxor 0x10(INP), STATE2 + movdqu 0x10(INP), INC + pxor INC, STATE2 movdqu IV, 0x10(OUTP) _aesni_gf128mul_x_ble() movdqa IV, STATE3 - pxor 0x20(INP), STATE3 + movdqu 0x20(INP), INC + pxor INC, STATE3 movdqu IV, 0x20(OUTP) _aesni_gf128mul_x_ble() movdqa IV, STATE4 - pxor 0x30(INP), STATE4 + movdqu 0x30(INP), INC + pxor INC, STATE4 movdqu IV, 0x30(OUTP) call *%r11 - pxor 0x00(OUTP), STATE1 + movdqu 0x00(OUTP), INC + pxor INC, STATE1 movdqu STATE1, 0x00(OUTP) _aesni_gf128mul_x_ble() movdqa IV, STATE1 - pxor 0x40(INP), STATE1 + movdqu 0x40(INP), INC + pxor INC, STATE1 movdqu IV, 0x40(OUTP) - pxor 0x10(OUTP), STATE2 + movdqu 0x10(OUTP), INC + pxor INC, STATE2 movdqu STATE2, 0x10(OUTP) _aesni_gf128mul_x_ble() movdqa IV, STATE2 - pxor 0x50(INP), STATE2 + movdqu 0x50(INP), INC + pxor INC, STATE2 movdqu IV, 0x50(OUTP) - pxor 0x20(OUTP), STATE3 + movdqu 0x20(OUTP), INC + pxor INC, STATE3 movdqu STATE3, 0x20(OUTP) _aesni_gf128mul_x_ble() movdqa IV, STATE3 - pxor 0x60(INP), STATE3 + movdqu 0x60(INP), INC + pxor INC, STATE3 movdqu IV, 0x60(OUTP) - pxor 0x30(OUTP), STATE4 + movdqu 0x30(OUTP), INC + pxor INC, STATE4 movdqu STATE4, 0x30(OUTP) _aesni_gf128mul_x_ble() movdqa IV, STATE4 - pxor 0x70(INP), STATE4 + movdqu 0x70(INP), INC + pxor INC, STATE4 movdqu IV, 0x70(OUTP) _aesni_gf128mul_x_ble() @@ -2738,16 +2750,20 @@ ENTRY(aesni_xts_crypt8) call *%r11 - pxor 0x40(OUTP), STATE1 + movdqu 0x40(OUTP), INC + pxor INC, STATE1 movdqu STATE1, 0x40(OUTP) - pxor 0x50(OUTP), STATE2 + movdqu 0x50(OUTP), INC + pxor INC, STATE2 movdqu STATE2, 0x50(OUTP) - pxor 0x60(OUTP), STATE3 + movdqu 0x60(OUTP), INC + pxor INC, STATE3 movdqu STATE3, 0x60(OUTP) - pxor 0x70(OUTP), STATE4 + movdqu 0x70(OUTP), INC + pxor INC, STATE4 movdqu STATE4, 0x70(OUTP) ret -- cgit 1.4.1 From 764bcbc5a6d7a2f3e75c9f0e4caa984e2926e346 Mon Sep 17 00:00:00 2001 From: "Zhanghaoyu (A)" Date: Fri, 14 Jun 2013 07:36:13 +0000 Subject: KVM: x86: remove vcpu's CPL check in host-invoked XCR set __kvm_set_xcr function does the CPL check when set xcr. __kvm_set_xcr is called in two flows, one is invoked by guest, call stack shown as below, handle_xsetbv(or xsetbv_interception) kvm_set_xcr __kvm_set_xcr the other one is invoked by host, for example during system reset: kvm_arch_vcpu_ioctl kvm_vcpu_ioctl_x86_set_xcrs __kvm_set_xcr The former does need the CPL check, but the latter does not. Cc: stable@vger.kernel.org Signed-off-by: Zhang Haoyu [Tweaks to commit message. 
- Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 094b5d96ab14..e8ba99c34180 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -582,8 +582,6 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) if (index != XCR_XFEATURE_ENABLED_MASK) return 1; xcr0 = xcr; - if (kvm_x86_ops->get_cpl(vcpu) != 0) - return 1; if (!(xcr0 & XSTATE_FP)) return 1; if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) @@ -597,7 +595,8 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) { - if (__kvm_set_xcr(vcpu, index, xcr)) { + if (kvm_x86_ops->get_cpl(vcpu) != 0 || + __kvm_set_xcr(vcpu, index, xcr)) { kvm_inject_gp(vcpu, 0); return 1; } -- cgit 1.4.1 From d8d386c10630d8f7837700f4c466443d49e12cc0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 13 Jun 2013 13:17:01 -0700 Subject: x86, mtrr: Fix original mtrr range get for mtrr_cleanup Joshua reported: Commit cd7b304dfaf1 (x86, range: fix missing merge during add range) broke mtrr cleanup on his setup in 3.9.5. The corresponding upstream commit is fbe06b7bae7c. *BAD*gran_size: 64K chunk_size: 16M num_reg: 6 lose cover RAM: -0G https://bugzilla.kernel.org/show_bug.cgi?id=59491 So it rejects the new var mtrr layout. It turns out we have a problem with the initial mtrr range retrieval. The current sequence is: x86_get_mtrr_mem_range ==> bunches of add_range_with_merge ==> bunches of subtract_range ==> clean_sort_range add_range_with_merge for [0,1M) sort_range() add_range_with_merge() can leave blank slots, so we cannot just sort; the final result would have an extra blank slot at the head. So move the add_range_with_merge() call for [0,1M) to the front; with that we can avoid the extra clean_sort_range() call. Reported-by: Joshua Covington Tested-by: Joshua Covington Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1371154622-8929-2-git-send-email-yinghai@kernel.org Cc: v3.9 Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mtrr/cleanup.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 35ffda5d0727..5f90b85ff22e 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -714,15 +714,15 @@ int __init mtrr_cleanup(unsigned address_bits) if (mtrr_tom2) x_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - x_remove_base; - nr_range = x86_get_mtrr_mem_range(range, 0, x_remove_base, x_remove_size); /* * [0, 1M) should always be covered by var mtrr with WB * and fixed mtrrs should take effect before var mtrr for it: */ - nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, 0, + nr_range = add_range_with_merge(range, RANGE_NUM, 0, 0, 1ULL<<(20 - PAGE_SHIFT)); - /* Sort the ranges: */ - sort_range(range, nr_range); + /* add from var mtrr at last */ + nr_range = x86_get_mtrr_mem_range(range, nr_range, + x_remove_base, x_remove_size); range_sums = sum_ranges(range, nr_range); printk(KERN_INFO "total RAM covered: %ldM\n", -- cgit 1.4.1 From d1603990ea626668c78527376d9ec084d634202d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 18 Jun 2013 12:33:40 -0700 Subject: x86: fix build error and kconfig for ia32_emulation and binfmt Fix kconfig warning and build errors on x86_64 by selecting BINFMT_ELF when COMPAT_BINFMT_ELF is being selected.
warning: (IA32_EMULATION) selects COMPAT_BINFMT_ELF which has unmet direct dependencies (COMPAT && BINFMT_ELF) fs/built-in.o: In function `elf_core_dump': compat_binfmt_elf.c:(.text+0x3e093): undefined reference to `elf_core_extra_phdrs' compat_binfmt_elf.c:(.text+0x3ebcd): undefined reference to `elf_core_extra_data_size' compat_binfmt_elf.c:(.text+0x3eddd): undefined reference to `elf_core_write_extra_phdrs' compat_binfmt_elf.c:(.text+0x3f004): undefined reference to `elf_core_write_extra_data' [ hpa: This was sent to me for -next but it is a low risk build fix ] Signed-off-by: Randy Dunlap Link: http://lkml.kernel.org/r/51C0B614.5000708@infradead.org Cc: Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 685692c94f05..fe120da25625 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2265,6 +2265,7 @@ source "fs/Kconfig.binfmt" config IA32_EMULATION bool "IA32 Emulation" depends on X86_64 + select BINFMT_ELF select COMPAT_BINFMT_ELF select HAVE_UID16 ---help--- -- cgit 1.4.1 From 07868fc6aaf57847b0f3a3d53086b7556eb83f4a Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 10 Jun 2013 18:31:11 +0200 Subject: x86: kvmclock: zero initialize pvclock shared memory area The kernel might hang in pvclock_clocksource_read() because uninitialized memory may contain an odd version value in the following cycle: do { version = __pvclock_read_cycles(src, &ret, &flags); } while ((src->version & 1) || version != src->version); if a secondary kvmclock is accessed before it is registered with kvm. Clear the garbage in the pvclock shared memory area right after it is allocated to avoid this issue. Ref: https://bugzilla.kernel.org/show_bug.cgi?id=59521 Signed-off-by: Igor Mammedov [See BZ for analysis. We may want a different fix for 3.11, but this is the safest for now - Paolo] Cc: # 3.8 Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvmclock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index d2c381280e3c..3dd37ebd591b 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -242,6 +242,7 @@ void __init kvmclock_init(void) if (!mem) return; hv_clock = __va(mem); + memset(hv_clock, 0, size); if (kvm_register_clock("boot clock")) { hv_clock = NULL; -- cgit 1.4.1 From f1a527899ef0a8a241eb3bea619eb2e29d797f44 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Fri, 7 Jun 2013 23:22:10 +0200 Subject: perf/x86: Fix broken PEBS-LL support on SNB-EP/IVB-EP This patch fixes the broken PEBS-LL support on SNB-EP/IVB-EP. For some reason, the LDLAT extra reg definition for snb_ep showed up as a duplicate in the snb table. This patch moves the definition of LDLAT back into the snb_ep table. Thanks to Don Zickus for tracking this one down.
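[ Editor's note: a hedged sketch of why table placement matters; illustrative, not the exact kernel loop. Each CPU model registers one extra_regs table, which is scanned linearly until its EVENT_EXTRA_END terminator, so an LDLAT entry that exists only in the snb table is never seen on snb_ep: ]

	for (er = x86_pmu.extra_regs; er->msr; er++) {
		if (er->event != (config & er->config_mask))
			continue;
		/* matched: the event may use the extra MSR (e.g. LDLAT) */
		...
	}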
Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20130607212210.GA11849@quad Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index f60d41ff9a97..a9e22073bd56 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -165,13 +165,13 @@ static struct extra_reg intel_snb_extra_regs[] __read_mostly = { INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0), INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), - INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), EVENT_EXTRA_END }; static struct extra_reg intel_snbep_extra_regs[] __read_mostly = { INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), + INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), EVENT_EXTRA_END }; -- cgit 1.4.1 From 949785996ec2250fa958fc3a924e5186e9a8fa2c Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 19 Jun 2013 11:15:26 -0400 Subject: x86: Fix section mismatch on load_ucode_ap We are in the process of removing all the __cpuinit annotations. While working on making that change, an existing problem was made evident: WARNING: arch/x86/kernel/built-in.o(.text+0x198f2): Section mismatch in reference from the function cpu_init() to the function .init.text:load_ucode_ap() The function cpu_init() references the function __init load_ucode_ap(). This is often because cpu_init lacks a __init annotation or the annotation of load_ucode_ap is wrong. This now appears because in my working tree, cpu_init() is no longer tagged as __cpuinit, and so the audit picks up the mismatch. The 2nd hypothesis from the audit is the correct one, as there was an incorrect __init tag on the prototype in the header (but __cpuinit was used on the function itself.) The audit is telling us that the prototype's __init annotation took effect and the function did land in the .init.text section. Checking with objdump on a mainline tree that still has __cpuinit shows that the __cpuinit on the function takes precedence over the __init on the prototype, but that won't be true once we make __cpuinit a no-op. Even though we are removing __cpuinit, we temporarily align both the function and the prototype on __cpuinit so that the changeset can be applied to stable trees if desired. [ hpa: build fix only, no object code change ] Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: stable # 3.9+ Signed-off-by: Paul Gortmaker Link: http://lkml.kernel.org/r/1371654926-11729-1-git-send-email-paul.gortmaker@windriver.com Signed-off-by: H. 
From 949785996ec2250fa958fc3a924e5186e9a8fa2c Mon Sep 17 00:00:00 2001
From: Paul Gortmaker
Date: Wed, 19 Jun 2013 11:15:26 -0400
Subject: x86: Fix section mismatch on load_ucode_ap

We are in the process of removing all the __cpuinit annotations.
While working on making that change, an existing problem was made
evident:

  WARNING: arch/x86/kernel/built-in.o(.text+0x198f2): Section mismatch
  in reference from the function cpu_init() to the function
  .init.text:load_ucode_ap() The function cpu_init() references the
  function __init load_ucode_ap(). This is often because cpu_init
  lacks a __init annotation or the annotation of load_ucode_ap is
  wrong.

This now appears because in my working tree, cpu_init() is no longer
tagged as __cpuinit, and so the audit picks up the mismatch. The 2nd
hypothesis from the audit is the correct one, as there was an
incorrect __init tag on the prototype in the header (but __cpuinit
was used on the function itself).

The audit is telling us that the prototype's __init annotation took
effect and the function did land in the .init.text section. Checking
with objdump on a mainline tree that still has __cpuinit shows that
the __cpuinit on the function takes precedence over the __init on the
prototype, but that won't be true once we make __cpuinit a no-op.

Even though we are removing __cpuinit, we temporarily align both the
function and the prototype on __cpuinit so that the changeset can be
applied to stable trees if desired.

[ hpa: build fix only, no object code change ]

Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: "H. Peter Anvin"
Cc: stable  # 3.9+
Signed-off-by: Paul Gortmaker
Link: http://lkml.kernel.org/r/1371654926-11729-1-git-send-email-paul.gortmaker@windriver.com
Signed-off-by: H. Peter Anvin
---
 arch/x86/include/asm/microcode.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 6825e2efd1b4..6bc3985ee473 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -60,11 +60,11 @@ static inline void __exit exit_amd_microcode(void) {}
 #ifdef CONFIG_MICROCODE_EARLY
 #define MAX_UCODE_COUNT 128
 extern void __init load_ucode_bsp(void);
-extern __init void load_ucode_ap(void);
+extern void __cpuinit load_ucode_ap(void);
 extern int __init save_microcode_in_initrd(void);
 #else
 static inline void __init load_ucode_bsp(void) {}
-static inline __init void load_ucode_ap(void) {}
+static inline void __cpuinit load_ucode_ap(void) {}
 static inline int __init save_microcode_in_initrd(void)
 {
 	return 0;
--
cgit 1.4.1
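The underlying C mechanism, for readers hitting similar mismatches:
__init and __cpuinit expand to section attributes, and GCC merges
attributes from an earlier declaration into the definition. So once
__cpuinit becomes a no-op, the prototype's __init attribute is all
that remains and it pulls the function into init memory, which the
kernel frees after boot, even though a CPU hotplug path may still call
it later. A reduced, hypothetical sketch (the *_like names are not
kernel identifiers):

    /* Stand-ins for the kernel annotations (sketch only). */
    #define __init_like    __attribute__((__section__(".init.text")))
    #define __cpuinit_like /* expands to nothing, as after the removal */

    /* Header said, wrongly: */
    extern __init_like void load_ucode_ap_like(void);

    /* Definition said __cpuinit; with that a no-op, the declaration's
     * section attribute is merged in and the code lands in .init.text,
     * yet cpu_init() can still call it when a CPU comes up after boot. */
    __cpuinit_like void load_ucode_ap_like(void)
    {
        /* ... load microcode on this application processor ... */
    }

Aligning both declarations on the same annotation, as the patch does,
removes the ambiguity regardless of which side "wins".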
From b52e0a7c4e4100f8683af508664e60e1603070f9 Mon Sep 17 00:00:00 2001
From: Michel Lespinasse
Date: Thu, 6 Jun 2013 04:41:15 -0700
Subject: x86: Fix trigger_all_cpu_backtrace() implementation

The following change fixes the x86 implementation of
trigger_all_cpu_backtrace(), which was previously (accidentally, as
far as I can tell) disabled to always return false, as on
architectures that do not implement this function.

trigger_all_cpu_backtrace(), as defined in include/linux/nmi.h, should
call arch_trigger_all_cpu_backtrace() if available, or return false if
the underlying arch doesn't implement this function.

x86 did provide a suitable arch_trigger_all_cpu_backtrace()
implementation, but it wasn't actually being used because it was
declared in asm/nmi.h, which linux/nmi.h doesn't include. Also,
linux/nmi.h couldn't easily be fixed by including asm/nmi.h, because
that file is not available on all architectures.

I am proposing to fix this by moving the x86 declaration of
arch_trigger_all_cpu_backtrace() to asm/irq.h.

Tested via: echo l > /proc/sysrq-trigger

Before the change, this used the fallback implementation, which shows
backtraces on active CPUs only (using smp_call_function_interrupt()).
After the change, it shows NMI backtraces on all CPUs.

Signed-off-by: Michel Lespinasse
Cc: Linus Torvalds
Link: http://lkml.kernel.org/r/1370518875-1346-1-git-send-email-walken@google.com
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/irq.h    | 5 +++++
 arch/x86/include/asm/nmi.h    | 4 +---
 arch/x86/kernel/apic/hw_nmi.c | 1 +
 3 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index ba870bb6dd8e..57873beb3292 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -41,4 +41,9 @@ extern int vector_used_by_percpu_irq(unsigned int vector);

 extern void init_ISA_irqs(void);

+#ifdef CONFIG_X86_LOCAL_APIC
+void arch_trigger_all_cpu_backtrace(void);
+#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
+#endif
+
 #endif /* _ASM_X86_IRQ_H */

diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index c0fa356e90de..86f9301903c8 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -18,9 +18,7 @@ extern int proc_nmi_enabled(struct ctl_table *, int , void __user *,
 			    size_t *, loff_t *);
 extern int unknown_nmi_panic;

-void arch_trigger_all_cpu_backtrace(void);
-#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
-#endif
+#endif /* CONFIG_X86_LOCAL_APIC */

 #define NMI_FLAG_FIRST	1

diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 31cb9ae992b7..a698d7165c96 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -9,6 +9,7 @@
  *
  */
 #include <asm/apic.h>
+#include <asm/nmi.h>
 #include <linux/cpumask.h>
 #include <linux/kdebug.h>
--
cgit 1.4.1
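The availability check this relies on is the self-referential macro
trick visible in the asm/irq.h hunk: the arch header #defines the
function's own name, and the generic header tests it with #ifdef. That
only works if the generic header actually pulls in the arch header,
which is why the declaration had to move into a header that
linux/nmi.h includes. A sketch of the pattern (the generic-side
wrapper below is a paraphrase, not the verbatim linux/nmi.h code):

    #include <stdbool.h>

    /* Arch side: declare the function and #define its own name so its
     * presence becomes visible to the preprocessor. */
    void arch_trigger_all_cpu_backtrace(void);
    #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace

    /* Generic side: compile the real call only when the arch opted in. */
    static inline bool trigger_all_cpu_backtrace(void)
    {
    #ifdef arch_trigger_all_cpu_backtrace
        arch_trigger_all_cpu_backtrace();
        return true;
    #else
        return false;   /* caller falls back to a softer mechanism */
    #endif
    }

If the #define lives in a header the generic code never sees, the
#ifdef silently takes the false branch, which is exactly the bug being
fixed here.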
From b8cb62f82103083a6e8fa5470bfe634a2c06514d Mon Sep 17 00:00:00 2001
From: Ben Hutchings
Date: Sun, 16 Jun 2013 21:27:12 +0100
Subject: x86/efi: Fix dummy variable buffer allocation

1. Check for allocation failure
2. Clear the buffer contents, as they may actually be written to flash
3. Don't leak the buffer

Compile-tested only.

[ Tested successfully on my buggy ASUS machine - Matt ]

Signed-off-by: Ben Hutchings
Cc: stable@vger.kernel.org
Signed-off-by: Matt Fleming
---
 arch/x86/platform/efi/efi.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 5ae2eb09419e..d2fbcedcf6ea 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -1069,7 +1069,10 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
 		 * that by attempting to use more space than is available.
 		 */
 		unsigned long dummy_size = remaining_size + 1024;
-		void *dummy = kmalloc(dummy_size, GFP_ATOMIC);
+		void *dummy = kzalloc(dummy_size, GFP_ATOMIC);
+
+		if (!dummy)
+			return EFI_OUT_OF_RESOURCES;

 		status = efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
 					  EFI_VARIABLE_NON_VOLATILE |
@@ -1089,6 +1092,8 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
 					  0, dummy);
 	}

+	kfree(dummy);
+
 	/*
 	 * The runtime code may now have triggered a garbage collection
 	 * run, so check the variable info again
--
cgit 1.4.1

From 945fb136dfcb5291b4fb2abd4fd1edf790de44ff Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Sat, 22 Jun 2013 11:01:38 +0400
Subject: aout32 coredump compat fix

dump_seek() does SEEK_CUR, not SEEK_SET; native binfmt_aout handles it
correctly (it seeks by PAGE_SIZE - sizeof(struct user), taking the
current position to PAGE_SIZE), while the compat one seeks by
PAGE_SIZE and ends up at PAGE_SIZE plus the amount already written...

Signed-off-by: Al Viro
---
 arch/x86/ia32/ia32_aout.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 805078e08013..52ff81cce008 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -192,7 +192,7 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
 	/* struct user */
 	DUMP_WRITE(&dump, sizeof(dump));
 	/* Now dump all of the user data.  Include malloced stuff as well */
-	DUMP_SEEK(PAGE_SIZE);
+	DUMP_SEEK(PAGE_SIZE - sizeof(dump));
 	/* now we start writing out the user space info */
 	set_fs(USER_DS);
 	/* Dump the data area */
--
cgit 1.4.1
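The arithmetic behind the one-liner: after DUMP_WRITE(&dump,
sizeof(dump)) the file position is sizeof(dump), and since dump_seek()
advances relative to the current position (SEEK_CUR semantics), a seek
of PAGE_SIZE lands at sizeof(dump) + PAGE_SIZE, while PAGE_SIZE -
sizeof(dump) lands exactly on the page boundary where the a.out data
segment must begin. A minimal userspace demonstration of the same
arithmetic (hypothetical file name and header size; plain lseek()
stands in for dump_seek()):

    #include <assert.h>
    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
        char hdr[296] = { 0 };          /* stand-in for struct user */
        long page = sysconf(_SC_PAGESIZE);
        int fd = open("core.test", O_CREAT | O_RDWR | O_TRUNC, 0600);

        if (fd < 0)
            return 1;
        write(fd, hdr, sizeof(hdr));    /* position == sizeof(hdr) */

        /* buggy compat version: overshoots by sizeof(hdr)
         *     lseek(fd, page, SEEK_CUR);
         * fixed version: relative seek reaches the page boundary */
        lseek(fd, page - (long)sizeof(hdr), SEEK_CUR);
        assert(lseek(fd, 0, SEEK_CUR) == page);

        close(fd);
        return 0;
    }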