From 01341fbd0d8d4e717fc1231cdffe00343088ce0b Mon Sep 17 00:00:00 2001 From: Yunfeng Ye Date: Thu, 19 Nov 2020 14:21:25 +0800 Subject: workqueue: Kick a worker based on the actual activation of delayed works In realtime scenario, We do not want to have interference on the isolated cpu cores. but when invoking alloc_workqueue() for percpu wq on the housekeeping cpu, it kick a kworker on the isolated cpu. alloc_workqueue pwq_adjust_max_active wake_up_worker The comment in pwq_adjust_max_active() said: "Need to kick a worker after thawed or an unbound wq's max_active is bumped" So it is unnecessary to kick a kworker for percpu's wq when invoking alloc_workqueue(). this patch only kick a worker based on the actual activation of delayed works. Signed-off-by: Yunfeng Ye Reviewed-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 437935e7a199..0695c7895c89 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3728,17 +3728,24 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq) * is updated and visible. */ if (!freezable || !workqueue_freezing) { + bool kick = false; + pwq->max_active = wq->saved_max_active; while (!list_empty(&pwq->delayed_works) && - pwq->nr_active < pwq->max_active) + pwq->nr_active < pwq->max_active) { pwq_activate_first_delayed(pwq); + kick = true; + } /* * Need to kick a worker after thawed or an unbound wq's - * max_active is bumped. It's a slow path. Do it always. + * max_active is bumped. In realtime scenarios, always kicking a + * worker will cause interference on the isolated cpu cores, so + * let's kick iff work items were activated. */ - wake_up_worker(pwq->pool); + if (kick) + wake_up_worker(pwq->pool); } else { pwq->max_active = 0; } -- cgit 1.4.1 From 58315c96651152b9f438e5e56c910994234e2c7a Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Mon, 9 Nov 2020 16:01:11 +0530 Subject: kernel: cgroup: Mundane spelling fixes throughout the file Few spelling fixes throughout the file. Signed-off-by: Bhaskar Chowdhury Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index e41c21819ba0..690f477d537c 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -244,7 +244,7 @@ bool cgroup_ssid_enabled(int ssid) * * The default hierarchy is the v2 interface of cgroup and this function * can be used to test whether a cgroup is on the default hierarchy for - * cases where a subsystem should behave differnetly depending on the + * cases where a subsystem should behave differently depending on the * interface version. * * List of changed behaviors: @@ -262,7 +262,7 @@ bool cgroup_ssid_enabled(int ssid) * "cgroup.procs" instead. * * - "cgroup.procs" is not sorted. pids will be unique unless they got - * recycled inbetween reads. + * recycled in-between reads. * * - "release_agent" and "notify_on_release" are removed. Replacement * notification mechanism will be implemented. @@ -345,7 +345,7 @@ static bool cgroup_is_mixable(struct cgroup *cgrp) return !cgroup_parent(cgrp); } -/* can @cgrp become a thread root? should always be true for a thread root */ +/* can @cgrp become a thread root? Should always be true for a thread root */ static bool cgroup_can_be_thread_root(struct cgroup *cgrp) { /* mixables don't care */ @@ -530,7 +530,7 @@ static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp, * the root css is returned, so this function always returns a valid css. * * The returned css is not guaranteed to be online, and therefore it is the - * callers responsiblity to tryget a reference for it. + * callers responsibility to try get a reference for it. */ struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, struct cgroup_subsys *ss) @@ -702,7 +702,7 @@ EXPORT_SYMBOL_GPL(of_css); ; \ else -/* walk live descendants in preorder */ +/* walk live descendants in pre order */ #define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) \ css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL)) \ if (({ lockdep_assert_held(&cgroup_mutex); \ @@ -936,7 +936,7 @@ void put_css_set_locked(struct css_set *cset) WARN_ON_ONCE(!list_empty(&cset->threaded_csets)); - /* This css_set is dead. unlink it and release cgroup and css refs */ + /* This css_set is dead. Unlink it and release cgroup and css refs */ for_each_subsys(ss, ssid) { list_del(&cset->e_cset_node[ssid]); css_put(cset->subsys[ssid]); @@ -1061,7 +1061,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset, /* * Build the set of subsystem state objects that we want to see in the - * new css_set. while subsystems can change globally, the entries here + * new css_set. While subsystems can change globally, the entries here * won't change, so no need for locking. */ for_each_subsys(ss, i) { @@ -1151,7 +1151,7 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset, /* * Always add links to the tail of the lists so that the lists are - * in choronological order. + * in chronological order. */ list_move_tail(&link->cset_link, &cgrp->cset_links); list_add_tail(&link->cgrp_link, &cset->cgrp_links); @@ -4137,7 +4137,7 @@ struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos, * implies that if we observe !CSS_RELEASED on @pos in this RCU * critical section, the one pointed to by its next pointer is * guaranteed to not have finished its RCU grace period even if we - * have dropped rcu_read_lock() inbetween iterations. + * have dropped rcu_read_lock() in-between iterations. * * If @pos has CSS_RELEASED set, its next pointer can't be * dereferenced; however, as each css is given a monotonically @@ -4385,7 +4385,7 @@ static struct css_set *css_task_iter_next_css_set(struct css_task_iter *it) } /** - * css_task_iter_advance_css_set - advance a task itererator to the next css_set + * css_task_iter_advance_css_set - advance a task iterator to the next css_set * @it: the iterator to advance * * Advance @it to the next css_set to walk. @@ -6320,7 +6320,7 @@ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss) * * Find the cgroup at @path on the default hierarchy, increment its * reference count and return it. Returns pointer to the found cgroup on - * success, ERR_PTR(-ENOENT) if @path doens't exist and ERR_PTR(-ENOTDIR) + * success, ERR_PTR(-ENOENT) if @path doesn't exist and ERR_PTR(-ENOTDIR) * if @path points to a non-directory. */ struct cgroup *cgroup_get_from_path(const char *path) -- cgit 1.4.1 From 5a7b5f32c5aa628841502d19a813c633ff6ecbe4 Mon Sep 17 00:00:00 2001 From: Hui Su Date: Fri, 6 Nov 2020 22:47:40 +0800 Subject: cgroup/cgroup.c: replace 'of->kn->priv' with of_cft() we have supplied the inline function: of_cft() in cgroup.h. So replace the direct use 'of->kn->priv' with inline func of_cft(), which is more readable. Signed-off-by: Hui Su Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 690f477d537c..385f80cf7d0c 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3657,7 +3657,7 @@ static ssize_t cgroup_freeze_write(struct kernfs_open_file *of, static int cgroup_file_open(struct kernfs_open_file *of) { - struct cftype *cft = of->kn->priv; + struct cftype *cft = of_cft(of); if (cft->open) return cft->open(of); @@ -3666,7 +3666,7 @@ static int cgroup_file_open(struct kernfs_open_file *of) static void cgroup_file_release(struct kernfs_open_file *of) { - struct cftype *cft = of->kn->priv; + struct cftype *cft = of_cft(of); if (cft->release) cft->release(of); @@ -3677,7 +3677,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, { struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup *cgrp = of->kn->parent->priv; - struct cftype *cft = of->kn->priv; + struct cftype *cft = of_cft(of); struct cgroup_subsys_state *css; int ret; @@ -3727,7 +3727,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, static __poll_t cgroup_file_poll(struct kernfs_open_file *of, poll_table *pt) { - struct cftype *cft = of->kn->priv; + struct cftype *cft = of_cft(of); if (cft->poll) return cft->poll(of, pt); -- cgit 1.4.1 From 665f1388bc9713c81989dda6eed5cde52d57c255 Mon Sep 17 00:00:00 2001 From: Andrey Zhizhikin Date: Mon, 30 Nov 2020 12:42:33 +0000 Subject: ARM: omap2plus_defconfig: drop unused POWER_AVS option Commit 785b5bb41b0a ("PM: AVS: Drop the avs directory and the corresponding Kconfig") moved AVS code to SOC-specific folders, and removed corresponding Kconfig from drivers/power, leaving original POWER_AVS config option enabled in omap2plus_defconfig file. Remove the option, which has no references in the tree anymore. Fixes: 785b5bb41b0a ("PM: AVS: Drop the avs directory and the corresponding Kconfig") Signed-off-by: Andrey Zhizhikin Reviewed-by: Krzysztof Kozlowski Reviewed-by: Nishanth Menon Cc: Nishanth Menon Cc: Ulf Hansson Signed-off-by: Tony Lindgren --- arch/arm/configs/omap2plus_defconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 77716f500813..23b53526c4eb 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -297,7 +297,6 @@ CONFIG_GPIO_TWL4030=y CONFIG_W1=m CONFIG_HDQ_MASTER_OMAP=m CONFIG_W1_SLAVE_DS250X=m -CONFIG_POWER_AVS=y CONFIG_POWER_RESET=y CONFIG_POWER_RESET_GPIO=y CONFIG_BATTERY_BQ27XXX=m -- cgit 1.4.1 From f1dc15cd7fc146107cad2a926d9c1d005f69002a Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 29 Nov 2020 16:47:10 +0200 Subject: ARM: dts: OMAP3: disable AES on N950/N9 AES needs to be disabled on Nokia N950/N9 as well (HS devices), otherwise kernel fails to boot. Fixes: c312f066314e ("ARM: dts: omap3: Migrate AES from hwmods to sysc-omap2") Signed-off-by: Aaro Koskinen Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3-n950-n9.dtsi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi index 11d41e86f814..7dde9fbb06d3 100644 --- a/arch/arm/boot/dts/omap3-n950-n9.dtsi +++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi @@ -494,3 +494,11 @@ clock-names = "sysclk"; }; }; + +&aes1_target { + status = "disabled"; +}; + +&aes2_target { + status = "disabled"; +}; -- cgit 1.4.1 From 9836720911cfec25d3fbdead1c438bf87e0f2841 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 22 Nov 2020 04:36:52 +0900 Subject: ARC: build: remove non-existing bootpImage from KBUILD_IMAGE The deb-pkg builds for ARCH=arc fail. $ export CROSS_COMPILE= $ make -s ARCH=arc defconfig $ make ARCH=arc bindeb-pkg SORTTAB vmlinux SYSMAP System.map MODPOST Module.symvers make KERNELRELEASE=5.10.0-rc4 ARCH=arc KBUILD_BUILD_VERSION=2 -f ./Makefile intdeb-pkg sh ./scripts/package/builddeb cp: cannot stat 'arch/arc/boot/bootpImage': No such file or directory make[4]: *** [scripts/Makefile.package:87: intdeb-pkg] Error 1 make[3]: *** [Makefile:1527: intdeb-pkg] Error 2 make[2]: *** [debian/rules:13: binary-arch] Error 2 dpkg-buildpackage: error: debian/rules binary subprocess returned exit status 2 make[1]: *** [scripts/Makefile.package:83: bindeb-pkg] Error 2 make: *** [Makefile:1527: bindeb-pkg] Error 2 The reason is obvious; arch/arc/Makefile sets $(boot)/bootpImage as the default image, but there is no rule to build it. Remove the meaningless KBUILD_IMAGE assignment so it will fallback to the default vmlinux. With this change, you can build the deb package. I removed the 'bootpImage' target as well. At best, it provides 'make bootpImage' as an alias of 'make vmlinux', but I do not see much sense in doing so. Signed-off-by: Masahiro Yamada Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 0c6bf0d1df7a..acf99420e161 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -102,12 +102,6 @@ libs-y += arch/arc/lib/ $(LIBGCC) boot := arch/arc/boot -#default target for make without any arguments. -KBUILD_IMAGE := $(boot)/bootpImage - -all: bootpImage -bootpImage: vmlinux - boot_targets += uImage uImage.bin uImage.gz $(boot_targets): vmlinux -- cgit 1.4.1 From f2712ec76a5433e5ec9def2bd52a95df1f96d050 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 22 Nov 2020 04:36:53 +0900 Subject: ARC: build: add uImage.lzma to the top-level target arch/arc/boot/Makefile supports uImage.lzma, but you cannot do 'make uImage.lzma' because the corresponding target is missing in arch/arc/Makefile. Add it. I also changed the assignment operator '+=' to ':=' since this is the only place where we expect this variable to be set. Signed-off-by: Masahiro Yamada Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index acf99420e161..61a41123ad4c 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -102,7 +102,7 @@ libs-y += arch/arc/lib/ $(LIBGCC) boot := arch/arc/boot -boot_targets += uImage uImage.bin uImage.gz +boot_targets := uImage uImage.bin uImage.gz uImage.lzma $(boot_targets): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -- cgit 1.4.1 From 0cfccb3c04934cdef42ae26042139f16e805b5f7 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 22 Nov 2020 04:36:54 +0900 Subject: ARC: build: add boot_targets to PHONY The top-level boot_targets (uImage and uImage.*) should be phony targets. They just let Kbuild descend into arch/arc/boot/ and create files there. If a file exists in the top directory with the same name, the boot image will not be created. You can confirm it by the following steps: $ export CROSS_COMPILE= $ make -s ARCH=arc defconfig all # vmlinux will be built $ touch uImage.gz $ make ARCH=arc uImage.gz CALL scripts/atomic/check-atomics.sh CALL scripts/checksyscalls.sh CHK include/generated/compile.h # arch/arc/boot/uImage.gz is not created Specify the targets as PHONY to fix this. Signed-off-by: Masahiro Yamada Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 61a41123ad4c..cf9da9aea12a 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -104,6 +104,7 @@ boot := arch/arc/boot boot_targets := uImage uImage.bin uImage.gz uImage.lzma +PHONY += $(boot_targets) $(boot_targets): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -- cgit 1.4.1 From c5e6ae563c802c4d828d42e134af64004db2e58c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 22 Nov 2020 04:36:55 +0900 Subject: ARC: build: move symlink creation to arch/arc/Makefile to avoid race If you run 'make uImage uImage.gz' with the parallel option, uImage.gz will be created by two threads simultaneously. This is because arch/arc/Makefile does not specify the dependency between uImage and uImage.gz. Hence, GNU Make assumes they can be built in parallel. One thread descends into arch/arc/boot/ to create uImage, and another to create uImage.gz. Please notice the same log is displayed twice in the following steps: $ export CROSS_COMPILE= $ make -s ARCH=arc defconfig $ make -j$(nproc) ARCH=arc uImage uImage.gz [ snip ] LD vmlinux SORTTAB vmlinux SYSMAP System.map OBJCOPY arch/arc/boot/vmlinux.bin OBJCOPY arch/arc/boot/vmlinux.bin GZIP arch/arc/boot/vmlinux.bin.gz GZIP arch/arc/boot/vmlinux.bin.gz UIMAGE arch/arc/boot/uImage.gz UIMAGE arch/arc/boot/uImage.gz Image Name: Linux-5.10.0-rc4-00003-g62f23044 Created: Sun Nov 22 02:52:26 2020 Image Type: ARC Linux Kernel Image (gzip compressed) Data Size: 2109376 Bytes = 2059.94 KiB = 2.01 MiB Load Address: 80000000 Entry Point: 80004000 Image arch/arc/boot/uImage is ready Image Name: Linux-5.10.0-rc4-00003-g62f23044 Created: Sun Nov 22 02:52:26 2020 Image Type: ARC Linux Kernel Image (gzip compressed) Data Size: 2815455 Bytes = 2749.47 KiB = 2.69 MiB Load Address: 80000000 Entry Point: 80004000 This is a race between the two threads trying to write to the same file arch/arc/boot/uImage.gz. This is a potential problem that can generate a broken file. I fixed a similar problem for ARM by commit 3939f3345050 ("ARM: 8418/1: add boot image dependencies to not generate invalid images"). I highly recommend to avoid such build rules that cause a race condition. Move the uImage rule to arch/arc/Makefile. Another strangeness is that arch/arc/boot/Makefile compares the timestamps between $(obj)/uImage and $(obj)/uImage.*: $(obj)/uImage: $(obj)/uImage.$(suffix-y) @ln -sf $(notdir $<) $@ @echo ' Image $@ is ready' This does not work as expected since $(obj)/uImage is a symlink. The symlink should be created in a phony target rule. I used $(kecho) instead of echo to suppress the message 'Image arch/arc/boot/uImage is ready' when the -s option is given. Signed-off-by: Masahiro Yamada Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 13 ++++++++++++- arch/arc/boot/Makefile | 11 +---------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index cf9da9aea12a..578bdbbb0fa7 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -102,11 +102,22 @@ libs-y += arch/arc/lib/ $(LIBGCC) boot := arch/arc/boot -boot_targets := uImage uImage.bin uImage.gz uImage.lzma +boot_targets := uImage.bin uImage.gz uImage.lzma PHONY += $(boot_targets) $(boot_targets): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ +uimage-default-y := uImage.bin +uimage-default-$(CONFIG_KERNEL_GZIP) := uImage.gz +uimage-default-$(CONFIG_KERNEL_LZMA) := uImage.lzma + +PHONY += uImage +uImage: $(uimage-default-y) + @ln -sf $< $(boot)/uImage + @$(kecho) ' Image $(boot)/uImage is ready' + +CLEAN_FILES += $(boot)/uImage + archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/arc/boot/Makefile b/arch/arc/boot/Makefile index 538b92f4dd25..3b1f8a69a89e 100644 --- a/arch/arc/boot/Makefile +++ b/arch/arc/boot/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -targets := vmlinux.bin vmlinux.bin.gz uImage +targets := vmlinux.bin vmlinux.bin.gz # uImage build relies on mkimage being availble on your host for ARC target # You will need to build u-boot for ARC, rename mkimage to arc-elf32-mkimage @@ -13,11 +13,6 @@ LINUX_START_TEXT = $$(readelf -h vmlinux | \ UIMAGE_LOADADDR = $(CONFIG_LINUX_LINK_BASE) UIMAGE_ENTRYADDR = $(LINUX_START_TEXT) -suffix-y := bin -suffix-$(CONFIG_KERNEL_GZIP) := gz -suffix-$(CONFIG_KERNEL_LZMA) := lzma - -targets += uImage targets += uImage.bin targets += uImage.gz targets += uImage.lzma @@ -42,7 +37,3 @@ $(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE $(obj)/uImage.lzma: $(obj)/vmlinux.bin.lzma FORCE $(call if_changed,uimage,lzma) - -$(obj)/uImage: $(obj)/uImage.$(suffix-y) - @ln -sf $(notdir $<) $@ - @echo ' Image $@ is ready' -- cgit 1.4.1 From a4e070cfeb9d4961a169a2f1a614665cf51de963 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 22 Nov 2020 04:36:56 +0900 Subject: ARC: build: remove unneeded extra-y Adding vmlinux.* to extra-y has no point because we expect they are built on demand while building uImage.* Add them to 'targets' is enough to include the corresponding .cmd file. Signed-off-by: Masahiro Yamada Signed-off-by: Vineet Gupta --- arch/arc/boot/Makefile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/arc/boot/Makefile b/arch/arc/boot/Makefile index 3b1f8a69a89e..b3870cc100bf 100644 --- a/arch/arc/boot/Makefile +++ b/arch/arc/boot/Makefile @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -targets := vmlinux.bin vmlinux.bin.gz # uImage build relies on mkimage being availble on your host for ARC target # You will need to build u-boot for ARC, rename mkimage to arc-elf32-mkimage @@ -13,12 +12,12 @@ LINUX_START_TEXT = $$(readelf -h vmlinux | \ UIMAGE_LOADADDR = $(CONFIG_LINUX_LINK_BASE) UIMAGE_ENTRYADDR = $(LINUX_START_TEXT) +targets += vmlinux.bin +targets += vmlinux.bin.gz +targets += vmlinux.bin.lzma targets += uImage.bin targets += uImage.gz targets += uImage.lzma -extra-y += vmlinux.bin -extra-y += vmlinux.bin.gz -extra-y += vmlinux.bin.lzma $(obj)/vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) -- cgit 1.4.1 From 3a71e423133a4b1166ffafcb4a7cfa87ddecb910 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 22 Nov 2020 04:36:57 +0900 Subject: ARC: build: use $(READELF) instead of hard-coded readelf The top Makefile defines READELF as the readelf in the cross-toolchains. Use it rather than the host readelf. Signed-off-by: Masahiro Yamada Signed-off-by: Vineet Gupta --- arch/arc/boot/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/boot/Makefile b/arch/arc/boot/Makefile index b3870cc100bf..5648748c285f 100644 --- a/arch/arc/boot/Makefile +++ b/arch/arc/boot/Makefile @@ -6,7 +6,7 @@ OBJCOPYFLAGS= -O binary -R .note -R .note.gnu.build-id -R .comment -S -LINUX_START_TEXT = $$(readelf -h vmlinux | \ +LINUX_START_TEXT = $$($(READELF) -h vmlinux | \ grep "Entry point address" | grep -o 0x.*) UIMAGE_LOADADDR = $(CONFIG_LINUX_LINK_BASE) -- cgit 1.4.1 From 1967939462641d8b36bcb3fcf06d48e66cd67a4f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 29 Nov 2020 04:33:35 +0900 Subject: Compiler Attributes: remove CONFIG_ENABLE_MUST_CHECK Revert commit cebc04ba9aeb ("add CONFIG_ENABLE_MUST_CHECK"). A lot of warn_unused_result warnings existed in 2006, but until now they have been fixed thanks to people doing allmodconfig tests. Our goal is to always enable __must_check where appropriate, so this CONFIG option is no longer needed. I see a lot of defconfig (arch/*/configs/*_defconfig) files having: # CONFIG_ENABLE_MUST_CHECK is not set I did not touch them for now since it would be a big churn. If arch maintainers want to clean them up, please go ahead. While I was here, I also moved __must_check to compiler_attributes.h from compiler_types.h Signed-off-by: Masahiro Yamada Acked-by: Jason A. Donenfeld Acked-by: Nathan Chancellor Reviewed-by: Nick Desaulniers [Moved addition in compiler_attributes.h to keep it sorted] Signed-off-by: Miguel Ojeda --- include/linux/compiler_attributes.h | 6 ++++++ include/linux/compiler_types.h | 6 ------ lib/Kconfig.debug | 8 -------- tools/testing/selftests/wireguard/qemu/debug.config | 1 - 4 files changed, 6 insertions(+), 15 deletions(-) diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index b2a3f4f641a7..ea5e04e75845 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -272,6 +272,12 @@ */ #define __used __attribute__((__used__)) +/* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-warn_005funused_005fresult-function-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#nodiscard-warn-unused-result + */ +#define __must_check __attribute__((__warn_unused_result__)) + /* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-weak-function-attribute * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-weak-variable-attribute diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index ac3fa37a84f9..7ef20d1a6c28 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -110,12 +110,6 @@ struct ftrace_likely_data { unsigned long constant; }; -#ifdef CONFIG_ENABLE_MUST_CHECK -#define __must_check __attribute__((__warn_unused_result__)) -#else -#define __must_check -#endif - #if defined(CC_USING_HOTPATCH) #define notrace __attribute__((hotpatch(0, 0))) #elif defined(CC_USING_PATCHABLE_FUNCTION_ENTRY) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c789b39ed527..cb8ef4fd0d02 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -286,14 +286,6 @@ config GDB_SCRIPTS endif # DEBUG_INFO -config ENABLE_MUST_CHECK - bool "Enable __must_check logic" - default y - help - Enable the __must_check logic in the kernel build. Disable this to - suppress the "warning: ignoring return value of 'foo', declared with - attribute warn_unused_result" messages. - config FRAME_WARN int "Warn for stack frames larger than" range 0 8192 diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config index b50c2085c1ac..fe07d97df9fa 100644 --- a/tools/testing/selftests/wireguard/qemu/debug.config +++ b/tools/testing/selftests/wireguard/qemu/debug.config @@ -1,5 +1,4 @@ CONFIG_LOCALVERSION="-debug" -CONFIG_ENABLE_MUST_CHECK=y CONFIG_FRAME_POINTER=y CONFIG_STACK_VALIDATION=y CONFIG_DEBUG_KERNEL=y -- cgit 1.4.1 From ec76c2eea903947202098090bbe07a739b5246e9 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Fri, 4 Dec 2020 10:55:39 +0100 Subject: ARM: OMAP2+: omap_device: fix idling of devices during probe On the GTA04A5 od->_driver_status was not set to BUS_NOTIFY_BIND_DRIVER during probe of the second mmc used for wifi. Therefore omap_device_late_idle idled the device during probing causing oopses when accessing the registers. It was not set because od->_state was set to OMAP_DEVICE_STATE_IDLE in the notifier callback. Therefore set od->_driver_status also in that case. This came apparent after commit 21b2cec61c04 ("mmc: Set PROBE_PREFER_ASYNCHRONOUS for drivers that existed in v4.4") causing this oops: omap_hsmmc 480b4000.mmc: omap_device_late_idle: enabled but no driver. Idling 8<--- cut here --- Unhandled fault: external abort on non-linefetch (0x1028) at 0xfa0b402c ... (omap_hsmmc_set_bus_width) from [] (omap_hsmmc_set_ios+0x11c/0x258) (omap_hsmmc_set_ios) from [] (mmc_power_up.part.8+0x3c/0xd0) (mmc_power_up.part.8) from [] (mmc_start_host+0x88/0x9c) (mmc_start_host) from [] (mmc_add_host+0x58/0x84) (mmc_add_host) from [] (omap_hsmmc_probe+0x5fc/0x8c0) (omap_hsmmc_probe) from [] (platform_drv_probe+0x48/0x98) (platform_drv_probe) from [] (really_probe+0x1dc/0x3b4) Fixes: 04abaf07f6d5 ("ARM: OMAP2+: omap_device: Sync omap_device and pm_runtime after probe defer") Fixes: 21b2cec61c04 ("mmc: Set PROBE_PREFER_ASYNCHRONOUS for drivers that existed in v4.4") Acked-by: Ulf Hansson Signed-off-by: Andreas Kemnade [tony@atomide.com: left out extra parens, trimmed description stack trace] Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_device.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c index fc7bb2ca1672..64b23b0cd23c 100644 --- a/arch/arm/mach-omap2/omap_device.c +++ b/arch/arm/mach-omap2/omap_device.c @@ -230,10 +230,12 @@ static int _omap_device_notifier_call(struct notifier_block *nb, break; case BUS_NOTIFY_BIND_DRIVER: od = to_omap_device(pdev); - if (od && (od->_state == OMAP_DEVICE_STATE_ENABLED) && - pm_runtime_status_suspended(dev)) { + if (od) { od->_driver_status = BUS_NOTIFY_BIND_DRIVER; - pm_runtime_set_active(dev); + if (od->_state == OMAP_DEVICE_STATE_ENABLED && + pm_runtime_status_suspended(dev)) { + pm_runtime_set_active(dev); + } } break; case BUS_NOTIFY_ADD_DEVICE: -- cgit 1.4.1 From 2f6fc9e08bf79f11516edef855283c6212bbe78f Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Tue, 1 Dec 2020 20:12:37 +0100 Subject: ARM: omap2plus_defconfig: enable SPI GPIO GTA04 uses that for controlling the td028ttec1 panel. So for easier testing/bisecting it is useful to have it enabled in the defconfig. Signed-off-by: Andreas Kemnade Signed-off-by: Tony Lindgren --- arch/arm/configs/omap2plus_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 23b53526c4eb..28add0b64d42 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -280,6 +280,7 @@ CONFIG_SERIAL_OMAP_CONSOLE=y CONFIG_SERIAL_DEV_BUS=y CONFIG_I2C_CHARDEV=y CONFIG_SPI=y +CONFIG_SPI_GPIO=m CONFIG_SPI_OMAP24XX=y CONFIG_SPI_TI_QSPI=m CONFIG_HSI=m -- cgit 1.4.1 From c0bc969c176b10598b31d5d1a5edf9a5261f0a9f Mon Sep 17 00:00:00 2001 From: Carl Philipp Klemm Date: Mon, 7 Dec 2020 20:58:01 +0100 Subject: ARM: omap2: pmic-cpcap: fix maximum voltage to be consistent with defaults on xt875 xt875 comes up with a iva voltage of 1375000 and android runs at this too. fix maximum voltage to be consistent with this. Signed-off-by: Carl Philipp Klemm Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/pmic-cpcap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/pmic-cpcap.c b/arch/arm/mach-omap2/pmic-cpcap.c index eab281a5fc9f..09076ad0576d 100644 --- a/arch/arm/mach-omap2/pmic-cpcap.c +++ b/arch/arm/mach-omap2/pmic-cpcap.c @@ -71,7 +71,7 @@ static struct omap_voltdm_pmic omap_cpcap_iva = { .vp_vstepmin = OMAP4_VP_VSTEPMIN_VSTEPMIN, .vp_vstepmax = OMAP4_VP_VSTEPMAX_VSTEPMAX, .vddmin = 900000, - .vddmax = 1350000, + .vddmax = 1375000, .vp_timeout_us = OMAP4_VP_VLIMITTO_TIMEOUT_US, .i2c_slave_addr = 0x44, .volt_reg_addr = 0x0, -- cgit 1.4.1 From 44fd9fb599d3d2be4c6838f4b11eaa459bb33989 Mon Sep 17 00:00:00 2001 From: Bean Huo Date: Mon, 7 Dec 2020 20:01:36 +0100 Subject: scsi: ufs: Remove unused macro definition POWER_DESC_MAX_SIZE POWER_DESC_MAX_SIZE is unused, remove it. Link: https://lore.kernel.org/r/20201207190137.6858-2-huobean@gmail.com Acked-by: Avri Altman Acked-by: Alim Akhtar Signed-off-by: Bean Huo Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h index d593edb48767..142c2ebd3d7d 100644 --- a/drivers/scsi/ufs/ufs.h +++ b/drivers/scsi/ufs/ufs.h @@ -330,7 +330,6 @@ enum { UFS_DEV_WRITE_BOOSTER_SUP = BIT(8), }; -#define POWER_DESC_MAX_SIZE 0x62 #define POWER_DESC_MAX_ACTV_ICC_LVLS 16 /* Attribute bActiveICCLevel parameter bit masks definitions */ -- cgit 1.4.1 From 1fa0570002e3f66db9b58c32c60de4183b857a19 Mon Sep 17 00:00:00 2001 From: Bean Huo Date: Mon, 7 Dec 2020 20:01:37 +0100 Subject: scsi: ufs: Fix wrong print message in dev_err() Change dev_err() print message from "dme-reset" to "dme_enable" in function ufshcd_dme_enable(). Link: https://lore.kernel.org/r/20201207190137.6858-3-huobean@gmail.com Acked-by: Alim Akhtar Acked-by: Avri Altman Signed-off-by: Bean Huo Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index c1c401b2b69d..6bcedebae4a8 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -3641,7 +3641,7 @@ static int ufshcd_dme_enable(struct ufs_hba *hba) ret = ufshcd_send_uic_cmd(hba, &uic_cmd); if (ret) dev_err(hba->dev, - "dme-reset: error code %d\n", ret); + "dme-enable: error code %d\n", ret); return ret; } -- cgit 1.4.1 From 1918651f2d7e8d58c9b7c49755c61e41ed655009 Mon Sep 17 00:00:00 2001 From: Randall Huang Date: Mon, 30 Nov 2020 20:14:02 -0800 Subject: scsi: ufs: Clear UAC for RPMB after ufshcd resets If RPMB is not provisioned, we may see RPMB failure after UFS suspend/resume. Inject request_sense to clear uac in ufshcd reset flow. Link: https://lore.kernel.org/r/20201201041402.3860525-1-jaegeuk@kernel.org Reported-by: kernel test robot Reviewed-by: Stanley Chu Signed-off-by: Randall Huang Signed-off-by: Leo Liou Signed-off-by: Jaegeuk Kim Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 6bcedebae4a8..a84a8759437a 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -225,6 +225,7 @@ static int ufshcd_reset_and_restore(struct ufs_hba *hba); static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd); static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag); static void ufshcd_hba_exit(struct ufs_hba *hba); +static int ufshcd_clear_ua_wluns(struct ufs_hba *hba); static int ufshcd_probe_hba(struct ufs_hba *hba, bool async); static int ufshcd_setup_clocks(struct ufs_hba *hba, bool on); static int ufshcd_uic_hibern8_enter(struct ufs_hba *hba); @@ -6895,7 +6896,8 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba) /* Establish the link again and restore the device */ err = ufshcd_probe_hba(hba, false); - + if (!err) + ufshcd_clear_ua_wluns(hba); out: if (err) dev_err(hba->dev, "%s: Host init failed %d\n", __func__, err); @@ -8379,13 +8381,7 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, * handling context. */ hba->host->eh_noresume = 1; - if (hba->wlun_dev_clr_ua) { - ret = ufshcd_send_request_sense(hba, sdp); - if (ret) - goto out; - /* Unit attention condition is cleared now */ - hba->wlun_dev_clr_ua = false; - } + ufshcd_clear_ua_wluns(hba); cmd[4] = pwr_mode << 4; @@ -8406,7 +8402,7 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, if (!ret) hba->curr_dev_pwr_mode = pwr_mode; -out: + scsi_device_put(sdp); hba->host->eh_noresume = 0; return ret; -- cgit 1.4.1 From f8162ac70ecf5a3ed638f96dc10e0e19b523ec7f Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Mon, 7 Dec 2020 13:49:54 +0800 Subject: scsi: ufs: Allow regulators being always-on Introduce a flag "always_on" in struct ufs_vreg to allow vendors to keep the regulator always-on. Link: https://lore.kernel.org/r/20201207054955.24366-2-stanley.chu@mediatek.com Reviewed-by: Andy Teng Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs.h | 1 + drivers/scsi/ufs/ufshcd.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h index 142c2ebd3d7d..14dfda735adf 100644 --- a/drivers/scsi/ufs/ufs.h +++ b/drivers/scsi/ufs/ufs.h @@ -512,6 +512,7 @@ struct ufs_query_res { struct ufs_vreg { struct regulator *reg; const char *name; + bool always_on; bool enabled; int min_uV; int max_uV; diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index a84a8759437a..4f3ac2566322 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -8012,7 +8012,7 @@ static int ufshcd_disable_vreg(struct device *dev, struct ufs_vreg *vreg) { int ret = 0; - if (!vreg || !vreg->enabled) + if (!vreg || !vreg->enabled || vreg->always_on) goto out; ret = regulator_disable(vreg->reg); -- cgit 1.4.1 From b3f3d31a528f78d9903253a23a5e5c6bf5280f40 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Mon, 7 Dec 2020 13:49:55 +0800 Subject: scsi: ufs-mediatek: Keep VCC always-on for specific devices For some devices which need extra delay after VCC power down, VCC shall be kept always-on in some MediaTek UFS platforms to ensure the stability of such devices because the extra delay may not be enough in those platforms. Link: https://lore.kernel.org/r/20201207054955.24366-3-stanley.chu@mediatek.com Reviewed-by: Andy Teng Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-mediatek.c | 21 +++++++++++++++++++++ drivers/scsi/ufs/ufs-mediatek.h | 1 + 2 files changed, 22 insertions(+) diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c index 3522458db3bb..80618af7c872 100644 --- a/drivers/scsi/ufs/ufs-mediatek.c +++ b/drivers/scsi/ufs/ufs-mediatek.c @@ -70,6 +70,13 @@ static bool ufs_mtk_is_va09_supported(struct ufs_hba *hba) return !!(host->caps & UFS_MTK_CAP_VA09_PWR_CTRL); } +static bool ufs_mtk_is_broken_vcc(struct ufs_hba *hba) +{ + struct ufs_mtk_host *host = ufshcd_get_variant(hba); + + return !!(host->caps & UFS_MTK_CAP_BROKEN_VCC); +} + static void ufs_mtk_cfg_unipro_cg(struct ufs_hba *hba, bool enable) { u32 tmp; @@ -514,6 +521,9 @@ static void ufs_mtk_init_host_caps(struct ufs_hba *hba) if (of_property_read_bool(np, "mediatek,ufs-disable-ah8")) host->caps |= UFS_MTK_CAP_DISABLE_AH8; + if (of_property_read_bool(np, "mediatek,ufs-broken-vcc")) + host->caps |= UFS_MTK_CAP_BROKEN_VCC; + dev_info(hba->dev, "caps: 0x%x", host->caps); } @@ -1003,6 +1013,17 @@ static int ufs_mtk_apply_dev_quirks(struct ufs_hba *hba) static void ufs_mtk_fixup_dev_quirks(struct ufs_hba *hba) { ufshcd_fixup_dev_quirks(hba, ufs_mtk_dev_fixups); + + if (ufs_mtk_is_broken_vcc(hba) && hba->vreg_info.vcc && + (hba->dev_quirks & UFS_DEVICE_QUIRK_DELAY_AFTER_LPM)) { + hba->vreg_info.vcc->always_on = true; + /* + * VCC will be kept always-on thus we don't + * need any delay during regulator operations + */ + hba->dev_quirks &= ~(UFS_DEVICE_QUIRK_DELAY_BEFORE_LPM | + UFS_DEVICE_QUIRK_DELAY_AFTER_LPM); + } } static void ufs_mtk_event_notify(struct ufs_hba *hba, diff --git a/drivers/scsi/ufs/ufs-mediatek.h b/drivers/scsi/ufs/ufs-mediatek.h index 93d35097dfb0..3f0d3bb769e8 100644 --- a/drivers/scsi/ufs/ufs-mediatek.h +++ b/drivers/scsi/ufs/ufs-mediatek.h @@ -81,6 +81,7 @@ enum ufs_mtk_host_caps { UFS_MTK_CAP_BOOST_CRYPT_ENGINE = 1 << 0, UFS_MTK_CAP_VA09_PWR_CTRL = 1 << 1, UFS_MTK_CAP_DISABLE_AH8 = 1 << 2, + UFS_MTK_CAP_BROKEN_VCC = 1 << 3, }; struct ufs_mtk_crypt_cfg { -- cgit 1.4.1 From c763729a10e538d997744317cf4a1c4f25266066 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 7 Dec 2020 10:31:17 +0200 Subject: scsi: ufs-pci: Fix restore from S4 for Intel controllers Currently, ufshcd-pci is the only UFS driver with support for suspend-to-disk PM callbacks (i.e. freeze/thaw/restore/poweroff). These callbacks are set by the macro SET_SYSTEM_SLEEP_PM_OPS to the same functions as system suspend/resume. That will work with spm_lvl 5 because spm_lvl 5 will result in a full restore for the ->restore() callback. In the absence of a full restore, the host controller registers will have values set up by the restore kernel (the kernel that boots and loads the restore image) which are not necessarily the same. However it turns out, the only registers that sometimes need restore are the base address registers. This has gone un-noticed because, depending on IOMMU settings, the kernel can end up allocating the same addresses every time. For Intel controllers, an spm_lvl other than 5 can be used, so to support S4 (suspend-to-disk) with spm_lvl other than 5, restore the base address registers. Link: https://lore.kernel.org/r/20201207083120.26732-2-adrian.hunter@intel.com Signed-off-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd-pci.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c index df3a564c3e33..360c25f1f061 100644 --- a/drivers/scsi/ufs/ufshcd-pci.c +++ b/drivers/scsi/ufs/ufshcd-pci.c @@ -163,6 +163,24 @@ static void ufs_intel_common_exit(struct ufs_hba *hba) intel_ltr_hide(hba->dev); } +static int ufs_intel_resume(struct ufs_hba *hba, enum ufs_pm_op op) +{ + /* + * To support S4 (suspend-to-disk) with spm_lvl other than 5, the base + * address registers must be restored because the restore kernel can + * have used different addresses. + */ + ufshcd_writel(hba, lower_32_bits(hba->utrdl_dma_addr), + REG_UTP_TRANSFER_REQ_LIST_BASE_L); + ufshcd_writel(hba, upper_32_bits(hba->utrdl_dma_addr), + REG_UTP_TRANSFER_REQ_LIST_BASE_H); + ufshcd_writel(hba, lower_32_bits(hba->utmrdl_dma_addr), + REG_UTP_TASK_REQ_LIST_BASE_L); + ufshcd_writel(hba, upper_32_bits(hba->utmrdl_dma_addr), + REG_UTP_TASK_REQ_LIST_BASE_H); + return 0; +} + static int ufs_intel_ehl_init(struct ufs_hba *hba) { hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8; @@ -174,6 +192,7 @@ static struct ufs_hba_variant_ops ufs_intel_cnl_hba_vops = { .init = ufs_intel_common_init, .exit = ufs_intel_common_exit, .link_startup_notify = ufs_intel_link_startup_notify, + .resume = ufs_intel_resume, }; static struct ufs_hba_variant_ops ufs_intel_ehl_hba_vops = { @@ -181,6 +200,7 @@ static struct ufs_hba_variant_ops ufs_intel_ehl_hba_vops = { .init = ufs_intel_ehl_init, .exit = ufs_intel_common_exit, .link_startup_notify = ufs_intel_link_startup_notify, + .resume = ufs_intel_resume, }; #ifdef CONFIG_PM_SLEEP -- cgit 1.4.1 From af423534d2de86cd0db729a5ac41f056ca8717de Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 7 Dec 2020 10:31:18 +0200 Subject: scsi: ufs-pci: Ensure UFS device is in PowerDown mode for suspend-to-disk ->poweroff() The expectation for suspend-to-disk is that devices will be powered-off, so the UFS device should be put in PowerDown mode. If spm_lvl is not 5, then that will not happen. Change the pm callbacks to force spm_lvl 5 for suspend-to-disk poweroff. Link: https://lore.kernel.org/r/20201207083120.26732-3-adrian.hunter@intel.com Signed-off-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd-pci.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c index 360c25f1f061..5d33c39fa82f 100644 --- a/drivers/scsi/ufs/ufshcd-pci.c +++ b/drivers/scsi/ufs/ufshcd-pci.c @@ -227,6 +227,30 @@ static int ufshcd_pci_resume(struct device *dev) { return ufshcd_system_resume(dev_get_drvdata(dev)); } + +/** + * ufshcd_pci_poweroff - suspend-to-disk poweroff function + * @dev: pointer to PCI device handle + * + * Returns 0 if successful + * Returns non-zero otherwise + */ +static int ufshcd_pci_poweroff(struct device *dev) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + int spm_lvl = hba->spm_lvl; + int ret; + + /* + * For poweroff we need to set the UFS device to PowerDown mode. + * Force spm_lvl to ensure that. + */ + hba->spm_lvl = 5; + ret = ufshcd_system_suspend(hba); + hba->spm_lvl = spm_lvl; + return ret; +} + #endif /* !CONFIG_PM_SLEEP */ #ifdef CONFIG_PM @@ -322,8 +346,14 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) } static const struct dev_pm_ops ufshcd_pci_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(ufshcd_pci_suspend, - ufshcd_pci_resume) +#ifdef CONFIG_PM_SLEEP + .suspend = ufshcd_pci_suspend, + .resume = ufshcd_pci_resume, + .freeze = ufshcd_pci_suspend, + .thaw = ufshcd_pci_resume, + .poweroff = ufshcd_pci_poweroff, + .restore = ufshcd_pci_resume, +#endif SET_RUNTIME_PM_OPS(ufshcd_pci_runtime_suspend, ufshcd_pci_runtime_resume, ufshcd_pci_runtime_idle) -- cgit 1.4.1 From 044d5bda7117891d6d0d56f2f807b7b11e120abd Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 7 Dec 2020 10:31:19 +0200 Subject: scsi: ufs-pci: Fix recovery from hibernate exit errors for Intel controllers Intel controllers can end up in an unrecoverable state after a hibernate exit error unless a full reset and restore is done before anything else. Force that to happen. Link: https://lore.kernel.org/r/20201207083120.26732-4-adrian.hunter@intel.com Signed-off-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd-pci.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c index 5d33c39fa82f..888d8c9ca3a5 100644 --- a/drivers/scsi/ufs/ufshcd-pci.c +++ b/drivers/scsi/ufs/ufshcd-pci.c @@ -178,6 +178,23 @@ static int ufs_intel_resume(struct ufs_hba *hba, enum ufs_pm_op op) REG_UTP_TASK_REQ_LIST_BASE_L); ufshcd_writel(hba, upper_32_bits(hba->utmrdl_dma_addr), REG_UTP_TASK_REQ_LIST_BASE_H); + + if (ufshcd_is_link_hibern8(hba)) { + int ret = ufshcd_uic_hibern8_exit(hba); + + if (!ret) { + ufshcd_set_link_active(hba); + } else { + dev_err(hba->dev, "%s: hibern8 exit failed %d\n", + __func__, ret); + /* + * Force reset and restore. Any other actions can lead + * to an unrecoverable state. + */ + ufshcd_set_link_off(hba); + } + } + return 0; } -- cgit 1.4.1 From dd78bdb6f810bdcb173b42379af558c676c8e0aa Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 7 Dec 2020 10:31:20 +0200 Subject: scsi: ufs-pci: Enable UFSHCD_CAP_RPM_AUTOSUSPEND for Intel controllers Enable runtime PM auto-suspend by default for Intel host controllers. Link: https://lore.kernel.org/r/20201207083120.26732-5-adrian.hunter@intel.com Signed-off-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd-pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c index 888d8c9ca3a5..fadd566025b8 100644 --- a/drivers/scsi/ufs/ufshcd-pci.c +++ b/drivers/scsi/ufs/ufshcd-pci.c @@ -148,6 +148,8 @@ static int ufs_intel_common_init(struct ufs_hba *hba) { struct intel_host *host; + hba->caps |= UFSHCD_CAP_RPM_AUTOSUSPEND; + host = devm_kzalloc(hba->dev, sizeof(*host), GFP_KERNEL); if (!host) return -ENOMEM; -- cgit 1.4.1 From fa4d0f1992a96f6d7c988ef423e3127e613f6ac9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 8 Dec 2020 21:29:44 -0800 Subject: scsi: block: Fix a race in the runtime power management code With the current implementation the following race can happen: * blk_pre_runtime_suspend() calls blk_freeze_queue_start() and blk_mq_unfreeze_queue(). * blk_queue_enter() calls blk_queue_pm_only() and that function returns true. * blk_queue_enter() calls blk_pm_request_resume() and that function does not call pm_request_resume() because the queue runtime status is RPM_ACTIVE. * blk_pre_runtime_suspend() changes the queue status into RPM_SUSPENDING. Fix this race by changing the queue runtime status into RPM_SUSPENDING before switching q_usage_counter to atomic mode. Link: https://lore.kernel.org/r/20201209052951.16136-2-bvanassche@acm.org Fixes: 986d413b7c15 ("blk-mq: Enable support for runtime power management") Cc: Ming Lei Cc: Rafael J. Wysocki Cc: stable Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Jens Axboe Acked-by: Alan Stern Acked-by: Stanley Chu Co-developed-by: Can Guo Signed-off-by: Can Guo Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- block/blk-pm.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/block/blk-pm.c b/block/blk-pm.c index b85234d758f7..17bd020268d4 100644 --- a/block/blk-pm.c +++ b/block/blk-pm.c @@ -67,6 +67,10 @@ int blk_pre_runtime_suspend(struct request_queue *q) WARN_ON_ONCE(q->rpm_status != RPM_ACTIVE); + spin_lock_irq(&q->queue_lock); + q->rpm_status = RPM_SUSPENDING; + spin_unlock_irq(&q->queue_lock); + /* * Increase the pm_only counter before checking whether any * non-PM blk_queue_enter() calls are in progress to avoid that any @@ -89,15 +93,14 @@ int blk_pre_runtime_suspend(struct request_queue *q) /* Switch q_usage_counter back to per-cpu mode. */ blk_mq_unfreeze_queue(q); - spin_lock_irq(&q->queue_lock); - if (ret < 0) + if (ret < 0) { + spin_lock_irq(&q->queue_lock); + q->rpm_status = RPM_ACTIVE; pm_runtime_mark_last_busy(q->dev); - else - q->rpm_status = RPM_SUSPENDING; - spin_unlock_irq(&q->queue_lock); + spin_unlock_irq(&q->queue_lock); - if (ret) blk_clear_pm_only(q); + } return ret; } -- cgit 1.4.1 From 0854bcdcdec26aecdc92c303816f349ee1fba2bc Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 8 Dec 2020 21:29:45 -0800 Subject: scsi: block: Introduce BLK_MQ_REQ_PM Introduce the BLK_MQ_REQ_PM flag. This flag makes the request allocation functions set RQF_PM. This is the first step towards removing BLK_MQ_REQ_PREEMPT. Link: https://lore.kernel.org/r/20201209052951.16136-3-bvanassche@acm.org Cc: Alan Stern Cc: Stanley Chu Cc: Ming Lei Cc: Rafael J. Wysocki Cc: Can Guo Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Jens Axboe Reviewed-by: Can Guo Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- block/blk-core.c | 7 ++++--- block/blk-mq.c | 2 ++ include/linux/blk-mq.h | 2 ++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 2db8bda43b6e..10696f9fb6ac 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -424,11 +424,11 @@ EXPORT_SYMBOL(blk_cleanup_queue); /** * blk_queue_enter() - try to increase q->q_usage_counter * @q: request queue pointer - * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PREEMPT + * @flags: BLK_MQ_REQ_NOWAIT, BLK_MQ_REQ_PM and/or BLK_MQ_REQ_PREEMPT */ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) { - const bool pm = flags & BLK_MQ_REQ_PREEMPT; + const bool pm = flags & (BLK_MQ_REQ_PM | BLK_MQ_REQ_PREEMPT); while (true) { bool success = false; @@ -630,7 +630,8 @@ struct request *blk_get_request(struct request_queue *q, unsigned int op, struct request *req; WARN_ON_ONCE(op & REQ_NOWAIT); - WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PREEMPT)); + WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PM | + BLK_MQ_REQ_PREEMPT)); req = blk_mq_alloc_request(q, op, flags); if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn) diff --git a/block/blk-mq.c b/block/blk-mq.c index 1b25ec2fe9be..b5880a1fb38d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -292,6 +292,8 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, rq->mq_hctx = data->hctx; rq->rq_flags = 0; rq->cmd_flags = data->cmd_flags; + if (data->flags & BLK_MQ_REQ_PM) + rq->rq_flags |= RQF_PM; if (data->flags & BLK_MQ_REQ_PREEMPT) rq->rq_flags |= RQF_PREEMPT; if (blk_queue_io_stat(data->q)) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b23eeca4d677..c00e856c6fb1 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -444,6 +444,8 @@ enum { BLK_MQ_REQ_NOWAIT = (__force blk_mq_req_flags_t)(1 << 0), /* allocate from reserved pool */ BLK_MQ_REQ_RESERVED = (__force blk_mq_req_flags_t)(1 << 1), + /* set RQF_PM */ + BLK_MQ_REQ_PM = (__force blk_mq_req_flags_t)(1 << 2), /* set RQF_PREEMPT */ BLK_MQ_REQ_PREEMPT = (__force blk_mq_req_flags_t)(1 << 3), }; -- cgit 1.4.1 From 96d86e6a80a3ab9aff81d12f9f1f2a0da2917d38 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 8 Dec 2020 21:29:46 -0800 Subject: scsi: ide: Do not set the RQF_PREEMPT flag for sense requests RQF_PREEMPT is used for two different purposes in the legacy IDE code: 1. To mark power management requests. 2. To mark requests that should preempt another request. An (old) explanation of that feature is as follows: "The IDE driver in the Linux kernel normally uses a series of busywait delays during its initialization. When the driver executes these busywaits, the kernel does nothing for the duration of the wait. The time spent in these waits could be used for other initialization activities, if they could be run concurrently with these waits. More specifically, busywait-style delays such as udelay() in module init functions inhibit kernel preemption because the Big Kernel Lock is held, while yielding APIs such as schedule_timeout() allow preemption. This is true because the kernel handles the BKL specially and releases and reacquires it across reschedules allowed by the current thread. This IDE-preempt specification requires that the driver eliminate these busywaits and replace them with a mechanism that allows other work to proceed while the IDE driver is initializing." Since I haven't found an implementation of (2), do not set the PREEMPT flag for sense requests. This patch causes sense requests to be postponed while a drive is suspended instead of being submitted to ide_queue_rq(). If it would ever be necessary to restore the IDE PREEMPT functionality, that can be done by introducing a new flag in struct ide_request. Link: https://lore.kernel.org/r/20201209052951.16136-4-bvanassche@acm.org Cc: David S. Miller Cc: Alan Stern Cc: Can Guo Cc: Stanley Chu Cc: Ming Lei Cc: Rafael J. Wysocki Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Jens Axboe Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ide/ide-atapi.c | 1 - drivers/ide/ide-io.c | 5 ----- 2 files changed, 6 deletions(-) diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 2162bc80f09e..013ad33fbbc8 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -223,7 +223,6 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) sense_rq->rq_disk = rq->rq_disk; sense_rq->cmd_flags = REQ_OP_DRV_IN; ide_req(sense_rq)->type = ATA_PRIV_SENSE; - sense_rq->rq_flags |= RQF_PREEMPT; req->cmd[0] = GPCMD_REQUEST_SENSE; req->cmd[4] = cmd_len; diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 1a53c7a75224..c210ea3bd02f 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -515,11 +515,6 @@ repeat: * above to return us whatever is in the queue. Since we call * ide_do_request() ourselves, we end up taking requests while * the queue is blocked... - * - * We let requests forced at head of queue with ide-preempt - * though. I hope that doesn't happen too much, hopefully not - * unless the subdriver triggers such a thing in its own PM - * state machine. */ if ((drive->dev_flags & IDE_DFLAG_BLOCKED) && ata_pm_request(rq) == 0 && -- cgit 1.4.1 From 5ae65383fc7633e0247c31b0c8bf0e6ea63b95a3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 8 Dec 2020 21:29:47 -0800 Subject: scsi: ide: Mark power management requests with RQF_PM instead of RQF_PREEMPT This is another step that prepares for the removal of RQF_PREEMPT. Link: https://lore.kernel.org/r/20201209052951.16136-5-bvanassche@acm.org Cc: David S. Miller Cc: Alan Stern Cc: Can Guo Cc: Stanley Chu Cc: Ming Lei Cc: Rafael J. Wysocki Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Jens Axboe Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ide/ide-io.c | 2 +- drivers/ide/ide-pm.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index c210ea3bd02f..4867b67b60d6 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -518,7 +518,7 @@ repeat: */ if ((drive->dev_flags & IDE_DFLAG_BLOCKED) && ata_pm_request(rq) == 0 && - (rq->rq_flags & RQF_PREEMPT) == 0) { + (rq->rq_flags & RQF_PM) == 0) { /* there should be no pending command at this point */ ide_unlock_port(hwif); goto plug_device; diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index 192e6c65d34e..82ab308f1aaf 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -77,7 +77,7 @@ int generic_ide_resume(struct device *dev) } memset(&rqpm, 0, sizeof(rqpm)); - rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_PREEMPT); + rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_PM); ide_req(rq)->type = ATA_PRIV_PM_RESUME; ide_req(rq)->special = &rqpm; rqpm.pm_step = IDE_PM_START_RESUME; -- cgit 1.4.1 From cfefd9f8240a7b9fdd96fcd54cb029870b6d8d88 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 8 Dec 2020 21:29:48 -0800 Subject: scsi: scsi_transport_spi: Set RQF_PM for domain validation commands Disable runtime power management during domain validation. Since a later patch removes RQF_PREEMPT, set RQF_PM for domain validation commands such that these are executed in the quiesced SCSI device state. Link: https://lore.kernel.org/r/20201209052951.16136-6-bvanassche@acm.org Cc: Alan Stern Cc: James Bottomley Cc: Woody Suwalski Cc: Can Guo Cc: Stanley Chu Cc: Ming Lei Cc: Rafael J. Wysocki Cc: Stan Johnson Reviewed-by: Christoph Hellwig Reviewed-by: Jens Axboe Reviewed-by: Hannes Reinecke Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_spi.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c index f3d5b1bbd5aa..c37dd15d16d2 100644 --- a/drivers/scsi/scsi_transport_spi.c +++ b/drivers/scsi/scsi_transport_spi.c @@ -117,12 +117,16 @@ static int spi_execute(struct scsi_device *sdev, const void *cmd, sshdr = &sshdr_tmp; for(i = 0; i < DV_RETRIES; i++) { + /* + * The purpose of the RQF_PM flag below is to bypass the + * SDEV_QUIESCE state. + */ result = scsi_execute(sdev, cmd, dir, buffer, bufflen, sense, sshdr, DV_TIMEOUT, /* retries */ 1, REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER, - 0, NULL); + RQF_PM, NULL); if (driver_byte(result) != DRIVER_SENSE || sshdr->sense_key != UNIT_ATTENTION) break; @@ -1005,23 +1009,26 @@ spi_dv_device(struct scsi_device *sdev) */ lock_system_sleep(); + if (scsi_autopm_get_device(sdev)) + goto unlock_system_sleep; + if (unlikely(spi_dv_in_progress(starget))) - goto unlock; + goto put_autopm; if (unlikely(scsi_device_get(sdev))) - goto unlock; + goto put_autopm; spi_dv_in_progress(starget) = 1; buffer = kzalloc(len, GFP_KERNEL); if (unlikely(!buffer)) - goto out_put; + goto put_sdev; /* We need to verify that the actual device will quiesce; the * later target quiesce is just a nice to have */ if (unlikely(scsi_device_quiesce(sdev))) - goto out_free; + goto free_buffer; scsi_target_quiesce(starget); @@ -1041,12 +1048,16 @@ spi_dv_device(struct scsi_device *sdev) spi_initial_dv(starget) = 1; - out_free: +free_buffer: kfree(buffer); - out_put: + +put_sdev: spi_dv_in_progress(starget) = 0; scsi_device_put(sdev); -unlock: +put_autopm: + scsi_autopm_put_device(sdev); + +unlock_system_sleep: unlock_system_sleep(); } EXPORT_SYMBOL(spi_dv_device); -- cgit 1.4.1 From e6044f714b256259df9611ff49af433e5411c5c8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 8 Dec 2020 21:29:49 -0800 Subject: scsi: core: Only process PM requests if rpm_status != RPM_ACTIVE Instead of submitting all SCSI commands submitted with scsi_execute() to a SCSI device if rpm_status != RPM_ACTIVE, only submit RQF_PM (power management requests) if rpm_status != RPM_ACTIVE. This patch makes the SCSI core handle the runtime power management status (rpm_status) as it should be handled. Link: https://lore.kernel.org/r/20201209052951.16136-7-bvanassche@acm.org Cc: Can Guo Cc: Stanley Chu Cc: Alan Stern Cc: Ming Lei Cc: Rafael J. Wysocki Cc: Martin Kepplinger Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Jens Axboe Reviewed-by: Can Guo Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index b7ac14571415..91bc39a4c3c3 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -249,7 +249,8 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, req = blk_get_request(sdev->request_queue, data_direction == DMA_TO_DEVICE ? - REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, BLK_MQ_REQ_PREEMPT); + REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, + rq_flags & RQF_PM ? BLK_MQ_REQ_PM : 0); if (IS_ERR(req)) return ret; rq = scsi_req(req); @@ -1206,6 +1207,8 @@ static blk_status_t scsi_device_state_check(struct scsi_device *sdev, struct request *req) { switch (sdev->sdev_state) { + case SDEV_CREATED: + return BLK_STS_OK; case SDEV_OFFLINE: case SDEV_TRANSPORT_OFFLINE: /* @@ -1232,18 +1235,18 @@ scsi_device_state_check(struct scsi_device *sdev, struct request *req) return BLK_STS_RESOURCE; case SDEV_QUIESCE: /* - * If the devices is blocked we defer normal commands. + * If the device is blocked we only accept power management + * commands. */ - if (req && !(req->rq_flags & RQF_PREEMPT)) + if (req && WARN_ON_ONCE(!(req->rq_flags & RQF_PM))) return BLK_STS_RESOURCE; return BLK_STS_OK; default: /* * For any other not fully online state we only allow - * special commands. In particular any user initiated - * command is not allowed. + * power management commands. */ - if (req && !(req->rq_flags & RQF_PREEMPT)) + if (req && !(req->rq_flags & RQF_PM)) return BLK_STS_IOERR; return BLK_STS_OK; } @@ -2517,15 +2520,13 @@ void sdev_evt_send_simple(struct scsi_device *sdev, EXPORT_SYMBOL_GPL(sdev_evt_send_simple); /** - * scsi_device_quiesce - Block user issued commands. + * scsi_device_quiesce - Block all commands except power management. * @sdev: scsi device to quiesce. * * This works by trying to transition to the SDEV_QUIESCE state * (which must be a legal transition). When the device is in this - * state, only special requests will be accepted, all others will - * be deferred. Since special requests may also be requeued requests, - * a successful return doesn't guarantee the device will be - * totally quiescent. + * state, only power management requests will be accepted, all others will + * be deferred. * * Must be called with user context, may sleep. * @@ -2587,12 +2588,12 @@ void scsi_device_resume(struct scsi_device *sdev) * device deleted during suspend) */ mutex_lock(&sdev->state_mutex); + if (sdev->sdev_state == SDEV_QUIESCE) + scsi_device_set_state(sdev, SDEV_RUNNING); if (sdev->quiesced_by) { sdev->quiesced_by = NULL; blk_clear_pm_only(sdev->request_queue); } - if (sdev->sdev_state == SDEV_QUIESCE) - scsi_device_set_state(sdev, SDEV_RUNNING); mutex_unlock(&sdev->state_mutex); } EXPORT_SYMBOL(scsi_device_resume); -- cgit 1.4.1 From a4d34da715e3cb7e0741fe603dcd511bed067e00 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 8 Dec 2020 21:29:50 -0800 Subject: scsi: block: Remove RQF_PREEMPT and BLK_MQ_REQ_PREEMPT Remove flag RQF_PREEMPT and BLK_MQ_REQ_PREEMPT since these are no longer used by any kernel code. Link: https://lore.kernel.org/r/20201209052951.16136-8-bvanassche@acm.org Cc: Can Guo Cc: Stanley Chu Cc: Alan Stern Cc: Ming Lei Cc: Rafael J. Wysocki Cc: Martin Kepplinger Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Jens Axboe Reviewed-by: Can Guo Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- block/blk-core.c | 7 +++---- block/blk-mq-debugfs.c | 1 - block/blk-mq.c | 2 -- include/linux/blk-mq.h | 2 -- include/linux/blkdev.h | 6 +----- 5 files changed, 4 insertions(+), 14 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 10696f9fb6ac..a00bce9f46d8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -424,11 +424,11 @@ EXPORT_SYMBOL(blk_cleanup_queue); /** * blk_queue_enter() - try to increase q->q_usage_counter * @q: request queue pointer - * @flags: BLK_MQ_REQ_NOWAIT, BLK_MQ_REQ_PM and/or BLK_MQ_REQ_PREEMPT + * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PM */ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) { - const bool pm = flags & (BLK_MQ_REQ_PM | BLK_MQ_REQ_PREEMPT); + const bool pm = flags & BLK_MQ_REQ_PM; while (true) { bool success = false; @@ -630,8 +630,7 @@ struct request *blk_get_request(struct request_queue *q, unsigned int op, struct request *req; WARN_ON_ONCE(op & REQ_NOWAIT); - WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PM | - BLK_MQ_REQ_PREEMPT)); + WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PM)); req = blk_mq_alloc_request(q, op, flags); if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 3094542e12ae..9336a6f8d6ef 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -297,7 +297,6 @@ static const char *const rqf_name[] = { RQF_NAME(MIXED_MERGE), RQF_NAME(MQ_INFLIGHT), RQF_NAME(DONTPREP), - RQF_NAME(PREEMPT), RQF_NAME(FAILED), RQF_NAME(QUIET), RQF_NAME(ELVPRIV), diff --git a/block/blk-mq.c b/block/blk-mq.c index b5880a1fb38d..d50504888b68 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -294,8 +294,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, rq->cmd_flags = data->cmd_flags; if (data->flags & BLK_MQ_REQ_PM) rq->rq_flags |= RQF_PM; - if (data->flags & BLK_MQ_REQ_PREEMPT) - rq->rq_flags |= RQF_PREEMPT; if (blk_queue_io_stat(data->q)) rq->rq_flags |= RQF_IO_STAT; INIT_LIST_HEAD(&rq->queuelist); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c00e856c6fb1..88af1df94308 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -446,8 +446,6 @@ enum { BLK_MQ_REQ_RESERVED = (__force blk_mq_req_flags_t)(1 << 1), /* set RQF_PM */ BLK_MQ_REQ_PM = (__force blk_mq_req_flags_t)(1 << 2), - /* set RQF_PREEMPT */ - BLK_MQ_REQ_PREEMPT = (__force blk_mq_req_flags_t)(1 << 3), }; struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 639cae2c158b..7d4b746f7e6a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -79,9 +79,6 @@ typedef __u32 __bitwise req_flags_t; #define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6)) /* don't call prep for this one */ #define RQF_DONTPREP ((__force req_flags_t)(1 << 7)) -/* set for "ide_preempt" requests and also for requests for which the SCSI - "quiesce" state must be ignored. */ -#define RQF_PREEMPT ((__force req_flags_t)(1 << 8)) /* vaguely specified driver internal error. Ignored by the block layer */ #define RQF_FAILED ((__force req_flags_t)(1 << 10)) /* don't warn about errors */ @@ -430,8 +427,7 @@ struct request_queue { unsigned long queue_flags; /* * Number of contexts that have called blk_set_pm_only(). If this - * counter is above zero then only RQF_PM and RQF_PREEMPT requests are - * processed. + * counter is above zero then only RQF_PM requests are processed. */ atomic_t pm_only; -- cgit 1.4.1 From 52abca64fd9410ea6c9a3a74eab25663b403d7da Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 8 Dec 2020 21:29:51 -0800 Subject: scsi: block: Do not accept any requests while suspended blk_queue_enter() accepts BLK_MQ_REQ_PM requests independent of the runtime power management state. Now that SCSI domain validation no longer depends on this behavior, modify the behavior of blk_queue_enter() as follows: - Do not accept any requests while suspended. - Only process power management requests while suspending or resuming. Submitting BLK_MQ_REQ_PM requests to a device that is runtime suspended causes runtime-suspended devices not to resume as they should. The request which should cause a runtime resume instead gets issued directly, without resuming the device first. Of course the device can't handle it properly, the I/O fails, and the device remains suspended. The problem is fixed by checking that the queue's runtime-PM status isn't RPM_SUSPENDED before allowing a request to be issued, and queuing a runtime-resume request if it is. In particular, the inline blk_pm_request_resume() routine is renamed blk_pm_resume_queue() and the code is unified by merging the surrounding checks into the routine. If the queue isn't set up for runtime PM, or there currently is no restriction on allowed requests, the request is allowed. Likewise if the BLK_MQ_REQ_PM flag is set and the status isn't RPM_SUSPENDED. Otherwise a runtime resume is queued and the request is blocked until conditions are more suitable. [ bvanassche: modified commit message and removed Cc: stable because without the previous patches from this series this patch would break parallel SCSI domain validation + introduced queue_rpm_status() ] Link: https://lore.kernel.org/r/20201209052951.16136-9-bvanassche@acm.org Cc: Jens Axboe Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Can Guo Cc: Stanley Chu Cc: Ming Lei Cc: Rafael J. Wysocki Reported-and-tested-by: Martin Kepplinger Reviewed-by: Hannes Reinecke Reviewed-by: Can Guo Signed-off-by: Alan Stern Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- block/blk-core.c | 7 ++++--- block/blk-pm.h | 14 +++++++++----- include/linux/blkdev.h | 12 ++++++++++++ 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index a00bce9f46d8..2d53e2ff48ff 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -440,7 +441,8 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) * responsible for ensuring that that counter is * globally visible before the queue is unfrozen. */ - if (pm || !blk_queue_pm_only(q)) { + if ((pm && queue_rpm_status(q) != RPM_SUSPENDED) || + !blk_queue_pm_only(q)) { success = true; } else { percpu_ref_put(&q->q_usage_counter); @@ -465,8 +467,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) wait_event(q->mq_freeze_wq, (!q->mq_freeze_depth && - (pm || (blk_pm_request_resume(q), - !blk_queue_pm_only(q)))) || + blk_pm_resume_queue(pm, q)) || blk_queue_dying(q)); if (blk_queue_dying(q)) return -ENODEV; diff --git a/block/blk-pm.h b/block/blk-pm.h index ea5507d23e75..a2283cc9f716 100644 --- a/block/blk-pm.h +++ b/block/blk-pm.h @@ -6,11 +6,14 @@ #include #ifdef CONFIG_PM -static inline void blk_pm_request_resume(struct request_queue *q) +static inline int blk_pm_resume_queue(const bool pm, struct request_queue *q) { - if (q->dev && (q->rpm_status == RPM_SUSPENDED || - q->rpm_status == RPM_SUSPENDING)) - pm_request_resume(q->dev); + if (!q->dev || !blk_queue_pm_only(q)) + return 1; /* Nothing to do */ + if (pm && q->rpm_status != RPM_SUSPENDED) + return 1; /* Request allowed */ + pm_request_resume(q->dev); + return 0; } static inline void blk_pm_mark_last_busy(struct request *rq) @@ -44,8 +47,9 @@ static inline void blk_pm_put_request(struct request *rq) --rq->q->nr_pending; } #else -static inline void blk_pm_request_resume(struct request_queue *q) +static inline int blk_pm_resume_queue(const bool pm, struct request_queue *q) { + return 1; } static inline void blk_pm_mark_last_busy(struct request *rq) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7d4b746f7e6a..2b6fc3fb3a99 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -692,6 +692,18 @@ static inline bool queue_is_mq(struct request_queue *q) return q->mq_ops; } +#ifdef CONFIG_PM +static inline enum rpm_status queue_rpm_status(struct request_queue *q) +{ + return q->rpm_status; +} +#else +static inline enum rpm_status queue_rpm_status(struct request_queue *q) +{ + return RPM_ACTIVE; +} +#endif + static inline enum blk_zoned_model blk_queue_zoned_model(struct request_queue *q) { -- cgit 1.4.1 From 8b3c8035297e71abb9e6d0f50ceab50d33c0d64b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 9 Dec 2020 17:03:11 +0300 Subject: scsi: mpt3sas: Signedness bug in _base_get_diag_triggers() The "trigger_flags" variable needs to be signed for the error checking to work. Link: https://lore.kernel.org/r/X9DZH37bYPHwSQRP@mwanda Fixes: aec93e8e2385 ("scsi: mpt3sas: Add persistent trigger pages support") Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index b129f3734ed0..26537d503a8b 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -5027,7 +5027,7 @@ _base_check_for_trigger_pages_support(struct MPT3SAS_ADAPTER *ioc) static void _base_get_diag_triggers(struct MPT3SAS_ADAPTER *ioc) { - u16 trigger_flags; + int trigger_flags; /* * Default setting of master trigger. -- cgit 1.4.1 From 5213dc7940e0aa4c094413e015790c4a310ef36c Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Wed, 9 Dec 2020 14:31:44 +0800 Subject: scsi: ufs-mediatek: Use correct path to fix compile error When the kernel is compiled with allmodconfig, the following error is reported: In file included from drivers/scsi/ufs/ufs-mediatek-trace.h:36:0, from drivers/scsi/ufs/ufs-mediatek.c:28: ./include/trace/define_trace.h:95:42: fatal error: ./ufs-mediatek-trace.h: No such file or directory #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) The comment in include/trace/define_trace.h specifies that: TRACE_INCLUDE_PATH: Note, the path is relative to define_trace.h, not the file including it. Full path names for out of tree modules must be used. So without "CFLAGS_ufs-mediatek.o := -I$(src)", the current directory "." is "include/trace/", the relative path of ufs-mediatek-trace.h is "../../drivers/scsi/ufs/". Link: https://lore.kernel.org/r/20201209063144.1840-2-thunder.leizhen@huawei.com Fixes: ca1bb061d644 ("scsi: ufs-mediatek: Introduce event_notify implementation") Reviewed-by: Stanley Chu Signed-off-by: Zhen Lei Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs-mediatek-trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufs-mediatek-trace.h b/drivers/scsi/ufs/ufs-mediatek-trace.h index fd6f84c1b4e2..895e82ea6ece 100644 --- a/drivers/scsi/ufs/ufs-mediatek-trace.h +++ b/drivers/scsi/ufs/ufs-mediatek-trace.h @@ -31,6 +31,6 @@ TRACE_EVENT(ufs_mtk_event, #undef TRACE_INCLUDE_PATH #undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_PATH ../../drivers/scsi/ufs/ #define TRACE_INCLUDE_FILE ufs-mediatek-trace #include -- cgit 1.4.1 From bd14bf0e4a084514aa62d24d2109e0f09a93822f Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Tue, 8 Dec 2020 21:56:34 +0800 Subject: scsi: ufs: Re-enable WriteBooster after device reset UFS 3.1 specification mentions that the WriteBooster flags listed below will be set to their default values, i.e. disabled, after power cycle or any type of reset event. Thus we need to reset the flag variables kept in struct hba to align with the device status and ensure that WriteBooster-related functions are configured properly after device reset. Without this fix, WriteBooster will not be enabled successfully after by ufshcd_wb_ctrl() after device reset because hba->wb_enabled remains true. Flags required to be reset to default values: - fWriteBoosterEn: hba->wb_enabled - fWriteBoosterBufferFlushEn: hba->wb_buf_flush_enabled - fWriteBoosterBufferFlushDuringHibernate: No variable mapped Link: https://lore.kernel.org/r/20201208135635.15326-2-stanley.chu@mediatek.com Fixes: 3d17b9b5ab11 ("scsi: ufs: Add write booster feature support") Reviewed-by: Bean Huo Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 08c8a591e6b0..36d367eb8139 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -1221,8 +1221,13 @@ static inline void ufshcd_vops_device_reset(struct ufs_hba *hba) if (hba->vops && hba->vops->device_reset) { int err = hba->vops->device_reset(hba); - if (!err) + if (!err) { ufshcd_set_ufs_dev_active(hba); + if (ufshcd_is_wb_allowed(hba)) { + hba->wb_enabled = false; + hba->wb_buf_flush_enabled = false; + } + } if (err != -EOPNOTSUPP) ufshcd_update_evt_hist(hba, UFS_EVT_DEV_RESET, err); } -- cgit 1.4.1 From 31a5d9cafff163473abf9496318b6a53022d48f7 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Tue, 8 Dec 2020 21:56:35 +0800 Subject: scsi: ufs: Un-inline ufshcd_vops_device_reset function More and more statements are being added to ufshcd_vops_device_reset() and this function is being called from multiple locations in the driver. Un-inline the function to allow the compiler to make better decisions. Link: https://lore.kernel.org/r/20201208135635.15326-3-stanley.chu@mediatek.com Reviewed-by: Bean Huo Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 27 ++++++++++++++++++++++----- drivers/scsi/ufs/ufshcd.h | 19 +++++-------------- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 4f3ac2566322..e221add25a7e 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -581,6 +581,23 @@ static void ufshcd_print_pwr_info(struct ufs_hba *hba) hba->pwr_info.hs_rate); } +static void ufshcd_device_reset(struct ufs_hba *hba) +{ + int err; + + err = ufshcd_vops_device_reset(hba); + + if (!err) { + ufshcd_set_ufs_dev_active(hba); + if (ufshcd_is_wb_allowed(hba)) { + hba->wb_enabled = false; + hba->wb_buf_flush_enabled = false; + } + } + if (err != -EOPNOTSUPP) + ufshcd_update_evt_hist(hba, UFS_EVT_DEV_RESET, err); +} + void ufshcd_delay_us(unsigned long us, unsigned long tolerance) { if (!us) @@ -3933,7 +3950,7 @@ int ufshcd_link_recovery(struct ufs_hba *hba) spin_unlock_irqrestore(hba->host->host_lock, flags); /* Reset the attached device */ - ufshcd_vops_device_reset(hba); + ufshcd_device_reset(hba); ret = ufshcd_host_reset_and_restore(hba); @@ -6935,7 +6952,7 @@ static int ufshcd_reset_and_restore(struct ufs_hba *hba) do { /* Reset the attached device */ - ufshcd_vops_device_reset(hba); + ufshcd_device_reset(hba); err = ufshcd_host_reset_and_restore(hba); } while (err && --retries); @@ -8708,7 +8725,7 @@ set_link_active: * further below. */ if (ufshcd_is_ufs_dev_deepsleep(hba)) { - ufshcd_vops_device_reset(hba); + ufshcd_device_reset(hba); WARN_ON(!ufshcd_is_link_off(hba)); } if (ufshcd_is_link_hibern8(hba) && !ufshcd_uic_hibern8_exit(hba)) @@ -8718,7 +8735,7 @@ set_link_active: set_dev_active: /* Can also get here needing to exit DeepSleep */ if (ufshcd_is_ufs_dev_deepsleep(hba)) { - ufshcd_vops_device_reset(hba); + ufshcd_device_reset(hba); ufshcd_host_reset_and_restore(hba); } if (!ufshcd_set_dev_pwr_mode(hba, UFS_ACTIVE_PWR_MODE)) @@ -9317,7 +9334,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) } /* Reset the attached device */ - ufshcd_vops_device_reset(hba); + ufshcd_device_reset(hba); ufshcd_init_crypto(hba); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 36d367eb8139..9bb5f0ed4124 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -1216,21 +1216,12 @@ static inline void ufshcd_vops_dbg_register_dump(struct ufs_hba *hba) hba->vops->dbg_register_dump(hba); } -static inline void ufshcd_vops_device_reset(struct ufs_hba *hba) +static inline int ufshcd_vops_device_reset(struct ufs_hba *hba) { - if (hba->vops && hba->vops->device_reset) { - int err = hba->vops->device_reset(hba); - - if (!err) { - ufshcd_set_ufs_dev_active(hba); - if (ufshcd_is_wb_allowed(hba)) { - hba->wb_enabled = false; - hba->wb_buf_flush_enabled = false; - } - } - if (err != -EOPNOTSUPP) - ufshcd_update_evt_hist(hba, UFS_EVT_DEV_RESET, err); - } + if (hba->vops && hba->vops->device_reset) + return hba->vops->device_reset(hba); + + return -EOPNOTSUPP; } static inline void ufshcd_vops_config_scaling_param(struct ufs_hba *hba, -- cgit 1.4.1 From cb5253198f10a4cd79b7523c581e6173c7d49ddb Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 8 Dec 2020 14:05:05 -0800 Subject: scsi: cxgb4i: Fix TLS dependency SCSI_CXGB4_ISCSI selects CHELSIO_T4. The latter depends on TLS || TLS=n, so since 'select' does not check dependencies of the selected symbol, SCSI_CXGB4_ISCSI should also depend on TLS || TLS=n. This prevents the following kconfig warning and restricts SCSI_CXGB4_ISCSI to 'm' whenever TLS=m. WARNING: unmet direct dependencies detected for CHELSIO_T4 Depends on [m]: NETDEVICES [=y] && ETHERNET [=y] && NET_VENDOR_CHELSIO [=y] && PCI [=y] && (IPV6 [=y] || IPV6 [=y]=n) && (TLS [=m] || TLS [=m]=n) Selected by [y]: - SCSI_CXGB4_ISCSI [=y] && SCSI_LOWLEVEL [=y] && SCSI [=y] && PCI [=y] && INET [=y] && (IPV6 [=y] || IPV6 [=y]=n) && ETHERNET [=y] Link: https://lore.kernel.org/r/20201208220505.24488-1-rdunlap@infradead.org Fixes: 7b36b6e03b0d ("[SCSI] cxgb4i v5: iscsi driver") Cc: Karen Xie Cc: linux-scsi@vger.kernel.org Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Signed-off-by: Randy Dunlap Signed-off-by: Martin K. Petersen --- drivers/scsi/cxgbi/cxgb4i/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/cxgbi/cxgb4i/Kconfig b/drivers/scsi/cxgbi/cxgb4i/Kconfig index b206e266b4e7..8b0deece9758 100644 --- a/drivers/scsi/cxgbi/cxgb4i/Kconfig +++ b/drivers/scsi/cxgbi/cxgb4i/Kconfig @@ -4,6 +4,7 @@ config SCSI_CXGB4_ISCSI depends on PCI && INET && (IPV6 || IPV6=n) depends on THERMAL || !THERMAL depends on ETHERNET + depends on TLS || TLS=n select NET_VENDOR_CHELSIO select CHELSIO_T4 select CHELSIO_LIB -- cgit 1.4.1 From 43ffe817bfe3871ffbaa1e98952a2a01b140e71e Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 10 Dec 2020 15:10:56 +0530 Subject: arm64: dts: bitmain: Use generic "ngpios" rather than "snps,nr-gpios" This is to remove similar errors as below: OF: /.../gpio-port@0: could not find phandle Commit 7569486d79ae ("gpio: dwapb: Add ngpios DT-property support") explained the reason of above errors well and added the generic "ngpios" property, let's use it. Signed-off-by: Jisheng Zhang Signed-off-by: Manivannan Sadhasivam Reviewed-by: Linus Walleij Acked-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20201210094056.54553-1-manivannan.sadhasivam@linaro.org' Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/bitmain/bm1880.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/bitmain/bm1880.dtsi b/arch/arm64/boot/dts/bitmain/bm1880.dtsi index fa6e6905f588..53a9b76057aa 100644 --- a/arch/arm64/boot/dts/bitmain/bm1880.dtsi +++ b/arch/arm64/boot/dts/bitmain/bm1880.dtsi @@ -127,7 +127,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <32>; + ngpios = <32>; reg = <0>; interrupt-controller; #interrupt-cells = <2>; @@ -145,7 +145,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <32>; + ngpios = <32>; reg = <0>; interrupt-controller; #interrupt-cells = <2>; @@ -163,7 +163,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <8>; + ngpios = <8>; reg = <0>; interrupt-controller; #interrupt-cells = <2>; -- cgit 1.4.1 From 1b04fa9900263b4e217ca2509fd778b32c2b4eb2 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Wed, 9 Dec 2020 21:27:31 +0100 Subject: rcu-tasks: Move RCU-tasks initialization to before early_initcall() PowerPC testing encountered boot failures due to RCU Tasks not being fully initialized until core_initcall() time. This commit therefore initializes RCU Tasks (along with Rude RCU and RCU Tasks Trace) just before early_initcall() time, thus allowing waiting on RCU Tasks grace periods from early_initcall() handlers. Link: https://lore.kernel.org/rcu/87eekfh80a.fsf@dja-thinkpad.axtens.net/ Fixes: 36dadef23fcc ("kprobes: Init kprobes in early_initcall") Tested-by: Daniel Axtens Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 6 ++++++ init/main.c | 1 + kernel/rcu/tasks.h | 25 +++++++++++++++++++++---- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6cdd0152c253..5c119d6cecf1 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -86,6 +86,12 @@ void rcu_sched_clock_irq(int user); void rcu_report_dead(unsigned int cpu); void rcutree_migrate_callbacks(int cpu); +#ifdef CONFIG_TASKS_RCU_GENERIC +void rcu_init_tasks_generic(void); +#else +static inline void rcu_init_tasks_generic(void) { } +#endif + #ifdef CONFIG_RCU_STALL_COMMON void rcu_sysrq_start(void); void rcu_sysrq_end(void); diff --git a/init/main.c b/init/main.c index 32b2a8affafd..9d964511fe0c 100644 --- a/init/main.c +++ b/init/main.c @@ -1512,6 +1512,7 @@ static noinline void __init kernel_init_freeable(void) init_mm_internals(); + rcu_init_tasks_generic(); do_pre_smp_initcalls(); lockup_detector_init(); diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index d5d9f2d03e8a..73bbe792fe1e 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -241,7 +241,7 @@ static int __noreturn rcu_tasks_kthread(void *arg) } } -/* Spawn RCU-tasks grace-period kthread, e.g., at core_initcall() time. */ +/* Spawn RCU-tasks grace-period kthread. */ static void __init rcu_spawn_tasks_kthread_generic(struct rcu_tasks *rtp) { struct task_struct *t; @@ -569,7 +569,6 @@ static int __init rcu_spawn_tasks_kthread(void) rcu_spawn_tasks_kthread_generic(&rcu_tasks); return 0; } -core_initcall(rcu_spawn_tasks_kthread); #ifndef CONFIG_TINY_RCU static void show_rcu_tasks_classic_gp_kthread(void) @@ -697,7 +696,6 @@ static int __init rcu_spawn_tasks_rude_kthread(void) rcu_spawn_tasks_kthread_generic(&rcu_tasks_rude); return 0; } -core_initcall(rcu_spawn_tasks_rude_kthread); #ifndef CONFIG_TINY_RCU static void show_rcu_tasks_rude_gp_kthread(void) @@ -975,6 +973,11 @@ static void rcu_tasks_trace_pregp_step(void) static void rcu_tasks_trace_pertask(struct task_struct *t, struct list_head *hop) { + // During early boot when there is only the one boot CPU, there + // is no idle task for the other CPUs. Just return. + if (unlikely(t == NULL)) + return; + WRITE_ONCE(t->trc_reader_special.b.need_qs, false); WRITE_ONCE(t->trc_reader_checked, false); t->trc_ipi_to_cpu = -1; @@ -1200,7 +1203,6 @@ static int __init rcu_spawn_tasks_trace_kthread(void) rcu_spawn_tasks_kthread_generic(&rcu_tasks_trace); return 0; } -core_initcall(rcu_spawn_tasks_trace_kthread); #ifndef CONFIG_TINY_RCU static void show_rcu_tasks_trace_gp_kthread(void) @@ -1229,6 +1231,21 @@ void show_rcu_tasks_gp_kthreads(void) } #endif /* #ifndef CONFIG_TINY_RCU */ +void __init rcu_init_tasks_generic(void) +{ +#ifdef CONFIG_TASKS_RCU + rcu_spawn_tasks_kthread(); +#endif + +#ifdef CONFIG_TASKS_RUDE_RCU + rcu_spawn_tasks_rude_kthread(); +#endif + +#ifdef CONFIG_TASKS_TRACE_RCU + rcu_spawn_tasks_trace_kthread(); +#endif +} + #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */ static inline void rcu_tasks_bootup_oddness(void) {} void show_rcu_tasks_gp_kthreads(void) {} -- cgit 1.4.1 From 57f04815fd95bb8c46f6ec5c9d25430bb52d419f Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 10 Dec 2020 09:40:28 -0800 Subject: drm/msm: Fix WARN_ON() splat in _free_object() [ 192.062000] ------------[ cut here ]------------ [ 192.062498] WARNING: CPU: 3 PID: 2039 at drivers/gpu/drm/msm/msm_gem.c:381 put_iova_vmas+0x94/0xa0 [msm] [ 192.062870] Modules linked in: snd_hrtimer snd_seq snd_seq_device rfcomm algif_hash algif_skcipher af_alg bnep xt_CHECKSUM nft_chain_nat xt_MASQUERADE nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nft_counter xt_tcpudp nft_compat cpufreq_powersave cpufreq_conservative q6asm_dai q6routing q6afe_dai q6adm bridge q6afe q6asm q6dsp_common q6core stp llc nf_tables libcrc32c nfnetlink snd_soc_wsa881x regmap_sdw soundwire_qcom gpio_wcd934x snd_soc_wcd934x wcd934x regmap_slimbus venus_enc venus_dec apr videobuf2_dma_sg qrtr_smd uvcvideo videobuf2_vmalloc videobuf2_memops ath10k_snoc ath10k_core hci_uart btqca btbcm mac80211 bluetooth snd_soc_sdm845 ath snd_soc_rt5663 snd_soc_qcom_common snd_soc_rl6231 soundwire_bus ecdh_generic ecc qcom_spmi_adc5 venus_core qcom_pon qcom_spmi_temp_alarm qcom_vadc_common v4l2_mem2mem videobuf2_v4l2 cfg80211 videobuf2_common hid_multitouch reset_qcom_pdc qcrypto qcom_rng rfkill qcom_q6v5_mss libarc4 libdes qrtr ns qcom_wdt socinfo slim_qcom_ngd_ctrl [ 192.065739] pdr_interface qcom_q6v5_pas slimbus qcom_pil_info qcom_q6v5 qcom_sysmon qcom_common qcom_glink_smem qmi_helpers rmtfs_mem tcp_bbr sch_fq fuse ip_tables x_tables ipv6 crc_ccitt ti_sn65dsi86 i2c_hid msm mdt_loader llcc_qcom rtc_pm8xxx ocmem drm_kms_helper crct10dif_ce phy_qcom_qusb2 i2c_qcom_geni panel_simple drm pwm_bl [ 192.066066] CPU: 3 PID: 2039 Comm: gnome-shell Tainted: G W 5.10.0-rc7-next-20201208 #1 [ 192.066068] Hardware name: LENOVO 81JL/LNVNB161216, BIOS 9UCN33WW(V2.06) 06/ 4/2019 [ 192.066072] pstate: 40400005 (nZcv daif +PAN -UAO -TCO BTYPE=--) [ 192.066099] pc : put_iova_vmas+0x94/0xa0 [msm] [ 192.066262] lr : put_iova_vmas+0x1c/0xa0 [msm] [ 192.066403] sp : ffff800019efbbb0 [ 192.066405] x29: ffff800019efbbb0 x28: ffff800019efbd88 [ 192.066411] x27: 0000000000000000 x26: ffff109582efa400 [ 192.066417] x25: 0000000000000009 x24: 000000000000012b [ 192.066422] x23: ffff109582efa438 x22: ffff109582efa450 [ 192.066427] x21: ffff109582efa528 x20: ffff1095cbd4f200 [ 192.066432] x19: ffff1095cbd4f200 x18: 0000000000000000 [ 192.066438] x17: 0000000000000000 x16: ffffc26c200ca750 [ 192.066727] x15: 0000000000000000 x14: 0000000000000000 [ 192.066741] x13: ffff1096fb8c9100 x12: 0000000000000002 [ 192.066754] x11: ffffffffffffffff x10: 0000000000000002 [ 192.067046] x9 : 0000000000000001 x8 : 0000000000000a36 [ 192.067060] x7 : ffff4e2ad9f11000 x6 : ffffc26c216d4000 [ 192.067212] x5 : ffffc26c2022661c x4 : ffff1095c2b98000 [ 192.067367] x3 : ffff1095cbd4f300 x2 : 0000000000000000 [ 192.067380] x1 : ffff1095c2b98000 x0 : 0000000000000000 [ 192.067667] Call trace: [ 192.067734] put_iova_vmas+0x94/0xa0 [msm] [ 192.068078] msm_gem_free_object+0xb4/0x110 [msm] [ 192.068399] drm_gem_object_free+0x1c/0x30 [drm] [ 192.068717] drm_gem_object_handle_put_unlocked+0xf0/0xf8 [drm] [ 192.069032] drm_gem_object_release_handle+0x6c/0x88 [drm] [ 192.069349] drm_gem_handle_delete+0x68/0xc0 [drm] [ 192.069666] drm_gem_close_ioctl+0x30/0x48 [drm] [ 192.069984] drm_ioctl_kernel+0xc0/0x110 [drm] [ 192.070303] drm_ioctl+0x210/0x440 [drm] [ 192.070588] __arm64_sys_ioctl+0xa8/0xf0 [ 192.070599] el0_svc_common.constprop.0+0x74/0x190 [ 192.070608] do_el0_svc+0x24/0x90 [ 192.070618] el0_svc+0x14/0x20 [ 192.070903] el0_sync_handler+0xb0/0xb8 [ 192.070911] el0_sync+0x174/0x180 [ 192.070918] ---[ end trace bee6b12a899001a3 ]--- [ 192.072140] ------------[ cut here ]------------ Fixes: 9b73bde39cf2 ("drm/msm: Fix use-after-free in msm_gem with carveout") Signed-off-by: Rob Clark Acked-by: Iskren Chernev --- drivers/gpu/drm/msm/msm_gem.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 68a6c7eacc0a..a21be5b910ff 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -990,6 +990,8 @@ void msm_gem_free_object(struct drm_gem_object *obj) if (msm_obj->pages) kvfree(msm_obj->pages); + put_iova_vmas(obj); + /* dma_buf_detach() grabs resv lock, so we need to unlock * prior to drm_prime_gem_destroy */ @@ -999,11 +1001,10 @@ void msm_gem_free_object(struct drm_gem_object *obj) } else { msm_gem_vunmap(obj); put_pages(obj); + put_iova_vmas(obj); msm_gem_unlock(obj); } - put_iova_vmas(obj); - drm_gem_object_release(obj); kfree(msm_obj); -- cgit 1.4.1 From 161b838e25c6f83495e27e3f546b893622d442bf Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 14 Dec 2020 23:40:15 +0000 Subject: netfilter: nftables: fix incorrect increment of loop counter The intention of the err_expr cleanup path is to iterate over the allocated expr_array objects and free them, starting from i - 1 and working down to the start of the array. Currently the loop counter is being incremented instead of decremented and also the index i is being used instead of k, repeatedly destroying the same expr_array element. Fix this by decrementing k and using k as the index into expr_array. Addresses-Coverity: ("Infinite loop") Fixes: 8cfd9b0f8515 ("netfilter: nftables: generalize set expressions support") Signed-off-by: Colin Ian King Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8d5aa0ac45f4..4186b1e52d58 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5254,8 +5254,8 @@ static int nft_set_elem_expr_clone(const struct nft_ctx *ctx, return 0; err_expr: - for (k = i - 1; k >= 0; k++) - nft_expr_destroy(ctx, expr_array[i]); + for (k = i - 1; k >= 0; k--) + nft_expr_destroy(ctx, expr_array[k]); return -ENOMEM; } -- cgit 1.4.1 From df9716ec9ade3d2e190a2aac199557d30a3a8416 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 15 Dec 2020 13:20:24 +0000 Subject: regulator: pf8x00: Use specific compatible strings for devices The pf8x00 driver supports three devices, the DT compatible strings and I2C IDs should enumerate these specifically rather than using a wildcard so that we don't collide with anything incompatible in the same ID range in the future and so that we can handle any software visible differences between the variants we find. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20201215132024.13356-1-broonie@kernel.org Signed-off-by: Mark Brown --- .../devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml | 6 ++++-- drivers/regulator/pf8x00-regulator.c | 8 ++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml b/Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml index a6c259ce9785..956156fe52a3 100644 --- a/Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml @@ -19,7 +19,9 @@ description: | properties: compatible: enum: - - nxp,pf8x00 + - nxp,pf8100 + - nxp,pf8121a + - nxp,pf8200 reg: maxItems: 1 @@ -118,7 +120,7 @@ examples: #size-cells = <0>; pmic@8 { - compatible = "nxp,pf8x00"; + compatible = "nxp,pf8100"; reg = <0x08>; regulators { diff --git a/drivers/regulator/pf8x00-regulator.c b/drivers/regulator/pf8x00-regulator.c index 308c27fa6ea8..af9918cd27aa 100644 --- a/drivers/regulator/pf8x00-regulator.c +++ b/drivers/regulator/pf8x00-regulator.c @@ -469,13 +469,17 @@ static int pf8x00_i2c_probe(struct i2c_client *client) } static const struct of_device_id pf8x00_dt_ids[] = { - { .compatible = "nxp,pf8x00",}, + { .compatible = "nxp,pf8100",}, + { .compatible = "nxp,pf8121a",}, + { .compatible = "nxp,pf8200",}, { } }; MODULE_DEVICE_TABLE(of, pf8x00_dt_ids); static const struct i2c_device_id pf8x00_i2c_id[] = { - { "pf8x00", 0 }, + { "pf8100", 0 }, + { "pf8121a", 0 }, + { "pf8200", 0 }, {}, }; MODULE_DEVICE_TABLE(i2c, pf8x00_i2c_id); -- cgit 1.4.1 From 2d18e54dd8662442ef5898c6bdadeaf90b3cebbc Mon Sep 17 00:00:00 2001 From: Qinglang Miao Date: Thu, 10 Dec 2020 09:29:43 +0800 Subject: cgroup: Fix memory leak when parsing multiple source parameters A memory leak is found in cgroup1_parse_param() when multiple source parameters overwrite fc->source in the fs_context struct without free. unreferenced object 0xffff888100d930e0 (size 16): comm "mount", pid 520, jiffies 4303326831 (age 152.783s) hex dump (first 16 bytes): 74 65 73 74 6c 65 61 6b 00 00 00 00 00 00 00 00 testleak........ backtrace: [<000000003e5023ec>] kmemdup_nul+0x2d/0xa0 [<00000000377dbdaa>] vfs_parse_fs_string+0xc0/0x150 [<00000000cb2b4882>] generic_parse_monolithic+0x15a/0x1d0 [<000000000f750198>] path_mount+0xee1/0x1820 [<0000000004756de2>] do_mount+0xea/0x100 [<0000000094cafb0a>] __x64_sys_mount+0x14b/0x1f0 Fix this bug by permitting a single source parameter and rejecting with an error all subsequent ones. Fixes: 8d2451f4994f ("cgroup1: switch to option-by-option parsing") Reported-by: Hulk Robot Signed-off-by: Qinglang Miao Reviewed-by: Zefan Li Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup-v1.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 191c329e482a..32596fdbcd5b 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -908,6 +908,8 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) opt = fs_parse(fc, cgroup1_fs_parameters, param, &result); if (opt == -ENOPARAM) { if (strcmp(param->key, "source") == 0) { + if (fc->source) + return invalf(fc, "Multiple sources not supported"); fc->source = param->string; param->string = NULL; return 0; -- cgit 1.4.1 From 1a3449c19407a28f7019a887cdf0d6ba2444751a Mon Sep 17 00:00:00 2001 From: Kamal Mostafa Date: Tue, 15 Dec 2020 10:20:10 -0800 Subject: selftests/bpf: Clarify build error if no vmlinux If Makefile cannot find any of the vmlinux's in its VMLINUX_BTF_PATHS list, it tries to run btftool incorrectly, with VMLINUX_BTF unset: bpftool btf dump file $(VMLINUX_BTF) format c Such that the keyword 'format' is misinterpreted as the path to vmlinux. The resulting build error message is fairly cryptic: GEN vmlinux.h Error: failed to load BTF from format: No such file or directory This patch makes the failure reason clearer by yielding this instead: Makefile:...: *** Cannot find a vmlinux for VMLINUX_BTF at any of "{paths}". Stop. Fixes: acbd06206bbb ("selftests/bpf: Add vmlinux.h selftest exercising tracing of syscalls") Signed-off-by: Kamal Mostafa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201215182011.15755-1-kamal@canonical.com --- tools/testing/selftests/bpf/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 8c33e999319a..c51df6b91bef 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -121,6 +121,9 @@ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ /sys/kernel/btf/vmlinux \ /boot/vmlinux-$(shell uname -r) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) +ifeq ($(VMLINUX_BTF),) +$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)") +endif # Define simple and short `make test_progs`, `make test_sysctl`, etc targets # to build individual tests. -- cgit 1.4.1 From 81e7eb5bf08f36d34495a5898f6ef3fec05d9776 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 16 Dec 2020 22:43:44 -0500 Subject: Revert "Revert "scsi: megaraid_sas: Added support for shared host tagset for cpuhotplug"" This reverts commit 1a0e1943d8798cb3241fb5edb9a836af1611b60a. Commit b3c6a5997541 ("block: Fix a lockdep complaint triggered by request queue flushing") has been reverted and commit fb01a2932e81 has been introduced in its place. Consequently, it is now safe to reinstate the megaraid_sas tagset changes that led to boot problems in 5.10. Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 39 +++++++++++++++++++++++++++++ drivers/scsi/megaraid/megaraid_sas_fusion.c | 29 +++++++++++---------- 2 files changed, 55 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index e158d3d62056..41cd66fc7d81 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -113,6 +114,10 @@ unsigned int enable_sdev_max_qd; module_param(enable_sdev_max_qd, int, 0444); MODULE_PARM_DESC(enable_sdev_max_qd, "Enable sdev max qd as can_queue. Default: 0"); +int host_tagset_enable = 1; +module_param(host_tagset_enable, int, 0444); +MODULE_PARM_DESC(host_tagset_enable, "Shared host tagset enable/disable Default: enable(1)"); + MODULE_LICENSE("GPL"); MODULE_VERSION(MEGASAS_VERSION); MODULE_AUTHOR("megaraidlinux.pdl@broadcom.com"); @@ -3119,6 +3124,19 @@ megasas_bios_param(struct scsi_device *sdev, struct block_device *bdev, return 0; } +static int megasas_map_queues(struct Scsi_Host *shost) +{ + struct megasas_instance *instance; + + instance = (struct megasas_instance *)shost->hostdata; + + if (shost->nr_hw_queues == 1) + return 0; + + return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], + instance->pdev, instance->low_latency_index_start); +} + static void megasas_aen_polling(struct work_struct *work); /** @@ -3427,6 +3445,7 @@ static struct scsi_host_template megasas_template = { .eh_timed_out = megasas_reset_timer, .shost_attrs = megaraid_host_attrs, .bios_param = megasas_bios_param, + .map_queues = megasas_map_queues, .change_queue_depth = scsi_change_queue_depth, .max_segment_size = 0xffffffff, }; @@ -6808,6 +6827,26 @@ static int megasas_io_attach(struct megasas_instance *instance) host->max_lun = MEGASAS_MAX_LUN; host->max_cmd_len = 16; + /* Use shared host tagset only for fusion adaptors + * if there are managed interrupts (smp affinity enabled case). + * Single msix_vectors in kdump, so shared host tag is also disabled. + */ + + host->host_tagset = 0; + host->nr_hw_queues = 1; + + if ((instance->adapter_type != MFI_SERIES) && + (instance->msix_vectors > instance->low_latency_index_start) && + host_tagset_enable && + instance->smp_affinity_enable) { + host->host_tagset = 1; + host->nr_hw_queues = instance->msix_vectors - + instance->low_latency_index_start; + } + + dev_info(&instance->pdev->dev, + "Max firmware commands: %d shared with nr_hw_queues = %d\n", + instance->max_fw_cmds, host->nr_hw_queues); /* * Notify the mid-layer about the new controller */ diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index b0c01cf0428f..fd607287608e 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -359,24 +359,29 @@ megasas_get_msix_index(struct megasas_instance *instance, { int sdev_busy; - /* nr_hw_queue = 1 for MegaRAID */ - struct blk_mq_hw_ctx *hctx = - scmd->device->request_queue->queue_hw_ctx[0]; - - sdev_busy = atomic_read(&hctx->nr_active); + /* TBD - if sml remove device_busy in future, driver + * should track counter in internal structure. + */ + sdev_busy = atomic_read(&scmd->device->device_busy); if (instance->perf_mode == MR_BALANCED_PERF_MODE && - sdev_busy > (data_arms * MR_DEVICE_HIGH_IOPS_DEPTH)) + sdev_busy > (data_arms * MR_DEVICE_HIGH_IOPS_DEPTH)) { cmd->request_desc->SCSIIO.MSIxIndex = mega_mod64((atomic64_add_return(1, &instance->high_iops_outstanding) / MR_HIGH_IOPS_BATCH_COUNT), instance->low_latency_index_start); - else if (instance->msix_load_balance) + } else if (instance->msix_load_balance) { cmd->request_desc->SCSIIO.MSIxIndex = (mega_mod64(atomic64_add_return(1, &instance->total_io_count), instance->msix_vectors)); - else + } else if (instance->host->nr_hw_queues > 1) { + u32 tag = blk_mq_unique_tag(scmd->request); + + cmd->request_desc->SCSIIO.MSIxIndex = blk_mq_unique_tag_to_hwq(tag) + + instance->low_latency_index_start; + } else { cmd->request_desc->SCSIIO.MSIxIndex = instance->reply_map[raw_smp_processor_id()]; + } } /** @@ -956,9 +961,6 @@ megasas_alloc_cmds_fusion(struct megasas_instance *instance) if (megasas_alloc_cmdlist_fusion(instance)) goto fail_exit; - dev_info(&instance->pdev->dev, "Configured max firmware commands: %d\n", - instance->max_fw_cmds); - /* The first 256 bytes (SMID 0) is not used. Don't add to the cmd list */ io_req_base = fusion->io_request_frames + MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE; io_req_base_phys = fusion->io_request_frames_phys + MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE; @@ -1102,8 +1104,9 @@ megasas_ioc_init_fusion(struct megasas_instance *instance) MR_HIGH_IOPS_QUEUE_COUNT) && cur_intr_coalescing) instance->perf_mode = MR_BALANCED_PERF_MODE; - dev_info(&instance->pdev->dev, "Performance mode :%s\n", - MEGASAS_PERF_MODE_2STR(instance->perf_mode)); + dev_info(&instance->pdev->dev, "Performance mode :%s (latency index = %d)\n", + MEGASAS_PERF_MODE_2STR(instance->perf_mode), + instance->low_latency_index_start); instance->fw_sync_cache_support = (scratch_pad_1 & MR_CAN_HANDLE_SYNC_CACHE_OFFSET) ? 1 : 0; -- cgit 1.4.1 From 292bff9480c8d52fc58028979c4162abd83f1aec Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 14 Dec 2020 23:24:17 +0000 Subject: ath11k: add missing null check on allocated skb Currently the null check on a newly allocated skb is missing and this can lead to a null pointer dereference is the allocation fails. Fix this by adding a null check and returning -ENOMEM. Addresses-Coverity: ("Dereference null return") Fixes: 43ed15e1ee01 ("ath11k: put hw to DBS using WMI_PDEV_SET_HW_MODE_CMDID") Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201214232417.84556-1-colin.king@canonical.com --- drivers/net/wireless/ath/ath11k/wmi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index da4b546b62cb..73869d445c5b 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -3460,6 +3460,9 @@ int ath11k_wmi_set_hw_mode(struct ath11k_base *ab, len = sizeof(*cmd); skb = ath11k_wmi_alloc_skb(wmi_ab, len); + if (!skb) + return -ENOMEM; + cmd = (struct wmi_pdev_set_hw_mode_cmd_param *)skb->data; cmd->tlv_header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_PDEV_SET_HW_MODE_CMD) | -- cgit 1.4.1 From 3597010630d0aa96f5778901e691c6068bb86318 Mon Sep 17 00:00:00 2001 From: Carl Huang Date: Fri, 11 Dec 2020 00:56:13 -0500 Subject: ath11k: fix crash caused by NULL rx_channel During connect and disconnect stress test, crashed happened because ar->rx_channel is NULL. Fix it by checking whether ar->rx_channel is NULL. Crash stack is as below: RIP: 0010:ath11k_dp_rx_h_ppdu+0x110/0x230 [ath11k] [ 5028.808963] ath11k_dp_rx_wbm_err+0x14a/0x360 [ath11k] [ 5028.808970] ath11k_dp_rx_process_wbm_err+0x41c/0x520 [ath11k] [ 5028.808978] ath11k_dp_service_srng+0x25e/0x2d0 [ath11k] [ 5028.808982] ath11k_pci_ext_grp_napi_poll+0x23/0x80 [ath11k_pci] [ 5028.808986] net_rx_action+0x27e/0x400 [ 5028.808990] __do_softirq+0xfd/0x2bb [ 5028.808993] irq_exit+0xa6/0xb0 [ 5028.808995] do_IRQ+0x56/0xe0 [ 5028.808997] common_interrupt+0xf/0xf Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1 Signed-off-by: Carl Huang Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201211055613.9310-1-cjhuang@codeaurora.org --- drivers/net/wireless/ath/ath11k/dp_rx.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index 205c0f1a40e9..920e5026a635 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c @@ -2294,6 +2294,7 @@ static void ath11k_dp_rx_h_ppdu(struct ath11k *ar, struct hal_rx_desc *rx_desc, { u8 channel_num; u32 center_freq; + struct ieee80211_channel *channel; rx_status->freq = 0; rx_status->rate_idx = 0; @@ -2314,9 +2315,12 @@ static void ath11k_dp_rx_h_ppdu(struct ath11k *ar, struct hal_rx_desc *rx_desc, rx_status->band = NL80211_BAND_5GHZ; } else { spin_lock_bh(&ar->data_lock); - rx_status->band = ar->rx_channel->band; - channel_num = - ieee80211_frequency_to_channel(ar->rx_channel->center_freq); + channel = ar->rx_channel; + if (channel) { + rx_status->band = channel->band; + channel_num = + ieee80211_frequency_to_channel(channel->center_freq); + } spin_unlock_bh(&ar->data_lock); ath11k_dbg_dump(ar->ab, ATH11K_DBG_DATA, NULL, "rx_desc: ", rx_desc, sizeof(struct hal_rx_desc)); -- cgit 1.4.1 From aa44b2f3ecd41f90b7e477158036648a49d21a32 Mon Sep 17 00:00:00 2001 From: Carl Huang Date: Fri, 11 Dec 2020 00:13:58 -0500 Subject: ath11k: start vdev if a bss peer is already created For QCA6390, bss peer must be created before vdev is to start. This change is to start vdev if a bss peer is created. Otherwise, ath11k delays to start vdev. This fixes an issue in a case where HT/VHT/HE settings change between authentication and association, e.g., due to the user space request to disable HT. Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1 Signed-off-by: Carl Huang Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201211051358.9191-1-cjhuang@codeaurora.org --- drivers/net/wireless/ath/ath11k/mac.c | 8 ++++++-- drivers/net/wireless/ath/ath11k/peer.c | 17 +++++++++++++++++ drivers/net/wireless/ath/ath11k/peer.h | 2 ++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 5c175e3e09b2..c1608f64ea95 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -3021,6 +3021,7 @@ static int ath11k_mac_station_add(struct ath11k *ar, } if (ab->hw_params.vdev_start_delay && + !arvif->is_started && arvif->vdev_type != WMI_VDEV_TYPE_AP) { ret = ath11k_start_vdev_delay(ar->hw, vif); if (ret) { @@ -5284,7 +5285,8 @@ ath11k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw, /* for QCA6390 bss peer must be created before vdev_start */ if (ab->hw_params.vdev_start_delay && arvif->vdev_type != WMI_VDEV_TYPE_AP && - arvif->vdev_type != WMI_VDEV_TYPE_MONITOR) { + arvif->vdev_type != WMI_VDEV_TYPE_MONITOR && + !ath11k_peer_find_by_vdev_id(ab, arvif->vdev_id)) { memcpy(&arvif->chanctx, ctx, sizeof(*ctx)); ret = 0; goto out; @@ -5295,7 +5297,9 @@ ath11k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw, goto out; } - if (ab->hw_params.vdev_start_delay) { + if (ab->hw_params.vdev_start_delay && + (arvif->vdev_type == WMI_VDEV_TYPE_AP || + arvif->vdev_type == WMI_VDEV_TYPE_MONITOR)) { param.vdev_id = arvif->vdev_id; param.peer_type = WMI_PEER_TYPE_DEFAULT; param.peer_addr = ar->mac_addr; diff --git a/drivers/net/wireless/ath/ath11k/peer.c b/drivers/net/wireless/ath/ath11k/peer.c index 1866d82678fa..b69e7ebfa930 100644 --- a/drivers/net/wireless/ath/ath11k/peer.c +++ b/drivers/net/wireless/ath/ath11k/peer.c @@ -76,6 +76,23 @@ struct ath11k_peer *ath11k_peer_find_by_id(struct ath11k_base *ab, return NULL; } +struct ath11k_peer *ath11k_peer_find_by_vdev_id(struct ath11k_base *ab, + int vdev_id) +{ + struct ath11k_peer *peer; + + spin_lock_bh(&ab->base_lock); + + list_for_each_entry(peer, &ab->peers, list) { + if (vdev_id == peer->vdev_id) { + spin_unlock_bh(&ab->base_lock); + return peer; + } + } + spin_unlock_bh(&ab->base_lock); + return NULL; +} + void ath11k_peer_unmap_event(struct ath11k_base *ab, u16 peer_id) { struct ath11k_peer *peer; diff --git a/drivers/net/wireless/ath/ath11k/peer.h b/drivers/net/wireless/ath/ath11k/peer.h index bba2e00b6944..8553ed061aea 100644 --- a/drivers/net/wireless/ath/ath11k/peer.h +++ b/drivers/net/wireless/ath/ath11k/peer.h @@ -43,5 +43,7 @@ int ath11k_peer_create(struct ath11k *ar, struct ath11k_vif *arvif, struct ieee80211_sta *sta, struct peer_create_params *param); int ath11k_wait_for_peer_delete_done(struct ath11k *ar, u32 vdev_id, const u8 *addr); +struct ath11k_peer *ath11k_peer_find_by_vdev_id(struct ath11k_base *ab, + int vdev_id); #endif /* _PEER_H_ */ -- cgit 1.4.1 From 9b09456258ea2f35fc8a99c4ac4829dcba0ca4be Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 16 Dec 2020 11:31:19 +0300 Subject: ath11k: Fix error code in ath11k_core_suspend() The "if (!ret)" condition is inverted and it should be "if (ret)". It means that we return success when we had intended to return an error code. This also caused a spurious warning even when the suspend was successful: [ 297.186612] ath11k_pci 0000:06:00.0: failed to suspend hif: 0 Fixes: d1b0c33850d2 ("ath11k: implement suspend for QCA6390 PCI devices") Signed-off-by: Dan Carpenter Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/X9nF17L2/EKOSbn/@mwanda --- drivers/net/wireless/ath/ath11k/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index b97c38b9a270..350b7913622c 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -185,7 +185,7 @@ int ath11k_core_suspend(struct ath11k_base *ab) ath11k_hif_ce_irq_disable(ab); ret = ath11k_hif_suspend(ab); - if (!ret) { + if (ret) { ath11k_warn(ab, "failed to suspend hif: %d\n", ret); return ret; } -- cgit 1.4.1 From 30d085039314fcad2c2e33a2dfc8e79765ddf408 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 16 Dec 2020 11:32:12 +0300 Subject: ath11k: Fix ath11k_pci_fix_l1ss() All these conditions are reversed so presumably most of the function is dead code. This caused a spurious warning: [ 95.734922] ath11k_pci 0000:06:00.0: failed to set sysclk: 0 Fixes: 0699940755e9 ("ath11k: pci: fix L1ss clock unstable problem") Signed-off-by: Dan Carpenter Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/X9nGDHiTh+Z+asDy@mwanda --- drivers/net/wireless/ath/ath11k/pci.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c index 857647aa57c8..9f9a824a4c2d 100644 --- a/drivers/net/wireless/ath/ath11k/pci.c +++ b/drivers/net/wireless/ath/ath11k/pci.c @@ -274,7 +274,7 @@ static int ath11k_pci_fix_l1ss(struct ath11k_base *ab) PCIE_QSERDES_COM_SYSCLK_EN_SEL_REG, PCIE_QSERDES_COM_SYSCLK_EN_SEL_VAL, PCIE_QSERDES_COM_SYSCLK_EN_SEL_MSK); - if (!ret) { + if (ret) { ath11k_warn(ab, "failed to set sysclk: %d\n", ret); return ret; } @@ -283,7 +283,7 @@ static int ath11k_pci_fix_l1ss(struct ath11k_base *ab) PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG1_REG, PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG1_VAL, PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG_MSK); - if (!ret) { + if (ret) { ath11k_warn(ab, "failed to set dtct config1 error: %d\n", ret); return ret; } @@ -292,7 +292,7 @@ static int ath11k_pci_fix_l1ss(struct ath11k_base *ab) PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG2_REG, PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG2_VAL, PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG_MSK); - if (!ret) { + if (ret) { ath11k_warn(ab, "failed to set dtct config2: %d\n", ret); return ret; } @@ -301,7 +301,7 @@ static int ath11k_pci_fix_l1ss(struct ath11k_base *ab) PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG4_REG, PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG4_VAL, PCIE_USB3_PCS_MISC_OSC_DTCT_CONFIG_MSK); - if (!ret) { + if (ret) { ath11k_warn(ab, "failed to set dtct config4: %d\n", ret); return ret; } -- cgit 1.4.1 From e7f6f893ac39c8715d959ff8d677645ef5e0f8b4 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 8 Dec 2020 10:20:15 +0100 Subject: mt76: mt76u: fix NULL pointer dereference in mt76u_status_worker Fix the following NULL pointer dereference in mt76u_status_worker that can occur if status thread runs before allocating tx queues [ 31.395373] BUG: kernel NULL pointer dereference, address: 000000000000002c [ 31.395769] #PF: supervisor read access in kernel mode [ 31.395985] #PF: error_code(0x0000) - not-present page [ 31.396178] PGD 0 P4D 0 [ 31.396277] Oops: 0000 [#1] SMP [ 31.396430] CPU: 3 PID: 337 Comm: mt76-usb-status Not tainted 5.10.0-rc1-kvm+ #49 [ 31.396703] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-3.fc33 04/01/2014 [ 31.397048] RIP: 0010:mt76u_status_worker+0x2b/0x190 [ 31.397931] RSP: 0018:ffffc9000076fe98 EFLAGS: 00010282 [ 31.398118] RAX: 0000000000000001 RBX: ffff888111203fe8 RCX: 0000000000000000 [ 31.398400] RDX: 0000000000000001 RSI: 0000000000000246 RDI: ffff888111203fe8 [ 31.398668] RBP: ffff888111201d00 R08: 000000000000038c R09: 000000000000009b [ 31.398952] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ 31.399235] R13: 0000000000000000 R14: 0000000000000000 R15: ffff88810c987300 [ 31.399494] FS: 0000000000000000(0000) GS:ffff88817bd80000(0000) knlGS:0000000000000000 [ 31.399767] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 31.399991] CR2: 000000000000002c CR3: 0000000103525000 CR4: 00000000000006a0 [ 31.400236] Call Trace: [ 31.400348] ? schedule+0x3e/0xa0 [ 31.400514] __mt76_worker_fn+0x71/0xa0 [ 31.400634] ? mt76_get_min_avg_rssi+0x110/0x110 [ 31.400827] kthread+0x118/0x130 [ 31.400984] ? __kthread_bind_mask+0x60/0x60 [ 31.401212] ret_from_fork+0x1f/0x30 [ 31.401353] Modules linked in: [ 31.401480] CR2: 000000000000002c [ 31.401627] ---[ end trace 8bf174505cc34851 ]--- [ 31.401798] RIP: 0010:mt76u_status_worker+0x2b/0x190 [ 31.402636] RSP: 0018:ffffc9000076fe98 EFLAGS: 00010282 [ 31.402829] RAX: 0000000000000001 RBX: ffff888111203fe8 RCX: 0000000000000000 [ 31.403118] RDX: 0000000000000001 RSI: 0000000000000246 RDI: ffff888111203fe8 [ 31.403424] RBP: ffff888111201d00 R08: 000000000000038c R09: 000000000000009b [ 31.403689] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ 31.403933] R13: 0000000000000000 R14: 0000000000000000 R15: ffff88810c987300 [ 31.404209] FS: 0000000000000000(0000) GS:ffff88817bd80000(0000) knlGS:0000000000000000 [ 31.404482] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 31.404726] CR2: 000000000000002c CR3: 0000000103525000 CR4: 00000000000006a0 [ 31.405294] mt76x0u: probe of 1-1:1.0 failed with error -110 [ 31.406007] usb 1-1: USB disconnect, device number 2 [ 31.840756] usb 1-1: new high-speed USB device number 3 using xhci_hcd [ 32.461295] usb 1-1: reset high-speed USB device number 3 using xhci_hcd [ 32.659932] mt76x0u 1-1:1.0: ASIC revision: 76100002 MAC revision: 76502000 [ 33.197032] mt76x0u 1-1:1.0: EEPROM ver:02 fae:01 Fixes: 9daf27e62852 ("mt76: mt76u: use dedicated thread for status work") Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/cd44dc407cf3e5f27688105d4a75fb1c68e62b06.1607419147.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/usb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c index dc850109de22..8b08cad131c8 100644 --- a/drivers/net/wireless/mediatek/mt76/usb.c +++ b/drivers/net/wireless/mediatek/mt76/usb.c @@ -816,6 +816,8 @@ static void mt76u_status_worker(struct mt76_worker *w) for (i = 0; i < IEEE80211_NUM_ACS; i++) { q = dev->phy.q_tx[i]; + if (!q) + continue; while (q->queued > 0) { if (!q->entry[q->tail].done) -- cgit 1.4.1 From 4dfde294b9792dcf8615b55c58f093d544f472f0 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Mon, 14 Dec 2020 13:31:06 +0800 Subject: rtlwifi: rise completion at the last step of firmware callback request_firmware_nowait() which schedules another work is used to load firmware when USB is probing. If USB is unplugged before running the firmware work, it goes disconnect ops, and then causes use-after-free. Though we wait for completion of firmware work before freeing the hw, firmware callback rises completion too early. So I move it to the last step. usb 5-1: Direct firmware load for rtlwifi/rtl8192cufw.bin failed with error -2 rtlwifi: Loading alternative firmware rtlwifi/rtl8192cufw.bin rtlwifi: Selected firmware is not available ================================================================== BUG: KASAN: use-after-free in rtl_fw_do_work.cold+0x68/0x6a drivers/net/wireless/realtek/rtlwifi/core.c:93 Write of size 4 at addr ffff8881454cff50 by task kworker/0:6/7379 CPU: 0 PID: 7379 Comm: kworker/0:6 Not tainted 5.10.0-rc7-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events request_firmware_work_func Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x107/0x163 lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xae/0x4c8 mm/kasan/report.c:385 __kasan_report mm/kasan/report.c:545 [inline] kasan_report.cold+0x1f/0x37 mm/kasan/report.c:562 rtl_fw_do_work.cold+0x68/0x6a drivers/net/wireless/realtek/rtlwifi/core.c:93 request_firmware_work_func+0x12c/0x230 drivers/base/firmware_loader/main.c:1079 process_one_work+0x933/0x1520 kernel/workqueue.c:2272 worker_thread+0x64c/0x1120 kernel/workqueue.c:2418 kthread+0x38c/0x460 kernel/kthread.c:292 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:296 The buggy address belongs to the page: page:00000000f54435b3 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1454cf flags: 0x200000000000000() raw: 0200000000000000 0000000000000000 ffffea00051533c8 0000000000000000 raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8881454cfe00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff8881454cfe80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff >ffff8881454cff00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff8881454cff80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff8881454d0000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff Reported-by: syzbot+65be4277f3c489293939@syzkaller.appspotmail.com Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201214053106.7748-1-pkshih@realtek.com --- drivers/net/wireless/realtek/rtlwifi/core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index a7259dbc953d..965bd9589045 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -78,7 +78,6 @@ static void rtl_fw_do_work(const struct firmware *firmware, void *context, rtl_dbg(rtlpriv, COMP_ERR, DBG_LOUD, "Firmware callback routine entered!\n"); - complete(&rtlpriv->firmware_loading_complete); if (!firmware) { if (rtlpriv->cfg->alt_fw_name) { err = request_firmware(&firmware, @@ -91,13 +90,13 @@ static void rtl_fw_do_work(const struct firmware *firmware, void *context, } pr_err("Selected firmware is not available\n"); rtlpriv->max_fw_size = 0; - return; + goto exit; } found_alt: if (firmware->size > rtlpriv->max_fw_size) { pr_err("Firmware is too big!\n"); release_firmware(firmware); - return; + goto exit; } if (!is_wow) { memcpy(rtlpriv->rtlhal.pfirmware, firmware->data, @@ -109,6 +108,9 @@ found_alt: rtlpriv->rtlhal.wowlan_fwsize = firmware->size; } release_firmware(firmware); + +exit: + complete(&rtlpriv->firmware_loading_complete); } void rtl_fw_cb(const struct firmware *firmware, void *context) -- cgit 1.4.1 From 443d6e86f821a165fae3fc3fc13086d27ac140b1 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Wed, 16 Dec 2020 21:38:02 -0700 Subject: netfilter: x_tables: Update remaining dereference to RCU This fixes the dereference to fetch the RCU pointer when holding the appropriate xtables lock. Reported-by: kernel test robot Fixes: cc00bcaa5899 ("netfilter: x_tables: Switch synchronization to RCU") Signed-off-by: Subash Abhinov Kasiviswanathan Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 2 +- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 563b62b76a5f..c576a63d09db 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1379,7 +1379,7 @@ static int compat_get_entries(struct net *net, xt_compat_lock(NFPROTO_ARP); t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); struct xt_table_info info; ret = compat_table_info(private, &info); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 6e2851f8d3a3..e8f6f9d86237 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1589,7 +1589,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, xt_compat_lock(AF_INET); t = xt_find_table_lock(net, AF_INET, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); struct xt_table_info info; ret = compat_table_info(private, &info); if (!ret && get.size == info.size) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index c4f532f4d311..0d453fa9e327 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1598,7 +1598,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); struct xt_table_info info; ret = compat_table_info(private, &info); if (!ret && get.size == info.size) -- cgit 1.4.1 From 2b33d6ffa9e38f344418976b06057e2fc2aa9e2a Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 17 Dec 2020 11:53:40 +0300 Subject: netfilter: ipset: fixes possible oops in mtype_resize currently mtype_resize() can cause oops t = ip_set_alloc(htable_size(htable_bits)); if (!t) { ret = -ENOMEM; goto out; } t->hregion = ip_set_alloc(ahash_sizeof_regions(htable_bits)); Increased htable_bits can force htable_size() to return 0. In own turn ip_set_alloc(0) returns not 0 but ZERO_SIZE_PTR, so follwoing access to t->hregion should trigger an OOPS. Signed-off-by: Vasily Averin Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_gen.h | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 5f1208ad049e..1e7dddf824ae 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -640,7 +640,7 @@ mtype_resize(struct ip_set *set, bool retried) struct htype *h = set->data; struct htable *t, *orig; u8 htable_bits; - size_t dsize = set->dsize; + size_t hsize, dsize = set->dsize; #ifdef IP_SET_HASH_WITH_NETS u8 flags; struct mtype_elem *tmp; @@ -664,14 +664,12 @@ mtype_resize(struct ip_set *set, bool retried) retry: ret = 0; htable_bits++; - if (!htable_bits) { - /* In case we have plenty of memory :-) */ - pr_warn("Cannot increase the hashsize of set %s further\n", - set->name); - ret = -IPSET_ERR_HASH_FULL; - goto out; - } - t = ip_set_alloc(htable_size(htable_bits)); + if (!htable_bits) + goto hbwarn; + hsize = htable_size(htable_bits); + if (!hsize) + goto hbwarn; + t = ip_set_alloc(hsize); if (!t) { ret = -ENOMEM; goto out; @@ -813,6 +811,12 @@ cleanup: if (ret == -EAGAIN) goto retry; goto out; + +hbwarn: + /* In case we have plenty of memory :-) */ + pr_warn("Cannot increase the hashsize of set %s further\n", set->name); + ret = -IPSET_ERR_HASH_FULL; + goto out; } /* Get the current number of elements and ext_size in the set */ -- cgit 1.4.1 From 5c8193f568ae16f3242abad6518dc2ca6c8eef86 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 17 Dec 2020 17:53:18 +0300 Subject: netfilter: ipset: fix shift-out-of-bounds in htable_bits() htable_bits() can call jhash_size(32) and trigger shift-out-of-bounds UBSAN: shift-out-of-bounds in net/netfilter/ipset/ip_set_hash_gen.h:151:6 shift exponent 32 is too large for 32-bit type 'unsigned int' CPU: 0 PID: 8498 Comm: syz-executor519 Not tainted 5.10.0-rc7-next-20201208-syzkaller #0 Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x107/0x163 lib/dump_stack.c:120 ubsan_epilogue+0xb/0x5a lib/ubsan.c:148 __ubsan_handle_shift_out_of_bounds.cold+0xb1/0x181 lib/ubsan.c:395 htable_bits net/netfilter/ipset/ip_set_hash_gen.h:151 [inline] hash_mac_create.cold+0x58/0x9b net/netfilter/ipset/ip_set_hash_gen.h:1524 ip_set_create+0x610/0x1380 net/netfilter/ipset/ip_set_core.c:1115 nfnetlink_rcv_msg+0xecc/0x1180 net/netfilter/nfnetlink.c:252 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2494 nfnetlink_rcv+0x1ac/0x420 net/netfilter/nfnetlink.c:600 netlink_unicast_kernel net/netlink/af_netlink.c:1304 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1330 netlink_sendmsg+0x907/0xe40 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:672 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2345 ___sys_sendmsg+0xf3/0x170 net/socket.c:2399 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2432 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 This patch replaces htable_bits() by simple fls(hashsize - 1) call: it alone returns valid nbits both for round and non-round hashsizes. It is normal to set any nbits here because it is validated inside following htable_size() call which returns 0 for nbits>31. Fixes: 1feab10d7e6d("netfilter: ipset: Unified hash type generation") Reported-by: syzbot+d66bfadebca46cf61a2b@syzkaller.appspotmail.com Signed-off-by: Vasily Averin Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_gen.h | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 1e7dddf824ae..6186358eac7c 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -141,20 +141,6 @@ htable_size(u8 hbits) return hsize * sizeof(struct hbucket *) + sizeof(struct htable); } -/* Compute htable_bits from the user input parameter hashsize */ -static u8 -htable_bits(u32 hashsize) -{ - /* Assume that hashsize == 2^htable_bits */ - u8 bits = fls(hashsize - 1); - - if (jhash_size(bits) != hashsize) - /* Round up to the first 2^n value */ - bits = fls(hashsize); - - return bits; -} - #ifdef IP_SET_HASH_WITH_NETS #if IPSET_NET_COUNT > 1 #define __CIDR(cidr, i) (cidr[i]) @@ -1525,7 +1511,11 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, if (!h) return -ENOMEM; - hbits = htable_bits(hashsize); + /* Compute htable_bits from the user input parameter hashsize. + * Assume that hashsize == 2^htable_bits, + * otherwise round up to the first 2^n value. + */ + hbits = fls(hashsize - 1); hsize = htable_size(hbits); if (hsize == 0) { kfree(h); -- cgit 1.4.1 From 3ac874fa84d1baaf0c0175f2a1499f5d88d528b2 Mon Sep 17 00:00:00 2001 From: Sylwester Dziedziuch Date: Thu, 22 Oct 2020 12:39:36 +0200 Subject: i40e: Fix Error I40E_AQ_RC_EINVAL when removing VFs When removing VFs for PF added to bridge there was an error I40E_AQ_RC_EINVAL. It was caused by not properly resetting and reinitializing PF when adding/removing VFs. Changed how reset is performed when adding/removing VFs to properly reinitialize PFs VSI. Fixes: fc60861e9b00 ("i40e: start up in VEPA mode by default") Signed-off-by: Sylwester Dziedziuch Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e.h | 3 +++ drivers/net/ethernet/intel/i40e/i40e_main.c | 10 ++++++++++ drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 4 ++-- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index d231a2cdd98f..118473dfdcbd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -120,6 +120,7 @@ enum i40e_state_t { __I40E_RESET_INTR_RECEIVED, __I40E_REINIT_REQUESTED, __I40E_PF_RESET_REQUESTED, + __I40E_PF_RESET_AND_REBUILD_REQUESTED, __I40E_CORE_RESET_REQUESTED, __I40E_GLOBAL_RESET_REQUESTED, __I40E_EMP_RESET_INTR_RECEIVED, @@ -146,6 +147,8 @@ enum i40e_state_t { }; #define I40E_PF_RESET_FLAG BIT_ULL(__I40E_PF_RESET_REQUESTED) +#define I40E_PF_RESET_AND_REBUILD_FLAG \ + BIT_ULL(__I40E_PF_RESET_AND_REBUILD_REQUESTED) /* VSI state flags */ enum i40e_vsi_state_t { diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 1337686bd099..1db482d310c2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -36,6 +36,8 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf); static void i40e_determine_queue_usage(struct i40e_pf *pf); static int i40e_setup_pf_filter_control(struct i40e_pf *pf); static void i40e_prep_for_reset(struct i40e_pf *pf, bool lock_acquired); +static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit, + bool lock_acquired); static int i40e_reset(struct i40e_pf *pf); static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired); static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf); @@ -8536,6 +8538,14 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired) "FW LLDP is disabled\n" : "FW LLDP is enabled\n"); + } else if (reset_flags & I40E_PF_RESET_AND_REBUILD_FLAG) { + /* Request a PF Reset + * + * Resets PF and reinitializes PFs VSI. + */ + i40e_prep_for_reset(pf, lock_acquired); + i40e_reset_and_rebuild(pf, true, lock_acquired); + } else if (reset_flags & BIT_ULL(__I40E_REINIT_REQUESTED)) { int v; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 729c4f0d5ac5..21ee56420c3a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1772,7 +1772,7 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) if (num_vfs) { if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; - i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG); + i40e_do_reset_safe(pf, I40E_PF_RESET_AND_REBUILD_FLAG); } ret = i40e_pci_sriov_enable(pdev, num_vfs); goto sriov_configure_out; @@ -1781,7 +1781,7 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!pci_vfs_assigned(pf->pdev)) { i40e_free_vfs(pf); pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED; - i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG); + i40e_do_reset_safe(pf, I40E_PF_RESET_AND_REBUILD_FLAG); } else { dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n"); ret = -EINVAL; -- cgit 1.4.1 From 8bee683384087a6275c9183a483435225f7bb209 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Mon, 14 Dec 2020 09:51:27 +0100 Subject: xsk: Fix memory leak for failed bind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a possible memory leak when a bind of an AF_XDP socket fails. When the fill and completion rings are created, they are tied to the socket. But when the buffer pool is later created at bind time, the ownership of these two rings are transferred to the buffer pool as they might be shared between sockets (and the buffer pool cannot be created until we know what we are binding to). So, before the buffer pool is created, these two rings are cleaned up with the socket, and after they have been transferred they are cleaned up together with the buffer pool. The problem is that ownership was transferred before it was absolutely certain that the buffer pool could be created and initialized correctly and when one of these errors occurred, the fill and completion rings did neither belong to the socket nor the pool and where therefore leaked. Solve this by moving the ownership transfer to the point where the buffer pool has been completely set up and there is no way it can fail. Fixes: 7361f9c3d719 ("xsk: Move fill and completion rings to buffer pool") Reported-by: syzbot+cfa88ddd0655afa88763@syzkaller.appspotmail.com Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20201214085127.3960-1-magnus.karlsson@gmail.com --- net/xdp/xsk.c | 4 ++++ net/xdp/xsk_buff_pool.c | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index ac4a317038f1..c6532d77fde7 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -878,6 +878,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) } } + /* FQ and CQ are now owned by the buffer pool and cleaned up with it. */ + xs->fq_tmp = NULL; + xs->cq_tmp = NULL; + xs->dev = dev; xs->zc = xs->umem->zc; xs->queue_id = qid; diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 67a4494d63b6..818b75060922 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -75,8 +75,6 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, pool->fq = xs->fq_tmp; pool->cq = xs->cq_tmp; - xs->fq_tmp = NULL; - xs->cq_tmp = NULL; for (i = 0; i < pool->free_heads_cnt; i++) { xskb = &pool->heads[i]; -- cgit 1.4.1 From f1340265726e0edf8a8cef28e665b28ad6302ce9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 2 Dec 2020 18:18:06 -0800 Subject: iavf: fix double-release of rtnl_lock This code does not jump to exit on an error in iavf_lan_add_device(), so the rtnl_unlock() from the normal path will follow. Fixes: b66c7bc1cd4d ("iavf: Refactor init state machine") Signed-off-by: Jakub Kicinski Reviewed-by: Tony Nguyen Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 95543dfd4fe7..0a867d64d467 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1834,11 +1834,9 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter) netif_tx_stop_all_queues(netdev); if (CLIENT_ALLOWED(adapter)) { err = iavf_lan_add_device(adapter); - if (err) { - rtnl_unlock(); + if (err) dev_info(&pdev->dev, "Failed to add VF to client API service list: %d\n", err); - } } dev_info(&pdev->dev, "MAC address: %pM\n", adapter->hw.mac.addr); if (netdev->features & NETIF_F_GRO) -- cgit 1.4.1 From f6f92968e1e5a7a9d211faaebefc26ebe408dad7 Mon Sep 17 00:00:00 2001 From: Carl Huang Date: Thu, 17 Dec 2020 09:04:57 +0200 Subject: ath11k: qmi: try to allocate a big block of DMA memory first Not all firmware versions support allocating DMA memory in smaller blocks so first try to allocate big block of DMA memory for QMI. If the allocation fails, let firmware request multiple blocks of DMA memory with smaller size. This also fixes an unnecessary error message seen during ath11k probe on QCA6390: ath11k_pci 0000:06:00.0: Respond mem req failed, result: 1, err: 0 ath11k_pci 0000:06:00.0: qmi failed to respond fw mem req:-22 Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1 Signed-off-by: Carl Huang Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1608127593-15192-1-git-send-email-kvalo@codeaurora.org --- drivers/net/wireless/ath/ath11k/qmi.c | 24 ++++++++++++++++++++++-- drivers/net/wireless/ath/ath11k/qmi.h | 1 + 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/qmi.c b/drivers/net/wireless/ath/ath11k/qmi.c index f0b5c50974f3..0db623ff4bb9 100644 --- a/drivers/net/wireless/ath/ath11k/qmi.c +++ b/drivers/net/wireless/ath/ath11k/qmi.c @@ -1660,6 +1660,7 @@ static int ath11k_qmi_respond_fw_mem_request(struct ath11k_base *ab) struct qmi_wlanfw_respond_mem_resp_msg_v01 resp; struct qmi_txn txn = {}; int ret = 0, i; + bool delayed; req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) @@ -1672,11 +1673,13 @@ static int ath11k_qmi_respond_fw_mem_request(struct ath11k_base *ab) * failure to FW and FW will then request mulitple blocks of small * chunk size memory. */ - if (!ab->bus_params.fixed_mem_region && ab->qmi.mem_seg_count <= 2) { + if (!ab->bus_params.fixed_mem_region && ab->qmi.target_mem_delayed) { + delayed = true; ath11k_dbg(ab, ATH11K_DBG_QMI, "qmi delays mem_request %d\n", ab->qmi.mem_seg_count); memset(req, 0, sizeof(*req)); } else { + delayed = false; req->mem_seg_len = ab->qmi.mem_seg_count; for (i = 0; i < req->mem_seg_len ; i++) { @@ -1708,6 +1711,12 @@ static int ath11k_qmi_respond_fw_mem_request(struct ath11k_base *ab) } if (resp.resp.result != QMI_RESULT_SUCCESS_V01) { + /* the error response is expected when + * target_mem_delayed is true. + */ + if (delayed && resp.resp.error == 0) + goto out; + ath11k_warn(ab, "Respond mem req failed, result: %d, err: %d\n", resp.resp.result, resp.resp.error); ret = -EINVAL; @@ -1742,6 +1751,8 @@ static int ath11k_qmi_alloc_target_mem_chunk(struct ath11k_base *ab) int i; struct target_mem_chunk *chunk; + ab->qmi.target_mem_delayed = false; + for (i = 0; i < ab->qmi.mem_seg_count; i++) { chunk = &ab->qmi.target_mem[i]; chunk->vaddr = dma_alloc_coherent(ab->dev, @@ -1749,6 +1760,15 @@ static int ath11k_qmi_alloc_target_mem_chunk(struct ath11k_base *ab) &chunk->paddr, GFP_KERNEL); if (!chunk->vaddr) { + if (ab->qmi.mem_seg_count <= 2) { + ath11k_dbg(ab, ATH11K_DBG_QMI, + "qmi dma allocation failed (%d B type %u), will try later with small size\n", + chunk->size, + chunk->type); + ath11k_qmi_free_target_mem_chunk(ab); + ab->qmi.target_mem_delayed = true; + return 0; + } ath11k_err(ab, "failed to alloc memory, size: 0x%x, type: %u\n", chunk->size, chunk->type); @@ -2517,7 +2537,7 @@ static void ath11k_qmi_msg_mem_request_cb(struct qmi_handle *qmi_hdl, ret); return; } - } else if (msg->mem_seg_len > 2) { + } else { ret = ath11k_qmi_alloc_target_mem_chunk(ab); if (ret) { ath11k_warn(ab, "qmi failed to alloc target memory: %d\n", diff --git a/drivers/net/wireless/ath/ath11k/qmi.h b/drivers/net/wireless/ath/ath11k/qmi.h index 92925c9eac67..7bad374cc23a 100644 --- a/drivers/net/wireless/ath/ath11k/qmi.h +++ b/drivers/net/wireless/ath/ath11k/qmi.h @@ -125,6 +125,7 @@ struct ath11k_qmi { struct target_mem_chunk target_mem[ATH11K_QMI_WLANFW_MAX_NUM_MEM_SEG_V01]; u32 mem_seg_count; u32 target_mem_mode; + bool target_mem_delayed; u8 cal_done; struct target_info target; struct m3_mem_region m3_mem; -- cgit 1.4.1 From e9603f4bdcc04417f1c7b3585e63654819dc11f6 Mon Sep 17 00:00:00 2001 From: Carl Huang Date: Thu, 17 Dec 2020 17:22:10 +0200 Subject: ath11k: pci: disable ASPM L0sLs before downloading firmware Sometimes QCA6390 doesn't switch to amss state as device enters L1ss state, so disable L0sL1s during firmware downloading. Driver recovers the ASPM to default value in start callback or powerdown callback. Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1 Signed-off-by: Carl Huang Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1608218530-15426-1-git-send-email-kvalo@codeaurora.org --- drivers/net/wireless/ath/ath11k/pci.c | 36 +++++++++++++++++++++++++++++++++++ drivers/net/wireless/ath/ath11k/pci.h | 2 ++ 2 files changed, 38 insertions(+) diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c index 9f9a824a4c2d..20b415cd96c4 100644 --- a/drivers/net/wireless/ath/ath11k/pci.c +++ b/drivers/net/wireless/ath/ath11k/pci.c @@ -886,6 +886,32 @@ static void ath11k_pci_free_region(struct ath11k_pci *ab_pci) pci_disable_device(pci_dev); } +static void ath11k_pci_aspm_disable(struct ath11k_pci *ab_pci) +{ + struct ath11k_base *ab = ab_pci->ab; + + pcie_capability_read_word(ab_pci->pdev, PCI_EXP_LNKCTL, + &ab_pci->link_ctl); + + ath11k_dbg(ab, ATH11K_DBG_PCI, "pci link_ctl 0x%04x L0s %d L1 %d\n", + ab_pci->link_ctl, + u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L0S), + u16_get_bits(ab_pci->link_ctl, PCI_EXP_LNKCTL_ASPM_L1)); + + /* disable L0s and L1 */ + pcie_capability_write_word(ab_pci->pdev, PCI_EXP_LNKCTL, + ab_pci->link_ctl & ~PCI_EXP_LNKCTL_ASPMC); + + set_bit(ATH11K_PCI_ASPM_RESTORE, &ab_pci->flags); +} + +static void ath11k_pci_aspm_restore(struct ath11k_pci *ab_pci) +{ + if (test_and_clear_bit(ATH11K_PCI_ASPM_RESTORE, &ab_pci->flags)) + pcie_capability_write_word(ab_pci->pdev, PCI_EXP_LNKCTL, + ab_pci->link_ctl); +} + static int ath11k_pci_power_up(struct ath11k_base *ab) { struct ath11k_pci *ab_pci = ath11k_pci_priv(ab); @@ -895,6 +921,11 @@ static int ath11k_pci_power_up(struct ath11k_base *ab) clear_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags); ath11k_pci_sw_reset(ab_pci->ab, true); + /* Disable ASPM during firmware download due to problems switching + * to AMSS state. + */ + ath11k_pci_aspm_disable(ab_pci); + ret = ath11k_mhi_start(ab_pci); if (ret) { ath11k_err(ab, "failed to start mhi: %d\n", ret); @@ -908,6 +939,9 @@ static void ath11k_pci_power_down(struct ath11k_base *ab) { struct ath11k_pci *ab_pci = ath11k_pci_priv(ab); + /* restore aspm in case firmware bootup fails */ + ath11k_pci_aspm_restore(ab_pci); + ath11k_pci_force_wake(ab_pci->ab); ath11k_mhi_stop(ab_pci); clear_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags); @@ -965,6 +999,8 @@ static int ath11k_pci_start(struct ath11k_base *ab) set_bit(ATH11K_PCI_FLAG_INIT_DONE, &ab_pci->flags); + ath11k_pci_aspm_restore(ab_pci); + ath11k_pci_ce_irqs_enable(ab); ath11k_ce_rx_post_buf(ab); diff --git a/drivers/net/wireless/ath/ath11k/pci.h b/drivers/net/wireless/ath/ath11k/pci.h index 0432a702416b..fe44d0dfce19 100644 --- a/drivers/net/wireless/ath/ath11k/pci.h +++ b/drivers/net/wireless/ath/ath11k/pci.h @@ -63,6 +63,7 @@ struct ath11k_msi_config { enum ath11k_pci_flags { ATH11K_PCI_FLAG_INIT_DONE, ATH11K_PCI_FLAG_IS_MSI_64, + ATH11K_PCI_ASPM_RESTORE, }; struct ath11k_pci { @@ -80,6 +81,7 @@ struct ath11k_pci { /* enum ath11k_pci_flags */ unsigned long flags; + u16 link_ctl; }; static inline struct ath11k_pci *ath11k_pci_priv(struct ath11k_base *ab) -- cgit 1.4.1 From 3d45f221ce627d13e2e6ef3274f06750c84a6542 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 2 Dec 2020 11:55:58 +0000 Subject: btrfs: fix deadlock when cloning inline extent and low on free metadata space When cloning an inline extent there are cases where we can not just copy the inline extent from the source range to the target range (e.g. when the target range starts at an offset greater than zero). In such cases we copy the inline extent's data into a page of the destination inode and then dirty that page. However, after that we will need to start a transaction for each processed extent and, if we are ever low on available metadata space, we may need to flush existing delalloc for all dirty inodes in an attempt to release metadata space - if that happens we may deadlock: * the async reclaim task queued a delalloc work to flush delalloc for the destination inode of the clone operation; * the task executing that delalloc work gets blocked waiting for the range with the dirty page to be unlocked, which is currently locked by the task doing the clone operation; * the async reclaim task blocks waiting for the delalloc work to complete; * the cloning task is waiting on the waitqueue of its reservation ticket while holding the range with the dirty page locked in the inode's io_tree; * if metadata space is not released by some other task (like delalloc for some other inode completing for example), the clone task waits forever and as a consequence the delalloc work and async reclaim tasks will hang forever as well. Releasing more space on the other hand may require starting a transaction, which will hang as well when trying to reserve metadata space, resulting in a deadlock between all these tasks. When this happens, traces like the following show up in dmesg/syslog: [87452.323003] INFO: task kworker/u16:11:1810830 blocked for more than 120 seconds. [87452.323644] Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 [87452.324248] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [87452.324852] task:kworker/u16:11 state:D stack: 0 pid:1810830 ppid: 2 flags:0x00004000 [87452.325520] Workqueue: btrfs-flush_delalloc btrfs_work_helper [btrfs] [87452.326136] Call Trace: [87452.326737] __schedule+0x5d1/0xcf0 [87452.327390] schedule+0x45/0xe0 [87452.328174] lock_extent_bits+0x1e6/0x2d0 [btrfs] [87452.328894] ? finish_wait+0x90/0x90 [87452.329474] btrfs_invalidatepage+0x32c/0x390 [btrfs] [87452.330133] ? __mod_memcg_state+0x8e/0x160 [87452.330738] __extent_writepage+0x2d4/0x400 [btrfs] [87452.331405] extent_write_cache_pages+0x2b2/0x500 [btrfs] [87452.332007] ? lock_release+0x20e/0x4c0 [87452.332557] ? trace_hardirqs_on+0x1b/0xf0 [87452.333127] extent_writepages+0x43/0x90 [btrfs] [87452.333653] ? lock_acquire+0x1a3/0x490 [87452.334177] do_writepages+0x43/0xe0 [87452.334699] ? __filemap_fdatawrite_range+0xa4/0x100 [87452.335720] __filemap_fdatawrite_range+0xc5/0x100 [87452.336500] btrfs_run_delalloc_work+0x17/0x40 [btrfs] [87452.337216] btrfs_work_helper+0xf1/0x600 [btrfs] [87452.337838] process_one_work+0x24e/0x5e0 [87452.338437] worker_thread+0x50/0x3b0 [87452.339137] ? process_one_work+0x5e0/0x5e0 [87452.339884] kthread+0x153/0x170 [87452.340507] ? kthread_mod_delayed_work+0xc0/0xc0 [87452.341153] ret_from_fork+0x22/0x30 [87452.341806] INFO: task kworker/u16:1:2426217 blocked for more than 120 seconds. [87452.342487] Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 [87452.343274] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [87452.344049] task:kworker/u16:1 state:D stack: 0 pid:2426217 ppid: 2 flags:0x00004000 [87452.344974] Workqueue: events_unbound btrfs_async_reclaim_metadata_space [btrfs] [87452.345655] Call Trace: [87452.346305] __schedule+0x5d1/0xcf0 [87452.346947] ? kvm_clock_read+0x14/0x30 [87452.347676] ? wait_for_completion+0x81/0x110 [87452.348389] schedule+0x45/0xe0 [87452.349077] schedule_timeout+0x30c/0x580 [87452.349718] ? _raw_spin_unlock_irqrestore+0x3c/0x60 [87452.350340] ? lock_acquire+0x1a3/0x490 [87452.351006] ? try_to_wake_up+0x7a/0xa20 [87452.351541] ? lock_release+0x20e/0x4c0 [87452.352040] ? lock_acquired+0x199/0x490 [87452.352517] ? wait_for_completion+0x81/0x110 [87452.353000] wait_for_completion+0xab/0x110 [87452.353490] start_delalloc_inodes+0x2af/0x390 [btrfs] [87452.353973] btrfs_start_delalloc_roots+0x12d/0x250 [btrfs] [87452.354455] flush_space+0x24f/0x660 [btrfs] [87452.355063] btrfs_async_reclaim_metadata_space+0x1bb/0x480 [btrfs] [87452.355565] process_one_work+0x24e/0x5e0 [87452.356024] worker_thread+0x20f/0x3b0 [87452.356487] ? process_one_work+0x5e0/0x5e0 [87452.356973] kthread+0x153/0x170 [87452.357434] ? kthread_mod_delayed_work+0xc0/0xc0 [87452.357880] ret_from_fork+0x22/0x30 (...) < stack traces of several tasks waiting for the locks of the inodes of the clone operation > (...) [92867.444138] RSP: 002b:00007ffc3371bbe8 EFLAGS: 00000246 ORIG_RAX: 0000000000000052 [92867.444624] RAX: ffffffffffffffda RBX: 00007ffc3371bea0 RCX: 00007f61efe73f97 [92867.445116] RDX: 0000000000000000 RSI: 0000560fbd5d7a40 RDI: 0000560fbd5d8960 [92867.445595] RBP: 00007ffc3371beb0 R08: 0000000000000001 R09: 0000000000000003 [92867.446070] R10: 00007ffc3371b996 R11: 0000000000000246 R12: 0000000000000000 [92867.446820] R13: 000000000000001f R14: 00007ffc3371bea0 R15: 00007ffc3371beb0 [92867.447361] task:fsstress state:D stack: 0 pid:2508238 ppid:2508153 flags:0x00004000 [92867.447920] Call Trace: [92867.448435] __schedule+0x5d1/0xcf0 [92867.448934] ? _raw_spin_unlock_irqrestore+0x3c/0x60 [92867.449423] schedule+0x45/0xe0 [92867.449916] __reserve_bytes+0x4a4/0xb10 [btrfs] [92867.450576] ? finish_wait+0x90/0x90 [92867.451202] btrfs_reserve_metadata_bytes+0x29/0x190 [btrfs] [92867.451815] btrfs_block_rsv_add+0x1f/0x50 [btrfs] [92867.452412] start_transaction+0x2d1/0x760 [btrfs] [92867.453216] clone_copy_inline_extent+0x333/0x490 [btrfs] [92867.453848] ? lock_release+0x20e/0x4c0 [92867.454539] ? btrfs_search_slot+0x9a7/0xc30 [btrfs] [92867.455218] btrfs_clone+0x569/0x7e0 [btrfs] [92867.455952] btrfs_clone_files+0xf6/0x150 [btrfs] [92867.456588] btrfs_remap_file_range+0x324/0x3d0 [btrfs] [92867.457213] do_clone_file_range+0xd4/0x1f0 [92867.457828] vfs_clone_file_range+0x4d/0x230 [92867.458355] ? lock_release+0x20e/0x4c0 [92867.458890] ioctl_file_clone+0x8f/0xc0 [92867.459377] do_vfs_ioctl+0x342/0x750 [92867.459913] __x64_sys_ioctl+0x62/0xb0 [92867.460377] do_syscall_64+0x33/0x80 [92867.460842] entry_SYSCALL_64_after_hwframe+0x44/0xa9 (...) < stack traces of more tasks blocked on metadata reservation like the clone task above, because the async reclaim task has deadlocked > (...) Another thing to notice is that the worker task that is deadlocked when trying to flush the destination inode of the clone operation is at btrfs_invalidatepage(). This is simply because the clone operation has a destination offset greater than the i_size and we only update the i_size of the destination file after cloning an extent (just like we do in the buffered write path). Since the async reclaim path uses btrfs_start_delalloc_roots() to trigger the flushing of delalloc for all inodes that have delalloc, add a runtime flag to an inode to signal it should not be flushed, and for inodes with that flag set, start_delalloc_inodes() will simply skip them. When the cloning code needs to dirty a page to copy an inline extent, set that flag on the inode and then clear it when the clone operation finishes. This could be sporadically triggered with test case generic/269 from fstests, which exercises many fsstress processes running in parallel with several dd processes filling up the entire filesystem. CC: stable@vger.kernel.org # 5.9+ Fixes: 05a5a7621ce6 ("Btrfs: implement full reflink support for inline extents") Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/btrfs_inode.h | 9 +++++++++ fs/btrfs/ctree.h | 3 ++- fs/btrfs/dev-replace.c | 2 +- fs/btrfs/inode.c | 15 +++++++++++---- fs/btrfs/ioctl.c | 2 +- fs/btrfs/reflink.c | 15 +++++++++++++++ fs/btrfs/space-info.c | 2 +- 7 files changed, 40 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 555cbcef6585..d9bf53d9ff90 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -42,6 +42,15 @@ enum { * to an inode. */ BTRFS_INODE_NO_XATTRS, + /* + * Set when we are in a context where we need to start a transaction and + * have dirty pages with the respective file range locked. This is to + * ensure that when reserving space for the transaction, if we are low + * on available space and need to flush delalloc, we will not flush + * delalloc for this inode, because that could result in a deadlock (on + * the file range, inode's io_tree). + */ + BTRFS_INODE_NO_DELALLOC_FLUSH, }; /* in memory btrfs inode */ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9dde7707873a..2674f24cf2e0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3074,7 +3074,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, u32 min_type); int btrfs_start_delalloc_snapshot(struct btrfs_root *root); -int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr); +int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr, + bool in_reclaim_context); int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end, unsigned int extra_bits, struct extent_state **cached_state); diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index a98e33f232d5..324f646d6e5e 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -715,7 +715,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, * flush all outstanding I/O and inode extent mappings before the * copy operation is declared as being finished */ - ret = btrfs_start_delalloc_roots(fs_info, U64_MAX); + ret = btrfs_start_delalloc_roots(fs_info, U64_MAX, false); if (ret) { mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); return ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8e23780acfae..070716650df8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9390,7 +9390,8 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode * some fairly slow code that needs optimization. This walks the list * of all the inodes with pending delalloc and forces them to disk. */ -static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot) +static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot, + bool in_reclaim_context) { struct btrfs_inode *binode; struct inode *inode; @@ -9411,6 +9412,11 @@ static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot list_move_tail(&binode->delalloc_inodes, &root->delalloc_inodes); + + if (in_reclaim_context && + test_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &binode->runtime_flags)) + continue; + inode = igrab(&binode->vfs_inode); if (!inode) { cond_resched_lock(&root->delalloc_lock); @@ -9464,10 +9470,11 @@ int btrfs_start_delalloc_snapshot(struct btrfs_root *root) if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) return -EROFS; - return start_delalloc_inodes(root, &nr, true); + return start_delalloc_inodes(root, &nr, true, false); } -int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr) +int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr, + bool in_reclaim_context) { struct btrfs_root *root; struct list_head splice; @@ -9490,7 +9497,7 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr) &fs_info->delalloc_roots); spin_unlock(&fs_info->delalloc_root_lock); - ret = start_delalloc_inodes(root, &nr, false); + ret = start_delalloc_inodes(root, &nr, false, in_reclaim_context); btrfs_put_root(root); if (ret < 0) goto out; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 703212ff50a5..dde49a791f3e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4951,7 +4951,7 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_SYNC: { int ret; - ret = btrfs_start_delalloc_roots(fs_info, U64_MAX); + ret = btrfs_start_delalloc_roots(fs_info, U64_MAX, false); if (ret) return ret; ret = btrfs_sync_fs(inode->i_sb, 1); diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index ab80896315be..b03e7891394e 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -89,6 +89,19 @@ static int copy_inline_to_page(struct btrfs_inode *inode, if (ret) goto out_unlock; + /* + * After dirtying the page our caller will need to start a transaction, + * and if we are low on metadata free space, that can cause flushing of + * delalloc for all inodes in order to get metadata space released. + * However we are holding the range locked for the whole duration of + * the clone/dedupe operation, so we may deadlock if that happens and no + * other task releases enough space. So mark this inode as not being + * possible to flush to avoid such deadlock. We will clear that flag + * when we finish cloning all extents, since a transaction is started + * after finding each extent to clone. + */ + set_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &inode->runtime_flags); + if (comp_type == BTRFS_COMPRESS_NONE) { char *map; @@ -549,6 +562,8 @@ process_slot: out: btrfs_free_path(path); kvfree(buf); + clear_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &BTRFS_I(inode)->runtime_flags); + return ret; } diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 64099565ab8f..67e55c5479b8 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -532,7 +532,7 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, loops = 0; while ((delalloc_bytes || dio_bytes) && loops < 3) { - btrfs_start_delalloc_roots(fs_info, items); + btrfs_start_delalloc_roots(fs_info, items, true); loops++; if (wait_ordered && !trans) { -- cgit 1.4.1 From 9a664971569daf68254928149f580b4f5856d274 Mon Sep 17 00:00:00 2001 From: ethanwu Date: Tue, 1 Dec 2020 17:25:12 +0800 Subject: btrfs: correctly calculate item size used when item key collision happens Item key collision is allowed for some item types, like dir item and inode refs, but the overall item size is limited by the nodesize. item size(ins_len) passed from btrfs_insert_empty_items to btrfs_search_slot already contains size of btrfs_item. When btrfs_search_slot reaches leaf, we'll see if we need to split leaf. The check incorrectly reports that split leaf is required, because it treats the space required by the newly inserted item as btrfs_item + item data. But in item key collision case, only item data is actually needed, the newly inserted item could merge into the existing one. No new btrfs_item will be inserted. And split_leaf return EOVERFLOW from following code: if (extend && data_size + btrfs_item_size_nr(l, slot) + sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(fs_info)) return -EOVERFLOW; In most cases, when callers receive EOVERFLOW, they either return this error or handle in different ways. For example, in normal dir item creation the userspace will get errno EOVERFLOW; in inode ref case INODE_EXTREF is used instead. However, this is not the case for rename. To avoid the unrecoverable situation in rename, btrfs_check_dir_item_collision is called in early phase of rename. In this function, when item key collision is detected leaf space is checked: data_size = sizeof(*di) + name_len; if (data_size + btrfs_item_size_nr(leaf, slot) + sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root->fs_info)) the sizeof(struct btrfs_item) + btrfs_item_size_nr(leaf, slot) here refers to existing item size, the condition here correctly calculates the needed size for collision case rather than the wrong case above. The consequence of inconsistent condition check between btrfs_check_dir_item_collision and btrfs_search_slot when item key collision happens is that we might pass check here but fail later at btrfs_search_slot. Rename fails and volume is forced readonly [436149.586170] ------------[ cut here ]------------ [436149.586173] BTRFS: Transaction aborted (error -75) [436149.586196] WARNING: CPU: 0 PID: 16733 at fs/btrfs/inode.c:9870 btrfs_rename2+0x1938/0x1b70 [btrfs] [436149.586227] CPU: 0 PID: 16733 Comm: python Tainted: G D 4.18.0-rc5+ #1 [436149.586228] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 04/05/2016 [436149.586238] RIP: 0010:btrfs_rename2+0x1938/0x1b70 [btrfs] [436149.586254] RSP: 0018:ffffa327043a7ce0 EFLAGS: 00010286 [436149.586255] RAX: 0000000000000000 RBX: ffff8d8a17d13340 RCX: 0000000000000006 [436149.586256] RDX: 0000000000000007 RSI: 0000000000000096 RDI: ffff8d8a7fc164b0 [436149.586257] RBP: ffffa327043a7da0 R08: 0000000000000560 R09: 7265282064657472 [436149.586258] R10: 0000000000000000 R11: 6361736e61725420 R12: ffff8d8a0d4c8b08 [436149.586258] R13: ffff8d8a17d13340 R14: ffff8d8a33e0a540 R15: 00000000000001fe [436149.586260] FS: 00007fa313933740(0000) GS:ffff8d8a7fc00000(0000) knlGS:0000000000000000 [436149.586261] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [436149.586262] CR2: 000055d8d9c9a720 CR3: 000000007aae0003 CR4: 00000000003606f0 [436149.586295] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [436149.586296] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [436149.586296] Call Trace: [436149.586311] vfs_rename+0x383/0x920 [436149.586313] ? vfs_rename+0x383/0x920 [436149.586315] do_renameat2+0x4ca/0x590 [436149.586317] __x64_sys_rename+0x20/0x30 [436149.586324] do_syscall_64+0x5a/0x120 [436149.586330] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [436149.586332] RIP: 0033:0x7fa3133b1d37 [436149.586348] RSP: 002b:00007fffd3e43908 EFLAGS: 00000246 ORIG_RAX: 0000000000000052 [436149.586349] RAX: ffffffffffffffda RBX: 00007fa3133b1d30 RCX: 00007fa3133b1d37 [436149.586350] RDX: 000055d8da06b5e0 RSI: 000055d8da225d60 RDI: 000055d8da2c4da0 [436149.586351] RBP: 000055d8da2252f0 R08: 00007fa313782000 R09: 00000000000177e0 [436149.586351] R10: 000055d8da010680 R11: 0000000000000246 R12: 00007fa313840b00 Thanks to Hans van Kranenburg for information about crc32 hash collision tools, I was able to reproduce the dir item collision with following python script. https://github.com/wutzuchieh/misc_tools/blob/master/crc32_forge.py Run it under a btrfs volume will trigger the abort transaction. It simply creates files and rename them to forged names that leads to hash collision. There are two ways to fix this. One is to simply revert the patch 878f2d2cb355 ("Btrfs: fix max dir item size calculation") to make the condition consistent although that patch is correct about the size. The other way is to handle the leaf space check correctly when collision happens. I prefer the second one since it correct leaf space check in collision case. This fix will not account sizeof(struct btrfs_item) when the item already exists. There are two places where ins_len doesn't contain sizeof(struct btrfs_item), however. 1. extent-tree.c: lookup_inline_extent_backref 2. file-item.c: btrfs_csum_file_blocks to make the logic of btrfs_search_slot more clear, we add a flag search_for_extension in btrfs_path. This flag indicates that ins_len passed to btrfs_search_slot doesn't contain sizeof(struct btrfs_item). When key exists, btrfs_search_slot will use the actual size needed to calculate the required leaf space. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana Signed-off-by: ethanwu Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 24 ++++++++++++++++++++++-- fs/btrfs/ctree.h | 6 ++++++ fs/btrfs/extent-tree.c | 2 ++ fs/btrfs/file-item.c | 2 ++ 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 07810891e204..cc89b63d65a4 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2555,8 +2555,14 @@ out: * @p: Holds all btree nodes along the search path * @root: The root node of the tree * @key: The key we are looking for - * @ins_len: Indicates purpose of search, for inserts it is 1, for - * deletions it's -1. 0 for plain searches + * @ins_len: Indicates purpose of search: + * >0 for inserts it's size of item inserted (*) + * <0 for deletions + * 0 for plain searches, not modifying the tree + * + * (*) If size of item inserted doesn't include + * sizeof(struct btrfs_item), then p->search_for_extension must + * be set. * @cow: boolean should CoW operations be performed. Must always be 1 * when modifying the tree. * @@ -2717,6 +2723,20 @@ cow_done: if (level == 0) { p->slots[level] = slot; + /* + * Item key already exists. In this case, if we are + * allowed to insert the item (for example, in dir_item + * case, item key collision is allowed), it will be + * merged with the original item. Only the item size + * grows, no new btrfs item will be added. If + * search_for_extension is not set, ins_len already + * accounts the size btrfs_item, deduct it here so leaf + * space check will be correct. + */ + if (ret == 0 && ins_len > 0 && !p->search_for_extension) { + ASSERT(ins_len >= sizeof(struct btrfs_item)); + ins_len -= sizeof(struct btrfs_item); + } if (ins_len > 0 && btrfs_leaf_free_space(b) < ins_len) { if (write_lock_level < 1) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2674f24cf2e0..3935d297d198 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -368,6 +368,12 @@ struct btrfs_path { unsigned int search_commit_root:1; unsigned int need_commit_sem:1; unsigned int skip_release_on_error:1; + /* + * Indicate that new item (btrfs_search_slot) is extending already + * existing item and ins_len contains only the data size and not item + * header (ie. sizeof(struct btrfs_item) is not included). + */ + unsigned int search_for_extension:1; }; #define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r->fs_info) >> 4) - \ sizeof(struct btrfs_item)) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 56ea380f5a17..d79b8369e6aa 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -844,6 +844,7 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, want = extent_ref_type(parent, owner); if (insert) { extra_size = btrfs_extent_inline_ref_size(want); + path->search_for_extension = 1; path->keep_locks = 1; } else extra_size = -1; @@ -996,6 +997,7 @@ again: out: if (insert) { path->keep_locks = 0; + path->search_for_extension = 0; btrfs_unlock_up_safe(path, 1); } return err; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 1545c22ef280..6ccfc019ad90 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -1016,8 +1016,10 @@ again: } btrfs_release_path(path); + path->search_for_extension = 1; ret = btrfs_search_slot(trans, root, &file_key, path, csum_size, 1); + path->search_for_extension = 0; if (ret < 0) goto out; -- cgit 1.4.1 From ae5e070eaca9dbebde3459dd8f4c2756f8c097d0 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 4 Dec 2020 09:24:47 +0800 Subject: btrfs: qgroup: don't try to wait flushing if we're already holding a transaction There is a chance of racing for qgroup flushing which may lead to deadlock: Thread A | Thread B (not holding trans handle) | (holding a trans handle) --------------------------------+-------------------------------- __btrfs_qgroup_reserve_meta() | __btrfs_qgroup_reserve_meta() |- try_flush_qgroup() | |- try_flush_qgroup() |- QGROUP_FLUSHING bit set | | | | |- test_and_set_bit() | | |- wait_event() |- btrfs_join_transaction() | |- btrfs_commit_transaction()| !!! DEAD LOCK !!! Since thread A wants to commit transaction, but thread B is holding a transaction handle, blocking the commit. At the same time, thread B is waiting for thread A to finish its commit. This is just a hot fix, and would lead to more EDQUOT when we're near the qgroup limit. The proper fix would be to make all metadata/data reservations happen without holding a transaction handle. CC: stable@vger.kernel.org # 5.9+ Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index fe3046007f52..47f27658eac1 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3530,16 +3530,6 @@ static int try_flush_qgroup(struct btrfs_root *root) int ret; bool can_commit = true; - /* - * We don't want to run flush again and again, so if there is a running - * one, we won't try to start a new flush, but exit directly. - */ - if (test_and_set_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)) { - wait_event(root->qgroup_flush_wait, - !test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)); - return 0; - } - /* * If current process holds a transaction, we shouldn't flush, as we * assume all space reservation happens before a transaction handle is @@ -3554,6 +3544,26 @@ static int try_flush_qgroup(struct btrfs_root *root) current->journal_info != BTRFS_SEND_TRANS_STUB) can_commit = false; + /* + * We don't want to run flush again and again, so if there is a running + * one, we won't try to start a new flush, but exit directly. + */ + if (test_and_set_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)) { + /* + * We are already holding a transaction, thus we can block other + * threads from flushing. So exit right now. This increases + * the chance of EDQUOT for heavy load and near limit cases. + * But we can argue that if we're already near limit, EDQUOT is + * unavoidable anyway. + */ + if (!can_commit) + return 0; + + wait_event(root->qgroup_flush_wait, + !test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)); + return 0; + } + ret = btrfs_start_delalloc_snapshot(root); if (ret < 0) goto out; -- cgit 1.4.1 From 0b3f407e6728d990ae1630a02c7b952c21c288d3 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 10 Dec 2020 12:09:02 +0000 Subject: btrfs: send: fix wrong file path when there is an inode with a pending rmdir When doing an incremental send, if we have a new inode that happens to have the same number that an old directory inode had in the base snapshot and that old directory has a pending rmdir operation, we end up computing a wrong path for the new inode, causing the receiver to fail. Example reproducer: $ cat test-send-rmdir.sh #!/bin/bash DEV=/dev/sdi MNT=/mnt/sdi mkfs.btrfs -f $DEV >/dev/null mount $DEV $MNT mkdir $MNT/dir touch $MNT/dir/file1 touch $MNT/dir/file2 touch $MNT/dir/file3 # Filesystem looks like: # # . (ino 256) # |----- dir/ (ino 257) # |----- file1 (ino 258) # |----- file2 (ino 259) # |----- file3 (ino 260) # btrfs subvolume snapshot -r $MNT $MNT/snap1 btrfs send -f /tmp/snap1.send $MNT/snap1 # Now remove our directory and all its files. rm -fr $MNT/dir # Unmount the filesystem and mount it again. This is to ensure that # the next inode that is created ends up with the same inode number # that our directory "dir" had, 257, which is the first free "objectid" # available after mounting again the filesystem. umount $MNT mount $DEV $MNT # Now create a new file (it could be a directory as well). touch $MNT/newfile # Filesystem now looks like: # # . (ino 256) # |----- newfile (ino 257) # btrfs subvolume snapshot -r $MNT $MNT/snap2 btrfs send -f /tmp/snap2.send -p $MNT/snap1 $MNT/snap2 # Now unmount the filesystem, create a new one, mount it and try to apply # both send streams to recreate both snapshots. umount $DEV mkfs.btrfs -f $DEV >/dev/null mount $DEV $MNT btrfs receive -f /tmp/snap1.send $MNT btrfs receive -f /tmp/snap2.send $MNT umount $MNT When running the test, the receive operation for the incremental stream fails: $ ./test-send-rmdir.sh Create a readonly snapshot of '/mnt/sdi' in '/mnt/sdi/snap1' At subvol /mnt/sdi/snap1 Create a readonly snapshot of '/mnt/sdi' in '/mnt/sdi/snap2' At subvol /mnt/sdi/snap2 At subvol snap1 At snapshot snap2 ERROR: chown o257-9-0 failed: No such file or directory So fix this by tracking directories that have a pending rmdir by inode number and generation number, instead of only inode number. A test case for fstests follows soon. Reported-by: Massimo B. Tested-by: Massimo B. Link: https://lore.kernel.org/linux-btrfs/6ae34776e85912960a253a8327068a892998e685.camel@gmx.net/ CC: stable@vger.kernel.org # 4.19+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/send.c | 49 +++++++++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index d719a2755a40..ae97f4dbaff3 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -236,6 +236,7 @@ struct waiting_dir_move { * after this directory is moved, we can try to rmdir the ino rmdir_ino. */ u64 rmdir_ino; + u64 rmdir_gen; bool orphanized; }; @@ -316,7 +317,7 @@ static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); static struct waiting_dir_move * get_waiting_dir_move(struct send_ctx *sctx, u64 ino); -static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino); +static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen); static int need_send_hole(struct send_ctx *sctx) { @@ -2299,7 +2300,7 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, fs_path_reset(name); - if (is_waiting_for_rm(sctx, ino)) { + if (is_waiting_for_rm(sctx, ino, gen)) { ret = gen_unique_name(sctx, ino, gen, name); if (ret < 0) goto out; @@ -2858,8 +2859,8 @@ out: return ret; } -static struct orphan_dir_info * -add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) +static struct orphan_dir_info *add_orphan_dir_info(struct send_ctx *sctx, + u64 dir_ino, u64 dir_gen) { struct rb_node **p = &sctx->orphan_dirs.rb_node; struct rb_node *parent = NULL; @@ -2868,20 +2869,23 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) while (*p) { parent = *p; entry = rb_entry(parent, struct orphan_dir_info, node); - if (dir_ino < entry->ino) { + if (dir_ino < entry->ino) p = &(*p)->rb_left; - } else if (dir_ino > entry->ino) { + else if (dir_ino > entry->ino) p = &(*p)->rb_right; - } else { + else if (dir_gen < entry->gen) + p = &(*p)->rb_left; + else if (dir_gen > entry->gen) + p = &(*p)->rb_right; + else return entry; - } } odi = kmalloc(sizeof(*odi), GFP_KERNEL); if (!odi) return ERR_PTR(-ENOMEM); odi->ino = dir_ino; - odi->gen = 0; + odi->gen = dir_gen; odi->last_dir_index_offset = 0; rb_link_node(&odi->node, parent, p); @@ -2889,8 +2893,8 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) return odi; } -static struct orphan_dir_info * -get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) +static struct orphan_dir_info *get_orphan_dir_info(struct send_ctx *sctx, + u64 dir_ino, u64 gen) { struct rb_node *n = sctx->orphan_dirs.rb_node; struct orphan_dir_info *entry; @@ -2901,15 +2905,19 @@ get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) n = n->rb_left; else if (dir_ino > entry->ino) n = n->rb_right; + else if (gen < entry->gen) + n = n->rb_left; + else if (gen > entry->gen) + n = n->rb_right; else return entry; } return NULL; } -static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino) +static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen) { - struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino); + struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino, gen); return odi != NULL; } @@ -2954,7 +2962,7 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, key.type = BTRFS_DIR_INDEX_KEY; key.offset = 0; - odi = get_orphan_dir_info(sctx, dir); + odi = get_orphan_dir_info(sctx, dir, dir_gen); if (odi) key.offset = odi->last_dir_index_offset; @@ -2985,7 +2993,7 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, dm = get_waiting_dir_move(sctx, loc.objectid); if (dm) { - odi = add_orphan_dir_info(sctx, dir); + odi = add_orphan_dir_info(sctx, dir, dir_gen); if (IS_ERR(odi)) { ret = PTR_ERR(odi); goto out; @@ -2993,12 +3001,13 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, odi->gen = dir_gen; odi->last_dir_index_offset = found_key.offset; dm->rmdir_ino = dir; + dm->rmdir_gen = dir_gen; ret = 0; goto out; } if (loc.objectid > send_progress) { - odi = add_orphan_dir_info(sctx, dir); + odi = add_orphan_dir_info(sctx, dir, dir_gen); if (IS_ERR(odi)) { ret = PTR_ERR(odi); goto out; @@ -3038,6 +3047,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized) return -ENOMEM; dm->ino = ino; dm->rmdir_ino = 0; + dm->rmdir_gen = 0; dm->orphanized = orphanized; while (*p) { @@ -3183,7 +3193,7 @@ static int path_loop(struct send_ctx *sctx, struct fs_path *name, while (ino != BTRFS_FIRST_FREE_OBJECTID) { fs_path_reset(name); - if (is_waiting_for_rm(sctx, ino)) + if (is_waiting_for_rm(sctx, ino, gen)) break; if (is_waiting_for_move(sctx, ino)) { if (*ancestor_ino == 0) @@ -3223,6 +3233,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) u64 parent_ino, parent_gen; struct waiting_dir_move *dm = NULL; u64 rmdir_ino = 0; + u64 rmdir_gen; u64 ancestor; bool is_orphan; int ret; @@ -3237,6 +3248,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) dm = get_waiting_dir_move(sctx, pm->ino); ASSERT(dm); rmdir_ino = dm->rmdir_ino; + rmdir_gen = dm->rmdir_gen; is_orphan = dm->orphanized; free_waiting_dir_move(sctx, dm); @@ -3273,6 +3285,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) dm = get_waiting_dir_move(sctx, pm->ino); ASSERT(dm); dm->rmdir_ino = rmdir_ino; + dm->rmdir_gen = rmdir_gen; } goto out; } @@ -3291,7 +3304,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) struct orphan_dir_info *odi; u64 gen; - odi = get_orphan_dir_info(sctx, rmdir_ino); + odi = get_orphan_dir_info(sctx, rmdir_ino, rmdir_gen); if (!odi) { /* already deleted */ goto finish; -- cgit 1.4.1 From 675a4fc8f3149e93f35fb5739fd8d4764206ba0b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 15 Dec 2020 12:00:26 -0500 Subject: btrfs: tests: initialize test inodes location I noticed that sometimes the module failed to load because the self tests failed like this: BTRFS: selftest: fs/btrfs/tests/inode-tests.c:963 miscount, wanted 1, got 0 This turned out to be because sometimes the btrfs ino would be the btree inode number, and thus we'd skip calling the set extent delalloc bit helper, and thus not adjust ->outstanding_extents. Fix this by making sure we initialize test inodes with a valid inode number so that we don't get random failures during self tests. Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tests/btrfs-tests.c | 10 ++++++++-- fs/btrfs/tests/inode-tests.c | 9 --------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index 8ca334d554af..6bd97bd4cb37 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -55,8 +55,14 @@ struct inode *btrfs_new_test_inode(void) struct inode *inode; inode = new_inode(test_mnt->mnt_sb); - if (inode) - inode_init_owner(inode, NULL, S_IFREG); + if (!inode) + return NULL; + + inode->i_mode = S_IFREG; + BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; + BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID; + BTRFS_I(inode)->location.offset = 0; + inode_init_owner(inode, NULL, S_IFREG); return inode; } diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 04022069761d..c9874b12d337 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -232,11 +232,6 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) return ret; } - inode->i_mode = S_IFREG; - BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; - BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID; - BTRFS_I(inode)->location.offset = 0; - fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize); if (!fs_info) { test_std_err(TEST_ALLOC_FS_INFO); @@ -835,10 +830,6 @@ static int test_hole_first(u32 sectorsize, u32 nodesize) return ret; } - BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; - BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID; - BTRFS_I(inode)->location.offset = 0; - fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize); if (!fs_info) { test_std_err(TEST_ALLOC_FS_INFO); -- cgit 1.4.1 From ea9ed87c73e87e044b2c58d658eb4ba5216bc488 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 6 Dec 2020 15:56:20 +0000 Subject: btrfs: fix async discard stall Might happen that bg->discard_eligible_time was changed without rescheduling, so btrfs_discard_workfn() wakes up earlier than that new time, peek_discard_list() returns NULL, and all work halts and goes to sleep without further rescheduling even there are block groups to discard. It happens pretty often, but not so visible from the userspace because after some time it usually will be kicked off anyway by someone else calling btrfs_discard_reschedule_work(). Fix it by continue rescheduling if block group discard lists are not empty. Reviewed-by: Josef Bacik Signed-off-by: Pavel Begunkov Signed-off-by: David Sterba --- fs/btrfs/discard.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c index 1db966bf85b2..36431d7e1334 100644 --- a/fs/btrfs/discard.c +++ b/fs/btrfs/discard.c @@ -199,16 +199,15 @@ static struct btrfs_block_group *find_next_block_group( static struct btrfs_block_group *peek_discard_list( struct btrfs_discard_ctl *discard_ctl, enum btrfs_discard_state *discard_state, - int *discard_index) + int *discard_index, u64 now) { struct btrfs_block_group *block_group; - const u64 now = ktime_get_ns(); spin_lock(&discard_ctl->lock); again: block_group = find_next_block_group(discard_ctl, now); - if (block_group && now > block_group->discard_eligible_time) { + if (block_group && now >= block_group->discard_eligible_time) { if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED && block_group->used != 0) { if (btrfs_is_block_group_data_only(block_group)) @@ -222,12 +221,11 @@ again: block_group->discard_state = BTRFS_DISCARD_EXTENTS; } discard_ctl->block_group = block_group; + } + if (block_group) { *discard_state = block_group->discard_state; *discard_index = block_group->discard_index; - } else { - block_group = NULL; } - spin_unlock(&discard_ctl->lock); return block_group; @@ -438,13 +436,18 @@ static void btrfs_discard_workfn(struct work_struct *work) int discard_index = 0; u64 trimmed = 0; u64 minlen = 0; + u64 now = ktime_get_ns(); discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work); block_group = peek_discard_list(discard_ctl, &discard_state, - &discard_index); + &discard_index, now); if (!block_group || !btrfs_run_discard_work(discard_ctl)) return; + if (now < block_group->discard_eligible_time) { + btrfs_discard_schedule_work(discard_ctl, false); + return; + } /* Perform discarding */ minlen = discard_minlen[discard_index]; -- cgit 1.4.1 From 1ea2872fc6f2aaee0a4b4f1578b83ffd9f55c6a7 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 6 Dec 2020 15:56:21 +0000 Subject: btrfs: fix racy access to discard_ctl data Because only one discard worker may be running at any given point, it could have been safe to modify ->prev_discard, etc. without synchronization, if not for @override flag in btrfs_discard_schedule_work() and delayed_work_pending() returning false while workfn is running. That may lead to torn reads of u64 for some architectures, but that's not a big problem as only slightly affects the discard rate. Suggested-by: Josef Bacik Reviewed-by: Josef Bacik Signed-off-by: Pavel Begunkov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/discard.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c index 36431d7e1334..d641f451f840 100644 --- a/fs/btrfs/discard.c +++ b/fs/btrfs/discard.c @@ -477,13 +477,6 @@ static void btrfs_discard_workfn(struct work_struct *work) discard_ctl->discard_extent_bytes += trimmed; } - /* - * Updated without locks as this is inside the workfn and nothing else - * is reading the values - */ - discard_ctl->prev_discard = trimmed; - discard_ctl->prev_discard_time = ktime_get_ns(); - /* Determine next steps for a block_group */ if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) { if (discard_state == BTRFS_DISCARD_BITMAPS) { @@ -499,7 +492,10 @@ static void btrfs_discard_workfn(struct work_struct *work) } } + now = ktime_get_ns(); spin_lock(&discard_ctl->lock); + discard_ctl->prev_discard = trimmed; + discard_ctl->prev_discard_time = now; discard_ctl->block_group = NULL; spin_unlock(&discard_ctl->lock); -- cgit 1.4.1 From 8fc058597a283e9a37720abb0e8d68e342b9387d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 6 Dec 2020 15:56:22 +0000 Subject: btrfs: merge critical sections of discard lock in workfn btrfs_discard_workfn() drops discard_ctl->lock just to take it again in a moment in btrfs_discard_schedule_work(). Avoid that and also reuse ktime. Reviewed-by: Josef Bacik Signed-off-by: Pavel Begunkov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/discard.c | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c index d641f451f840..2b8383d41144 100644 --- a/fs/btrfs/discard.c +++ b/fs/btrfs/discard.c @@ -328,28 +328,15 @@ void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl, btrfs_discard_schedule_work(discard_ctl, false); } -/** - * btrfs_discard_schedule_work - responsible for scheduling the discard work - * @discard_ctl: discard control - * @override: override the current timer - * - * Discards are issued by a delayed workqueue item. @override is used to - * update the current delay as the baseline delay interval is reevaluated on - * transaction commit. This is also maxed with any other rate limit. - */ -void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl, - bool override) +static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl, + u64 now, bool override) { struct btrfs_block_group *block_group; - const u64 now = ktime_get_ns(); - - spin_lock(&discard_ctl->lock); if (!btrfs_run_discard_work(discard_ctl)) - goto out; - + return; if (!override && delayed_work_pending(&discard_ctl->work)) - goto out; + return; block_group = find_next_block_group(discard_ctl, now); if (block_group) { @@ -391,7 +378,24 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl, mod_delayed_work(discard_ctl->discard_workers, &discard_ctl->work, nsecs_to_jiffies(delay)); } -out: +} + +/* + * btrfs_discard_schedule_work - responsible for scheduling the discard work + * @discard_ctl: discard control + * @override: override the current timer + * + * Discards are issued by a delayed workqueue item. @override is used to + * update the current delay as the baseline delay interval is reevaluated on + * transaction commit. This is also maxed with any other rate limit. + */ +void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl, + bool override) +{ + const u64 now = ktime_get_ns(); + + spin_lock(&discard_ctl->lock); + __btrfs_discard_schedule_work(discard_ctl, now, override); spin_unlock(&discard_ctl->lock); } @@ -497,9 +501,8 @@ static void btrfs_discard_workfn(struct work_struct *work) discard_ctl->prev_discard = trimmed; discard_ctl->prev_discard_time = now; discard_ctl->block_group = NULL; + __btrfs_discard_schedule_work(discard_ctl, now, false); spin_unlock(&discard_ctl->lock); - - btrfs_discard_schedule_work(discard_ctl, false); } /** -- cgit 1.4.1 From cb13eea3b49055bd78e6ddf39defd6340f7379fc Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 14 Dec 2020 10:10:45 +0000 Subject: btrfs: fix transaction leak and crash after RO remount caused by qgroup rescan If we remount a filesystem in RO mode while the qgroup rescan worker is running, we can end up having it still running after the remount is done, and at unmount time we may end up with an open transaction that ends up never getting committed. If that happens we end up with several memory leaks and can crash when hardware acceleration is unavailable for crc32c. Possibly it can lead to other nasty surprises too, due to use-after-free issues. The following steps explain how the problem happens. 1) We have a filesystem mounted in RW mode and the qgroup rescan worker is running; 2) We remount the filesystem in RO mode, and never stop/pause the rescan worker, so after the remount the rescan worker is still running. The important detail here is that the rescan task is still running after the remount operation committed any ongoing transaction through its call to btrfs_commit_super(); 3) The rescan is still running, and after the remount completed, the rescan worker started a transaction, after it finished iterating all leaves of the extent tree, to update the qgroup status item in the quotas tree. It does not commit the transaction, it only releases its handle on the transaction; 4) A filesystem unmount operation starts shortly after; 5) The unmount task, at close_ctree(), stops the transaction kthread, which had not had a chance to commit the open transaction since it was sleeping and the commit interval (default of 30 seconds) has not yet elapsed since the last time it committed a transaction; 6) So after stopping the transaction kthread we still have the transaction used to update the qgroup status item open. At close_ctree(), when the filesystem is in RO mode and no transaction abort happened (or the filesystem is in error mode), we do not expect to have any transaction open, so we do not call btrfs_commit_super(); 7) We then proceed to destroy the work queues, free the roots and block groups, etc. After that we drop the last reference on the btree inode by calling iput() on it. Since there are dirty pages for the btree inode, corresponding to the COWed extent buffer for the quotas btree, btree_write_cache_pages() is invoked to flush those dirty pages. This results in creating a bio and submitting it, which makes us end up at btrfs_submit_metadata_bio(); 8) At btrfs_submit_metadata_bio() we end up at the if-then-else branch that calls btrfs_wq_submit_bio(), because check_async_write() returned a value of 1. This value of 1 is because we did not have hardware acceleration available for crc32c, so BTRFS_FS_CSUM_IMPL_FAST was not set in fs_info->flags; 9) Then at btrfs_wq_submit_bio() we call btrfs_queue_work() against the workqueue at fs_info->workers, which was already freed before by the call to btrfs_stop_all_workers() at close_ctree(). This results in an invalid memory access due to a use-after-free, leading to a crash. When this happens, before the crash there are several warnings triggered, since we have reserved metadata space in a block group, the delayed refs reservation, etc: ------------[ cut here ]------------ WARNING: CPU: 4 PID: 1729896 at fs/btrfs/block-group.c:125 btrfs_put_block_group+0x63/0xa0 [btrfs] Modules linked in: btrfs dm_snapshot dm_thin_pool (...) CPU: 4 PID: 1729896 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_put_block_group+0x63/0xa0 [btrfs] Code: f0 01 00 00 48 39 c2 75 (...) RSP: 0018:ffffb270826bbdd8 EFLAGS: 00010206 RAX: 0000000000000001 RBX: ffff947ed73e4000 RCX: ffff947ebc8b29c8 RDX: 0000000000000001 RSI: ffffffffc0b150a0 RDI: ffff947ebc8b2800 RBP: ffff947ebc8b2800 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ed73e4110 R13: ffff947ed73e4160 R14: ffff947ebc8b2988 R15: dead000000000100 FS: 00007f15edfea840(0000) GS:ffff9481ad600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f37e2893320 CR3: 0000000138f68001 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_free_block_groups+0x17f/0x2f0 [btrfs] close_ctree+0x2ba/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f15ee221ee7 Code: ff 0b 00 f7 d8 64 89 01 48 (...) RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000 RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0 R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000 R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60 irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last enabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last disabled at (0): [<0000000000000000>] 0x0 ---[ end trace dd74718fef1ed5c6 ]--- ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1729896 at fs/btrfs/block-rsv.c:459 btrfs_release_global_block_rsv+0x70/0xc0 [btrfs] Modules linked in: btrfs dm_snapshot dm_thin_pool (...) CPU: 2 PID: 1729896 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_release_global_block_rsv+0x70/0xc0 [btrfs] Code: 48 83 bb b0 03 00 00 00 (...) RSP: 0018:ffffb270826bbdd8 EFLAGS: 00010206 RAX: 000000000033c000 RBX: ffff947ed73e4000 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffffffffc0b0d8c1 RDI: 00000000ffffffff RBP: ffff947ebc8b7000 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ed73e4110 R13: ffff947ed73e5278 R14: dead000000000122 R15: dead000000000100 FS: 00007f15edfea840(0000) GS:ffff9481aca00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000561a79f76e20 CR3: 0000000138f68006 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_free_block_groups+0x24c/0x2f0 [btrfs] close_ctree+0x2ba/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f15ee221ee7 Code: ff 0b 00 f7 d8 64 89 01 (...) RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000 RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0 R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000 R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60 irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last enabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last disabled at (0): [<0000000000000000>] 0x0 ---[ end trace dd74718fef1ed5c7 ]--- ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1729896 at fs/btrfs/block-group.c:3377 btrfs_free_block_groups+0x25d/0x2f0 [btrfs] Modules linked in: btrfs dm_snapshot dm_thin_pool (...) CPU: 5 PID: 1729896 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_free_block_groups+0x25d/0x2f0 [btrfs] Code: ad de 49 be 22 01 00 (...) RSP: 0018:ffffb270826bbde8 EFLAGS: 00010206 RAX: ffff947ebeae1d08 RBX: ffff947ed73e4000 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffff947e9d823ae8 RDI: 0000000000000246 RBP: ffff947ebeae1d08 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ebeae1c00 R13: ffff947ed73e5278 R14: dead000000000122 R15: dead000000000100 FS: 00007f15edfea840(0000) GS:ffff9481ad200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1475d98ea8 CR3: 0000000138f68005 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: close_ctree+0x2ba/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f15ee221ee7 Code: ff 0b 00 f7 d8 64 89 (...) RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000 RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0 R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000 R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60 irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last enabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last disabled at (0): [<0000000000000000>] 0x0 ---[ end trace dd74718fef1ed5c8 ]--- BTRFS info (device sdc): space_info 4 has 268238848 free, is not full BTRFS info (device sdc): space_info total=268435456, used=114688, pinned=0, reserved=16384, may_use=0, readonly=65536 BTRFS info (device sdc): global_block_rsv: size 0 reserved 0 BTRFS info (device sdc): trans_block_rsv: size 0 reserved 0 BTRFS info (device sdc): chunk_block_rsv: size 0 reserved 0 BTRFS info (device sdc): delayed_block_rsv: size 0 reserved 0 BTRFS info (device sdc): delayed_refs_rsv: size 524288 reserved 0 And the crash, which only happens when we do not have crc32c hardware acceleration, produces the following trace immediately after those warnings: stack segment: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI CPU: 2 PID: 1749129 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_queue_work+0x36/0x190 [btrfs] Code: 54 55 53 48 89 f3 (...) RSP: 0018:ffffb27082443ae8 EFLAGS: 00010282 RAX: 0000000000000004 RBX: ffff94810ee9ad90 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffff94810ee9ad90 RDI: ffff947ed8ee75a0 RBP: a56b6b6b6b6b6b6b R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000007 R11: 0000000000000001 R12: ffff947fa9b435a8 R13: ffff94810ee9ad90 R14: 0000000000000000 R15: ffff947e93dc0000 FS: 00007f3cfe974840(0000) GS:ffff9481ac600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1b42995a70 CR3: 0000000127638003 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_wq_submit_bio+0xb3/0xd0 [btrfs] btrfs_submit_metadata_bio+0x44/0xc0 [btrfs] submit_one_bio+0x61/0x70 [btrfs] btree_write_cache_pages+0x414/0x450 [btrfs] ? kobject_put+0x9a/0x1d0 ? trace_hardirqs_on+0x1b/0xf0 ? _raw_spin_unlock_irqrestore+0x3c/0x60 ? free_debug_processing+0x1e1/0x2b0 do_writepages+0x43/0xe0 ? lock_acquired+0x199/0x490 __writeback_single_inode+0x59/0x650 writeback_single_inode+0xaf/0x120 write_inode_now+0x94/0xd0 iput+0x187/0x2b0 close_ctree+0x2c6/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f3cfebabee7 Code: ff 0b 00 f7 d8 64 89 01 (...) RSP: 002b:00007ffc9c9a05f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f3cfecd1264 RCX: 00007f3cfebabee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 0000562b6b478000 RBP: 0000562b6b473a30 R08: 0000000000000000 R09: 00007f3cfec6cbe0 R10: 0000562b6b479fe0 R11: 0000000000000246 R12: 0000000000000000 R13: 0000562b6b478000 R14: 0000562b6b473b40 R15: 0000562b6b473c60 Modules linked in: btrfs dm_snapshot dm_thin_pool (...) ---[ end trace dd74718fef1ed5cc ]--- Finally when we remove the btrfs module (rmmod btrfs), there are several warnings about objects that were allocated from our slabs but were never freed, consequence of the transaction that was never committed and got leaked: ============================================================================= BUG btrfs_delayed_ref_head (Tainted: G B W ): Objects remaining in btrfs_delayed_ref_head on __kmem_cache_shutdown() ----------------------------------------------------------------------------- INFO: Slab 0x0000000094c2ae56 objects=24 used=2 fp=0x000000002bfa2521 flags=0x17fffc000010200 CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 slab_err+0xb7/0xdc ? lock_acquired+0x199/0x490 __kmem_cache_shutdown+0x1ac/0x3c0 ? lock_release+0x20e/0x4c0 kmem_cache_destroy+0x55/0x120 btrfs_delayed_ref_exit+0x11/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 INFO: Object 0x0000000050cbdd61 @offset=12104 INFO: Allocated in btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] age=1894 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] btrfs_free_tree_block+0x128/0x360 [btrfs] __btrfs_cow_block+0x489/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 btrfs_mount+0x13b/0x3e0 [btrfs] INFO: Freed in __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] age=4292 cpu=2 pid=1729526 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] commit_cowonly_roots+0xfb/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] sync_filesystem+0x74/0x90 generic_shutdown_super+0x22/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 INFO: Object 0x0000000086e9b0ff @offset=12776 INFO: Allocated in btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] age=1900 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] btrfs_alloc_tree_block+0x2bf/0x360 [btrfs] alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs] __btrfs_cow_block+0x12d/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 INFO: Freed in __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] age=3141 cpu=6 pid=1729803 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] btrfs_write_dirty_block_groups+0x17d/0x3d0 [btrfs] commit_cowonly_roots+0x248/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] close_ctree+0x113/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 kmem_cache_destroy btrfs_delayed_ref_head: Slab cache still has objects CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 kmem_cache_destroy+0x119/0x120 btrfs_delayed_ref_exit+0x11/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 0b (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 ============================================================================= BUG btrfs_delayed_tree_ref (Tainted: G B W ): Objects remaining in btrfs_delayed_tree_ref on __kmem_cache_shutdown() ----------------------------------------------------------------------------- INFO: Slab 0x0000000011f78dc0 objects=37 used=2 fp=0x0000000032d55d91 flags=0x17fffc000010200 CPU: 3 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 slab_err+0xb7/0xdc ? lock_acquired+0x199/0x490 __kmem_cache_shutdown+0x1ac/0x3c0 ? lock_release+0x20e/0x4c0 kmem_cache_destroy+0x55/0x120 btrfs_delayed_ref_exit+0x1d/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 INFO: Object 0x000000001a340018 @offset=4408 INFO: Allocated in btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] age=1917 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] btrfs_free_tree_block+0x128/0x360 [btrfs] __btrfs_cow_block+0x489/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 btrfs_mount+0x13b/0x3e0 [btrfs] INFO: Freed in __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] age=4167 cpu=4 pid=1729795 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] btrfs_commit_transaction+0x60/0xc40 [btrfs] create_subvol+0x56a/0x990 [btrfs] btrfs_mksubvol+0x3fb/0x4a0 [btrfs] __btrfs_ioctl_snap_create+0x119/0x1a0 [btrfs] btrfs_ioctl_snap_create+0x58/0x80 [btrfs] btrfs_ioctl+0x1a92/0x36f0 [btrfs] __x64_sys_ioctl+0x83/0xb0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 INFO: Object 0x000000002b46292a @offset=13648 INFO: Allocated in btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] age=1923 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] btrfs_alloc_tree_block+0x2bf/0x360 [btrfs] alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs] __btrfs_cow_block+0x12d/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 INFO: Freed in __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] age=3164 cpu=6 pid=1729803 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] commit_cowonly_roots+0xfb/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] close_ctree+0x113/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 kmem_cache_destroy btrfs_delayed_tree_ref: Slab cache still has objects CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 kmem_cache_destroy+0x119/0x120 btrfs_delayed_ref_exit+0x1d/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 ============================================================================= BUG btrfs_delayed_extent_op (Tainted: G B W ): Objects remaining in btrfs_delayed_extent_op on __kmem_cache_shutdown() ----------------------------------------------------------------------------- INFO: Slab 0x00000000f145ce2f objects=22 used=1 fp=0x00000000af0f92cf flags=0x17fffc000010200 CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 slab_err+0xb7/0xdc ? lock_acquired+0x199/0x490 __kmem_cache_shutdown+0x1ac/0x3c0 ? __mutex_unlock_slowpath+0x45/0x2a0 kmem_cache_destroy+0x55/0x120 exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 INFO: Object 0x000000004cf95ea8 @offset=6264 INFO: Allocated in btrfs_alloc_tree_block+0x1e0/0x360 [btrfs] age=1931 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_alloc_tree_block+0x1e0/0x360 [btrfs] alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs] __btrfs_cow_block+0x12d/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 btrfs_mount+0x13b/0x3e0 [btrfs] INFO: Freed in __btrfs_run_delayed_refs+0xabd/0x1290 [btrfs] age=3173 cpu=6 pid=1729803 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0xabd/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] commit_cowonly_roots+0xfb/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] close_ctree+0x113/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 kmem_cache_destroy btrfs_delayed_extent_op: Slab cache still has objects CPU: 3 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 kmem_cache_destroy+0x119/0x120 exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 BTRFS: state leak: start 30408704 end 30425087 state 1 in tree 1 refs 1 Fix this issue by having the remount path stop the qgroup rescan worker when we are remounting RO and teach the rescan worker to stop when a remount is in progress. If later a remount in RW mode happens, we are already resuming the qgroup rescan worker through the call to btrfs_qgroup_rescan_resume(), so we do not need to worry about that. Tested-by: Fabian Vogt Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 13 ++++++++++--- fs/btrfs/super.c | 8 ++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 47f27658eac1..808370ada888 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3190,6 +3190,12 @@ out: return ret; } +static bool rescan_should_stop(struct btrfs_fs_info *fs_info) +{ + return btrfs_fs_closing(fs_info) || + test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); +} + static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) { struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info, @@ -3198,6 +3204,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) struct btrfs_trans_handle *trans = NULL; int err = -ENOMEM; int ret = 0; + bool stopped = false; path = btrfs_alloc_path(); if (!path) @@ -3210,7 +3217,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) path->skip_locking = 1; err = 0; - while (!err && !btrfs_fs_closing(fs_info)) { + while (!err && !(stopped = rescan_should_stop(fs_info))) { trans = btrfs_start_transaction(fs_info->fs_root, 0); if (IS_ERR(trans)) { err = PTR_ERR(trans); @@ -3253,7 +3260,7 @@ out: } mutex_lock(&fs_info->qgroup_rescan_lock); - if (!btrfs_fs_closing(fs_info)) + if (!stopped) fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN; if (trans) { ret = update_qgroup_status_item(trans); @@ -3272,7 +3279,7 @@ out: btrfs_end_transaction(trans); - if (btrfs_fs_closing(fs_info)) { + if (stopped) { btrfs_info(fs_info, "qgroup scan paused"); } else if (err >= 0) { btrfs_info(fs_info, "qgroup scan completed%s", diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 022f20810089..b24fa62375e0 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1968,6 +1968,14 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) btrfs_scrub_cancel(fs_info); btrfs_pause_balance(fs_info); + /* + * Pause the qgroup rescan worker if it is running. We don't want + * it to be still running after we are in RO mode, as after that, + * by the time we unmount, it might have left a transaction open, + * so we would leak the transaction and/or crash. + */ + btrfs_qgroup_wait_for_completion(fs_info, false); + ret = btrfs_commit_super(fs_info); if (ret) goto restore; -- cgit 1.4.1 From 638331fa56caeaa8b4d31cc1dfbe0ce989bcff67 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 14 Dec 2020 10:10:46 +0000 Subject: btrfs: fix transaction leak and crash after cleaning up orphans on RO mount When we delete a root (subvolume or snapshot), at the very end of the operation, we attempt to remove the root's orphan item from the root tree, at btrfs_drop_snapshot(), by calling btrfs_del_orphan_item(). We ignore any error from btrfs_del_orphan_item() since it is not a serious problem and the next time the filesystem is mounted we remove such stray orphan items at btrfs_find_orphan_roots(). However if the filesystem is mounted RO and we have stray orphan items for any previously deleted root, we can end up leaking a transaction and other data structures when unmounting the filesystem, as well as crashing if we do not have hardware acceleration for crc32c available. The steps that lead to the transaction leak are the following: 1) The filesystem is mounted in RW mode; 2) A subvolume is deleted; 3) When the cleaner kthread runs btrfs_drop_snapshot() to delete the root, it gets a failure at btrfs_del_orphan_item(), which is ignored, due to an ENOMEM when allocating a path for example. So the orphan item for the root remains in the root tree; 4) The filesystem is unmounted; 5) The filesystem is mounted RO (-o ro). During the mount path we call btrfs_find_orphan_roots(), which iterates the root tree searching for orphan items. It finds the orphan item for our deleted root, and since it can not find the root, it starts a transaction to delete the orphan item (by calling btrfs_del_orphan_item()); 6) The RO mount completes; 7) Before the transaction kthread commits the transaction created for deleting the orphan item (i.e. less than 30 seconds elapsed since the mount, the default commit interval), a filesystem unmount operation is started; 8) At close_ctree(), we stop the transaction kthread, but we still have a transaction open with at least one dirty extent buffer, a leaf for the tree root which was COWed when deleting the orphan item; 9) We then proceed to destroy the work queues, free the roots and block groups, etc. After that we drop the last reference on the btree inode by calling iput() on it. Since there are dirty pages for the btree inode, corresponding to the COWed extent buffer, btree_write_cache_pages() is invoked to flush those dirty pages. This results in creating a bio and submitting it, which makes us end up at btrfs_submit_metadata_bio(); 10) At btrfs_submit_metadata_bio() we end up at the if-then-else branch that calls btrfs_wq_submit_bio(), because check_async_write() returned a value of 1. This value of 1 is because we did not have hardware acceleration available for crc32c, so BTRFS_FS_CSUM_IMPL_FAST was not set in fs_info->flags; 11) Then at btrfs_wq_submit_bio() we call btrfs_queue_work() against the workqueue at fs_info->workers, which was already freed before by the call to btrfs_stop_all_workers() at close_ctree(). This results in an invalid memory access due to a use-after-free, leading to a crash. When this happens, before the crash there are several warnings triggered, since we have reserved metadata space in a block group, the delayed refs reservation, etc: ------------[ cut here ]------------ WARNING: CPU: 4 PID: 1729896 at fs/btrfs/block-group.c:125 btrfs_put_block_group+0x63/0xa0 [btrfs] Modules linked in: btrfs dm_snapshot dm_thin_pool (...) CPU: 4 PID: 1729896 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_put_block_group+0x63/0xa0 [btrfs] Code: f0 01 00 00 48 39 c2 75 (...) RSP: 0018:ffffb270826bbdd8 EFLAGS: 00010206 RAX: 0000000000000001 RBX: ffff947ed73e4000 RCX: ffff947ebc8b29c8 RDX: 0000000000000001 RSI: ffffffffc0b150a0 RDI: ffff947ebc8b2800 RBP: ffff947ebc8b2800 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ed73e4110 R13: ffff947ed73e4160 R14: ffff947ebc8b2988 R15: dead000000000100 FS: 00007f15edfea840(0000) GS:ffff9481ad600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f37e2893320 CR3: 0000000138f68001 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_free_block_groups+0x17f/0x2f0 [btrfs] close_ctree+0x2ba/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f15ee221ee7 Code: ff 0b 00 f7 d8 64 89 01 48 (...) RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000 RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0 R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000 R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60 irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last enabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last disabled at (0): [<0000000000000000>] 0x0 ---[ end trace dd74718fef1ed5c6 ]--- ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1729896 at fs/btrfs/block-rsv.c:459 btrfs_release_global_block_rsv+0x70/0xc0 [btrfs] Modules linked in: btrfs dm_snapshot dm_thin_pool (...) CPU: 2 PID: 1729896 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_release_global_block_rsv+0x70/0xc0 [btrfs] Code: 48 83 bb b0 03 00 00 00 (...) RSP: 0018:ffffb270826bbdd8 EFLAGS: 00010206 RAX: 000000000033c000 RBX: ffff947ed73e4000 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffffffffc0b0d8c1 RDI: 00000000ffffffff RBP: ffff947ebc8b7000 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ed73e4110 R13: ffff947ed73e5278 R14: dead000000000122 R15: dead000000000100 FS: 00007f15edfea840(0000) GS:ffff9481aca00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000561a79f76e20 CR3: 0000000138f68006 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_free_block_groups+0x24c/0x2f0 [btrfs] close_ctree+0x2ba/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f15ee221ee7 Code: ff 0b 00 f7 d8 64 89 01 (...) RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000 RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0 R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000 R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60 irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last enabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last disabled at (0): [<0000000000000000>] 0x0 ---[ end trace dd74718fef1ed5c7 ]--- ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1729896 at fs/btrfs/block-group.c:3377 btrfs_free_block_groups+0x25d/0x2f0 [btrfs] Modules linked in: btrfs dm_snapshot dm_thin_pool (...) CPU: 5 PID: 1729896 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_free_block_groups+0x25d/0x2f0 [btrfs] Code: ad de 49 be 22 01 00 (...) RSP: 0018:ffffb270826bbde8 EFLAGS: 00010206 RAX: ffff947ebeae1d08 RBX: ffff947ed73e4000 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffff947e9d823ae8 RDI: 0000000000000246 RBP: ffff947ebeae1d08 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ebeae1c00 R13: ffff947ed73e5278 R14: dead000000000122 R15: dead000000000100 FS: 00007f15edfea840(0000) GS:ffff9481ad200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1475d98ea8 CR3: 0000000138f68005 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: close_ctree+0x2ba/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f15ee221ee7 Code: ff 0b 00 f7 d8 64 89 (...) RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000 RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0 R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000 R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60 irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last enabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last disabled at (0): [<0000000000000000>] 0x0 ---[ end trace dd74718fef1ed5c8 ]--- BTRFS info (device sdc): space_info 4 has 268238848 free, is not full BTRFS info (device sdc): space_info total=268435456, used=114688, pinned=0, reserved=16384, may_use=0, readonly=65536 BTRFS info (device sdc): global_block_rsv: size 0 reserved 0 BTRFS info (device sdc): trans_block_rsv: size 0 reserved 0 BTRFS info (device sdc): chunk_block_rsv: size 0 reserved 0 BTRFS info (device sdc): delayed_block_rsv: size 0 reserved 0 BTRFS info (device sdc): delayed_refs_rsv: size 524288 reserved 0 And the crash, which only happens when we do not have crc32c hardware acceleration, produces the following trace immediately after those warnings: stack segment: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI CPU: 2 PID: 1749129 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_queue_work+0x36/0x190 [btrfs] Code: 54 55 53 48 89 f3 (...) RSP: 0018:ffffb27082443ae8 EFLAGS: 00010282 RAX: 0000000000000004 RBX: ffff94810ee9ad90 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffff94810ee9ad90 RDI: ffff947ed8ee75a0 RBP: a56b6b6b6b6b6b6b R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000007 R11: 0000000000000001 R12: ffff947fa9b435a8 R13: ffff94810ee9ad90 R14: 0000000000000000 R15: ffff947e93dc0000 FS: 00007f3cfe974840(0000) GS:ffff9481ac600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1b42995a70 CR3: 0000000127638003 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_wq_submit_bio+0xb3/0xd0 [btrfs] btrfs_submit_metadata_bio+0x44/0xc0 [btrfs] submit_one_bio+0x61/0x70 [btrfs] btree_write_cache_pages+0x414/0x450 [btrfs] ? kobject_put+0x9a/0x1d0 ? trace_hardirqs_on+0x1b/0xf0 ? _raw_spin_unlock_irqrestore+0x3c/0x60 ? free_debug_processing+0x1e1/0x2b0 do_writepages+0x43/0xe0 ? lock_acquired+0x199/0x490 __writeback_single_inode+0x59/0x650 writeback_single_inode+0xaf/0x120 write_inode_now+0x94/0xd0 iput+0x187/0x2b0 close_ctree+0x2c6/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f3cfebabee7 Code: ff 0b 00 f7 d8 64 89 01 (...) RSP: 002b:00007ffc9c9a05f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f3cfecd1264 RCX: 00007f3cfebabee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 0000562b6b478000 RBP: 0000562b6b473a30 R08: 0000000000000000 R09: 00007f3cfec6cbe0 R10: 0000562b6b479fe0 R11: 0000000000000246 R12: 0000000000000000 R13: 0000562b6b478000 R14: 0000562b6b473b40 R15: 0000562b6b473c60 Modules linked in: btrfs dm_snapshot dm_thin_pool (...) ---[ end trace dd74718fef1ed5cc ]--- Finally when we remove the btrfs module (rmmod btrfs), there are several warnings about objects that were allocated from our slabs but were never freed, consequence of the transaction that was never committed and got leaked: ============================================================================= BUG btrfs_delayed_ref_head (Tainted: G B W ): Objects remaining in btrfs_delayed_ref_head on __kmem_cache_shutdown() ----------------------------------------------------------------------------- INFO: Slab 0x0000000094c2ae56 objects=24 used=2 fp=0x000000002bfa2521 flags=0x17fffc000010200 CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 slab_err+0xb7/0xdc ? lock_acquired+0x199/0x490 __kmem_cache_shutdown+0x1ac/0x3c0 ? lock_release+0x20e/0x4c0 kmem_cache_destroy+0x55/0x120 btrfs_delayed_ref_exit+0x11/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 INFO: Object 0x0000000050cbdd61 @offset=12104 INFO: Allocated in btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] age=1894 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] btrfs_free_tree_block+0x128/0x360 [btrfs] __btrfs_cow_block+0x489/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 btrfs_mount+0x13b/0x3e0 [btrfs] INFO: Freed in __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] age=4292 cpu=2 pid=1729526 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] commit_cowonly_roots+0xfb/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] sync_filesystem+0x74/0x90 generic_shutdown_super+0x22/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 INFO: Object 0x0000000086e9b0ff @offset=12776 INFO: Allocated in btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] age=1900 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] btrfs_alloc_tree_block+0x2bf/0x360 [btrfs] alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs] __btrfs_cow_block+0x12d/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 INFO: Freed in __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] age=3141 cpu=6 pid=1729803 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] btrfs_write_dirty_block_groups+0x17d/0x3d0 [btrfs] commit_cowonly_roots+0x248/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] close_ctree+0x113/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 kmem_cache_destroy btrfs_delayed_ref_head: Slab cache still has objects CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 kmem_cache_destroy+0x119/0x120 btrfs_delayed_ref_exit+0x11/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 0b (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 ============================================================================= BUG btrfs_delayed_tree_ref (Tainted: G B W ): Objects remaining in btrfs_delayed_tree_ref on __kmem_cache_shutdown() ----------------------------------------------------------------------------- INFO: Slab 0x0000000011f78dc0 objects=37 used=2 fp=0x0000000032d55d91 flags=0x17fffc000010200 CPU: 3 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 slab_err+0xb7/0xdc ? lock_acquired+0x199/0x490 __kmem_cache_shutdown+0x1ac/0x3c0 ? lock_release+0x20e/0x4c0 kmem_cache_destroy+0x55/0x120 btrfs_delayed_ref_exit+0x1d/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 INFO: Object 0x000000001a340018 @offset=4408 INFO: Allocated in btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] age=1917 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] btrfs_free_tree_block+0x128/0x360 [btrfs] __btrfs_cow_block+0x489/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 btrfs_mount+0x13b/0x3e0 [btrfs] INFO: Freed in __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] age=4167 cpu=4 pid=1729795 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] btrfs_commit_transaction+0x60/0xc40 [btrfs] create_subvol+0x56a/0x990 [btrfs] btrfs_mksubvol+0x3fb/0x4a0 [btrfs] __btrfs_ioctl_snap_create+0x119/0x1a0 [btrfs] btrfs_ioctl_snap_create+0x58/0x80 [btrfs] btrfs_ioctl+0x1a92/0x36f0 [btrfs] __x64_sys_ioctl+0x83/0xb0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 INFO: Object 0x000000002b46292a @offset=13648 INFO: Allocated in btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] age=1923 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] btrfs_alloc_tree_block+0x2bf/0x360 [btrfs] alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs] __btrfs_cow_block+0x12d/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 INFO: Freed in __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] age=3164 cpu=6 pid=1729803 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] commit_cowonly_roots+0xfb/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] close_ctree+0x113/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 kmem_cache_destroy btrfs_delayed_tree_ref: Slab cache still has objects CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 kmem_cache_destroy+0x119/0x120 btrfs_delayed_ref_exit+0x1d/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 ============================================================================= BUG btrfs_delayed_extent_op (Tainted: G B W ): Objects remaining in btrfs_delayed_extent_op on __kmem_cache_shutdown() ----------------------------------------------------------------------------- INFO: Slab 0x00000000f145ce2f objects=22 used=1 fp=0x00000000af0f92cf flags=0x17fffc000010200 CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 slab_err+0xb7/0xdc ? lock_acquired+0x199/0x490 __kmem_cache_shutdown+0x1ac/0x3c0 ? __mutex_unlock_slowpath+0x45/0x2a0 kmem_cache_destroy+0x55/0x120 exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 INFO: Object 0x000000004cf95ea8 @offset=6264 INFO: Allocated in btrfs_alloc_tree_block+0x1e0/0x360 [btrfs] age=1931 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_alloc_tree_block+0x1e0/0x360 [btrfs] alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs] __btrfs_cow_block+0x12d/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 btrfs_mount+0x13b/0x3e0 [btrfs] INFO: Freed in __btrfs_run_delayed_refs+0xabd/0x1290 [btrfs] age=3173 cpu=6 pid=1729803 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0xabd/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] commit_cowonly_roots+0xfb/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] close_ctree+0x113/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 kmem_cache_destroy btrfs_delayed_extent_op: Slab cache still has objects CPU: 3 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 kmem_cache_destroy+0x119/0x120 exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 BTRFS: state leak: start 30408704 end 30425087 state 1 in tree 1 refs 1 So fix this by calling btrfs_find_orphan_roots() in the mount path only if we are mounting the filesystem in RW mode. It's pointless to have it called for RO mounts anyway, since despite adding any deleted roots to the list of dead roots, we will never have the roots deleted until the filesystem is remounted in RW mode, as the cleaner kthread does nothing when we are mounted in RO - btrfs_need_cleaner_sleep() always returns true and the cleaner spends all time sleeping, never cleaning dead roots. This is accomplished by moving the call to btrfs_find_orphan_roots() from open_ctree() to btrfs_start_pre_rw_mount(), which also guarantees that if later the filesystem is remounted RW, we populate the list of dead roots and have the cleaner task delete the dead roots. Tested-by: Fabian Vogt Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 765deefda92b..e941cbae3991 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2969,6 +2969,7 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info) } } + ret = btrfs_find_orphan_roots(fs_info); out: return ret; } @@ -3383,10 +3384,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device } } - ret = btrfs_find_orphan_roots(fs_info); - if (ret) - goto fail_qgroup; - fs_info->fs_root = btrfs_get_fs_root(fs_info, BTRFS_FS_TREE_OBJECTID, true); if (IS_ERR(fs_info->fs_root)) { err = PTR_ERR(fs_info->fs_root); -- cgit 1.4.1 From a0a1db70df5f48576fea6d08f0a69c05f3ab4cf4 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 14 Dec 2020 10:10:47 +0000 Subject: btrfs: fix race between RO remount and the cleaner task When we are remounting a filesystem in RO mode we can race with the cleaner task and result in leaking a transaction if the filesystem is unmounted shortly after, before the transaction kthread had a chance to commit that transaction. That also results in a crash during unmount, due to a use-after-free, if hardware acceleration is not available for crc32c. The following sequence of steps explains how the race happens. 1) The filesystem is mounted in RW mode and the cleaner task is running. This means that currently BTRFS_FS_CLEANER_RUNNING is set at fs_info->flags; 2) The cleaner task is currently running delayed iputs for example; 3) A filesystem RO remount operation starts; 4) The RO remount task calls btrfs_commit_super(), which commits any currently open transaction, and it finishes; 5) At this point the cleaner task is still running and it creates a new transaction by doing one of the following things: * When running the delayed iput() for an inode with a 0 link count, in which case at btrfs_evict_inode() we start a transaction through the call to evict_refill_and_join(), use it and then release its handle through btrfs_end_transaction(); * When deleting a dead root through btrfs_clean_one_deleted_snapshot(), a transaction is started at btrfs_drop_snapshot() and then its handle is released through a call to btrfs_end_transaction_throttle(); * When the remount task was still running, and before the remount task called btrfs_delete_unused_bgs(), the cleaner task also called btrfs_delete_unused_bgs() and it picked and removed one block group from the list of unused block groups. Before the cleaner task started a transaction, through btrfs_start_trans_remove_block_group() at btrfs_delete_unused_bgs(), the remount task had already called btrfs_commit_super(); 6) So at this point the filesystem is in RO mode and we have an open transaction that was started by the cleaner task; 7) Shortly after a filesystem unmount operation starts. At close_ctree() we stop the transaction kthread before it had a chance to commit the transaction, since less than 30 seconds (the default commit interval) have elapsed since the last transaction was committed; 8) We end up calling iput() against the btree inode at close_ctree() while there is an open transaction, and since that transaction was used to update btrees by the cleaner, we have dirty pages in the btree inode due to COW operations on metadata extents, and therefore writeback is triggered for the btree inode. So btree_write_cache_pages() is invoked to flush those dirty pages during the final iput() on the btree inode. This results in creating a bio and submitting it, which makes us end up at btrfs_submit_metadata_bio(); 9) At btrfs_submit_metadata_bio() we end up at the if-then-else branch that calls btrfs_wq_submit_bio(), because check_async_write() returned a value of 1. This value of 1 is because we did not have hardware acceleration available for crc32c, so BTRFS_FS_CSUM_IMPL_FAST was not set in fs_info->flags; 10) Then at btrfs_wq_submit_bio() we call btrfs_queue_work() against the workqueue at fs_info->workers, which was already freed before by the call to btrfs_stop_all_workers() at close_ctree(). This results in an invalid memory access due to a use-after-free, leading to a crash. When this happens, before the crash there are several warnings triggered, since we have reserved metadata space in a block group, the delayed refs reservation, etc: ------------[ cut here ]------------ WARNING: CPU: 4 PID: 1729896 at fs/btrfs/block-group.c:125 btrfs_put_block_group+0x63/0xa0 [btrfs] Modules linked in: btrfs dm_snapshot dm_thin_pool (...) CPU: 4 PID: 1729896 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_put_block_group+0x63/0xa0 [btrfs] Code: f0 01 00 00 48 39 c2 75 (...) RSP: 0018:ffffb270826bbdd8 EFLAGS: 00010206 RAX: 0000000000000001 RBX: ffff947ed73e4000 RCX: ffff947ebc8b29c8 RDX: 0000000000000001 RSI: ffffffffc0b150a0 RDI: ffff947ebc8b2800 RBP: ffff947ebc8b2800 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ed73e4110 R13: ffff947ed73e4160 R14: ffff947ebc8b2988 R15: dead000000000100 FS: 00007f15edfea840(0000) GS:ffff9481ad600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f37e2893320 CR3: 0000000138f68001 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_free_block_groups+0x17f/0x2f0 [btrfs] close_ctree+0x2ba/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f15ee221ee7 Code: ff 0b 00 f7 d8 64 89 01 48 (...) RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000 RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0 R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000 R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60 irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last enabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last disabled at (0): [<0000000000000000>] 0x0 ---[ end trace dd74718fef1ed5c6 ]--- ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1729896 at fs/btrfs/block-rsv.c:459 btrfs_release_global_block_rsv+0x70/0xc0 [btrfs] Modules linked in: btrfs dm_snapshot dm_thin_pool (...) CPU: 2 PID: 1729896 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_release_global_block_rsv+0x70/0xc0 [btrfs] Code: 48 83 bb b0 03 00 00 00 (...) RSP: 0018:ffffb270826bbdd8 EFLAGS: 00010206 RAX: 000000000033c000 RBX: ffff947ed73e4000 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffffffffc0b0d8c1 RDI: 00000000ffffffff RBP: ffff947ebc8b7000 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ed73e4110 R13: ffff947ed73e5278 R14: dead000000000122 R15: dead000000000100 FS: 00007f15edfea840(0000) GS:ffff9481aca00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000561a79f76e20 CR3: 0000000138f68006 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_free_block_groups+0x24c/0x2f0 [btrfs] close_ctree+0x2ba/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f15ee221ee7 Code: ff 0b 00 f7 d8 64 89 01 (...) RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000 RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0 R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000 R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60 irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last enabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last disabled at (0): [<0000000000000000>] 0x0 ---[ end trace dd74718fef1ed5c7 ]--- ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1729896 at fs/btrfs/block-group.c:3377 btrfs_free_block_groups+0x25d/0x2f0 [btrfs] Modules linked in: btrfs dm_snapshot dm_thin_pool (...) CPU: 5 PID: 1729896 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_free_block_groups+0x25d/0x2f0 [btrfs] Code: ad de 49 be 22 01 00 (...) RSP: 0018:ffffb270826bbde8 EFLAGS: 00010206 RAX: ffff947ebeae1d08 RBX: ffff947ed73e4000 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffff947e9d823ae8 RDI: 0000000000000246 RBP: ffff947ebeae1d08 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: ffff947ebeae1c00 R13: ffff947ed73e5278 R14: dead000000000122 R15: dead000000000100 FS: 00007f15edfea840(0000) GS:ffff9481ad200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1475d98ea8 CR3: 0000000138f68005 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: close_ctree+0x2ba/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f15ee221ee7 Code: ff 0b 00 f7 d8 64 89 (...) RSP: 002b:00007ffe9470f0f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f15ee347264 RCX: 00007f15ee221ee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 000056169701d000 RBP: 0000561697018a30 R08: 0000000000000000 R09: 00007f15ee2e2be0 R10: 000056169701efe0 R11: 0000000000000246 R12: 0000000000000000 R13: 000056169701d000 R14: 0000561697018b40 R15: 0000561697018c60 irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last enabled at (0): [] copy_process+0x8a0/0x1d70 softirqs last disabled at (0): [<0000000000000000>] 0x0 ---[ end trace dd74718fef1ed5c8 ]--- BTRFS info (device sdc): space_info 4 has 268238848 free, is not full BTRFS info (device sdc): space_info total=268435456, used=114688, pinned=0, reserved=16384, may_use=0, readonly=65536 BTRFS info (device sdc): global_block_rsv: size 0 reserved 0 BTRFS info (device sdc): trans_block_rsv: size 0 reserved 0 BTRFS info (device sdc): chunk_block_rsv: size 0 reserved 0 BTRFS info (device sdc): delayed_block_rsv: size 0 reserved 0 BTRFS info (device sdc): delayed_refs_rsv: size 524288 reserved 0 And the crash, which only happens when we do not have crc32c hardware acceleration, produces the following trace immediately after those warnings: stack segment: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI CPU: 2 PID: 1749129 Comm: umount Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:btrfs_queue_work+0x36/0x190 [btrfs] Code: 54 55 53 48 89 f3 (...) RSP: 0018:ffffb27082443ae8 EFLAGS: 00010282 RAX: 0000000000000004 RBX: ffff94810ee9ad90 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffff94810ee9ad90 RDI: ffff947ed8ee75a0 RBP: a56b6b6b6b6b6b6b R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000007 R11: 0000000000000001 R12: ffff947fa9b435a8 R13: ffff94810ee9ad90 R14: 0000000000000000 R15: ffff947e93dc0000 FS: 00007f3cfe974840(0000) GS:ffff9481ac600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f1b42995a70 CR3: 0000000127638003 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_wq_submit_bio+0xb3/0xd0 [btrfs] btrfs_submit_metadata_bio+0x44/0xc0 [btrfs] submit_one_bio+0x61/0x70 [btrfs] btree_write_cache_pages+0x414/0x450 [btrfs] ? kobject_put+0x9a/0x1d0 ? trace_hardirqs_on+0x1b/0xf0 ? _raw_spin_unlock_irqrestore+0x3c/0x60 ? free_debug_processing+0x1e1/0x2b0 do_writepages+0x43/0xe0 ? lock_acquired+0x199/0x490 __writeback_single_inode+0x59/0x650 writeback_single_inode+0xaf/0x120 write_inode_now+0x94/0xd0 iput+0x187/0x2b0 close_ctree+0x2c6/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f3cfebabee7 Code: ff 0b 00 f7 d8 64 89 01 (...) RSP: 002b:00007ffc9c9a05f8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 00007f3cfecd1264 RCX: 00007f3cfebabee7 RDX: ffffffffffffff78 RSI: 0000000000000000 RDI: 0000562b6b478000 RBP: 0000562b6b473a30 R08: 0000000000000000 R09: 00007f3cfec6cbe0 R10: 0000562b6b479fe0 R11: 0000000000000246 R12: 0000000000000000 R13: 0000562b6b478000 R14: 0000562b6b473b40 R15: 0000562b6b473c60 Modules linked in: btrfs dm_snapshot dm_thin_pool (...) ---[ end trace dd74718fef1ed5cc ]--- Finally when we remove the btrfs module (rmmod btrfs), there are several warnings about objects that were allocated from our slabs but were never freed, consequence of the transaction that was never committed and got leaked: ============================================================================= BUG btrfs_delayed_ref_head (Tainted: G B W ): Objects remaining in btrfs_delayed_ref_head on __kmem_cache_shutdown() ----------------------------------------------------------------------------- INFO: Slab 0x0000000094c2ae56 objects=24 used=2 fp=0x000000002bfa2521 flags=0x17fffc000010200 CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 slab_err+0xb7/0xdc ? lock_acquired+0x199/0x490 __kmem_cache_shutdown+0x1ac/0x3c0 ? lock_release+0x20e/0x4c0 kmem_cache_destroy+0x55/0x120 btrfs_delayed_ref_exit+0x11/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 INFO: Object 0x0000000050cbdd61 @offset=12104 INFO: Allocated in btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] age=1894 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] btrfs_free_tree_block+0x128/0x360 [btrfs] __btrfs_cow_block+0x489/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 btrfs_mount+0x13b/0x3e0 [btrfs] INFO: Freed in __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] age=4292 cpu=2 pid=1729526 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] commit_cowonly_roots+0xfb/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] sync_filesystem+0x74/0x90 generic_shutdown_super+0x22/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 INFO: Object 0x0000000086e9b0ff @offset=12776 INFO: Allocated in btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] age=1900 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0xbb/0x480 [btrfs] btrfs_alloc_tree_block+0x2bf/0x360 [btrfs] alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs] __btrfs_cow_block+0x12d/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 INFO: Freed in __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] age=3141 cpu=6 pid=1729803 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x1117/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] btrfs_write_dirty_block_groups+0x17d/0x3d0 [btrfs] commit_cowonly_roots+0x248/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] close_ctree+0x113/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 kmem_cache_destroy btrfs_delayed_ref_head: Slab cache still has objects CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 kmem_cache_destroy+0x119/0x120 btrfs_delayed_ref_exit+0x11/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 0b (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 ============================================================================= BUG btrfs_delayed_tree_ref (Tainted: G B W ): Objects remaining in btrfs_delayed_tree_ref on __kmem_cache_shutdown() ----------------------------------------------------------------------------- INFO: Slab 0x0000000011f78dc0 objects=37 used=2 fp=0x0000000032d55d91 flags=0x17fffc000010200 CPU: 3 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 slab_err+0xb7/0xdc ? lock_acquired+0x199/0x490 __kmem_cache_shutdown+0x1ac/0x3c0 ? lock_release+0x20e/0x4c0 kmem_cache_destroy+0x55/0x120 btrfs_delayed_ref_exit+0x1d/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 INFO: Object 0x000000001a340018 @offset=4408 INFO: Allocated in btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] age=1917 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] btrfs_free_tree_block+0x128/0x360 [btrfs] __btrfs_cow_block+0x489/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 btrfs_mount+0x13b/0x3e0 [btrfs] INFO: Freed in __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] age=4167 cpu=4 pid=1729795 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] btrfs_commit_transaction+0x60/0xc40 [btrfs] create_subvol+0x56a/0x990 [btrfs] btrfs_mksubvol+0x3fb/0x4a0 [btrfs] __btrfs_ioctl_snap_create+0x119/0x1a0 [btrfs] btrfs_ioctl_snap_create+0x58/0x80 [btrfs] btrfs_ioctl+0x1a92/0x36f0 [btrfs] __x64_sys_ioctl+0x83/0xb0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 INFO: Object 0x000000002b46292a @offset=13648 INFO: Allocated in btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] age=1923 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_add_delayed_tree_ref+0x9e/0x480 [btrfs] btrfs_alloc_tree_block+0x2bf/0x360 [btrfs] alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs] __btrfs_cow_block+0x12d/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 INFO: Freed in __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] age=3164 cpu=6 pid=1729803 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0x63d/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] commit_cowonly_roots+0xfb/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] close_ctree+0x113/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 kmem_cache_destroy btrfs_delayed_tree_ref: Slab cache still has objects CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 kmem_cache_destroy+0x119/0x120 btrfs_delayed_ref_exit+0x1d/0x35 [btrfs] exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 ============================================================================= BUG btrfs_delayed_extent_op (Tainted: G B W ): Objects remaining in btrfs_delayed_extent_op on __kmem_cache_shutdown() ----------------------------------------------------------------------------- INFO: Slab 0x00000000f145ce2f objects=22 used=1 fp=0x00000000af0f92cf flags=0x17fffc000010200 CPU: 5 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 slab_err+0xb7/0xdc ? lock_acquired+0x199/0x490 __kmem_cache_shutdown+0x1ac/0x3c0 ? __mutex_unlock_slowpath+0x45/0x2a0 kmem_cache_destroy+0x55/0x120 exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 f5 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 INFO: Object 0x000000004cf95ea8 @offset=6264 INFO: Allocated in btrfs_alloc_tree_block+0x1e0/0x360 [btrfs] age=1931 cpu=6 pid=1729873 __slab_alloc.isra.0+0x109/0x1c0 kmem_cache_alloc+0x7bb/0x830 btrfs_alloc_tree_block+0x1e0/0x360 [btrfs] alloc_tree_block_no_bg_flush+0x4f/0x60 [btrfs] __btrfs_cow_block+0x12d/0x5f0 [btrfs] btrfs_cow_block+0xf7/0x220 [btrfs] btrfs_search_slot+0x62a/0xc40 [btrfs] btrfs_del_orphan_item+0x65/0xd0 [btrfs] btrfs_find_orphan_roots+0x1bf/0x200 [btrfs] open_ctree+0x125a/0x18a0 [btrfs] btrfs_mount_root.cold+0x13/0xed [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xe0 fc_mount+0xe/0x40 vfs_kern_mount.part.0+0x71/0x90 btrfs_mount+0x13b/0x3e0 [btrfs] INFO: Freed in __btrfs_run_delayed_refs+0xabd/0x1290 [btrfs] age=3173 cpu=6 pid=1729803 kmem_cache_free+0x34c/0x3c0 __btrfs_run_delayed_refs+0xabd/0x1290 [btrfs] btrfs_run_delayed_refs+0x81/0x210 [btrfs] commit_cowonly_roots+0xfb/0x300 [btrfs] btrfs_commit_transaction+0x367/0xc40 [btrfs] close_ctree+0x113/0x2fa [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x68/0xb0 exit_to_user_mode_prepare+0x1bb/0x1c0 syscall_exit_to_user_mode+0x4b/0x260 entry_SYSCALL_64_after_hwframe+0x44/0xa9 kmem_cache_destroy btrfs_delayed_extent_op: Slab cache still has objects CPU: 3 PID: 1729921 Comm: rmmod Tainted: G B W 5.10.0-rc4-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x8d/0xb5 kmem_cache_destroy+0x119/0x120 exit_btrfs_fs+0xa/0x59 [btrfs] __x64_sys_delete_module+0x194/0x260 ? fpregs_assert_state_consistent+0x1e/0x40 ? exit_to_user_mode_prepare+0x55/0x1c0 ? trace_hardirqs_on+0x1b/0xf0 do_syscall_64+0x33/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f693e305897 Code: 73 01 c3 48 8b 0d f9 (...) RSP: 002b:00007ffcf73eb508 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 0000559df504f760 RCX: 00007f693e305897 RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559df504f7c8 RBP: 00007ffcf73eb568 R08: 0000000000000000 R09: 0000000000000000 R10: 00007f693e378ac0 R11: 0000000000000206 R12: 00007ffcf73eb740 R13: 00007ffcf73ec5a6 R14: 0000559df504f2a0 R15: 0000559df504f760 BTRFS: state leak: start 30408704 end 30425087 state 1 in tree 1 refs 1 So fix this by making the remount path to wait for the cleaner task before calling btrfs_commit_super(). The remount path now waits for the bit BTRFS_FS_CLEANER_RUNNING to be cleared from fs_info->flags before calling btrfs_commit_super() and this ensures the cleaner can not start a transaction after that, because it sleeps when the filesystem is in RO mode and we have already flagged the filesystem as RO before waiting for BTRFS_FS_CLEANER_RUNNING to be cleared. This also introduces a new flag BTRFS_FS_STATE_RO to be used for fs_info->fs_state when the filesystem is in RO mode. This is because we were doing the RO check using the flags of the superblock and setting the RO mode simply by ORing into the superblock's flags - those operations are not atomic and could result in the cleaner not seeing the update from the remount task after it clears BTRFS_FS_CLEANER_RUNNING. Tested-by: Fabian Vogt Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 20 +++++++++++++++++++- fs/btrfs/disk-io.c | 5 ++++- fs/btrfs/super.c | 22 +++++++++++++++++++--- fs/btrfs/volumes.c | 4 ++-- 4 files changed, 44 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3935d297d198..0225c5208f44 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -132,6 +132,8 @@ enum { * defrag */ BTRFS_FS_STATE_REMOUNTING, + /* Filesystem in RO mode */ + BTRFS_FS_STATE_RO, /* Track if a transaction abort has been reported on this filesystem */ BTRFS_FS_STATE_TRANS_ABORTED, /* @@ -2892,10 +2894,26 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) * If we remount the fs to be R/O or umount the fs, the cleaner needn't do * anything except sleeping. This function is used to check the status of * the fs. + * We check for BTRFS_FS_STATE_RO to avoid races with a concurrent remount, + * since setting and checking for SB_RDONLY in the superblock's flags is not + * atomic. */ static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info) { - return fs_info->sb->s_flags & SB_RDONLY || btrfs_fs_closing(fs_info); + return test_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state) || + btrfs_fs_closing(fs_info); +} + +static inline void btrfs_set_sb_rdonly(struct super_block *sb) +{ + sb->s_flags |= SB_RDONLY; + set_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state); +} + +static inline void btrfs_clear_sb_rdonly(struct super_block *sb) +{ + sb->s_flags &= ~SB_RDONLY; + clear_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state); } /* tree mod log functions from ctree.c */ diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e941cbae3991..e7bcbd0b93ef 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1729,7 +1729,7 @@ static int cleaner_kthread(void *arg) */ btrfs_delete_unused_bgs(fs_info); sleep: - clear_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags); + clear_and_wake_up_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags); if (kthread_should_park()) kthread_parkme(); if (kthread_should_stop()) @@ -2830,6 +2830,9 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block return -ENOMEM; btrfs_init_delayed_root(fs_info->delayed_root); + if (sb_rdonly(sb)) + set_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state); + return btrfs_alloc_stripe_hash_table(fs_info); } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b24fa62375e0..38740cc2919f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -175,7 +175,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function btrfs_discard_stop(fs_info); /* btrfs handle error by forcing the filesystem readonly */ - sb->s_flags |= SB_RDONLY; + btrfs_set_sb_rdonly(sb); btrfs_info(fs_info, "forced readonly"); /* * Note that a running device replace operation is not canceled here @@ -1953,7 +1953,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) /* avoid complains from lockdep et al. */ up(&fs_info->uuid_tree_rescan_sem); - sb->s_flags |= SB_RDONLY; + btrfs_set_sb_rdonly(sb); /* * Setting SB_RDONLY will put the cleaner thread to @@ -1964,6 +1964,20 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) */ btrfs_delete_unused_bgs(fs_info); + /* + * The cleaner task could be already running before we set the + * flag BTRFS_FS_STATE_RO (and SB_RDONLY in the superblock). + * We must make sure that after we finish the remount, i.e. after + * we call btrfs_commit_super(), the cleaner can no longer start + * a transaction - either because it was dropping a dead root, + * running delayed iputs or deleting an unused block group (the + * cleaner picked a block group from the list of unused block + * groups before we were able to in the previous call to + * btrfs_delete_unused_bgs()). + */ + wait_on_bit(&fs_info->flags, BTRFS_FS_CLEANER_RUNNING, + TASK_UNINTERRUPTIBLE); + btrfs_dev_replace_suspend_for_unmount(fs_info); btrfs_scrub_cancel(fs_info); btrfs_pause_balance(fs_info); @@ -2014,7 +2028,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) if (ret) goto restore; - sb->s_flags &= ~SB_RDONLY; + btrfs_clear_sb_rdonly(sb); set_bit(BTRFS_FS_OPEN, &fs_info->flags); } @@ -2036,6 +2050,8 @@ restore: /* We've hit an error - don't reset SB_RDONLY */ if (sb_rdonly(sb)) old_flags |= SB_RDONLY; + if (!(old_flags & SB_RDONLY)) + clear_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state); sb->s_flags = old_flags; fs_info->mount_opt = old_opts; fs_info->compress_type = old_compress_type; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 7930e1c78c45..2c0aa03b6437 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2593,7 +2593,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE); if (seeding_dev) { - sb->s_flags &= ~SB_RDONLY; + btrfs_clear_sb_rdonly(sb); ret = btrfs_prepare_sprout(fs_info); if (ret) { btrfs_abort_transaction(trans, ret); @@ -2729,7 +2729,7 @@ error_sysfs: mutex_unlock(&fs_info->fs_devices->device_list_mutex); error_trans: if (seeding_dev) - sb->s_flags |= SB_RDONLY; + btrfs_set_sb_rdonly(sb); if (trans) btrfs_end_transaction(trans); error_free_zone: -- cgit 1.4.1 From 0a31daa4b602ff6861fdf182236d64b2a353bace Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 14 Dec 2020 10:10:48 +0000 Subject: btrfs: add assertion for empty list of transactions at late stage of umount Add an assertion to close_ctree(), after destroying all the work queues, to verify we do not have any transaction still open or committing at that at that point. If we have any, it means something is seriously wrong and that can cause memory leaks and use-after-free problems. This is motivated by the previous patches that fixed bugs where we ended up leaking an open transaction after unmounting the filesystem. Tested-by: Fabian Vogt Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e7bcbd0b93ef..1dfd4b2d0e1e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4181,6 +4181,9 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) invalidate_inode_pages2(fs_info->btree_inode->i_mapping); btrfs_stop_all_workers(fs_info); + /* We shouldn't have any transaction open at this point */ + ASSERT(list_empty(&fs_info->trans_list)); + clear_bit(BTRFS_FS_OPEN, &fs_info->flags); free_root_pointers(fs_info, true); btrfs_free_fs_roots(fs_info); -- cgit 1.4.1 From a8cc263eb58ca133617662a5a5e07131d0ebf299 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 14 Dec 2020 10:10:49 +0000 Subject: btrfs: run delayed iputs when remounting RO to avoid leaking them When remounting RO, after setting the superblock with the RO flag, the cleaner task will start sleeping and do nothing, since the call to btrfs_need_cleaner_sleep() keeps returning 'true'. However, when the cleaner task goes to sleep, the list of delayed iputs may not be empty. As long as we are in RO mode, the cleaner task will keep sleeping and never run the delayed iputs. This means that if a filesystem unmount is started, we get into close_ctree() with a non-empty list of delayed iputs, and because the filesystem is in RO mode and is not in an error state (or a transaction aborted), btrfs_error_commit_super() and btrfs_commit_super(), which run the delayed iputs, are never called, and later we fail the assertion that checks if the delayed iputs list is empty: assertion failed: list_empty(&fs_info->delayed_iputs), in fs/btrfs/disk-io.c:4049 ------------[ cut here ]------------ kernel BUG at fs/btrfs/ctree.h:3153! invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI CPU: 1 PID: 3780621 Comm: umount Tainted: G L 5.6.0-rc2-btrfs-next-73 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014 RIP: 0010:assertfail.constprop.0+0x18/0x26 [btrfs] Code: 8b 7b 58 48 85 ff 74 (...) RSP: 0018:ffffb748c89bbdf8 EFLAGS: 00010246 RAX: 0000000000000051 RBX: ffff9608f2584000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff91998988 RDI: 00000000ffffffff RBP: ffff9608f25870d8 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000000 R12: ffffffffc0cbc500 R13: ffffffff92411750 R14: 0000000000000000 R15: ffff9608f2aab250 FS: 00007fcbfaa66c80(0000) GS:ffff960936c80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fffc2c2dd38 CR3: 0000000235e54002 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: close_ctree+0x1a2/0x2e6 [btrfs] generic_shutdown_super+0x6c/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x70 cleanup_mnt+0x100/0x160 task_work_run+0x93/0xc0 exit_to_usermode_loop+0xf9/0x100 do_syscall_64+0x20d/0x260 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7fcbfaca6307 Code: eb 0b 00 f7 d8 64 89 (...) RSP: 002b:00007fffc2c2ed68 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 0000558203b559b0 RCX: 00007fcbfaca6307 RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000558203b55bc0 RBP: 0000000000000000 R08: 0000000000000001 R09: 00007fffc2c2dad0 R10: 0000558203b55bf0 R11: 0000000000000246 R12: 0000558203b55bc0 R13: 00007fcbfadcc204 R14: 0000558203b55aa8 R15: 0000000000000000 Modules linked in: btrfs dm_flakey dm_log_writes (...) ---[ end trace d44d303790049ef6 ]--- So fix this by making the remount RO path run any remaining delayed iputs after waiting for the cleaner to become inactive. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 38740cc2919f..12d7d3be7cd4 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1978,6 +1978,16 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) wait_on_bit(&fs_info->flags, BTRFS_FS_CLEANER_RUNNING, TASK_UNINTERRUPTIBLE); + /* + * We've set the superblock to RO mode, so we might have made + * the cleaner task sleep without running all pending delayed + * iputs. Go through all the delayed iputs here, so that if an + * unmount happens without remounting RW we don't end up at + * finishing close_ctree() with a non-empty list of delayed + * iputs. + */ + btrfs_run_delayed_iputs(fs_info); + btrfs_dev_replace_suspend_for_unmount(fs_info); btrfs_scrub_cancel(fs_info); btrfs_pause_balance(fs_info); -- cgit 1.4.1 From f09ced4053bc0a2094a12b60b646114c966ef4c6 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 18 Dec 2020 14:45:24 +0100 Subject: xsk: Fix race in SKB mode transmit with shared cq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a race when multiple sockets are simultaneously calling sendto() when the completion ring is shared in the SKB case. This is the case when you share the same netdev and queue id through the XDP_SHARED_UMEM bind flag. The problem is that multiple processes can be in xsk_generic_xmit() and call the backpressure mechanism in xskq_prod_reserve(xs->pool->cq). As this is a shared resource in this specific scenario, a race might occur since the rings are single-producer single-consumer. Fix this by moving the tx_completion_lock from the socket to the pool as the pool is shared between the sockets that share the completion ring. (The pool is not shared when this is not the case.) And then protect the accesses to xskq_prod_reserve() with this lock. The tx_completion_lock is renamed cq_lock to better reflect that it protects accesses to the potentially shared completion ring. Fixes: 35fcde7f8deb ("xsk: support for Tx") Reported-by: Xuan Zhuo Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20201218134525.13119-2-magnus.karlsson@gmail.com --- include/net/xdp_sock.h | 4 ---- include/net/xsk_buff_pool.h | 5 +++++ net/xdp/xsk.c | 9 ++++++--- net/xdp/xsk_buff_pool.c | 1 + 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 4f4e93bf814c..cc17bc957548 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -58,10 +58,6 @@ struct xdp_sock { struct xsk_queue *tx ____cacheline_aligned_in_smp; struct list_head tx_list; - /* Mutual exclusion of NAPI TX thread and sendmsg error paths - * in the SKB destructor callback. - */ - spinlock_t tx_completion_lock; /* Protects generic receive. */ spinlock_t rx_lock; diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 01755b838c74..eaa8386dbc63 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -73,6 +73,11 @@ struct xsk_buff_pool { bool dma_need_sync; bool unaligned; void *addrs; + /* Mutual exclusion of the completion ring in the SKB mode. Two cases to protect: + * NAPI TX thread and sendmsg error paths in the SKB destructor callback and when + * sockets share a single cq when the same netdev and queue id is shared. + */ + spinlock_t cq_lock; struct xdp_buff_xsk *free_heads[]; }; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index c6532d77fde7..d531f9cd0de6 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -423,9 +423,9 @@ static void xsk_destruct_skb(struct sk_buff *skb) struct xdp_sock *xs = xdp_sk(skb->sk); unsigned long flags; - spin_lock_irqsave(&xs->tx_completion_lock, flags); + spin_lock_irqsave(&xs->pool->cq_lock, flags); xskq_prod_submit_addr(xs->pool->cq, addr); - spin_unlock_irqrestore(&xs->tx_completion_lock, flags); + spin_unlock_irqrestore(&xs->pool->cq_lock, flags); sock_wfree(skb); } @@ -437,6 +437,7 @@ static int xsk_generic_xmit(struct sock *sk) bool sent_frame = false; struct xdp_desc desc; struct sk_buff *skb; + unsigned long flags; int err = 0; mutex_lock(&xs->mutex); @@ -468,10 +469,13 @@ static int xsk_generic_xmit(struct sock *sk) * if there is space in it. This avoids having to implement * any buffering in the Tx path. */ + spin_lock_irqsave(&xs->pool->cq_lock, flags); if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) { + spin_unlock_irqrestore(&xs->pool->cq_lock, flags); kfree_skb(skb); goto out; } + spin_unlock_irqrestore(&xs->pool->cq_lock, flags); skb->dev = xs->dev; skb->priority = sk->sk_priority; @@ -1303,7 +1307,6 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, xs->state = XSK_READY; mutex_init(&xs->mutex); spin_lock_init(&xs->rx_lock); - spin_lock_init(&xs->tx_completion_lock); INIT_LIST_HEAD(&xs->map_list); spin_lock_init(&xs->map_list_lock); diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 818b75060922..20598eea658c 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -71,6 +71,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, INIT_LIST_HEAD(&pool->free_list); INIT_LIST_HEAD(&pool->xsk_tx_list); spin_lock_init(&pool->xsk_tx_list_lock); + spin_lock_init(&pool->cq_lock); refcount_set(&pool->users, 1); pool->fq = xs->fq_tmp; -- cgit 1.4.1 From b1b95cb5c0a9694d47d5f845ba97e226cfda957d Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 18 Dec 2020 14:45:25 +0100 Subject: xsk: Rollback reservation at NETDEV_TX_BUSY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rollback the reservation in the completion ring when we get a NETDEV_TX_BUSY. When this error is received from the driver, we are supposed to let the user application retry the transmit again. And in order to do this, we need to roll back the failed send so it can be retried. Unfortunately, we did not cancel the reservation we had made in the completion ring. By not doing this, we actually make the completion ring one entry smaller per NETDEV_TX_BUSY error we get, and after enough of these errors the completion ring will be of size zero and transmit will stop working. Fix this by cancelling the reservation when we get a NETDEV_TX_BUSY error. Fixes: 642e450b6b59 ("xsk: Do not discard packet when NETDEV_TX_BUSY") Reported-by: Xuan Zhuo Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20201218134525.13119-3-magnus.karlsson@gmail.com --- net/xdp/xsk.c | 3 +++ net/xdp/xsk_queue.h | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index d531f9cd0de6..8037b04a9edd 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -487,6 +487,9 @@ static int xsk_generic_xmit(struct sock *sk) if (err == NETDEV_TX_BUSY) { /* Tell user-space to retry the send */ skb->destructor = sock_wfree; + spin_lock_irqsave(&xs->pool->cq_lock, flags); + xskq_prod_cancel(xs->pool->cq); + spin_unlock_irqrestore(&xs->pool->cq_lock, flags); /* Free skb without triggering the perf drop trace */ consume_skb(skb); err = -EAGAIN; diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 4a9663aa7afe..2823b7c3302d 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -334,6 +334,11 @@ static inline bool xskq_prod_is_full(struct xsk_queue *q) return xskq_prod_nb_free(q, 1) ? false : true; } +static inline void xskq_prod_cancel(struct xsk_queue *q) +{ + q->cached_prod--; +} + static inline int xskq_prod_reserve(struct xsk_queue *q) { if (xskq_prod_is_full(q)) -- cgit 1.4.1 From e79bb299ccad6983876686a4d8c87c92ebbe5657 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 14 Dec 2020 22:35:39 +0000 Subject: selftests/bpf: Fix spelling mistake "tranmission" -> "transmission" There are two spelling mistakes in output messages. Fix these. Signed-off-by: Colin Ian King Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201214223539.83168-1-colin.king@canonical.com --- tools/testing/selftests/bpf/xdpxceiver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 014dedaa4dd2..1e722ee76b1f 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -715,7 +715,7 @@ static void worker_pkt_dump(void) int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE)); if (payload == EOT) { - ksft_print_msg("End-of-tranmission frame received\n"); + ksft_print_msg("End-of-transmission frame received\n"); fprintf(stdout, "---------------------------------------\n"); break; } @@ -747,7 +747,7 @@ static void worker_pkt_validate(void) } if (payloadseqnum == EOT) { - ksft_print_msg("End-of-tranmission frame received: PASS\n"); + ksft_print_msg("End-of-transmission frame received: PASS\n"); sigvar = 1; break; } -- cgit 1.4.1 From d467d80dc399ba77875d647f2f37b7d1a70d94c2 Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Wed, 16 Dec 2020 10:47:15 +0800 Subject: bpf: Remove unused including Remove including that don't need it. Signed-off-by: Tian Tao Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1608086835-54523-1-git-send-email-tiantao6@hisilicon.com --- kernel/bpf/syscall.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 287be337d5f6..bb2700ec5bf3 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include -- cgit 1.4.1 From 72d78717c6d06adf65d2e3dccc96d9e9dc978593 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 11 Dec 2020 12:26:14 -0500 Subject: nfsd: Fixes for nfsd4_encode_read_plus_data() Ensure that we encode the data payload + padding, and that we truncate the preallocated buffer to the actual read size. Fixes: 528b84934eb9 ("NFSD: Add READ_PLUS data support") Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 45ee6b12ce5b..5dacc673ef17 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4756,6 +4756,7 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof); if (nfserr) return nfserr; + xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount)); tmp = htonl(NFS4_CONTENT_DATA); write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); @@ -4763,6 +4764,10 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8); tmp = htonl(*maxcount); write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4); + + tmp = xdr_zero; + write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp, + xdr_pad_size(*maxcount)); return nfs_ok; } -- cgit 1.4.1 From b68f0cbd3f95f2df81e525c310a41fc73c2ed0d3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 11 Dec 2020 12:26:15 -0500 Subject: nfsd: Don't set eof on a truncated READ_PLUS If the READ_PLUS operation was truncated due to an error, then ensure we clear the 'eof' flag. Fixes: 9f0b5792f07d ("NFSD: Encode a full READ_PLUS reply") Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5dacc673ef17..20178f60f612 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4860,14 +4860,15 @@ out: if (nfserr && segments == 0) xdr_truncate_encode(xdr, starting_len); else { - tmp = htonl(eof); - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); - tmp = htonl(segments); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); if (nfserr) { xdr_truncate_encode(xdr, last_segment); nfserr = nfs_ok; + eof = 0; } + tmp = htonl(eof); + write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); + tmp = htonl(segments); + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); } return nfserr; -- cgit 1.4.1 From d6c9e4368cc6a61bf25c9c72437ced509c854563 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 18 Dec 2020 12:28:23 -0500 Subject: NFSD: Fix sparse warning in nfssvc.c fs/nfsd/nfssvc.c:36:6: warning: symbol 'inter_copy_offload_enable' was not declared. Should it be static? The parameter was added by commit ce0887ac96d3 ("NFSD add nfs4 inter ssc to nfsd4_copy"). Relocate it into the source file that uses it, and make it static. This approach is similar to the nfs4_disable_idmapping, cltrack_prog, and cltrack_legacy_disable module parameters. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 5 +++++ fs/nfsd/nfssvc.c | 6 ------ fs/nfsd/xdr4.h | 1 - 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 4727b7f03c5b..8d6d2678abad 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -50,6 +50,11 @@ #include "pnfs.h" #include "trace.h" +static bool inter_copy_offload_enable; +module_param(inter_copy_offload_enable, bool, 0644); +MODULE_PARM_DESC(inter_copy_offload_enable, + "Enable inter server to server copy offload. Default: false"); + #ifdef CONFIG_NFSD_V4_SECURITY_LABEL #include diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 00384c332f9b..f9c9f4c63cc7 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -33,12 +33,6 @@ #define NFSDDBG_FACILITY NFSDDBG_SVC -bool inter_copy_offload_enable; -EXPORT_SYMBOL_GPL(inter_copy_offload_enable); -module_param(inter_copy_offload_enable, bool, 0644); -MODULE_PARM_DESC(inter_copy_offload_enable, - "Enable inter server to server copy offload. Default: false"); - extern struct svc_program nfsd_program; static int nfsd(void *vrqstp); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index a60ff5ce1a37..c300885ae75d 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -568,7 +568,6 @@ struct nfsd4_copy { struct nfs_fh c_fh; nfs4_stateid stateid; }; -extern bool inter_copy_offload_enable; struct nfsd4_seek { /* request */ -- cgit 1.4.1 From 4a85a6a3320b4a622315d2e0ea91a1d2b013bce4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 18 Dec 2020 12:28:41 -0500 Subject: SUNRPC: Handle TCP socket sends with kernel_sendpage() again Daire Byrne reports a ~50% aggregrate throughput regression on his Linux NFS server after commit da1661b93bf4 ("SUNRPC: Teach server to use xprt_sock_sendmsg for socket sends"), which replaced kernel_send_page() calls in NFSD's socket send path with calls to sock_sendmsg() using iov_iter. Investigation showed that tcp_sendmsg() was not using zero-copy to send the xdr_buf's bvec pages, but instead was relying on memcpy. This means copying every byte of a large NFS READ payload. It looks like TLS sockets do indeed support a ->sendpage method, so it's really not necessary to use xprt_sock_sendmsg() to support TLS fully on the server. A mechanical reversion of da1661b93bf4 is not possible at this point, but we can re-implement the server's TCP socket sendmsg path using kernel_sendpage(). Reported-by: Daire Byrne BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=209439 Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 85 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index b248f2349437..c9766d07eb81 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1062,6 +1062,90 @@ err_noclose: return 0; /* record not complete */ } +static int svc_tcp_send_kvec(struct socket *sock, const struct kvec *vec, + int flags) +{ + return kernel_sendpage(sock, virt_to_page(vec->iov_base), + offset_in_page(vec->iov_base), + vec->iov_len, flags); +} + +/* + * kernel_sendpage() is used exclusively to reduce the number of + * copy operations in this path. Therefore the caller must ensure + * that the pages backing @xdr are unchanging. + * + * In addition, the logic assumes that * .bv_len is never larger + * than PAGE_SIZE. + */ +static int svc_tcp_sendmsg(struct socket *sock, struct msghdr *msg, + struct xdr_buf *xdr, rpc_fraghdr marker, + unsigned int *sentp) +{ + const struct kvec *head = xdr->head; + const struct kvec *tail = xdr->tail; + struct kvec rm = { + .iov_base = &marker, + .iov_len = sizeof(marker), + }; + int flags, ret; + + *sentp = 0; + xdr_alloc_bvec(xdr, GFP_KERNEL); + + msg->msg_flags = MSG_MORE; + ret = kernel_sendmsg(sock, msg, &rm, 1, rm.iov_len); + if (ret < 0) + return ret; + *sentp += ret; + if (ret != rm.iov_len) + return -EAGAIN; + + flags = head->iov_len < xdr->len ? MSG_MORE | MSG_SENDPAGE_NOTLAST : 0; + ret = svc_tcp_send_kvec(sock, head, flags); + if (ret < 0) + return ret; + *sentp += ret; + if (ret != head->iov_len) + goto out; + + if (xdr->page_len) { + unsigned int offset, len, remaining; + struct bio_vec *bvec; + + bvec = xdr->bvec; + offset = xdr->page_base; + remaining = xdr->page_len; + flags = MSG_MORE | MSG_SENDPAGE_NOTLAST; + while (remaining > 0) { + if (remaining <= PAGE_SIZE && tail->iov_len == 0) + flags = 0; + len = min(remaining, bvec->bv_len); + ret = kernel_sendpage(sock, bvec->bv_page, + bvec->bv_offset + offset, + len, flags); + if (ret < 0) + return ret; + *sentp += ret; + if (ret != len) + goto out; + remaining -= len; + offset = 0; + bvec++; + } + } + + if (tail->iov_len) { + ret = svc_tcp_send_kvec(sock, tail, 0); + if (ret < 0) + return ret; + *sentp += ret; + } + +out: + return 0; +} + /** * svc_tcp_sendto - Send out a reply on a TCP socket * @rqstp: completed svc_rqst @@ -1089,7 +1173,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) mutex_lock(&xprt->xpt_mutex); if (svc_xprt_is_dead(xprt)) goto out_notconn; - err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent); + err = svc_tcp_sendmsg(svsk->sk_sock, &msg, xdr, marker, &sent); xdr_free_bvec(xdr); trace_svcsock_tcp_send(xprt, err < 0 ? err : sent); if (err < 0 || sent != (xdr->len + sizeof(marker))) -- cgit 1.4.1 From 7b723008f9c95624c848fad661c01b06e47b20da Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 18 Dec 2020 12:28:58 -0500 Subject: NFSD: Restore NFSv4 decoding's SAVEMEM functionality While converting the NFSv4 decoder to use xdr_stream-based XDR processing, I removed the old SAVEMEM() macro. This macro wrapped a bit of logic that avoided a memory allocation by recognizing when the decoded item resides in a linear section of the Receive buffer. In that case, it returned a pointer into that buffer instead of allocating a bounce buffer. The bounce buffer is necessary only when xdr_inline_decode() has placed the decoded item in the xdr_stream's scratch buffer, which disappears the next time xdr_inline_decode() is called with that xdr_stream. That happens only if the data item crosses a page boundary in the receive buffer, an exceedingly rare occurrence. Allocating a bounce buffer every time results in a minor performance regression that was introduced by the recent NFSv4 decoder overhaul. Let's restore the previous behavior. On average, it saves about 1.5 kmalloc() calls per COMPOUND. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 20178f60f612..eaaa1605b5b5 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -147,6 +147,25 @@ svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len) return p; } +static void * +svcxdr_savemem(struct nfsd4_compoundargs *argp, __be32 *p, u32 len) +{ + __be32 *tmp; + + /* + * The location of the decoded data item is stable, + * so @p is OK to use. This is the common case. + */ + if (p != argp->xdr->scratch.iov_base) + return p; + + tmp = svcxdr_tmpalloc(argp, len); + if (!tmp) + return NULL; + memcpy(tmp, p, len); + return tmp; +} + /* * NFSv4 basic data type decoders */ @@ -183,11 +202,10 @@ nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o) p = xdr_inline_decode(argp->xdr, len); if (!p) return nfserr_bad_xdr; - o->data = svcxdr_tmpalloc(argp, len); + o->data = svcxdr_savemem(argp, p, len); if (!o->data) return nfserr_jukebox; o->len = len; - memcpy(o->data, p, len); return nfs_ok; } @@ -205,10 +223,9 @@ nfsd4_decode_component4(struct nfsd4_compoundargs *argp, char **namp, u32 *lenp) status = check_filename((char *)p, *lenp); if (status) return status; - *namp = svcxdr_tmpalloc(argp, *lenp); + *namp = svcxdr_savemem(argp, p, *lenp); if (!*namp) return nfserr_jukebox; - memcpy(*namp, p, *lenp); return nfs_ok; } @@ -1200,10 +1217,9 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) p = xdr_inline_decode(argp->xdr, putfh->pf_fhlen); if (!p) return nfserr_bad_xdr; - putfh->pf_fhval = svcxdr_tmpalloc(argp, putfh->pf_fhlen); + putfh->pf_fhval = svcxdr_savemem(argp, p, putfh->pf_fhlen); if (!putfh->pf_fhval) return nfserr_jukebox; - memcpy(putfh->pf_fhval, p, putfh->pf_fhlen); return nfs_ok; } @@ -1318,24 +1334,20 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient p = xdr_inline_decode(argp->xdr, setclientid->se_callback_netid_len); if (!p) return nfserr_bad_xdr; - setclientid->se_callback_netid_val = svcxdr_tmpalloc(argp, + setclientid->se_callback_netid_val = svcxdr_savemem(argp, p, setclientid->se_callback_netid_len); if (!setclientid->se_callback_netid_val) return nfserr_jukebox; - memcpy(setclientid->se_callback_netid_val, p, - setclientid->se_callback_netid_len); if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_addr_len) < 0) return nfserr_bad_xdr; p = xdr_inline_decode(argp->xdr, setclientid->se_callback_addr_len); if (!p) return nfserr_bad_xdr; - setclientid->se_callback_addr_val = svcxdr_tmpalloc(argp, + setclientid->se_callback_addr_val = svcxdr_savemem(argp, p, setclientid->se_callback_addr_len); if (!setclientid->se_callback_addr_val) return nfserr_jukebox; - memcpy(setclientid->se_callback_addr_val, p, - setclientid->se_callback_addr_len); if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_ident) < 0) return nfserr_bad_xdr; @@ -1375,10 +1387,9 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify p = xdr_inline_decode(argp->xdr, verify->ve_attrlen); if (!p) return nfserr_bad_xdr; - verify->ve_attrval = svcxdr_tmpalloc(argp, verify->ve_attrlen); + verify->ve_attrval = svcxdr_savemem(argp, p, verify->ve_attrlen); if (!verify->ve_attrval) return nfserr_jukebox; - memcpy(verify->ve_attrval, p, verify->ve_attrlen); return nfs_ok; } @@ -2333,10 +2344,9 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) p = xdr_inline_decode(argp->xdr, argp->taglen); if (!p) return 0; - argp->tag = svcxdr_tmpalloc(argp, argp->taglen); + argp->tag = svcxdr_savemem(argp, p, argp->taglen); if (!argp->tag) return 0; - memcpy(argp->tag, p, argp->taglen); max_reply += xdr_align_size(argp->taglen); } -- cgit 1.4.1 From 4aa1464acbe3697710279a4bd65cb4801ed30425 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 17 Dec 2020 14:29:11 -0800 Subject: spi: spi-geni-qcom: Fix geni_spi_isr() NULL dereference in timeout case In commit 7ba9bdcb91f6 ("spi: spi-geni-qcom: Don't keep a local state variable") we changed handle_fifo_timeout() so that we set "mas->cur_xfer" to NULL to make absolutely sure that we don't mess with the buffers from the previous transfer in the timeout case. Unfortunately, this caused the IRQ handler to dereference NULL in some cases. One case: CPU0 CPU1 ---- ---- setup_fifo_xfer() geni_se_setup_m_cmd() ... handle_fifo_timeout() spin_lock_irq(mas->lock) mas->cur_xfer = NULL geni_se_cancel_m_cmd() spin_unlock_irq(mas->lock) geni_spi_isr() spin_lock(mas->lock) if (m_irq & M_RX_FIFO_WATERMARK_EN) geni_spi_handle_rx() mas->cur_xfer NULL dereference! tl;dr: Seriously delayed interrupts for RX/TX can lead to timeout handling setting mas->cur_xfer to NULL. Let's check for the NULL transfer in the TX and RX cases and reset the watermark or clear out the fifo respectively to put the hardware back into a sane state. NOTE: things still could get confused if we get timeouts all the way through handle_fifo_timeout() and then start a new transfer because interrupts from the old transfer / cancel / abort could still be pending. A future patch will help this corner case. Fixes: 561de45f72bd ("spi: spi-geni-qcom: Add SPI driver support for GENI based QUP") Signed-off-by: Douglas Anderson Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20201217142842.v3.1.I99ee04f0cb823415df59bd4f550d6ff5756e43d6@changeid Signed-off-by: Mark Brown --- drivers/spi/spi-geni-qcom.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index 512e925d5ea4..096edfbde451 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c @@ -354,6 +354,12 @@ static bool geni_spi_handle_tx(struct spi_geni_master *mas) unsigned int bytes_per_fifo_word = geni_byte_per_fifo_word(mas); unsigned int i = 0; + /* Stop the watermark IRQ if nothing to send */ + if (!mas->cur_xfer) { + writel(0, se->base + SE_GENI_TX_WATERMARK_REG); + return false; + } + max_bytes = (mas->tx_fifo_depth - mas->tx_wm) * bytes_per_fifo_word; if (mas->tx_rem_bytes < max_bytes) max_bytes = mas->tx_rem_bytes; @@ -396,6 +402,14 @@ static void geni_spi_handle_rx(struct spi_geni_master *mas) if (rx_last_byte_valid && rx_last_byte_valid < 4) rx_bytes -= bytes_per_fifo_word - rx_last_byte_valid; } + + /* Clear out the FIFO and bail if nowhere to put it */ + if (!mas->cur_xfer) { + for (i = 0; i < DIV_ROUND_UP(rx_bytes, bytes_per_fifo_word); i++) + readl(se->base + SE_GENI_RX_FIFOn); + return; + } + if (mas->rx_rem_bytes < rx_bytes) rx_bytes = mas->rx_rem_bytes; -- cgit 1.4.1 From 690d8b917bbe64772cb0b652311bcd50908aea6b Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 17 Dec 2020 14:29:12 -0800 Subject: spi: spi-geni-qcom: Fail new xfers if xfer/cancel/abort pending If we got a timeout when trying to send an abort command then it means that we just got 3 timeouts in a row: 1. The original timeout that caused handle_fifo_timeout() to be called. 2. A one second timeout waiting for the cancel command to finish. 3. A one second timeout waiting for the abort command to finish. SPI is clocked by the controller, so nothing (aside from a hardware fault or a totally broken sequencer) should be causing the actual commands to fail in hardware. However, even though the hardware itself is not expected to fail (and it'd be hard to predict how we should handle things if it did), it's easy to hit the timeout case by simply blocking our interrupt handler from running for a long period of time. Obviously the system is in pretty bad shape if a interrupt handler is blocked for > 2 seconds, but there are certainly bugs (even bugs in other unrelated drivers) that can make this happen. Let's make things a bit more robust against this case. If we fail to abort we'll set a flag and then we'll block all future transfers until we have no more interrupts pending. Fixes: 561de45f72bd ("spi: spi-geni-qcom: Add SPI driver support for GENI based QUP") Signed-off-by: Douglas Anderson Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20201217142842.v3.2.Ibade998ed587e070388b4bf58801f1107a40eb53@changeid Signed-off-by: Mark Brown --- drivers/spi/spi-geni-qcom.c | 59 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index 096edfbde451..32c053705dec 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c @@ -83,6 +83,7 @@ struct spi_geni_master { spinlock_t lock; int irq; bool cs_flag; + bool abort_failed; }; static int get_spi_clk_cfg(unsigned int speed_hz, @@ -141,8 +142,49 @@ static void handle_fifo_timeout(struct spi_master *spi, spin_unlock_irq(&mas->lock); time_left = wait_for_completion_timeout(&mas->abort_done, HZ); - if (!time_left) + if (!time_left) { dev_err(mas->dev, "Failed to cancel/abort m_cmd\n"); + + /* + * No need for a lock since SPI core has a lock and we never + * access this from an interrupt. + */ + mas->abort_failed = true; + } +} + +static bool spi_geni_is_abort_still_pending(struct spi_geni_master *mas) +{ + struct geni_se *se = &mas->se; + u32 m_irq, m_irq_en; + + if (!mas->abort_failed) + return false; + + /* + * The only known case where a transfer times out and then a cancel + * times out then an abort times out is if something is blocking our + * interrupt handler from running. Avoid starting any new transfers + * until that sorts itself out. + */ + spin_lock_irq(&mas->lock); + m_irq = readl(se->base + SE_GENI_M_IRQ_STATUS); + m_irq_en = readl(se->base + SE_GENI_M_IRQ_EN); + spin_unlock_irq(&mas->lock); + + if (m_irq & m_irq_en) { + dev_err(mas->dev, "Interrupts pending after abort: %#010x\n", + m_irq & m_irq_en); + return true; + } + + /* + * If we're here the problem resolved itself so no need to check more + * on future transfers. + */ + mas->abort_failed = false; + + return false; } static void spi_geni_set_cs(struct spi_device *slv, bool set_flag) @@ -158,9 +200,15 @@ static void spi_geni_set_cs(struct spi_device *slv, bool set_flag) if (set_flag == mas->cs_flag) return; + pm_runtime_get_sync(mas->dev); + + if (spi_geni_is_abort_still_pending(mas)) { + dev_err(mas->dev, "Can't set chip select\n"); + goto exit; + } + mas->cs_flag = set_flag; - pm_runtime_get_sync(mas->dev); spin_lock_irq(&mas->lock); reinit_completion(&mas->cs_done); if (set_flag) @@ -173,6 +221,7 @@ static void spi_geni_set_cs(struct spi_device *slv, bool set_flag) if (!time_left) handle_fifo_timeout(spi, NULL); +exit: pm_runtime_put(mas->dev); } @@ -280,6 +329,9 @@ static int spi_geni_prepare_message(struct spi_master *spi, int ret; struct spi_geni_master *mas = spi_master_get_devdata(spi); + if (spi_geni_is_abort_still_pending(mas)) + return -EBUSY; + ret = setup_fifo_params(spi_msg->spi, spi); if (ret) dev_err(mas->dev, "Couldn't select mode %d\n", ret); @@ -509,6 +561,9 @@ static int spi_geni_transfer_one(struct spi_master *spi, { struct spi_geni_master *mas = spi_master_get_devdata(spi); + if (spi_geni_is_abort_still_pending(mas)) + return -EBUSY; + /* Terminate and return success for 0 byte length transfer */ if (!xfer->len) return 0; -- cgit 1.4.1 From 3d7d916f9bc98ce88272b3e4405c7c685afbfcd6 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 17 Dec 2020 14:29:13 -0800 Subject: spi: spi-geni-qcom: Don't try to set CS if an xfer is pending If we get a timeout sending then this happens: spi_transfer_one_message() ->transfer_one() AKA spi_geni_transfer_one() setup_fifo_xfer() mas->cur_xfer = non-NULL spi_transfer_wait() => TIMES OUT if (msg->status != -EINPROGRESS) goto out if (ret != 0 ...) spi_set_cs() ->set_cs AKA spi_geni_set_cs() # mas->cur_xfer is non-NULL The above happens _before_ the SPI core calls ->handle_err() AKA handle_fifo_timeout(). Unfortunately that won't work so well on geni. If we got a timeout transferring then it's likely that our interrupt handler is blocked, but we need that same interrupt handler to run and the command channel to be unblocked in order to adjust the chip select. Trying to set the chip select doesn't crash us but ends up confusing our state machine and leads to messages like: Premature done. rx_rem = 32 bpw8 Let's just drop the chip select request in this case. We can detect the case because cur_xfer is non-NULL--it would have been set to NULL in the interrupt handler if the previous transfer had finished. Sure, we might leave the chip select in the wrong state but it's likely it was going to fail anyway and this avoids getting the driver even more confused about what it's doing. The SPI core in general assumes that setting chip select is a simple operation that doesn't fail. Yet another reason to just reconfigure the chip select line as GPIOs. Signed-off-by: Douglas Anderson Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20201217142842.v3.3.I07afdedcc49655c5d26880f8df9170aac5792378@changeid Signed-off-by: Mark Brown --- drivers/spi/spi-geni-qcom.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index 32c053705dec..365565d3a998 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c @@ -207,9 +207,14 @@ static void spi_geni_set_cs(struct spi_device *slv, bool set_flag) goto exit; } - mas->cs_flag = set_flag; - spin_lock_irq(&mas->lock); + if (mas->cur_xfer) { + dev_err(mas->dev, "Can't set CS when prev xfer running\n"); + spin_unlock_irq(&mas->lock); + goto exit; + } + + mas->cs_flag = set_flag; reinit_completion(&mas->cs_done); if (set_flag) geni_se_setup_m_cmd(se, SPI_CS_ASSERT, 0); -- cgit 1.4.1 From 17fa81aa702ec118f2b835715897041675b06336 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 17 Dec 2020 14:29:14 -0800 Subject: spi: spi-geni-qcom: Print an error when we timeout setting the CS If we're using geni to manage the chip select line (don't do it--use a GPIO!) and we happen to get a timeout waiting for the chip select command to be completed, no errors are printed even though things might not be in the best shape. Let's add a print. Signed-off-by: Douglas Anderson Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20201217142842.v3.4.I666b37646de9652cef438ac7c2c6c2053367fc6b@changeid Signed-off-by: Mark Brown --- drivers/spi/spi-geni-qcom.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index 365565d3a998..881f645661cc 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c @@ -223,8 +223,10 @@ static void spi_geni_set_cs(struct spi_device *slv, bool set_flag) spin_unlock_irq(&mas->lock); time_left = wait_for_completion_timeout(&mas->cs_done, HZ); - if (!time_left) + if (!time_left) { + dev_warn(mas->dev, "Timeout setting chip select\n"); handle_fifo_timeout(spi, NULL); + } exit: pm_runtime_put(mas->dev); -- cgit 1.4.1 From abdcd06c4dedbcabaec68c433c7f53f33307811f Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Wed, 16 Dec 2020 09:28:04 +0200 Subject: net: af_packet: fix procfs header for 64-bit pointers On 64-bit systems the packet procfs header field names following 'sk' are not aligned correctly: sk RefCnt Type Proto Iface R Rmem User Inode 00000000605d2c64 3 3 0003 7 1 450880 0 16643 00000000080e9b80 2 2 0000 0 0 0 0 17404 00000000b23b8a00 2 2 0000 0 0 0 0 17421 ... With this change field names are correctly aligned: sk RefCnt Type Proto Iface R Rmem User Inode 000000005c3b1d97 3 3 0003 7 1 21568 0 16178 000000007be55bb7 3 3 fbce 8 1 0 0 16250 00000000be62127d 3 3 fbcd 8 1 0 0 16254 ... Signed-off-by: Baruch Siach Link: https://lore.kernel.org/r/54917251d8433735d9a24e935a6cb8eb88b4058a.1608103684.git.baruch@tkos.co.il Signed-off-by: Jakub Kicinski --- net/packet/af_packet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index de8e8dbbdeb8..6bbc7a448593 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4595,7 +4595,9 @@ static void packet_seq_stop(struct seq_file *seq, void *v) static int packet_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) - seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n"); + seq_printf(seq, + "%*sRefCnt Type Proto Iface R Rmem User Inode\n", + IS_ENABLED(CONFIG_64BIT) ? -17 : -9, "sk"); else { struct sock *s = sk_entry(v); const struct packet_sock *po = pkt_sk(s); -- cgit 1.4.1 From 698285da79f5b0b099db15a37ac661ac408c80eb Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 17 Dec 2020 22:29:46 +0100 Subject: net/sched: sch_taprio: ensure to reset/destroy all child qdiscs taprio_graft() can insert a NULL element in the array of child qdiscs. As a consquence, taprio_reset() might not reset child qdiscs completely, and taprio_destroy() might leak resources. Fix it by ensuring that loops that iterate over q->qdiscs[] don't end when they find the first NULL item. Fixes: 44d4775ca518 ("net/sched: sch_taprio: reset child qdiscs before freeing them") Fixes: 5a781ccbd19e ("tc: Add support for configuring the taprio scheduler") Suggested-by: Jakub Kicinski Signed-off-by: Davide Caratti Link: https://lore.kernel.org/r/13edef6778fef03adc751582562fba4a13e06d6a.1608240532.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- net/sched/sch_taprio.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index c74817ec9964..6f775275826a 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1605,8 +1605,9 @@ static void taprio_reset(struct Qdisc *sch) hrtimer_cancel(&q->advance_timer); if (q->qdiscs) { - for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++) - qdisc_reset(q->qdiscs[i]); + for (i = 0; i < dev->num_tx_queues; i++) + if (q->qdiscs[i]) + qdisc_reset(q->qdiscs[i]); } sch->qstats.backlog = 0; sch->q.qlen = 0; @@ -1626,7 +1627,7 @@ static void taprio_destroy(struct Qdisc *sch) taprio_disable_offload(dev, q, NULL); if (q->qdiscs) { - for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++) + for (i = 0; i < dev->num_tx_queues; i++) qdisc_put(q->qdiscs[i]); kfree(q->qdiscs); -- cgit 1.4.1 From 87508224485323ce2d4e7fb929ec80f51adcc238 Mon Sep 17 00:00:00 2001 From: Stefan Chulski Date: Thu, 17 Dec 2020 16:52:15 +0200 Subject: net: mvpp2: disable force link UP during port init procedure Force link UP can be enabled by bootloader during tftpboot and breaks NFS support. Force link UP disabled during port init procedure. Fixes: f84bf386f395 ("net: mvpp2: initialize the GoP") Signed-off-by: Stefan Chulski Acked-by: Marcin Wojtas Link: https://lore.kernel.org/r/1608216735-14501-1-git-send-email-stefanc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index afdd22827223..0f18d034ce3e 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -5487,7 +5487,7 @@ static int mvpp2_port_init(struct mvpp2_port *port) struct mvpp2 *priv = port->priv; struct mvpp2_txq_pcpu *txq_pcpu; unsigned int thread; - int queue, err; + int queue, err, val; /* Checks for hardware constraints */ if (port->first_rxq + port->nrxqs > @@ -5501,6 +5501,18 @@ static int mvpp2_port_init(struct mvpp2_port *port) mvpp2_egress_disable(port); mvpp2_port_disable(port); + if (mvpp2_is_xlg(port->phy_interface)) { + val = readl(port->base + MVPP22_XLG_CTRL0_REG); + val &= ~MVPP22_XLG_CTRL0_FORCE_LINK_PASS; + val |= MVPP22_XLG_CTRL0_FORCE_LINK_DOWN; + writel(val, port->base + MVPP22_XLG_CTRL0_REG); + } else { + val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); + val &= ~MVPP2_GMAC_FORCE_LINK_PASS; + val |= MVPP2_GMAC_FORCE_LINK_DOWN; + writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG); + } + port->tx_time_coal = MVPP2_TXDONE_COAL_USEC; port->txqs = devm_kcalloc(dev, port->ntxqs, sizeof(*port->txqs), -- cgit 1.4.1 From 3f48fab62bb81a7f9d01e9d43c40395fad011dd5 Mon Sep 17 00:00:00 2001 From: Stefan Chulski Date: Thu, 17 Dec 2020 20:30:17 +0200 Subject: net: mvpp2: Add TCAM entry to drop flow control pause frames Issue: Flow control frame used to pause GoP(MAC) was delivered to the CPU and created a load on the CPU. Since XOFF/XON frames are used only by MAC, these frames should be dropped inside MAC. Fix: According to 802.3-2012 - IEEE Standard for Ethernet pause frame has unique destination MAC address 01-80-C2-00-00-01. Add TCAM parser entry to track and drop pause frames by destination MAC. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Stefan Chulski Link: https://lore.kernel.org/r/1608229817-21951-1-git-send-email-stefanc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c | 33 ++++++++++++++++++++++++++ drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h | 2 +- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c index 5692c6087bbb..f069593d7e50 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c @@ -405,6 +405,38 @@ static int mvpp2_prs_tcam_first_free(struct mvpp2 *priv, unsigned char start, return -EINVAL; } +/* Drop flow control pause frames */ +static void mvpp2_prs_drop_fc(struct mvpp2 *priv) +{ + unsigned char da[ETH_ALEN] = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x01 }; + struct mvpp2_prs_entry pe; + unsigned int len; + + memset(&pe, 0, sizeof(pe)); + + /* For all ports - drop flow control frames */ + pe.index = MVPP2_PE_FC_DROP; + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_MAC); + + /* Set match on DA */ + len = ETH_ALEN; + while (len--) + mvpp2_prs_tcam_data_byte_set(&pe, len, da[len], 0xff); + + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_DROP_MASK, + MVPP2_PRS_RI_DROP_MASK); + + mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1); + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS); + + /* Mask all ports */ + mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_MAC); + mvpp2_prs_hw_write(priv, &pe); +} + /* Enable/disable dropping all mac da's */ static void mvpp2_prs_mac_drop_all_set(struct mvpp2 *priv, int port, bool add) { @@ -1162,6 +1194,7 @@ static void mvpp2_prs_mac_init(struct mvpp2 *priv) mvpp2_prs_hw_write(priv, &pe); /* Create dummy entries for drop all and promiscuous modes */ + mvpp2_prs_drop_fc(priv); mvpp2_prs_mac_drop_all_set(priv, 0, false); mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_UNI_CAST, false); mvpp2_prs_mac_promisc_set(priv, 0, MVPP2_PRS_L2_MULTI_CAST, false); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h index e22f6c85d380..4b68dd374733 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.h @@ -129,7 +129,7 @@ #define MVPP2_PE_VID_EDSA_FLTR_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 7) #define MVPP2_PE_VLAN_DBL (MVPP2_PRS_TCAM_SRAM_SIZE - 6) #define MVPP2_PE_VLAN_NONE (MVPP2_PRS_TCAM_SRAM_SIZE - 5) -/* reserved */ +#define MVPP2_PE_FC_DROP (MVPP2_PRS_TCAM_SRAM_SIZE - 4) #define MVPP2_PE_MAC_MC_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 3) #define MVPP2_PE_MAC_UC_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 2) #define MVPP2_PE_MAC_NON_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 1) -- cgit 1.4.1 From fec6079b2eeab319d9e3d074f54d3b6f623e9701 Mon Sep 17 00:00:00 2001 From: Stefan Chulski Date: Thu, 17 Dec 2020 20:37:46 +0200 Subject: net: mvpp2: prs: fix PPPoE with ipv6 packet parse Current PPPoE+IPv6 entry is jumping to 'next-hdr' field and not to 'DIP' field as done for IPv4. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Reported-by: Liron Himi Signed-off-by: Stefan Chulski Link: https://lore.kernel.org/r/1608230266-22111-1-git-send-email-stefanc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c index f069593d7e50..a30eb90ba3d2 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c @@ -1680,8 +1680,9 @@ static int mvpp2_prs_pppoe_init(struct mvpp2 *priv) mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP6); mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP6, MVPP2_PRS_RI_L3_PROTO_MASK); - /* Skip eth_type + 4 bytes of IPv6 header */ - mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + 4, + /* Jump to DIP of IPV6 header */ + mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + 8 + + MVPP2_MAX_L3_ADDR_SIZE, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); /* Set L3 offset */ mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3, -- cgit 1.4.1 From e16ab3db87b3d5d4118dfb68e955f62c4e09573a Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 5 Dec 2020 11:35:25 +0100 Subject: mt76: usb: remove wake logic in mt76u_status_worker Similar to mmio code path, remove wake logic in mt76u_status_worker handler. Starting from commit 90d494c99a99 ("mt76: improve tx queue stop/wake")', the wake queue logic on the usb status path is no longer necessary since the hw queues are no longer stopped on the mt76 tx path. Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/00009bf0cfdc9565e4432cad3ed51888c667c25d.1607164041.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/usb.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c index 8b08cad131c8..b95d093728b9 100644 --- a/drivers/net/wireless/mediatek/mt76/usb.c +++ b/drivers/net/wireless/mediatek/mt76/usb.c @@ -811,7 +811,6 @@ static void mt76u_status_worker(struct mt76_worker *w) struct mt76_dev *dev = container_of(usb, struct mt76_dev, usb); struct mt76_queue_entry entry; struct mt76_queue *q; - bool wake; int i; for (i = 0; i < IEEE80211_NUM_ACS; i++) { @@ -829,10 +828,6 @@ static void mt76u_status_worker(struct mt76_worker *w) mt76_queue_tx_complete(dev, q, &entry); } - wake = q->stopped && q->queued < q->ndesc - 8; - if (wake) - q->stopped = false; - if (!q->queued) wake_up(&dev->tx_wait); @@ -841,8 +836,6 @@ static void mt76u_status_worker(struct mt76_worker *w) if (dev->drv->tx_status_data && !test_and_set_bit(MT76_READING_STATS, &dev->phy.state)) queue_work(dev->wq, &dev->usb.stat_work); - if (wake) - ieee80211_wake_queue(dev->hw, i); } } -- cgit 1.4.1 From 123bb2b737881127b450e8b3b1bae69a8949498e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 5 Dec 2020 11:35:26 +0100 Subject: mt76: sdio: remove wake logic in mt76s_process_tx_queue Similar to mmio/usb code path, remove wake logic in mt76s_process_tx_queue routine. Starting from commit 90d494c99a99 ("mt76: improve tx queue stop/wake"), the wake queue logic on the sdio status path is no longer necessary since the hw queues are no longer stopped on the mt76 tx path. Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/d2d7d9d437f4dec2ef1df0ed070b9cf299f021ad.1607164041.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/sdio.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/sdio.c b/drivers/net/wireless/mediatek/mt76/sdio.c index 62b5b912818f..7cd995118257 100644 --- a/drivers/net/wireless/mediatek/mt76/sdio.c +++ b/drivers/net/wireless/mediatek/mt76/sdio.c @@ -157,7 +157,7 @@ static void mt76s_net_worker(struct mt76_worker *w) static int mt76s_process_tx_queue(struct mt76_dev *dev, struct mt76_queue *q) { - bool wake, mcu = q == dev->q_mcu[MT_MCUQ_WM]; + bool mcu = q == dev->q_mcu[MT_MCUQ_WM]; struct mt76_queue_entry entry; int nframes = 0; @@ -177,21 +177,12 @@ static int mt76s_process_tx_queue(struct mt76_dev *dev, struct mt76_queue *q) nframes++; } - wake = q->stopped && q->queued < q->ndesc - 8; - if (wake) - q->stopped = false; - if (!q->queued) wake_up(&dev->tx_wait); - if (mcu) - goto out; - - mt76_txq_schedule(&dev->phy, q->qid); + if (!mcu) + mt76_txq_schedule(&dev->phy, q->qid); - if (wake) - ieee80211_wake_queue(dev->hw, q->qid); -out: return nframes; } -- cgit 1.4.1 From f7217f718747641fc80cd062f183107439f2a066 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 8 Dec 2020 10:18:11 +0100 Subject: mt76: mt76s: fix NULL pointer dereference in mt76s_process_tx_queue Fix a possible NULL pointer dereference in mt76s_process_tx_queue that can occur if status thread runs before allocating tx queues Fixes: 6a618acb7e62 ("mt76: sdio: convert {status/net}_work to mt76_worker") Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/b49c1b4edacd87b2241a9fd0431dd4864c8963f6.1607418933.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/sdio.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/sdio.c b/drivers/net/wireless/mediatek/mt76/sdio.c index 7cd995118257..0b6facb17ff7 100644 --- a/drivers/net/wireless/mediatek/mt76/sdio.c +++ b/drivers/net/wireless/mediatek/mt76/sdio.c @@ -157,10 +157,14 @@ static void mt76s_net_worker(struct mt76_worker *w) static int mt76s_process_tx_queue(struct mt76_dev *dev, struct mt76_queue *q) { - bool mcu = q == dev->q_mcu[MT_MCUQ_WM]; struct mt76_queue_entry entry; int nframes = 0; + bool mcu; + if (!q) + return 0; + + mcu = q == dev->q_mcu[MT_MCUQ_WM]; while (q->queued > 0) { if (!q->entry[q->tail].done) break; -- cgit 1.4.1 From 0bd157fa2aaa2c77d6254321d7751aa9eec68c7b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 18 Dec 2020 09:32:02 -0800 Subject: mt76: mt7915: fix MESH ifdef block Fix a build error when CONFIG_MAC80211_MESH is not enabled: ../drivers/net/wireless/mediatek/mt76/mt7915/init.c:47:2: error: expected expression before '}' token }, { ^ Fixes: af901eb4ab80 ("mt76: mt7915: get rid of dbdc debugfs knob") Signed-off-by: Randy Dunlap Cc: Shayne Chen Cc: Ryder Lee Cc: Lorenzo Bianconi Cc: Felix Fietkau Cc: linux-wireless@vger.kernel.org Cc: Kalle Valo Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201218173202.23159-1-rdunlap@infradead.org --- drivers/net/wireless/mediatek/mt76/mt7915/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c index ed4635bd151a..102a8f14c22d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c @@ -40,9 +40,9 @@ static const struct ieee80211_iface_limit if_limits[] = { .types = BIT(NL80211_IFTYPE_ADHOC) }, { .max = 16, - .types = BIT(NL80211_IFTYPE_AP) | + .types = BIT(NL80211_IFTYPE_AP) #ifdef CONFIG_MAC80211_MESH - BIT(NL80211_IFTYPE_MESH_POINT) + | BIT(NL80211_IFTYPE_MESH_POINT) #endif }, { .max = MT7915_MAX_INTERFACES, -- cgit 1.4.1 From bfe55584713b4d4d518ffe9cf2dab1129eba6321 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Sun, 20 Dec 2020 15:18:07 +0100 Subject: MAINTAINERS: switch to different email address Switching to private mail account as work email is polluted with a legal disclaimer. Just making it extra clear by changing the email address in the MAINTAINERS file as well. Signed-off-by: Arend van Spriel Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201220141807.17278-1-aspriel@gmail.com --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 5c1a6ba5ef26..85a7c3a2ed63 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3545,7 +3545,7 @@ S: Supported F: drivers/net/ethernet/broadcom/bnxt/ BROADCOM BRCM80211 IEEE802.11n WIRELESS DRIVER -M: Arend van Spriel +M: Arend van Spriel M: Franky Lin M: Hante Meuleman M: Chi-hsien Lin -- cgit 1.4.1 From a590370d918fc66c62df6620445791fbe840344a Mon Sep 17 00:00:00 2001 From: Roman Guskov Date: Mon, 21 Dec 2020 13:35:32 +0100 Subject: spi: stm32: FIFO threshold level - fix align packet size if cur_bpw <= 8 and xfer_len < 4 then the value of fthlv will be 1 and SPI registers content may have been lost. * If SPI data register is accessed as a 16-bit register and DSIZE <= 8bit, better to select FTHLV = 2, 4, 6 etc * If SPI data register is accessed as a 32-bit register and DSIZE > 8bit, better to select FTHLV = 2, 4, 6 etc, while if DSIZE <= 8bit, better to select FTHLV = 4, 8, 12 etc Signed-off-by: Roman Guskov Fixes: dcbe0d84dfa5 ("spi: add driver for STM32 SPI controller") Reviewed-by: Marek Vasut Link: https://lore.kernel.org/r/20201221123532.27272-1-rguskov@dh-electronics.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index 471dedf3d339..6017209c6d2f 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -493,9 +493,9 @@ static u32 stm32h7_spi_prepare_fthlv(struct stm32_spi *spi, u32 xfer_len) /* align packet size with data registers access */ if (spi->cur_bpw > 8) - fthlv -= (fthlv % 2); /* multiple of 2 */ + fthlv += (fthlv % 2) ? 1 : 0; else - fthlv -= (fthlv % 4); /* multiple of 4 */ + fthlv += (fthlv % 4) ? (4 - (fthlv % 4)) : 0; if (!fthlv) fthlv = 1; -- cgit 1.4.1 From 3b66e4a8e58a85af3212c7117d7a29c9ef6679a2 Mon Sep 17 00:00:00 2001 From: Guido Günther Date: Fri, 18 Dec 2020 19:38:07 +0100 Subject: regulator: bd718x7: Add enable times MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the typical startup times from the data sheet so boards get a reasonable default. Not setting any enable time can lead to board hangs when e.g. clocks are enabled too soon afterwards. This fixes gpu power domain resume on the Librem 5. [Moved #defines into driver, seems to be general agreement and avoids any cross tree issues -- broonie] Signed-off-by: Guido Günther Reviewed-by: Matti Vaittinen Link: https://lore.kernel.org/r/41fb2ed19f584f138336344e2297ae7301f72b75.1608316658.git.agx@sigxcpu.org Signed-off-by: Mark Brown --- drivers/regulator/bd718x7-regulator.c | 57 +++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/drivers/regulator/bd718x7-regulator.c b/drivers/regulator/bd718x7-regulator.c index e6d5d98c3cea..9309765d0450 100644 --- a/drivers/regulator/bd718x7-regulator.c +++ b/drivers/regulator/bd718x7-regulator.c @@ -15,6 +15,36 @@ #include #include +/* Typical regulator startup times as per data sheet in uS */ +#define BD71847_BUCK1_STARTUP_TIME 144 +#define BD71847_BUCK2_STARTUP_TIME 162 +#define BD71847_BUCK3_STARTUP_TIME 162 +#define BD71847_BUCK4_STARTUP_TIME 240 +#define BD71847_BUCK5_STARTUP_TIME 270 +#define BD71847_BUCK6_STARTUP_TIME 200 +#define BD71847_LDO1_STARTUP_TIME 440 +#define BD71847_LDO2_STARTUP_TIME 370 +#define BD71847_LDO3_STARTUP_TIME 310 +#define BD71847_LDO4_STARTUP_TIME 400 +#define BD71847_LDO5_STARTUP_TIME 530 +#define BD71847_LDO6_STARTUP_TIME 400 + +#define BD71837_BUCK1_STARTUP_TIME 160 +#define BD71837_BUCK2_STARTUP_TIME 180 +#define BD71837_BUCK3_STARTUP_TIME 180 +#define BD71837_BUCK4_STARTUP_TIME 180 +#define BD71837_BUCK5_STARTUP_TIME 160 +#define BD71837_BUCK6_STARTUP_TIME 240 +#define BD71837_BUCK7_STARTUP_TIME 220 +#define BD71837_BUCK8_STARTUP_TIME 200 +#define BD71837_LDO1_STARTUP_TIME 440 +#define BD71837_LDO2_STARTUP_TIME 370 +#define BD71837_LDO3_STARTUP_TIME 310 +#define BD71837_LDO4_STARTUP_TIME 400 +#define BD71837_LDO5_STARTUP_TIME 310 +#define BD71837_LDO6_STARTUP_TIME 400 +#define BD71837_LDO7_STARTUP_TIME 530 + /* * BD718(37/47/50) have two "enable control modes". ON/OFF can either be * controlled by software - or by PMIC internal HW state machine. Whether @@ -613,6 +643,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_mask = DVS_BUCK_RUN_MASK, .enable_reg = BD718XX_REG_BUCK1_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71847_BUCK1_STARTUP_TIME, .owner = THIS_MODULE, .of_parse_cb = buck_set_hw_dvs_levels, }, @@ -646,6 +677,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_mask = DVS_BUCK_RUN_MASK, .enable_reg = BD718XX_REG_BUCK2_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71847_BUCK2_STARTUP_TIME, .owner = THIS_MODULE, .of_parse_cb = buck_set_hw_dvs_levels, }, @@ -680,6 +712,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .linear_range_selectors = bd71847_buck3_volt_range_sel, .enable_reg = BD718XX_REG_1ST_NODVS_BUCK_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71847_BUCK3_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -706,6 +739,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_range_mask = BD71847_BUCK4_RANGE_MASK, .linear_range_selectors = bd71847_buck4_volt_range_sel, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71847_BUCK4_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -727,6 +761,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_mask = BD718XX_3RD_NODVS_BUCK_MASK, .enable_reg = BD718XX_REG_3RD_NODVS_BUCK_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71847_BUCK5_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -750,6 +785,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_mask = BD718XX_4TH_NODVS_BUCK_MASK, .enable_reg = BD718XX_REG_4TH_NODVS_BUCK_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71847_BUCK6_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -775,6 +811,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .linear_range_selectors = bd718xx_ldo1_volt_range_sel, .enable_reg = BD718XX_REG_LDO1_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71847_LDO1_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -796,6 +833,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .n_voltages = ARRAY_SIZE(ldo_2_volts), .enable_reg = BD718XX_REG_LDO2_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71847_LDO2_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -818,6 +856,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_mask = BD718XX_LDO3_MASK, .enable_reg = BD718XX_REG_LDO3_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71847_LDO3_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -840,6 +879,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_mask = BD718XX_LDO4_MASK, .enable_reg = BD718XX_REG_LDO4_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71847_LDO4_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -865,6 +905,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .linear_range_selectors = bd71847_ldo5_volt_range_sel, .enable_reg = BD718XX_REG_LDO5_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71847_LDO5_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -889,6 +930,7 @@ static struct bd718xx_regulator_data bd71847_regulators[] = { .vsel_mask = BD718XX_LDO6_MASK, .enable_reg = BD718XX_REG_LDO6_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71847_LDO6_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -942,6 +984,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = DVS_BUCK_RUN_MASK, .enable_reg = BD718XX_REG_BUCK1_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK1_STARTUP_TIME, .owner = THIS_MODULE, .of_parse_cb = buck_set_hw_dvs_levels, }, @@ -975,6 +1018,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = DVS_BUCK_RUN_MASK, .enable_reg = BD718XX_REG_BUCK2_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK2_STARTUP_TIME, .owner = THIS_MODULE, .of_parse_cb = buck_set_hw_dvs_levels, }, @@ -1005,6 +1049,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = DVS_BUCK_RUN_MASK, .enable_reg = BD71837_REG_BUCK3_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK3_STARTUP_TIME, .owner = THIS_MODULE, .of_parse_cb = buck_set_hw_dvs_levels, }, @@ -1033,6 +1078,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = DVS_BUCK_RUN_MASK, .enable_reg = BD71837_REG_BUCK4_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK4_STARTUP_TIME, .owner = THIS_MODULE, .of_parse_cb = buck_set_hw_dvs_levels, }, @@ -1065,6 +1111,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .linear_range_selectors = bd71837_buck5_volt_range_sel, .enable_reg = BD718XX_REG_1ST_NODVS_BUCK_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK5_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1088,6 +1135,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD71837_BUCK6_MASK, .enable_reg = BD718XX_REG_2ND_NODVS_BUCK_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK6_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1109,6 +1157,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD718XX_3RD_NODVS_BUCK_MASK, .enable_reg = BD718XX_REG_3RD_NODVS_BUCK_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK7_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1132,6 +1181,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD718XX_4TH_NODVS_BUCK_MASK, .enable_reg = BD718XX_REG_4TH_NODVS_BUCK_CTRL, .enable_mask = BD718XX_BUCK_EN, + .enable_time = BD71837_BUCK8_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1157,6 +1207,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .linear_range_selectors = bd718xx_ldo1_volt_range_sel, .enable_reg = BD718XX_REG_LDO1_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71837_LDO1_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1178,6 +1229,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .n_voltages = ARRAY_SIZE(ldo_2_volts), .enable_reg = BD718XX_REG_LDO2_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71837_LDO2_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1200,6 +1252,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD718XX_LDO3_MASK, .enable_reg = BD718XX_REG_LDO3_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71837_LDO3_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1222,6 +1275,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD718XX_LDO4_MASK, .enable_reg = BD718XX_REG_LDO4_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71837_LDO4_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1246,6 +1300,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD71837_LDO5_MASK, .enable_reg = BD718XX_REG_LDO5_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71837_LDO5_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1272,6 +1327,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD718XX_LDO6_MASK, .enable_reg = BD718XX_REG_LDO6_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71837_LDO6_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { @@ -1296,6 +1352,7 @@ static struct bd718xx_regulator_data bd71837_regulators[] = { .vsel_mask = BD71837_LDO7_MASK, .enable_reg = BD71837_REG_LDO7_VOLT, .enable_mask = BD718XX_LDO_EN, + .enable_time = BD71837_LDO7_STARTUP_TIME, .owner = THIS_MODULE, }, .init = { -- cgit 1.4.1 From e7e518053c267bb6be3799520d9f4a34c7264a2e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 21 Dec 2020 11:25:06 -0800 Subject: bpf: Add schedule point in htab_init_buckets() We noticed that with a LOCKDEP enabled kernel, allocating a hash table with 65536 buckets would use more than 60ms. htab_init_buckets() runs from process context, it is safe to schedule to avoid latency spikes. Fixes: c50eb518e262 ("bpf: Use separate lockdep class for each hashtab") Reported-by: John Sperbeck Signed-off-by: Eric Dumazet Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201221192506.707584-1-eric.dumazet@gmail.com --- kernel/bpf/hashtab.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 7e848200cd26..c1ac7f964bc9 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -152,6 +152,7 @@ static void htab_init_buckets(struct bpf_htab *htab) lockdep_set_class(&htab->buckets[i].lock, &htab->lockdep_key); } + cond_resched(); } } -- cgit 1.4.1 From 54ddbdb024882e226055cc4c3c246592ddde2ee5 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 18 Dec 2020 09:38:43 -0800 Subject: net: systemport: set dev->max_mtu to UMAC_MAX_MTU_SIZE The driver is already allocating receive buffers of 2KiB and the Ethernet MAC is configured to accept frames up to UMAC_MAX_MTU_SIZE. Fixes: bfcb813203e6 ("net: dsa: configure the MTU for switch ports") Signed-off-by: Florian Fainelli Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20201218173843.141046-1-f.fainelli@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bcmsysport.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 0fdd19d99d99..b1ae9eb8f247 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2577,6 +2577,7 @@ static int bcm_sysport_probe(struct platform_device *pdev) NETIF_F_HW_VLAN_CTAG_TX; dev->hw_features |= dev->features; dev->vlan_features |= dev->features; + dev->max_mtu = UMAC_MAX_MTU_SIZE; /* Request the WOL interrupt and advertise suspend if available */ priv->wol_irq_disabled = 1; -- cgit 1.4.1 From 1385ae5c30f238f81bc6528d897c6d7a0816783f Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 18 Dec 2020 11:55:36 +0100 Subject: ethernet: ucc_geth: set dev->max_mtu to 1518 All the buffers and registers are already set up appropriately for an MTU slightly above 1500, so we just need to expose this to the networking stack. AFAICT, there's no need to implement .ndo_change_mtu when the receive buffers are always set up to support the max_mtu. This fixes several warnings during boot on our mpc8309-board with an embedded mv88e6250 switch: mv88e6085 mdio@e0102120:10: nonfatal error -34 setting MTU 1500 on port 0 ... mv88e6085 mdio@e0102120:10: nonfatal error -34 setting MTU 1500 on port 4 ucc_geth e0102000.ethernet eth1: error -22 setting MTU to 1504 to include DSA overhead The last line explains what the DSA stack tries to do: achieving an MTU of 1500 on-the-wire requires that the master netdevice connected to the CPU port supports an MTU of 1500+the tagging overhead. Fixes: bfcb813203e6 ("net: dsa: configure the MTU for switch ports") Reviewed-by: Andrew Lunn Signed-off-by: Rasmus Villemoes Reviewed-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/ucc_geth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index ba8869c3d891..d6dbae480716 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3889,6 +3889,7 @@ static int ucc_geth_probe(struct platform_device* ofdev) INIT_WORK(&ugeth->timeout_work, ucc_geth_timeout_work); netif_napi_add(dev, &ugeth->napi, ucc_geth_poll, 64); dev->mtu = 1500; + dev->max_mtu = 1518; ugeth->msg_enable = netif_msg_init(debug.msg_enable, UGETH_MSG_DEFAULT); ugeth->phy_interface = phy_interface; -- cgit 1.4.1 From 887078de2a23689e29d6fa1b75d7cbc544c280be Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 18 Dec 2020 11:55:37 +0100 Subject: ethernet: ucc_geth: fix definition and size of ucc_geth_tx_global_pram Table 8-53 in the QUICC Engine Reference manual shows definitions of fields up to a size of 192 bytes, not just 128. But in table 8-111, one does find the text Base Address of the Global Transmitter Parameter RAM Page. [...] The user needs to allocate 128 bytes for this page. The address must be aligned to the page size. I've checked both rev. 7 (11/2015) and rev. 9 (05/2018) of the manual; they both have this inconsistency (and the table numbers are the same). Adding a bit of debug printing, on my board the struct ucc_geth_tx_global_pram is allocated at offset 0x880, while the (opaque) ucc_geth_thread_data_tx gets allocated immediately afterwards, at 0x900. So whatever the engine writes into the thread data overlaps with the tail of the global tx pram (and devmem says that something does get written during a simple ping). I haven't observed any failure that could be attributed to this, but it seems to be the kind of thing that would be extremely hard to debug. So extend the struct definition so that we do allocate 192 bytes. Signed-off-by: Rasmus Villemoes Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/ucc_geth.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h index 1a9bdf66a7d8..11d4bf5dc21f 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.h +++ b/drivers/net/ethernet/freescale/ucc_geth.h @@ -575,7 +575,14 @@ struct ucc_geth_tx_global_pram { u32 vtagtable[0x8]; /* 8 4-byte VLAN tags */ u32 tqptr; /* a base pointer to the Tx Queues Memory Region */ - u8 res2[0x80 - 0x74]; + u8 res2[0x78 - 0x74]; + u64 snums_en; + u32 l2l3baseptr; /* top byte consists of a few other bit fields */ + + u16 mtu[8]; + u8 res3[0xa8 - 0x94]; + u32 wrrtablebase; /* top byte is reserved */ + u8 res4[0xc0 - 0xac]; } __packed; /* structure representing Extended Filtering Global Parameters in PRAM */ -- cgit 1.4.1 From e925e0cd2a705aaacb0b907bb3691fcac3a973a4 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 18 Dec 2020 11:55:38 +0100 Subject: ethernet: ucc_geth: fix use-after-free in ucc_geth_remove() ugeth is the netdiv_priv() part of the netdevice. Accessing the memory pointed to by ugeth (such as done by ucc_geth_memclean() and the two of_node_puts) after free_netdev() is thus use-after-free. Fixes: 80a9fad8e89a ("ucc_geth: fix module removal") Signed-off-by: Rasmus Villemoes Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/ucc_geth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index d6dbae480716..6d853f018d53 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3935,12 +3935,12 @@ static int ucc_geth_remove(struct platform_device* ofdev) struct device_node *np = ofdev->dev.of_node; unregister_netdev(dev); - free_netdev(dev); ucc_geth_memclean(ugeth); if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); of_node_put(ugeth->ug_info->tbi_node); of_node_put(ugeth->ug_info->phy_node); + free_netdev(dev); return 0; } -- cgit 1.4.1 From 83469893204281ecf65d572bddf02de29a19787c Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Fri, 18 Dec 2020 13:50:01 -0800 Subject: ionic: account for vlan tag len in rx buffer len Let the FW know we have enough receive buffer space for the vlan tag if it isn't stripped. Fixes: 0f3154e6bcb3 ("ionic: Add Tx and Rx handling") Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/20201218215001.64696-1-snelson@pensando.io Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 9156c9825a16..ac4cd5d82e69 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -337,7 +337,7 @@ void ionic_rx_fill(struct ionic_queue *q) unsigned int i, j; unsigned int len; - len = netdev->mtu + ETH_HLEN; + len = netdev->mtu + ETH_HLEN + VLAN_HLEN; nfrags = round_up(len, PAGE_SIZE) / PAGE_SIZE; for (i = ionic_q_space_avail(q); i; i--) { -- cgit 1.4.1 From 8df66af5c1e5f80562fe728db5ec069b21810144 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 19 Dec 2020 14:01:44 +0300 Subject: atm: idt77252: call pci_disable_device() on error path This error path needs to disable the pci device before returning. Fixes: ede58ef28e10 ("atm: remove deprecated use of pci api") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/X93dmC4NX0vbTpGp@mwanda Signed-off-by: Jakub Kicinski --- drivers/atm/idt77252.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 65a3886f68c9..5f0472c18bcb 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -3607,7 +3607,7 @@ static int idt77252_init_one(struct pci_dev *pcidev, if ((err = dma_set_mask_and_coherent(&pcidev->dev, DMA_BIT_MASK(32)))) { printk("idt77252: can't enable DMA for PCI device at %s\n", pci_name(pcidev)); - return err; + goto err_out_disable_pdev; } card = kzalloc(sizeof(struct idt77252_dev), GFP_KERNEL); -- cgit 1.4.1 From bcce55f556e824d43f352d76b94509185585e38d Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sat, 19 Dec 2020 13:19:24 +0100 Subject: ppp: Fix PPPIOCUNBRIDGECHAN request number PPPIOCGL2TPSTATS already uses 54. This shouldn't be a problem in practice, but let's keep the logical decreasing assignment scheme. Fixes: 4cf476ced45d ("ppp: add PPPIOCBRIDGECHAN and PPPIOCUNBRIDGECHAN ioctls") Signed-off-by: Guillaume Nault Link: https://lore.kernel.org/r/e3a4c355e3820331d8e1fffef8522739aae58b57.1608380117.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/ppp-ioctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/ppp-ioctl.h b/include/uapi/linux/ppp-ioctl.h index 8dbecb3ad036..1cc5ce0ae062 100644 --- a/include/uapi/linux/ppp-ioctl.h +++ b/include/uapi/linux/ppp-ioctl.h @@ -116,7 +116,7 @@ struct pppol2tp_ioc_stats { #define PPPIOCGCHAN _IOR('t', 55, int) /* get ppp channel number */ #define PPPIOCGL2TPSTATS _IOR('t', 54, struct pppol2tp_ioc_stats) #define PPPIOCBRIDGECHAN _IOW('t', 53, int) /* bridge one channel to another */ -#define PPPIOCUNBRIDGECHAN _IO('t', 54) /* unbridge channel */ +#define PPPIOCUNBRIDGECHAN _IO('t', 52) /* unbridge channel */ #define SIOCGPPPSTATS (SIOCDEVPRIVATE + 0) #define SIOCGPPPVER (SIOCDEVPRIVATE + 1) /* NEVER change this!! */ -- cgit 1.4.1 From 2575bc1aa9d52a62342b57a0b7d0a12146cf6aed Mon Sep 17 00:00:00 2001 From: Stefan Chulski Date: Sun, 20 Dec 2020 13:02:29 +0200 Subject: net: mvpp2: Fix GoP port 3 Networking Complex Control configurations During GoP port 2 Networking Complex Control mode of operation configurations, also GoP port 3 mode of operation was wrongly set. Patch removes these configurations. Fixes: f84bf386f395 ("net: mvpp2: initialize the GoP") Acked-by: Marcin Wojtas Signed-off-by: Stefan Chulski Link: https://lore.kernel.org/r/1608462149-1702-1-git-send-email-stefanc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 0f18d034ce3e..f20b31327027 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -1231,7 +1231,7 @@ static void mvpp22_gop_init_rgmii(struct mvpp2_port *port) regmap_read(priv->sysctrl_base, GENCONF_CTRL0, &val); if (port->gop_id == 2) - val |= GENCONF_CTRL0_PORT0_RGMII | GENCONF_CTRL0_PORT1_RGMII; + val |= GENCONF_CTRL0_PORT0_RGMII; else if (port->gop_id == 3) val |= GENCONF_CTRL0_PORT1_RGMII_MII; regmap_write(priv->sysctrl_base, GENCONF_CTRL0, val); -- cgit 1.4.1 From 74a2921948ed8c0e7f079a98442ec3493168cc85 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 2 Dec 2020 18:36:57 +0800 Subject: scsi: hisi_sas: Expose HW queues for v2 hw As a performance enhancement, make the completion queue interrupts managed. In addition, in commit bf0beec0607d ("blk-mq: drain I/O when all CPUs in a hctx are offline"), CPU hotplug for MQ devices using managed interrupts is made safe. So expose HW queues to blk-mq to take advantage of this. Flag Scsi_host.host_tagset is also set to ensure that the HBA is not sent more commands than it can handle. However the driver still does not use request tag for IPTT as there are many HW bugs means that special rules apply for IPTT allocation. Link: https://lore.kernel.org/r/1606905417-183214-6-git-send-email-john.garry@huawei.com Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas.h | 4 +++ drivers/scsi/hisi_sas/hisi_sas_main.c | 11 ++++++ drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 66 +++++++++++++++++++++++++++------- 3 files changed, 68 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index a25cfc11c96d..aa67807c5693 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -312,6 +313,7 @@ enum { struct hisi_sas_hw { int (*hw_init)(struct hisi_hba *hisi_hba); + int (*interrupt_preinit)(struct hisi_hba *hisi_hba); void (*setup_itct)(struct hisi_hba *hisi_hba, struct hisi_sas_device *device); int (*slot_index_alloc)(struct hisi_hba *hisi_hba, @@ -418,6 +420,8 @@ struct hisi_hba { u32 refclk_frequency_mhz; u8 sas_addr[SAS_ADDR_SIZE]; + int *irq_map; /* v2 hw */ + int n_phy; spinlock_t lock; struct semaphore sem; diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 274ccf18ce2d..061d65b8a287 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -2620,6 +2620,13 @@ err_out: return NULL; } +static int hisi_sas_interrupt_preinit(struct hisi_hba *hisi_hba) +{ + if (hisi_hba->hw->interrupt_preinit) + return hisi_hba->hw->interrupt_preinit(hisi_hba); + return 0; +} + int hisi_sas_probe(struct platform_device *pdev, const struct hisi_sas_hw *hw) { @@ -2677,6 +2684,10 @@ int hisi_sas_probe(struct platform_device *pdev, sha->sas_port[i] = &hisi_hba->port[i].sas_port; } + rc = hisi_sas_interrupt_preinit(hisi_hba); + if (rc) + goto err_out_ha; + rc = scsi_add_host(shost, &pdev->dev); if (rc) goto err_out_ha; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c index b57177b52fac..9adfdefef9ca 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c @@ -3302,6 +3302,28 @@ static irq_handler_t fatal_interrupts[HISI_SAS_FATAL_INT_NR] = { fatal_axi_int_v2_hw }; +#define CQ0_IRQ_INDEX (96) + +static int hisi_sas_v2_interrupt_preinit(struct hisi_hba *hisi_hba) +{ + struct platform_device *pdev = hisi_hba->platform_dev; + struct Scsi_Host *shost = hisi_hba->shost; + struct irq_affinity desc = { + .pre_vectors = CQ0_IRQ_INDEX, + .post_vectors = 16, + }; + int resv = desc.pre_vectors + desc.post_vectors, minvec = resv + 1, nvec; + + nvec = devm_platform_get_irqs_affinity(pdev, &desc, minvec, 128, + &hisi_hba->irq_map); + if (nvec < 0) + return nvec; + + shost->nr_hw_queues = hisi_hba->cq_nvecs = nvec - resv; + + return 0; +} + /* * There is a limitation in the hip06 chipset that we need * to map in all mbigen interrupts, even if they are not used. @@ -3310,14 +3332,11 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba) { struct platform_device *pdev = hisi_hba->platform_dev; struct device *dev = &pdev->dev; - int irq, rc = 0, irq_map[128]; + int irq, rc = 0; int i, phy_no, fatal_no, queue_no; - for (i = 0; i < 128; i++) - irq_map[i] = platform_get_irq(pdev, i); - for (i = 0; i < HISI_SAS_PHY_INT_NR; i++) { - irq = irq_map[i + 1]; /* Phy up/down is irq1 */ + irq = hisi_hba->irq_map[i + 1]; /* Phy up/down is irq1 */ rc = devm_request_irq(dev, irq, phy_interrupts[i], 0, DRV_NAME " phy", hisi_hba); if (rc) { @@ -3331,7 +3350,7 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba) for (phy_no = 0; phy_no < hisi_hba->n_phy; phy_no++) { struct hisi_sas_phy *phy = &hisi_hba->phy[phy_no]; - irq = irq_map[phy_no + 72]; + irq = hisi_hba->irq_map[phy_no + 72]; rc = devm_request_irq(dev, irq, sata_int_v2_hw, 0, DRV_NAME " sata", phy); if (rc) { @@ -3343,7 +3362,7 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba) } for (fatal_no = 0; fatal_no < HISI_SAS_FATAL_INT_NR; fatal_no++) { - irq = irq_map[fatal_no + 81]; + irq = hisi_hba->irq_map[fatal_no + 81]; rc = devm_request_irq(dev, irq, fatal_interrupts[fatal_no], 0, DRV_NAME " fatal", hisi_hba); if (rc) { @@ -3354,24 +3373,22 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba) } } - for (queue_no = 0; queue_no < hisi_hba->queue_count; queue_no++) { + for (queue_no = 0; queue_no < hisi_hba->cq_nvecs; queue_no++) { struct hisi_sas_cq *cq = &hisi_hba->cq[queue_no]; - cq->irq_no = irq_map[queue_no + 96]; + cq->irq_no = hisi_hba->irq_map[queue_no + 96]; rc = devm_request_threaded_irq(dev, cq->irq_no, cq_interrupt_v2_hw, cq_thread_v2_hw, IRQF_ONESHOT, DRV_NAME " cq", cq); if (rc) { dev_err(dev, "irq init: could not request cq interrupt %d, rc=%d\n", - irq, rc); + cq->irq_no, rc); rc = -ENOENT; goto err_out; } + cq->irq_mask = irq_get_affinity_mask(cq->irq_no); } - - hisi_hba->cq_nvecs = hisi_hba->queue_count; - err_out: return rc; } @@ -3529,6 +3546,26 @@ static struct device_attribute *host_attrs_v2_hw[] = { NULL }; +static int map_queues_v2_hw(struct Scsi_Host *shost) +{ + struct hisi_hba *hisi_hba = shost_priv(shost); + struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT]; + const struct cpumask *mask; + unsigned int queue, cpu; + + for (queue = 0; queue < qmap->nr_queues; queue++) { + mask = irq_get_affinity_mask(hisi_hba->irq_map[96 + queue]); + if (!mask) + continue; + + for_each_cpu(cpu, mask) + qmap->mq_map[cpu] = qmap->queue_offset + queue; + } + + return 0; + +} + static struct scsi_host_template sht_v2_hw = { .name = DRV_NAME, .proc_name = DRV_NAME, @@ -3553,10 +3590,13 @@ static struct scsi_host_template sht_v2_hw = { #endif .shost_attrs = host_attrs_v2_hw, .host_reset = hisi_sas_host_reset, + .map_queues = map_queues_v2_hw, + .host_tagset = 1, }; static const struct hisi_sas_hw hisi_sas_v2_hw = { .hw_init = hisi_sas_v2_init, + .interrupt_preinit = hisi_sas_v2_interrupt_preinit, .setup_itct = setup_itct_v2_hw, .slot_index_alloc = slot_index_alloc_quirk_v2_hw, .alloc_dev = alloc_dev_quirk_v2_hw, -- cgit 1.4.1 From 37309f47e2f5674f3e86cb765312ace42cfcedf5 Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Wed, 9 Dec 2020 20:52:30 -0800 Subject: HID: wacom: Fix memory leakage caused by kfifo_alloc As reported by syzbot below, kfifo_alloc'd memory would not be freed if a non-zero return value is triggered in wacom_probe. This patch creates and uses devm_kfifo_alloc to allocate and free itself. BUG: memory leak unreferenced object 0xffff88810dc44a00 (size 512): comm "kworker/1:2", pid 3674, jiffies 4294943617 (age 14.100s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<0000000023e1afac>] kmalloc_array include/linux/slab.h:592 [inline] [<0000000023e1afac>] __kfifo_alloc+0xad/0x100 lib/kfifo.c:43 [<00000000c477f737>] wacom_probe+0x1a1/0x3b0 drivers/hid/wacom_sys.c:2727 [<00000000b3109aca>] hid_device_probe+0x16b/0x210 drivers/hid/hid-core.c:2281 [<00000000aff7c640>] really_probe+0x159/0x480 drivers/base/dd.c:554 [<00000000778d0bc3>] driver_probe_device+0x84/0x100 drivers/base/dd.c:738 [<000000005108dbb5>] __device_attach_driver+0xee/0x110 drivers/base/dd.c:844 [<00000000efb7c59e>] bus_for_each_drv+0xb7/0x100 drivers/base/bus.c:431 [<0000000024ab1590>] __device_attach+0x122/0x250 drivers/base/dd.c:912 [<000000004c7ac048>] bus_probe_device+0xc6/0xe0 drivers/base/bus.c:491 [<00000000b93050a3>] device_add+0x5ac/0xc30 drivers/base/core.c:2936 [<00000000e5b46ea5>] hid_add_device+0x151/0x390 drivers/hid/hid-core.c:2437 [<00000000c6add147>] usbhid_probe+0x412/0x560 drivers/hid/usbhid/hid-core.c:1407 [<00000000c33acdb4>] usb_probe_interface+0x177/0x370 drivers/usb/core/driver.c:396 [<00000000aff7c640>] really_probe+0x159/0x480 drivers/base/dd.c:554 [<00000000778d0bc3>] driver_probe_device+0x84/0x100 drivers/base/dd.c:738 [<000000005108dbb5>] __device_attach_driver+0xee/0x110 drivers/base/dd.c:844 https://syzkaller.appspot.com/bug?extid=5b49c9695968d7250a26 Reported-by: syzbot+5b49c9695968d7250a26@syzkaller.appspotmail.com Signed-off-by: Ping Cheng Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/wacom_sys.c | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 045c464228d9..e8acd235db2a 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -1270,6 +1270,37 @@ static int wacom_devm_sysfs_create_group(struct wacom *wacom, group); } +static void wacom_devm_kfifo_release(struct device *dev, void *res) +{ + struct kfifo_rec_ptr_2 *devres = res; + + kfifo_free(devres); +} + +static int wacom_devm_kfifo_alloc(struct wacom *wacom) +{ + struct wacom_wac *wacom_wac = &wacom->wacom_wac; + struct kfifo_rec_ptr_2 *pen_fifo = &wacom_wac->pen_fifo; + int error; + + pen_fifo = devres_alloc(wacom_devm_kfifo_release, + sizeof(struct kfifo_rec_ptr_2), + GFP_KERNEL); + + if (!pen_fifo) + return -ENOMEM; + + error = kfifo_alloc(pen_fifo, WACOM_PKGLEN_MAX, GFP_KERNEL); + if (error) { + devres_free(pen_fifo); + return error; + } + + devres_add(&wacom->hdev->dev, pen_fifo); + + return 0; +} + enum led_brightness wacom_leds_brightness_get(struct wacom_led *led) { struct wacom *wacom = led->wacom; @@ -2724,7 +2755,7 @@ static int wacom_probe(struct hid_device *hdev, if (features->check_for_hid_type && features->hid_type != hdev->type) return -ENODEV; - error = kfifo_alloc(&wacom_wac->pen_fifo, WACOM_PKGLEN_MAX, GFP_KERNEL); + error = wacom_devm_kfifo_alloc(wacom); if (error) return error; @@ -2786,8 +2817,6 @@ static void wacom_remove(struct hid_device *hdev) if (wacom->wacom_wac.features.type != REMOTE) wacom_release_resources(wacom); - - kfifo_free(&wacom_wac->pen_fifo); } #ifdef CONFIG_PM -- cgit 1.4.1 From 2a5f1b67ec577fb1544b563086e0377f095f88e2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 10 Dec 2020 08:30:59 +0000 Subject: KVM: arm64: Don't access PMCR_EL0 when no PMU is available We reset the guest's view of PMCR_EL0 unconditionally, based on the host's view of this register. It is however legal for an implementation not to provide any PMU, resulting in an UNDEF. The obvious fix is to skip the reset of this shadow register when no PMU is available, sidestepping the issue entirely. If no PMU is available, the guest is not able to request a virtual PMU anyway, so not doing nothing is the right thing to do! It is unlikely that this bug can hit any HW implementation though, as they all provide a PMU. It has been found using nested virt with the host KVM not implementing the PMU itself. Fixes: ab9468340d2bc ("arm64: KVM: Add access handler for PMCR register") Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201210083059.1277162-1-maz@kernel.org --- arch/arm64/kvm/sys_regs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 3313dedfa505..d46e7f706cb0 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -594,6 +594,10 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 pmcr, val; + /* No PMU available, PMCR_EL0 may UNDEF... */ + if (!kvm_arm_support_pmu_v3()) + return; + pmcr = read_sysreg(pmcr_el0); /* * Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) are reset to UNKNOWN -- cgit 1.4.1 From ff367fe473a9857160c17827931375a899076394 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Tue, 8 Dec 2020 14:24:47 +0000 Subject: KVM: arm64: Prevent use of invalid PSCI v0.1 function IDs PSCI driver exposes a struct containing the PSCI v0.1 function IDs configured in the DT. However, the struct does not convey the information whether these were set from DT or contain the default value zero. This could be a problem for PSCI proxy in KVM protected mode. Extend config passed to KVM with a bit mask with individual bits set depending on whether the corresponding function pointer in psci_ops is set, eg. set bit for PSCI_CPU_SUSPEND if psci_ops.cpu_suspend != NULL. Previously config was split into multiple global variables. Put everything into a single struct for convenience. Reported-by: Mark Rutland Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201208142452.87237-2-dbrazdil@google.com --- arch/arm64/include/asm/kvm_host.h | 20 ++++++++++++++ arch/arm64/kvm/arm.c | 14 +++++++--- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 53 +++++++++++++++++++++++++++--------- 3 files changed, 70 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 11beda85ee7e..828d50d40dc2 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -240,6 +241,25 @@ struct kvm_host_data { struct kvm_pmu_events pmu_events; }; +#define KVM_HOST_PSCI_0_1_CPU_SUSPEND BIT(0) +#define KVM_HOST_PSCI_0_1_CPU_ON BIT(1) +#define KVM_HOST_PSCI_0_1_CPU_OFF BIT(2) +#define KVM_HOST_PSCI_0_1_MIGRATE BIT(3) + +struct kvm_host_psci_config { + /* PSCI version used by host. */ + u32 version; + + /* Function IDs used by host if version is v0.1. */ + struct psci_0_1_function_ids function_ids_0_1; + + /* Bitmask of functions enabled for v0.1, bits KVM_HOST_PSCI_0_1_*. */ + unsigned int enabled_functions_0_1; +}; + +extern struct kvm_host_psci_config kvm_nvhe_sym(kvm_host_psci_config); +#define kvm_host_psci_config CHOOSE_NVHE_SYM(kvm_host_psci_config) + struct vcpu_reset_state { unsigned long pc; unsigned long r0; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 6e637d2b4cfb..6a2f4e01b04f 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -66,8 +66,6 @@ static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use); extern u64 kvm_nvhe_sym(__cpu_logical_map)[NR_CPUS]; -extern u32 kvm_nvhe_sym(kvm_host_psci_version); -extern struct psci_0_1_function_ids kvm_nvhe_sym(kvm_host_psci_0_1_function_ids); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { @@ -1618,8 +1616,16 @@ static bool init_psci_relay(void) return false; } - kvm_nvhe_sym(kvm_host_psci_version) = psci_ops.get_version(); - kvm_nvhe_sym(kvm_host_psci_0_1_function_ids) = get_psci_0_1_function_ids(); + kvm_host_psci_config.version = psci_ops.get_version(); + + if (kvm_host_psci_config.version == PSCI_VERSION(0, 1)) { + kvm_host_psci_config.function_ids_0_1 = get_psci_0_1_function_ids(); + kvm_host_psci_config.enabled_functions_0_1 = + (psci_ops.cpu_suspend ? KVM_HOST_PSCI_0_1_CPU_SUSPEND : 0) | + (psci_ops.cpu_off ? KVM_HOST_PSCI_0_1_CPU_OFF : 0) | + (psci_ops.cpu_on ? KVM_HOST_PSCI_0_1_CPU_ON : 0) | + (psci_ops.migrate ? KVM_HOST_PSCI_0_1_MIGRATE : 0); + } return true; } diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 08dc9de69314..0d6f4aa39621 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -22,9 +22,8 @@ void kvm_hyp_cpu_resume(unsigned long r0); void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); /* Config options set by the host. */ -__ro_after_init u32 kvm_host_psci_version; -__ro_after_init struct psci_0_1_function_ids kvm_host_psci_0_1_function_ids; -__ro_after_init s64 hyp_physvirt_offset; +struct kvm_host_psci_config __ro_after_init kvm_host_psci_config; +s64 __ro_after_init hyp_physvirt_offset; #define __hyp_pa(x) ((phys_addr_t)((x)) + hyp_physvirt_offset) @@ -54,12 +53,41 @@ static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt) return func_id; } +static inline bool is_psci_0_1_function_enabled(unsigned int fn_bit) +{ + return kvm_host_psci_config.enabled_functions_0_1 & fn_bit; +} + +static inline bool is_psci_0_1_cpu_suspend(u64 func_id) +{ + return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_SUSPEND) && + (func_id == kvm_host_psci_config.function_ids_0_1.cpu_suspend); +} + +static inline bool is_psci_0_1_cpu_on(u64 func_id) +{ + return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_ON) && + (func_id == kvm_host_psci_config.function_ids_0_1.cpu_on); +} + +static inline bool is_psci_0_1_cpu_off(u64 func_id) +{ + return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_OFF) && + (func_id == kvm_host_psci_config.function_ids_0_1.cpu_off); +} + +static inline bool is_psci_0_1_migrate(u64 func_id) +{ + return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_MIGRATE) && + (func_id == kvm_host_psci_config.function_ids_0_1.migrate); +} + static bool is_psci_0_1_call(u64 func_id) { - return (func_id == kvm_host_psci_0_1_function_ids.cpu_suspend) || - (func_id == kvm_host_psci_0_1_function_ids.cpu_on) || - (func_id == kvm_host_psci_0_1_function_ids.cpu_off) || - (func_id == kvm_host_psci_0_1_function_ids.migrate); + return is_psci_0_1_cpu_suspend(func_id) || + is_psci_0_1_cpu_on(func_id) || + is_psci_0_1_cpu_off(func_id) || + is_psci_0_1_migrate(func_id); } static bool is_psci_0_2_call(u64 func_id) @@ -71,7 +99,7 @@ static bool is_psci_0_2_call(u64 func_id) static bool is_psci_call(u64 func_id) { - switch (kvm_host_psci_version) { + switch (kvm_host_psci_config.version) { case PSCI_VERSION(0, 1): return is_psci_0_1_call(func_id); default: @@ -248,12 +276,11 @@ asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on) static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) { - if ((func_id == kvm_host_psci_0_1_function_ids.cpu_off) || - (func_id == kvm_host_psci_0_1_function_ids.migrate)) + if (is_psci_0_1_cpu_off(func_id) || is_psci_0_1_migrate(func_id)) return psci_forward(host_ctxt); - else if (func_id == kvm_host_psci_0_1_function_ids.cpu_on) + else if (is_psci_0_1_cpu_on(func_id)) return psci_cpu_on(func_id, host_ctxt); - else if (func_id == kvm_host_psci_0_1_function_ids.cpu_suspend) + else if (is_psci_0_1_cpu_suspend(func_id)) return psci_cpu_suspend(func_id, host_ctxt); else return PSCI_RET_NOT_SUPPORTED; @@ -304,7 +331,7 @@ bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt) if (!is_psci_call(func_id)) return false; - switch (kvm_host_psci_version) { + switch (kvm_host_psci_config.version) { case PSCI_VERSION(0, 1): ret = psci_0_1_handler(func_id, host_ctxt); break; -- cgit 1.4.1 From 7a96a0687b80a1870c689418d7b72012c8bdd53d Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Tue, 8 Dec 2020 14:24:48 +0000 Subject: KVM: arm64: Use lm_alias in nVHE-only VA conversion init_hyp_physvirt_offset() computes PA from a kernel VA. Conversion to kernel linear-map is required first but the code used kvm_ksym_ref() for this purpose. Under VHE that is a NOP and resulted in a runtime warning. Replace kvm_ksym_ref with lm_alias. Reported-by: Qian Cai Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201208142452.87237-3-dbrazdil@google.com --- arch/arm64/kvm/va_layout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index d8cc51bd60bf..914732b88c69 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -42,7 +42,7 @@ static void init_hyp_physvirt_offset(void) u64 kern_va, hyp_va; /* Compute the offset from the hyp VA and PA of a random symbol. */ - kern_va = (u64)kvm_ksym_ref(__hyp_text_start); + kern_va = (u64)lm_alias(__hyp_text_start); hyp_va = __early_kern_hyp_va(kern_va); CHOOSE_NVHE_SYM(hyp_physvirt_offset) = (s64)__pa(kern_va) - (s64)hyp_va; } -- cgit 1.4.1 From c3e181aec96f6ada84df1cb72a72be8970f8b284 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Tue, 8 Dec 2020 14:24:49 +0000 Subject: KVM: arm64: Skip computing hyp VA layout for VHE Computing the hyp VA layout is redundant when the kernel runs in EL2 and hyp shares its VA mappings. Make calling kvm_compute_layout() conditional on not just CONFIG_KVM but also !is_kernel_in_hyp_mode(). Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201208142452.87237-4-dbrazdil@google.com --- arch/arm64/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 18e9727d3f64..4e585cc892e8 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -434,7 +434,7 @@ static void __init hyp_mode_check(void) "CPU: CPUs started in inconsistent modes"); else pr_info("CPU: All CPU(s) started at EL1\n"); - if (IS_ENABLED(CONFIG_KVM)) + if (IS_ENABLED(CONFIG_KVM) && !is_kernel_in_hyp_mode()) kvm_compute_layout(); } -- cgit 1.4.1 From 61fe0c37af57ac35472a870581a7d0bb5ac2f63a Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Tue, 8 Dec 2020 14:24:50 +0000 Subject: KVM: arm64: Minor cleanup of hyp variables used in host Small cleanup moving declarations of hyp-exported variables to kvm_host.h and using macros to avoid having to refer to them with kvm_nvhe_sym() in host. No functional change intended. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201208142452.87237-5-dbrazdil@google.com --- arch/arm64/include/asm/kvm_host.h | 6 ++++++ arch/arm64/kvm/arm.c | 4 +--- arch/arm64/kvm/hyp/nvhe/hyp-smp.c | 6 +++--- arch/arm64/kvm/va_layout.c | 5 ++--- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 828d50d40dc2..bce2452b305c 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -260,6 +260,12 @@ struct kvm_host_psci_config { extern struct kvm_host_psci_config kvm_nvhe_sym(kvm_host_psci_config); #define kvm_host_psci_config CHOOSE_NVHE_SYM(kvm_host_psci_config) +extern s64 kvm_nvhe_sym(hyp_physvirt_offset); +#define hyp_physvirt_offset CHOOSE_NVHE_SYM(hyp_physvirt_offset) + +extern u64 kvm_nvhe_sym(hyp_cpu_logical_map)[NR_CPUS]; +#define hyp_cpu_logical_map CHOOSE_NVHE_SYM(hyp_cpu_logical_map) + struct vcpu_reset_state { unsigned long pc; unsigned long r0; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 6a2f4e01b04f..836ca763b91d 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -65,8 +65,6 @@ static bool vgic_present; static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use); -extern u64 kvm_nvhe_sym(__cpu_logical_map)[NR_CPUS]; - int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; @@ -1602,7 +1600,7 @@ static void init_cpu_logical_map(void) * allow any other CPUs from the `possible` set to boot. */ for_each_online_cpu(cpu) - kvm_nvhe_sym(__cpu_logical_map)[cpu] = cpu_logical_map(cpu); + hyp_cpu_logical_map[cpu] = cpu_logical_map(cpu); } static bool init_psci_relay(void) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c index cbab0c6246e2..2997aa156d8e 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c @@ -14,14 +14,14 @@ * Other CPUs should not be allowed to boot because their features were * not checked against the finalized system capabilities. */ -u64 __ro_after_init __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; +u64 __ro_after_init hyp_cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; u64 cpu_logical_map(unsigned int cpu) { - if (cpu >= ARRAY_SIZE(__cpu_logical_map)) + if (cpu >= ARRAY_SIZE(hyp_cpu_logical_map)) hyp_panic(); - return __cpu_logical_map[cpu]; + return hyp_cpu_logical_map[cpu]; } unsigned long __hyp_per_cpu_offset(unsigned int cpu) diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index 914732b88c69..70fcd6a12fe1 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -34,17 +34,16 @@ static u64 __early_kern_hyp_va(u64 addr) } /* - * Store a hyp VA <-> PA offset into a hyp-owned variable. + * Store a hyp VA <-> PA offset into a EL2-owned variable. */ static void init_hyp_physvirt_offset(void) { - extern s64 kvm_nvhe_sym(hyp_physvirt_offset); u64 kern_va, hyp_va; /* Compute the offset from the hyp VA and PA of a random symbol. */ kern_va = (u64)lm_alias(__hyp_text_start); hyp_va = __early_kern_hyp_va(kern_va); - CHOOSE_NVHE_SYM(hyp_physvirt_offset) = (s64)__pa(kern_va) - (s64)hyp_va; + hyp_physvirt_offset = (s64)__pa(kern_va) - (s64)hyp_va; } /* -- cgit 1.4.1 From e6829e0384a49efe68537298132230bebd8bd1b3 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Tue, 8 Dec 2020 14:24:51 +0000 Subject: KVM: arm64: Remove unused includes in psci-relay.c Minor cleanup removing unused includes. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201208142452.87237-6-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 0d6f4aa39621..1f7237e45148 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -7,11 +7,8 @@ #include #include #include -#include #include #include -#include -#include #include #include -- cgit 1.4.1 From 860a4c3d1e04a3c3e62bacbbba64417bf49768e2 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Tue, 8 Dec 2020 14:24:52 +0000 Subject: KVM: arm64: Move skip_host_instruction to adjust_pc.h Move function for skipping host instruction in the host trap handler to a header file containing analogical helpers for guests. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201208142452.87237-7-dbrazdil@google.com --- arch/arm64/kvm/hyp/include/hyp/adjust_pc.h | 9 +++++++++ arch/arm64/kvm/hyp/nvhe/hyp-main.c | 12 ++---------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h index b1f60923a8fe..61716359035d 100644 --- a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h +++ b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h @@ -59,4 +59,13 @@ static inline void __adjust_pc(struct kvm_vcpu *vcpu) } } +/* + * Skip an instruction while host sysregs are live. + * Assumes host is always 64-bit. + */ +static inline void kvm_skip_host_instr(void) +{ + write_sysreg_el2(read_sysreg_el2(SYS_ELR) + 4, SYS_ELR); +} + #endif diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index bde658d51404..a906f9e2ff34 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -157,11 +157,6 @@ static void default_host_smc_handler(struct kvm_cpu_context *host_ctxt) __kvm_hyp_host_forward_smc(host_ctxt); } -static void skip_host_instruction(void) -{ - write_sysreg_el2(read_sysreg_el2(SYS_ELR) + 4, SYS_ELR); -} - static void handle_host_smc(struct kvm_cpu_context *host_ctxt) { bool handled; @@ -170,11 +165,8 @@ static void handle_host_smc(struct kvm_cpu_context *host_ctxt) if (!handled) default_host_smc_handler(host_ctxt); - /* - * Unlike HVC, the return address of an SMC is the instruction's PC. - * Move the return address past the instruction. - */ - skip_host_instruction(); + /* SMC was trapped, move ELR past the current PC. */ + kvm_skip_host_instr(); } void handle_trap(struct kvm_cpu_context *host_ctxt) -- cgit 1.4.1 From 767c973f2e4a9264a4f159c9fad5ca8acdb9915e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 22 Dec 2020 12:46:41 +0000 Subject: KVM: arm64: Declutter host PSCI 0.1 handling Although there is nothing wrong with the current host PSCI relay implementation, we can clean it up and remove some of the helpers that do not improve the overall readability of the legacy PSCI 0.1 handling. Opportunity is taken to turn the bitmap into a set of booleans, and creative use of preprocessor macros make init and check more concise/readable. Suggested-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 11 ++---- arch/arm64/kvm/arm.c | 12 +++--- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 77 +++++++++--------------------------- 3 files changed, 30 insertions(+), 70 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index bce2452b305c..8fcfab0c2567 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -241,11 +241,6 @@ struct kvm_host_data { struct kvm_pmu_events pmu_events; }; -#define KVM_HOST_PSCI_0_1_CPU_SUSPEND BIT(0) -#define KVM_HOST_PSCI_0_1_CPU_ON BIT(1) -#define KVM_HOST_PSCI_0_1_CPU_OFF BIT(2) -#define KVM_HOST_PSCI_0_1_MIGRATE BIT(3) - struct kvm_host_psci_config { /* PSCI version used by host. */ u32 version; @@ -253,8 +248,10 @@ struct kvm_host_psci_config { /* Function IDs used by host if version is v0.1. */ struct psci_0_1_function_ids function_ids_0_1; - /* Bitmask of functions enabled for v0.1, bits KVM_HOST_PSCI_0_1_*. */ - unsigned int enabled_functions_0_1; + bool psci_0_1_cpu_suspend_implemented; + bool psci_0_1_cpu_on_implemented; + bool psci_0_1_cpu_off_implemented; + bool psci_0_1_migrate_implemented; }; extern struct kvm_host_psci_config kvm_nvhe_sym(kvm_host_psci_config); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 836ca763b91d..e207e4541f55 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1603,6 +1603,9 @@ static void init_cpu_logical_map(void) hyp_cpu_logical_map[cpu] = cpu_logical_map(cpu); } +#define init_psci_0_1_impl_state(config, what) \ + config.psci_0_1_ ## what ## _implemented = psci_ops.what + static bool init_psci_relay(void) { /* @@ -1618,11 +1621,10 @@ static bool init_psci_relay(void) if (kvm_host_psci_config.version == PSCI_VERSION(0, 1)) { kvm_host_psci_config.function_ids_0_1 = get_psci_0_1_function_ids(); - kvm_host_psci_config.enabled_functions_0_1 = - (psci_ops.cpu_suspend ? KVM_HOST_PSCI_0_1_CPU_SUSPEND : 0) | - (psci_ops.cpu_off ? KVM_HOST_PSCI_0_1_CPU_OFF : 0) | - (psci_ops.cpu_on ? KVM_HOST_PSCI_0_1_CPU_ON : 0) | - (psci_ops.migrate ? KVM_HOST_PSCI_0_1_MIGRATE : 0); + init_psci_0_1_impl_state(kvm_host_psci_config, cpu_suspend); + init_psci_0_1_impl_state(kvm_host_psci_config, cpu_on); + init_psci_0_1_impl_state(kvm_host_psci_config, cpu_off); + init_psci_0_1_impl_state(kvm_host_psci_config, migrate); } return true; } diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 1f7237e45148..e3947846ffcb 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -43,48 +43,16 @@ struct psci_boot_args { static DEFINE_PER_CPU(struct psci_boot_args, cpu_on_args) = PSCI_BOOT_ARGS_INIT; static DEFINE_PER_CPU(struct psci_boot_args, suspend_args) = PSCI_BOOT_ARGS_INIT; -static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt) -{ - DECLARE_REG(u64, func_id, host_ctxt, 0); - - return func_id; -} - -static inline bool is_psci_0_1_function_enabled(unsigned int fn_bit) -{ - return kvm_host_psci_config.enabled_functions_0_1 & fn_bit; -} - -static inline bool is_psci_0_1_cpu_suspend(u64 func_id) -{ - return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_SUSPEND) && - (func_id == kvm_host_psci_config.function_ids_0_1.cpu_suspend); -} - -static inline bool is_psci_0_1_cpu_on(u64 func_id) -{ - return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_ON) && - (func_id == kvm_host_psci_config.function_ids_0_1.cpu_on); -} - -static inline bool is_psci_0_1_cpu_off(u64 func_id) -{ - return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_OFF) && - (func_id == kvm_host_psci_config.function_ids_0_1.cpu_off); -} - -static inline bool is_psci_0_1_migrate(u64 func_id) -{ - return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_MIGRATE) && - (func_id == kvm_host_psci_config.function_ids_0_1.migrate); -} +#define is_psci_0_1(what, func_id) \ + (kvm_host_psci_config.psci_0_1_ ## what ## _implemented && \ + (func_id) == kvm_host_psci_config.function_ids_0_1.what) static bool is_psci_0_1_call(u64 func_id) { - return is_psci_0_1_cpu_suspend(func_id) || - is_psci_0_1_cpu_on(func_id) || - is_psci_0_1_cpu_off(func_id) || - is_psci_0_1_migrate(func_id); + return (is_psci_0_1(cpu_suspend, func_id) || + is_psci_0_1(cpu_on, func_id) || + is_psci_0_1(cpu_off, func_id) || + is_psci_0_1(migrate, func_id)); } static bool is_psci_0_2_call(u64 func_id) @@ -94,16 +62,6 @@ static bool is_psci_0_2_call(u64 func_id) (PSCI_0_2_FN64(0) <= func_id && func_id <= PSCI_0_2_FN64(31)); } -static bool is_psci_call(u64 func_id) -{ - switch (kvm_host_psci_config.version) { - case PSCI_VERSION(0, 1): - return is_psci_0_1_call(func_id); - default: - return is_psci_0_2_call(func_id); - } -} - static unsigned long psci_call(unsigned long fn, unsigned long arg0, unsigned long arg1, unsigned long arg2) { @@ -273,14 +231,14 @@ asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on) static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) { - if (is_psci_0_1_cpu_off(func_id) || is_psci_0_1_migrate(func_id)) + if (is_psci_0_1(cpu_off, func_id) || is_psci_0_1(migrate, func_id)) return psci_forward(host_ctxt); - else if (is_psci_0_1_cpu_on(func_id)) + if (is_psci_0_1(cpu_on, func_id)) return psci_cpu_on(func_id, host_ctxt); - else if (is_psci_0_1_cpu_suspend(func_id)) + if (is_psci_0_1(cpu_suspend, func_id)) return psci_cpu_suspend(func_id, host_ctxt); - else - return PSCI_RET_NOT_SUPPORTED; + + return PSCI_RET_NOT_SUPPORTED; } static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) @@ -322,20 +280,23 @@ static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_ bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt) { - u64 func_id = get_psci_func_id(host_ctxt); + DECLARE_REG(u64, func_id, host_ctxt, 0); unsigned long ret; - if (!is_psci_call(func_id)) - return false; - switch (kvm_host_psci_config.version) { case PSCI_VERSION(0, 1): + if (!is_psci_0_1_call(func_id)) + return false; ret = psci_0_1_handler(func_id, host_ctxt); break; case PSCI_VERSION(0, 2): + if (!is_psci_0_2_call(func_id)) + return false; ret = psci_0_2_handler(func_id, host_ctxt); break; default: + if (!is_psci_0_2_call(func_id)) + return false; ret = psci_1_0_handler(func_id, host_ctxt); break; } -- cgit 1.4.1 From e789ca0cc1d51296832b8424fa4008ce6e9d1703 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Dec 2020 11:18:37 +0100 Subject: ext4: combine ext4_handle_error() and save_error_info() save_error_info() is always called together with ext4_handle_error(). Combine them into a single call and move unconditional bits out of save_error_info() into ext4_handle_error(). Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201216101844.22917-2-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4bbfb05aae58..cdf2a377d884 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -592,9 +592,6 @@ static void __save_error_info(struct super_block *sb, int error, { struct ext4_sb_info *sbi = EXT4_SB(sb); - EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; - if (bdev_read_only(sb->s_bdev)) - return; /* We default to EFSCORRUPTED error... */ if (error == 0) error = EFSCORRUPTED; @@ -647,13 +644,19 @@ static void save_error_info(struct super_block *sb, int error, * used to deal with unrecoverable failures such as journal IO errors or ENOMEM * at a critical moment in log management. */ -static void ext4_handle_error(struct super_block *sb, bool force_ro) +static void ext4_handle_error(struct super_block *sb, bool force_ro, int error, + __u32 ino, __u64 block, + const char *func, unsigned int line) { journal_t *journal = EXT4_SB(sb)->s_journal; + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; if (test_opt(sb, WARN_ON_ERROR)) WARN_ON_ONCE(1); + if (!bdev_read_only(sb->s_bdev)) + save_error_info(sb, error, ino, block, func, line); + if (sb_rdonly(sb) || (!force_ro && test_opt(sb, ERRORS_CONT))) return; @@ -710,8 +713,7 @@ void __ext4_error(struct super_block *sb, const char *function, sb->s_id, function, line, current->comm, &vaf); va_end(args); } - save_error_info(sb, error, 0, block, function, line); - ext4_handle_error(sb, force_ro); + ext4_handle_error(sb, force_ro, error, 0, block, function, line); } void __ext4_error_inode(struct inode *inode, const char *function, @@ -741,9 +743,8 @@ void __ext4_error_inode(struct inode *inode, const char *function, current->comm, &vaf); va_end(args); } - save_error_info(inode->i_sb, error, inode->i_ino, block, - function, line); - ext4_handle_error(inode->i_sb, false); + ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block, + function, line); } void __ext4_error_file(struct file *file, const char *function, @@ -780,9 +781,8 @@ void __ext4_error_file(struct file *file, const char *function, current->comm, path, &vaf); va_end(args); } - save_error_info(inode->i_sb, EFSCORRUPTED, inode->i_ino, block, - function, line); - ext4_handle_error(inode->i_sb, false); + ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block, + function, line); } const char *ext4_decode_error(struct super_block *sb, int errno, @@ -849,8 +849,7 @@ void __ext4_std_error(struct super_block *sb, const char *function, sb->s_id, function, line, errstr); } - save_error_info(sb, -errno, 0, 0, function, line); - ext4_handle_error(sb, false); + ext4_handle_error(sb, false, -errno, 0, 0, function, line); } void __ext4_msg(struct super_block *sb, @@ -944,13 +943,14 @@ __acquires(bitlock) if (test_opt(sb, ERRORS_CONT)) { if (test_opt(sb, WARN_ON_ERROR)) WARN_ON_ONCE(1); + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; __save_error_info(sb, EFSCORRUPTED, ino, block, function, line); - schedule_work(&EXT4_SB(sb)->s_error_work); + if (!bdev_read_only(sb->s_bdev)) + schedule_work(&EXT4_SB(sb)->s_error_work); return; } ext4_unlock_group(sb, grp); - save_error_info(sb, EFSCORRUPTED, ino, block, function, line); - ext4_handle_error(sb, false); + ext4_handle_error(sb, false, EFSCORRUPTED, ino, block, function, line); /* * We only get here in the ERRORS_RO case; relocking the group * may be dangerous, but nothing bad will happen since the -- cgit 1.4.1 From 4392fbc4bab57db3760f0fb61258cb7089b37665 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Dec 2020 11:18:38 +0100 Subject: ext4: drop sync argument of ext4_commit_super() Everybody passes 1 as sync argument of ext4_commit_super(). Just drop it. Reviewed-by: Harshad Shirwadkar Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201216101844.22917-3-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cdf2a377d884..8bf31003416a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -65,7 +65,7 @@ static struct ratelimit_state ext4_mount_msg_ratelimit; static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); static int ext4_show_options(struct seq_file *seq, struct dentry *root); -static int ext4_commit_super(struct super_block *sb, int sync); +static int ext4_commit_super(struct super_block *sb); static int ext4_mark_recovery_complete(struct super_block *sb, struct ext4_super_block *es); static int ext4_clear_journal_err(struct super_block *sb, @@ -621,7 +621,7 @@ static void save_error_info(struct super_block *sb, int error, { __save_error_info(sb, error, ino, block, func, line); if (!bdev_read_only(sb->s_bdev)) - ext4_commit_super(sb, 1); + ext4_commit_super(sb); } /* Deal with the reporting of failure conditions on a filesystem such as @@ -686,7 +686,7 @@ static void flush_stashed_error_work(struct work_struct *work) struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info, s_error_work); - ext4_commit_super(sbi->s_sb, 1); + ext4_commit_super(sbi->s_sb); } #define ext4_error_ratelimit(sb) \ @@ -1152,7 +1152,7 @@ static void ext4_put_super(struct super_block *sb) es->s_state = cpu_to_le16(sbi->s_mount_state); } if (!sb_rdonly(sb)) - ext4_commit_super(sb, 1); + ext4_commit_super(sb); rcu_read_lock(); group_desc = rcu_dereference(sbi->s_group_desc); @@ -2642,7 +2642,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, if (sbi->s_journal) ext4_set_feature_journal_needs_recovery(sb); - err = ext4_commit_super(sb, 1); + err = ext4_commit_super(sb); done: if (test_opt(sb, DEBUG)) printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " @@ -4869,7 +4869,7 @@ no_journal: if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) && !ext4_has_feature_encrypt(sb)) { ext4_set_feature_encrypt(sb); - ext4_commit_super(sb, 1); + ext4_commit_super(sb); } /* @@ -5424,7 +5424,7 @@ static int ext4_load_journal(struct super_block *sb, es->s_journal_dev = cpu_to_le32(journal_devnum); /* Make sure we flush the recovery flag to disk. */ - ext4_commit_super(sb, 1); + ext4_commit_super(sb); } return 0; @@ -5434,7 +5434,7 @@ err_out: return err; } -static int ext4_commit_super(struct super_block *sb, int sync) +static int ext4_commit_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = EXT4_SB(sb)->s_es; @@ -5515,8 +5515,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) BUFFER_TRACE(sbh, "marking dirty"); ext4_superblock_csum_set(sb); - if (sync) - lock_buffer(sbh); + lock_buffer(sbh); if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) { /* * Oh, dear. A previous attempt to write the @@ -5532,16 +5531,14 @@ static int ext4_commit_super(struct super_block *sb, int sync) set_buffer_uptodate(sbh); } mark_buffer_dirty(sbh); - if (sync) { - unlock_buffer(sbh); - error = __sync_dirty_buffer(sbh, - REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0)); - if (buffer_write_io_error(sbh)) { - ext4_msg(sb, KERN_ERR, "I/O error while writing " - "superblock"); - clear_buffer_write_io_error(sbh); - set_buffer_uptodate(sbh); - } + unlock_buffer(sbh); + error = __sync_dirty_buffer(sbh, + REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0)); + if (buffer_write_io_error(sbh)) { + ext4_msg(sb, KERN_ERR, "I/O error while writing " + "superblock"); + clear_buffer_write_io_error(sbh); + set_buffer_uptodate(sbh); } return error; } @@ -5572,7 +5569,7 @@ static int ext4_mark_recovery_complete(struct super_block *sb, if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) { ext4_clear_feature_journal_needs_recovery(sb); - ext4_commit_super(sb, 1); + ext4_commit_super(sb); } out: jbd2_journal_unlock_updates(journal); @@ -5614,7 +5611,7 @@ static int ext4_clear_journal_err(struct super_block *sb, EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_commit_super(sb, 1); + ext4_commit_super(sb); jbd2_journal_clear_err(journal); jbd2_journal_update_sb_errno(journal); @@ -5716,7 +5713,7 @@ static int ext4_freeze(struct super_block *sb) ext4_clear_feature_journal_needs_recovery(sb); } - error = ext4_commit_super(sb, 1); + error = ext4_commit_super(sb); out: if (journal) /* we rely on upper layer to stop further updates */ @@ -5738,7 +5735,7 @@ static int ext4_unfreeze(struct super_block *sb) ext4_set_feature_journal_needs_recovery(sb); } - ext4_commit_super(sb, 1); + ext4_commit_super(sb); return 0; } @@ -5998,7 +5995,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) { - err = ext4_commit_super(sb, 1); + err = ext4_commit_super(sb); if (err) goto restore_opts; } -- cgit 1.4.1 From 05c2c00f3769abb9e323fcaca70d2de0b48af7ba Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Dec 2020 11:18:39 +0100 Subject: ext4: protect superblock modifications with a buffer lock Protect all superblock modifications (including checksum computation) with a superblock buffer lock. That way we are sure computed checksum matches current superblock contents (a mismatch could cause checksum failures in nojournal mode or if an unjournalled superblock update races with a journalled one). Also we avoid modifying superblock contents while it is being written out (which can cause DIF/DIX failures if we are running in nojournal mode). Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201216101844.22917-4-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/ext4_jbd2.c | 1 - fs/ext4/file.c | 3 +++ fs/ext4/inode.c | 3 +++ fs/ext4/namei.c | 6 ++++++ fs/ext4/resize.c | 12 ++++++++++++ fs/ext4/super.c | 2 +- fs/ext4/xattr.c | 3 +++ 7 files changed, 28 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 1a0a827a7f34..c7e410c4ab7d 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -379,7 +379,6 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line, struct buffer_head *bh = EXT4_SB(sb)->s_sbh; int err = 0; - ext4_superblock_csum_set(sb); if (ext4_handle_valid(handle)) { err = jbd2_journal_dirty_metadata(handle, bh); if (err) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 3ed8c048fb12..26907d5835d0 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -809,8 +809,11 @@ static int ext4_sample_last_mounted(struct super_block *sb, err = ext4_journal_get_write_access(handle, sbi->s_sbh); if (err) goto out_journal; + lock_buffer(sbi->s_sbh); strlcpy(sbi->s_es->s_last_mounted, cp, sizeof(sbi->s_es->s_last_mounted)); + ext4_superblock_csum_set(sb); + unlock_buffer(sbi->s_sbh); ext4_handle_dirty_super(handle, sb); out_journal: ext4_journal_stop(handle); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 27946882d4ce..27de6f0a33c8 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5150,7 +5150,10 @@ static int ext4_do_update_inode(handle_t *handle, err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); if (err) goto out_brelse; + lock_buffer(EXT4_SB(sb)->s_sbh); ext4_set_feature_large_file(sb); + ext4_superblock_csum_set(sb); + unlock_buffer(EXT4_SB(sb)->s_sbh); ext4_handle_sync(handle); err = ext4_handle_dirty_super(handle, sb); } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 7a890ff214f1..40970a509dcc 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2978,7 +2978,10 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) (le32_to_cpu(sbi->s_es->s_inodes_count))) { /* Insert this inode at the head of the on-disk orphan list */ NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan); + lock_buffer(sbi->s_sbh); sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); + ext4_superblock_csum_set(sb); + unlock_buffer(sbi->s_sbh); dirty = true; } list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan); @@ -3061,7 +3064,10 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) mutex_unlock(&sbi->s_orphan_lock); goto out_brelse; } + lock_buffer(sbi->s_sbh); sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); + ext4_superblock_csum_set(inode->i_sb); + unlock_buffer(sbi->s_sbh); mutex_unlock(&sbi->s_orphan_lock); err = ext4_handle_dirty_super(handle, inode->i_sb); } else { diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 928700d57eb6..6155f2b9538c 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -899,7 +899,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, EXT4_SB(sb)->s_gdb_count++; ext4_kvfree_array_rcu(o_group_desc); + lock_buffer(EXT4_SB(sb)->s_sbh); le16_add_cpu(&es->s_reserved_gdt_blocks, -1); + ext4_superblock_csum_set(sb); + unlock_buffer(EXT4_SB(sb)->s_sbh); err = ext4_handle_dirty_super(handle, sb); if (err) ext4_std_error(sb, err); @@ -1384,6 +1387,7 @@ static void ext4_update_super(struct super_block *sb, reserved_blocks *= blocks_count; do_div(reserved_blocks, 100); + lock_buffer(sbi->s_sbh); ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count); ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + free_blocks); le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) * @@ -1421,6 +1425,8 @@ static void ext4_update_super(struct super_block *sb, * active. */ ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) + reserved_blocks); + ext4_superblock_csum_set(sb); + unlock_buffer(sbi->s_sbh); /* Update the free space counts */ percpu_counter_add(&sbi->s_freeclusters_counter, @@ -1717,8 +1723,11 @@ static int ext4_group_extend_no_check(struct super_block *sb, goto errout; } + lock_buffer(EXT4_SB(sb)->s_sbh); ext4_blocks_count_set(es, o_blocks_count + add); ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + add); + ext4_superblock_csum_set(sb); + unlock_buffer(EXT4_SB(sb)->s_sbh); ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); /* We add the blocks to the bitmap and set the group need init bit */ @@ -1874,10 +1883,13 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) if (err) goto errout; + lock_buffer(sbi->s_sbh); ext4_clear_feature_resize_inode(sb); ext4_set_feature_meta_bg(sb); sbi->s_es->s_first_meta_bg = cpu_to_le32(num_desc_blocks(sb, sbi->s_groups_count)); + ext4_superblock_csum_set(sb); + unlock_buffer(sbi->s_sbh); err = ext4_handle_dirty_super(handle, sb); if (err) { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8bf31003416a..2d653ad14200 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5444,6 +5444,7 @@ static int ext4_commit_super(struct super_block *sb) if (!sbh || block_device_ejected(sb)) return error; + lock_buffer(sbh); /* * If the file system is mounted read-only, don't update the * superblock write time. This avoids updating the superblock @@ -5515,7 +5516,6 @@ static int ext4_commit_super(struct super_block *sb) BUFFER_TRACE(sbh, "marking dirty"); ext4_superblock_csum_set(sb); - lock_buffer(sbh); if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) { /* * Oh, dear. A previous attempt to write the diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 4e3b1f8c2e81..1db7ca778d69 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -792,7 +792,10 @@ static void ext4_xattr_update_super_block(handle_t *handle, BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) { + lock_buffer(EXT4_SB(sb)->s_sbh); ext4_set_feature_xattr(sb); + ext4_superblock_csum_set(sb); + unlock_buffer(EXT4_SB(sb)->s_sbh); ext4_handle_dirty_super(handle, sb); } } -- cgit 1.4.1 From 2d01ddc86606564fb08c56e3bc93a0693895f710 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Dec 2020 11:18:40 +0100 Subject: ext4: save error info to sb through journal if available If journalling is still working at the moment we get to writing error information to the superblock we cannot write directly to the superblock as such write could race with journalled update of the superblock and cause journal checksum failures, writing inconsistent information to the journal or other problems. We cannot journal the superblock directly from the error handling functions as we are running in uncertain context and could deadlock so just punt journalled superblock update to a workqueue. Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201216101844.22917-5-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 101 +++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 75 insertions(+), 26 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2d653ad14200..9a256c558e04 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -65,6 +65,7 @@ static struct ratelimit_state ext4_mount_msg_ratelimit; static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); static int ext4_show_options(struct seq_file *seq, struct dentry *root); +static void ext4_update_super(struct super_block *sb); static int ext4_commit_super(struct super_block *sb); static int ext4_mark_recovery_complete(struct super_block *sb, struct ext4_super_block *es); @@ -586,9 +587,9 @@ static int ext4_errno_to_code(int errno) return EXT4_ERR_UNKNOWN; } -static void __save_error_info(struct super_block *sb, int error, - __u32 ino, __u64 block, - const char *func, unsigned int line) +static void save_error_info(struct super_block *sb, int error, + __u32 ino, __u64 block, + const char *func, unsigned int line) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -615,15 +616,6 @@ static void __save_error_info(struct super_block *sb, int error, spin_unlock(&sbi->s_error_lock); } -static void save_error_info(struct super_block *sb, int error, - __u32 ino, __u64 block, - const char *func, unsigned int line) -{ - __save_error_info(sb, error, ino, block, func, line); - if (!bdev_read_only(sb->s_bdev)) - ext4_commit_super(sb); -} - /* Deal with the reporting of failure conditions on a filesystem such as * inconsistencies detected or read IO failures. * @@ -649,20 +641,35 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error, const char *func, unsigned int line) { journal_t *journal = EXT4_SB(sb)->s_journal; + bool continue_fs = !force_ro && test_opt(sb, ERRORS_CONT); EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; if (test_opt(sb, WARN_ON_ERROR)) WARN_ON_ONCE(1); - if (!bdev_read_only(sb->s_bdev)) + if (!continue_fs && !sb_rdonly(sb)) { + ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); + if (journal) + jbd2_journal_abort(journal, -EIO); + } + + if (!bdev_read_only(sb->s_bdev)) { save_error_info(sb, error, ino, block, func, line); + /* + * In case the fs should keep running, we need to writeout + * superblock through the journal. Due to lock ordering + * constraints, it may not be safe to do it right here so we + * defer superblock flushing to a workqueue. + */ + if (continue_fs) + schedule_work(&EXT4_SB(sb)->s_error_work); + else + ext4_commit_super(sb); + } - if (sb_rdonly(sb) || (!force_ro && test_opt(sb, ERRORS_CONT))) + if (sb_rdonly(sb) || continue_fs) return; - ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); - if (journal) - jbd2_journal_abort(journal, -EIO); /* * We force ERRORS_RO behavior when system is rebooting. Otherwise we * could panic during 'reboot -f' as the underlying device got already @@ -685,7 +692,38 @@ static void flush_stashed_error_work(struct work_struct *work) { struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info, s_error_work); + journal_t *journal = sbi->s_journal; + handle_t *handle; + /* + * If the journal is still running, we have to write out superblock + * through the journal to avoid collisions of other journalled sb + * updates. + * + * We use directly jbd2 functions here to avoid recursing back into + * ext4 error handling code during handling of previous errors. + */ + if (!sb_rdonly(sbi->s_sb) && journal) { + handle = jbd2_journal_start(journal, 1); + if (IS_ERR(handle)) + goto write_directly; + if (jbd2_journal_get_write_access(handle, sbi->s_sbh)) { + jbd2_journal_stop(handle); + goto write_directly; + } + ext4_update_super(sbi->s_sb); + if (jbd2_journal_dirty_metadata(handle, sbi->s_sbh)) { + jbd2_journal_stop(handle); + goto write_directly; + } + jbd2_journal_stop(handle); + return; + } +write_directly: + /* + * Write through journal failed. Write sb directly to get error info + * out and hope for the best. + */ ext4_commit_super(sbi->s_sb); } @@ -944,9 +982,11 @@ __acquires(bitlock) if (test_opt(sb, WARN_ON_ERROR)) WARN_ON_ONCE(1); EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; - __save_error_info(sb, EFSCORRUPTED, ino, block, function, line); - if (!bdev_read_only(sb->s_bdev)) + if (!bdev_read_only(sb->s_bdev)) { + save_error_info(sb, EFSCORRUPTED, ino, block, function, + line); schedule_work(&EXT4_SB(sb)->s_error_work); + } return; } ext4_unlock_group(sb, grp); @@ -5434,15 +5474,12 @@ err_out: return err; } -static int ext4_commit_super(struct super_block *sb) +/* Copy state of EXT4_SB(sb) into buffer for on-disk superblock */ +static void ext4_update_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = EXT4_SB(sb)->s_es; struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; - int error = 0; - - if (!sbh || block_device_ejected(sb)) - return error; lock_buffer(sbh); /* @@ -5514,8 +5551,20 @@ static int ext4_commit_super(struct super_block *sb) } spin_unlock(&sbi->s_error_lock); - BUFFER_TRACE(sbh, "marking dirty"); ext4_superblock_csum_set(sb); + unlock_buffer(sbh); +} + +static int ext4_commit_super(struct super_block *sb) +{ + struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; + int error = 0; + + if (!sbh || block_device_ejected(sb)) + return error; + + ext4_update_super(sb); + if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) { /* * Oh, dear. A previous attempt to write the @@ -5530,8 +5579,8 @@ static int ext4_commit_super(struct super_block *sb) clear_buffer_write_io_error(sbh); set_buffer_uptodate(sbh); } + BUFFER_TRACE(sbh, "marking dirty"); mark_buffer_dirty(sbh); - unlock_buffer(sbh); error = __sync_dirty_buffer(sbh, REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0)); if (buffer_write_io_error(sbh)) { -- cgit 1.4.1 From e92ad03fa53498f12b3f5ecb8822adc3bf815b28 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Dec 2020 11:18:41 +0100 Subject: ext4: use sbi instead of EXT4_SB(sb) in ext4_update_super() No behavioral change. Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201216101844.22917-6-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9a256c558e04..0f0db49031dc 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5478,8 +5478,8 @@ err_out: static void ext4_update_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_super_block *es = EXT4_SB(sb)->s_es; - struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; + struct ext4_super_block *es = sbi->s_es; + struct buffer_head *sbh = sbi->s_sbh; lock_buffer(sbh); /* @@ -5496,21 +5496,20 @@ static void ext4_update_super(struct super_block *sb) ext4_update_tstamp(es, s_wtime); if (sb->s_bdev->bd_part) es->s_kbytes_written = - cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + + cpu_to_le64(sbi->s_kbytes_written + ((part_stat_read(sb->s_bdev->bd_part, sectors[STAT_WRITE]) - - EXT4_SB(sb)->s_sectors_written_start) >> 1)); + sbi->s_sectors_written_start) >> 1)); else - es->s_kbytes_written = - cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); - if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter)) + es->s_kbytes_written = cpu_to_le64(sbi->s_kbytes_written); + if (percpu_counter_initialized(&sbi->s_freeclusters_counter)) ext4_free_blocks_count_set(es, - EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive( - &EXT4_SB(sb)->s_freeclusters_counter))); - if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter)) + EXT4_C2B(sbi, percpu_counter_sum_positive( + &sbi->s_freeclusters_counter))); + if (percpu_counter_initialized(&sbi->s_freeinodes_counter)) es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( - &EXT4_SB(sb)->s_freeinodes_counter)); + &sbi->s_freeinodes_counter)); /* Copy error information to the on-disk superblock */ spin_lock(&sbi->s_error_lock); if (sbi->s_add_error_count > 0) { -- cgit 1.4.1 From dfd56c2c0c0dbb11be939b804ddc8d5395ab3432 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Dec 2020 11:18:43 +0100 Subject: ext4: fix superblock checksum failure when setting password salt When setting password salt in the superblock, we forget to recompute the superblock checksum so it will not match until the next superblock modification which recomputes the checksum. Fix it. CC: Michael Halcrow Reported-by: Andreas Dilger Fixes: 9bd8212f981e ("ext4 crypto: add encryption policy and password salt support") Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201216101844.22917-8-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/ioctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index f0381876a7e5..106bf149e8ca 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1157,7 +1157,10 @@ resizefs_out: err = ext4_journal_get_write_access(handle, sbi->s_sbh); if (err) goto pwsalt_err_journal; + lock_buffer(sbi->s_sbh); generate_random_uuid(sbi->s_es->s_encrypt_pw_salt); + ext4_superblock_csum_set(sb); + unlock_buffer(sbi->s_sbh); err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); pwsalt_err_journal: -- cgit 1.4.1 From a3f5cf14ff917d46a4d491cf86210fd639d1ff38 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Dec 2020 11:18:44 +0100 Subject: ext4: drop ext4_handle_dirty_super() The wrapper is now useless since it does what ext4_handle_dirty_metadata() does. Just remove it. Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201216101844.22917-9-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/ext4_jbd2.c | 16 ---------------- fs/ext4/ext4_jbd2.h | 5 ----- fs/ext4/file.c | 2 +- fs/ext4/inode.c | 3 ++- fs/ext4/namei.c | 4 ++-- fs/ext4/resize.c | 8 ++++---- fs/ext4/xattr.c | 2 +- 7 files changed, 10 insertions(+), 30 deletions(-) diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index c7e410c4ab7d..be799040a415 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -372,19 +372,3 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, } return err; } - -int __ext4_handle_dirty_super(const char *where, unsigned int line, - handle_t *handle, struct super_block *sb) -{ - struct buffer_head *bh = EXT4_SB(sb)->s_sbh; - int err = 0; - - if (ext4_handle_valid(handle)) { - err = jbd2_journal_dirty_metadata(handle, bh); - if (err) - ext4_journal_abort_handle(where, line, __func__, - bh, handle, err); - } else - mark_buffer_dirty(bh); - return err; -} diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index a124c68b0c75..0d2fa423b7ad 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -244,9 +244,6 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, handle_t *handle, struct inode *inode, struct buffer_head *bh); -int __ext4_handle_dirty_super(const char *where, unsigned int line, - handle_t *handle, struct super_block *sb); - #define ext4_journal_get_write_access(handle, bh) \ __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh)) #define ext4_forget(handle, is_metadata, inode, bh, block_nr) \ @@ -257,8 +254,6 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line, #define ext4_handle_dirty_metadata(handle, inode, bh) \ __ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \ (bh)) -#define ext4_handle_dirty_super(handle, sb) \ - __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb)) handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line, int type, int blocks, int rsv_blocks, diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 26907d5835d0..1cd3d26e3217 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -814,7 +814,7 @@ static int ext4_sample_last_mounted(struct super_block *sb, sizeof(sbi->s_es->s_last_mounted)); ext4_superblock_csum_set(sb); unlock_buffer(sbi->s_sbh); - ext4_handle_dirty_super(handle, sb); + ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); out_journal: ext4_journal_stop(handle); out: diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 27de6f0a33c8..c173c8405856 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5155,7 +5155,8 @@ static int ext4_do_update_inode(handle_t *handle, ext4_superblock_csum_set(sb); unlock_buffer(EXT4_SB(sb)->s_sbh); ext4_handle_sync(handle); - err = ext4_handle_dirty_super(handle, sb); + err = ext4_handle_dirty_metadata(handle, NULL, + EXT4_SB(sb)->s_sbh); } ext4_update_inode_fsync_trans(handle, inode, need_datasync); out_brelse: diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 40970a509dcc..a3b28ef2455a 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2988,7 +2988,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) mutex_unlock(&sbi->s_orphan_lock); if (dirty) { - err = ext4_handle_dirty_super(handle, sb); + err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); rc = ext4_mark_iloc_dirty(handle, inode, &iloc); if (!err) err = rc; @@ -3069,7 +3069,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) ext4_superblock_csum_set(inode->i_sb); unlock_buffer(sbi->s_sbh); mutex_unlock(&sbi->s_orphan_lock); - err = ext4_handle_dirty_super(handle, inode->i_sb); + err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); } else { struct ext4_iloc iloc2; struct inode *i_prev = diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 6155f2b9538c..bd0d185654f3 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -903,7 +903,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, le16_add_cpu(&es->s_reserved_gdt_blocks, -1); ext4_superblock_csum_set(sb); unlock_buffer(EXT4_SB(sb)->s_sbh); - err = ext4_handle_dirty_super(handle, sb); + err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); if (err) ext4_std_error(sb, err); return err; @@ -1521,7 +1521,7 @@ static int ext4_flex_group_add(struct super_block *sb, ext4_update_super(sb, flex_gd); - err = ext4_handle_dirty_super(handle, sb); + err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); exit_journal: err2 = ext4_journal_stop(handle); @@ -1734,7 +1734,7 @@ static int ext4_group_extend_no_check(struct super_block *sb, err = ext4_group_add_blocks(handle, sb, o_blocks_count, add); if (err) goto errout; - ext4_handle_dirty_super(handle, sb); + ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); errout: @@ -1891,7 +1891,7 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) ext4_superblock_csum_set(sb); unlock_buffer(sbi->s_sbh); - err = ext4_handle_dirty_super(handle, sb); + err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); if (err) { ext4_std_error(sb, err); goto errout; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 1db7ca778d69..372208500f4e 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -796,7 +796,7 @@ static void ext4_xattr_update_super_block(handle_t *handle, ext4_set_feature_xattr(sb); ext4_superblock_csum_set(sb); unlock_buffer(EXT4_SB(sb)->s_sbh); - ext4_handle_dirty_super(handle, sb); + ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); } } -- cgit 1.4.1 From 5a3b590d4b2db187faa6f06adc9a53d6199fb1f9 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 17 Dec 2020 13:24:15 -0500 Subject: ext4: don't leak old mountpoint samples When the first file is opened, ext4 samples the mountpoint of the filesystem in 64 bytes of the super block. It does so using strlcpy(), this means that the remaining bytes in the super block string buffer are untouched. If the mount point before had a longer path than the current one, it can be reconstructed. Consider the case where the fs was mounted to "/media/johnjdeveloper" and later to "/". The super block buffer then contains "/\x00edia/johnjdeveloper". This case was seen in the wild and caused confusion how the name of a developer ands up on the super block of a filesystem used in production... Fix this by using strncpy() instead of strlcpy(). The superblock field is defined to be a fixed-size char array, and it is already marked using __nonstring in fs/ext4/ext4.h. The consumer of the field in e2fsprogs already assumes that in the case of a 64+ byte mount path, that s_last_mounted will not be NUL terminated. Link: https://lore.kernel.org/r/X9ujIOJG/HqMr88R@mit.edu Reported-by: Richard Weinberger Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 1cd3d26e3217..349b27f0dda0 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -810,7 +810,7 @@ static int ext4_sample_last_mounted(struct super_block *sb, if (err) goto out_journal; lock_buffer(sbi->s_sbh); - strlcpy(sbi->s_es->s_last_mounted, cp, + strncpy(sbi->s_es->s_last_mounted, cp, sizeof(sbi->s_es->s_last_mounted)); ext4_superblock_csum_set(sb); unlock_buffer(sbi->s_sbh); -- cgit 1.4.1 From 8b3fd902391fdee526f6ba46899a3f8005983ae1 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 17 Dec 2020 08:15:01 +0100 Subject: MAINTAINERS: include governors into CPU IDLE TIME MANAGEMENT FRAMEWORK The current pattern in the file entry does not make the files in the governors subdirectory to be a part of the CPU IDLE TIME MANAGEMENT FRAMEWORK. Adjust the file pattern to include files in governors. Signed-off-by: Lukas Bulwahn Signed-off-by: Rafael J. Wysocki --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7e76ae7ee8a2..59be280069d1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4583,7 +4583,7 @@ B: https://bugzilla.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git F: Documentation/admin-guide/pm/cpuidle.rst F: Documentation/driver-api/pm/cpuidle.rst -F: drivers/cpuidle/* +F: drivers/cpuidle/ F: include/linux/cpuidle.h CPU POWER MONITORING SUBSYSTEM -- cgit 1.4.1 From 7887cc89d5851cbdec49219e9614beec776af150 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 14 Dec 2020 23:34:13 +0100 Subject: ARM: dts: ux500/golden: Set display max brightness A too high brightness by default (default is max) makes the screen go blank. Set this to 15 as in the Vendor tree. Signed-off-by: Linus Walleij Cc: Stephan Gerhold Link: https://lore.kernel.org/r/20201214223413.253893-1-linus.walleij@linaro.org' Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/ste-ux500-samsung-golden.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/ste-ux500-samsung-golden.dts b/arch/arm/boot/dts/ste-ux500-samsung-golden.dts index a1093cb37dc7..aed1f2d5f246 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-golden.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-golden.dts @@ -326,6 +326,7 @@ panel@0 { compatible = "samsung,s6e63m0"; reg = <0>; + max-brightness = <15>; vdd3-supply = <&panel_reg_3v0>; vci-supply = <&panel_reg_1v8>; reset-gpios = <&gpio4 11 GPIO_ACTIVE_LOW>; -- cgit 1.4.1 From f87777a3c30cf50c66a20e1d153f0e003bb30774 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 19 Dec 2020 14:50:36 +0100 Subject: net: stmmac: dwmac-meson8b: ignore the second clock input The dwmac glue registers on Amlogic Meson8b and newer SoCs has two clock inputs: - Meson8b and Meson8m2: MPLL2 and MPLL2 (the same parent is wired to both inputs) - GXBB, GXL, GXM, AXG, G12A, G12B, SM1: FCLK_DIV2 and MPLL2 All known vendor kernels and u-boots are using the first input only. We let the common clock framework automatically choose the "right" parent. For some boards this causes a problem though, specificially with G12A and newer SoCs. The clock input is used for generating the 125MHz RGMII TX clock. For the two input clocks this means on G12A: - FCLK_DIV2: 999999985Hz / 8 = 124999998.125Hz - MPLL2: 499999993Hz / 4 = 124999998.25Hz In theory MPLL2 is the "better" clock input because it's gets us 0.125Hz closer to the requested frequency than FCLK_DIV2. In reality however there is a resource conflict because MPLL2 is needed to generate some of the audio clocks. dwmac-meson8b probes first and sets up the clock tree with MPLL2. This works fine until the audio driver comes and "steals" the MPLL2 clocks and configures it with it's own rate (294909637Hz). The common clock framework happily changes the MPLL2 rate but does not reconfigure our RGMII TX clock tree, which then ends up at 73727409Hz, which is more than 40% off the requested 125MHz. Don't use the second clock input for now to force the common clock framework to always select the first parent. This mimics the behavior from the vendor driver and fixes the clock resource conflict with the audio driver on G12A boards. Once the common clock framework can handle this situation this change can be reverted again. Fixes: 566e8251625304 ("net: stmmac: add a glue driver for the Amlogic Meson 8b / GXBB DWMAC") Reported-by: Thomas Graichen Signed-off-by: Martin Blumenstingl Tested-by: thomas graichen Link: https://lore.kernel.org/r/20201219135036.3216017-1-martin.blumenstingl@googlemail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index 459ae715b33d..f184b00f5116 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -135,7 +135,7 @@ static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac) struct device *dev = dwmac->dev; static const struct clk_parent_data mux_parents[] = { { .fw_name = "clkin0", }, - { .fw_name = "clkin1", }, + { .index = -1, }, }; static const struct clk_div_table div_table[] = { { .div = 2, .val = 2, }, -- cgit 1.4.1 From 8b0f64b113d617c995ffdf50196948c3e99c6e49 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 19 Dec 2020 10:55:38 -0800 Subject: MAINTAINERS: remove names from mailing list maintainers When searching for inactive maintainers it's useful to filter out mailing list addresses. Such "maintainers" will obviously never feature in a "From:" line of an email or a review tag. Since "L:" entries only provide the address of a mailing list without a fancy name extend this pattern to "M:" entries. Alternatively we could reserve M: entries for humans only and move the fake "maintainers" to L:. While I'd personally prefer to reserve M: for humans only, I'm not 100% that's a great choice either, given most L: entries are in fact open mailing lists with public archives. Link: https://lore.kernel.org/r/20201219185538.750076-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 28d7acdb0591..20cd4cc08dd1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -203,8 +203,8 @@ F: include/uapi/linux/nl80211.h F: net/wireless/ 8169 10/100/1000 GIGABIT ETHERNET DRIVER -M: Realtek linux nic maintainers M: Heiner Kallweit +M: nic_swsd@realtek.com L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/realtek/r8169* @@ -2119,7 +2119,7 @@ N: atmel ARM/Microchip Sparx5 SoC support M: Lars Povlsen M: Steen Hegelund -M: Microchip Linux Driver Support +M: UNGLinuxDriver@microchip.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Supported T: git git://github.com/microchip-ung/linux-upstream.git @@ -3958,7 +3958,7 @@ F: net/can/ CAN-J1939 NETWORK LAYER M: Robin van der Gracht M: Oleksij Rempel -R: Pengutronix Kernel Team +R: kernel@pengutronix.de L: linux-can@vger.kernel.org S: Maintained F: Documentation/networking/j1939.rst @@ -11651,7 +11651,7 @@ F: drivers/media/platform/atmel/atmel-isi.h MICROCHIP KSZ SERIES ETHERNET SWITCH DRIVER M: Woojung Huh -M: Microchip Linux Driver Support +M: UNGLinuxDriver@microchip.com L: netdev@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml @@ -11661,7 +11661,7 @@ F: net/dsa/tag_ksz.c MICROCHIP LAN743X ETHERNET DRIVER M: Bryan Whitehead -M: Microchip Linux Driver Support +M: UNGLinuxDriver@microchip.com L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/microchip/lan743x_* @@ -11755,7 +11755,7 @@ F: drivers/net/wireless/microchip/wilc1000/ MICROSEMI MIPS SOCS M: Alexandre Belloni -M: Microchip Linux Driver Support +M: UNGLinuxDriver@microchip.com L: linux-mips@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/mips/mscc.txt @@ -12809,10 +12809,10 @@ F: tools/objtool/ F: include/linux/objtool.h OCELOT ETHERNET SWITCH DRIVER -M: Microchip Linux Driver Support M: Vladimir Oltean M: Claudiu Manoil M: Alexandre Belloni +M: UNGLinuxDriver@microchip.com L: netdev@vger.kernel.org S: Supported F: drivers/net/dsa/ocelot/* @@ -13874,7 +13874,7 @@ F: drivers/platform/x86/peaq-wmi.c PENSANDO ETHERNET DRIVERS M: Shannon Nelson -M: Pensando Drivers +M: drivers@pensando.io L: netdev@vger.kernel.org S: Supported F: Documentation/networking/device_drivers/ethernet/pensando/ionic.rst @@ -14653,7 +14653,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git F: drivers/net/wireless/ath/ath11k/ QUALCOMM ATHEROS ATH9K WIRELESS DRIVER -M: QCA ath9k Development +M: ath9k-devel@qca.qualcomm.com L: linux-wireless@vger.kernel.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath9k @@ -18338,7 +18338,7 @@ F: include/linux/usb/isp116x.h USB LAN78XX ETHERNET DRIVER M: Woojung Huh -M: Microchip Linux Driver Support +M: UNGLinuxDriver@microchip.com L: netdev@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/microchip,lan78xx.txt @@ -18452,7 +18452,7 @@ F: drivers/net/usb/smsc75xx.* USB SMSC95XX ETHERNET DRIVER M: Steve Glendinning -M: Microchip Linux Driver Support +M: UNGLinuxDriver@microchip.com L: netdev@vger.kernel.org S: Maintained F: drivers/net/usb/smsc95xx.* @@ -18999,7 +18999,7 @@ F: drivers/input/mouse/vmmouse.h VMWARE VMXNET3 ETHERNET DRIVER M: Ronak Doshi -M: "VMware, Inc." +M: pv-drivers@vmware.com L: netdev@vger.kernel.org S: Maintained F: drivers/net/vmxnet3/ -- cgit 1.4.1 From a0c8be56affa7d5ffbdec24c992223be54db3b6e Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Sat, 19 Dec 2020 15:39:19 -0600 Subject: ibmvnic: fix login buffer memory leak Commit 34f0f4e3f488 ("ibmvnic: Fix login buffer memory leaks") frees login_rsp_buffer in release_resources() and send_login() because handle_login_rsp() does not free it. Commit f3ae59c0c015 ("ibmvnic: store RX and TX subCRQ handle array in ibmvnic_adapter struct") frees login_rsp_buffer in handle_login_rsp(). It seems unnecessary to free it in release_resources() and send_login(). There are chances that handle_login_rsp returns earlier without freeing buffers. Double-checking the buffer is harmless since release_login_buffer and release_login_rsp_buffer will do nothing if buffer is already freed. Fixes: f3ae59c0c015 ("ibmvnic: store RX and TX subCRQ handle array in ibmvnic_adapter struct") Fixes: 34f0f4e3f488 ("ibmvnic: Fix login buffer memory leaks") Signed-off-by: Lijun Pan Link: https://lore.kernel.org/r/20201219213919.21045-1-ljp@linux.ibm.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index f302504faa8a..bd0281020d51 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -955,6 +955,7 @@ static void release_resources(struct ibmvnic_adapter *adapter) release_rx_pools(adapter); release_napi(adapter); + release_login_buffer(adapter); release_login_rsp_buffer(adapter); } @@ -3873,7 +3874,9 @@ static int send_login(struct ibmvnic_adapter *adapter) return -1; } + release_login_buffer(adapter); release_login_rsp_buffer(adapter); + client_data_len = vnic_client_data_len(adapter); buffer_size = -- cgit 1.4.1 From 58f60329a6be35a5653edb3fd2023ccef9eb9943 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sun, 20 Dec 2020 16:29:30 +0800 Subject: net: ethernet: mvneta: Fix error handling in mvneta_probe When mvneta_port_power_up() fails, we should execute cleanup functions after label err_netdev to avoid memleak. Fixes: 41c2b6b4f0f80 ("net: ethernet: mvneta: Add back interface mode validation") Signed-off-by: Dinghao Liu Link: https://lore.kernel.org/r/20201220082930.21623-1-dinghao.liu@zju.edu.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 563ceac3060f..3369ec717a51 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -5255,7 +5255,7 @@ static int mvneta_probe(struct platform_device *pdev) err = mvneta_port_power_up(pp, pp->phy_interface); if (err < 0) { dev_err(&pdev->dev, "can't power up port\n"); - return err; + goto err_netdev; } /* Armada3700 network controller does not support per-cpu -- cgit 1.4.1 From 1d898b283576c38dedcb6b21fcbb65968ab03581 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Sun, 20 Dec 2020 10:49:47 +0200 Subject: docs: netdev-FAQ: fix question headers formatting Join adjacent questions to a single question line. This fixes the formatting of questions that were not part of the heading. Also, drop Q: and A: prefixes. We don't need them now that questions and answers are visually separated. Signed-off-by: Baruch Siach Link: https://lore.kernel.org/r/f76078ba5547744f2ec178984c32fbc7dcd29a2b.1608454187.git.baruch@tkos.co.il Signed-off-by: Jakub Kicinski --- Documentation/networking/netdev-FAQ.rst | 126 +++++++++++++++----------------- 1 file changed, 59 insertions(+), 67 deletions(-) diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst index 4b9ed5874d5a..ae2ae37cd921 100644 --- a/Documentation/networking/netdev-FAQ.rst +++ b/Documentation/networking/netdev-FAQ.rst @@ -6,9 +6,9 @@ netdev FAQ ========== -Q: What is netdev? ------------------- -A: It is a mailing list for all network-related Linux stuff. This +What is netdev? +--------------- +It is a mailing list for all network-related Linux stuff. This includes anything found under net/ (i.e. core code like IPv6) and drivers/net (i.e. hardware specific drivers) in the Linux source tree. @@ -25,9 +25,9 @@ Aside from subsystems like that mentioned above, all network-related Linux development (i.e. RFC, review, comments, etc.) takes place on netdev. -Q: How do the changes posted to netdev make their way into Linux? ------------------------------------------------------------------ -A: There are always two trees (git repositories) in play. Both are +How do the changes posted to netdev make their way into Linux? +-------------------------------------------------------------- +There are always two trees (git repositories) in play. Both are driven by David Miller, the main network maintainer. There is the ``net`` tree, and the ``net-next`` tree. As you can probably guess from the names, the ``net`` tree is for fixes to existing code already in the @@ -37,9 +37,9 @@ for the future release. You can find the trees here: - https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git - https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git -Q: How often do changes from these trees make it to the mainline Linus tree? ----------------------------------------------------------------------------- -A: To understand this, you need to know a bit of background information on +How often do changes from these trees make it to the mainline Linus tree? +------------------------------------------------------------------------- +To understand this, you need to know a bit of background information on the cadence of Linux development. Each new release starts off with a two week "merge window" where the main maintainers feed their new stuff to Linus for merging into the mainline tree. After the two weeks, the @@ -81,7 +81,8 @@ focus for ``net`` is on stabilization and bug fixes. Finally, the vX.Y gets released, and the whole cycle starts over. -Q: So where are we now in this cycle? +So where are we now in this cycle? +---------------------------------- Load the mainline (Linus) page here: @@ -91,9 +92,9 @@ and note the top of the "tags" section. If it is rc1, it is early in the dev cycle. If it was tagged rc7 a week ago, then a release is probably imminent. -Q: How do I indicate which tree (net vs. net-next) my patch should be in? -------------------------------------------------------------------------- -A: Firstly, think whether you have a bug fix or new "next-like" content. +How do I indicate which tree (net vs. net-next) my patch should be in? +---------------------------------------------------------------------- +Firstly, think whether you have a bug fix or new "next-like" content. Then once decided, assuming that you use git, use the prefix flag, i.e. :: @@ -105,48 +106,45 @@ in the above is just the subject text of the outgoing e-mail, and you can manually change it yourself with whatever MUA you are comfortable with. -Q: I sent a patch and I'm wondering what happened to it? --------------------------------------------------------- -Q: How can I tell whether it got merged? -A: Start by looking at the main patchworks queue for netdev: +I sent a patch and I'm wondering what happened to it - how can I tell whether it got merged? +-------------------------------------------------------------------------------------------- +Start by looking at the main patchworks queue for netdev: https://patchwork.kernel.org/project/netdevbpf/list/ The "State" field will tell you exactly where things are at with your patch. -Q: The above only says "Under Review". How can I find out more? ----------------------------------------------------------------- -A: Generally speaking, the patches get triaged quickly (in less than +The above only says "Under Review". How can I find out more? +------------------------------------------------------------- +Generally speaking, the patches get triaged quickly (in less than 48h). So be patient. Asking the maintainer for status updates on your patch is a good way to ensure your patch is ignored or pushed to the bottom of the priority list. -Q: I submitted multiple versions of the patch series ----------------------------------------------------- -Q: should I directly update patchwork for the previous versions of these -patch series? -A: No, please don't interfere with the patch status on patchwork, leave +I submitted multiple versions of the patch series. Should I directly update patchwork for the previous versions of these patch series? +-------------------------------------------------------------------------------------------------------------------------------------- +No, please don't interfere with the patch status on patchwork, leave it to the maintainer to figure out what is the most recent and current version that should be applied. If there is any doubt, the maintainer will reply and ask what should be done. -Q: I made changes to only a few patches in a patch series should I resend only those changed? ---------------------------------------------------------------------------------------------- -A: No, please resend the entire patch series and make sure you do number your +I made changes to only a few patches in a patch series should I resend only those changed? +------------------------------------------------------------------------------------------ +No, please resend the entire patch series and make sure you do number your patches such that it is clear this is the latest and greatest set of patches that can be applied. -Q: I submitted multiple versions of a patch series and it looks like a version other than the last one has been accepted, what should I do? -------------------------------------------------------------------------------------------------------------------------------------------- -A: There is no revert possible, once it is pushed out, it stays like that. +I submitted multiple versions of a patch series and it looks like a version other than the last one has been accepted, what should I do? +---------------------------------------------------------------------------------------------------------------------------------------- +There is no revert possible, once it is pushed out, it stays like that. Please send incremental versions on top of what has been merged in order to fix the patches the way they would look like if your latest patch series was to be merged. -Q: How can I tell what patches are queued up for backporting to the various stable releases? --------------------------------------------------------------------------------------------- -A: Normally Greg Kroah-Hartman collects stable commits himself, but for +How can I tell what patches are queued up for backporting to the various stable releases? +----------------------------------------------------------------------------------------- +Normally Greg Kroah-Hartman collects stable commits himself, but for networking, Dave collects up patches he deems critical for the networking subsystem, and then hands them off to Greg. @@ -169,11 +167,9 @@ simply clone the repo, and then git grep the mainline commit ID, e.g. releases/3.9.8/ipv6-fix-possible-crashes-in-ip6_cork_release.patch stable/stable-queue$ -Q: I see a network patch and I think it should be backported to stable. ------------------------------------------------------------------------ -Q: Should I request it via stable@vger.kernel.org like the references in -the kernel's Documentation/process/stable-kernel-rules.rst file say? -A: No, not for networking. Check the stable queues as per above first +I see a network patch and I think it should be backported to stable. Should I request it via stable@vger.kernel.org like the references in the kernel's Documentation/process/stable-kernel-rules.rst file say? +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +No, not for networking. Check the stable queues as per above first to see if it is already queued. If not, then send a mail to netdev, listing the upstream commit ID and why you think it should be a stable candidate. @@ -190,11 +186,9 @@ mainline, the better the odds that it is an OK candidate for stable. So scrambling to request a commit be added the day after it appears should be avoided. -Q: I have created a network patch and I think it should be backported to stable. --------------------------------------------------------------------------------- -Q: Should I add a Cc: stable@vger.kernel.org like the references in the -kernel's Documentation/ directory say? -A: No. See above answer. In short, if you think it really belongs in +I have created a network patch and I think it should be backported to stable. Should I add a Cc: stable@vger.kernel.org like the references in the kernel's Documentation/ directory say? +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +No. See above answer. In short, if you think it really belongs in stable, then ensure you write a decent commit log that describes who gets impacted by the bug fix and how it manifests itself, and when the bug was introduced. If you do that properly, then the commit will get @@ -207,18 +201,18 @@ marker line as described in :ref:`Documentation/process/submitting-patches.rst ` to temporarily embed that information into the patch that you send. -Q: Are all networking bug fixes backported to all stable releases? ------------------------------------------------------------------- -A: Due to capacity, Dave could only take care of the backports for the +Are all networking bug fixes backported to all stable releases? +--------------------------------------------------------------- +Due to capacity, Dave could only take care of the backports for the last two stable releases. For earlier stable releases, each stable branch maintainer is supposed to take care of them. If you find any patch is missing from an earlier stable branch, please notify stable@vger.kernel.org with either a commit ID or a formal patch backported, and CC Dave and other relevant networking developers. -Q: Is the comment style convention different for the networking content? ------------------------------------------------------------------------- -A: Yes, in a largely trivial way. Instead of this:: +Is the comment style convention different for the networking content? +--------------------------------------------------------------------- +Yes, in a largely trivial way. Instead of this:: /* * foobar blah blah blah @@ -231,32 +225,30 @@ it is requested that you make it look like this:: * another line of text */ -Q: I am working in existing code that has the former comment style and not the latter. --------------------------------------------------------------------------------------- -Q: Should I submit new code in the former style or the latter? -A: Make it the latter style, so that eventually all code in the domain +I am working in existing code that has the former comment style and not the latter. Should I submit new code in the former style or the latter? +----------------------------------------------------------------------------------------------------------------------------------------------- +Make it the latter style, so that eventually all code in the domain of netdev is of this format. -Q: I found a bug that might have possible security implications or similar. ---------------------------------------------------------------------------- -Q: Should I mail the main netdev maintainer off-list?** -A: No. The current netdev maintainer has consistently requested that +I found a bug that might have possible security implications or similar. Should I mail the main netdev maintainer off-list? +--------------------------------------------------------------------------------------------------------------------------- +No. The current netdev maintainer has consistently requested that people use the mailing lists and not reach out directly. If you aren't OK with that, then perhaps consider mailing security@kernel.org or reading about http://oss-security.openwall.org/wiki/mailing-lists/distros as possible alternative mechanisms. -Q: What level of testing is expected before I submit my change? ---------------------------------------------------------------- -A: If your changes are against ``net-next``, the expectation is that you +What level of testing is expected before I submit my change? +------------------------------------------------------------ +If your changes are against ``net-next``, the expectation is that you have tested by layering your changes on top of ``net-next``. Ideally you will have done run-time testing specific to your change, but at a minimum, your changes should survive an ``allyesconfig`` and an ``allmodconfig`` build without new warnings or failures. -Q: How do I post corresponding changes to user space components? ----------------------------------------------------------------- -A: User space code exercising kernel features should be posted +How do I post corresponding changes to user space components? +------------------------------------------------------------- +User space code exercising kernel features should be posted alongside kernel patches. This gives reviewers a chance to see how any new interface is used and how well it works. @@ -280,9 +272,9 @@ to the mailing list, e.g.:: Posting as one thread is discouraged because it confuses patchwork (as of patchwork 2.2.2). -Q: Any other tips to help ensure my net/net-next patch gets OK'd? ------------------------------------------------------------------ -A: Attention to detail. Re-read your own work as if you were the +Any other tips to help ensure my net/net-next patch gets OK'd? +-------------------------------------------------------------- +Attention to detail. Re-read your own work as if you were the reviewer. You can start with using ``checkpatch.pl``, perhaps even with the ``--strict`` flag. But do not be mindlessly robotic in doing so. If your change is a bug fix, make sure your commit log indicates the -- cgit 1.4.1 From 5d5647dad259bb416fd5d3d87012760386d97530 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 21 Dec 2020 06:55:30 -0800 Subject: qede: fix offload for IPIP tunnel packets IPIP tunnels packets are unknown to device, hence these packets are incorrectly parsed and caused the packet corruption, so disable offlods for such packets at run time. Signed-off-by: Manish Chopra Signed-off-by: Sudarsana Kalluru Signed-off-by: Igor Russkikh Link: https://lore.kernel.org/r/20201221145530.7771-1-manishc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qede/qede_fp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index a2494bf85007..ca0ee29a57b5 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -1799,6 +1799,11 @@ netdev_features_t qede_features_check(struct sk_buff *skb, ntohs(udp_hdr(skb)->dest) != gnv_port)) return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + } else if (l4_proto == IPPROTO_IPIP) { + /* IPIP tunnels are unknown to the device or at least unsupported natively, + * offloads for them can't be done trivially, so disable them for such skb. + */ + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); } } -- cgit 1.4.1 From 3557ae187c32203d1bb8b48ee1e2e7bdb23d98d5 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Tue, 1 Dec 2020 15:01:53 +0000 Subject: KVM: Documentation: Add arm64 KVM_RUN error codes The API documentation states that general error codes are not detailed, but errors with specific meanings are. On arm64, KVM_RUN can return error numbers with a different meaning than what is described by POSIX or the C99 standard (as taken from man 3 errno). Absent from the newly documented error codes is ERANGE which can be returned when making a change to the EL2 stage 1 tables if the address is larger than the largest supported input address. Assuming no bugs in the implementation, that is not possible because the input addresses which are mapped are the result of applying the macro kern_hyp_va() on kernel virtual addresses. CC: Paolo Bonzini Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201201150157.223625-2-alexandru.elisei@arm.com --- Documentation/virt/kvm/api.rst | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index e00a66d72372..4e5316ed10e9 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -380,9 +380,14 @@ This ioctl is obsolete and has been removed. Errors: - ===== ============================= + ======= ============================================================== EINTR an unmasked signal is pending - ===== ============================= + ENOEXEC the vcpu hasn't been initialized or the guest tried to execute + instructions from device memory (arm64) + ENOSYS data abort outside memslots with no syndrome info and + KVM_CAP_ARM_NISV_TO_USER not enabled (arm64) + EPERM SVE feature set but not finalized (arm64) + ======= ============================================================== This ioctl is used to run a guest virtual cpu. While there are no explicit parameters, there is an implicit parameter block that can be -- cgit 1.4.1 From f16570ba47ff2b3766ebeaba6f4b80ad48cfd6a1 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Tue, 1 Dec 2020 15:01:54 +0000 Subject: KVM: arm64: arch_timer: Remove VGIC initialization check kvm_timer_enable() is called in kvm_vcpu_first_run_init() after kvm_vgic_map_resources() if the VGIC wasn't ready. kvm_vgic_map_resources() is the only place where kvm->arch.vgic.ready is set to true. For a v2 VGIC, kvm_vgic_map_resources() will attempt to initialize the VGIC and set the initialized flag. For a v3 VGIC, kvm_vgic_map_resources() will return an error code if the VGIC isn't already initialized. The end result is that if we've reached kvm_timer_enable(), the VGIC is initialzed and ready and vgic_initialized() will always be true, so remove this check. Signed-off-by: Alexandru Elisei Reviewed-by: Eric Auger [maz: added comment about vgic initialisation, as suggested by Eric] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201201150157.223625-3-alexandru.elisei@arm.com --- arch/arm64/kvm/arch_timer.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 32ba6fbc3814..74e0699661e9 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -1129,9 +1129,10 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) if (!irqchip_in_kernel(vcpu->kvm)) goto no_vgic; - if (!vgic_initialized(vcpu->kvm)) - return -ENODEV; - + /* + * At this stage, we have the guarantee that the vgic is both + * available and initialized. + */ if (!timer_irqs_are_valid(vcpu)) { kvm_debug("incorrectly configured timer irqs\n"); return -EINVAL; -- cgit 1.4.1 From 1c91f06d296de4f0c27022f5ec464e047d471215 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Tue, 1 Dec 2020 15:01:55 +0000 Subject: KVM: arm64: Move double-checked lock to kvm_vgic_map_resources() kvm_vgic_map_resources() is called when a VCPU if first run and it maps all the VGIC MMIO regions. To prevent double-initialization, the VGIC uses the ready variable to keep track of the state of resources and the global KVM mutex to protect against concurrent accesses. After the lock is taken, the variable is checked again in case another VCPU took the lock between the current VCPU reading ready equals false and taking the lock. The double-checked lock pattern is spread across four different functions: in kvm_vcpu_first_run_init(), in kvm_vgic_map_resource() and in vgic_{v2,v3}_map_resources(), which makes it hard to reason about and introduces minor code duplication. Consolidate the checks in kvm_vgic_map_resources(), where the lock is taken. No functional change intended. Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201201150157.223625-4-alexandru.elisei@arm.com --- arch/arm64/kvm/arm.c | 8 +++----- arch/arm64/kvm/vgic/vgic-init.c | 6 ++++++ arch/arm64/kvm/vgic/vgic-v2.c | 3 --- arch/arm64/kvm/vgic/vgic-v3.c | 3 --- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e207e4541f55..ab782c480e9a 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -580,11 +580,9 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) * Map the VGIC hardware resources before running a vcpu the * first time on this VM. */ - if (unlikely(!vgic_ready(kvm))) { - ret = kvm_vgic_map_resources(kvm); - if (ret) - return ret; - } + ret = kvm_vgic_map_resources(kvm); + if (ret) + return ret; } else { /* * Tell the rest of the code that there are userspace irqchip diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 32e32d67a127..a2f4d1c85f00 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -428,7 +428,13 @@ int kvm_vgic_map_resources(struct kvm *kvm) struct vgic_dist *dist = &kvm->arch.vgic; int ret = 0; + if (likely(vgic_ready(kvm))) + return 0; + mutex_lock(&kvm->lock); + if (vgic_ready(kvm)) + goto out; + if (!irqchip_in_kernel(kvm)) goto out; diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index ebf53a4e1296..7f38c1a93639 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -306,9 +306,6 @@ int vgic_v2_map_resources(struct kvm *kvm) struct vgic_dist *dist = &kvm->arch.vgic; int ret = 0; - if (vgic_ready(kvm)) - goto out; - if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) { kvm_err("Need to set vgic cpu and dist addresses first\n"); diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 9cdf39a94a63..35029c5cb0f1 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -500,9 +500,6 @@ int vgic_v3_map_resources(struct kvm *kvm) int ret = 0; int c; - if (vgic_ready(kvm)) - goto out; - kvm_for_each_vcpu(c, vcpu, kvm) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; -- cgit 1.4.1 From de33212f768c5d9e2fe791b008cb26f92f0aa31c Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Tue, 22 Dec 2020 21:54:21 -0500 Subject: virtio_net: Fix recursive call to cpus_read_lock() virtnet_set_channels can recursively call cpus_read_lock if CONFIG_XPS and CONFIG_HOTPLUG are enabled. The path is: virtnet_set_channels - calls get_online_cpus(), which is a trivial wrapper around cpus_read_lock() netif_set_real_num_tx_queues netif_reset_xps_queues_gt netif_reset_xps_queues - calls cpus_read_lock() This call chain and potential deadlock happens when the number of TX queues is reduced. This commit the removes netif_set_real_num_[tr]x_queues calls from inside the get/put_online_cpus section, as they don't require that it be held. Fixes: 47be24796c13 ("virtio-net: fix the set affinity bug when CPU IDs are not consecutive") Signed-off-by: Jeff Dike Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20201223025421.671-1-jdike@akamai.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 052975ea0af4..ee611d107294 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2093,14 +2093,16 @@ static int virtnet_set_channels(struct net_device *dev, get_online_cpus(); err = _virtnet_set_queues(vi, queue_pairs); - if (!err) { - netif_set_real_num_tx_queues(dev, queue_pairs); - netif_set_real_num_rx_queues(dev, queue_pairs); - - virtnet_set_affinity(vi); + if (err) { + put_online_cpus(); + goto err; } + virtnet_set_affinity(vi); put_online_cpus(); + netif_set_real_num_tx_queues(dev, queue_pairs); + netif_set_real_num_rx_queues(dev, queue_pairs); + err: return err; } -- cgit 1.4.1 From b250bf5f924f7b42725fc9e4135aa0b667dfb119 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 22 Dec 2020 09:16:13 -0600 Subject: net: ipa: fix interconnect enable bug When the core clock rate and interconnect bandwidth specifications were moved into configuration data, a copy/paste bug was introduced, causing the memory interconnect bandwidth to be set three times rather than enabling the three different interconnects. Fix this bug. Fixes: 91d02f9551501 ("net: ipa: use config data for clocking") Signed-off-by: Alex Elder Reviewed-by: Georgi Djakov Link: https://lore.kernel.org/r/20201222151613.5730-1-elder@linaro.org Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_clock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ipa/ipa_clock.c b/drivers/net/ipa/ipa_clock.c index 9dcf16f399b7..135c393437f1 100644 --- a/drivers/net/ipa/ipa_clock.c +++ b/drivers/net/ipa/ipa_clock.c @@ -115,13 +115,13 @@ static int ipa_interconnect_enable(struct ipa *ipa) return ret; data = &clock->interconnect_data[IPA_INTERCONNECT_IMEM]; - ret = icc_set_bw(clock->memory_path, data->average_rate, + ret = icc_set_bw(clock->imem_path, data->average_rate, data->peak_rate); if (ret) goto err_memory_path_disable; data = &clock->interconnect_data[IPA_INTERCONNECT_CONFIG]; - ret = icc_set_bw(clock->memory_path, data->average_rate, + ret = icc_set_bw(clock->config_path, data->average_rate, data->peak_rate); if (ret) goto err_imem_path_disable; -- cgit 1.4.1 From 8450e23f142f629e40bd67afc8375c86c7fbf8f1 Mon Sep 17 00:00:00 2001 From: Noor Azura Ahmad Tarmizi Date: Wed, 23 Dec 2020 00:03:37 +0800 Subject: stmmac: intel: Add PCI IDs for TGL-H platform Add TGL-H PCI info and PCI IDs for the new TSN Controller to the list of supported devices. Signed-off-by: Noor Azura Ahmad Tarmizi Signed-off-by: Voon Weifeng Signed-off-by: Muhammad Husaini Zulkifli Link: https://lore.kernel.org/r/20201222160337.30870-1-muhammad.husaini.zulkifli@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index a2e80c89de2d..9a6a519426a0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -721,6 +721,8 @@ static SIMPLE_DEV_PM_OPS(intel_eth_pm_ops, intel_eth_pci_suspend, #define PCI_DEVICE_ID_INTEL_EHL_PSE1_RGMII1G_ID 0x4bb0 #define PCI_DEVICE_ID_INTEL_EHL_PSE1_SGMII1G_ID 0x4bb1 #define PCI_DEVICE_ID_INTEL_EHL_PSE1_SGMII2G5_ID 0x4bb2 +#define PCI_DEVICE_ID_INTEL_TGLH_SGMII1G_0_ID 0x43ac +#define PCI_DEVICE_ID_INTEL_TGLH_SGMII1G_1_ID 0x43a2 #define PCI_DEVICE_ID_INTEL_TGL_SGMII1G_ID 0xa0ac static const struct pci_device_id intel_eth_pci_id_table[] = { @@ -735,6 +737,8 @@ static const struct pci_device_id intel_eth_pci_id_table[] = { { PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII1G_ID, &ehl_pse1_sgmii1g_info) }, { PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII2G5_ID, &ehl_pse1_sgmii1g_info) }, { PCI_DEVICE_DATA(INTEL, TGL_SGMII1G_ID, &tgl_sgmii1g_info) }, + { PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_0_ID, &tgl_sgmii1g_info) }, + { PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_1_ID, &tgl_sgmii1g_info) }, {} }; MODULE_DEVICE_TABLE(pci, intel_eth_pci_id_table); -- cgit 1.4.1 From 94ad8f3ac6aff5acde3f6c4719997efc61e0dccf Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 22 Dec 2020 12:00:10 -0600 Subject: net: ipa: clear pending interrupts before enabling We enable the completion interrupt for channel or event ring commands only when we issue them. The interrupt is disabled after the interrupt has fired, or after we have timed out waiting for it. If we time out, the command could complete after the interrupt has been disabled, causing a state change in the channel or event ring. The interrupt associated with that state change would be delivered the next time the completion interrupt is enabled. To avoid previous command completions interfering with new commands, clear all pending completion interrupts before re-enabling them for a new command. Fixes: b4175f8731f78 ("net: ipa: only enable GSI event control IRQs when needed") Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index c4795249719d..4aee60d62ab0 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -340,7 +340,13 @@ static int evt_ring_command(struct gsi *gsi, u32 evt_ring_id, * is issued here. Only permit *this* event ring to trigger * an interrupt, and only enable the event control IRQ type * when we expect it to occur. + * + * There's a small chance that a previous command completed + * after the interrupt was disabled, so make sure we have no + * pending interrupts before we enable them. */ + iowrite32(~0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_CLR_OFFSET); + val = BIT(evt_ring_id); iowrite32(val, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET); gsi_irq_type_enable(gsi, GSI_EV_CTRL); @@ -453,7 +459,13 @@ gsi_channel_command(struct gsi_channel *channel, enum gsi_ch_cmd_opcode opcode) * issued here. So we only permit *this* channel to trigger * an interrupt and only enable the channel control IRQ type * when we expect it to occur. + * + * There's a small chance that a previous command completed + * after the interrupt was disabled, so make sure we have no + * pending interrupts before we enable them. */ + iowrite32(~0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_CLR_OFFSET); + val = BIT(channel_id); iowrite32(val, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET); gsi_irq_type_enable(gsi, GSI_CH_CTRL); -- cgit 1.4.1 From 6ffddf3b3d182d886d754cfafdf909ccb14f464b Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 22 Dec 2020 12:00:11 -0600 Subject: net: ipa: use state to determine channel command success The result of issuing a channel control command should be that the channel changes state. If enabled, a completion interrupt signals that the channel state has changed. This interrupt is enabled by gsi_channel_command() and disabled again after the command has completed (or we time out). There is a window of time--after the completion interrupt is disabled but before the channel state is read--during which the command could complete successfully without interrupting. This would cause the channel to transition to the desired new state. So whether a channel command ends via completion interrupt or timeout, we can consider the command successful if the channel has entered the desired state (and a failure if it has not, regardless of the cause). Fixes: d6c9e3f506ae8 ("net: ipa: only enable generic command completion IRQ when needed"); Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 4aee60d62ab0..4f0e79176423 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -505,15 +505,15 @@ static int gsi_channel_alloc_command(struct gsi *gsi, u32 channel_id) ret = gsi_channel_command(channel, GSI_CH_ALLOCATE); - /* Channel state will normally have been updated */ + /* If successful the channel state will have changed */ state = gsi_channel_state(channel); - if (!ret && state != GSI_CHANNEL_STATE_ALLOCATED) { - dev_err(dev, "channel %u bad state %u after alloc\n", - channel_id, state); - ret = -EIO; - } + if (state == GSI_CHANNEL_STATE_ALLOCATED) + return 0; - return ret; + dev_err(dev, "channel %u bad state %u after alloc\n", + channel_id, state); + + return -EIO; } /* Start an ALLOCATED channel */ @@ -533,15 +533,15 @@ static int gsi_channel_start_command(struct gsi_channel *channel) ret = gsi_channel_command(channel, GSI_CH_START); - /* Channel state will normally have been updated */ + /* If successful the channel state will have changed */ state = gsi_channel_state(channel); - if (!ret && state != GSI_CHANNEL_STATE_STARTED) { - dev_err(dev, "channel %u bad state %u after start\n", - gsi_channel_id(channel), state); - ret = -EIO; - } + if (state == GSI_CHANNEL_STATE_STARTED) + return 0; - return ret; + dev_err(dev, "channel %u bad state %u after start\n", + gsi_channel_id(channel), state); + + return -EIO; } /* Stop a GSI channel in STARTED state */ @@ -568,10 +568,10 @@ static int gsi_channel_stop_command(struct gsi_channel *channel) ret = gsi_channel_command(channel, GSI_CH_STOP); - /* Channel state will normally have been updated */ + /* If successful the channel state will have changed */ state = gsi_channel_state(channel); - if (ret || state == GSI_CHANNEL_STATE_STOPPED) - return ret; + if (state == GSI_CHANNEL_STATE_STOPPED) + return 0; /* We may have to try again if stop is in progress */ if (state == GSI_CHANNEL_STATE_STOP_IN_PROC) @@ -604,9 +604,9 @@ static void gsi_channel_reset_command(struct gsi_channel *channel) ret = gsi_channel_command(channel, GSI_CH_RESET); - /* Channel state will normally have been updated */ + /* If successful the channel state will have changed */ state = gsi_channel_state(channel); - if (!ret && state != GSI_CHANNEL_STATE_ALLOCATED) + if (state != GSI_CHANNEL_STATE_ALLOCATED) dev_err(dev, "channel %u bad state %u after reset\n", gsi_channel_id(channel), state); } @@ -628,9 +628,10 @@ static void gsi_channel_de_alloc_command(struct gsi *gsi, u32 channel_id) ret = gsi_channel_command(channel, GSI_CH_DE_ALLOC); - /* Channel state will normally have been updated */ + /* If successful the channel state will have changed */ state = gsi_channel_state(channel); - if (!ret && state != GSI_CHANNEL_STATE_NOT_ALLOCATED) + + if (state != GSI_CHANNEL_STATE_NOT_ALLOCATED) dev_err(dev, "channel %u bad state %u after dealloc\n", channel_id, state); } -- cgit 1.4.1 From 428b448ee764a264b7a2eeed295b282755114aa7 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 22 Dec 2020 12:00:12 -0600 Subject: net: ipa: use state to determine event ring command success This patch implements the same basic fix for event rings as the previous one does for channels. The result of issuing an event ring control command should be that the event ring changes state. If enabled, a completion interrupt signals that the event ring state has changed. This interrupt is enabled by gsi_evt_ring_command() and disabled again after the command has completed (or we time out). There is a window of time during which the command could complete successfully without interrupting. This would cause the event ring to transition to the desired new state. So whether a event ring command ends via completion interrupt or timeout, we can consider the command successful if the event ring has entered the desired state (and a failure if it has not, regardless of the cause). Fixes: b4175f8731f78 ("net: ipa: only enable GSI event control IRQs when needed") Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 4f0e79176423..579cc3e51677 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -384,13 +384,15 @@ static int gsi_evt_ring_alloc_command(struct gsi *gsi, u32 evt_ring_id) } ret = evt_ring_command(gsi, evt_ring_id, GSI_EVT_ALLOCATE); - if (!ret && evt_ring->state != GSI_EVT_RING_STATE_ALLOCATED) { - dev_err(gsi->dev, "event ring %u bad state %u after alloc\n", - evt_ring_id, evt_ring->state); - ret = -EIO; - } - return ret; + /* If successful the event ring state will have changed */ + if (evt_ring->state == GSI_EVT_RING_STATE_ALLOCATED) + return 0; + + dev_err(gsi->dev, "event ring %u bad state %u after alloc\n", + evt_ring_id, evt_ring->state); + + return -EIO; } /* Reset a GSI event ring in ALLOCATED or ERROR state. */ @@ -408,9 +410,13 @@ static void gsi_evt_ring_reset_command(struct gsi *gsi, u32 evt_ring_id) } ret = evt_ring_command(gsi, evt_ring_id, GSI_EVT_RESET); - if (!ret && evt_ring->state != GSI_EVT_RING_STATE_ALLOCATED) - dev_err(gsi->dev, "event ring %u bad state %u after reset\n", - evt_ring_id, evt_ring->state); + + /* If successful the event ring state will have changed */ + if (evt_ring->state == GSI_EVT_RING_STATE_ALLOCATED) + return; + + dev_err(gsi->dev, "event ring %u bad state %u after reset\n", + evt_ring_id, evt_ring->state); } /* Issue a hardware de-allocation request for an allocated event ring */ @@ -426,9 +432,13 @@ static void gsi_evt_ring_de_alloc_command(struct gsi *gsi, u32 evt_ring_id) } ret = evt_ring_command(gsi, evt_ring_id, GSI_EVT_DE_ALLOC); - if (!ret && evt_ring->state != GSI_EVT_RING_STATE_NOT_ALLOCATED) - dev_err(gsi->dev, "event ring %u bad state %u after dealloc\n", - evt_ring_id, evt_ring->state); + + /* If successful the event ring state will have changed */ + if (evt_ring->state == GSI_EVT_RING_STATE_NOT_ALLOCATED) + return; + + dev_err(gsi->dev, "event ring %u bad state %u after dealloc\n", + evt_ring_id, evt_ring->state); } /* Fetch the current state of a channel from hardware */ -- cgit 1.4.1 From 826f328e2b7e8854dd42ea44e6519cd75018e7b1 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 22 Dec 2020 22:49:44 +0100 Subject: net: dcb: Validate netlink message in DCB handler DCB uses the same handler function for both RTM_GETDCB and RTM_SETDCB messages. dcb_doit() bounces RTM_SETDCB mesasges if the user does not have the CAP_NET_ADMIN capability. However, the operation to be performed is not decided from the DCB message type, but from the DCB command. Thus DCB_CMD_*_GET commands are used for reading DCB objects, the corresponding SET and DEL commands are used for manipulation. The assumption is that set-like commands will be sent via an RTM_SETDCB message, and get-like ones via RTM_GETDCB. However, this assumption is not enforced. It is therefore possible to manipulate DCB objects without CAP_NET_ADMIN capability by sending the corresponding command in an RTM_GETDCB message. That is a bug. Fix it by validating the type of the request message against the type used for the response. Fixes: 2f90b8657ec9 ("ixgbe: this patch adds support for DCB to the kernel and ixgbe driver") Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/a2a9b88418f3a58ef211b718f2970128ef9e3793.1608673640.git.me@pmachata.org Signed-off-by: Jakub Kicinski --- net/dcb/dcbnl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 084e159a12ba..7d49b6fd6cef 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1765,6 +1765,8 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, fn = &reply_funcs[dcb->cmd]; if (!fn->cb) return -EOPNOTSUPP; + if (fn->type != nlh->nlmsg_type) + return -EPERM; if (!tb[DCB_ATTR_IFNAME]) return -EINVAL; -- cgit 1.4.1 From 427c940558560bff2583d07fc119a21094675982 Mon Sep 17 00:00:00 2001 From: John Wang Date: Wed, 23 Dec 2020 13:55:23 +0800 Subject: net/ncsi: Use real net-device for response handler When aggregating ncsi interfaces and dedicated interfaces to bond interfaces, the ncsi response handler will use the wrong net device to find ncsi_dev, so that the ncsi interface will not work properly. Here, we use the original net device to fix it. Fixes: 138635cc27c9 ("net/ncsi: NCSI response packet handler") Signed-off-by: John Wang Link: https://lore.kernel.org/r/20201223055523.2069-1-wangzhiqiang.bj@bytedance.com Signed-off-by: Jakub Kicinski --- net/ncsi/ncsi-rsp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 5b1f4ec66dd9..888ccc2d4e34 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -1120,7 +1120,7 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev, int payload, i, ret; /* Find the NCSI device */ - nd = ncsi_find_dev(dev); + nd = ncsi_find_dev(orig_dev); ndp = nd ? TO_NCSI_DEV_PRIV(nd) : NULL; if (!ndp) return -ENODEV; -- cgit 1.4.1 From 5d41f9b7ee7a5a5138894f58846a4ffed601498a Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Wed, 23 Dec 2020 19:06:12 +0800 Subject: net: ethernet: Fix memleak in ethoc_probe When mdiobus_register() fails, priv->mdio allocated by mdiobus_alloc() has not been freed, which leads to memleak. Fixes: e7f4dc3536a4 ("mdio: Move allocation of interrupts into core") Signed-off-by: Dinghao Liu Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20201223110615.31389-1-dinghao.liu@zju.edu.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ethoc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index 0981fe9652e5..3d9b0b161e24 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -1211,7 +1211,7 @@ static int ethoc_probe(struct platform_device *pdev) ret = mdiobus_register(priv->mdio); if (ret) { dev_err(&netdev->dev, "failed to register MDIO bus\n"); - goto free2; + goto free3; } ret = ethoc_mdio_probe(netdev); @@ -1243,6 +1243,7 @@ error2: netif_napi_del(&priv->napi); error: mdiobus_unregister(priv->mdio); +free3: mdiobus_free(priv->mdio); free2: clk_disable_unprepare(priv->clk); -- cgit 1.4.1 From 1f45dc22066797479072978feeada0852502e180 Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Wed, 23 Dec 2020 14:49:04 -0600 Subject: ibmvnic: continue fatal error reset after passive init Commit f9c6cea0b385 ("ibmvnic: Skip fatal error reset after passive init") says "If the passive CRQ initialization occurs before the FATAL reset task is processed, the FATAL error reset task would try to access a CRQ message queue that was freed, causing an oops. The problem may be most likely to occur during DLPAR add vNIC with a non-default MTU, because the DLPAR process will automatically issue a change MTU request. Fix this by not processing fatal error reset if CRQ is passively initialized after client-driven CRQ initialization fails." The original commit skips a specific reset condition, but that does not fix the problem it claims to fix, and misses a reset condition. The effective fix is commit 0e435befaea4 ("ibmvnic: fix NULL pointer dereference in ibmvic_reset_crq") and commit a0faaa27c716 ("ibmvnic: fix NULL pointer dereference in reset_sub_crq_queues"). With above two fixes, there are no more crashes seen as described even without the original commit, so I would like to revert the original commit. Fixes: f9c6cea0b385 ("ibmvnic: Skip fatal error reset after passive init") Signed-off-by: Lijun Pan Link: https://lore.kernel.org/r/20201223204904.12677-1-ljp@linux.ibm.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index bd0281020d51..91ebcde30292 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2342,8 +2342,7 @@ static void __ibmvnic_reset(struct work_struct *work) set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(60 * HZ); } - } else if (!(rwi->reset_reason == VNIC_RESET_FATAL && - adapter->from_passive_init)) { + } else { rc = do_reset(adapter, rwi, reset_state); } kfree(rwi); -- cgit 1.4.1 From 808e0d8832cc81738f3e8df12dff0688352baf50 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 14 Dec 2020 13:29:32 -0600 Subject: e1000e: Only run S0ix flows if shutdown succeeded If the shutdown failed, the part will be thawed and running S0ix flows will put it into an undefined state. Reported-by: Alexander Duyck Reviewed-by: Alexander Duyck Signed-off-by: Mario Limonciello Tested-by: Yijun Shen Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/netdev.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 128ab6898070..6588f5d4a2be 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6970,13 +6970,14 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev) e1000e_pm_freeze(dev); rc = __e1000_shutdown(pdev, false); - if (rc) + if (rc) { e1000e_pm_thaw(dev); - - /* Introduce S0ix implementation */ - if (hw->mac.type >= e1000_pch_cnp && - !e1000e_check_me(hw->adapter->pdev->device)) - e1000e_s0ix_entry_flow(adapter); + } else { + /* Introduce S0ix implementation */ + if (hw->mac.type >= e1000_pch_cnp && + !e1000e_check_me(hw->adapter->pdev->device)) + e1000e_s0ix_entry_flow(adapter); + } return rc; } -- cgit 1.4.1 From 3cf31b1a9effd859bb3d6ff9f8b5b0d5e6cac952 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 14 Dec 2020 13:29:33 -0600 Subject: e1000e: bump up timeout to wait when ME un-configures ULP mode Per guidance from Intel ethernet architecture team, it may take up to 1 second for unconfiguring ULP mode. However in practice this seems to be taking up to 2 seconds on some Lenovo machines. Detect scenarios that take more than 1 second but less than 2.5 seconds and emit a warning on resume for those scenarios. Suggested-by: Aaron Ma Suggested-by: Sasha Netfin Suggested-by: Hans de Goede CC: Mark Pearson Fixes: f15bb6dde738cc8fa0 ("e1000e: Add support for S0ix") BugLink: https://bugs.launchpad.net/bugs/1865570 Link: https://patchwork.ozlabs.org/project/intel-wired-lan/patch/20200323191639.48826-1-aaron.ma@canonical.com/ Link: https://lkml.org/lkml/2020/12/13/15 Link: https://lkml.org/lkml/2020/12/14/708 Signed-off-by: Mario Limonciello Reviewed-by: Hans de Goede Tested-by: Yijun Shen Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 9aa6fad8ed47..6fb46682b058 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1240,6 +1240,9 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) return 0; if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) { + struct e1000_adapter *adapter = hw->adapter; + bool firmware_bug = false; + if (force) { /* Request ME un-configure ULP mode in the PHY */ mac_reg = er32(H2ME); @@ -1248,16 +1251,24 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) ew32(H2ME, mac_reg); } - /* Poll up to 300msec for ME to clear ULP_CFG_DONE. */ + /* Poll up to 2.5 seconds for ME to clear ULP_CFG_DONE. + * If this takes more than 1 second, show a warning indicating a + * firmware bug + */ while (er32(FWSM) & E1000_FWSM_ULP_CFG_DONE) { - if (i++ == 30) { + if (i++ == 250) { ret_val = -E1000_ERR_PHY; goto out; } + if (i > 100 && !firmware_bug) + firmware_bug = true; usleep_range(10000, 11000); } - e_dbg("ULP_CONFIG_DONE cleared after %dmsec\n", i * 10); + if (firmware_bug) + e_warn("ULP_CONFIG_DONE took %dmsec. This is a firmware bug\n", i * 10); + else + e_dbg("ULP_CONFIG_DONE cleared after %dmsec\n", i * 10); if (force) { mac_reg = er32(H2ME); -- cgit 1.4.1 From 6cecf02e77ab9bf97e9252f9fcb8f0738a6de12c Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 14 Dec 2020 13:29:34 -0600 Subject: Revert "e1000e: disable s0ix entry and exit flows for ME systems" commit e086ba2fccda ("e1000e: disable s0ix entry and exit flows for ME systems") disabled s0ix flows for systems that have various incarnations of the i219-LM ethernet controller. This changed caused power consumption regressions on the following shipping Dell Comet Lake based laptops: * Latitude 5310 * Latitude 5410 * Latitude 5410 * Latitude 5510 * Precision 3550 * Latitude 5411 * Latitude 5511 * Precision 3551 * Precision 7550 * Precision 7750 This commit was introduced because of some regressions on certain Thinkpad laptops. This comment was potentially caused by an earlier commit 632fbd5eb5b0e ("e1000e: fix S0ix flows for cable connected case"). or it was possibly caused by a system not meeting platform architectural requirements for low power consumption. Other changes made in the driver with extended timeouts are expected to make the driver more impervious to platform firmware behavior. Fixes: e086ba2fccda ("e1000e: disable s0ix entry and exit flows for ME systems") Reviewed-by: Alexander Duyck Signed-off-by: Mario Limonciello Reviewed-by: Hans de Goede Tested-by: Yijun Shen Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/netdev.c | 45 ++---------------------------- 1 file changed, 2 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 6588f5d4a2be..b9800ba2006c 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -103,45 +103,6 @@ static const struct e1000_reg_info e1000_reg_info_tbl[] = { {0, NULL} }; -struct e1000e_me_supported { - u16 device_id; /* supported device ID */ -}; - -static const struct e1000e_me_supported me_supported[] = { - {E1000_DEV_ID_PCH_LPT_I217_LM}, - {E1000_DEV_ID_PCH_LPTLP_I218_LM}, - {E1000_DEV_ID_PCH_I218_LM2}, - {E1000_DEV_ID_PCH_I218_LM3}, - {E1000_DEV_ID_PCH_SPT_I219_LM}, - {E1000_DEV_ID_PCH_SPT_I219_LM2}, - {E1000_DEV_ID_PCH_LBG_I219_LM3}, - {E1000_DEV_ID_PCH_SPT_I219_LM4}, - {E1000_DEV_ID_PCH_SPT_I219_LM5}, - {E1000_DEV_ID_PCH_CNP_I219_LM6}, - {E1000_DEV_ID_PCH_CNP_I219_LM7}, - {E1000_DEV_ID_PCH_ICP_I219_LM8}, - {E1000_DEV_ID_PCH_ICP_I219_LM9}, - {E1000_DEV_ID_PCH_CMP_I219_LM10}, - {E1000_DEV_ID_PCH_CMP_I219_LM11}, - {E1000_DEV_ID_PCH_CMP_I219_LM12}, - {E1000_DEV_ID_PCH_TGP_I219_LM13}, - {E1000_DEV_ID_PCH_TGP_I219_LM14}, - {E1000_DEV_ID_PCH_TGP_I219_LM15}, - {0} -}; - -static bool e1000e_check_me(u16 device_id) -{ - struct e1000e_me_supported *id; - - for (id = (struct e1000e_me_supported *)me_supported; - id->device_id; id++) - if (device_id == id->device_id) - return true; - - return false; -} - /** * __ew32_prepare - prepare to write to MAC CSR register on certain parts * @hw: pointer to the HW structure @@ -6974,8 +6935,7 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev) e1000e_pm_thaw(dev); } else { /* Introduce S0ix implementation */ - if (hw->mac.type >= e1000_pch_cnp && - !e1000e_check_me(hw->adapter->pdev->device)) + if (hw->mac.type >= e1000_pch_cnp) e1000e_s0ix_entry_flow(adapter); } @@ -6991,8 +6951,7 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev) int rc; /* Introduce S0ix implementation */ - if (hw->mac.type >= e1000_pch_cnp && - !e1000e_check_me(hw->adapter->pdev->device)) + if (hw->mac.type >= e1000_pch_cnp) e1000e_s0ix_exit_flow(adapter); rc = __e1000_resume(pdev); -- cgit 1.4.1 From 3c98cbf22a96c1b12f48c1b2a4680dfe5cb280f9 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 14 Dec 2020 13:29:35 -0600 Subject: e1000e: Export S0ix flags to ethtool This flag can be used by an end user to disable S0ix flows on a buggy system or by an OEM for development purposes. If you need this flag to be persisted across reboots, it's suggested to use a udev rule to call adjust it until the kernel could have your configuration in a disallow list. Signed-off-by: Mario Limonciello Reviewed-by: Hans de Goede Tested-by: Yijun Shen Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/e1000.h | 1 + drivers/net/ethernet/intel/e1000e/ethtool.c | 46 +++++++++++++++++++++++++++++ drivers/net/ethernet/intel/e1000e/netdev.c | 9 +++--- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index ba7a0f8f6937..5b2143f4b1f8 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -436,6 +436,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca); #define FLAG2_DFLT_CRC_STRIPPING BIT(12) #define FLAG2_CHECK_RX_HWTSTAMP BIT(13) #define FLAG2_CHECK_SYSTIM_OVERFLOW BIT(14) +#define FLAG2_ENABLE_S0IX_FLOWS BIT(15) #define E1000_RX_DESC_PS(R, i) \ (&(((union e1000_rx_desc_packet_split *)((R).desc))[i])) diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index 03215b0aee4b..06442e6bef73 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -23,6 +23,13 @@ struct e1000_stats { int stat_offset; }; +static const char e1000e_priv_flags_strings[][ETH_GSTRING_LEN] = { +#define E1000E_PRIV_FLAGS_S0IX_ENABLED BIT(0) + "s0ix-enabled", +}; + +#define E1000E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(e1000e_priv_flags_strings) + #define E1000_STAT(str, m) { \ .stat_string = str, \ .type = E1000_STATS, \ @@ -1776,6 +1783,8 @@ static int e1000e_get_sset_count(struct net_device __always_unused *netdev, return E1000_TEST_LEN; case ETH_SS_STATS: return E1000_STATS_LEN; + case ETH_SS_PRIV_FLAGS: + return E1000E_PRIV_FLAGS_STR_LEN; default: return -EOPNOTSUPP; } @@ -2097,6 +2106,10 @@ static void e1000_get_strings(struct net_device __always_unused *netdev, p += ETH_GSTRING_LEN; } break; + case ETH_SS_PRIV_FLAGS: + memcpy(data, e1000e_priv_flags_strings, + E1000E_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN); + break; } } @@ -2305,6 +2318,37 @@ static int e1000e_get_ts_info(struct net_device *netdev, return 0; } +static u32 e1000e_get_priv_flags(struct net_device *netdev) +{ + struct e1000_adapter *adapter = netdev_priv(netdev); + u32 priv_flags = 0; + + if (adapter->flags2 & FLAG2_ENABLE_S0IX_FLOWS) + priv_flags |= E1000E_PRIV_FLAGS_S0IX_ENABLED; + + return priv_flags; +} + +static int e1000e_set_priv_flags(struct net_device *netdev, u32 priv_flags) +{ + struct e1000_adapter *adapter = netdev_priv(netdev); + unsigned int flags2 = adapter->flags2; + + flags2 &= ~FLAG2_ENABLE_S0IX_FLOWS; + if (priv_flags & E1000E_PRIV_FLAGS_S0IX_ENABLED) { + struct e1000_hw *hw = &adapter->hw; + + if (hw->mac.type < e1000_pch_cnp) + return -EINVAL; + flags2 |= FLAG2_ENABLE_S0IX_FLOWS; + } + + if (flags2 != adapter->flags2) + adapter->flags2 = flags2; + + return 0; +} + static const struct ethtool_ops e1000_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS, .get_drvinfo = e1000_get_drvinfo, @@ -2336,6 +2380,8 @@ static const struct ethtool_ops e1000_ethtool_ops = { .set_eee = e1000e_set_eee, .get_link_ksettings = e1000_get_link_ksettings, .set_link_ksettings = e1000_set_link_ksettings, + .get_priv_flags = e1000e_get_priv_flags, + .set_priv_flags = e1000e_set_priv_flags, }; void e1000e_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index b9800ba2006c..e9b82c209c2d 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6923,7 +6923,6 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev) struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); struct e1000_adapter *adapter = netdev_priv(netdev); struct pci_dev *pdev = to_pci_dev(dev); - struct e1000_hw *hw = &adapter->hw; int rc; e1000e_flush_lpic(pdev); @@ -6935,7 +6934,7 @@ static __maybe_unused int e1000e_pm_suspend(struct device *dev) e1000e_pm_thaw(dev); } else { /* Introduce S0ix implementation */ - if (hw->mac.type >= e1000_pch_cnp) + if (adapter->flags2 & FLAG2_ENABLE_S0IX_FLOWS) e1000e_s0ix_entry_flow(adapter); } @@ -6947,11 +6946,10 @@ static __maybe_unused int e1000e_pm_resume(struct device *dev) struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); struct e1000_adapter *adapter = netdev_priv(netdev); struct pci_dev *pdev = to_pci_dev(dev); - struct e1000_hw *hw = &adapter->hw; int rc; /* Introduce S0ix implementation */ - if (hw->mac.type >= e1000_pch_cnp) + if (adapter->flags2 & FLAG2_ENABLE_S0IX_FLOWS) e1000e_s0ix_exit_flow(adapter); rc = __e1000_resume(pdev); @@ -7615,6 +7613,9 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!(adapter->flags & FLAG_HAS_AMT)) e1000e_get_hw_control(adapter); + if (hw->mac.type >= e1000_pch_cnp) + adapter->flags2 |= FLAG2_ENABLE_S0IX_FLOWS; + strlcpy(netdev->name, "eth%d", sizeof(netdev->name)); err = register_netdev(netdev); if (err) -- cgit 1.4.1 From 11b844b0b7c7c3dc8e8f4d0bbaad5e798351862c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 23 Dec 2020 12:06:52 -0800 Subject: selftests/bpf: Work-around EBUSY errors from hashmap update/delete 20b6cc34ea74 ("bpf: Avoid hashtab deadlock with map_locked") introduced a possibility of getting EBUSY error on lock contention, which seems to happen very deterministically in test_maps when running 1024 threads on low-CPU machine. In libbpf CI case, it's a 2 CPU VM and it's hitting this 100% of the time. Work around by retrying on EBUSY (and EAGAIN, while we are at it) after a small sleep. sched_yield() is too agressive and fails even after 20 retries, so I went with usleep(1) for backoff. Also log actual error returned to make it easier to see what's going on. Fixes: 20b6cc34ea74 ("bpf: Avoid hashtab deadlock with map_locked") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201223200652.3417075-1-andrii@kernel.org --- tools/testing/selftests/bpf/test_maps.c | 48 ++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 0ad3e6305ff0..51adc42b2b40 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1312,22 +1312,58 @@ static void test_map_stress(void) #define DO_UPDATE 1 #define DO_DELETE 0 +#define MAP_RETRIES 20 + +static int map_update_retriable(int map_fd, const void *key, const void *value, + int flags, int attempts) +{ + while (bpf_map_update_elem(map_fd, key, value, flags)) { + if (!attempts || (errno != EAGAIN && errno != EBUSY)) + return -errno; + + usleep(1); + attempts--; + } + + return 0; +} + +static int map_delete_retriable(int map_fd, const void *key, int attempts) +{ + while (bpf_map_delete_elem(map_fd, key)) { + if (!attempts || (errno != EAGAIN && errno != EBUSY)) + return -errno; + + usleep(1); + attempts--; + } + + return 0; +} + static void test_update_delete(unsigned int fn, void *data) { int do_update = ((int *)data)[1]; int fd = ((int *)data)[0]; - int i, key, value; + int i, key, value, err; for (i = fn; i < MAP_SIZE; i += TASKS) { key = value = i; if (do_update) { - assert(bpf_map_update_elem(fd, &key, &value, - BPF_NOEXIST) == 0); - assert(bpf_map_update_elem(fd, &key, &value, - BPF_EXIST) == 0); + err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES); + if (err) + printf("error %d %d\n", err, errno); + assert(err == 0); + err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES); + if (err) + printf("error %d %d\n", err, errno); + assert(err == 0); } else { - assert(bpf_map_delete_elem(fd, &key) == 0); + err = map_delete_retriable(fd, &key, MAP_RETRIES); + if (err) + printf("error %d %d\n", err, errno); + assert(err == 0); } } } -- cgit 1.4.1 From 69ca310f34168eae0ada434796bfc22fb4a0fa26 Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Fri, 18 Dec 2020 10:50:30 -0800 Subject: bpf: Save correct stopping point in file seq iteration On some systems, some variant of the following splat is repeatedly seen. The common factor in all traces seems to be the entry point to task_file_seq_next(). With the patch, all warnings go away. rcu: INFO: rcu_sched self-detected stall on CPU rcu: \x0926-....: (20992 ticks this GP) idle=d7e/1/0x4000000000000002 softirq=81556231/81556231 fqs=4876 \x09(t=21033 jiffies g=159148529 q=223125) NMI backtrace for cpu 26 CPU: 26 PID: 2015853 Comm: bpftool Kdump: loaded Not tainted 5.6.13-0_fbk4_3876_gd8d1f9bf80bb #1 Hardware name: Quanta Twin Lakes MP/Twin Lakes Passive MP, BIOS F09_3A12 10/08/2018 Call Trace: dump_stack+0x50/0x70 nmi_cpu_backtrace.cold.6+0x13/0x50 ? lapic_can_unplug_cpu.cold.30+0x40/0x40 nmi_trigger_cpumask_backtrace+0xba/0xca rcu_dump_cpu_stacks+0x99/0xc7 rcu_sched_clock_irq.cold.90+0x1b4/0x3aa ? tick_sched_do_timer+0x60/0x60 update_process_times+0x24/0x50 tick_sched_timer+0x37/0x70 __hrtimer_run_queues+0xfe/0x270 hrtimer_interrupt+0xf4/0x210 smp_apic_timer_interrupt+0x5e/0x120 apic_timer_interrupt+0xf/0x20 RIP: 0010:get_pid_task+0x38/0x80 Code: 89 f6 48 8d 44 f7 08 48 8b 00 48 85 c0 74 2b 48 83 c6 55 48 c1 e6 04 48 29 f0 74 19 48 8d 78 20 ba 01 00 00 00 f0 0f c1 50 20 <85> d2 74 27 78 11 83 c2 01 78 0c 48 83 c4 08 c3 31 c0 48 83 c4 08 RSP: 0018:ffffc9000d293dc8 EFLAGS: 00000202 ORIG_RAX: ffffffffffffff13 RAX: ffff888637c05600 RBX: ffffc9000d293e0c RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000550 RDI: ffff888637c05620 RBP: ffffffff8284eb80 R08: ffff88831341d300 R09: ffff88822ffd8248 R10: ffff88822ffd82d0 R11: 00000000003a93c0 R12: 0000000000000001 R13: 00000000ffffffff R14: ffff88831341d300 R15: 0000000000000000 ? find_ge_pid+0x1b/0x20 task_seq_get_next+0x52/0xc0 task_file_seq_get_next+0x159/0x220 task_file_seq_next+0x4f/0xa0 bpf_seq_read+0x159/0x390 vfs_read+0x8a/0x140 ksys_read+0x59/0xd0 do_syscall_64+0x42/0x110 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f95ae73e76e Code: Bad RIP value. RSP: 002b:00007ffc02c1dbf8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 000000000170faa0 RCX: 00007f95ae73e76e RDX: 0000000000001000 RSI: 00007ffc02c1dc30 RDI: 0000000000000007 RBP: 00007ffc02c1ec70 R08: 0000000000000005 R09: 0000000000000006 R10: fffffffffffff20b R11: 0000000000000246 R12: 00000000019112a0 R13: 0000000000000000 R14: 0000000000000007 R15: 00000000004283c0 If unable to obtain the file structure for the current task, proceed to the next task number after the one returned from task_seq_get_next(), instead of the next task number from the original iterator. Also, save the stopping task number from task_seq_get_next() on failure in case of restarts. Fixes: eaaacd23910f ("bpf: Add task and task/file iterator targets") Signed-off-by: Jonathan Lemon Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201218185032.2464558-2-jonathan.lemon@gmail.com --- kernel/bpf/task_iter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 0458a40edf10..8033ab19138a 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -158,13 +158,14 @@ again: if (!curr_task) { info->task = NULL; info->files = NULL; + info->tid = curr_tid; return NULL; } curr_files = get_files_struct(curr_task); if (!curr_files) { put_task_struct(curr_task); - curr_tid = ++(info->tid); + curr_tid = curr_tid + 1; info->fd = 0; goto again; } -- cgit 1.4.1 From a61daaf351da7c8493f2586437617d60c24350b0 Mon Sep 17 00:00:00 2001 From: Jonathan Lemon Date: Fri, 18 Dec 2020 10:50:31 -0800 Subject: bpf: Use thread_group_leader() Instead of directly comparing task->tgid and task->pid, use the thread_group_leader() helper. This helps with readability, and there should be no functional change. Signed-off-by: Jonathan Lemon Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20201218185032.2464558-3-jonathan.lemon@gmail.com --- kernel/bpf/task_iter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 8033ab19138a..dc4007f1843b 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -37,7 +37,7 @@ retry: if (!task) { ++*tid; goto retry; - } else if (skip_if_dup_files && task->tgid != task->pid && + } else if (skip_if_dup_files && !thread_group_leader(task) && task->files == task->group_leader->files) { put_task_struct(task); task = NULL; -- cgit 1.4.1 From e13a6915a03ffc3ce332d28c141a335e25187fa3 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Wed, 23 Dec 2020 15:36:38 +0100 Subject: vhost/vsock: add IOTLB API support This patch enables the IOTLB API support for vhost-vsock devices, allowing the userspace to emulate an IOMMU for the guest. These changes were made following vhost-net, in details this patch: - exposes VIRTIO_F_ACCESS_PLATFORM feature and inits the iotlb device if the feature is acked - implements VHOST_GET_BACKEND_FEATURES and VHOST_SET_BACKEND_FEATURES ioctls - calls vq_meta_prefetch() before vq processing to prefetch vq metadata address in IOTLB - provides .read_iter, .write_iter, and .poll callbacks for the chardev; they are used by the userspace to exchange IOTLB messages This patch was tested specifying "intel_iommu=strict" in the guest kernel command line. I used QEMU with a patch applied [1] to fix a simple issue (that patch was merged in QEMU v5.2.0): $ qemu -M q35,accel=kvm,kernel-irqchip=split \ -drive file=fedora.qcow2,format=qcow2,if=virtio \ -device intel-iommu,intremap=on,device-iotlb=on \ -device vhost-vsock-pci,guest-cid=3,iommu_platform=on,ats=on [1] https://lists.gnu.org/archive/html/qemu-devel/2020-10/msg09077.html Reviewed-by: Stefan Hajnoczi Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201223143638.123417-1-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vhost/vsock.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 65 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index a483cec31d5c..5e78fb719602 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -30,7 +30,12 @@ #define VHOST_VSOCK_PKT_WEIGHT 256 enum { - VHOST_VSOCK_FEATURES = VHOST_FEATURES, + VHOST_VSOCK_FEATURES = VHOST_FEATURES | + (1ULL << VIRTIO_F_ACCESS_PLATFORM) +}; + +enum { + VHOST_VSOCK_BACKEND_FEATURES = (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) }; /* Used to track all the vhost_vsock instances on the system. */ @@ -94,6 +99,9 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, if (!vhost_vq_get_backend(vq)) goto out; + if (!vq_meta_prefetch(vq)) + goto out; + /* Avoid further vmexits, we're already processing the virtqueue */ vhost_disable_notify(&vsock->dev, vq); @@ -449,6 +457,9 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) if (!vhost_vq_get_backend(vq)) goto out; + if (!vq_meta_prefetch(vq)) + goto out; + vhost_disable_notify(&vsock->dev, vq); do { u32 len; @@ -766,8 +777,12 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features) mutex_lock(&vsock->dev.mutex); if ((features & (1 << VHOST_F_LOG_ALL)) && !vhost_log_access_ok(&vsock->dev)) { - mutex_unlock(&vsock->dev.mutex); - return -EFAULT; + goto err; + } + + if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) { + if (vhost_init_device_iotlb(&vsock->dev, true)) + goto err; } for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { @@ -778,6 +793,10 @@ static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features) } mutex_unlock(&vsock->dev.mutex); return 0; + +err: + mutex_unlock(&vsock->dev.mutex); + return -EFAULT; } static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, @@ -811,6 +830,18 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, if (copy_from_user(&features, argp, sizeof(features))) return -EFAULT; return vhost_vsock_set_features(vsock, features); + case VHOST_GET_BACKEND_FEATURES: + features = VHOST_VSOCK_BACKEND_FEATURES; + if (copy_to_user(argp, &features, sizeof(features))) + return -EFAULT; + return 0; + case VHOST_SET_BACKEND_FEATURES: + if (copy_from_user(&features, argp, sizeof(features))) + return -EFAULT; + if (features & ~VHOST_VSOCK_BACKEND_FEATURES) + return -EOPNOTSUPP; + vhost_set_backend_features(&vsock->dev, features); + return 0; default: mutex_lock(&vsock->dev.mutex); r = vhost_dev_ioctl(&vsock->dev, ioctl, argp); @@ -823,6 +854,34 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, } } +static ssize_t vhost_vsock_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + struct vhost_vsock *vsock = file->private_data; + struct vhost_dev *dev = &vsock->dev; + int noblock = file->f_flags & O_NONBLOCK; + + return vhost_chr_read_iter(dev, to, noblock); +} + +static ssize_t vhost_vsock_chr_write_iter(struct kiocb *iocb, + struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct vhost_vsock *vsock = file->private_data; + struct vhost_dev *dev = &vsock->dev; + + return vhost_chr_write_iter(dev, from); +} + +static __poll_t vhost_vsock_chr_poll(struct file *file, poll_table *wait) +{ + struct vhost_vsock *vsock = file->private_data; + struct vhost_dev *dev = &vsock->dev; + + return vhost_chr_poll(file, dev, wait); +} + static const struct file_operations vhost_vsock_fops = { .owner = THIS_MODULE, .open = vhost_vsock_dev_open, @@ -830,6 +889,9 @@ static const struct file_operations vhost_vsock_fops = { .llseek = noop_llseek, .unlocked_ioctl = vhost_vsock_dev_ioctl, .compat_ioctl = compat_ptr_ioctl, + .read_iter = vhost_vsock_chr_read_iter, + .write_iter = vhost_vsock_chr_write_iter, + .poll = vhost_vsock_chr_poll, }; static struct miscdevice vhost_vsock_misc = { -- cgit 1.4.1 From 6cb56218ad9e580e519dcd23bfb3db08d8692e5a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Dec 2020 23:23:56 +0100 Subject: netfilter: xt_RATEEST: reject non-null terminated string from userspace syzbot reports: detected buffer overflow in strlen [..] Call Trace: strlen include/linux/string.h:325 [inline] strlcpy include/linux/string.h:348 [inline] xt_rateest_tg_checkentry+0x2a5/0x6b0 net/netfilter/xt_RATEEST.c:143 strlcpy assumes src is a c-string. Check info->name before its used. Reported-by: syzbot+e86f7c428c8c50db65b4@syzkaller.appspotmail.com Fixes: 5859034d7eb8793 ("[NETFILTER]: x_tables: add RATEEST target") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_RATEEST.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 37253d399c6b..0d5c422f8745 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -115,6 +115,9 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) } cfg; int ret; + if (strnlen(info->name, sizeof(est->name)) >= sizeof(est->name)) + return -ENAMETOOLONG; + net_get_random_once(&jhash_rnd, sizeof(jhash_rnd)); mutex_lock(&xn->hash_lock); -- cgit 1.4.1 From 9e5c23b9bd71d00b07720b2a8037b019d356e9df Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Tue, 1 Dec 2020 15:01:56 +0000 Subject: KVM: arm64: Update comment in kvm_vgic_map_resources() vgic_v3_map_resources() returns -EBUSY if the VGIC isn't initialized, update the comment to kvm_vgic_map_resources() to match what the function does. Signed-off-by: Alexandru Elisei Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201201150157.223625-5-alexandru.elisei@arm.com --- arch/arm64/kvm/vgic/vgic-init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index a2f4d1c85f00..5b54787a9ad5 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -419,7 +419,8 @@ int vgic_lazy_init(struct kvm *kvm) * Map the MMIO regions depending on the VGIC model exposed to the guest * called on the first VCPU run. * Also map the virtual CPU interface into the VM. - * v2/v3 derivatives call vgic_init if not already done. + * v2 calls vgic_init() if not already done. + * v3 and derivatives return an error if the VGIC is not initialized. * vgic_ready() returns true if this function has succeeded. * @kvm: kvm struct pointer */ -- cgit 1.4.1 From 282ff80135717cc43f1e33ddd4b0cd9e760d060b Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Tue, 1 Dec 2020 15:01:57 +0000 Subject: KVM: arm64: Remove redundant call to kvm_pmu_vcpu_reset() KVM_ARM_VCPU_INIT ioctl calls kvm_reset_vcpu(), which in turn resets the PMU with a call to kvm_pmu_vcpu_reset(). The function zeroes the PMU chained counters bitmap and stops all the counters with a perf event attached. Because it is called before the VCPU has had the chance to run, no perf events are in use and none are released. kvm_arm_pmu_v3_enable(), called by kvm_vcpu_first_run_init() only if the VCPU has been initialized, also resets the PMU. kvm_pmu_vcpu_reset() in this case does the exact same thing as the previous call, so remove it. Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201201150157.223625-6-alexandru.elisei@arm.com --- arch/arm64/kvm/pmu-emul.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 398f6df1bbe4..4ad66a532e38 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -850,8 +850,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) return -EINVAL; } - kvm_pmu_vcpu_reset(vcpu); - return 0; } -- cgit 1.4.1 From 101068b566ef227b605d807aad9e72efd8b6bc5b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 27 Dec 2020 14:28:34 +0000 Subject: KVM: arm64: Consolidate dist->ready setting into kvm_vgic_map_resources() dist->ready setting is pointlessly spread across the two vgic backends, while it could be consolidated in kvm_vgic_map_resources(). Move it there, and slightly simplify the flows in both backends. Suggested-by: Eric Auger Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-init.c | 2 ++ arch/arm64/kvm/vgic/vgic-v2.c | 17 ++++++----------- arch/arm64/kvm/vgic/vgic-v3.c | 18 ++++++------------ 3 files changed, 14 insertions(+), 23 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 5b54787a9ad5..052917deb149 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -446,6 +446,8 @@ int kvm_vgic_map_resources(struct kvm *kvm) if (ret) __kvm_vgic_destroy(kvm); + else + dist->ready = true; out: mutex_unlock(&kvm->lock); diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index 7f38c1a93639..11934c2af2f4 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -309,14 +309,12 @@ int vgic_v2_map_resources(struct kvm *kvm) if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) { kvm_err("Need to set vgic cpu and dist addresses first\n"); - ret = -ENXIO; - goto out; + return -ENXIO; } if (!vgic_v2_check_base(dist->vgic_dist_base, dist->vgic_cpu_base)) { kvm_err("VGIC CPU and dist frames overlap\n"); - ret = -EINVAL; - goto out; + return -EINVAL; } /* @@ -326,13 +324,13 @@ int vgic_v2_map_resources(struct kvm *kvm) ret = vgic_init(kvm); if (ret) { kvm_err("Unable to initialize VGIC dynamic data structures\n"); - goto out; + return ret; } ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V2); if (ret) { kvm_err("Unable to register VGIC MMIO regions\n"); - goto out; + return ret; } if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) { @@ -341,14 +339,11 @@ int vgic_v2_map_resources(struct kvm *kvm) KVM_VGIC_V2_CPU_SIZE, true); if (ret) { kvm_err("Unable to remap VGIC CPU to VCPU\n"); - goto out; + return ret; } } - dist->ready = true; - -out: - return ret; + return 0; } DEFINE_STATIC_KEY_FALSE(vgic_v2_cpuif_trap); diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 35029c5cb0f1..52915b342351 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -505,21 +505,18 @@ int vgic_v3_map_resources(struct kvm *kvm) if (IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) { kvm_debug("vcpu %d redistributor base not set\n", c); - ret = -ENXIO; - goto out; + return -ENXIO; } } if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base)) { kvm_err("Need to set vgic distributor addresses first\n"); - ret = -ENXIO; - goto out; + return -ENXIO; } if (!vgic_v3_check_base(kvm)) { kvm_err("VGIC redist and dist frames overlap\n"); - ret = -EINVAL; - goto out; + return -EINVAL; } /* @@ -527,22 +524,19 @@ int vgic_v3_map_resources(struct kvm *kvm) * the VGIC before we need to use it. */ if (!vgic_initialized(kvm)) { - ret = -EBUSY; - goto out; + return -EBUSY; } ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V3); if (ret) { kvm_err("Unable to register VGICv3 dist MMIO regions\n"); - goto out; + return ret; } if (kvm_vgic_global_state.has_gicv4_1) vgic_v4_configure_vsgis(kvm); - dist->ready = true; -out: - return ret; + return 0; } DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap); -- cgit 1.4.1 From 976509bb310b913d30577f15b58bdd30effb0542 Mon Sep 17 00:00:00 2001 From: Quanyang Wang Date: Thu, 24 Dec 2020 18:49:27 +0800 Subject: opp: fix memory leak in _allocate_opp_table In function _allocate_opp_table, opp_dev is allocated and referenced by opp_table via _add_opp_dev. But in the case that the subsequent calls return -EPROBE_DEFER, it will jump to err label and opp_table will be freed. Then opp_dev becomes an unreferenced object to cause memory leak. So let's call _remove_opp_dev to do the cleanup. This fixes the following kmemleak report: unreferenced object 0xffff000801524a00 (size 128): comm "swapper/0", pid 1, jiffies 4294892465 (age 84.616s) hex dump (first 32 bytes): 40 00 56 01 08 00 ff ff 40 00 56 01 08 00 ff ff @.V.....@.V..... b8 52 77 7f 08 00 ff ff 00 3c 4c 00 08 00 ff ff .Rw......] kmemleak_alloc+0x30/0x40 [<0000000056da48f0>] kmem_cache_alloc+0x3d4/0x588 [<00000000a84b3b0e>] _add_opp_dev+0x2c/0x88 [<0000000062a380cd>] _add_opp_table_indexed+0x124/0x268 [<000000008b4c8f1f>] dev_pm_opp_of_add_table+0x20/0x1d8 [<00000000e5316798>] dev_pm_opp_of_cpumask_add_table+0x48/0xf0 [<00000000db0a8ec2>] dt_cpufreq_probe+0x20c/0x448 [<0000000030a3a26c>] platform_probe+0x68/0xd8 [<00000000c618e78d>] really_probe+0xd0/0x3a0 [<00000000642e856f>] driver_probe_device+0x58/0xb8 [<00000000f10f5307>] device_driver_attach+0x74/0x80 [<0000000004f254b8>] __driver_attach+0x58/0xe0 [<0000000009d5d19e>] bus_for_each_dev+0x70/0xc8 [<0000000000d22e1c>] driver_attach+0x24/0x30 [<0000000001d4e952>] bus_add_driver+0x14c/0x1f0 [<0000000089928aaa>] driver_register+0x64/0x120 Cc: v5.10 # v5.10 Fixes: dd461cd9183f ("opp: Allow dev_pm_opp_get_opp_table() to return -EPROBE_DEFER") Signed-off-by: Quanyang Wang [ Viresh: Added the stable tag ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 4268eb359915..c9e50836b4c2 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1092,7 +1092,7 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) if (IS_ERR(opp_table->clk)) { ret = PTR_ERR(opp_table->clk); if (ret == -EPROBE_DEFER) - goto err; + goto remove_opp_dev; dev_dbg(dev, "%s: Couldn't find clock: %d\n", __func__, ret); } @@ -1101,7 +1101,7 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) ret = dev_pm_opp_of_find_icc_paths(dev, opp_table); if (ret) { if (ret == -EPROBE_DEFER) - goto err; + goto remove_opp_dev; dev_warn(dev, "%s: Error finding interconnect paths: %d\n", __func__, ret); @@ -1113,6 +1113,8 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) return opp_table; +remove_opp_dev: + _remove_opp_dev(opp_dev, opp_table); err: kfree(opp_table); return ERR_PTR(ret); -- cgit 1.4.1 From 0e1d9ca1766f5d95fb881f57b6c4a1ffa63d4648 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 28 Dec 2020 10:51:04 +0530 Subject: opp: Call the missing clk_put() on error Fix the clock reference counting by calling the missing clk_put() in the error path. Cc: v5.10 # v5.10 Fixes: dd461cd9183f ("opp: Allow dev_pm_opp_get_opp_table() to return -EPROBE_DEFER") Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index c9e50836b4c2..8c905aabacc0 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1101,7 +1101,7 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) ret = dev_pm_opp_of_find_icc_paths(dev, opp_table); if (ret) { if (ret == -EPROBE_DEFER) - goto remove_opp_dev; + goto put_clk; dev_warn(dev, "%s: Error finding interconnect paths: %d\n", __func__, ret); @@ -1113,6 +1113,9 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) return opp_table; +put_clk: + if (!IS_ERR(opp_table->clk)) + clk_put(opp_table->clk); remove_opp_dev: _remove_opp_dev(opp_dev, opp_table); err: -- cgit 1.4.1 From 105b5ca9b1e38a8db8446a493ca062eea98171eb Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 30 Nov 2020 16:36:30 +0200 Subject: habanalabs: Fix a missing-braces warning Fix a compilation "missing braces around initializer" warning. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 32e6af1db4e3..a9927a06c5ea 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -406,7 +406,7 @@ static int total_energy_consumption_info(struct hl_fpriv *hpriv, static int pll_frequency_info(struct hl_fpriv *hpriv, struct hl_info_args *args) { struct hl_device *hdev = hpriv->hdev; - struct hl_pll_frequency_info freq_info = {0}; + struct hl_pll_frequency_info freq_info = {}; u32 max_size = args->return_size; void __user *out = (void __user *) (uintptr_t) args->return_pointer; int rc; -- cgit 1.4.1 From 429f1571e8f0b14ec42b8fb14efcfc0576b2788f Mon Sep 17 00:00:00 2001 From: Alon Mizrahi Date: Tue, 1 Dec 2020 18:44:11 +0200 Subject: habanalabs: add comment for pll frequency ioctl opcode Forgot to add the comment for the opcode when it was added. Signed-off-by: Alon Mizrahi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 8c15a7d336a0..dc8bcec195cc 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -279,6 +279,7 @@ enum hl_device_status { * HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason * HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore * HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption + * HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency */ #define HL_INFO_HW_IP_INFO 0 #define HL_INFO_HW_EVENTS 1 -- cgit 1.4.1 From 4783489951b78525a6e61b43936cbbd88b7938af Mon Sep 17 00:00:00 2001 From: Alon Mizrahi Date: Thu, 26 Nov 2020 13:05:20 +0200 Subject: habanalabs: fetch PSOC PLL frequency from F/W in goya When the F/W security is enabled, goya needs to fetch the PSOC pll frequency through a dedicated interface Signed-off-by: Alon Mizrahi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 61 +++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 3e5eb9e3d7bd..b66fd55accb5 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -694,32 +694,47 @@ static void goya_qman0_set_security(struct hl_device *hdev, bool secure) static void goya_fetch_psoc_frequency(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; - u32 trace_freq = 0; - u32 pll_clk = 0; - u32 div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1); - u32 div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1); - u32 nr = RREG32(mmPSOC_PCI_PLL_NR); - u32 nf = RREG32(mmPSOC_PCI_PLL_NF); - u32 od = RREG32(mmPSOC_PCI_PLL_OD); - - if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) { - if (div_sel == DIV_SEL_REF_CLK) - trace_freq = PLL_REF_CLK; - else - trace_freq = PLL_REF_CLK / (div_fctr + 1); - } else if (div_sel == DIV_SEL_PLL_CLK || - div_sel == DIV_SEL_DIVIDED_PLL) { - pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1)); - if (div_sel == DIV_SEL_PLL_CLK) - trace_freq = pll_clk; - else - trace_freq = pll_clk / (div_fctr + 1); + u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel; + u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq; + int rc; + + if (hdev->asic_prop.fw_security_disabled) { + div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1); + div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1); + nr = RREG32(mmPSOC_PCI_PLL_NR); + nf = RREG32(mmPSOC_PCI_PLL_NF); + od = RREG32(mmPSOC_PCI_PLL_OD); + + if (div_sel == DIV_SEL_REF_CLK || + div_sel == DIV_SEL_DIVIDED_REF) { + if (div_sel == DIV_SEL_REF_CLK) + freq = PLL_REF_CLK; + else + freq = PLL_REF_CLK / (div_fctr + 1); + } else if (div_sel == DIV_SEL_PLL_CLK || + div_sel == DIV_SEL_DIVIDED_PLL) { + pll_clk = PLL_REF_CLK * (nf + 1) / + ((nr + 1) * (od + 1)); + if (div_sel == DIV_SEL_PLL_CLK) + freq = pll_clk; + else + freq = pll_clk / (div_fctr + 1); + } else { + dev_warn(hdev->dev, + "Received invalid div select value: %d", + div_sel); + freq = 0; + } } else { - dev_warn(hdev->dev, - "Received invalid div select value: %d", div_sel); + rc = hl_fw_cpucp_pll_info_get(hdev, PCI_PLL, pll_freq_arr); + + if (rc) + return; + + freq = pll_freq_arr[1]; } - prop->psoc_timestamp_frequency = trace_freq; + prop->psoc_timestamp_frequency = freq; prop->psoc_pci_pll_nr = nr; prop->psoc_pci_pll_nf = nf; prop->psoc_pci_pll_od = od; -- cgit 1.4.1 From 6585489e808d9964dbde9dad89ac8e792e1185fc Mon Sep 17 00:00:00 2001 From: Alon Mizrahi Date: Thu, 19 Nov 2020 16:34:19 +0200 Subject: habanalabs: remove generic gaudi get_pll_freq function As we only fetch the CPU_PLL frequency in gaudi, we don't need a generic get_pll_frequency function which takes a pll index as input Signed-off-by: Alon Mizrahi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs_ioctl.c | 2 +- drivers/misc/habanalabs/gaudi/gaudi.c | 126 +++++++--------------- drivers/misc/habanalabs/gaudi/gaudiP.h | 7 -- 3 files changed, 41 insertions(+), 94 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index a9927a06c5ea..a0c0d20f6f8f 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -406,7 +406,7 @@ static int total_energy_consumption_info(struct hl_fpriv *hpriv, static int pll_frequency_info(struct hl_fpriv *hpriv, struct hl_info_args *args) { struct hl_device *hdev = hpriv->hdev; - struct hl_pll_frequency_info freq_info = {}; + struct hl_pll_frequency_info freq_info = { {0} }; u32 max_size = args->return_size; void __user *out = (void __user *) (uintptr_t) args->return_pointer; int rc; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 1f1926607c5e..278c4de98e22 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -151,19 +151,6 @@ static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = { [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe) }; -static const u32 gaudi_pll_base_addresses[GAUDI_PLL_MAX] = { - [CPU_PLL] = mmPSOC_CPU_PLL_NR, - [PCI_PLL] = mmPSOC_PCI_PLL_NR, - [SRAM_PLL] = mmSRAM_W_PLL_NR, - [HBM_PLL] = mmPSOC_HBM_PLL_NR, - [NIC_PLL] = mmNIC0_PLL_NR, - [DMA_PLL] = mmDMA_W_PLL_NR, - [MESH_PLL] = mmMESH_W_PLL_NR, - [MME_PLL] = mmPSOC_MME_PLL_NR, - [TPC_PLL] = mmPSOC_TPC_PLL_NR, - [IF_PLL] = mmIF_W_PLL_NR -}; - static inline bool validate_packet_id(enum packet_id id) { switch (id) { @@ -703,93 +690,60 @@ static int gaudi_early_fini(struct hl_device *hdev) } /** - * gaudi_fetch_pll_frequency - Fetch PLL frequency values + * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values * * @hdev: pointer to hl_device structure - * @pll_index: index of the pll to fetch frequency from - * @pll_freq: pointer to store the pll frequency in MHz in each of the available - * outputs. if a certain output is not available a 0 will be set * */ -static int gaudi_fetch_pll_frequency(struct hl_device *hdev, - enum gaudi_pll_index pll_index, - u16 *pll_freq_arr) +static int gaudi_fetch_psoc_frequency(struct hl_device *hdev) { - u32 nr = 0, nf = 0, od = 0, pll_clk = 0, div_fctr, div_sel, - pll_base_addr = gaudi_pll_base_addresses[pll_index]; - u16 freq = 0; - int i, rc; - - if (hdev->asic_prop.fw_security_status_valid && - (hdev->asic_prop.fw_app_security_map & - CPU_BOOT_DEV_STS0_PLL_INFO_EN)) { - rc = hl_fw_cpucp_pll_info_get(hdev, pll_index, pll_freq_arr); + struct asic_fixed_properties *prop = &hdev->asic_prop; + u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel; + u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq; + int rc; - if (rc) - return rc; - } else if (hdev->asic_prop.fw_security_disabled) { + if (hdev->asic_prop.fw_security_disabled) { /* Backward compatibility */ - nr = RREG32(pll_base_addr + PLL_NR_OFFSET); - nf = RREG32(pll_base_addr + PLL_NF_OFFSET); - od = RREG32(pll_base_addr + PLL_OD_OFFSET); - - for (i = 0; i < HL_PLL_NUM_OUTPUTS; i++) { - div_fctr = RREG32(pll_base_addr + - PLL_DIV_FACTOR_0_OFFSET + i * 4); - div_sel = RREG32(pll_base_addr + - PLL_DIV_SEL_0_OFFSET + i * 4); + div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2); + div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2); + nr = RREG32(mmPSOC_CPU_PLL_NR); + nf = RREG32(mmPSOC_CPU_PLL_NF); + od = RREG32(mmPSOC_CPU_PLL_OD); - if (div_sel == DIV_SEL_REF_CLK || + if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) { - if (div_sel == DIV_SEL_REF_CLK) - freq = PLL_REF_CLK; - else - freq = PLL_REF_CLK / (div_fctr + 1); - } else if (div_sel == DIV_SEL_PLL_CLK || - div_sel == DIV_SEL_DIVIDED_PLL) { - pll_clk = PLL_REF_CLK * (nf + 1) / - ((nr + 1) * (od + 1)); - if (div_sel == DIV_SEL_PLL_CLK) - freq = pll_clk; - else - freq = pll_clk / (div_fctr + 1); - } else { - dev_warn(hdev->dev, - "Received invalid div select value: %d", - div_sel); - } - - pll_freq_arr[i] = freq; + if (div_sel == DIV_SEL_REF_CLK) + freq = PLL_REF_CLK; + else + freq = PLL_REF_CLK / (div_fctr + 1); + } else if (div_sel == DIV_SEL_PLL_CLK || + div_sel == DIV_SEL_DIVIDED_PLL) { + pll_clk = PLL_REF_CLK * (nf + 1) / + ((nr + 1) * (od + 1)); + if (div_sel == DIV_SEL_PLL_CLK) + freq = pll_clk; + else + freq = pll_clk / (div_fctr + 1); + } else { + dev_warn(hdev->dev, + "Received invalid div select value: %d", + div_sel); + freq = 0; } } else { - dev_err(hdev->dev, "Failed to fetch PLL frequency values\n"); - return -EIO; - } - - return 0; -} + rc = hl_fw_cpucp_pll_info_get(hdev, CPU_PLL, pll_freq_arr); -/** - * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values - * - * @hdev: pointer to hl_device structure - * - */ -static int gaudi_fetch_psoc_frequency(struct hl_device *hdev) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - u16 pll_freq[HL_PLL_NUM_OUTPUTS]; - int rc; + if (rc) + return rc; - rc = gaudi_fetch_pll_frequency(hdev, CPU_PLL, pll_freq); - if (rc) - return rc; + freq = pll_freq_arr[2]; + } - prop->psoc_timestamp_frequency = pll_freq[2]; - prop->psoc_pci_pll_nr = 0; - prop->psoc_pci_pll_nf = 0; - prop->psoc_pci_pll_od = 0; - prop->psoc_pci_pll_div_factor = 0; + prop->psoc_timestamp_frequency = freq; + prop->psoc_pci_pll_nr = nr; + prop->psoc_pci_pll_nf = nf; + prop->psoc_pci_pll_od = od; + prop->psoc_pci_pll_div_factor = div_fctr; return 0; } diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h index f2d91f4fcffe..a7ab2d7e57d4 100644 --- a/drivers/misc/habanalabs/gaudi/gaudiP.h +++ b/drivers/misc/habanalabs/gaudi/gaudiP.h @@ -105,13 +105,6 @@ #define MME_ACC_OFFSET (mmMME1_ACC_BASE - mmMME0_ACC_BASE) #define SRAM_BANK_OFFSET (mmSRAM_Y0_X1_RTR_BASE - mmSRAM_Y0_X0_RTR_BASE) -#define PLL_NR_OFFSET 0 -#define PLL_NF_OFFSET (mmPSOC_CPU_PLL_NF - mmPSOC_CPU_PLL_NR) -#define PLL_OD_OFFSET (mmPSOC_CPU_PLL_OD - mmPSOC_CPU_PLL_NR) -#define PLL_DIV_FACTOR_0_OFFSET (mmPSOC_CPU_PLL_DIV_FACTOR_0 - \ - mmPSOC_CPU_PLL_NR) -#define PLL_DIV_SEL_0_OFFSET (mmPSOC_CPU_PLL_DIV_SEL_0 - mmPSOC_CPU_PLL_NR) - #define NUM_OF_SOB_IN_BLOCK \ (((mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_2047 - \ mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0) + 4) >> 2) -- cgit 1.4.1 From 9c9013cbd8338ff8eac732d115c9005bc512cbc5 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Tue, 1 Dec 2020 10:39:54 +0200 Subject: habanalabs: preboot hard reset support FW hard reset capability indication is now moved to preboot stage. Driver will check if HW is dirty only after it validated preboot is up. If HW is dirty, driver will perform a hard reset according to the FW capability. In addition, FW defines a new message which driver need to send in order to initiate a hard reset. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 25 +++++++++++----------- drivers/misc/habanalabs/gaudi/gaudi.c | 17 +++++++++------ drivers/misc/habanalabs/goya/goya.c | 12 +++++------ .../misc/habanalabs/include/common/hl_boot_if.h | 2 ++ 4 files changed, 31 insertions(+), 25 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 0e1c629e9800..c970bfc6db66 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -627,7 +627,9 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, security_status = RREG32(cpu_security_boot_status_reg); /* We read security status multiple times during boot: - * 1. preboot - we check if fw security feature is supported + * 1. preboot - a. Check whether the security status bits are valid + * b. Check whether fw security is enabled + * c. Check whether hard reset is done by fw * 2. boot cpu - we get boot cpu security status * 3. FW application - we get FW application security status * @@ -637,13 +639,20 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, */ if (security_status & CPU_BOOT_DEV_STS0_ENABLED) { hdev->asic_prop.fw_security_status_valid = 1; - prop->fw_security_disabled = - !(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN); + + if (!(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN)) + prop->fw_security_disabled = true; + + if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) + hdev->asic_prop.hard_reset_done_by_fw = true; } else { hdev->asic_prop.fw_security_status_valid = 0; prop->fw_security_disabled = true; } + dev_dbg(hdev->dev, "Firmware hard-reset is %s\n", + hdev->asic_prop.hard_reset_done_by_fw ? "enabled" : "disabled"); + dev_info(hdev->dev, "firmware-level security is %s\n", prop->fw_security_disabled ? "disabled" : "enabled"); @@ -797,18 +806,10 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, } /* Read FW application security bits */ - if (hdev->asic_prop.fw_security_status_valid) { + if (hdev->asic_prop.fw_security_status_valid) hdev->asic_prop.fw_app_security_map = RREG32(cpu_security_boot_status_reg); - if (hdev->asic_prop.fw_app_security_map & - CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) - hdev->asic_prop.hard_reset_done_by_fw = true; - } - - dev_dbg(hdev->dev, "Firmware hard-reset is %s\n", - hdev->asic_prop.hard_reset_done_by_fw ? "enabled" : "disabled"); - dev_info(hdev->dev, "Successfully loaded firmware to device\n"); out: diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 278c4de98e22..e465c158eaeb 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -654,12 +654,6 @@ static int gaudi_early_init(struct hl_device *hdev) if (rc) goto free_queue_props; - if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { - dev_info(hdev->dev, - "H/W state is dirty, must reset before initializing\n"); - hdev->asic_funcs->hw_fini(hdev, true); - } - /* Before continuing in the initialization, we need to read the preboot * version to determine whether we run with a security-enabled firmware */ @@ -672,6 +666,12 @@ static int gaudi_early_init(struct hl_device *hdev) goto pci_fini; } + if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { + dev_info(hdev->dev, + "H/W state is dirty, must reset before initializing\n"); + hdev->asic_funcs->hw_fini(hdev, true); + } + return 0; pci_fini: @@ -3881,7 +3881,10 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset) /* I don't know what is the state of the CPU so make sure it is * stopped in any means necessary */ - WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE); + if (hdev->asic_prop.hard_reset_done_by_fw) + WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_RST_DEV); + else + WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE); WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index b66fd55accb5..d61177bf36a5 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -613,12 +613,6 @@ static int goya_early_init(struct hl_device *hdev) if (rc) goto free_queue_props; - if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { - dev_info(hdev->dev, - "H/W state is dirty, must reset before initializing\n"); - hdev->asic_funcs->hw_fini(hdev, true); - } - /* Before continuing in the initialization, we need to read the preboot * version to determine whether we run with a security-enabled firmware */ @@ -631,6 +625,12 @@ static int goya_early_init(struct hl_device *hdev) goto pci_fini; } + if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { + dev_info(hdev->dev, + "H/W state is dirty, must reset before initializing\n"); + hdev->asic_funcs->hw_fini(hdev, true); + } + if (!hdev->pldm) { val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS); if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK) diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index e5801ecf0cb2..755c4800f002 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -204,6 +204,8 @@ enum kmd_msg { KMD_MSG_GOTO_WFE, KMD_MSG_FIT_RDY, KMD_MSG_SKIP_BMC, + RESERVED, + KMD_MSG_RST_DEV, }; enum cpu_msg_status { -- cgit 1.4.1 From 72ab9ca52de6856380c26b2045aa826ae4308b76 Mon Sep 17 00:00:00 2001 From: Alon Mizrahi Date: Wed, 2 Dec 2020 19:55:30 +0200 Subject: habanalabs/gaudi: do not set EB in collective slave queues We don't need to set EB on signal packets from collective slave queues as it degrades performance. Because the slaves are the network queues, the engine barrier doesn't actually guarantee that the packet has been sent. Signed-off-by: Alon Mizrahi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 2 +- drivers/misc/habanalabs/common/hw_queue.c | 5 ++++- drivers/misc/habanalabs/gaudi/gaudi.c | 8 ++++---- drivers/misc/habanalabs/goya/goya.c | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 571eda6ef5ab..70b778a0d60e 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -944,7 +944,7 @@ struct hl_asic_funcs { u32 (*get_signal_cb_size)(struct hl_device *hdev); u32 (*get_wait_cb_size)(struct hl_device *hdev); u32 (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id, - u32 size); + u32 size, bool eb); u32 (*gen_wait_cb)(struct hl_device *hdev, struct hl_gen_wait_properties *prop); void (*reset_sob)(struct hl_device *hdev, void *data); diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c index 7caf868d1585..76217258780a 100644 --- a/drivers/misc/habanalabs/common/hw_queue.c +++ b/drivers/misc/habanalabs/common/hw_queue.c @@ -418,8 +418,11 @@ static void init_signal_cs(struct hl_device *hdev, "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n", cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx); + /* we set an EB since we must make sure all oeprations are done + * when sending the signal + */ hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb, - cs_cmpl->hw_sob->sob_id, 0); + cs_cmpl->hw_sob->sob_id, 0, true); kref_get(&hw_sob->kref); diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index e465c158eaeb..65895ba075fe 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -361,7 +361,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev); static void gaudi_disable_clock_gating(struct hl_device *hdev); static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid); static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, - u32 size); + u32 size, bool eb); static u32 gaudi_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop); @@ -1064,7 +1064,7 @@ static void gaudi_collective_slave_init_job(struct hl_device *hdev, prop->collective_sob_id, queue_id); cb_size += gaudi_gen_signal_cb(hdev, job->user_cb, - prop->collective_sob_id, cb_size); + prop->collective_sob_id, cb_size, false); } static void gaudi_collective_wait_init_cs(struct hl_cs *cs) @@ -7893,7 +7893,7 @@ static u32 gaudi_get_wait_cb_size(struct hl_device *hdev) } static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, - u32 size) + u32 size, bool eb) { struct hl_cb *cb = (struct hl_cb *) data; struct packet_msg_short *pkt; @@ -7910,7 +7910,7 @@ static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); - ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1); + ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, eb); ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1); ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index d61177bf36a5..b8b4aa636b7c 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -5339,7 +5339,7 @@ static u32 goya_get_wait_cb_size(struct hl_device *hdev) } static u32 goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, - u32 size) + u32 size, bool eb) { return 0; } -- cgit 1.4.1 From 7a585dfc32110a106f70474c6fa822d912a92c7e Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 30 Nov 2020 14:56:06 +0200 Subject: habanalabs: Revise comment to align with mirror list name hw_queues_mirror was renamed to cs_mirror, so revise accordingly a comment that refers to this list. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index beb482310a58..92c1c516b65f 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -562,7 +562,7 @@ void hl_cs_rollback_all(struct hl_device *hdev) for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) flush_workqueue(hdev->cq_wq[i]); - /* Make sure we don't have leftovers in the H/W queues mirror list */ + /* Make sure we don't have leftovers in the CS mirror list */ list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) { cs_get(cs); cs->aborted = true; -- cgit 1.4.1 From 0024c094851f718ccb0b797255292bdce850a01f Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sat, 5 Dec 2020 22:55:09 +0200 Subject: habanalabs/gaudi: disable CGM at HW initialization In case the clock gating was enabled in preboot we need to disable it at the H/W initialization stage before touching the MME/TPC registers. Otherwise, the ASIC can get stuck. If the security is enabled in the firmware level, the CGM is always disabled and the driver can't enable it. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 14 +++++++++++--- drivers/misc/habanalabs/include/common/hl_boot_if.h | 5 +++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 65895ba075fe..f316b898e8e0 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -2403,8 +2403,6 @@ static void gaudi_init_golden_registers(struct hl_device *hdev) gaudi_init_e2e(hdev); gaudi_init_hbm_cred(hdev); - hdev->asic_funcs->disable_clock_gating(hdev); - for (tpc_id = 0, tpc_offset = 0; tpc_id < TPC_NUMBER_OF_ENGINES; tpc_id++, tpc_offset += TPC_CFG_OFFSET) { @@ -3416,6 +3414,9 @@ static void gaudi_set_clock_gating(struct hl_device *hdev) if (hdev->in_debug) return; + if (!hdev->asic_prop.fw_security_disabled) + return; + for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) { enable = !!(hdev->clock_gating_mask & (BIT_ULL(gaudi_dma_assignment[i]))); @@ -3467,7 +3468,7 @@ static void gaudi_disable_clock_gating(struct hl_device *hdev) u32 qman_offset; int i; - if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE)) + if (!hdev->asic_prop.fw_security_disabled) return; for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { @@ -3801,6 +3802,13 @@ static int gaudi_hw_init(struct hl_device *hdev) return rc; } + /* In case the clock gating was enabled in preboot we need to disable + * it here before touching the MME/TPC registers. + * There is no need to take clk gating mutex because when this function + * runs, no other relevant code can run + */ + hdev->asic_funcs->disable_clock_gating(hdev); + /* SRAM scrambler must be initialized after CPU is running from HBM */ gaudi_init_scrambler_sram(hdev); diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index 755c4800f002..7cb5f2d3e565 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -150,6 +150,10 @@ * CPU_BOOT_DEV_STS0_PLL_INFO_EN FW retrieval of PLL info is enabled. * Initialized in: linux * + * CPU_BOOT_DEV_STS0_CLK_GATE_EN Clock Gating enabled. + * FW initialized Clock Gating. + * Initialized in: preboot + * * CPU_BOOT_DEV_STS0_ENABLED Device status register enabled. * This is a main indication that the * running FW populates the device status @@ -171,6 +175,7 @@ #define CPU_BOOT_DEV_STS0_DRAM_SCR_EN (1 << 9) #define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN (1 << 10) #define CPU_BOOT_DEV_STS0_PLL_INFO_EN (1 << 11) +#define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << 13) #define CPU_BOOT_DEV_STS0_ENABLED (1 << 31) enum cpu_boot_status { -- cgit 1.4.1 From 6bbb77b9e6f0bd5595724b7c0cb1189afdd133d3 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Sun, 6 Dec 2020 14:00:35 +0200 Subject: habanalabs: full FW hard reset support Driver must fetch FW hard reset capability at every FW boot stage: preboot, CPU boot, CPU application. If hard reset is triggered, driver will take into consideration only the last capability received. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 55 +++++++++++++++++++++------- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index c970bfc6db66..20f77f58edef 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -629,32 +629,36 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, /* We read security status multiple times during boot: * 1. preboot - a. Check whether the security status bits are valid * b. Check whether fw security is enabled - * c. Check whether hard reset is done by fw - * 2. boot cpu - we get boot cpu security status - * 3. FW application - we get FW application security status + * c. Check whether hard reset is done by preboot + * 2. boot cpu - a. Fetch boot cpu security status + * b. Check whether hard reset is done by boot cpu + * 3. FW application - a. Fetch fw application security status + * b. Check whether hard reset is done by fw app * * Preboot: * Check security status bit (CPU_BOOT_DEV_STS0_ENABLED), if it is set * check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN) */ if (security_status & CPU_BOOT_DEV_STS0_ENABLED) { - hdev->asic_prop.fw_security_status_valid = 1; + prop->fw_security_status_valid = 1; - if (!(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN)) + if (security_status & CPU_BOOT_DEV_STS0_SECURITY_EN) + prop->fw_security_disabled = false; + else prop->fw_security_disabled = true; if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) - hdev->asic_prop.hard_reset_done_by_fw = true; + prop->hard_reset_done_by_fw = true; } else { - hdev->asic_prop.fw_security_status_valid = 0; + prop->fw_security_status_valid = 0; prop->fw_security_disabled = true; } - dev_dbg(hdev->dev, "Firmware hard-reset is %s\n", - hdev->asic_prop.hard_reset_done_by_fw ? "enabled" : "disabled"); + dev_dbg(hdev->dev, "Firmware preboot hard-reset is %s\n", + prop->hard_reset_done_by_fw ? "enabled" : "disabled"); dev_info(hdev->dev, "firmware-level security is %s\n", - prop->fw_security_disabled ? "disabled" : "enabled"); + prop->fw_security_disabled ? "disabled" : "enabled"); return 0; } @@ -664,6 +668,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, u32 cpu_security_boot_status_reg, u32 boot_err0_reg, bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout) { + struct asic_fixed_properties *prop = &hdev->asic_prop; u32 status; int rc; @@ -732,11 +737,22 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, /* Read U-Boot version now in case we will later fail */ hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT); + /* Clear reset status since we need to read it again from boot CPU */ + prop->hard_reset_done_by_fw = false; + /* Read boot_cpu security bits */ - if (hdev->asic_prop.fw_security_status_valid) - hdev->asic_prop.fw_boot_cpu_security_map = + if (prop->fw_security_status_valid) { + prop->fw_boot_cpu_security_map = RREG32(cpu_security_boot_status_reg); + if (prop->fw_boot_cpu_security_map & + CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) + prop->hard_reset_done_by_fw = true; + } + + dev_dbg(hdev->dev, "Firmware boot CPU hard-reset is %s\n", + prop->hard_reset_done_by_fw ? "enabled" : "disabled"); + if (rc) { detect_cpu_boot_status(hdev, status); rc = -EIO; @@ -805,11 +821,22 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, goto out; } + /* Clear reset status since we need to read again from app */ + prop->hard_reset_done_by_fw = false; + /* Read FW application security bits */ - if (hdev->asic_prop.fw_security_status_valid) - hdev->asic_prop.fw_app_security_map = + if (prop->fw_security_status_valid) { + prop->fw_app_security_map = RREG32(cpu_security_boot_status_reg); + if (prop->fw_app_security_map & + CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) + prop->hard_reset_done_by_fw = true; + } + + dev_dbg(hdev->dev, "Firmware application CPU hard-reset is %s\n", + prop->hard_reset_done_by_fw ? "enabled" : "disabled"); + dev_info(hdev->dev, "Successfully loaded firmware to device\n"); out: -- cgit 1.4.1 From 13d0ee10b55ecec01fd3c91e086e4f3ba75a7911 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 6 Dec 2020 23:48:45 +0200 Subject: habanalabs/gaudi: enhance reset message Print the initiator who performs the hard-reset for easier debugging. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index f316b898e8e0..b0528883a995 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -3936,11 +3936,15 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset) WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST, 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT); - } - dev_info(hdev->dev, - "Issued HARD reset command, going to wait %dms\n", - reset_timeout_ms); + dev_info(hdev->dev, + "Issued HARD reset command, going to wait %dms\n", + reset_timeout_ms); + } else { + dev_info(hdev->dev, + "Firmware performs HARD reset, going to wait %dms\n", + reset_timeout_ms); + } /* * After hard reset, we can't poll the BTM_FSM register because the PSOC -- cgit 1.4.1 From 90ffe170a390d5a620f8fe66758514e369e85d24 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Tue, 8 Dec 2020 16:52:48 +0200 Subject: habanalabs: update comment in hl_boot_if.h Hard-reset flag is updated in many stages of the boot sequence of the firmware. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/include/common/hl_boot_if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index 7cb5f2d3e565..b637dfd69f6e 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -145,7 +145,7 @@ * implemented. This means that FW will * perform hard reset procedure on * receiving the halt-machine event. - * Initialized in: linux + * Initialized in: preboot, u-boot, linux * * CPU_BOOT_DEV_STS0_PLL_INFO_EN FW retrieval of PLL info is enabled. * Initialized in: linux -- cgit 1.4.1 From 377182a3cc5ae6cc17fb04d06864c975f9f71c18 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Wed, 9 Dec 2020 19:50:46 +0200 Subject: habanalabs: adjust pci controller init to new firmware When the firmware security is enabled, the pcie_aux_dbi_reg_addr register in the PCI controller is blocked. Therefore, ignore the result of writing to this register and assume it worked. Also remove the prints on errors in the internal ELBI write function. If the security is enabled, the firmware is responsible for setting this register correctly so we won't have any problem. If the security is disabled, the write will work (unless something is totally broken at the PCI level and then the whole sequence will fail). In addition, remove a write to register pcie_aux_dbi_reg_addr+4, which was never actually needed. Moreover, PCIE_DBI registers are blocked to access from host when firmware security is enabled. Use a different register to flush the writes. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/pci.c | 28 +++++++++++++++---------- drivers/misc/habanalabs/gaudi/gaudi.c | 4 ++-- drivers/misc/habanalabs/gaudi/gaudi_coresight.c | 3 ++- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/misc/habanalabs/common/pci.c b/drivers/misc/habanalabs/common/pci.c index 923b2606e29f..b4725e6101f6 100644 --- a/drivers/misc/habanalabs/common/pci.c +++ b/drivers/misc/habanalabs/common/pci.c @@ -130,10 +130,8 @@ static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data) if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE) return 0; - if (val & PCI_CONFIG_ELBI_STS_ERR) { - dev_err(hdev->dev, "Error writing to ELBI\n"); + if (val & PCI_CONFIG_ELBI_STS_ERR) return -EIO; - } if (!(val & PCI_CONFIG_ELBI_STS_MASK)) { dev_err(hdev->dev, "ELBI write didn't finish in time\n"); @@ -160,8 +158,12 @@ int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data) dbi_offset = addr & 0xFFF; - rc = hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000); - rc |= hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset, + /* Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail + * in case the firmware security is enabled + */ + hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000); + + rc = hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset, data); if (rc) @@ -244,9 +246,11 @@ int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region, rc |= hl_pci_iatu_write(hdev, offset + 0x4, ctrl_reg_val); - /* Return the DBI window to the default location */ - rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); - rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0); + /* Return the DBI window to the default location + * Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail + * in case the firmware security is enabled + */ + hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); if (rc) dev_err(hdev->dev, "failed to map bar %u to 0x%08llx\n", @@ -294,9 +298,11 @@ int hl_pci_set_outbound_region(struct hl_device *hdev, /* Enable */ rc |= hl_pci_iatu_write(hdev, 0x004, 0x80000000); - /* Return the DBI window to the default location */ - rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); - rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0); + /* Return the DBI window to the default location + * Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail + * in case the firmware security is enabled + */ + hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); return rc; } diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index b0528883a995..88d0e4356d59 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -3761,7 +3761,7 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) static void gaudi_pre_hw_init(struct hl_device *hdev) { /* Perform read from the device to make sure device is up */ - RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); + RREG32(mmHW_STATE); if (hdev->asic_prop.fw_security_disabled) { /* Set the access through PCI bars (Linux driver only) as @@ -3847,7 +3847,7 @@ static int gaudi_hw_init(struct hl_device *hdev) } /* Perform read from the device to flush all configuration */ - RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); + RREG32(mmHW_STATE); return 0; diff --git a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c index 2e3612e1ee28..88a09d42e111 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c +++ b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c @@ -9,6 +9,7 @@ #include "../include/gaudi/gaudi_coresight.h" #include "../include/gaudi/asic_reg/gaudi_regs.h" #include "../include/gaudi/gaudi_masks.h" +#include "../include/gaudi/gaudi_reg_map.h" #include #define SPMU_SECTION_SIZE MME0_ACC_SPMU_MAX_OFFSET @@ -874,7 +875,7 @@ int gaudi_debug_coresight(struct hl_device *hdev, void *data) } /* Perform read from the device to flush all configuration */ - RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); + RREG32(mmHW_STATE); return rc; } -- cgit 1.4.1 From 98e8781f008372057bd5cb059ca6b507371e473d Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Wed, 9 Dec 2020 23:07:58 +0200 Subject: habanalabs/gaudi: retry loading TPC f/w on -EINTR If loading the firmware file for the TPC f/w was interrupted, try to do it again, up to 5 times. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 88d0e4356d59..8c09e4466af8 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -838,11 +838,17 @@ static int gaudi_init_tpc_mem(struct hl_device *hdev) size_t fw_size; void *cpu_addr; dma_addr_t dma_handle; - int rc; + int rc, count = 5; +again: rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev); + if (rc == -EINTR && count-- > 0) { + msleep(50); + goto again; + } + if (rc) { - dev_err(hdev->dev, "Firmware file %s is not found!\n", + dev_err(hdev->dev, "Failed to load firmware file %s\n", GAUDI_TPC_FW_FILE); goto out; } -- cgit 1.4.1 From a3fd28306329e8e82efab973aafe81e9001dcf6f Mon Sep 17 00:00:00 2001 From: Alon Mizrahi Date: Tue, 8 Dec 2020 16:14:01 +0200 Subject: habanalabs: add validation cs counter, fix misplaced counters Up until now validation errors were counted in the parsing field of the cs_counters struct, so we added a new counter and increased it when needed. In addition, there were some locations where only one of the counters was updated (ctx or aggregate) so add the second one to be updated as well. Signed-off-by: Alon Mizrahi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../misc/habanalabs/common/command_submission.c | 75 ++++++++++++++++------ drivers/misc/habanalabs/common/habanalabs.h | 2 + drivers/misc/habanalabs/common/habanalabs_ioctl.c | 5 ++ include/uapi/misc/habanalabs.h | 4 ++ 4 files changed, 68 insertions(+), 18 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 92c1c516b65f..b2b3d2b0f808 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -472,8 +472,11 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cntr = &hdev->aggregated_cs_counters; cs = kzalloc(sizeof(*cs), GFP_ATOMIC); - if (!cs) + if (!cs) { + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&cntr->out_of_mem_drop_cnt); return -ENOMEM; + } cs->ctx = ctx; cs->submitted = false; @@ -486,6 +489,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC); if (!cs_cmpl) { + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&cntr->out_of_mem_drop_cnt); rc = -ENOMEM; goto free_cs; } @@ -513,6 +518,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); if (!cs->jobs_in_queue_cnt) { + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&cntr->out_of_mem_drop_cnt); rc = -ENOMEM; goto free_fence; } @@ -764,11 +771,14 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args) static int hl_cs_copy_chunk_array(struct hl_device *hdev, struct hl_cs_chunk **cs_chunk_array, - void __user *chunks, u32 num_chunks) + void __user *chunks, u32 num_chunks, + struct hl_ctx *ctx) { u32 size_to_copy; if (num_chunks > HL_MAX_JOBS_PER_CS) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); dev_err(hdev->dev, "Number of chunks can NOT be larger than %d\n", HL_MAX_JOBS_PER_CS); @@ -777,11 +787,16 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev, *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array), GFP_ATOMIC); - if (!*cs_chunk_array) + if (!*cs_chunk_array) { + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); return -ENOMEM; + } size_to_copy = num_chunks * sizeof(struct hl_cs_chunk); if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); dev_err(hdev->dev, "Failed to copy cs chunk array from user\n"); kfree(*cs_chunk_array); return -EFAULT; @@ -797,6 +812,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, struct hl_device *hdev = hpriv->hdev; struct hl_cs_chunk *cs_chunk_array; struct hl_cs_counters_atomic *cntr; + struct hl_ctx *ctx = hpriv->ctx; struct hl_cs_job *job; struct hl_cs *cs; struct hl_cb *cb; @@ -805,7 +821,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, cntr = &hdev->aggregated_cs_counters; *cs_seq = ULLONG_MAX; - rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks); + rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks, + hpriv->ctx); if (rc) goto out; @@ -832,8 +849,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, rc = validate_queue_index(hdev, chunk, &queue_type, &is_kernel_allocated_cb); if (rc) { - atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt); - atomic64_inc(&cntr->parsing_drop_cnt); + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); goto free_cs_object; } @@ -841,8 +858,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk); if (!cb) { atomic64_inc( - &hpriv->ctx->cs_counters.parsing_drop_cnt); - atomic64_inc(&cntr->parsing_drop_cnt); + &ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); rc = -EINVAL; goto free_cs_object; } @@ -856,8 +873,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, job = hl_cs_allocate_job(hdev, queue_type, is_kernel_allocated_cb); if (!job) { - atomic64_inc( - &hpriv->ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); atomic64_inc(&cntr->out_of_mem_drop_cnt); dev_err(hdev->dev, "Failed to allocate a new job\n"); rc = -ENOMEM; @@ -891,7 +907,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, rc = cs_parser(hpriv, job); if (rc) { - atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt); + atomic64_inc(&ctx->cs_counters.parsing_drop_cnt); atomic64_inc(&cntr->parsing_drop_cnt); dev_err(hdev->dev, "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n", @@ -901,8 +917,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, } if (int_queues_only) { - atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt); - atomic64_inc(&cntr->parsing_drop_cnt); + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); dev_err(hdev->dev, "Reject CS %d.%llu because only internal queues jobs are present\n", cs->ctx->asid, cs->sequence); @@ -1042,7 +1058,7 @@ out: } static int cs_ioctl_extract_signal_seq(struct hl_device *hdev, - struct hl_cs_chunk *chunk, u64 *signal_seq) + struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx) { u64 *signal_seq_arr = NULL; u32 size_to_copy, signal_seq_arr_len; @@ -1052,6 +1068,8 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev, /* currently only one signal seq is supported */ if (signal_seq_arr_len != 1) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); dev_err(hdev->dev, "Wait for signal CS supports only one signal CS seq\n"); return -EINVAL; @@ -1060,13 +1078,18 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev, signal_seq_arr = kmalloc_array(signal_seq_arr_len, sizeof(*signal_seq_arr), GFP_ATOMIC); - if (!signal_seq_arr) + if (!signal_seq_arr) { + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); return -ENOMEM; + } size_to_copy = chunk->num_signal_seq_arr * sizeof(*signal_seq_arr); if (copy_from_user(signal_seq_arr, u64_to_user_ptr(chunk->signal_seq_arr), size_to_copy)) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); dev_err(hdev->dev, "Failed to copy signal seq array from user\n"); rc = -EFAULT; @@ -1153,6 +1176,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, struct hl_device *hdev = hpriv->hdev; struct hl_cs_compl *sig_waitcs_cmpl; u32 q_idx, collective_engine_id = 0; + struct hl_cs_counters_atomic *cntr; struct hl_fence *sig_fence = NULL; struct hl_ctx *ctx = hpriv->ctx; enum hl_queue_type q_type; @@ -1160,9 +1184,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, u64 signal_seq; int rc; + cntr = &hdev->aggregated_cs_counters; *cs_seq = ULLONG_MAX; - rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks); + rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks, + ctx); if (rc) goto out; @@ -1170,6 +1196,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, chunk = &cs_chunk_array[0]; if (chunk->queue_index >= hdev->asic_prop.max_queues) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); dev_err(hdev->dev, "Queue index %d is invalid\n", chunk->queue_index); rc = -EINVAL; @@ -1181,6 +1209,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, q_type = hw_queue_prop->type; if (!hw_queue_prop->supports_sync_stream) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); dev_err(hdev->dev, "Queue index %d does not support sync stream operations\n", q_idx); @@ -1190,6 +1220,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, if (cs_type == CS_TYPE_COLLECTIVE_WAIT) { if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx); rc = -EINVAL; @@ -1200,12 +1232,14 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, } if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) { - rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq); + rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, ctx); if (rc) goto free_cs_chunk_array; sig_fence = hl_ctx_get_fence(ctx, signal_seq); if (IS_ERR(sig_fence)) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); dev_err(hdev->dev, "Failed to get signal CS with seq 0x%llx\n", signal_seq); @@ -1223,6 +1257,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, container_of(sig_fence, struct hl_cs_compl, base_fence); if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); dev_err(hdev->dev, "CS seq 0x%llx is not of a signal CS\n", signal_seq); @@ -1270,8 +1306,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, else if (cs_type == CS_TYPE_COLLECTIVE_WAIT) rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx, cs, q_idx, collective_engine_id); - else + else { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); rc = -EINVAL; + } if (rc) goto free_cs_object; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 70b778a0d60e..e0d7f5fbaa5c 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1000,6 +1000,7 @@ struct hl_va_range { * @queue_full_drop_cnt: dropped due to queue full * @device_in_reset_drop_cnt: dropped due to device in reset * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight + * @validation_drop_cnt: dropped due to error in validation */ struct hl_cs_counters_atomic { atomic64_t out_of_mem_drop_cnt; @@ -1007,6 +1008,7 @@ struct hl_cs_counters_atomic { atomic64_t queue_full_drop_cnt; atomic64_t device_in_reset_drop_cnt; atomic64_t max_cs_in_flight_drop_cnt; + atomic64_t validation_drop_cnt; }; /** diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index a0c0d20f6f8f..12efbd9d2e3a 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -335,6 +335,8 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args) atomic64_read(&cntr->device_in_reset_drop_cnt); cs_counters.total_max_cs_in_flight_drop_cnt = atomic64_read(&cntr->max_cs_in_flight_drop_cnt); + cs_counters.total_validation_drop_cnt = + atomic64_read(&cntr->validation_drop_cnt); if (hpriv->ctx) { cs_counters.ctx_out_of_mem_drop_cnt = @@ -352,6 +354,9 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args) cs_counters.ctx_max_cs_in_flight_drop_cnt = atomic64_read( &hpriv->ctx->cs_counters.max_cs_in_flight_drop_cnt); + cs_counters.ctx_validation_drop_cnt = + atomic64_read( + &hpriv->ctx->cs_counters.validation_drop_cnt); } return copy_to_user(out, &cs_counters, diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index dc8bcec195cc..dba3827c43ca 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -426,6 +426,8 @@ struct hl_info_sync_manager { * @ctx_device_in_reset_drop_cnt: context dropped due to device in reset * @total_max_cs_in_flight_drop_cnt: total dropped due to maximum CS in-flight * @ctx_max_cs_in_flight_drop_cnt: context dropped due to maximum CS in-flight + * @total_validation_drop_cnt: total dropped due to validation error + * @ctx_validation_drop_cnt: context dropped due to validation error */ struct hl_info_cs_counters { __u64 total_out_of_mem_drop_cnt; @@ -438,6 +440,8 @@ struct hl_info_cs_counters { __u64 ctx_device_in_reset_drop_cnt; __u64 total_max_cs_in_flight_drop_cnt; __u64 ctx_max_cs_in_flight_drop_cnt; + __u64 total_validation_drop_cnt; + __u64 ctx_validation_drop_cnt; }; enum gaudi_dcores { -- cgit 1.4.1 From fcaebc7354188b0d708c79df4390fbabd4d9799d Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 14 Dec 2020 12:52:06 +0200 Subject: habanalabs: register to pci shutdown callback We need to make sure our device is idle when rebooting a virtual machine. This is done in the driver level. The firmware will later handle FLR but we want to be extra safe and stop the devices until the FLR is handled. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 6bbb6bca6860..032d114f01ea 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -544,6 +544,7 @@ static struct pci_driver hl_pci_driver = { .id_table = ids, .probe = hl_pci_probe, .remove = hl_pci_remove, + .shutdown = hl_pci_remove, .driver.pm = &hl_pm_ops, .err_handler = &hl_pci_err_handler, }; -- cgit 1.4.1 From 097c62b6f0ec2bdadf86afbe80df03856338724d Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Tue, 22 Dec 2020 15:21:07 +0200 Subject: habanalabs: fix order of status check When the device is in reset or needs to be reset, the disabled property is don't-care. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 5871162a8442..0749c92cbcf6 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -17,12 +17,12 @@ enum hl_device_status hl_device_status(struct hl_device *hdev) { enum hl_device_status status; - if (hdev->disabled) - status = HL_DEVICE_STATUS_MALFUNCTION; - else if (atomic_read(&hdev->in_reset)) + if (atomic_read(&hdev->in_reset)) status = HL_DEVICE_STATUS_IN_RESET; else if (hdev->needs_reset) status = HL_DEVICE_STATUS_NEEDS_RESET; + else if (hdev->disabled) + status = HL_DEVICE_STATUS_MALFUNCTION; else status = HL_DEVICE_STATUS_OPERATIONAL; -- cgit 1.4.1 From 95cd4bca7b1f4a25810f3ddfc5e767fb46931789 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 27 Dec 2020 12:33:44 +0100 Subject: netfilter: nft_dynset: report EOPNOTSUPP on missing set feature If userspace requests a feature which is not available the original set definition, then bail out with EOPNOTSUPP. If userspace sends unsupported dynset flags (new feature not supported by this kernel), then report EOPNOTSUPP to userspace. EINVAL should be only used to report malformed netlink messages from userspace. Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_dynset.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 983a1d5ca3ab..f35df221a633 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -177,7 +177,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, u32 flags = ntohl(nla_get_be32(tb[NFTA_DYNSET_FLAGS])); if (flags & ~NFT_DYNSET_F_INV) - return -EINVAL; + return -EOPNOTSUPP; if (flags & NFT_DYNSET_F_INV) priv->invert = true; } @@ -210,7 +210,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, timeout = 0; if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) - return -EINVAL; + return -EOPNOTSUPP; err = nf_msecs_to_jiffies64(tb[NFTA_DYNSET_TIMEOUT], &timeout); if (err) @@ -224,7 +224,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (tb[NFTA_DYNSET_SREG_DATA] != NULL) { if (!(set->flags & NFT_SET_MAP)) - return -EINVAL; + return -EOPNOTSUPP; if (set->dtype == NFT_DATA_VERDICT) return -EOPNOTSUPP; -- cgit 1.4.1 From b4e70d8dd9ea6bd5d5fb3122586f652326ca09cd Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 27 Dec 2020 12:35:43 +0100 Subject: netfilter: nftables: add set expression flags The set flag NFT_SET_EXPR provides a hint to the kernel that userspace supports for multiple expressions per set element. In the same direction, NFT_DYNSET_F_EXPR specifies that dynset expression defines multiple expressions per set element. This allows new userspace software with old kernels to bail out with EOPNOTSUPP. This update is similar to ef516e8625dd ("netfilter: nf_tables: reintroduce the NFT_SET_CONCAT flag"). The NFT_SET_EXPR flag needs to be set on when the NFTA_SET_EXPRESSIONS attribute is specified. The NFT_SET_EXPR flag is not set on with NFTA_SET_EXPR to retain backward compatibility in old userspace binaries. Fixes: 48b0ae046ee9 ("netfilter: nftables: netlink support for several set element expressions") Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 3 +++ net/netfilter/nf_tables_api.c | 6 +++++- net/netfilter/nft_dynset.c | 9 +++++++-- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 28b6ee53305f..b1633e7ba529 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -293,6 +293,7 @@ enum nft_rule_compat_attributes { * @NFT_SET_EVAL: set can be updated from the evaluation path * @NFT_SET_OBJECT: set contains stateful objects * @NFT_SET_CONCAT: set contains a concatenation + * @NFT_SET_EXPR: set contains expressions */ enum nft_set_flags { NFT_SET_ANONYMOUS = 0x1, @@ -303,6 +304,7 @@ enum nft_set_flags { NFT_SET_EVAL = 0x20, NFT_SET_OBJECT = 0x40, NFT_SET_CONCAT = 0x80, + NFT_SET_EXPR = 0x100, }; /** @@ -706,6 +708,7 @@ enum nft_dynset_ops { enum nft_dynset_flags { NFT_DYNSET_F_INV = (1 << 0), + NFT_DYNSET_F_EXPR = (1 << 1), }; /** diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4186b1e52d58..15c467f1a9dd 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4162,7 +4162,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT | NFT_SET_INTERVAL | NFT_SET_TIMEOUT | NFT_SET_MAP | NFT_SET_EVAL | - NFT_SET_OBJECT | NFT_SET_CONCAT)) + NFT_SET_OBJECT | NFT_SET_CONCAT | NFT_SET_EXPR)) return -EOPNOTSUPP; /* Only one of these operations is supported */ if ((flags & (NFT_SET_MAP | NFT_SET_OBJECT)) == @@ -4304,6 +4304,10 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, struct nlattr *tmp; int left; + if (!(flags & NFT_SET_EXPR)) { + err = -EINVAL; + goto err_set_alloc_name; + } i = 0; nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) { if (i == NFT_SET_EXPR_MAX) { diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index f35df221a633..0b053f75cd60 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -19,6 +19,7 @@ struct nft_dynset { enum nft_registers sreg_key:8; enum nft_registers sreg_data:8; bool invert; + bool expr; u8 num_exprs; u64 timeout; struct nft_expr *expr_array[NFT_SET_EXPR_MAX]; @@ -175,11 +176,12 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (tb[NFTA_DYNSET_FLAGS]) { u32 flags = ntohl(nla_get_be32(tb[NFTA_DYNSET_FLAGS])); - - if (flags & ~NFT_DYNSET_F_INV) + if (flags & ~(NFT_DYNSET_F_INV | NFT_DYNSET_F_EXPR)) return -EOPNOTSUPP; if (flags & NFT_DYNSET_F_INV) priv->invert = true; + if (flags & NFT_DYNSET_F_EXPR) + priv->expr = true; } set = nft_set_lookup_global(ctx->net, ctx->table, @@ -261,6 +263,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx, struct nlattr *tmp; int left; + if (!priv->expr) + return -EINVAL; + i = 0; nla_for_each_nested(tmp, tb[NFTA_DYNSET_EXPRESSIONS], left) { if (i == NFT_SET_EXPR_MAX) { -- cgit 1.4.1 From 2ca408d9c749c32288bc28725f9f12ba30299e8f Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 30 Nov 2020 17:30:59 -0500 Subject: fanotify: Fix sys_fanotify_mark() on native x86-32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 121b32a58a3a ("x86/entry/32: Use IA32-specific wrappers for syscalls taking 64-bit arguments") converted native x86-32 which take 64-bit arguments to use the compat handlers to allow conversion to passing args via pt_regs. sys_fanotify_mark() was however missed, as it has a general compat handler. Add a config option that will use the syscall wrapper that takes the split args for native 32-bit. [ bp: Fix typo in Kconfig help text. ] Fixes: 121b32a58a3a ("x86/entry/32: Use IA32-specific wrappers for syscalls taking 64-bit arguments") Reported-by: Paweł Jasiak Signed-off-by: Brian Gerst Signed-off-by: Borislav Petkov Acked-by: Jan Kara Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20201130223059.101286-1-brgerst@gmail.com --- arch/Kconfig | 6 ++++++ arch/x86/Kconfig | 1 + fs/notify/fanotify/fanotify_user.c | 17 +++++++---------- include/linux/syscalls.h | 24 ++++++++++++++++++++++++ 4 files changed, 38 insertions(+), 10 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 78c6f05b10f9..24862d15f3a3 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1105,6 +1105,12 @@ config HAVE_ARCH_PFN_VALID config ARCH_SUPPORTS_DEBUG_PAGEALLOC bool +config ARCH_SPLIT_ARG64 + bool + help + If a 32-bit architecture requires 64-bit arguments to be split into + pairs of 32-bit arguments, select this option. + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7b6dd10b162a..21f851179ff0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -19,6 +19,7 @@ config X86_32 select KMAP_LOCAL select MODULES_USE_ELF_REL select OLD_SIGACTION + select ARCH_SPLIT_ARG64 config X86_64 def_bool y diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 3e01d8f2ab90..dcab112e1f00 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1285,26 +1285,23 @@ fput_and_out: return ret; } +#ifndef CONFIG_ARCH_SPLIT_ARG64 SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags, __u64, mask, int, dfd, const char __user *, pathname) { return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname); } +#endif -#ifdef CONFIG_COMPAT -COMPAT_SYSCALL_DEFINE6(fanotify_mark, +#if defined(CONFIG_ARCH_SPLIT_ARG64) || defined(CONFIG_COMPAT) +SYSCALL32_DEFINE6(fanotify_mark, int, fanotify_fd, unsigned int, flags, - __u32, mask0, __u32, mask1, int, dfd, + SC_ARG64(mask), int, dfd, const char __user *, pathname) { - return do_fanotify_mark(fanotify_fd, flags, -#ifdef __BIG_ENDIAN - ((__u64)mask0 << 32) | mask1, -#else - ((__u64)mask1 << 32) | mask0, -#endif - dfd, pathname); + return do_fanotify_mark(fanotify_fd, flags, SC_VAL64(__u64, mask), + dfd, pathname); } #endif diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index f3929aff39cf..7688bc983de5 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -251,6 +251,30 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #endif /* __SYSCALL_DEFINEx */ +/* For split 64-bit arguments on 32-bit architectures */ +#ifdef __LITTLE_ENDIAN +#define SC_ARG64(name) u32, name##_lo, u32, name##_hi +#else +#define SC_ARG64(name) u32, name##_hi, u32, name##_lo +#endif +#define SC_VAL64(type, name) ((type) name##_hi << 32 | name##_lo) + +#ifdef CONFIG_COMPAT +#define SYSCALL32_DEFINE1 COMPAT_SYSCALL_DEFINE1 +#define SYSCALL32_DEFINE2 COMPAT_SYSCALL_DEFINE2 +#define SYSCALL32_DEFINE3 COMPAT_SYSCALL_DEFINE3 +#define SYSCALL32_DEFINE4 COMPAT_SYSCALL_DEFINE4 +#define SYSCALL32_DEFINE5 COMPAT_SYSCALL_DEFINE5 +#define SYSCALL32_DEFINE6 COMPAT_SYSCALL_DEFINE6 +#else +#define SYSCALL32_DEFINE1 SYSCALL_DEFINE1 +#define SYSCALL32_DEFINE2 SYSCALL_DEFINE2 +#define SYSCALL32_DEFINE3 SYSCALL_DEFINE3 +#define SYSCALL32_DEFINE4 SYSCALL_DEFINE4 +#define SYSCALL32_DEFINE5 SYSCALL_DEFINE5 +#define SYSCALL32_DEFINE6 SYSCALL_DEFINE6 +#endif + /* * Called before coming back to user-mode. Returning to user-mode with an * address limit different than USER_DS can allow to overwrite kernel memory. -- cgit 1.4.1 From 512d4a26abdbd11c6ffa03032740e5ab3c62c55b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 28 Dec 2020 14:03:02 +0200 Subject: interconnect: qcom: fix rpmh link failures When CONFIG_COMPILE_TEST is set, it is possible to build some of the interconnect drivers into the kernel while their dependencies are loadable modules, which is bad: arm-linux-gnueabi-ld: drivers/interconnect/qcom/bcm-voter.o: in function `qcom_icc_bcm_voter_commit': (.text+0x1f8): undefined reference to `rpmh_invalidate' arm-linux-gnueabi-ld: (.text+0x20c): undefined reference to `rpmh_write_batch' arm-linux-gnueabi-ld: (.text+0x2b0): undefined reference to `rpmh_write_batch' arm-linux-gnueabi-ld: (.text+0x2e8): undefined reference to `rpmh_write_batch' arm-linux-gnueabi-ld: drivers/interconnect/qcom/icc-rpmh.o: in function `qcom_icc_bcm_init': (.text+0x2ac): undefined reference to `cmd_db_read_addr' arm-linux-gnueabi-ld: (.text+0x2c8): undefined reference to `cmd_db_read_aux_data' The exact dependencies are a bit complicated, so split them out into a hidden Kconfig symbol that all drivers can in turn depend on to get it right. Fixes: 976daac4a1c5 ("interconnect: qcom: Consolidate interconnect RPMh support") Signed-off-by: Arnd Bergmann Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20201204165030.3747484-1-arnd@kernel.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/Kconfig | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/interconnect/qcom/Kconfig b/drivers/interconnect/qcom/Kconfig index a8f93ba265f8..b3fb5b02bcf1 100644 --- a/drivers/interconnect/qcom/Kconfig +++ b/drivers/interconnect/qcom/Kconfig @@ -42,13 +42,23 @@ config INTERCONNECT_QCOM_QCS404 This is a driver for the Qualcomm Network-on-Chip on qcs404-based platforms. +config INTERCONNECT_QCOM_RPMH_POSSIBLE + tristate + default INTERCONNECT_QCOM + depends on QCOM_RPMH || (COMPILE_TEST && !QCOM_RPMH) + depends on QCOM_COMMAND_DB || (COMPILE_TEST && !QCOM_COMMAND_DB) + depends on OF || COMPILE_TEST + help + Compile-testing RPMH drivers is possible on other platforms, + but in order to avoid link failures, drivers must not be built-in + when QCOM_RPMH or QCOM_COMMAND_DB are loadable modules + config INTERCONNECT_QCOM_RPMH tristate config INTERCONNECT_QCOM_SC7180 tristate "Qualcomm SC7180 interconnect driver" - depends on INTERCONNECT_QCOM - depends on (QCOM_RPMH && QCOM_COMMAND_DB && OF) || COMPILE_TEST + depends on INTERCONNECT_QCOM_RPMH_POSSIBLE select INTERCONNECT_QCOM_RPMH select INTERCONNECT_QCOM_BCM_VOTER help @@ -57,8 +67,7 @@ config INTERCONNECT_QCOM_SC7180 config INTERCONNECT_QCOM_SDM845 tristate "Qualcomm SDM845 interconnect driver" - depends on INTERCONNECT_QCOM - depends on (QCOM_RPMH && QCOM_COMMAND_DB && OF) || COMPILE_TEST + depends on INTERCONNECT_QCOM_RPMH_POSSIBLE select INTERCONNECT_QCOM_RPMH select INTERCONNECT_QCOM_BCM_VOTER help @@ -67,8 +76,7 @@ config INTERCONNECT_QCOM_SDM845 config INTERCONNECT_QCOM_SM8150 tristate "Qualcomm SM8150 interconnect driver" - depends on INTERCONNECT_QCOM - depends on (QCOM_RPMH && QCOM_COMMAND_DB && OF) || COMPILE_TEST + depends on INTERCONNECT_QCOM_RPMH_POSSIBLE select INTERCONNECT_QCOM_RPMH select INTERCONNECT_QCOM_BCM_VOTER help @@ -77,8 +85,7 @@ config INTERCONNECT_QCOM_SM8150 config INTERCONNECT_QCOM_SM8250 tristate "Qualcomm SM8250 interconnect driver" - depends on INTERCONNECT_QCOM - depends on (QCOM_RPMH && QCOM_COMMAND_DB && OF) || COMPILE_TEST + depends on INTERCONNECT_QCOM_RPMH_POSSIBLE select INTERCONNECT_QCOM_RPMH select INTERCONNECT_QCOM_BCM_VOTER help -- cgit 1.4.1 From c6174c0e058fc0a54e0b9787c44cb24b0a8d0217 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 28 Dec 2020 14:03:02 +0200 Subject: interconnect: imx: Add a missing of_node_put after of_device_is_available Add an 'of_node_put()' call when a tested device node is not available. Fixes: f0d8048525d7 ("interconnect: Add imx core driver") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20201206121304.29381-1-christophe.jaillet@wanadoo.fr Signed-off-by: Georgi Djakov --- drivers/interconnect/imx/imx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/interconnect/imx/imx.c b/drivers/interconnect/imx/imx.c index 41dba7090c2a..e398ebf1dbba 100644 --- a/drivers/interconnect/imx/imx.c +++ b/drivers/interconnect/imx/imx.c @@ -99,6 +99,7 @@ static int imx_icc_node_init_qos(struct icc_provider *provider, if (!dn || !of_device_is_available(dn)) { dev_warn(dev, "Missing property %s, skip scaling %s\n", adj->phandle_name, node->name); + of_node_put(dn); return 0; } -- cgit 1.4.1 From 6414b79d02c426b7dd7d942fc19fb38220ea44ec Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 28 Dec 2020 14:03:02 +0200 Subject: interconnect: imx: Remove a useless test 'dn' can't be NULL here, it is tested just the line above. Remove this useless test. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20201206121322.29434-1-christophe.jaillet@wanadoo.fr Signed-off-by: Georgi Djakov --- drivers/interconnect/imx/imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/interconnect/imx/imx.c b/drivers/interconnect/imx/imx.c index e398ebf1dbba..c770951a909c 100644 --- a/drivers/interconnect/imx/imx.c +++ b/drivers/interconnect/imx/imx.c @@ -96,7 +96,7 @@ static int imx_icc_node_init_qos(struct icc_provider *provider, return -ENODEV; } /* Allow scaling to be disabled on a per-node basis */ - if (!dn || !of_device_is_available(dn)) { + if (!of_device_is_available(dn)) { dev_warn(dev, "Missing property %s, skip scaling %s\n", adj->phandle_name, node->name); of_node_put(dn); -- cgit 1.4.1 From 67288f74d4837b82ef937170da3389b0779c17be Mon Sep 17 00:00:00 2001 From: Martin Kepplinger Date: Mon, 28 Dec 2020 14:03:02 +0200 Subject: interconnect: imx8mq: Use icc_sync_state Add the icc_sync_state callback to notify the framework when consumers are probed and the bandwidth doesn't have to be kept at maximum anymore. Signed-off-by: Martin Kepplinger Suggested-by: Georgi Djakov Fixes: 7d3b0b0d8184 ("interconnect: qcom: Use icc_sync_state") Link: https://lore.kernel.org/r/20201210100906.18205-6-martin.kepplinger@puri.sm Signed-off-by: Georgi Djakov --- drivers/interconnect/imx/imx8mq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/interconnect/imx/imx8mq.c b/drivers/interconnect/imx/imx8mq.c index ba43a15aefec..d7768d3c6d8a 100644 --- a/drivers/interconnect/imx/imx8mq.c +++ b/drivers/interconnect/imx/imx8mq.c @@ -7,6 +7,7 @@ #include #include +#include #include #include "imx.h" @@ -94,6 +95,7 @@ static struct platform_driver imx8mq_icc_driver = { .remove = imx8mq_icc_remove, .driver = { .name = "imx8mq-interconnect", + .sync_state = icc_sync_state, }, }; -- cgit 1.4.1 From 12b38ea040b3bb2a30eb9cd488376df5be7ea81f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 13 Dec 2020 16:11:05 +0100 Subject: staging: spmi: hisi-spmi-controller: Fix some error handling paths IN the probe function, if an error occurs after calling 'spmi_controller_alloc()', it must be undone by a corresponding 'spmi_controller_put() call. In the remove function, use 'spmi_controller_put(ctrl)' instead of 'kfree(ctrl)'. While a it fix an error message (s/spmi_add_controller/spmi_controller_add/) Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20201213151105.137731-1-christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/staging/hikey9xx/hisi-spmi-controller.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/staging/hikey9xx/hisi-spmi-controller.c b/drivers/staging/hikey9xx/hisi-spmi-controller.c index 861aedd5de48..0d42bc65f39b 100644 --- a/drivers/staging/hikey9xx/hisi-spmi-controller.c +++ b/drivers/staging/hikey9xx/hisi-spmi-controller.c @@ -278,21 +278,24 @@ static int spmi_controller_probe(struct platform_device *pdev) iores = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!iores) { dev_err(&pdev->dev, "can not get resource!\n"); - return -EINVAL; + ret = -EINVAL; + goto err_put_controller; } spmi_controller->base = devm_ioremap(&pdev->dev, iores->start, resource_size(iores)); if (!spmi_controller->base) { dev_err(&pdev->dev, "can not remap base addr!\n"); - return -EADDRNOTAVAIL; + ret = -EADDRNOTAVAIL; + goto err_put_controller; } ret = of_property_read_u32(pdev->dev.of_node, "spmi-channel", &spmi_controller->channel); if (ret) { dev_err(&pdev->dev, "can not get channel\n"); - return -ENODEV; + ret = -ENODEV; + goto err_put_controller; } platform_set_drvdata(pdev, spmi_controller); @@ -309,9 +312,15 @@ static int spmi_controller_probe(struct platform_device *pdev) ctrl->write_cmd = spmi_write_cmd; ret = spmi_controller_add(ctrl); - if (ret) - dev_err(&pdev->dev, "spmi_add_controller failed with error %d!\n", ret); + if (ret) { + dev_err(&pdev->dev, "spmi_controller_add failed with error %d!\n", ret); + goto err_put_controller; + } + + return 0; +err_put_controller: + spmi_controller_put(ctrl); return ret; } @@ -320,7 +329,7 @@ static int spmi_del_controller(struct platform_device *pdev) struct spmi_controller *ctrl = platform_get_drvdata(pdev); spmi_controller_remove(ctrl); - kfree(ctrl); + spmi_controller_put(ctrl); return 0; } -- cgit 1.4.1 From cab36da4bf1a35739b091b73714a39a1bbd02b05 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 2 Dec 2020 09:43:49 +0300 Subject: Staging: comedi: Return -EFAULT if copy_to_user() fails Return -EFAULT on error instead of the number of bytes remaining to be copied. Fixes: bac42fb21259 ("comedi: get rid of compat_alloc_user_space() mess in COMEDI_CMD{,TEST} compat") Signed-off-by: Dan Carpenter Cc: stable Link: https://lore.kernel.org/r/X8c3pfwFy2jpy4BP@mwanda Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/comedi_fops.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index d99231c737fb..80d74cce2a01 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -2987,7 +2987,9 @@ static int put_compat_cmd(struct comedi32_cmd_struct __user *cmd32, v32.chanlist_len = cmd->chanlist_len; v32.data = ptr_to_compat(cmd->data); v32.data_len = cmd->data_len; - return copy_to_user(cmd32, &v32, sizeof(v32)); + if (copy_to_user(cmd32, &v32, sizeof(v32))) + return -EFAULT; + return 0; } /* Handle 32-bit COMEDI_CMD ioctl. */ -- cgit 1.4.1 From d887d6104adeb94d1b926936ea21f07367f0ff9f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 13 Dec 2020 16:35:13 +0100 Subject: staging: mt7621-dma: Fix a resource leak in an error handling path If an error occurs after calling 'mtk_hsdma_init()', it must be undone by a corresponding call to 'mtk_hsdma_uninit()' as already done in the remove function. Fixes: 0853c7a53eb3 ("staging: mt7621-dma: ralink: add rt2880 dma engine") Signed-off-by: Christophe JAILLET Cc: stable Link: https://lore.kernel.org/r/20201213153513.138723-1-christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/staging/mt7621-dma/mtk-hsdma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/mt7621-dma/mtk-hsdma.c b/drivers/staging/mt7621-dma/mtk-hsdma.c index d241349214e7..bc4bb4374313 100644 --- a/drivers/staging/mt7621-dma/mtk-hsdma.c +++ b/drivers/staging/mt7621-dma/mtk-hsdma.c @@ -712,7 +712,7 @@ static int mtk_hsdma_probe(struct platform_device *pdev) ret = dma_async_device_register(dd); if (ret) { dev_err(&pdev->dev, "failed to register dma device\n"); - return ret; + goto err_uninit_hsdma; } ret = of_dma_controller_register(pdev->dev.of_node, @@ -728,6 +728,8 @@ static int mtk_hsdma_probe(struct platform_device *pdev) err_unregister: dma_async_device_unregister(dd); +err_uninit_hsdma: + mtk_hsdma_uninit(hsdma); return ret; } -- cgit 1.4.1 From 0ffc76539e6e8d28114f95ac25c167c37b5191b3 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sun, 27 Dec 2020 13:45:02 +0000 Subject: USB: cdc-acm: blacklist another IR Droid device This device is supported by the IR Toy driver. Reported-by: Georgi Bakalski Signed-off-by: Sean Young Acked-by: Oliver Neukum Cc: stable Link: https://lore.kernel.org/r/20201227134502.4548-2-sean@mess.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index f52f1bc0559f..781905745812 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1895,6 +1895,10 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x04d8, 0xfd08), .driver_info = IGNORE_DEVICE, }, + + { USB_DEVICE(0x04d8, 0xf58b), + .driver_info = IGNORE_DEVICE, + }, #endif /*Samsung phone in firmware update mode */ -- cgit 1.4.1 From 421da9413a6a5ec4334cade5092370cf2c8c8add Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 18 Dec 2020 12:57:36 +0200 Subject: MAINTAINERS: Update address for Cadence USB3 driver Updates my email address for Cadence USB3 driver. Signed-off-by: Roger Quadros Link: https://lore.kernel.org/r/20201218105736.17667-1-rogerq@ti.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 546aa66428c9..afe3a5c66bc9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3883,7 +3883,7 @@ F: drivers/mtd/nand/raw/cadence-nand-controller.c CADENCE USB3 DRD IP DRIVER M: Peter Chen M: Pawel Laszczak -M: Roger Quadros +R: Roger Quadros R: Aswath Govindraju L: linux-usb@vger.kernel.org S: Maintained -- cgit 1.4.1 From 88ebce92806e5dff3549e1a8cacb53978104d3b4 Mon Sep 17 00:00:00 2001 From: Aswath Govindraju Date: Tue, 15 Dec 2020 09:55:49 +0530 Subject: dt-bindings: usb: Add new compatible string for AM64 SoC Add compatible string in j721e-usb binding file as the same USB subsystem is present in AM64. Reviewed-by: Rob Herring Signed-off-by: Aswath Govindraju Link: https://lore.kernel.org/r/20201215042549.7956-1-a-govindraju@ti.com Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml index 388245b91a55..148b3fb4ceaf 100644 --- a/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml +++ b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml @@ -11,8 +11,12 @@ maintainers: properties: compatible: - items: + oneOf: - const: ti,j721e-usb + - const: ti,am64-usb + - items: + - const: ti,j721e-usb + - const: ti,am64-usb reg: description: module registers -- cgit 1.4.1 From a390bef7db1f192cc5b588dbcf8ed113406ec130 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 10 Dec 2020 18:04:13 -0300 Subject: usb: gadget: fsl_mxc_udc: Remove the driver Since 5.10-rc1 i.MX is a devicetree-only platform, and this driver was only used by the old non-DT i.MX devices. Remove the driver as it has no users left. Signed-off-by: Fabio Estevam Acked-by: Peter Chen Link: https://lore.kernel.org/r/20201210210413.15262-1-festevam@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/Kconfig | 2 +- drivers/usb/gadget/udc/Makefile | 1 - drivers/usb/gadget/udc/fsl_mxc_udc.c | 122 ----------------------------------- 3 files changed, 1 insertion(+), 124 deletions(-) delete mode 100644 drivers/usb/gadget/udc/fsl_mxc_udc.c diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig index 1a12aab208b4..8c614bb86c66 100644 --- a/drivers/usb/gadget/udc/Kconfig +++ b/drivers/usb/gadget/udc/Kconfig @@ -90,7 +90,7 @@ config USB_BCM63XX_UDC config USB_FSL_USB2 tristate "Freescale Highspeed USB DR Peripheral Controller" - depends on FSL_SOC || ARCH_MXC + depends on FSL_SOC help Some of Freescale PowerPC and i.MX processors have a High Speed Dual-Role(DR) USB controller, which supports device mode. diff --git a/drivers/usb/gadget/udc/Makefile b/drivers/usb/gadget/udc/Makefile index f5a7ce28aecd..a21f2224e7eb 100644 --- a/drivers/usb/gadget/udc/Makefile +++ b/drivers/usb/gadget/udc/Makefile @@ -23,7 +23,6 @@ obj-$(CONFIG_USB_ATMEL_USBA) += atmel_usba_udc.o obj-$(CONFIG_USB_BCM63XX_UDC) += bcm63xx_udc.o obj-$(CONFIG_USB_FSL_USB2) += fsl_usb2_udc.o fsl_usb2_udc-y := fsl_udc_core.o -fsl_usb2_udc-$(CONFIG_ARCH_MXC) += fsl_mxc_udc.o obj-$(CONFIG_USB_TEGRA_XUDC) += tegra-xudc.o obj-$(CONFIG_USB_M66592) += m66592-udc.o obj-$(CONFIG_USB_R8A66597) += r8a66597-udc.o diff --git a/drivers/usb/gadget/udc/fsl_mxc_udc.c b/drivers/usb/gadget/udc/fsl_mxc_udc.c deleted file mode 100644 index 5a321992decc..000000000000 --- a/drivers/usb/gadget/udc/fsl_mxc_udc.c +++ /dev/null @@ -1,122 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Copyright (C) 2009 - * Guennadi Liakhovetski, DENX Software Engineering, - * - * Description: - * Helper routines for i.MX3x SoCs from Freescale, needed by the fsl_usb2_udc.c - * driver to function correctly on these systems. - */ -#include -#include -#include -#include -#include -#include -#include - -#include "fsl_usb2_udc.h" - -static struct clk *mxc_ahb_clk; -static struct clk *mxc_per_clk; -static struct clk *mxc_ipg_clk; - -/* workaround ENGcm09152 for i.MX35 */ -#define MX35_USBPHYCTRL_OFFSET 0x600 -#define USBPHYCTRL_OTGBASE_OFFSET 0x8 -#define USBPHYCTRL_EVDO (1 << 23) - -int fsl_udc_clk_init(struct platform_device *pdev) -{ - struct fsl_usb2_platform_data *pdata; - unsigned long freq; - int ret; - - pdata = dev_get_platdata(&pdev->dev); - - mxc_ipg_clk = devm_clk_get(&pdev->dev, "ipg"); - if (IS_ERR(mxc_ipg_clk)) { - dev_err(&pdev->dev, "clk_get(\"ipg\") failed\n"); - return PTR_ERR(mxc_ipg_clk); - } - - mxc_ahb_clk = devm_clk_get(&pdev->dev, "ahb"); - if (IS_ERR(mxc_ahb_clk)) { - dev_err(&pdev->dev, "clk_get(\"ahb\") failed\n"); - return PTR_ERR(mxc_ahb_clk); - } - - mxc_per_clk = devm_clk_get(&pdev->dev, "per"); - if (IS_ERR(mxc_per_clk)) { - dev_err(&pdev->dev, "clk_get(\"per\") failed\n"); - return PTR_ERR(mxc_per_clk); - } - - clk_prepare_enable(mxc_ipg_clk); - clk_prepare_enable(mxc_ahb_clk); - clk_prepare_enable(mxc_per_clk); - - /* make sure USB_CLK is running at 60 MHz +/- 1000 Hz */ - if (!strcmp(pdev->id_entry->name, "imx-udc-mx27")) { - freq = clk_get_rate(mxc_per_clk); - if (pdata->phy_mode != FSL_USB2_PHY_ULPI && - (freq < 59999000 || freq > 60001000)) { - dev_err(&pdev->dev, "USB_CLK=%lu, should be 60MHz\n", freq); - ret = -EINVAL; - goto eclkrate; - } - } - - return 0; - -eclkrate: - clk_disable_unprepare(mxc_ipg_clk); - clk_disable_unprepare(mxc_ahb_clk); - clk_disable_unprepare(mxc_per_clk); - mxc_per_clk = NULL; - return ret; -} - -int fsl_udc_clk_finalize(struct platform_device *pdev) -{ - struct fsl_usb2_platform_data *pdata = dev_get_platdata(&pdev->dev); - int ret = 0; - - /* workaround ENGcm09152 for i.MX35 */ - if (pdata->workaround & FLS_USB2_WORKAROUND_ENGCM09152) { - unsigned int v; - struct resource *res = platform_get_resource - (pdev, IORESOURCE_MEM, 0); - void __iomem *phy_regs = ioremap(res->start + - MX35_USBPHYCTRL_OFFSET, 512); - if (!phy_regs) { - dev_err(&pdev->dev, "ioremap for phy address fails\n"); - ret = -EINVAL; - goto ioremap_err; - } - - v = readl(phy_regs + USBPHYCTRL_OTGBASE_OFFSET); - writel(v | USBPHYCTRL_EVDO, - phy_regs + USBPHYCTRL_OTGBASE_OFFSET); - - iounmap(phy_regs); - } - - -ioremap_err: - /* ULPI transceivers don't need usbpll */ - if (pdata->phy_mode == FSL_USB2_PHY_ULPI) { - clk_disable_unprepare(mxc_per_clk); - mxc_per_clk = NULL; - } - - return ret; -} - -void fsl_udc_clk_release(void) -{ - if (mxc_per_clk) - clk_disable_unprepare(mxc_per_clk); - clk_disable_unprepare(mxc_ahb_clk); - clk_disable_unprepare(mxc_ipg_clk); -} -- cgit 1.4.1 From 5e5ff0b4b6bcb4d17b7a26ec8bcfc7dd4651684f Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 20 Dec 2020 00:25:53 +0900 Subject: USB: cdc-wdm: Fix use after free in service_outstanding_interrupt(). syzbot is reporting UAF at usb_submit_urb() [1], for service_outstanding_interrupt() is not checking WDM_DISCONNECTING before calling usb_submit_urb(). Close the race by doing same checks wdm_read() does upon retry. Also, while wdm_read() checks WDM_DISCONNECTING with desc->rlock held, service_interrupt_work() does not hold desc->rlock. Thus, it is possible that usb_submit_urb() is called from service_outstanding_interrupt() from service_interrupt_work() after WDM_DISCONNECTING was set and kill_urbs() from wdm_disconnect() completed. Thus, move kill_urbs() in wdm_disconnect() to after cancel_work_sync() (which makes sure that service_interrupt_work() is no longer running) completed. Although it seems to be safe to dereference desc->intf->dev in service_outstanding_interrupt() even if WDM_DISCONNECTING was already set because desc->rlock or cancel_work_sync() prevents wdm_disconnect() from reaching list_del() before service_outstanding_interrupt() completes, let's not emit error message if WDM_DISCONNECTING is set by wdm_disconnect() while usb_submit_urb() is in progress. [1] https://syzkaller.appspot.com/bug?extid=9e04e2df4a32fb661daf Reported-by: syzbot Signed-off-by: Tetsuo Handa Cc: stable Link: https://lore.kernel.org/r/620e2ee0-b9a3-dbda-a25b-a93e0ed03ec5@i-love.sakura.ne.jp Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 02d0cfd23bb2..508b1c3f8b73 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -465,13 +465,23 @@ static int service_outstanding_interrupt(struct wdm_device *desc) if (!desc->resp_count || !--desc->resp_count) goto out; + if (test_bit(WDM_DISCONNECTING, &desc->flags)) { + rv = -ENODEV; + goto out; + } + if (test_bit(WDM_RESETTING, &desc->flags)) { + rv = -EIO; + goto out; + } + set_bit(WDM_RESPONDING, &desc->flags); spin_unlock_irq(&desc->iuspin); rv = usb_submit_urb(desc->response, GFP_KERNEL); spin_lock_irq(&desc->iuspin); if (rv) { - dev_err(&desc->intf->dev, - "usb_submit_urb failed with result %d\n", rv); + if (!test_bit(WDM_DISCONNECTING, &desc->flags)) + dev_err(&desc->intf->dev, + "usb_submit_urb failed with result %d\n", rv); /* make sure the next notification trigger a submit */ clear_bit(WDM_RESPONDING, &desc->flags); @@ -1027,9 +1037,9 @@ static void wdm_disconnect(struct usb_interface *intf) wake_up_all(&desc->wait); mutex_lock(&desc->rlock); mutex_lock(&desc->wlock); - kill_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); + kill_urbs(desc); mutex_unlock(&desc->wlock); mutex_unlock(&desc->rlock); -- cgit 1.4.1 From 0f041b8592daaaea46e91a8ebb3b47e6e0171fd8 Mon Sep 17 00:00:00 2001 From: Madhusudanarao Amara Date: Wed, 16 Dec 2020 19:39:18 +0530 Subject: usb: typec: intel_pmc_mux: Configure HPD first for HPD+IRQ request Warm reboot scenarios some times type C Mux driver gets Mux configuration request as HPD=1,IRQ=1. In that scenario typeC Mux driver need to configure Mux as follows as per IOM requirement: (1). Confgiure Mux HPD = 1, IRQ = 0 (2). Configure Mux with HPD = 1, IRQ = 1 IOM expects TypeC Mux configuration as follows: (1). HPD=1, IRQ=0 (2). HPD=1, IRQ=1 if IOM gets mux config request (2) without configuring (1), it will ignore the request. The impact of this is there is no DP_alt mode display. Fixes: 43d596e32276 ("usb: typec: intel_pmc_mux: Check the port status before connect") Cc: stable@vger.kernel.org Reviewed-by: Heikki Krogerus Signed-off-by: Madhusudanarao Amara Link: https://lore.kernel.org/r/20201216140918.49197-1-madhusudanarao.amara@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/mux/intel_pmc_mux.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c index cf37a59ce130..46a25b8db72e 100644 --- a/drivers/usb/typec/mux/intel_pmc_mux.c +++ b/drivers/usb/typec/mux/intel_pmc_mux.c @@ -207,10 +207,21 @@ static int pmc_usb_mux_dp_hpd(struct pmc_usb_port *port, struct typec_displayport_data *dp) { u8 msg[2] = { }; + int ret; msg[0] = PMC_USB_DP_HPD; msg[0] |= port->usb3_port << PMC_USB_MSG_USB3_PORT_SHIFT; + /* Configure HPD first if HPD,IRQ comes together */ + if (!IOM_PORT_HPD_ASSERTED(port->iom_status) && + dp->status & DP_STATUS_IRQ_HPD && + dp->status & DP_STATUS_HPD_STATE) { + msg[1] = PMC_USB_DP_HPD_LVL; + ret = pmc_usb_command(port, msg, sizeof(msg)); + if (ret) + return ret; + } + if (dp->status & DP_STATUS_IRQ_HPD) msg[1] = PMC_USB_DP_HPD_IRQ; -- cgit 1.4.1 From 5d5323a6f3625f101dbfa94ba3ef7706cce38760 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Tue, 15 Dec 2020 20:31:47 +0100 Subject: USB: xhci: fix U1/U2 handling for hardware with XHCI_INTEL_HOST quirk set The commit 0472bf06c6fd ("xhci: Prevent U1/U2 link pm states if exit latency is too long") was constraining the xhci code not to allow U1/U2 sleep states if the latency to wake up from the U-states reached the service interval of an periodic endpoint. This fix was not taking into account that in case the quirk XHCI_INTEL_HOST is set, the wakeup time will be calculated and configured differently. It checks for u1_params.mel/u2_params.mel as a limit. But the code could decide to write another MEL into the hardware. This leads to broken cases where not enough bandwidth is available for other devices: usb 1-2: can't set config #1, error -28 This patch is fixing that case by checking for timeout_ns after the wakeup time was calculated depending on the quirks. Fixes: 0472bf06c6fd ("xhci: Prevent U1/U2 link pm states if exit latency is too long") Signed-off-by: Michael Grzeschik Cc: stable Link: https://lore.kernel.org/r/20201215193147.11738-1-m.grzeschik@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 91ab81c3fc79..e86940571b4c 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4770,19 +4770,19 @@ static u16 xhci_calculate_u1_timeout(struct xhci_hcd *xhci, { unsigned long long timeout_ns; + if (xhci->quirks & XHCI_INTEL_HOST) + timeout_ns = xhci_calculate_intel_u1_timeout(udev, desc); + else + timeout_ns = udev->u1_params.sel; + /* Prevent U1 if service interval is shorter than U1 exit latency */ if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) { - if (xhci_service_interval_to_ns(desc) <= udev->u1_params.mel) { + if (xhci_service_interval_to_ns(desc) <= timeout_ns) { dev_dbg(&udev->dev, "Disable U1, ESIT shorter than exit latency\n"); return USB3_LPM_DISABLED; } } - if (xhci->quirks & XHCI_INTEL_HOST) - timeout_ns = xhci_calculate_intel_u1_timeout(udev, desc); - else - timeout_ns = udev->u1_params.sel; - /* The U1 timeout is encoded in 1us intervals. * Don't return a timeout of zero, because that's USB3_LPM_DISABLED. */ @@ -4834,19 +4834,19 @@ static u16 xhci_calculate_u2_timeout(struct xhci_hcd *xhci, { unsigned long long timeout_ns; + if (xhci->quirks & XHCI_INTEL_HOST) + timeout_ns = xhci_calculate_intel_u2_timeout(udev, desc); + else + timeout_ns = udev->u2_params.sel; + /* Prevent U2 if service interval is shorter than U2 exit latency */ if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) { - if (xhci_service_interval_to_ns(desc) <= udev->u2_params.mel) { + if (xhci_service_interval_to_ns(desc) <= timeout_ns) { dev_dbg(&udev->dev, "Disable U2, ESIT shorter than exit latency\n"); return USB3_LPM_DISABLED; } } - if (xhci->quirks & XHCI_INTEL_HOST) - timeout_ns = xhci_calculate_intel_u2_timeout(udev, desc); - else - timeout_ns = udev->u2_params.sel; - /* The U2 timeout is encoded in 256us intervals */ timeout_ns = DIV_ROUND_UP_ULL(timeout_ns, 256 * 1000); /* If the necessary timeout value is bigger than what we can set in the -- cgit 1.4.1 From a5ada3dfe6a20f41f91448b9034a1ef8da3dc87d Mon Sep 17 00:00:00 2001 From: Zheng Zengkai Date: Tue, 15 Dec 2020 10:54:59 +0800 Subject: usb: dwc3: meson-g12a: disable clk on error handling path in probe dwc3_meson_g12a_probe() does not invoke clk_bulk_disable_unprepare() on one error handling path. This patch fixes that. Fixes: 347052e3bf1b ("usb: dwc3: meson-g12a: fix USB2 PHY initialization on G12A and A1 SoCs") Reported-by: Hulk Robot Signed-off-by: Zheng Zengkai Cc: stable Reviewed-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20201215025459.91794-1-zhengzengkai@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-meson-g12a.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-meson-g12a.c b/drivers/usb/dwc3/dwc3-meson-g12a.c index 417e05381b5d..bdf1f98dfad8 100644 --- a/drivers/usb/dwc3/dwc3-meson-g12a.c +++ b/drivers/usb/dwc3/dwc3-meson-g12a.c @@ -754,7 +754,7 @@ static int dwc3_meson_g12a_probe(struct platform_device *pdev) ret = priv->drvdata->setup_regmaps(priv, base); if (ret) - return ret; + goto err_disable_clks; if (priv->vbus) { ret = regulator_enable(priv->vbus); -- cgit 1.4.1 From 2cc332e4ee4febcbb685e2962ad323fe4b3b750a Mon Sep 17 00:00:00 2001 From: Zqiang Date: Thu, 10 Dec 2020 10:01:48 +0800 Subject: usb: gadget: function: printer: Fix a memory leak for interface descriptor When printer driver is loaded, the printer_func_bind function is called, in this function, the interface descriptor be allocated memory, if after that, the error occurred, the interface descriptor memory need to be free. Reviewed-by: Peter Chen Cc: Signed-off-by: Zqiang Link: https://lore.kernel.org/r/20201210020148.6691-1-qiang.zhang@windriver.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_printer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c index 64a4112068fc..2f1eb2e81d30 100644 --- a/drivers/usb/gadget/function/f_printer.c +++ b/drivers/usb/gadget/function/f_printer.c @@ -1162,6 +1162,7 @@ fail_tx_reqs: printer_req_free(dev->in_ep, req); } + usb_free_all_descriptors(f); return ret; } -- cgit 1.4.1 From 5cc35c224a80aa5a5a539510ef049faf0d6ed181 Mon Sep 17 00:00:00 2001 From: Sriharsha Allenki Date: Wed, 2 Dec 2020 18:32:20 +0530 Subject: usb: gadget: Fix spinlock lockup on usb_function_deactivate There is a spinlock lockup as part of composite_disconnect when it tries to acquire cdev->lock as part of usb_gadget_deactivate. This is because the usb_gadget_deactivate is called from usb_function_deactivate with the same spinlock held. This would result in the below call stack and leads to stall. rcu: INFO: rcu_preempt detected stalls on CPUs/tasks: rcu: 3-...0: (1 GPs behind) idle=162/1/0x4000000000000000 softirq=10819/10819 fqs=2356 (detected by 2, t=5252 jiffies, g=20129, q=3770) Task dump for CPU 3: task:uvc-gadget_wlhe state:R running task stack: 0 pid: 674 ppid: 636 flags:0x00000202 Call trace: __switch_to+0xc0/0x170 _raw_spin_lock_irqsave+0x84/0xb0 composite_disconnect+0x28/0x78 configfs_composite_disconnect+0x68/0x70 usb_gadget_disconnect+0x10c/0x128 usb_gadget_deactivate+0xd4/0x108 usb_function_deactivate+0x6c/0x80 uvc_function_disconnect+0x20/0x58 uvc_v4l2_release+0x30/0x88 v4l2_release+0xbc/0xf0 __fput+0x7c/0x230 ____fput+0x14/0x20 task_work_run+0x88/0x140 do_notify_resume+0x240/0x6f0 work_pending+0x8/0x200 Fix this by doing an unlock on cdev->lock before the usb_gadget_deactivate call from usb_function_deactivate. The same lockup can happen in the usb_gadget_activate path. Fix that path as well. Reported-by: Peter Chen Link: https://lore.kernel.org/linux-usb/20201102094936.GA29581@b29397-desktop/ Tested-by: Peter Chen Signed-off-by: Sriharsha Allenki Cc: stable Link: https://lore.kernel.org/r/20201202130220.24926-1-sallenki@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index c6d455f2bb92..1a556a628971 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -392,8 +392,11 @@ int usb_function_deactivate(struct usb_function *function) spin_lock_irqsave(&cdev->lock, flags); - if (cdev->deactivations == 0) + if (cdev->deactivations == 0) { + spin_unlock_irqrestore(&cdev->lock, flags); status = usb_gadget_deactivate(cdev->gadget); + spin_lock_irqsave(&cdev->lock, flags); + } if (status == 0) cdev->deactivations++; @@ -424,8 +427,11 @@ int usb_function_activate(struct usb_function *function) status = -EINVAL; else { cdev->deactivations--; - if (cdev->deactivations == 0) + if (cdev->deactivations == 0) { + spin_unlock_irqrestore(&cdev->lock, flags); status = usb_gadget_activate(cdev->gadget); + spin_lock_irqsave(&cdev->lock, flags); + } } spin_unlock_irqrestore(&cdev->lock, flags); -- cgit 1.4.1 From c91d3a6bcaa031f551ba29a496a8027b31289464 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 17 Nov 2020 17:29:55 +0800 Subject: USB: gadget: legacy: fix return error code in acm_ms_bind() If usb_otg_descriptor_alloc() failed, it need return ENOMEM. Fixes: 578aa8a2b12c ("usb: gadget: acm_ms: allocate and init otg descriptor by otg capabilities") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Cc: stable Link: https://lore.kernel.org/r/20201117092955.4102785-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/acm_ms.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/legacy/acm_ms.c b/drivers/usb/gadget/legacy/acm_ms.c index 59be2d8417c9..e8033e5f0c18 100644 --- a/drivers/usb/gadget/legacy/acm_ms.c +++ b/drivers/usb/gadget/legacy/acm_ms.c @@ -200,8 +200,10 @@ static int acm_ms_bind(struct usb_composite_dev *cdev) struct usb_descriptor_header *usb_desc; usb_desc = usb_otg_descriptor_alloc(gadget); - if (!usb_desc) + if (!usb_desc) { + status = -ENOMEM; goto fail_string_ids; + } usb_otg_descriptor_init(gadget, usb_desc); otg_desc[0] = usb_desc; otg_desc[1] = NULL; -- cgit 1.4.1 From 0a88fa221ce911c331bf700d2214c5b2f77414d3 Mon Sep 17 00:00:00 2001 From: Manish Narani Date: Tue, 17 Nov 2020 12:43:35 +0530 Subject: usb: gadget: u_ether: Fix MTU size mismatch with RX packet size Fix the MTU size issue with RX packet size as the host sends the packet with extra bytes containing ethernet header. This causes failure when user sets the MTU size to the maximum i.e. 15412. In this case the ethernet packet received will be of length 15412 plus the ethernet header length. This patch fixes the issue where there is a check that RX packet length must not be more than max packet length. Fixes: bba787a860fa ("usb: gadget: ether: Allow jumbo frames") Signed-off-by: Manish Narani Cc: stable Link: https://lore.kernel.org/r/1605597215-122027-1-git-send-email-manish.narani@xilinx.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_ether.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index 31ea76adcc0d..c019f2b0c0af 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -45,9 +45,10 @@ #define UETH__VERSION "29-May-2008" /* Experiments show that both Linux and Windows hosts allow up to 16k - * frame sizes. Set the max size to 15k+52 to prevent allocating 32k + * frame sizes. Set the max MTU size to 15k+52 to prevent allocating 32k * blocks and still have efficient handling. */ -#define GETHER_MAX_ETH_FRAME_LEN 15412 +#define GETHER_MAX_MTU_SIZE 15412 +#define GETHER_MAX_ETH_FRAME_LEN (GETHER_MAX_MTU_SIZE + ETH_HLEN) struct eth_dev { /* lock is held while accessing port_usb @@ -786,7 +787,7 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g, /* MTU range: 14 - 15412 */ net->min_mtu = ETH_HLEN; - net->max_mtu = GETHER_MAX_ETH_FRAME_LEN; + net->max_mtu = GETHER_MAX_MTU_SIZE; dev->gadget = g; SET_NETDEV_DEV(net, &g->dev); @@ -848,7 +849,7 @@ struct net_device *gether_setup_name_default(const char *netname) /* MTU range: 14 - 15412 */ net->min_mtu = ETH_HLEN; - net->max_mtu = GETHER_MAX_ETH_FRAME_LEN; + net->max_mtu = GETHER_MAX_MTU_SIZE; return net; } -- cgit 1.4.1 From 83a43ff80a566de8718dfc6565545a0080ec1fb5 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 17 Nov 2020 09:14:30 +0800 Subject: usb: chipidea: ci_hdrc_imx: add missing put_device() call in usbmisc_get_init_data() if of_find_device_by_node() succeed, usbmisc_get_init_data() doesn't have a corresponding put_device(). Thus add put_device() to fix the exception handling for this function implementation. Fixes: ef12da914ed6 ("usb: chipidea: imx: properly check for usbmisc") Signed-off-by: Yu Kuai Cc: stable Link: https://lore.kernel.org/r/20201117011430.642589-1-yukuai3@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/ci_hdrc_imx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index 9e12152ea46b..8b7bc10b6e8b 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -139,9 +139,13 @@ static struct imx_usbmisc_data *usbmisc_get_init_data(struct device *dev) misc_pdev = of_find_device_by_node(args.np); of_node_put(args.np); - if (!misc_pdev || !platform_get_drvdata(misc_pdev)) + if (!misc_pdev) return ERR_PTR(-EPROBE_DEFER); + if (!platform_get_drvdata(misc_pdev)) { + put_device(&misc_pdev->dev); + return ERR_PTR(-EPROBE_DEFER); + } data->dev = &misc_pdev->dev; /* -- cgit 1.4.1 From 372c93131998c0622304bed118322d2a04489e63 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Dec 2020 11:30:53 +0100 Subject: USB: yurex: fix control-URB timeout handling Make sure to always cancel the control URB in write() so that it can be reused after a timeout or spurious CMD_ACK. Currently any further write requests after a timeout would fail after triggering a WARN() in usb_submit_urb() when attempting to submit the already active URB. Reported-by: syzbot+e87ebe0f7913f71f2ea5@syzkaller.appspotmail.com Fixes: 6bc235a2e24a ("USB: add driver for Meywa-Denki & Kayac YUREX") Cc: stable # 2.6.37 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/yurex.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c index 73ebfa6e9715..c640f98d20c5 100644 --- a/drivers/usb/misc/yurex.c +++ b/drivers/usb/misc/yurex.c @@ -496,6 +496,9 @@ static ssize_t yurex_write(struct file *file, const char __user *user_buffer, timeout = schedule_timeout(YUREX_WRITE_TIMEOUT); finish_wait(&dev->waitq, &wait); + /* make sure URB is idle after timeout or (spurious) CMD_ACK */ + usb_kill_urb(dev->cntl_urb); + mutex_unlock(&dev->io_mutex); if (retval < 0) { -- cgit 1.4.1 From ce722da66d3e9384aa2de9d33d584ee154e5e157 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Thu, 10 Dec 2020 11:50:06 +0300 Subject: usb: dwc3: ulpi: Use VStsDone to detect PHY regs access completion In accordance with [1] the DWC_usb3 core sets the GUSB2PHYACCn.VStsDone bit when the PHY vendor control access is done and clears it when the application initiates a new transaction. The doc doesn't say anything about the GUSB2PHYACCn.VStsBsy flag serving for the same purpose. Moreover we've discovered that the VStsBsy flag can be cleared before the VStsDone bit. So using the former as a signal of the PHY control registers completion might be dangerous. Let's have the VStsDone flag utilized instead then. [1] Synopsys DesignWare Cores SuperSpeed USB 3.0 xHCI Host Controller Databook, 2.70a, December 2013, p.388 Fixes: 88bc9d194ff6 ("usb: dwc3: add ULPI interface support") Acked-by: Heikki Krogerus Signed-off-by: Serge Semin Link: https://lore.kernel.org/r/20201210085008.13264-2-Sergey.Semin@baikalelectronics.ru Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.h | 1 + drivers/usb/dwc3/ulpi.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 2f95f08ca511..1b241f937d8f 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -285,6 +285,7 @@ /* Global USB2 PHY Vendor Control Register */ #define DWC3_GUSB2PHYACC_NEWREGREQ BIT(25) +#define DWC3_GUSB2PHYACC_DONE BIT(24) #define DWC3_GUSB2PHYACC_BUSY BIT(23) #define DWC3_GUSB2PHYACC_WRITE BIT(22) #define DWC3_GUSB2PHYACC_ADDR(n) (n << 16) diff --git a/drivers/usb/dwc3/ulpi.c b/drivers/usb/dwc3/ulpi.c index aa213c9815f6..3cc4f4970c05 100644 --- a/drivers/usb/dwc3/ulpi.c +++ b/drivers/usb/dwc3/ulpi.c @@ -24,7 +24,7 @@ static int dwc3_ulpi_busyloop(struct dwc3 *dwc) while (count--) { reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYACC(0)); - if (!(reg & DWC3_GUSB2PHYACC_BUSY)) + if (reg & DWC3_GUSB2PHYACC_DONE) return 0; cpu_relax(); } -- cgit 1.4.1 From fca3f138105727c3a22edda32d02f91ce1bf11c9 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Thu, 10 Dec 2020 11:50:07 +0300 Subject: usb: dwc3: ulpi: Replace CPU-based busyloop with Protocol-based one Originally the procedure of the ULPI transaction finish detection has been developed as a simple busy-loop with just decrementing counter and no delays. It's wrong since on different systems the loop will take a different time to complete. So if the system bus and CPU are fast enough to overtake the ULPI bus and the companion PHY reaction, then we'll get to take a false timeout error. Fix this by converting the busy-loop procedure to take the standard bus speed, address value and the registers access mode into account for the busy-loop delay calculation. Here is the way the fix works. It's known that the ULPI bus is clocked with 60MHz signal. In accordance with [1] the ULPI bus protocol is created so to spend 5 and 6 clock periods for immediate register write and read operations respectively, and 6 and 7 clock periods - for the extended register writes and reads. Based on that we can easily pre-calculate the time which will be needed for the controller to perform a requested IO operation. Note we'll still preserve the attempts counter in case if the DWC USB3 controller has got some internals delays. [1] UTMI+ Low Pin Interface (ULPI) Specification, Revision 1.1, October 20, 2004, pp. 30 - 36. Fixes: 88bc9d194ff6 ("usb: dwc3: add ULPI interface support") Acked-by: Heikki Krogerus Signed-off-by: Serge Semin Link: https://lore.kernel.org/r/20201210085008.13264-3-Sergey.Semin@baikalelectronics.ru Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/ulpi.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/ulpi.c b/drivers/usb/dwc3/ulpi.c index 3cc4f4970c05..54c877f7b51d 100644 --- a/drivers/usb/dwc3/ulpi.c +++ b/drivers/usb/dwc3/ulpi.c @@ -7,6 +7,8 @@ * Author: Heikki Krogerus */ +#include +#include #include #include "core.h" @@ -17,12 +19,22 @@ DWC3_GUSB2PHYACC_ADDR(ULPI_ACCESS_EXTENDED) | \ DWC3_GUSB2PHYACC_EXTEND_ADDR(a) : DWC3_GUSB2PHYACC_ADDR(a)) -static int dwc3_ulpi_busyloop(struct dwc3 *dwc) +#define DWC3_ULPI_BASE_DELAY DIV_ROUND_UP(NSEC_PER_SEC, 60000000L) + +static int dwc3_ulpi_busyloop(struct dwc3 *dwc, u8 addr, bool read) { + unsigned long ns = 5L * DWC3_ULPI_BASE_DELAY; unsigned int count = 1000; u32 reg; + if (addr >= ULPI_EXT_VENDOR_SPECIFIC) + ns += DWC3_ULPI_BASE_DELAY; + + if (read) + ns += DWC3_ULPI_BASE_DELAY; + while (count--) { + ndelay(ns); reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYACC(0)); if (reg & DWC3_GUSB2PHYACC_DONE) return 0; @@ -47,7 +59,7 @@ static int dwc3_ulpi_read(struct device *dev, u8 addr) reg = DWC3_GUSB2PHYACC_NEWREGREQ | DWC3_ULPI_ADDR(addr); dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg); - ret = dwc3_ulpi_busyloop(dwc); + ret = dwc3_ulpi_busyloop(dwc, addr, true); if (ret) return ret; @@ -71,7 +83,7 @@ static int dwc3_ulpi_write(struct device *dev, u8 addr, u8 val) reg |= DWC3_GUSB2PHYACC_WRITE | val; dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg); - return dwc3_ulpi_busyloop(dwc); + return dwc3_ulpi_busyloop(dwc, addr, false); } static const struct ulpi_ops dwc3_ulpi_ops = { -- cgit 1.4.1 From e5f4ca3fce90a37b23a77bfcc86800d484a80514 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Thu, 10 Dec 2020 11:50:08 +0300 Subject: usb: dwc3: ulpi: Fix USB2.0 HS/FS/LS PHY suspend regression First of all the commit e0082698b689 ("usb: dwc3: ulpi: conditionally resume ULPI PHY") introduced the Suspend USB2.0 HS/FS/LS PHY regression, as by design of the fix any attempt to read/write from/to the PHY control registers will completely disable the PHY suspension, which consequently will increase the USB bus power consumption. Secondly the fix won't work well for the very first attempt of the ULPI PHY control registers IO, because after disabling the USB2.0 PHY suspension functionality it will still take some time for the bus to resume from the sleep state if one has been reached before it. So the very first PHY register read/write operation will take more time than the busy-loop provides and the IO timeout error might be returned anyway. Here we suggest to fix the denoted problems in the following way. First of all let's not disable the Suspend USB2.0 HS/FS/LS PHY functionality so to make the controller and the USB2.0 bus more power efficient. Secondly instead of that we'll extend the PHY IO op wait procedure with 1 - 1.2 ms sleep if the PHY suspension is enabled (1ms should be enough as by LPM specification it is at most how long it takes for the USB2.0 bus to resume from L1 (Sleep) state). Finally in case if the USB2.0 PHY suspension functionality has been disabled on the DWC USB3 controller setup procedure we'll compensate the USB bus resume process latency by extending the busy-loop attempts counter. Fixes: e0082698b689 ("usb: dwc3: ulpi: conditionally resume ULPI PHY") Acked-by: Heikki Krogerus Signed-off-by: Serge Semin Link: https://lore.kernel.org/r/20201210085008.13264-4-Sergey.Semin@baikalelectronics.ru Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/ulpi.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/usb/dwc3/ulpi.c b/drivers/usb/dwc3/ulpi.c index 54c877f7b51d..f23f4c9a557e 100644 --- a/drivers/usb/dwc3/ulpi.c +++ b/drivers/usb/dwc3/ulpi.c @@ -24,7 +24,7 @@ static int dwc3_ulpi_busyloop(struct dwc3 *dwc, u8 addr, bool read) { unsigned long ns = 5L * DWC3_ULPI_BASE_DELAY; - unsigned int count = 1000; + unsigned int count = 10000; u32 reg; if (addr >= ULPI_EXT_VENDOR_SPECIFIC) @@ -33,6 +33,10 @@ static int dwc3_ulpi_busyloop(struct dwc3 *dwc, u8 addr, bool read) if (read) ns += DWC3_ULPI_BASE_DELAY; + reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); + if (reg & DWC3_GUSB2PHYCFG_SUSPHY) + usleep_range(1000, 1200); + while (count--) { ndelay(ns); reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYACC(0)); @@ -50,12 +54,6 @@ static int dwc3_ulpi_read(struct device *dev, u8 addr) u32 reg; int ret; - reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); - if (reg & DWC3_GUSB2PHYCFG_SUSPHY) { - reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; - dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); - } - reg = DWC3_GUSB2PHYACC_NEWREGREQ | DWC3_ULPI_ADDR(addr); dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg); @@ -73,12 +71,6 @@ static int dwc3_ulpi_write(struct device *dev, u8 addr, u8 val) struct dwc3 *dwc = dev_get_drvdata(dev); u32 reg; - reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); - if (reg & DWC3_GUSB2PHYCFG_SUSPHY) { - reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; - dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); - } - reg = DWC3_GUSB2PHYACC_NEWREGREQ | DWC3_ULPI_ADDR(addr); reg |= DWC3_GUSB2PHYACC_WRITE | val; dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg); -- cgit 1.4.1 From 9389044f27081d6ec77730c36d5bf9a1288bcda2 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 21 Dec 2020 18:35:28 +0100 Subject: usb: gadget: f_uac2: reset wMaxPacketSize With commit 913e4a90b6f9 ("usb: gadget: f_uac2: finalize wMaxPacketSize according to bandwidth") wMaxPacketSize is computed dynamically but the value is never reset. Because of this, the actual maximum packet size can only decrease each time the audio gadget is instantiated. Reset the endpoint maximum packet size and mark wMaxPacketSize as dynamic to solve the problem. Fixes: 913e4a90b6f9 ("usb: gadget: f_uac2: finalize wMaxPacketSize according to bandwidth") Signed-off-by: Jerome Brunet Cc: stable Link: https://lore.kernel.org/r/20201221173531.215169-2-jbrunet@baylibre.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uac2.c | 69 ++++++++++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 14 deletions(-) diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 3633df6d7610..5d960b6603b6 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -271,7 +271,7 @@ static struct usb_endpoint_descriptor fs_epout_desc = { .bEndpointAddress = USB_DIR_OUT, .bmAttributes = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC, - .wMaxPacketSize = cpu_to_le16(1023), + /* .wMaxPacketSize = DYNAMIC */ .bInterval = 1, }; @@ -280,7 +280,7 @@ static struct usb_endpoint_descriptor hs_epout_desc = { .bDescriptorType = USB_DT_ENDPOINT, .bmAttributes = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC, - .wMaxPacketSize = cpu_to_le16(1024), + /* .wMaxPacketSize = DYNAMIC */ .bInterval = 4, }; @@ -348,7 +348,7 @@ static struct usb_endpoint_descriptor fs_epin_desc = { .bEndpointAddress = USB_DIR_IN, .bmAttributes = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC, - .wMaxPacketSize = cpu_to_le16(1023), + /* .wMaxPacketSize = DYNAMIC */ .bInterval = 1, }; @@ -357,7 +357,7 @@ static struct usb_endpoint_descriptor hs_epin_desc = { .bDescriptorType = USB_DT_ENDPOINT, .bmAttributes = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC, - .wMaxPacketSize = cpu_to_le16(1024), + /* .wMaxPacketSize = DYNAMIC */ .bInterval = 4, }; @@ -444,12 +444,28 @@ struct cntrl_range_lay3 { __le32 dRES; } __packed; -static void set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts, +static int set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts, struct usb_endpoint_descriptor *ep_desc, - unsigned int factor, bool is_playback) + enum usb_device_speed speed, bool is_playback) { int chmask, srate, ssize; - u16 max_packet_size; + u16 max_size_bw, max_size_ep; + unsigned int factor; + + switch (speed) { + case USB_SPEED_FULL: + max_size_ep = 1023; + factor = 1000; + break; + + case USB_SPEED_HIGH: + max_size_ep = 1024; + factor = 8000; + break; + + default: + return -EINVAL; + } if (is_playback) { chmask = uac2_opts->p_chmask; @@ -461,10 +477,12 @@ static void set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts, ssize = uac2_opts->c_ssize; } - max_packet_size = num_channels(chmask) * ssize * + max_size_bw = num_channels(chmask) * ssize * DIV_ROUND_UP(srate, factor / (1 << (ep_desc->bInterval - 1))); - ep_desc->wMaxPacketSize = cpu_to_le16(min_t(u16, max_packet_size, - le16_to_cpu(ep_desc->wMaxPacketSize))); + ep_desc->wMaxPacketSize = cpu_to_le16(min_t(u16, max_size_bw, + max_size_ep)); + + return 0; } /* Use macro to overcome line length limitation */ @@ -670,10 +688,33 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn) } /* Calculate wMaxPacketSize according to audio bandwidth */ - set_ep_max_packet_size(uac2_opts, &fs_epin_desc, 1000, true); - set_ep_max_packet_size(uac2_opts, &fs_epout_desc, 1000, false); - set_ep_max_packet_size(uac2_opts, &hs_epin_desc, 8000, true); - set_ep_max_packet_size(uac2_opts, &hs_epout_desc, 8000, false); + ret = set_ep_max_packet_size(uac2_opts, &fs_epin_desc, USB_SPEED_FULL, + true); + if (ret < 0) { + dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); + return ret; + } + + ret = set_ep_max_packet_size(uac2_opts, &fs_epout_desc, USB_SPEED_FULL, + false); + if (ret < 0) { + dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); + return ret; + } + + ret = set_ep_max_packet_size(uac2_opts, &hs_epin_desc, USB_SPEED_HIGH, + true); + if (ret < 0) { + dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); + return ret; + } + + ret = set_ep_max_packet_size(uac2_opts, &hs_epout_desc, USB_SPEED_HIGH, + false); + if (ret < 0) { + dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); + return ret; + } if (EPOUT_EN(uac2_opts)) { agdev->out_ep = usb_ep_autoconfig(gadget, &fs_epout_desc); -- cgit 1.4.1 From 60267ba35c744d851dcd2d22ebaa240ca6aaa15f Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 16 Dec 2020 17:19:58 +0100 Subject: ceph: reencode gid_list when reconnecting On reconnect, cap and dentry releases are dropped and the fields that follow must be reencoded into the freed space. Currently these are timestamp and gid_list, but gid_list isn't reencoded. This results in failed to decode message of type 24 v4: End of buffer errors on the MDS. While at it, make a change to encode gid_list unconditionally, without regard to what head/which version was used as a result of checking whether CEPH_FEATURE_FS_BTIME is supported or not. URL: https://tracker.ceph.com/issues/48618 Fixes: 4f1ddb1ea874 ("ceph: implement updated ceph_mds_request_head structure") Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton --- fs/ceph/mds_client.c | 53 ++++++++++++++++++++++------------------------------ 1 file changed, 22 insertions(+), 31 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 98c15ff2e599..840587037b59 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2475,6 +2475,22 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, return r; } +static void encode_timestamp_and_gids(void **p, + const struct ceph_mds_request *req) +{ + struct ceph_timespec ts; + int i; + + ceph_encode_timespec64(&ts, &req->r_stamp); + ceph_encode_copy(p, &ts, sizeof(ts)); + + /* gid_list */ + ceph_encode_32(p, req->r_cred->group_info->ngroups); + for (i = 0; i < req->r_cred->group_info->ngroups; i++) + ceph_encode_64(p, from_kgid(&init_user_ns, + req->r_cred->group_info->gid[i])); +} + /* * called under mdsc->mutex */ @@ -2491,7 +2507,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, u64 ino1 = 0, ino2 = 0; int pathlen1 = 0, pathlen2 = 0; bool freepath1 = false, freepath2 = false; - int len, i; + int len; u16 releases; void *p, *end; int ret; @@ -2517,17 +2533,10 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, goto out_free1; } - if (legacy) { - /* Old style */ - len = sizeof(*head); - } else { - /* New style: add gid_list and any later fields */ - len = sizeof(struct ceph_mds_request_head) + sizeof(u32) + - (sizeof(u64) * req->r_cred->group_info->ngroups); - } - + len = legacy ? sizeof(*head) : sizeof(struct ceph_mds_request_head); len += pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) + sizeof(struct ceph_timespec); + len += sizeof(u32) + (sizeof(u64) * req->r_cred->group_info->ngroups); /* calculate (max) length for cap releases */ len += sizeof(struct ceph_mds_request_release) * @@ -2548,7 +2557,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, msg->hdr.tid = cpu_to_le64(req->r_tid); /* - * The old ceph_mds_request_header didn't contain a version field, and + * The old ceph_mds_request_head didn't contain a version field, and * one was added when we moved the message version from 3->4. */ if (legacy) { @@ -2609,20 +2618,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, head->num_releases = cpu_to_le16(releases); - /* time stamp */ - { - struct ceph_timespec ts; - ceph_encode_timespec64(&ts, &req->r_stamp); - ceph_encode_copy(&p, &ts, sizeof(ts)); - } - - /* gid list */ - if (!legacy) { - ceph_encode_32(&p, req->r_cred->group_info->ngroups); - for (i = 0; i < req->r_cred->group_info->ngroups; i++) - ceph_encode_64(&p, from_kgid(&init_user_ns, - req->r_cred->group_info->gid[i])); - } + encode_timestamp_and_gids(&p, req); if (WARN_ON_ONCE(p > end)) { ceph_msg_put(msg); @@ -2730,13 +2726,8 @@ static int __prepare_send_request(struct ceph_mds_session *session, /* remove cap/dentry releases from message */ rhead->num_releases = 0; - /* time stamp */ p = msg->front.iov_base + req->r_request_release_offset; - { - struct ceph_timespec ts; - ceph_encode_timespec64(&ts, &req->r_stamp); - ceph_encode_copy(&p, &ts, sizeof(ts)); - } + encode_timestamp_and_gids(&p, req); msg->front.iov_len = p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); -- cgit 1.4.1 From ad32fe8801c38f7b1a8b3814bd1f006cb2b5e781 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 15 Dec 2020 16:40:59 +0100 Subject: libceph: fix auth_signature buffer allocation in secure mode auth_signature frame is 68 bytes in plain mode and 96 bytes in secure mode but we are requesting 68 bytes in both modes. By luck, this doesn't actually result in any invalid memory accesses because the allocation is satisfied out of kmalloc-96 slab and so exactly 96 bytes are allocated, but KASAN rightfully complains. Fixes: cd1a677cad99 ("libceph, ceph: implement msgr2.1 protocol (crc and secure modes)") Reported-by: Luis Henriques Signed-off-by: Ilya Dryomov --- net/ceph/messenger_v2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index c1ebb2aa08b5..4f938fc8deaf 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -1333,7 +1333,8 @@ static int prepare_auth_signature(struct ceph_connection *con) void *buf; int ret; - buf = alloc_conn_buf(con, head_onwire_len(SHA256_DIGEST_SIZE, false)); + buf = alloc_conn_buf(con, head_onwire_len(SHA256_DIGEST_SIZE, + con_secure(con))); if (!buf) return -ENOMEM; -- cgit 1.4.1 From f5f2c9a0e3073debc6bc0ecc855ced0158526ee8 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 15 Dec 2020 16:49:07 +0100 Subject: libceph: align session_key and con_secret to 16 bytes crypto_shash_setkey() and crypto_aead_setkey() will do a (small) GFP_ATOMIC allocation to align the key if it isn't suitably aligned. It's not a big deal, but at the same time easy to avoid. The actual alignment requirement is dynamic, queryable with crypto_shash_alignmask() and crypto_aead_alignmask(), but shouldn't be stricter than 16 bytes for our algorithms. Fixes: cd1a677cad99 ("libceph, ceph: implement msgr2.1 protocol (crc and secure modes)") Signed-off-by: Ilya Dryomov --- net/ceph/messenger_v2.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index 4f938fc8deaf..c38d8de93836 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -2033,10 +2033,18 @@ bad: return -EINVAL; } +/* + * Align session_key and con_secret to avoid GFP_ATOMIC allocation + * inside crypto_shash_setkey() and crypto_aead_setkey() called from + * setup_crypto(). __aligned(16) isn't guaranteed to work for stack + * objects, so do it by hand. + */ static int process_auth_done(struct ceph_connection *con, void *p, void *end) { - u8 session_key[CEPH_KEY_LEN]; - u8 con_secret[CEPH_MAX_CON_SECRET_LEN]; + u8 session_key_buf[CEPH_KEY_LEN + 16]; + u8 con_secret_buf[CEPH_MAX_CON_SECRET_LEN + 16]; + u8 *session_key = PTR_ALIGN(&session_key_buf[0], 16); + u8 *con_secret = PTR_ALIGN(&con_secret_buf[0], 16); int session_key_len, con_secret_len; int payload_len; u64 global_id; -- cgit 1.4.1 From 664f1e259a982bf213f0cd8eea7616c89546585c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 16 Dec 2020 18:40:05 +0100 Subject: libceph: add __maybe_unused to DEFINE_MSGR2_FEATURE Avoid -Wunused-const-variable warnings for "make W=1". Reported-by: Jeff Layton Signed-off-by: Ilya Dryomov --- include/linux/ceph/msgr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/ceph/msgr.h b/include/linux/ceph/msgr.h index f5e02f6c0655..3989dcb94d3d 100644 --- a/include/linux/ceph/msgr.h +++ b/include/linux/ceph/msgr.h @@ -33,8 +33,8 @@ #define CEPH_MSGR2_INCARNATION_1 (0ull) #define DEFINE_MSGR2_FEATURE(bit, incarnation, name) \ - static const uint64_t CEPH_MSGR2_FEATURE_##name = (1ULL << bit); \ - static const uint64_t CEPH_MSGR2_FEATUREMASK_##name = \ + static const uint64_t __maybe_unused CEPH_MSGR2_FEATURE_##name = (1ULL << bit); \ + static const uint64_t __maybe_unused CEPH_MSGR2_FEATUREMASK_##name = \ (1ULL << bit | CEPH_MSGR2_INCARNATION_##incarnation); #define HAVE_MSGR2_FEATURE(x, name) \ -- cgit 1.4.1 From 48b0777cd93dbd800d3966b6f5c34714aad5c203 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 28 Dec 2020 16:13:52 -0500 Subject: Revert "dm crypt: export sysfs of kcryptd workqueue" This reverts commit a2b8b2d975673b1a50ab0bcce5d146b9335edfad. WQ_SYSFS breaks the ability to reload a DM table due to sysfs kobject collision (due to active and inactive table). Given lack of demonstrated need for exposing this workqueue via sysfs: revert exposing it. Reported-by: Ignat Korchagin Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 5f9f9b3a226d..53791138d78b 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -3166,12 +3166,11 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) - cc->crypt_queue = alloc_workqueue("kcryptd-%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, + cc->crypt_queue = alloc_workqueue("kcryptd/%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1, devname); else - cc->crypt_queue = alloc_workqueue("kcryptd-%s", - WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | - WQ_UNBOUND | WQ_SYSFS, + cc->crypt_queue = alloc_workqueue("kcryptd/%s", + WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus(), devname); if (!cc->crypt_queue) { ti->error = "Couldn't create kcryptd queue"; -- cgit 1.4.1 From 59b4a8fa27f5a895582ada1ae5034af7c94a57b5 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 23 Dec 2020 19:21:16 -0800 Subject: CDC-NCM: remove "connected" log message The cdc_ncm driver passes network connection notifications up to usbnet_link_change(), which is the right place for any logging. Remove the netdev_info() duplicating this from the driver itself. This stops devices such as my "TRENDnet USB 10/100/1G/2.5G LAN" (ID 20f4:e02b) adapter from spamming the kernel log with cdc_ncm 2-2:2.0 enp0s2u2c2: network connection: connected messages every 60 msec or so. Signed-off-by: Roland Dreier Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20201224032116.2453938-1-roland@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/usb/cdc_ncm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 2bac57d5e8d5..3b816a4731f2 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1863,9 +1863,6 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb) * USB_CDC_NOTIFY_NETWORK_CONNECTION notification shall be * sent by device after USB_CDC_NOTIFY_SPEED_CHANGE. */ - netif_info(dev, link, dev->net, - "network connection: %sconnected\n", - !!event->wValue ? "" : "dis"); usbnet_link_change(dev, !!event->wValue, 0); break; -- cgit 1.4.1 From 1ad58225dba3f2f598d2c6daed4323f24547168f Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 23 Dec 2020 22:23:20 +0100 Subject: net-sysfs: take the rtnl lock when storing xps_cpus Two race conditions can be triggered when storing xps cpus, resulting in various oops and invalid memory accesses: 1. Calling netdev_set_num_tc while netif_set_xps_queue: - netif_set_xps_queue uses dev->tc_num as one of the parameters to compute the size of new_dev_maps when allocating it. dev->tc_num is also used to access the map, and the compiler may generate code to retrieve this field multiple times in the function. - netdev_set_num_tc sets dev->tc_num. If new_dev_maps is allocated using dev->tc_num and then dev->tc_num is set to a higher value through netdev_set_num_tc, later accesses to new_dev_maps in netif_set_xps_queue could lead to accessing memory outside of new_dev_maps; triggering an oops. 2. Calling netif_set_xps_queue while netdev_set_num_tc is running: 2.1. netdev_set_num_tc starts by resetting the xps queues, dev->tc_num isn't updated yet. 2.2. netif_set_xps_queue is called, setting up the map with the *old* dev->num_tc. 2.3. netdev_set_num_tc updates dev->tc_num. 2.4. Later accesses to the map lead to out of bound accesses and oops. A similar issue can be found with netdev_reset_tc. One way of triggering this is to set an iface up (for which the driver uses netdev_set_num_tc in the open path, such as bnx2x) and writing to xps_cpus in a concurrent thread. With the right timing an oops is triggered. Both issues have the same fix: netif_set_xps_queue, netdev_set_num_tc and netdev_reset_tc should be mutually exclusive. We do that by taking the rtnl lock in xps_cpus_store. Fixes: 184c449f91fe ("net: Add support for XPS with QoS via traffic classes") Signed-off-by: Antoine Tenart Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- net/core/net-sysfs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 999b70c59761..7cc15dec1717 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1396,7 +1396,13 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue, return err; } + if (!rtnl_trylock()) { + free_cpumask_var(mask); + return restart_syscall(); + } + err = netif_set_xps_queue(dev, mask, index); + rtnl_unlock(); free_cpumask_var(mask); -- cgit 1.4.1 From fb25038586d0064123e393cadf1fadd70a9df97a Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 23 Dec 2020 22:23:21 +0100 Subject: net-sysfs: take the rtnl lock when accessing xps_cpus_map and num_tc Accesses to dev->xps_cpus_map (when using dev->num_tc) should be protected by the rtnl lock, like we do for netif_set_xps_queue. I didn't see an actual bug being triggered, but let's be safe here and take the rtnl lock while accessing the map in sysfs. Fixes: 184c449f91fe ("net: Add support for XPS with QoS via traffic classes") Signed-off-by: Antoine Tenart Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- net/core/net-sysfs.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 7cc15dec1717..65886bfbf822 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1317,8 +1317,8 @@ static const struct attribute_group dql_group = { static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf) { + int cpu, len, ret, num_tc = 1, tc = 0; struct net_device *dev = queue->dev; - int cpu, len, num_tc = 1, tc = 0; struct xps_dev_maps *dev_maps; cpumask_var_t mask; unsigned long index; @@ -1328,22 +1328,31 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue, index = get_netdev_queue_index(queue); + if (!rtnl_trylock()) + return restart_syscall(); + if (dev->num_tc) { /* Do not allow XPS on subordinate device directly */ num_tc = dev->num_tc; - if (num_tc < 0) - return -EINVAL; + if (num_tc < 0) { + ret = -EINVAL; + goto err_rtnl_unlock; + } /* If queue belongs to subordinate dev use its map */ dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev; tc = netdev_txq_to_tc(dev, index); - if (tc < 0) - return -EINVAL; + if (tc < 0) { + ret = -EINVAL; + goto err_rtnl_unlock; + } } - if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) - return -ENOMEM; + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_rtnl_unlock; + } rcu_read_lock(); dev_maps = rcu_dereference(dev->xps_cpus_map); @@ -1366,9 +1375,15 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue, } rcu_read_unlock(); + rtnl_unlock(); + len = snprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(mask)); free_cpumask_var(mask); return len < PAGE_SIZE ? len : -EINVAL; + +err_rtnl_unlock: + rtnl_unlock(); + return ret; } static ssize_t xps_cpus_store(struct netdev_queue *queue, -- cgit 1.4.1 From 2d57b4f142e0b03e854612b8e28978935414bced Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 23 Dec 2020 22:23:22 +0100 Subject: net-sysfs: take the rtnl lock when storing xps_rxqs Two race conditions can be triggered when storing xps rxqs, resulting in various oops and invalid memory accesses: 1. Calling netdev_set_num_tc while netif_set_xps_queue: - netif_set_xps_queue uses dev->tc_num as one of the parameters to compute the size of new_dev_maps when allocating it. dev->tc_num is also used to access the map, and the compiler may generate code to retrieve this field multiple times in the function. - netdev_set_num_tc sets dev->tc_num. If new_dev_maps is allocated using dev->tc_num and then dev->tc_num is set to a higher value through netdev_set_num_tc, later accesses to new_dev_maps in netif_set_xps_queue could lead to accessing memory outside of new_dev_maps; triggering an oops. 2. Calling netif_set_xps_queue while netdev_set_num_tc is running: 2.1. netdev_set_num_tc starts by resetting the xps queues, dev->tc_num isn't updated yet. 2.2. netif_set_xps_queue is called, setting up the map with the *old* dev->num_tc. 2.3. netdev_set_num_tc updates dev->tc_num. 2.4. Later accesses to the map lead to out of bound accesses and oops. A similar issue can be found with netdev_reset_tc. One way of triggering this is to set an iface up (for which the driver uses netdev_set_num_tc in the open path, such as bnx2x) and writing to xps_rxqs in a concurrent thread. With the right timing an oops is triggered. Both issues have the same fix: netif_set_xps_queue, netdev_set_num_tc and netdev_reset_tc should be mutually exclusive. We do that by taking the rtnl lock in xps_rxqs_store. Fixes: 8af2c06ff4b1 ("net-sysfs: Add interface for Rx queue(s) map per Tx queue") Signed-off-by: Antoine Tenart Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- net/core/net-sysfs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 65886bfbf822..62ca2f2c0ee6 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1499,10 +1499,17 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf, return err; } + if (!rtnl_trylock()) { + bitmap_free(mask); + return restart_syscall(); + } + cpus_read_lock(); err = __netif_set_xps_queue(dev, mask, index, true); cpus_read_unlock(); + rtnl_unlock(); + bitmap_free(mask); return err ? : len; } -- cgit 1.4.1 From 4ae2bb81649dc03dfc95875f02126b14b773f7ab Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 23 Dec 2020 22:23:23 +0100 Subject: net-sysfs: take the rtnl lock when accessing xps_rxqs_map and num_tc Accesses to dev->xps_rxqs_map (when using dev->num_tc) should be protected by the rtnl lock, like we do for netif_set_xps_queue. I didn't see an actual bug being triggered, but let's be safe here and take the rtnl lock while accessing the map in sysfs. Fixes: 8af2c06ff4b1 ("net-sysfs: Add interface for Rx queue(s) map per Tx queue") Signed-off-by: Antoine Tenart Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- net/core/net-sysfs.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 62ca2f2c0ee6..daf502c13d6d 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1429,22 +1429,29 @@ static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf) { + int j, len, ret, num_tc = 1, tc = 0; struct net_device *dev = queue->dev; struct xps_dev_maps *dev_maps; unsigned long *mask, index; - int j, len, num_tc = 1, tc = 0; index = get_netdev_queue_index(queue); + if (!rtnl_trylock()) + return restart_syscall(); + if (dev->num_tc) { num_tc = dev->num_tc; tc = netdev_txq_to_tc(dev, index); - if (tc < 0) - return -EINVAL; + if (tc < 0) { + ret = -EINVAL; + goto err_rtnl_unlock; + } } mask = bitmap_zalloc(dev->num_rx_queues, GFP_KERNEL); - if (!mask) - return -ENOMEM; + if (!mask) { + ret = -ENOMEM; + goto err_rtnl_unlock; + } rcu_read_lock(); dev_maps = rcu_dereference(dev->xps_rxqs_map); @@ -1470,10 +1477,16 @@ static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf) out_no_maps: rcu_read_unlock(); + rtnl_unlock(); + len = bitmap_print_to_pagebuf(false, buf, mask, dev->num_rx_queues); bitmap_free(mask); return len < PAGE_SIZE ? len : -EINVAL; + +err_rtnl_unlock: + rtnl_unlock(); + return ret; } static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf, -- cgit 1.4.1 From 4614792eebcbf81c60ad3604c1aeeb2b0899cea4 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Thu, 24 Dec 2020 18:24:05 +0200 Subject: net: ethernet: ti: cpts: fix ethtool output when no ptp_clock registered The CPTS driver registers PTP PHC clock when first netif is going up and unregister it when all netif are down. Now ethtool will show: - PTP PHC clock index 0 after boot until first netif is up; - the last assigned PTP PHC clock index even if PTP PHC clock is not registered any more after all netifs are down. This patch ensures that -1 is returned by ethtool when PTP PHC clock is not registered any more. Fixes: 8a2c9a5ab4b9 ("net: ethernet: ti: cpts: rework initialization/deinitialization") Signed-off-by: Grygorii Strashko Acked-by: Richard Cochran Link: https://lore.kernel.org/r/20201224162405.28032-1-grygorii.strashko@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/cpts.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c index d1fc7955d422..43222a34cba0 100644 --- a/drivers/net/ethernet/ti/cpts.c +++ b/drivers/net/ethernet/ti/cpts.c @@ -599,6 +599,7 @@ void cpts_unregister(struct cpts *cpts) ptp_clock_unregister(cpts->clock); cpts->clock = NULL; + cpts->phc_index = -1; cpts_write32(cpts, 0, int_enable); cpts_write32(cpts, 0, control); @@ -784,6 +785,7 @@ struct cpts *cpts_create(struct device *dev, void __iomem *regs, cpts->cc.read = cpts_systim_read; cpts->cc.mask = CLOCKSOURCE_MASK(32); cpts->info = cpts_info; + cpts->phc_index = -1; if (n_ext_ts) cpts->info.n_ext_ts = n_ext_ts; -- cgit 1.4.1 From 950271d7cc0b4546af3549d8143c4132d6e1f138 Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Fri, 25 Dec 2020 10:52:16 +0800 Subject: tun: fix return value when the number of iovs exceeds MAX_SKB_FRAGS Currently the tun_napi_alloc_frags() function returns -ENOMEM when the number of iovs exceeds MAX_SKB_FRAGS + 1. However this is inappropriate, we should use -EMSGSIZE instead of -ENOMEM. The following distinctions are matters: 1. the caller need to drop the bad packet when -EMSGSIZE is returned, which means meeting a persistent failure. 2. the caller can try again when -ENOMEM is returned, which means meeting a transient failure. Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver") Signed-off-by: Yunjian Wang Acked-by: Willem de Bruijn Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/1608864736-24332-1-git-send-email-wangyunjian@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index fbed05ae7b0f..978ac0981d16 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1365,7 +1365,7 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile, int i; if (it->nr_segs > MAX_SKB_FRAGS + 1) - return ERR_PTR(-ENOMEM); + return ERR_PTR(-EMSGSIZE); local_bh_disable(); skb = napi_get_frags(&tfile->napi); -- cgit 1.4.1 From e7579d5d5b3298f7e888ed07ac16bfb7174c135a Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 21 Dec 2020 22:07:25 +0100 Subject: net: mptcp: cap forward allocation to 1M the following syzkaller reproducer: r0 = socket$inet_mptcp(0x2, 0x1, 0x106) bind$inet(r0, &(0x7f0000000080)={0x2, 0x4e24, @multicast2}, 0x10) connect$inet(r0, &(0x7f0000000480)={0x2, 0x4e24, @local}, 0x10) sendto$inet(r0, &(0x7f0000000100)="f6", 0xffffffe7, 0xc000, 0x0, 0x0) systematically triggers the following warning: WARNING: CPU: 2 PID: 8618 at net/core/stream.c:208 sk_stream_kill_queues+0x3fa/0x580 Modules linked in: CPU: 2 PID: 8618 Comm: syz-executor Not tainted 5.10.0+ #334 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.1-4.module+el8.1.0+4066+0f1aadab 04/04 RIP: 0010:sk_stream_kill_queues+0x3fa/0x580 Code: df 48 c1 ea 03 0f b6 04 02 84 c0 74 04 3c 03 7e 40 8b ab 20 02 00 00 e9 64 ff ff ff e8 df f0 81 2 RSP: 0018:ffffc9000290fcb0 EFLAGS: 00010293 RAX: ffff888011cb8000 RBX: 0000000000000000 RCX: ffffffff86eecf0e RDX: 0000000000000000 RSI: ffffffff86eecf6a RDI: 0000000000000005 RBP: 0000000000000e28 R08: ffff888011cb8000 R09: fffffbfff1f48139 R10: ffffffff8fa409c7 R11: fffffbfff1f48138 R12: ffff8880215e6220 R13: ffffffff8fa409c0 R14: ffffc9000290fd30 R15: 1ffff92000521fa2 FS: 00007f41c78f4800(0000) GS:ffff88802d000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f95c803d088 CR3: 0000000025ed2000 CR4: 00000000000006f0 Call Trace: __mptcp_destroy_sock+0x4f5/0x8e0 mptcp_close+0x5e2/0x7f0 inet_release+0x12b/0x270 __sock_release+0xc8/0x270 sock_close+0x18/0x20 __fput+0x272/0x8e0 task_work_run+0xe0/0x1a0 exit_to_user_mode_prepare+0x1df/0x200 syscall_exit_to_user_mode+0x19/0x50 entry_SYSCALL_64_after_hwframe+0x44/0xa9 userspace programs provide arbitrarily high values of 'len' in sendmsg(): this is causing integer overflow of 'amount'. Cap forward allocation to 1 megabyte: higher values are not really useful. Suggested-by: Paolo Abeni Fixes: e93da92896bc ("mptcp: implement wmem reservation") Signed-off-by: Davide Caratti Link: https://lore.kernel.org/r/3334d00d8b2faecafdfab9aa593efcbf61442756.1608584474.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 09b19aa2f205..6628d8d74203 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -877,6 +877,9 @@ static void __mptcp_wmem_reserve(struct sock *sk, int size) struct mptcp_sock *msk = mptcp_sk(sk); WARN_ON_ONCE(msk->wmem_reserved); + if (WARN_ON_ONCE(amount < 0)) + amount = 0; + if (amount <= sk->sk_forward_alloc) goto reserve; @@ -1587,7 +1590,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) return -EOPNOTSUPP; - mptcp_lock_sock(sk, __mptcp_wmem_reserve(sk, len)); + mptcp_lock_sock(sk, __mptcp_wmem_reserve(sk, min_t(size_t, 1 << 20, len))); timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); -- cgit 1.4.1 From fb1e6e562b37b39adfe251919c9abfdb3e01f921 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sun, 27 Dec 2020 14:18:17 -0500 Subject: bnxt_en: Fix AER recovery. A recent change skips sending firmware messages to the firmware when pci_channel_offline() is true during fatal AER error. To make this complete, we need to move the re-initialization sequence to bnxt_io_resume(), otherwise the firmware messages to re-initialize will all be skipped. In any case, it is more correct to re-initialize in bnxt_io_resume(). Also, fix the reverse x-mas tree format when defining variables in bnxt_io_slot_reset(). Fixes: b340dc680ed4 ("bnxt_en: Avoid sending firmware messages when AER error is detected.") Reviewed-by: Edwin Peer Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4edd6f8e017e..b8351e24395d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12887,10 +12887,10 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, */ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) { + pci_ers_result_t result = PCI_ERS_RESULT_DISCONNECT; struct net_device *netdev = pci_get_drvdata(pdev); struct bnxt *bp = netdev_priv(netdev); int err = 0, off; - pci_ers_result_t result = PCI_ERS_RESULT_DISCONNECT; netdev_info(bp->dev, "PCI Slot Reset\n"); @@ -12919,22 +12919,8 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) pci_save_state(pdev); err = bnxt_hwrm_func_reset(bp); - if (!err) { - err = bnxt_hwrm_func_qcaps(bp); - if (!err && netif_running(netdev)) - err = bnxt_open(netdev); - } - bnxt_ulp_start(bp, err); - if (!err) { - bnxt_reenable_sriov(bp); + if (!err) result = PCI_ERS_RESULT_RECOVERED; - } - } - - if (result != PCI_ERS_RESULT_RECOVERED) { - if (netif_running(netdev)) - dev_close(netdev); - pci_disable_device(pdev); } rtnl_unlock(); @@ -12952,10 +12938,21 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) static void bnxt_io_resume(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); + struct bnxt *bp = netdev_priv(netdev); + int err; + netdev_info(bp->dev, "PCI Slot Resume\n"); rtnl_lock(); - netif_device_attach(netdev); + err = bnxt_hwrm_func_qcaps(bp); + if (!err && netif_running(netdev)) + err = bnxt_open(netdev); + + bnxt_ulp_start(bp, err); + if (!err) { + bnxt_reenable_sriov(bp); + netif_device_attach(netdev); + } rtnl_unlock(); } -- cgit 1.4.1 From a029a2fef5d11bb85587433c3783615442abac96 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 27 Dec 2020 14:18:18 -0500 Subject: bnxt_en: Check TQM rings for maximum supported value. TQM rings are hardware resources that require host context memory managed by the driver. The driver supports up to 9 TQM rings and the number of rings to use is requested by firmware during run-time. Cap this number to the maximum supported to prevent accessing beyond the array. Future firmware may request more than 9 TQM rings. Define macros to remove the magic number 9 from the C code. Fixes: ac3158cb0108 ("bnxt_en: Allocate TQM ring context memory according to fw specification.") Reviewed-by: Pavan Chebbi Reviewed-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 +++++-- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 7 ++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b8351e24395d..d10e4f85dd11 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6790,8 +6790,10 @@ static int bnxt_hwrm_func_backing_store_qcaps(struct bnxt *bp) ctx->tqm_fp_rings_count = resp->tqm_fp_rings_count; if (!ctx->tqm_fp_rings_count) ctx->tqm_fp_rings_count = bp->max_q; + else if (ctx->tqm_fp_rings_count > BNXT_MAX_TQM_FP_RINGS) + ctx->tqm_fp_rings_count = BNXT_MAX_TQM_FP_RINGS; - tqm_rings = ctx->tqm_fp_rings_count + 1; + tqm_rings = ctx->tqm_fp_rings_count + BNXT_MAX_TQM_SP_RINGS; ctx_pg = kcalloc(tqm_rings, sizeof(*ctx_pg), GFP_KERNEL); if (!ctx_pg) { kfree(ctx); @@ -6925,7 +6927,8 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables) pg_attr = &req.tqm_sp_pg_size_tqm_sp_lvl, pg_dir = &req.tqm_sp_page_dir, ena = FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP; - i < 9; i++, num_entries++, pg_attr++, pg_dir++, ena <<= 1) { + i < BNXT_MAX_TQM_RINGS; + i++, num_entries++, pg_attr++, pg_dir++, ena <<= 1) { if (!(enables & ena)) continue; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 950ea26ae0d2..51996c85547e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1436,6 +1436,11 @@ struct bnxt_ctx_pg_info { struct bnxt_ctx_pg_info **ctx_pg_tbl; }; +#define BNXT_MAX_TQM_SP_RINGS 1 +#define BNXT_MAX_TQM_FP_RINGS 8 +#define BNXT_MAX_TQM_RINGS \ + (BNXT_MAX_TQM_SP_RINGS + BNXT_MAX_TQM_FP_RINGS) + struct bnxt_ctx_mem_info { u32 qp_max_entries; u16 qp_min_qp1_entries; @@ -1474,7 +1479,7 @@ struct bnxt_ctx_mem_info { struct bnxt_ctx_pg_info stat_mem; struct bnxt_ctx_pg_info mrav_mem; struct bnxt_ctx_pg_info tim_mem; - struct bnxt_ctx_pg_info *tqm_mem[9]; + struct bnxt_ctx_pg_info *tqm_mem[BNXT_MAX_TQM_RINGS]; }; struct bnxt_fw_health { -- cgit 1.4.1 From 1169318bd565d2911b949f6123e109baa35881b6 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Sat, 26 Dec 2020 15:37:36 -0600 Subject: net: ipa: don't return a value from gsi_channel_command() Callers of gsi_channel_command() no longer care whether the command times out, and don't use what gsi_channel_command() returns. Redefine that function to have void return type. Reported-by: kernel test robot Fixes: 6ffddf3b3d182 ("net: ipa: use state to determine channel command success") Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 579cc3e51677..e51a77057899 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -454,7 +454,7 @@ static enum gsi_channel_state gsi_channel_state(struct gsi_channel *channel) } /* Issue a channel command and wait for it to complete */ -static int +static void gsi_channel_command(struct gsi_channel *channel, enum gsi_ch_cmd_opcode opcode) { struct completion *completion = &channel->completion; @@ -489,12 +489,10 @@ gsi_channel_command(struct gsi_channel *channel, enum gsi_ch_cmd_opcode opcode) iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET); if (success) - return 0; + return; dev_err(dev, "GSI command %u for channel %u timed out, state %u\n", opcode, channel_id, gsi_channel_state(channel)); - - return -ETIMEDOUT; } /* Allocate GSI channel in NOT_ALLOCATED state */ @@ -503,7 +501,6 @@ static int gsi_channel_alloc_command(struct gsi *gsi, u32 channel_id) struct gsi_channel *channel = &gsi->channel[channel_id]; struct device *dev = gsi->dev; enum gsi_channel_state state; - int ret; /* Get initial channel state */ state = gsi_channel_state(channel); @@ -513,7 +510,7 @@ static int gsi_channel_alloc_command(struct gsi *gsi, u32 channel_id) return -EINVAL; } - ret = gsi_channel_command(channel, GSI_CH_ALLOCATE); + gsi_channel_command(channel, GSI_CH_ALLOCATE); /* If successful the channel state will have changed */ state = gsi_channel_state(channel); @@ -531,7 +528,6 @@ static int gsi_channel_start_command(struct gsi_channel *channel) { struct device *dev = channel->gsi->dev; enum gsi_channel_state state; - int ret; state = gsi_channel_state(channel); if (state != GSI_CHANNEL_STATE_ALLOCATED && @@ -541,7 +537,7 @@ static int gsi_channel_start_command(struct gsi_channel *channel) return -EINVAL; } - ret = gsi_channel_command(channel, GSI_CH_START); + gsi_channel_command(channel, GSI_CH_START); /* If successful the channel state will have changed */ state = gsi_channel_state(channel); @@ -559,7 +555,6 @@ static int gsi_channel_stop_command(struct gsi_channel *channel) { struct device *dev = channel->gsi->dev; enum gsi_channel_state state; - int ret; state = gsi_channel_state(channel); @@ -576,7 +571,7 @@ static int gsi_channel_stop_command(struct gsi_channel *channel) return -EINVAL; } - ret = gsi_channel_command(channel, GSI_CH_STOP); + gsi_channel_command(channel, GSI_CH_STOP); /* If successful the channel state will have changed */ state = gsi_channel_state(channel); @@ -598,7 +593,6 @@ static void gsi_channel_reset_command(struct gsi_channel *channel) { struct device *dev = channel->gsi->dev; enum gsi_channel_state state; - int ret; msleep(1); /* A short delay is required before a RESET command */ @@ -612,7 +606,7 @@ static void gsi_channel_reset_command(struct gsi_channel *channel) return; } - ret = gsi_channel_command(channel, GSI_CH_RESET); + gsi_channel_command(channel, GSI_CH_RESET); /* If successful the channel state will have changed */ state = gsi_channel_state(channel); @@ -627,7 +621,6 @@ static void gsi_channel_de_alloc_command(struct gsi *gsi, u32 channel_id) struct gsi_channel *channel = &gsi->channel[channel_id]; struct device *dev = gsi->dev; enum gsi_channel_state state; - int ret; state = gsi_channel_state(channel); if (state != GSI_CHANNEL_STATE_ALLOCATED) { @@ -636,7 +629,7 @@ static void gsi_channel_de_alloc_command(struct gsi *gsi, u32 channel_id) return; } - ret = gsi_channel_command(channel, GSI_CH_DE_ALLOC); + gsi_channel_command(channel, GSI_CH_DE_ALLOC); /* If successful the channel state will have changed */ state = gsi_channel_state(channel); -- cgit 1.4.1 From 1ddf776b498c922935d0ec3283b9817dd33aedf7 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Sat, 26 Dec 2020 15:37:37 -0600 Subject: net: ipa: don't return a value from evt_ring_command() Callers of evt_ring_command() no longer care whether the command times out, and don't use what evt_ring_command() returns. Redefine that function to have void return type. Reported-by: kernel test robot Fixes: 428b448ee764a ("net: ipa: use state to determine event ring command success") Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index e51a77057899..14d9a791924b 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -326,8 +326,8 @@ gsi_evt_ring_state(struct gsi *gsi, u32 evt_ring_id) } /* Issue an event ring command and wait for it to complete */ -static int evt_ring_command(struct gsi *gsi, u32 evt_ring_id, - enum gsi_evt_cmd_opcode opcode) +static void evt_ring_command(struct gsi *gsi, u32 evt_ring_id, + enum gsi_evt_cmd_opcode opcode) { struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id]; struct completion *completion = &evt_ring->completion; @@ -361,19 +361,16 @@ static int evt_ring_command(struct gsi *gsi, u32 evt_ring_id, iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET); if (success) - return 0; + return; dev_err(dev, "GSI command %u for event ring %u timed out, state %u\n", opcode, evt_ring_id, evt_ring->state); - - return -ETIMEDOUT; } /* Allocate an event ring in NOT_ALLOCATED state */ static int gsi_evt_ring_alloc_command(struct gsi *gsi, u32 evt_ring_id) { struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id]; - int ret; /* Get initial event ring state */ evt_ring->state = gsi_evt_ring_state(gsi, evt_ring_id); @@ -383,7 +380,7 @@ static int gsi_evt_ring_alloc_command(struct gsi *gsi, u32 evt_ring_id) return -EINVAL; } - ret = evt_ring_command(gsi, evt_ring_id, GSI_EVT_ALLOCATE); + evt_ring_command(gsi, evt_ring_id, GSI_EVT_ALLOCATE); /* If successful the event ring state will have changed */ if (evt_ring->state == GSI_EVT_RING_STATE_ALLOCATED) @@ -400,7 +397,6 @@ static void gsi_evt_ring_reset_command(struct gsi *gsi, u32 evt_ring_id) { struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id]; enum gsi_evt_ring_state state = evt_ring->state; - int ret; if (state != GSI_EVT_RING_STATE_ALLOCATED && state != GSI_EVT_RING_STATE_ERROR) { @@ -409,7 +405,7 @@ static void gsi_evt_ring_reset_command(struct gsi *gsi, u32 evt_ring_id) return; } - ret = evt_ring_command(gsi, evt_ring_id, GSI_EVT_RESET); + evt_ring_command(gsi, evt_ring_id, GSI_EVT_RESET); /* If successful the event ring state will have changed */ if (evt_ring->state == GSI_EVT_RING_STATE_ALLOCATED) @@ -423,7 +419,6 @@ static void gsi_evt_ring_reset_command(struct gsi *gsi, u32 evt_ring_id) static void gsi_evt_ring_de_alloc_command(struct gsi *gsi, u32 evt_ring_id) { struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id]; - int ret; if (evt_ring->state != GSI_EVT_RING_STATE_ALLOCATED) { dev_err(gsi->dev, "event ring %u state %u before dealloc\n", @@ -431,7 +426,7 @@ static void gsi_evt_ring_de_alloc_command(struct gsi *gsi, u32 evt_ring_id) return; } - ret = evt_ring_command(gsi, evt_ring_id, GSI_EVT_DE_ALLOC); + evt_ring_command(gsi, evt_ring_id, GSI_EVT_DE_ALLOC); /* If successful the event ring state will have changed */ if (evt_ring->state == GSI_EVT_RING_STATE_NOT_ALLOCATED) -- cgit 1.4.1 From 4d4f9c1a17a3480f8fe523673f7232b254d724b7 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 16 Dec 2020 23:39:56 +0000 Subject: MIPS: boot: Fix unaligned access with CONFIG_MIPS_RAW_APPENDED_DTB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The compressed payload is not necesarily 4-byte aligned, at least when compiling with Clang. In that case, the 4-byte value appended to the compressed payload that corresponds to the uncompressed kernel image size must be read using get_unaligned_le32(). This fixes Clang-built kernels not booting on MIPS (tested on a Ingenic JZ4770 board). Fixes: b8f54f2cde78 ("MIPS: ZBOOT: copy appended dtb to the end of the kernel") Cc: # v4.7 Signed-off-by: Paul Cercueil Reviewed-by: Nick Desaulniers Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Thomas Bogendoerfer --- arch/mips/boot/compressed/decompress.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c index c61c641674e6..e3946b06e840 100644 --- a/arch/mips/boot/compressed/decompress.c +++ b/arch/mips/boot/compressed/decompress.c @@ -13,6 +13,7 @@ #include #include +#include /* * These two variables specify the free mem region @@ -117,7 +118,7 @@ void decompress_kernel(unsigned long boot_heap_start) dtb_size = fdt_totalsize((void *)&__appended_dtb); /* last four bytes is always image size in little endian */ - image_size = le32_to_cpup((void *)&__image_end - 4); + image_size = get_unaligned_le32((void *)&__image_end - 4); /* copy dtb to where the booted kernel will expect it */ memcpy((void *)VMLINUX_LOAD_ADDRESS_ULL + image_size, -- cgit 1.4.1 From 698222457465ce343443be81c5512edda86e5914 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 24 Dec 2020 19:44:38 +0000 Subject: MIPS: Fix malformed NT_FILE and NT_SIGINFO in 32bit coredumps Patches that introduced NT_FILE and NT_SIGINFO notes back in 2012 had taken care of native (fs/binfmt_elf.c) and compat (fs/compat_binfmt_elf.c) coredumps; unfortunately, compat on mips (which does not go through the usual compat_binfmt_elf.c) had not been noticed. As the result, both N32 and O32 coredumps on 64bit mips kernels have those sections malformed enough to confuse the living hell out of all gdb and readelf versions (up to and including the tip of binutils-gdb.git). Longer term solution is to make both O32 and N32 compat use the regular compat_binfmt_elf.c, but that's too much for backports. The minimal solution is to do in arch/mips/kernel/binfmt_elf[on]32.c the same thing those patches have done in fs/compat_binfmt_elf.c Cc: stable@kernel.org # v3.7+ Signed-off-by: Al Viro Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/binfmt_elfn32.c | 7 +++++++ arch/mips/kernel/binfmt_elfo32.c | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/arch/mips/kernel/binfmt_elfn32.c b/arch/mips/kernel/binfmt_elfn32.c index 6ee3f7218c67..c4441416e96b 100644 --- a/arch/mips/kernel/binfmt_elfn32.c +++ b/arch/mips/kernel/binfmt_elfn32.c @@ -103,4 +103,11 @@ jiffies_to_old_timeval32(unsigned long jiffies, struct old_timeval32 *value) #undef ns_to_kernel_old_timeval #define ns_to_kernel_old_timeval ns_to_old_timeval32 +/* + * Some data types as stored in coredump. + */ +#define user_long_t compat_long_t +#define user_siginfo_t compat_siginfo_t +#define copy_siginfo_to_external copy_siginfo_to_external32 + #include "../../../fs/binfmt_elf.c" diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c index 6dd103d3cebb..7b2a23f48c1a 100644 --- a/arch/mips/kernel/binfmt_elfo32.c +++ b/arch/mips/kernel/binfmt_elfo32.c @@ -106,4 +106,11 @@ jiffies_to_old_timeval32(unsigned long jiffies, struct old_timeval32 *value) #undef ns_to_kernel_old_timeval #define ns_to_kernel_old_timeval ns_to_old_timeval32 +/* + * Some data types as stored in coredump. + */ +#define user_long_t compat_long_t +#define user_siginfo_t compat_siginfo_t +#define copy_siginfo_to_external copy_siginfo_to_external32 + #include "../../../fs/binfmt_elf.c" -- cgit 1.4.1 From 4f374d2c43a9e5e773f1dee56db63bd6b8a36276 Mon Sep 17 00:00:00 2001 From: Stefan Chulski Date: Wed, 23 Dec 2020 20:35:21 +0200 Subject: net: mvpp2: fix pkt coalescing int-threshold configuration The packet coalescing interrupt threshold has separated registers for different aggregated/cpu (sw-thread). The required value should be loaded for every thread but not only for 1 current cpu. Fixes: 213f428f5056 ("net: mvpp2: add support for TX interrupts and RX queue distribution modes") Signed-off-by: Stefan Chulski Link: https://lore.kernel.org/r/1608748521-11033-1-git-send-email-stefanc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index f20b31327027..4b1808acef58 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -2370,17 +2370,18 @@ static void mvpp2_rx_pkts_coal_set(struct mvpp2_port *port, static void mvpp2_tx_pkts_coal_set(struct mvpp2_port *port, struct mvpp2_tx_queue *txq) { - unsigned int thread = mvpp2_cpu_to_thread(port->priv, get_cpu()); + unsigned int thread; u32 val; if (txq->done_pkts_coal > MVPP2_TXQ_THRESH_MASK) txq->done_pkts_coal = MVPP2_TXQ_THRESH_MASK; val = (txq->done_pkts_coal << MVPP2_TXQ_THRESH_OFFSET); - mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_NUM_REG, txq->id); - mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_THRESH_REG, val); - - put_cpu(); + /* PKT-coalescing registers are per-queue + per-thread */ + for (thread = 0; thread < MVPP2_MAX_THREADS; thread++) { + mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_NUM_REG, txq->id); + mvpp2_thread_write(port->priv, thread, MVPP2_TXQ_THRESH_REG, val); + } } static u32 mvpp2_usec_to_cycles(u32 usec, unsigned long clk_hz) -- cgit 1.4.1 From 21fdca22eb7df2a1e194b8adb812ce370748b733 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 24 Dec 2020 20:01:09 +0100 Subject: ipv4: Ignore ECN bits for fib lookups in fib_compute_spec_dst() RT_TOS() only clears one of the ECN bits. Therefore, when fib_compute_spec_dst() resorts to a fib lookup, it can return different results depending on the value of the second ECN bit. For example, ECT(0) and ECT(1) packets could be treated differently. $ ip netns add ns0 $ ip netns add ns1 $ ip link add name veth01 netns ns0 type veth peer name veth10 netns ns1 $ ip -netns ns0 link set dev lo up $ ip -netns ns1 link set dev lo up $ ip -netns ns0 link set dev veth01 up $ ip -netns ns1 link set dev veth10 up $ ip -netns ns0 address add 192.0.2.10/24 dev veth01 $ ip -netns ns1 address add 192.0.2.11/24 dev veth10 $ ip -netns ns1 address add 192.0.2.21/32 dev lo $ ip -netns ns1 route add 192.0.2.10/32 tos 4 dev veth10 src 192.0.2.21 $ ip netns exec ns1 sysctl -wq net.ipv4.icmp_echo_ignore_broadcasts=0 With TOS 4 and ECT(1), ns1 replies using source address 192.0.2.21 (ping uses -Q to set all TOS and ECN bits): $ ip netns exec ns0 ping -c 1 -b -Q 5 192.0.2.255 [...] 64 bytes from 192.0.2.21: icmp_seq=1 ttl=64 time=0.544 ms But with TOS 4 and ECT(0), ns1 replies using source address 192.0.2.11 because the "tos 4" route isn't matched: $ ip netns exec ns0 ping -c 1 -b -Q 6 192.0.2.255 [...] 64 bytes from 192.0.2.11: icmp_seq=1 ttl=64 time=0.597 ms After this patch the ECN bits don't affect the result anymore: $ ip netns exec ns0 ping -c 1 -b -Q 6 192.0.2.255 [...] 64 bytes from 192.0.2.21: icmp_seq=1 ttl=64 time=0.591 ms Fixes: 35ebf65e851c ("ipv4: Create and use fib_compute_spec_dst() helper.") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index cdf6ec5aa45d..84bb707bd88d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -292,7 +292,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_oif = l3mdev_master_ifindex_rcu(dev), .daddr = ip_hdr(skb)->saddr, - .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), + .flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK, .flowi4_scope = scope, .flowi4_mark = vmark ? skb->mark : 0, }; -- cgit 1.4.1 From a533b70a657c03137dd49cbcfee70aac086ab2b1 Mon Sep 17 00:00:00 2001 From: weichenchen Date: Fri, 25 Dec 2020 13:44:45 +0800 Subject: net: neighbor: fix a crash caused by mod zero pneigh_enqueue() tries to obtain a random delay by mod NEIGH_VAR(p, PROXY_DELAY). However, NEIGH_VAR(p, PROXY_DELAY) migth be zero at that point because someone could write zero to /proc/sys/net/ipv4/neigh/[device]/proxy_delay after the callers check it. This patch uses prandom_u32_max() to get a random delay instead which avoids potential division by zero. Signed-off-by: weichenchen Signed-off-by: David S. Miller --- net/core/neighbour.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 9500d28a43b0..277ed854aef1 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1569,10 +1569,8 @@ static void neigh_proxy_process(struct timer_list *t) void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct sk_buff *skb) { - unsigned long now = jiffies; - - unsigned long sched_next = now + (prandom_u32() % - NEIGH_VAR(p, PROXY_DELAY)); + unsigned long sched_next = jiffies + + prandom_u32_max(NEIGH_VAR(p, PROXY_DELAY)); if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) { kfree_skb(skb); -- cgit 1.4.1 From bd1248f1ddbc48b0c30565fce897a3b6423313b8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 24 Dec 2020 22:23:44 -0800 Subject: net: sched: prevent invalid Scell_log shift count Check Scell_log shift size in red_check_params() and modify all callers of red_check_params() to pass Scell_log. This prevents a shift out-of-bounds as detected by UBSAN: UBSAN: shift-out-of-bounds in ./include/net/red.h:252:22 shift exponent 72 is too large for 32-bit type 'int' Fixes: 8afa10cbe281 ("net_sched: red: Avoid illegal values") Signed-off-by: Randy Dunlap Reported-by: syzbot+97c5bd9cc81eca63d36e@syzkaller.appspotmail.com Cc: Nogah Frankel Cc: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Cc: netdev@vger.kernel.org Cc: "David S. Miller" Cc: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/red.h | 4 +++- net/sched/sch_choke.c | 2 +- net/sched/sch_gred.c | 2 +- net/sched/sch_red.c | 2 +- net/sched/sch_sfq.c | 2 +- 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/net/red.h b/include/net/red.h index fc455445f4b2..932f0d79d60c 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -168,12 +168,14 @@ static inline void red_set_vars(struct red_vars *v) v->qcount = -1; } -static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog) +static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog, u8 Scell_log) { if (fls(qth_min) + Wlog > 32) return false; if (fls(qth_max) + Wlog > 32) return false; + if (Scell_log >= 32) + return false; if (qth_max < qth_min) return false; return true; diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index bd618b00d319..50f680f03a54 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -362,7 +362,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt, ctl = nla_data(tb[TCA_CHOKE_PARMS]); - if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) + if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log)) return -EINVAL; if (ctl->limit > CHOKE_MAX_QUEUE) diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 8599c6f31b05..e0bc77533acc 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -480,7 +480,7 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp, struct gred_sched *table = qdisc_priv(sch); struct gred_sched_data *q = table->tab[dp]; - if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) { + if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log)) { NL_SET_ERR_MSG_MOD(extack, "invalid RED parameters"); return -EINVAL; } diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index e89fab6ccb34..b4ae34d7aa96 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -250,7 +250,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb, max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0; ctl = nla_data(tb[TCA_RED_PARMS]); - if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) + if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log)) return -EINVAL; err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS, diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index bca2be57d9fc..b25e51440623 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -647,7 +647,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) } if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, - ctl_v1->Wlog)) + ctl_v1->Wlog, ctl_v1->Scell_log)) return -EINVAL; if (ctl_v1 && ctl_v1->qth_min) { p = kmalloc(sizeof(*p), GFP_KERNEL); -- cgit 1.4.1 From 5ede3ada3da7f050519112b81badc058190b9f9f Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Sat, 26 Dec 2020 16:10:05 +0800 Subject: net: hns: fix return value check in __lb_other_process() The function skb_copy() could return NULL, the return value need to be checked. Fixes: b5996f11ea54 ("net: add Hisilicon Network Subsystem basic ethernet support") Signed-off-by: Yunjian Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index 7165da0ee9aa..a6e3f07caf99 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -415,6 +415,10 @@ static void __lb_other_process(struct hns_nic_ring_data *ring_data, /* for mutl buffer*/ new_skb = skb_copy(skb, GFP_ATOMIC); dev_kfree_skb_any(skb); + if (!new_skb) { + netdev_err(ndev, "skb alloc failed\n"); + return; + } skb = new_skb; check_ok = 0; -- cgit 1.4.1 From 085c7c4e1c0e50d90b7d90f61a12e12b317a91e2 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 26 Dec 2020 15:44:53 -0800 Subject: erspan: fix version 1 check in gre_parse_header() Both version 0 and version 1 use ETH_P_ERSPAN, but version 0 does not have an erspan header. So the check in gre_parse_header() is wrong, we have to distinguish version 1 from version 0. We can just check the gre header length like is_erspan_type1(). Fixes: cb73ee40b1b3 ("net: ip_gre: use erspan key field for tunnel lookup") Reported-by: syzbot+f583ce3d4ddf9836b27a@syzkaller.appspotmail.com Cc: William Tu Cc: Lorenzo Bianconi Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/gre_demux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 66fdbfe5447c..5d1e6fe9d838 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -128,7 +128,7 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, * to 0 and sets the configured key in the * inner erspan header field */ - if (greh->protocol == htons(ETH_P_ERSPAN) || + if ((greh->protocol == htons(ETH_P_ERSPAN) && hdr_len != 4) || greh->protocol == htons(ETH_P_ERSPAN2)) { struct erspan_base_hdr *ershdr; -- cgit 1.4.1 From 9b22fece786ed641909988da4810bfa8e5d2e592 Mon Sep 17 00:00:00 2001 From: Léo Le Bouter Date: Sun, 27 Dec 2020 17:11:36 +0100 Subject: atlantic: remove architecture depends MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was tested on a RaptorCS Talos II with IBM POWER9 DD2.2 CPUs and an ASUS XG-C100F PCI-e card without any issue. Speeds of ~8Gbps could be attained with not-very-scientific (wget HTTP) both-ways measurements on a local network. No warning or error reported in kernel logs. The drivers seems to be portable enough for it not to be gated like such. Signed-off-by: Léo Le Bouter Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/Kconfig b/drivers/net/ethernet/aquantia/Kconfig index efb33c078a3c..cec2018c84a9 100644 --- a/drivers/net/ethernet/aquantia/Kconfig +++ b/drivers/net/ethernet/aquantia/Kconfig @@ -19,7 +19,6 @@ if NET_VENDOR_AQUANTIA config AQTION tristate "aQuantia AQtion(tm) Support" depends on PCI - depends on X86_64 || ARM64 || COMPILE_TEST depends on MACSEC || MACSEC=n help This enables the support for the aQuantia AQtion(tm) Ethernet card. -- cgit 1.4.1 From 1fef73597fa545c35fddc953979013882fbd4e55 Mon Sep 17 00:00:00 2001 From: Xie He Date: Sun, 27 Dec 2020 18:53:39 -0800 Subject: net: hdlc_ppp: Fix issues when mod_timer is called while timer is running ppp_cp_event is called directly or indirectly by ppp_rx with "ppp->lock" held. It may call mod_timer to add a new timer. However, at the same time ppp_timer may be already running and waiting for "ppp->lock". In this case, there's no need for ppp_timer to continue running and it can just exit. If we let ppp_timer continue running, it may call add_timer. This causes kernel panic because add_timer can't be called with a timer pending. This patch fixes this problem. Fixes: e022c2f07ae5 ("WAN: new synchronous PPP implementation for generic HDLC.") Cc: Krzysztof Halasa Signed-off-by: Xie He Signed-off-by: David S. Miller --- drivers/net/wan/hdlc_ppp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c index 64f855651336..261b53fc8e04 100644 --- a/drivers/net/wan/hdlc_ppp.c +++ b/drivers/net/wan/hdlc_ppp.c @@ -569,6 +569,13 @@ static void ppp_timer(struct timer_list *t) unsigned long flags; spin_lock_irqsave(&ppp->lock, flags); + /* mod_timer could be called after we entered this function but + * before we got the lock. + */ + if (timer_pending(&proto->timer)) { + spin_unlock_irqrestore(&ppp->lock, flags); + return; + } switch (proto->state) { case STOPPING: case REQ_SENT: -- cgit 1.4.1 From 26b614fa441048a9f8e4a814c3b01756816ce7a7 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 16 Dec 2020 17:48:33 +0200 Subject: dmaengine: ti: k3-udma: Fix pktdma rchan TPL level setup Instead of initializing the rchan_tpl the initial commit re-initialized the tchan_tpl. Fixes: d2abc982333c0 ("dmaengine: ti: k3-udma: Initial support for K3 PKTDMA") Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201216154833.20821-1-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-udma.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 87157cbae1b8..298460438bb4 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -4698,9 +4698,9 @@ static int pktdma_setup_resources(struct udma_dev *ud) ud->tchan_tpl.levels = 1; } - ud->tchan_tpl.levels = ud->tchan_tpl.levels; - ud->tchan_tpl.start_idx[0] = ud->tchan_tpl.start_idx[0]; - ud->tchan_tpl.start_idx[1] = ud->tchan_tpl.start_idx[1]; + ud->rchan_tpl.levels = ud->tchan_tpl.levels; + ud->rchan_tpl.start_idx[0] = ud->tchan_tpl.start_idx[0]; + ud->rchan_tpl.start_idx[1] = ud->tchan_tpl.start_idx[1]; ud->tchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->tchan_cnt), sizeof(unsigned long), GFP_KERNEL); -- cgit 1.4.1 From ff58f7dd0c1352a01de3a40327895bd51e03de3a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 16 Dec 2020 11:29:46 +0300 Subject: dmaengine: idxd: off by one in cleanup code The clean up is off by one so this will start at "i" and it should start with "i - 1" and then it doesn't unregister the zeroeth elements in the array. Fixes: c52ca478233c ("dmaengine: idxd: add configuration component of driver") Signed-off-by: Dan Carpenter Acked-by: Dave Jiang Link: https://lore.kernel.org/r/X9nFeojulsNqUSnG@mwanda Signed-off-by: Vinod Koul --- drivers/dma/idxd/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 266423a2cabc..4dbb03c545e4 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -434,7 +434,7 @@ int idxd_register_driver(void) return 0; drv_fail: - for (; i > 0; i--) + while (--i >= 0) driver_unregister(&idxd_drvs[i]->drv); return rc; } @@ -1840,7 +1840,7 @@ int idxd_register_bus_type(void) return 0; bus_err: - for (; i > 0; i--) + while (--i >= 0) bus_unregister(idxd_bus_types[i]); return rc; } -- cgit 1.4.1 From 8fb28795fb64e1151c0e713686d8b026a5a2aece Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 18 Dec 2020 18:41:37 +0800 Subject: dmaengine: qcom: gpi: Fixes a format mismatch drivers/dma/qcom/gpi.c:1419:3: warning: format '%lu' expects argument of type 'long unsigned int', but argument 8 has type 'size_t {aka unsigned int}' [-Wformat=] drivers/dma/qcom/gpi.c:1427:31: warning: format '%lu' expects argument of type 'long unsigned int', but argument 3 has type 'size_t {aka unsigned int}' [-Wformat=] drivers/dma/qcom/gpi.c:1447:3: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 4 has type 'dma_addr_t {aka unsigned int}' [-Wformat=] drivers/dma/qcom/gpi.c:1447:3: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 5 has type 'phys_addr_t {aka unsigned int}' [-Wformat=] Signed-off-by: Xiaoming Ni Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20201218104137.59200-1-nixiaoming@huawei.com Signed-off-by: Vinod Koul --- drivers/dma/qcom/gpi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c index d2334f535de2..556c070a514c 100644 --- a/drivers/dma/qcom/gpi.c +++ b/drivers/dma/qcom/gpi.c @@ -1416,7 +1416,7 @@ static int gpi_alloc_ring(struct gpi_ring *ring, u32 elements, len = 1 << bit; ring->alloc_size = (len + (len - 1)); dev_dbg(gpii->gpi_dev->dev, - "#el:%u el_size:%u len:%u actual_len:%llu alloc_size:%lu\n", + "#el:%u el_size:%u len:%u actual_len:%llu alloc_size:%zu\n", elements, el_size, (elements * el_size), len, ring->alloc_size); @@ -1424,7 +1424,7 @@ static int gpi_alloc_ring(struct gpi_ring *ring, u32 elements, ring->alloc_size, &ring->dma_handle, GFP_KERNEL); if (!ring->pre_aligned) { - dev_err(gpii->gpi_dev->dev, "could not alloc size:%lu mem for ring\n", + dev_err(gpii->gpi_dev->dev, "could not alloc size:%zu mem for ring\n", ring->alloc_size); return -ENOMEM; } @@ -1444,8 +1444,8 @@ static int gpi_alloc_ring(struct gpi_ring *ring, u32 elements, smp_wmb(); dev_dbg(gpii->gpi_dev->dev, - "phy_pre:0x%0llx phy_alig:0x%0llx len:%u el_size:%u elements:%u\n", - ring->dma_handle, ring->phys_addr, ring->len, + "phy_pre:%pad phy_alig:%pa len:%u el_size:%u elements:%u\n", + &ring->dma_handle, &ring->phys_addr, ring->len, ring->el_size, ring->elements); return 0; -- cgit 1.4.1 From 33cbd54dc515cc04b5a603603414222b4bb1448d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 19 Dec 2020 13:47:18 +0100 Subject: dmaengine: mediatek: mtk-hsdma: Fix a resource leak in the error handling path of the probe function 'mtk_hsdma_hw_deinit()' should be called in the error handling path of the probe function to undo a previous 'mtk_hsdma_hw_init()' call, as already done in the remove function. Fixes: 548c4597e984 ("dmaengine: mediatek: Add MediaTek High-Speed DMA controller for MT7622 and MT7623 SoC") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20201219124718.182664-1-christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/mediatek/mtk-hsdma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/mediatek/mtk-hsdma.c b/drivers/dma/mediatek/mtk-hsdma.c index f133ae8dece1..6ad8afbb95f2 100644 --- a/drivers/dma/mediatek/mtk-hsdma.c +++ b/drivers/dma/mediatek/mtk-hsdma.c @@ -1007,6 +1007,7 @@ static int mtk_hsdma_probe(struct platform_device *pdev) return 0; err_free: + mtk_hsdma_hw_deinit(hsdma); of_dma_controller_free(pdev->dev.of_node); err_unregister: dma_async_device_unregister(dd); -- cgit 1.4.1 From d645148cc82ca7fbacaa601414a552184e9c6dd3 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 19 Dec 2020 14:28:00 +0100 Subject: dmaengine: milbeaut-xdmac: Fix a resource leak in the error handling path of the probe function 'disable_xdmac()' should be called in the error handling path of the probe function to undo a previous 'enable_xdmac()' call, as already done in the remove function. Fixes: a6e9be055d47 ("dmaengine: milbeaut-xdmac: Add XDMAC driver for Milbeaut platforms") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20201219132800.183254-1-christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/milbeaut-xdmac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/milbeaut-xdmac.c b/drivers/dma/milbeaut-xdmac.c index 584c931e807a..d29d01e730aa 100644 --- a/drivers/dma/milbeaut-xdmac.c +++ b/drivers/dma/milbeaut-xdmac.c @@ -350,7 +350,7 @@ static int milbeaut_xdmac_probe(struct platform_device *pdev) ret = dma_async_device_register(ddev); if (ret) - return ret; + goto disable_xdmac; ret = of_dma_controller_register(dev->of_node, of_dma_simple_xlate, mdev); @@ -363,6 +363,8 @@ static int milbeaut_xdmac_probe(struct platform_device *pdev) unregister_dmac: dma_async_device_unregister(ddev); +disable_xdmac: + disable_xdmac(mdev); return ret; } -- cgit 1.4.1 From 595a334148449bd1d27cf5d6fcb3b0d718cb1b9f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 14 Dec 2020 14:56:52 +0300 Subject: dmaengine: dw-edma: Fix use after free in dw_edma_alloc_chunk() If the dw_edma_alloc_burst() function fails then we free "chunk" but it's still on the "desc->chunk->list" list so it will lead to a use after free. Also the "->chunks_alloc" count is incremented when it shouldn't be. In current kernels small allocations are guaranteed to succeed and dw_edma_alloc_burst() can't fail so this will not actually affect runtime. Fixes: e63d79d1ffcd ("dmaengine: Add Synopsys eDMA IP core driver") Signed-off-by: Dan Carpenter Acked-by: Gustavo Pimentel Link: https://lore.kernel.org/r/X9dTBFrUPEvvW7qc@mwanda Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-edma-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/dw-edma/dw-edma-core.c b/drivers/dma/dw-edma/dw-edma-core.c index b971505b8715..08d71dafa001 100644 --- a/drivers/dma/dw-edma/dw-edma-core.c +++ b/drivers/dma/dw-edma/dw-edma-core.c @@ -86,12 +86,12 @@ static struct dw_edma_chunk *dw_edma_alloc_chunk(struct dw_edma_desc *desc) if (desc->chunk) { /* Create and add new element into the linked list */ - desc->chunks_alloc++; - list_add_tail(&chunk->list, &desc->chunk->list); if (!dw_edma_alloc_burst(chunk)) { kfree(chunk); return NULL; } + desc->chunks_alloc++; + list_add_tail(&chunk->list, &desc->chunk->list); } else { /* List head */ chunk->burst = NULL; -- cgit 1.4.1 From ba42f61b36121730d7f51cc261dfd744ee19f50b Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Wed, 16 Dec 2020 21:06:49 +0800 Subject: qcom: bam_dma: Delete useless kfree code The parameter of kfree function is NULL, so kfree code is useless, delete it. Therefore, goto expression is no longer needed, so simplify it. Signed-off-by: Zheng Yongjun Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20201216130649.13979-1-zhengyongjun3@huawei.com Signed-off-by: Vinod Koul --- drivers/dma/qcom/bam_dma.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c index d5773d474d8f..88579857ca1d 100644 --- a/drivers/dma/qcom/bam_dma.c +++ b/drivers/dma/qcom/bam_dma.c @@ -630,7 +630,7 @@ static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan, GFP_NOWAIT); if (!async_desc) - goto err_out; + return NULL; if (flags & DMA_PREP_FENCE) async_desc->flags |= DESC_FLAG_NWD; @@ -670,10 +670,6 @@ static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan, } return vchan_tx_prep(&bchan->vc, &async_desc->vd, flags); - -err_out: - kfree(async_desc); - return NULL; } /** -- cgit 1.4.1 From 28d8e07fc9478f8f14dd5dd4b2c382982fa12461 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 15 Dec 2020 15:13:47 +0200 Subject: MAINTAINERS: Add entry for Texas Instruments DMA drivers My employment with TI is coming to an end, it is my intention to look after the DMA drivers I have worked with over the years. Signed-off-by: Peter Ujfalusi Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201215131348.11282-2-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- MAINTAINERS | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 546aa66428c9..217866b668b9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17553,6 +17553,19 @@ S: Supported F: Documentation/devicetree/bindings/iio/dac/ti,dac7612.txt F: drivers/iio/dac/ti-dac7612.c +TEXAS INSTRUMENTS DMA DRIVERS +M: Peter Ujfalusi +L: dmaengine@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt +F: Documentation/devicetree/bindings/dma/ti-edma.txt +F: Documentation/devicetree/bindings/dma/ti/ +F: drivers/dma/ti/ +X: drivers/dma/ti/cppi41.c +F: include/linux/dma/k3-udma-glue.h +F: include/linux/dma/ti-cppi5.h +F: include/linux/dma/k3-psil.h + TEXAS INSTRUMENTS' SYSTEM CONTROL INTERFACE (TISCI) PROTOCOL DRIVER M: Nishanth Menon M: Tero Kristo -- cgit 1.4.1 From cc465fa269bc0dc63a1ab7384110e4079fb40421 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 15 Dec 2020 15:13:48 +0200 Subject: dt-bindings: dma: ti: Update maintainer and author information My employment with TI is coming to an end, add the copyright and author comments as they due and change the maintainer mail address. Signed-off-by: Peter Ujfalusi Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201215131348.11282-3-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml | 4 +++- Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml | 4 +++- Documentation/devicetree/bindings/dma/ti/k3-udma.yaml | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml index b15f68c499cb..df29d59d13a8 100644 --- a/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml +++ b/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml @@ -1,4 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (C) 2020 Texas Instruments Incorporated +# Author: Peter Ujfalusi %YAML 1.2 --- $id: http://devicetree.org/schemas/dma/ti/k3-bcdma.yaml# @@ -7,7 +9,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Texas Instruments K3 DMSS BCDMA Device Tree Bindings maintainers: - - Peter Ujfalusi + - Peter Ujfalusi description: | The Block Copy DMA (BCDMA) is intended to perform similar functions as the TR diff --git a/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml index b13ab60cd740..ea19d12a9337 100644 --- a/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml +++ b/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml @@ -1,4 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (C) 2020 Texas Instruments Incorporated +# Author: Peter Ujfalusi %YAML 1.2 --- $id: http://devicetree.org/schemas/dma/ti/k3-pktdma.yaml# @@ -7,7 +9,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Texas Instruments K3 DMSS PKTDMA Device Tree Bindings maintainers: - - Peter Ujfalusi + - Peter Ujfalusi description: | The Packet DMA (PKTDMA) is intended to perform similar functions as the packet diff --git a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml index 9a87fd9041eb..6a09bbf83d46 100644 --- a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml +++ b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml @@ -1,4 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (C) 2019 Texas Instruments Incorporated +# Author: Peter Ujfalusi %YAML 1.2 --- $id: http://devicetree.org/schemas/dma/ti/k3-udma.yaml# @@ -7,7 +9,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Texas Instruments K3 NAVSS Unified DMA Device Tree Bindings maintainers: - - Peter Ujfalusi + - Peter Ujfalusi description: | The UDMA-P is intended to perform similar (but significantly upgraded) -- cgit 1.4.1 From cffa4b2122f5f3e53cf3d529bbc74651f95856d5 Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Tue, 29 Dec 2020 18:50:46 +0800 Subject: regmap: debugfs: Fix a memory leak when calling regmap_attach_dev After initializing the regmap through syscon_regmap_lookup_by_compatible, then regmap_attach_dev to the device, because the debugfs_name has been allocated, there is no need to redistribute it again unreferenced object 0xd8399b80 (size 64): comm "swapper/0", pid 1, jiffies 4294937641 (age 278.590s) hex dump (first 32 bytes): 64 75 6d 6d 79 2d 69 6f 6d 75 78 63 2d 67 70 72 dummy-iomuxc-gpr 40 32 30 65 34 30 30 30 00 7f 52 5b d8 7e 42 69 @20e4000..R[.~Bi backtrace: [] kasprintf+0x2c/0x54 [<6ad3bbc2>] regmap_debugfs_init+0xdc/0x2fc [] __regmap_init+0xc38/0xd88 [<1f7e0609>] of_syscon_register+0x168/0x294 [<735e8766>] device_node_get_regmap+0x6c/0x98 [] imx6ul_init_machine+0x20/0x88 [<0456565b>] customize_machine+0x1c/0x30 [] do_one_initcall+0x80/0x3ac [<7e584867>] kernel_init_freeable+0x170/0x1f0 [<80074741>] kernel_init+0x8/0x120 [<285d6f28>] ret_from_fork+0x14/0x20 [<00000000>] 0x0 Fixes: 9b947a13e7f6 ("regmap: use debugfs even when no device") Signed-off-by: Xiaolei Wang Link: https://lore.kernel.org/r/20201229105046.41984-1-xiaolei.wang@windriver.com Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-debugfs.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c index 8dfac7f3ed7a..bf03cd343be2 100644 --- a/drivers/base/regmap/regmap-debugfs.c +++ b/drivers/base/regmap/regmap-debugfs.c @@ -582,18 +582,25 @@ void regmap_debugfs_init(struct regmap *map) devname = dev_name(map->dev); if (name) { - map->debugfs_name = kasprintf(GFP_KERNEL, "%s-%s", + if (!map->debugfs_name) { + map->debugfs_name = kasprintf(GFP_KERNEL, "%s-%s", devname, name); + if (!map->debugfs_name) + return; + } name = map->debugfs_name; } else { name = devname; } if (!strcmp(name, "dummy")) { - kfree(map->debugfs_name); + if (!map->debugfs_name) + kfree(map->debugfs_name); map->debugfs_name = kasprintf(GFP_KERNEL, "dummy%d", dummy_index); + if (!map->debugfs_name) + return; name = map->debugfs_name; dummy_index++; } -- cgit 1.4.1 From ede090f5a438e97d0586f64067bbb956e30a2a31 Mon Sep 17 00:00:00 2001 From: Xu Yilun Date: Tue, 29 Dec 2020 13:27:41 +0800 Subject: spi: altera: fix return value for altera_spi_txrx() This patch fixes the return value for altera_spi_txrx. It should return 1 for interrupt transfer mode, and return 0 for polling transfer mode. The altera_spi_txrx() implements the spi_controller.transfer_one callback. According to the spi-summary.rst, the transfer_one should return 0 when transfer is finished, return 1 when transfer is still in progress. Signed-off-by: Xu Yilun Link: https://lore.kernel.org/r/1609219662-27057-2-git-send-email-yilun.xu@intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-altera.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/spi/spi-altera.c b/drivers/spi/spi-altera.c index 809bfff3690a..cbc4c28c1541 100644 --- a/drivers/spi/spi-altera.c +++ b/drivers/spi/spi-altera.c @@ -189,24 +189,26 @@ static int altera_spi_txrx(struct spi_master *master, /* send the first byte */ altera_spi_tx_word(hw); - } else { - while (hw->count < hw->len) { - altera_spi_tx_word(hw); - for (;;) { - altr_spi_readl(hw, ALTERA_SPI_STATUS, &val); - if (val & ALTERA_SPI_STATUS_RRDY_MSK) - break; + return 1; + } + + while (hw->count < hw->len) { + altera_spi_tx_word(hw); - cpu_relax(); - } + for (;;) { + altr_spi_readl(hw, ALTERA_SPI_STATUS, &val); + if (val & ALTERA_SPI_STATUS_RRDY_MSK) + break; - altera_spi_rx_word(hw); + cpu_relax(); } - spi_finalize_current_transfer(master); + + altera_spi_rx_word(hw); } + spi_finalize_current_transfer(master); - return t->len; + return 0; } static irqreturn_t altera_spi_irq(int irq, void *dev) -- cgit 1.4.1 From da4282c17d695b9311608aa63b3c633e649aadea Mon Sep 17 00:00:00 2001 From: Jiang Wang Date: Thu, 24 Dec 2020 01:12:42 +0000 Subject: selftests/bpf: Fix a compile error for BPF_F_BPRM_SECUREEXEC When CONFIG_BPF_LSM is not configured, running bpf selftesting will show BPF_F_BPRM_SECUREEXEC undefined error for bprm_opts.c. The problem is that bprm_opts.c includes vmliunx.h. The vmlinux.h is generated by "bpftool btf dump file ./vmlinux format c". On the other hand, BPF_F_BPRM_SECUREEXEC is defined in include/uapi/linux/bpf.h and used only in bpf_lsm.c. When CONFIG_BPF_LSM is not set, bpf_lsm will not be compiled, so vmlinux.h will not include definition of BPF_F_BPRM_SECUREEXEC. Ideally, we want to compile bpf selftest regardless of the configuration setting, so change the include file from vmlinux.h to bpf.h. Signed-off-by: Jiang Wang Signed-off-by: Daniel Borkmann Acked-by: Song Liu Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20201224011242.585967-1-jiang.wang@bytedance.com --- tools/testing/selftests/bpf/progs/bprm_opts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/bprm_opts.c b/tools/testing/selftests/bpf/progs/bprm_opts.c index 5bfef2887e70..418d9c6d4952 100644 --- a/tools/testing/selftests/bpf/progs/bprm_opts.c +++ b/tools/testing/selftests/bpf/progs/bprm_opts.c @@ -4,7 +4,7 @@ * Copyright 2020 Google LLC. */ -#include "vmlinux.h" +#include #include #include #include -- cgit 1.4.1 From a694ffed876575d1df1a47067444047182de4354 Mon Sep 17 00:00:00 2001 From: Iskren Chernev Date: Mon, 28 Dec 2020 23:31:30 +0200 Subject: drm/msm: Fix null dereference in _msm_gem_new The crash was caused by locking an uninitialized lock during init of drm_gem_object. The lock changed in the breaking commit, but the init was not moved accordingly. 8<--- cut here --- Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = (ptrval) [00000000] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT SMP ARM Modules linked in: msm(+) qcom_spmi_vadc qcom_vadc_common dm_mod usb_f_rndis rmi_i2c rmi_core qnoc_msm8974 icc_smd_rpm pm8941_pwrkey CPU: 2 PID: 1020 Comm: udevd Not tainted 5.10.0-postmarketos-qcom-msm8974 #8 Hardware name: Generic DT based system PC is at ww_mutex_lock+0x20/0xb0 LR is at _msm_gem_new+0x13c/0x298 [msm] pc : [] lr : [] psr: 20000013 sp : c36e7ad0 ip : c3b3d800 fp : 00000000 r10: 00000001 r9 : c3b22800 r8 : 00000000 r7 : c3b23000 r6 : c3b3d600 r5 : c3b3d600 r4 : 00000000 r3 : c34b4780 r2 : c3b3d6f4 r1 : 00000000 r0 : 00000000 Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5787d Table: 03ae406a DAC: 00000051 Process udevd (pid: 1020, stack limit = 0x(ptrval)) Stack: (0xc36e7ad0 to 0xc36e8000) [...] [] (ww_mutex_lock) from [] (_msm_gem_new+0x13c/0x298 [msm]) [] (_msm_gem_new [msm]) from [] (_msm_gem_kernel_new+0x20/0x190 [msm]) [] (_msm_gem_kernel_new [msm]) from [] (msm_gem_kernel_new+0x24/0x2c [msm]) [] (msm_gem_kernel_new [msm]) from [] (msm_gpu_init+0x308/0x548 [msm]) [] (msm_gpu_init [msm]) from [] (adreno_gpu_init+0x13c/0x240 [msm]) [] (adreno_gpu_init [msm]) from [] (a3xx_gpu_init+0x78/0x1dc [msm]) [] (a3xx_gpu_init [msm]) from [] (adreno_bind+0x1cc/0x274 [msm]) [] (adreno_bind [msm]) from [] (component_bind_all+0x11c/0x278) [] (component_bind_all) from [] (msm_drm_bind+0x18c/0x5b4 [msm]) [] (msm_drm_bind [msm]) from [] (try_to_bring_up_master+0x200/0x2c8) [] (try_to_bring_up_master) from [] (component_master_add_with_match+0xc8/0xfc) [] (component_master_add_with_match) from [] (msm_pdev_probe+0x288/0x2c4 [msm]) [] (msm_pdev_probe [msm]) from [] (platform_drv_probe+0x48/0x98) [] (platform_drv_probe) from [] (really_probe+0x108/0x528) [] (really_probe) from [] (driver_probe_device+0x78/0x1d4) [] (driver_probe_device) from [] (device_driver_attach+0xa8/0xb0) [] (device_driver_attach) from [] (__driver_attach+0xb4/0x154) [] (__driver_attach) from [] (bus_for_each_dev+0x78/0xb8) [] (bus_for_each_dev) from [] (bus_add_driver+0x10c/0x208) [] (bus_add_driver) from [] (driver_register+0x88/0x118) [] (driver_register) from [] (do_one_initcall+0x50/0x2b0) [] (do_one_initcall) from [] (do_init_module+0x60/0x288) [] (do_init_module) from [] (sys_finit_module+0xd4/0x120) [] (sys_finit_module) from [] (ret_fast_syscall+0x0/0x54) Exception stack(0xc36e7fa8 to 0xc36e7ff0) 7fa0: 00020000 00000000 00000007 b6edd5b0 00000000 b6f2ff20 7fc0: 00020000 00000000 0000017b 0000017b b6eef980 bedc3a54 00473c99 00000000 7fe0: b6edd5b0 bedc3918 b6ed8a5f b6f6a8b0 Code: e3c3303f e593300c e1a04000 f590f000 (e1940f9f) ---[ end trace 277e2a3da40bbb76 ]--- Fixes: 6c0e3ea250476 ("drm/msm/gem: Switch over to obj->resv for locking") Signed-off-by: Iskren Chernev Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index a21be5b910ff..d9a5a1895f3d 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -1101,6 +1101,8 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev, struct msm_gem_vma *vma; struct page **pages; + drm_gem_private_object_init(dev, obj, size); + msm_gem_lock(obj); vma = add_vma(obj, NULL); @@ -1112,7 +1114,6 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev, to_msm_bo(obj)->vram_node = &vma->node; - drm_gem_private_object_init(dev, obj, size); pages = get_pages(obj); if (IS_ERR(pages)) { -- cgit 1.4.1 From 07fcad0d726d5da7c43f1c8e8fdb66c93a140ca5 Mon Sep 17 00:00:00 2001 From: Iskren Chernev Date: Mon, 28 Dec 2020 23:31:31 +0200 Subject: drm/msm: Ensure get_pages is called when locked get_pages is only called in a locked context. Add a WARN_ON to make sure it stays that way. Signed-off-by: Iskren Chernev Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index d9a5a1895f3d..114c0711a302 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -96,6 +96,8 @@ static struct page **get_pages(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); + WARN_ON(!msm_gem_is_locked(obj)); + if (!msm_obj->pages) { struct drm_device *dev = obj->dev; struct page **p; @@ -1114,8 +1116,9 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev, to_msm_bo(obj)->vram_node = &vma->node; - + msm_gem_lock(obj); pages = get_pages(obj); + msm_gem_unlock(obj); if (IS_ERR(pages)) { ret = PTR_ERR(pages); goto fail; -- cgit 1.4.1 From 77788775c7132a8d93c6930ab1bd84fc743c7cb7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 29 Dec 2020 10:50:46 -0700 Subject: io_uring: don't assume mm is constant across submits If we COW the identity, we assume that ->mm never changes. But this isn't true of multiple processes end up sharing the ring. Hence treat id->mm like like any other process compontent when it comes to the identity mapping. This is pretty trivial, just moving the existing grab into io_grab_identity(), and including a check for the match. Cc: stable@vger.kernel.org # 5.10 Fixes: 1e6fa5216a0e ("io_uring: COW io_identity on mismatch") Reported-by: Christian Brauner : Tested-by: Christian Brauner : Signed-off-by: Jens Axboe --- fs/io_uring.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7e35283fc0b1..eb4620ff638e 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1501,6 +1501,13 @@ static bool io_grab_identity(struct io_kiocb *req) spin_unlock_irq(&ctx->inflight_lock); req->work.flags |= IO_WQ_WORK_FILES; } + if (!(req->work.flags & IO_WQ_WORK_MM) && + (def->work_flags & IO_WQ_WORK_MM)) { + if (id->mm != current->mm) + return false; + mmgrab(id->mm); + req->work.flags |= IO_WQ_WORK_MM; + } return true; } @@ -1525,13 +1532,6 @@ static void io_prep_async_work(struct io_kiocb *req) req->work.flags |= IO_WQ_WORK_UNBOUND; } - /* ->mm can never change on us */ - if (!(req->work.flags & IO_WQ_WORK_MM) && - (def->work_flags & IO_WQ_WORK_MM)) { - mmgrab(id->mm); - req->work.flags |= IO_WQ_WORK_MM; - } - /* if we fail grabbing identity, we must COW, regrab, and retry */ if (io_grab_identity(req)) return; -- cgit 1.4.1 From b000700d6db50c933ce8b661154e26cf4ad06dba Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sat, 26 Dec 2020 15:27:14 +0800 Subject: habanalabs: Fix memleak in hl_device_reset When kzalloc() fails, we should execute hl_mmu_fini() to release the MMU module. It's the same when hl_ctx_init() fails. Signed-off-by: Dinghao Liu Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 0749c92cbcf6..1456eabf9601 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -1092,6 +1092,7 @@ kill_processes: GFP_KERNEL); if (!hdev->kernel_ctx) { rc = -ENOMEM; + hl_mmu_fini(hdev); goto out_err; } @@ -1103,6 +1104,7 @@ kill_processes: "failed to init kernel ctx in hard reset\n"); kfree(hdev->kernel_ctx); hdev->kernel_ctx = NULL; + hl_mmu_fini(hdev); goto out_err; } } -- cgit 1.4.1 From 7cf22a1c88c05ea3807f95b1edfebb729016ae52 Mon Sep 17 00:00:00 2001 From: Harish Date: Tue, 29 Dec 2020 15:14:22 -0800 Subject: selftests/vm: fix building protection keys test Commit d8cbe8bfa7d ("tools/testing/selftests/vm: fix build error") tried to include a ARCH check for powerpc, however ARCH is not defined in the Makefile before including lib.mk. This makes test building to skip on both x86 and powerpc. Fix the arch check by replacing it using machine type as it is already defined and used in the test. Link: https://lkml.kernel.org/r/20201215100402.257376-1-harish@linux.ibm.com Fixes: d8cbe8bfa7d ("tools/testing/selftests/vm: fix build error") Signed-off-by: Harish Reviewed-by: Sandipan Das Cc: Shuah Khan Cc: Sandipan Das Cc: John Hubbard Cc: Dave Hansen Cc: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 9a25307f6115..d42115e4284d 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -4,7 +4,7 @@ include local_config.mk uname_M := $(shell uname -m 2>/dev/null || echo not) -MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/') +MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/') # Without this, failed build products remain, with up-to-date timestamps, # thus tricking Make (and you!) into believing that All Is Well, in subsequent @@ -43,7 +43,7 @@ TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd -ifeq ($(ARCH),x86_64) +ifeq ($(MACHINE),x86_64) CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32) CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_64bit_program.c) CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_program.c -no-pie) @@ -65,13 +65,13 @@ TEST_GEN_FILES += $(BINARIES_64) endif else -ifneq (,$(findstring $(ARCH),powerpc)) +ifneq (,$(findstring $(MACHINE),ppc64)) TEST_GEN_FILES += protection_keys endif endif -ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64)) +ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sh64 sparc64 x86_64)) TEST_GEN_FILES += va_128TBswitch TEST_GEN_FILES += virtual_address_range TEST_GEN_FILES += write_to_hugetlbfs @@ -84,7 +84,7 @@ TEST_FILES := test_vmalloc.sh KSFT_KHDR_INSTALL := 1 include ../lib.mk -ifeq ($(ARCH),x86_64) +ifeq ($(MACHINE),x86_64) BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) -- cgit 1.4.1 From e7dd91c456a8cdbcd7066997d15e36d14276a949 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Tue, 29 Dec 2020 15:14:25 -0800 Subject: mm/hugetlb: fix deadlock in hugetlb_cow error path syzbot reported the deadlock here [1]. The issue is in hugetlb cow error handling when there are not enough huge pages for the faulting task which took the original reservation. It is possible that other (child) tasks could have consumed pages associated with the reservation. In this case, we want the task which took the original reservation to succeed. So, we unmap any associated pages in children so that they can be used by the faulting task that owns the reservation. The unmapping code needs to hold i_mmap_rwsem in write mode. However, due to commit c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization") we are already holding i_mmap_rwsem in read mode when hugetlb_cow is called. Technically, i_mmap_rwsem does not need to be held in read mode for COW mappings as they can not share pmd's. Modifying the fault code to not take i_mmap_rwsem in read mode for COW (and other non-sharable) mappings is too involved for a stable fix. Instead, we simply drop the hugetlb_fault_mutex and i_mmap_rwsem before unmapping. This is OK as it is technically not needed. They are reacquired after unmapping as expected by calling code. Since this is done in an uncommon error path, the overhead of dropping and reacquiring mutexes is acceptable. While making changes, remove redundant BUG_ON after unmap_ref_private. [1] https://lkml.kernel.org/r/000000000000b73ccc05b5cf8558@google.com Link: https://lkml.kernel.org/r/4c5781b8-3b00-761e-c0c7-c5edebb6ec1a@oracle.com Fixes: c0d0381ade79 ("hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization") Signed-off-by: Mike Kravetz Reported-by: syzbot+5eee4145df3c15e96625@syzkaller.appspotmail.com Cc: Naoya Horiguchi Cc: Michal Hocko Cc: Hugh Dickins Cc: "Aneesh Kumar K . V" Cc: Davidlohr Bueso Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index cbf32d2824fd..a2602969873d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4105,10 +4105,30 @@ retry_avoidcopy: * may get SIGKILLed if it later faults. */ if (outside_reserve) { + struct address_space *mapping = vma->vm_file->f_mapping; + pgoff_t idx; + u32 hash; + put_page(old_page); BUG_ON(huge_pte_none(pte)); + /* + * Drop hugetlb_fault_mutex and i_mmap_rwsem before + * unmapping. unmapping needs to hold i_mmap_rwsem + * in write mode. Dropping i_mmap_rwsem in read mode + * here is OK as COW mappings do not interact with + * PMD sharing. + * + * Reacquire both after unmap operation. + */ + idx = vma_hugecache_offset(h, vma, haddr); + hash = hugetlb_fault_mutex_hash(mapping, idx); + mutex_unlock(&hugetlb_fault_mutex_table[hash]); + i_mmap_unlock_read(mapping); + unmap_ref_private(mm, vma, old_page, haddr); - BUG_ON(huge_pte_none(pte)); + + i_mmap_lock_read(mapping); + mutex_lock(&hugetlb_fault_mutex_table[hash]); spin_lock(ptl); ptep = huge_pte_offset(mm, haddr, huge_page_size(h)); if (likely(ptep && -- cgit 1.4.1 From 3a176b94609a18f5f8bac7ddbf8923bd737262db Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 29 Dec 2020 15:14:28 -0800 Subject: Revert "kbuild: avoid static_assert for genksyms" This reverts commit 14dc3983b5dff513a90bd5a8cc90acaf7867c3d0. Macro Elver had sent a fix proper fix earlier, and also pointed out corner cases: "I guess what you propose is simpler, but might still have corner cases where we still get warnings. In particular, if some file (for whatever reason) does not include build_bug.h and uses a raw _Static_assert(), then we still get warnings. E.g. I see 1 user of raw _Static_assert() (drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h )." I believe the raw use of _Static_assert() should be allowed, so this should be fixed in genksyms. Even after commit 14dc3983b5df ("kbuild: avoid static_assert for genksyms"), I confirmed the following test code emits the warning. ---------------->8---------------- #include _Static_assert((1 ?: 0), ""); void foo(void) { } EXPORT_SYMBOL(foo); ---------------->8---------------- WARNING: modpost: EXPORT symbol "foo" [vmlinux] version generation failed, symbol will not be versioned. Now that commit 869b91992bce ("genksyms: Ignore module scoped _Static_assert()") fixed this issue properly, the workaround should be reverted. Link: https://lkml.org/lkml/2020/12/10/845 Link: https://lkml.kernel.org/r/20201219183911.181442-1-masahiroy@kernel.org Signed-off-by: Masahiro Yamada Cc: Marco Elver Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/build_bug.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h index 7bb66e15b481..e3a0be2c90ad 100644 --- a/include/linux/build_bug.h +++ b/include/linux/build_bug.h @@ -77,9 +77,4 @@ #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr) #define __static_assert(expr, msg, ...) _Static_assert(expr, msg) -#ifdef __GENKSYMS__ -/* genksyms gets confused by _Static_assert */ -#define _Static_assert(expr, ...) -#endif - #endif /* _LINUX_BUILD_BUG_H */ -- cgit 1.4.1 From 5dbdb2d87c294401a22e6a6002f08ebc9fbea38b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 29 Dec 2020 15:14:31 -0800 Subject: checkpatch: prefer strscpy to strlcpy Prefer strscpy over the deprecated strlcpy function. Link: https://lkml.kernel.org/r/19fe91084890e2c16fe56f960de6c570a93fa99b.camel@perches.com Signed-off-by: Joe Perches Requested-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 00085308ed9d..92e888ed939f 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -6646,6 +6646,12 @@ sub process { # } # } +# strlcpy uses that should likely be strscpy + if ($line =~ /\bstrlcpy\s*\(/) { + WARN("STRLCPY", + "Prefer strscpy over strlcpy - see: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw\@mail.gmail.com/\n" . $herecurr); + } + # typecasts on min/max could be min_t/max_t if ($perl_version_ok && defined $stat && -- cgit 1.4.1 From 6d87d0ece58bc0022ca5247721a8eb06ef66b673 Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Tue, 29 Dec 2020 15:14:34 -0800 Subject: mm: add prototype for __add_to_page_cache_locked() Otherwise it causes a gcc warning: mm/filemap.c:830:14: warning: no previous prototype for `__add_to_page_cache_locked' [-Wmissing-prototypes] A previous attempt to make this function static led to compilation errors when CONFIG_DEBUG_INFO_BTF is enabled because __add_to_page_cache_locked() is referred to by BPF code. Adding a prototype will silence the warning. Link: https://lkml.kernel.org/r/1608693702-4665-1-git-send-email-jrdr.linux@gmail.com Signed-off-by: Souptick Joarder Cc: Alex Shi Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 5299b90a6c40..c1e908184442 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -216,6 +216,13 @@ int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *, loff_t *); int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, loff_t *); +/* + * Any attempt to mark this function as static leads to build failure + * when CONFIG_DEBUG_INFO_BTF is enabled because __add_to_page_cache_locked() + * is referred to by BPF code. This must be visible for error injection. + */ +int __add_to_page_cache_locked(struct page *page, struct address_space *mapping, + pgoff_t index, gfp_t gfp, void **shadowp); #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) -- cgit 1.4.1 From dc2da7b45ffe954a0090f5d0310ed7b0b37d2bd2 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Tue, 29 Dec 2020 15:14:37 -0800 Subject: mm: memmap defer init doesn't work as expected VMware observed a performance regression during memmap init on their platform, and bisected to commit 73a6e474cb376 ("mm: memmap_init: iterate over memblock regions rather that check each PFN") causing it. Before the commit: [0.033176] Normal zone: 1445888 pages used for memmap [0.033176] Normal zone: 89391104 pages, LIFO batch:63 [0.035851] ACPI: PM-Timer IO Port: 0x448 With commit [0.026874] Normal zone: 1445888 pages used for memmap [0.026875] Normal zone: 89391104 pages, LIFO batch:63 [2.028450] ACPI: PM-Timer IO Port: 0x448 The root cause is the current memmap defer init doesn't work as expected. Before, memmap_init_zone() was used to do memmap init of one whole zone, to initialize all low zones of one numa node, but defer memmap init of the last zone in that numa node. However, since commit 73a6e474cb376, function memmap_init() is adapted to iterater over memblock regions inside one zone, then call memmap_init_zone() to do memmap init for each region. E.g, on VMware's system, the memory layout is as below, there are two memory regions in node 2. The current code will mistakenly initialize the whole 1st region [mem 0xab00000000-0xfcffffffff], then do memmap defer to iniatialize only one memmory section on the 2nd region [mem 0x10000000000-0x1033fffffff]. In fact, we only expect to see that there's only one memory section's memmap initialized. That's why more time is costed at the time. [ 0.008842] ACPI: SRAT: Node 0 PXM 0 [mem 0x00000000-0x0009ffff] [ 0.008842] ACPI: SRAT: Node 0 PXM 0 [mem 0x00100000-0xbfffffff] [ 0.008843] ACPI: SRAT: Node 0 PXM 0 [mem 0x100000000-0x55ffffffff] [ 0.008844] ACPI: SRAT: Node 1 PXM 1 [mem 0x5600000000-0xaaffffffff] [ 0.008844] ACPI: SRAT: Node 2 PXM 2 [mem 0xab00000000-0xfcffffffff] [ 0.008845] ACPI: SRAT: Node 2 PXM 2 [mem 0x10000000000-0x1033fffffff] Now, let's add a parameter 'zone_end_pfn' to memmap_init_zone() to pass down the real zone end pfn so that defer_init() can use it to judge whether defer need be taken in zone wide. Link: https://lkml.kernel.org/r/20201223080811.16211-1-bhe@redhat.com Link: https://lkml.kernel.org/r/20201223080811.16211-2-bhe@redhat.com Fixes: commit 73a6e474cb376 ("mm: memmap_init: iterate over memblock regions rather that check each PFN") Signed-off-by: Baoquan He Reported-by: Rahul Gopakumar Reviewed-by: Mike Rapoport Cc: David Hildenbrand Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/mm/init.c | 4 ++-- include/linux/mm.h | 5 +++-- mm/memory_hotplug.c | 2 +- mm/page_alloc.c | 8 +++++--- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 9b5acf8fb092..e76386a3479e 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -536,7 +536,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg) if (map_start < map_end) memmap_init_zone((unsigned long)(map_end - map_start), - args->nid, args->zone, page_to_pfn(map_start), + args->nid, args->zone, page_to_pfn(map_start), page_to_pfn(map_end), MEMINIT_EARLY, NULL, MIGRATE_MOVABLE); return 0; } @@ -546,7 +546,7 @@ memmap_init (unsigned long size, int nid, unsigned long zone, unsigned long start_pfn) { if (!vmem_map) { - memmap_init_zone(size, nid, zone, start_pfn, + memmap_init_zone(size, nid, zone, start_pfn, start_pfn + size, MEMINIT_EARLY, NULL, MIGRATE_MOVABLE); } else { struct page *start; diff --git a/include/linux/mm.h b/include/linux/mm.h index c1e908184442..ecdf8a8cd6ae 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2439,8 +2439,9 @@ extern int __meminit early_pfn_to_nid(unsigned long pfn); #endif extern void set_dma_reserve(unsigned long new_dma_reserve); -extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long, - enum meminit_context, struct vmem_altmap *, int migratetype); +extern void memmap_init_zone(unsigned long, int, unsigned long, + unsigned long, unsigned long, enum meminit_context, + struct vmem_altmap *, int migratetype); extern void setup_per_zone_wmarks(void); extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index af41fb990820..f9d57b9be8c7 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -713,7 +713,7 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, * expects the zone spans the pfn range. All the pages in the range * are reserved so nobody should be touching them so we should be safe */ - memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, + memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, 0, MEMINIT_HOTPLUG, altmap, migratetype); set_zone_contiguous(zone); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7a2c89b21115..bdbec4c98173 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -423,6 +423,8 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn) if (end_pfn < pgdat_end_pfn(NODE_DATA(nid))) return false; + if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX) + return true; /* * We start only with one section of pages, more pages are added as * needed until the rest of deferred pages are initialized. @@ -6116,7 +6118,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn) * zone stats (e.g., nr_isolate_pageblock) are touched. */ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, - unsigned long start_pfn, + unsigned long start_pfn, unsigned long zone_end_pfn, enum meminit_context context, struct vmem_altmap *altmap, int migratetype) { @@ -6152,7 +6154,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, if (context == MEMINIT_EARLY) { if (overlap_memmap_init(zone, &pfn)) continue; - if (defer_init(nid, pfn, end_pfn)) + if (defer_init(nid, pfn, zone_end_pfn)) break; } @@ -6266,7 +6268,7 @@ void __meminit __weak memmap_init(unsigned long size, int nid, if (end_pfn > start_pfn) { size = end_pfn - start_pfn; - memmap_init_zone(size, nid, zone, start_pfn, + memmap_init_zone(size, nid, zone, start_pfn, range_end_pfn, MEMINIT_EARLY, NULL, MIGRATE_MOVABLE); } } -- cgit 1.4.1 From e05986ee7a5814bec0e0075d813daca3d46e4a9e Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Tue, 29 Dec 2020 15:14:40 -0800 Subject: mm/mremap.c: fix extent calculation When `next < old_addr`, `next - old_addr` arithmetic underflows causing `extent` to be incorrect. Make `extent` the smaller of `next - old_addr` or `old_end - old_addr`. Link: https://lkml.kernel.org/r/20201219170433.2418867-1-kaleshsingh@google.com Fixes: c49dd34018026 ("mm: speedup mremap on 1GB or larger regions") Signed-off-by: Kalesh Singh Reported-by: Guenter Roeck Tested-by: Guenter Roeck Cc: Suren Baghdasaryan Cc: Minchan Kim Cc: Lokesh Gidra Cc: Helge Deller Cc: Kalesh Singh Cc: "Kirill A. Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mremap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/mremap.c b/mm/mremap.c index c5590afe7165..f554320281cc 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -358,7 +358,9 @@ static unsigned long get_extent(enum pgt_entry entry, unsigned long old_addr, next = (old_addr + size) & mask; /* even if next overflowed, extent below will be ok */ - extent = (next > old_end) ? old_end - old_addr : next - old_addr; + extent = next - old_addr; + if (extent > old_end - old_addr) + extent = old_end - old_addr; next = (new_addr + size) & mask; if (extent > next - new_addr) extent = next - new_addr; -- cgit 1.4.1 From 111fe7186b29d172729db5e294875b9fc7a0ec1d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 29 Dec 2020 15:14:43 -0800 Subject: mm: generalise COW SMC TLB flushing race comment I'm not sure if I'm completely missing something here, but AFAIKS the reference to the mysterious "COW SMC race" confuses the issue. The original changelog and mailing list thread didn't help me either. This SMC race is where the problem was detected, but isn't the general problem bigger and more obvious: that the new PTE could be picked up at any time by any TLB while entries for the old PTE exist in other TLBs before the TLB flush takes effect? The case where the iTLB and dTLB of a CPU are pointing at different pages is an interesting one but follows from the general problem. The other (minor) thing with the comment I think it makes it a bit clearer to say what the old code was doing (i.e., it avoids the race as opposed to what?). References: 4ce072f1faf29 ("mm: fix a race condition under SMC + COW") Link: https://lkml.kernel.org/r/20201215121119.351650-1-npiggin@gmail.com Reviewed-by: Matthew Wilcox (Oracle) Cc: Suresh Siddha Cc: "David S. Miller" Cc: Hugh Dickins Cc: Peter Zijlstra Cc: Suresh Siddha Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 7d608765932b..feff48e1465a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2892,11 +2892,13 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) entry = mk_pte(new_page, vma->vm_page_prot); entry = pte_sw_mkyoung(entry); entry = maybe_mkwrite(pte_mkdirty(entry), vma); + /* * Clear the pte entry and flush it first, before updating the - * pte with the new entry. This will avoid a race condition - * seen in the presence of one thread doing SMC and another - * thread doing COW. + * pte with the new entry, to keep TLBs on different CPUs in + * sync. This code used to set the new PTE then flush TLBs, but + * that left a window where the new PTE could be loaded into + * some TLBs while the old PTE remains in others. */ ptep_clear_flush_notify(vma, vmf->address, vmf->pte); page_add_new_anon_rmap(new_page, vma, vmf->address, false); -- cgit 1.4.1 From 13384f6125ad7ebdcc8914fe1e03ded48ce76581 Mon Sep 17 00:00:00 2001 From: Walter Wu Date: Tue, 29 Dec 2020 15:14:46 -0800 Subject: kasan: fix null pointer dereference in kasan_record_aux_stack Syzbot reported the following [1]: BUG: kernel NULL pointer dereference, address: 0000000000000008 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 2d993067 P4D 2d993067 PUD 19a3c067 PMD 0 Oops: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 3852 Comm: kworker/1:2 Not tainted 5.10.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events free_ipc RIP: 0010:kasan_record_aux_stack+0x77/0xb0 Add null checking slab object from kasan_get_alloc_meta() in order to avoid null pointer dereference. [1] https://syzkaller.appspot.com/x/log.txt?x=10a82a50d00000 Link: https://lkml.kernel.org/r/20201228080018.23041-1-walter-zh.wu@mediatek.com Signed-off-by: Walter Wu Suggested-by: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Andrey Konovalov Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/generic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index 1dd5a0f99372..5106b84b07d4 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -337,6 +337,8 @@ void kasan_record_aux_stack(void *addr) cache = page->slab_cache; object = nearest_obj(cache, page, addr); alloc_meta = kasan_get_alloc_meta(cache, object); + if (!alloc_meta) + return; alloc_meta->aux_stack[1] = alloc_meta->aux_stack[0]; alloc_meta->aux_stack[0] = kasan_save_stack(GFP_NOWAIT); -- cgit 1.4.1 From 87dbc209ea04645fd2351981f09eff5d23f8e2e9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 29 Dec 2020 15:14:49 -0800 Subject: local64.h: make mandatory Make mandatory in include/asm-generic/Kbuild and remove all arch/*/include/asm/local64.h arch-specific files since they only #include . This fixes build errors on arch/c6x/ and arch/nios2/ for block/blk-iocost.c. Build-tested on 21 of 25 arch-es. (tools problems on the others) Yes, we could even rename to and change all #includes to use instead. Link: https://lkml.kernel.org/r/20201227024446.17018-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Suggested-by: Christoph Hellwig Reviewed-by: Masahiro Yamada Cc: Jens Axboe Cc: Ley Foon Tan Cc: Mark Salter Cc: Aurelien Jacquiot Cc: Peter Zijlstra Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/local64.h | 1 - arch/arc/include/asm/Kbuild | 1 - arch/arm/include/asm/Kbuild | 1 - arch/arm64/include/asm/Kbuild | 1 - arch/csky/include/asm/Kbuild | 1 - arch/h8300/include/asm/Kbuild | 1 - arch/hexagon/include/asm/Kbuild | 1 - arch/ia64/include/asm/local64.h | 1 - arch/m68k/include/asm/Kbuild | 1 - arch/microblaze/include/asm/Kbuild | 1 - arch/mips/include/asm/Kbuild | 1 - arch/nds32/include/asm/Kbuild | 1 - arch/openrisc/include/asm/Kbuild | 1 - arch/parisc/include/asm/Kbuild | 1 - arch/powerpc/include/asm/Kbuild | 1 - arch/riscv/include/asm/Kbuild | 1 - arch/s390/include/asm/Kbuild | 1 - arch/sh/include/asm/Kbuild | 1 - arch/sparc/include/asm/Kbuild | 1 - arch/x86/include/asm/local64.h | 1 - arch/xtensa/include/asm/Kbuild | 1 - include/asm-generic/Kbuild | 1 + 22 files changed, 1 insertion(+), 21 deletions(-) delete mode 100644 arch/alpha/include/asm/local64.h delete mode 100644 arch/ia64/include/asm/local64.h delete mode 100644 arch/x86/include/asm/local64.h diff --git a/arch/alpha/include/asm/local64.h b/arch/alpha/include/asm/local64.h deleted file mode 100644 index 36c93b5cc239..000000000000 --- a/arch/alpha/include/asm/local64.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index 81f4edec0c2a..3c1afa524b9c 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 generic-y += extable.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += parport.h generic-y += user.h diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 4a0848aef207..03657ff8fbe3 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += early_ioremap.h generic-y += extable.h generic-y += flat.h -generic-y += local64.h generic-y += parport.h generated-y += mach-types.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index ff9cbb631212..07ac208edc89 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 generic-y += early_ioremap.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += qrwlock.h generic-y += qspinlock.h diff --git a/arch/csky/include/asm/Kbuild b/arch/csky/include/asm/Kbuild index 93372255984d..cc24bb8e539f 100644 --- a/arch/csky/include/asm/Kbuild +++ b/arch/csky/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += asm-offsets.h generic-y += gpio.h generic-y += kvm_para.h -generic-y += local64.h generic-y += qrwlock.h generic-y += user.h generic-y += vmlinux.lds.h diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index ddf04f32b546..60ee7f0d60a8 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += asm-offsets.h generic-y += extable.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += parport.h generic-y += spinlock.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 373964bb177e..3ece3c93fe08 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -2,5 +2,4 @@ generic-y += extable.h generic-y += iomap.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/ia64/include/asm/local64.h b/arch/ia64/include/asm/local64.h deleted file mode 100644 index 36c93b5cc239..000000000000 --- a/arch/ia64/include/asm/local64.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 1bff55aa2d54..0dbf9c5c6fae 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -2,6 +2,5 @@ generated-y += syscall_table.h generic-y += extable.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += spinlock.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 63bce836b9f1..29b0e557aa7c 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -2,7 +2,6 @@ generated-y += syscall_table.h generic-y += extable.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += parport.h generic-y += syscalls.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 198b3bafdac9..95b4fa7bd0d1 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -6,7 +6,6 @@ generated-y += syscall_table_64_n64.h generated-y += syscall_table_64_o32.h generic-y += export.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += parport.h generic-y += qrwlock.h diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild index ff1e94299317..82a4453c9c2d 100644 --- a/arch/nds32/include/asm/Kbuild +++ b/arch/nds32/include/asm/Kbuild @@ -4,6 +4,5 @@ generic-y += cmpxchg.h generic-y += export.h generic-y += gpio.h generic-y += kvm_para.h -generic-y += local64.h generic-y += parport.h generic-y += user.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 442f3d3bcd90..ca5987e11053 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 generic-y += extable.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += qspinlock_types.h generic-y += qspinlock.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index f16c4db80116..4406475a2304 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -3,6 +3,5 @@ generated-y += syscall_table_32.h generated-y += syscall_table_64.h generated-y += syscall_table_c32.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += user.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 90cd5c53af66..e1f9b4ea1c53 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -5,7 +5,6 @@ generated-y += syscall_table_c32.h generated-y += syscall_table_spu.h generic-y += export.h generic-y += kvm_types.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += qrwlock.h generic-y += vtime.h diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index 59dd7be55005..445ccc97305a 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -3,6 +3,5 @@ generic-y += early_ioremap.h generic-y += extable.h generic-y += flat.h generic-y += kvm_para.h -generic-y += local64.h generic-y += user.h generic-y += vmlinux.lds.h diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 319efa0e6d02..1a18d7b82f86 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -7,5 +7,4 @@ generated-y += unistd_nr.h generic-y += asm-offsets.h generic-y += export.h generic-y += kvm_types.h -generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 7435182ef846..fc44d9c88b41 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 generated-y += syscall_table.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += parport.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 5269a704801f..3688fdae50e4 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -6,5 +6,4 @@ generated-y += syscall_table_64.h generated-y += syscall_table_c32.h generic-y += export.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h deleted file mode 100644 index 36c93b5cc239..000000000000 --- a/arch/x86/include/asm/local64.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 9718e9593564..854c5e07e867 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -2,7 +2,6 @@ generated-y += syscall_table.h generic-y += extable.h generic-y += kvm_para.h -generic-y += local64.h generic-y += mcs_spinlock.h generic-y += param.h generic-y += qrwlock.h diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 4365b9aa3e3f..267f6dfb8960 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -34,6 +34,7 @@ mandatory-y += kmap_size.h mandatory-y += kprobes.h mandatory-y += linkage.h mandatory-y += local.h +mandatory-y += local64.h mandatory-y += mm-arch-hooks.h mandatory-y += mmiowb.h mandatory-y += mmu.h -- cgit 1.4.1 From 8b0fac44bd1ff17016502b3c3533f5abb8456c65 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Tue, 29 Dec 2020 15:14:52 -0800 Subject: sizes.h: add SZ_8G/SZ_16G/SZ_32G macros Add these macros, since we can use them in drivers. Link: https://lkml.kernel.org/r/20201229072819.11183-1-sjhuang@iluvatar.ai Signed-off-by: Huang Shijie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sizes.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/sizes.h b/include/linux/sizes.h index 9874f6f67537..1ac79bcee2bb 100644 --- a/include/linux/sizes.h +++ b/include/linux/sizes.h @@ -44,6 +44,9 @@ #define SZ_2G 0x80000000 #define SZ_4G _AC(0x100000000, ULL) +#define SZ_8G _AC(0x200000000, ULL) +#define SZ_16G _AC(0x400000000, ULL) +#define SZ_32G _AC(0x800000000, ULL) #define SZ_64T _AC(0x400000000000, ULL) #endif /* __LINUX_SIZES_H__ */ -- cgit 1.4.1 From aa8c7db494d0a83ecae583aa193f1134ef25d506 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 29 Dec 2020 15:14:55 -0800 Subject: kdev_t: always inline major/minor helper functions Silly GCC doesn't always inline these trivial functions. Fixes the following warning: arch/x86/kernel/sys_ia32.o: warning: objtool: cp_stat64()+0xd8: call to new_encode_dev() with UACCESS enabled Link: https://lkml.kernel.org/r/984353b44a4484d86ba9f73884b7306232e25e30.1608737428.git.jpoimboe@redhat.com Signed-off-by: Josh Poimboeuf Reported-by: Randy Dunlap Acked-by: Randy Dunlap [build-tested] Cc: Peter Zijlstra Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kdev_t.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/include/linux/kdev_t.h b/include/linux/kdev_t.h index 85b5151911cf..4856706fbfeb 100644 --- a/include/linux/kdev_t.h +++ b/include/linux/kdev_t.h @@ -21,61 +21,61 @@ }) /* acceptable for old filesystems */ -static inline bool old_valid_dev(dev_t dev) +static __always_inline bool old_valid_dev(dev_t dev) { return MAJOR(dev) < 256 && MINOR(dev) < 256; } -static inline u16 old_encode_dev(dev_t dev) +static __always_inline u16 old_encode_dev(dev_t dev) { return (MAJOR(dev) << 8) | MINOR(dev); } -static inline dev_t old_decode_dev(u16 val) +static __always_inline dev_t old_decode_dev(u16 val) { return MKDEV((val >> 8) & 255, val & 255); } -static inline u32 new_encode_dev(dev_t dev) +static __always_inline u32 new_encode_dev(dev_t dev) { unsigned major = MAJOR(dev); unsigned minor = MINOR(dev); return (minor & 0xff) | (major << 8) | ((minor & ~0xff) << 12); } -static inline dev_t new_decode_dev(u32 dev) +static __always_inline dev_t new_decode_dev(u32 dev) { unsigned major = (dev & 0xfff00) >> 8; unsigned minor = (dev & 0xff) | ((dev >> 12) & 0xfff00); return MKDEV(major, minor); } -static inline u64 huge_encode_dev(dev_t dev) +static __always_inline u64 huge_encode_dev(dev_t dev) { return new_encode_dev(dev); } -static inline dev_t huge_decode_dev(u64 dev) +static __always_inline dev_t huge_decode_dev(u64 dev) { return new_decode_dev(dev); } -static inline int sysv_valid_dev(dev_t dev) +static __always_inline int sysv_valid_dev(dev_t dev) { return MAJOR(dev) < (1<<14) && MINOR(dev) < (1<<18); } -static inline u32 sysv_encode_dev(dev_t dev) +static __always_inline u32 sysv_encode_dev(dev_t dev) { return MINOR(dev) | (MAJOR(dev) << 18); } -static inline unsigned sysv_major(u32 dev) +static __always_inline unsigned sysv_major(u32 dev) { return (dev >> 18) & 0x3fff; } -static inline unsigned sysv_minor(u32 dev) +static __always_inline unsigned sysv_minor(u32 dev) { return dev & 0x3ffff; } -- cgit 1.4.1 From 36845663843fc59c5d794e3dc0641472e3e572da Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Tue, 29 Dec 2020 15:14:58 -0800 Subject: lib/genalloc: fix the overflow when size is too big Some graphic card has very big memory on chip, such as 32G bytes. In the following case, it will cause overflow: pool = gen_pool_create(PAGE_SHIFT, NUMA_NO_NODE); ret = gen_pool_add(pool, 0x1000000, SZ_32G, NUMA_NO_NODE); va = gen_pool_alloc(pool, SZ_4G); The overflow occurs in gen_pool_alloc_algo_owner(): .... size = nbits << order; .... The @nbits is "int" type, so it will overflow. Then the gen_pool_avail() will return the wrong value. This patch converts some "int" to "unsigned long", and changes the compare code in while. Link: https://lkml.kernel.org/r/20201229060657.3389-1-sjhuang@iluvatar.ai Signed-off-by: Huang Shijie Reported-by: Shi Jiasheng Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/genalloc.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/lib/genalloc.c b/lib/genalloc.c index 7f1244b5294a..dab97bb69df6 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -81,14 +81,14 @@ static int clear_bits_ll(unsigned long *addr, unsigned long mask_to_clear) * users set the same bit, one user will return remain bits, otherwise * return 0. */ -static int bitmap_set_ll(unsigned long *map, int start, int nr) +static int bitmap_set_ll(unsigned long *map, unsigned long start, unsigned long nr) { unsigned long *p = map + BIT_WORD(start); - const int size = start + nr; + const unsigned long size = start + nr; int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); - while (nr - bits_to_set >= 0) { + while (nr >= bits_to_set) { if (set_bits_ll(p, mask_to_set)) return nr; nr -= bits_to_set; @@ -116,14 +116,15 @@ static int bitmap_set_ll(unsigned long *map, int start, int nr) * users clear the same bit, one user will return remain bits, * otherwise return 0. */ -static int bitmap_clear_ll(unsigned long *map, int start, int nr) +static unsigned long +bitmap_clear_ll(unsigned long *map, unsigned long start, unsigned long nr) { unsigned long *p = map + BIT_WORD(start); - const int size = start + nr; + const unsigned long size = start + nr; int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); - while (nr - bits_to_clear >= 0) { + while (nr >= bits_to_clear) { if (clear_bits_ll(p, mask_to_clear)) return nr; nr -= bits_to_clear; @@ -183,8 +184,8 @@ int gen_pool_add_owner(struct gen_pool *pool, unsigned long virt, phys_addr_t ph size_t size, int nid, void *owner) { struct gen_pool_chunk *chunk; - int nbits = size >> pool->min_alloc_order; - int nbytes = sizeof(struct gen_pool_chunk) + + unsigned long nbits = size >> pool->min_alloc_order; + unsigned long nbytes = sizeof(struct gen_pool_chunk) + BITS_TO_LONGS(nbits) * sizeof(long); chunk = vzalloc_node(nbytes, nid); @@ -242,7 +243,7 @@ void gen_pool_destroy(struct gen_pool *pool) struct list_head *_chunk, *_next_chunk; struct gen_pool_chunk *chunk; int order = pool->min_alloc_order; - int bit, end_bit; + unsigned long bit, end_bit; list_for_each_safe(_chunk, _next_chunk, &pool->chunks) { chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); @@ -278,7 +279,7 @@ unsigned long gen_pool_alloc_algo_owner(struct gen_pool *pool, size_t size, struct gen_pool_chunk *chunk; unsigned long addr = 0; int order = pool->min_alloc_order; - int nbits, start_bit, end_bit, remain; + unsigned long nbits, start_bit, end_bit, remain; #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG BUG_ON(in_nmi()); @@ -487,7 +488,7 @@ void gen_pool_free_owner(struct gen_pool *pool, unsigned long addr, size_t size, { struct gen_pool_chunk *chunk; int order = pool->min_alloc_order; - int start_bit, nbits, remain; + unsigned long start_bit, nbits, remain; #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG BUG_ON(in_nmi()); @@ -755,7 +756,7 @@ unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size, index = bitmap_find_next_zero_area(map, size, start, nr, 0); while (index < size) { - int next_bit = find_next_bit(map, size, index + nr); + unsigned long next_bit = find_next_bit(map, size, index + nr); if ((next_bit - index) < len) { len = next_bit - index; start_bit = index; -- cgit 1.4.1 From f0bb29e8c4076444d32df00c8d32e169ceecf283 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 29 Dec 2020 15:15:01 -0800 Subject: lib/zlib: fix inflating zlib streams on s390 Decompressing zlib streams on s390 fails with "incorrect data check" error. Userspace zlib checks inflate_state.flags in order to byteswap checksums only for zlib streams, and s390 hardware inflate code, which was ported from there, tries to match this behavior. At the same time, kernel zlib does not use inflate_state.flags, so it contains essentially random values. For many use cases either zlib stream is zeroed out or checksum is not used, so this problem is masked, but at least SquashFS is still affected. Fix by always passing a checksum to and from the hardware as is, which matches zlib_inflate()'s expectations. Link: https://lkml.kernel.org/r/20201215155551.894884-1-iii@linux.ibm.com Fixes: 126196100063 ("lib/zlib: add s390 hardware support for kernel zlib_inflate") Signed-off-by: Ilya Leoshkevich Tested-by: Christian Borntraeger Acked-by: Mikhail Zaslonko Acked-by: Christian Borntraeger Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Mikhail Zaslonko Cc: [5.6+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/zlib_dfltcc/dfltcc_inflate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/zlib_dfltcc/dfltcc_inflate.c b/lib/zlib_dfltcc/dfltcc_inflate.c index db107016d29b..fb60b5a6a1cb 100644 --- a/lib/zlib_dfltcc/dfltcc_inflate.c +++ b/lib/zlib_dfltcc/dfltcc_inflate.c @@ -125,7 +125,7 @@ dfltcc_inflate_action dfltcc_inflate( param->ho = (state->write - state->whave) & ((1 << HB_BITS) - 1); if (param->hl) param->nt = 0; /* Honor history for the first block */ - param->cv = state->flags ? REVERSE(state->check) : state->check; + param->cv = state->check; /* Inflate */ do { @@ -138,7 +138,7 @@ dfltcc_inflate_action dfltcc_inflate( state->bits = param->sbb; state->whave = param->hl; state->write = (param->ho + param->hl) & ((1 << HB_BITS) - 1); - state->check = state->flags ? REVERSE(param->cv) : param->cv; + state->check = param->cv; if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) { /* Report an error if stream is corrupted */ state->mode = BAD; -- cgit 1.4.1 From 605cc30dea249edf1b659e7d0146a2cf13cbbf71 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 29 Dec 2020 15:15:04 -0800 Subject: zlib: move EXPORT_SYMBOL() and MODULE_LICENSE() out of dfltcc_syms.c In commit 11fb479ff5d9 ("zlib: export S390 symbols for zlib modules"), I added EXPORT_SYMBOL()s to dfltcc_inflate.c but then Mikhail said that these should probably be in dfltcc_syms.c with the other EXPORT_SYMBOL()s. However, that is contrary to the current kernel style, which places EXPORT_SYMBOL() immediately after the function that it applies to, so move all EXPORT_SYMBOL()s to their respective function locations and drop the dfltcc_syms.c file. Also move MODULE_LICENSE() from the deleted file to dfltcc.c. [rdunlap@infradead.org: remove dfltcc_syms.o from Makefile] Link: https://lkml.kernel.org/r/20201227171837.15492-1-rdunlap@infradead.org Link: https://lkml.kernel.org/r/20201219052530.28461-1-rdunlap@infradead.org Fixes: 11fb479ff5d9 ("zlib: export S390 symbols for zlib modules") Signed-off-by: Randy Dunlap Cc: Acked-by: Ilya Leoshkevich Acked-by: Christian Borntraeger Cc: Zaslonko Mikhail Cc: Heiko Carstens Cc: Vasily Gorbik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/zlib_dfltcc/Makefile | 2 +- lib/zlib_dfltcc/dfltcc.c | 6 +++++- lib/zlib_dfltcc/dfltcc_deflate.c | 3 +++ lib/zlib_dfltcc/dfltcc_syms.c | 17 ----------------- 4 files changed, 9 insertions(+), 19 deletions(-) delete mode 100644 lib/zlib_dfltcc/dfltcc_syms.c diff --git a/lib/zlib_dfltcc/Makefile b/lib/zlib_dfltcc/Makefile index 8e4d5afbbb10..66e1c96387c4 100644 --- a/lib/zlib_dfltcc/Makefile +++ b/lib/zlib_dfltcc/Makefile @@ -8,4 +8,4 @@ obj-$(CONFIG_ZLIB_DFLTCC) += zlib_dfltcc.o -zlib_dfltcc-objs := dfltcc.o dfltcc_deflate.o dfltcc_inflate.o dfltcc_syms.o +zlib_dfltcc-objs := dfltcc.o dfltcc_deflate.o dfltcc_inflate.o diff --git a/lib/zlib_dfltcc/dfltcc.c b/lib/zlib_dfltcc/dfltcc.c index c30de430b30c..782f76e9d4da 100644 --- a/lib/zlib_dfltcc/dfltcc.c +++ b/lib/zlib_dfltcc/dfltcc.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: Zlib /* dfltcc.c - SystemZ DEFLATE CONVERSION CALL support. */ -#include +#include +#include #include "dfltcc_util.h" #include "dfltcc.h" @@ -53,3 +54,6 @@ void dfltcc_reset( dfltcc_state->dht_threshold = DFLTCC_DHT_MIN_SAMPLE_SIZE; dfltcc_state->param.ribm = DFLTCC_RIBM; } +EXPORT_SYMBOL(dfltcc_reset); + +MODULE_LICENSE("GPL"); diff --git a/lib/zlib_dfltcc/dfltcc_deflate.c b/lib/zlib_dfltcc/dfltcc_deflate.c index 00c185101c6d..6c946e8532ee 100644 --- a/lib/zlib_dfltcc/dfltcc_deflate.c +++ b/lib/zlib_dfltcc/dfltcc_deflate.c @@ -4,6 +4,7 @@ #include "dfltcc_util.h" #include "dfltcc.h" #include +#include #include /* @@ -34,6 +35,7 @@ int dfltcc_can_deflate( return 1; } +EXPORT_SYMBOL(dfltcc_can_deflate); static void dfltcc_gdht( z_streamp strm @@ -277,3 +279,4 @@ again: goto again; /* deflate() must use all input or all output */ return 1; } +EXPORT_SYMBOL(dfltcc_deflate); diff --git a/lib/zlib_dfltcc/dfltcc_syms.c b/lib/zlib_dfltcc/dfltcc_syms.c deleted file mode 100644 index 6f23481804c1..000000000000 --- a/lib/zlib_dfltcc/dfltcc_syms.c +++ /dev/null @@ -1,17 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * linux/lib/zlib_dfltcc/dfltcc_syms.c - * - * Exported symbols for the s390 zlib dfltcc support. - * - */ - -#include -#include -#include -#include "dfltcc.h" - -EXPORT_SYMBOL(dfltcc_can_deflate); -EXPORT_SYMBOL(dfltcc_deflate); -EXPORT_SYMBOL(dfltcc_reset); -MODULE_LICENSE("GPL"); -- cgit 1.4.1 From 1f3147b49d75b47b6be54a1e6dfa87a4921e1e51 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 29 Dec 2020 15:15:07 -0800 Subject: mm: slub: call account_slab_page() after slab page initialization It's convenient to have page->objects initialized before calling into account_slab_page(). In particular, this information can be used to pre-alloc the obj_cgroup vector. Let's call account_slab_page() a bit later, after the initialization of page->objects. This commit doesn't bring any functional change, but is required for further optimizations. [akpm@linux-foundation.org: undo changes needed by forthcoming mm-memcg-slab-pre-allocate-obj_cgroups-for-slab-caches-with-slab_account.patch] Link: https://lkml.kernel.org/r/20201110195753.530157-1-guro@fb.com Signed-off-by: Roman Gushchin Acked-by: Johannes Weiner Reviewed-by: Shakeel Butt Cc: Michal Hocko Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 0c8b43a5b3b0..dc5b42e700b8 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1619,9 +1619,6 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s, else page = __alloc_pages_node(node, flags, order); - if (page) - account_slab_page(page, order, s); - return page; } @@ -1774,6 +1771,8 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) page->objects = oo_objects(oo); + account_slab_page(page, oo_order(oo), s); + page->slab_cache = s; __SetPageSlab(page); if (page_is_pfmemalloc(page)) -- cgit 1.4.1 From 875b2376fd663832bf45f7285c9d26cb8c52929a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 28 Dec 2020 19:47:06 -0800 Subject: fs: block_dev.c: fix kernel-doc warnings from struct block_device changes Fix new kernel-doc warnings in fs/block_dev.c: ../fs/block_dev.c:1066: warning: Excess function parameter 'whole' description in 'bd_abort_claiming' ../fs/block_dev.c:1837: warning: Function parameter or member 'dev' not described in 'lookup_bdev' Fixes: 4e7b5671c6a8 ("block: remove i_bdev") Fixes: 37c3fc9abb25 ("block: simplify the block device claiming interface") Signed-off-by: Randy Dunlap Cc: Jens Axboe Cc: Christoph Hellwig Cc: linux-fsdevel@vger.kernel.org Cc: Alexander Viro Signed-off-by: Jens Axboe --- fs/block_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 9293045e128c..3e5b02f6606c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1055,7 +1055,6 @@ static void bd_finish_claiming(struct block_device *bdev, void *holder) /** * bd_abort_claiming - abort claiming of a block device * @bdev: block device of interest - * @whole: whole block device * @holder: holder that has claimed @bdev * * Abort claiming of a block device when the exclusive open failed. This can be @@ -1828,6 +1827,7 @@ const struct file_operations def_blk_fops = { /** * lookup_bdev - lookup a struct block_device by name * @pathname: special file representing the block device + * @dev: return value of the block device's dev_t * * Get a reference to the blockdevice at @pathname in the current * namespace if possible and return it. Return ERR_PTR(error) -- cgit 1.4.1 From dc30432605bbbd486dfede3852ea4d42c40a84b4 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Mon, 28 Dec 2020 11:27:18 -0800 Subject: block: add debugfs stanza for QUEUE_FLAG_NOWAIT This was missed in 021a24460dc2. Leads to the numeric value of QUEUE_FLAG_NOWAIT (i.e. 29) showing up in /sys/kernel/debug/block/*/state. Fixes: 021a24460dc28e7412aecfae89f60e1847e685c0 Cc: Konstantin Khlebnikov Cc: Mike Snitzer Cc: Christoph Hellwig Cc: Jens Axboe Signed-off-by: Andres Freund Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 3094542e12ae..e21eed20a155 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -129,6 +129,7 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(PCI_P2PDMA), QUEUE_FLAG_NAME(ZONE_RESETALL), QUEUE_FLAG_NAME(RQ_ALLOC_TIME), + QUEUE_FLAG_NAME(NOWAIT), }; #undef QUEUE_FLAG_NAME -- cgit 1.4.1 From 44362a3c353aeec5904c2ae6d1737f20fe7e9c79 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Dec 2020 12:08:54 +0000 Subject: KVM: arm64: Fix hyp_cpu_pm_{init,exit} __init annotation The __init annotations on hyp_cpu_pm_{init,exit} are obviously incorrect, and the build system shouts at you if you enable DEBUG_SECTION_MISMATCH. Nothing really bad happens as we never execute that code outside of the init context, but we can't label the callers as __int either, as kvm_init isn't __init itself. Oh well. Signed-off-by: Marc Zyngier Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20201223120854.255347-1-maz@kernel.org --- arch/arm64/kvm/arm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index ab782c480e9a..04c44853b103 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1568,12 +1568,12 @@ static struct notifier_block hyp_init_cpu_pm_nb = { .notifier_call = hyp_init_cpu_pm_notifier, }; -static void __init hyp_cpu_pm_init(void) +static void hyp_cpu_pm_init(void) { if (!is_protected_kvm_enabled()) cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); } -static void __init hyp_cpu_pm_exit(void) +static void hyp_cpu_pm_exit(void) { if (!is_protected_kvm_enabled()) cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb); -- cgit 1.4.1 From 6820e812dafb4258bc14692f686eec5bde6fba86 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Wed, 16 Dec 2020 11:23:21 +0200 Subject: spi: Fix the clamping of spi->max_speed_hz If spi->controller->max_speed_hz is zero, a non-zero spi->max_speed_hz will be overwritten by zero. Make sure spi->controller->max_speed_hz is not zero when clamping spi->max_speed_hz. Put the spi->controller->max_speed_hz non-zero check higher in the if, so that we avoid a superfluous init to zero when both spi->max_speed_hz and spi->controller->max_speed_hz are zero. Fixes: 9326e4f1e5dd ("spi: Limit the spi device max speed to controller's max speed") Reported-by: Geert Uytterhoeven Suggested-by: Geert Uytterhoeven Signed-off-by: Tudor Ambarus Tested-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20201216092321.413262-1-tudor.ambarus@microchip.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 51d7c004fbab..f59bf5094adb 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -3378,8 +3378,9 @@ int spi_setup(struct spi_device *spi) if (status) return status; - if (!spi->max_speed_hz || - spi->max_speed_hz > spi->controller->max_speed_hz) + if (spi->controller->max_speed_hz && + (!spi->max_speed_hz || + spi->max_speed_hz > spi->controller->max_speed_hz)) spi->max_speed_hz = spi->controller->max_speed_hz; mutex_lock(&spi->controller->io_mutex); -- cgit 1.4.1 From e042f151ec7474b88b8c1edaaddd1ff7415d7117 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 28 Dec 2020 19:54:28 -0800 Subject: hwmon: (sbtsi_temp) Fix Documenation kernel-doc warning Fix Documentation/hwmon/ kernel-doc warning in 5.11-rc1: lnx-511-rc1/Documentation/hwmon/sbtsi_temp.rst:4: WARNING: Title underline too short. Kernel driver sbtsi_temp ================== Fixes: 6ec3fcf556fe ("hwmon: (sbtsi) Add documentation") Signed-off-by: Randy Dunlap Cc: Kun Yi Cc: Guenter Roeck Cc: Jean Delvare Cc: linux-hwmon@vger.kernel.org Link: https://lore.kernel.org/r/20201229035428.31270-1-rdunlap@infradead.org Signed-off-by: Guenter Roeck --- Documentation/hwmon/sbtsi_temp.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/hwmon/sbtsi_temp.rst b/Documentation/hwmon/sbtsi_temp.rst index 922b3c8db666..749f518389c3 100644 --- a/Documentation/hwmon/sbtsi_temp.rst +++ b/Documentation/hwmon/sbtsi_temp.rst @@ -1,7 +1,7 @@ .. SPDX-License-Identifier: GPL-2.0-or-later Kernel driver sbtsi_temp -================== +======================== Supported hardware: -- cgit 1.4.1 From 742eb4750ff35fd62784b04b675d672b8dee2524 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 18 Nov 2020 21:23:33 +0100 Subject: s390: update defconfigs Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/configs/debug_defconfig | 12 +++++++++--- arch/s390/configs/defconfig | 11 +++++++---- arch/s390/configs/zfcpdump_defconfig | 2 ++ 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 1be32fcf6f2e..c4f6ff98a612 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -61,7 +61,9 @@ CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_STATIC_KEYS_SELFTEST=y +CONFIG_SECCOMP_CACHE_DEBUG=y CONFIG_LOCK_EVENT_COUNTS=y +# CONFIG_GCC_PLUGINS is not set CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y @@ -410,12 +412,12 @@ CONFIG_SCSI_ENCLOSURE=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SPI_ATTRS=m -CONFIG_SCSI_FC_ATTRS=y +CONFIG_SCSI_FC_ATTRS=m CONFIG_SCSI_SAS_LIBSAS=m CONFIG_SCSI_SRP_ATTRS=m CONFIG_ISCSI_TCP=m CONFIG_SCSI_DEBUG=m -CONFIG_ZFCP=y +CONFIG_ZFCP=m CONFIG_SCSI_VIRTIO=m CONFIG_SCSI_DH=y CONFIG_SCSI_DH_RDAC=m @@ -444,6 +446,7 @@ CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_QL=m CONFIG_DM_MULTIPATH_ST=m CONFIG_DM_MULTIPATH_HST=m +CONFIG_DM_MULTIPATH_IOA=m CONFIG_DM_DELAY=m CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m @@ -542,7 +545,6 @@ CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set CONFIG_LEGACY_PTY_COUNT=0 -CONFIG_NULL_TTY=m CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM_VIRTIO=m CONFIG_RAW_DRIVER=m @@ -574,6 +576,7 @@ CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +# CONFIG_SURFACE_PLATFORMS is not set CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -655,6 +658,7 @@ CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y # CONFIG_CIFS_DEBUG is not set CONFIG_CIFS_DFS_UPCALL=y +CONFIG_CIFS_SWN_UPCALL=y CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m @@ -826,6 +830,8 @@ CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y +CONFIG_FTRACE_STARTUP_TEST=y +# CONFIG_EVENT_TRACE_STARTUP_TEST is not set CONFIG_DEBUG_USER_ASCE=y CONFIG_NOTIFIER_ERROR_INJECTION=m CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index e2171a008809..51135893cffe 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -58,6 +58,7 @@ CONFIG_S390_UNWIND_SELFTEST=m CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y +# CONFIG_GCC_PLUGINS is not set CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y @@ -95,7 +96,6 @@ CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PERCPU_STATS=y -CONFIG_GUP_TEST=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m @@ -403,12 +403,12 @@ CONFIG_SCSI_ENCLOSURE=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SPI_ATTRS=m -CONFIG_SCSI_FC_ATTRS=y +CONFIG_SCSI_FC_ATTRS=m CONFIG_SCSI_SAS_LIBSAS=m CONFIG_SCSI_SRP_ATTRS=m CONFIG_ISCSI_TCP=m CONFIG_SCSI_DEBUG=m -CONFIG_ZFCP=y +CONFIG_ZFCP=m CONFIG_SCSI_VIRTIO=m CONFIG_SCSI_DH=y CONFIG_SCSI_DH_RDAC=m @@ -437,6 +437,7 @@ CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_QL=m CONFIG_DM_MULTIPATH_ST=m CONFIG_DM_MULTIPATH_HST=m +CONFIG_DM_MULTIPATH_IOA=m CONFIG_DM_DELAY=m CONFIG_DM_UEVENT=y CONFIG_DM_FLAKEY=m @@ -536,7 +537,6 @@ CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set CONFIG_LEGACY_PTY_COUNT=0 -CONFIG_NULL_TTY=m CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM_VIRTIO=m CONFIG_RAW_DRIVER=m @@ -568,6 +568,7 @@ CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +# CONFIG_SURFACE_PLATFORMS is not set CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -645,6 +646,7 @@ CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y # CONFIG_CIFS_DEBUG is not set CONFIG_CIFS_DFS_UPCALL=y +CONFIG_CIFS_SWN_UPCALL=y CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m @@ -778,6 +780,7 @@ CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_HIST_TRIGGERS=y +CONFIG_DEBUG_USER_ASCE=y CONFIG_LKDTM=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index a302630341ef..1ef211dae77a 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -22,6 +22,7 @@ CONFIG_CRASH_DUMP=y # CONFIG_VIRTUALIZATION is not set # CONFIG_S390_GUEST is not set # CONFIG_SECCOMP is not set +# CONFIG_GCC_PLUGINS is not set CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set @@ -58,6 +59,7 @@ CONFIG_RAW_DRIVER=y # CONFIG_HID is not set # CONFIG_VIRTIO_MENU is not set # CONFIG_VHOST_MENU is not set +# CONFIG_SURFACE_PLATFORMS is not set # CONFIG_IOMMU_SUPPORT is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set -- cgit 1.4.1 From 129975e75b9a2ba528d7f58be2e338cd644f6ed8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 28 Dec 2020 09:51:57 +0100 Subject: s390/Kconfig: sort config S390 select list once again ...and add comments at the top and bottom. Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index e84bdd15150b..c72874f09741 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -54,17 +54,23 @@ config KASAN_SHADOW_OFFSET config S390 def_bool y + # + # Note: keep this list sorted alphabetically + # + imply IMA_SECURE_AND_OR_TRUSTED_BOOT select ARCH_BINFMT_ELF_STATE select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_FORCE_DMA_UNENCRYPTED select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_PTE_SPECIAL + select ARCH_HAS_SCALED_CPUTIME select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX @@ -111,8 +117,10 @@ config S390 select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS2 + select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select DMA_OPS if PCI select DYNAMIC_FTRACE if FUNCTION_TRACER + select GENERIC_ALLOCATOR select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_VULNERABILITIES select GENERIC_FIND_FIRST_BIT @@ -126,22 +134,21 @@ config S390 select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN select HAVE_ARCH_KASAN_VMALLOC - select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_SOFT_DIRTY select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_VMAP_STACK select HAVE_ASM_MODVERSIONS - select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS - select HAVE_FAST_GUP + select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES select HAVE_EFFICIENT_UNALIGNED_ACCESS + select HAVE_FAST_GUP select HAVE_FENTRY select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_ERROR_INJECTION @@ -163,16 +170,15 @@ config S390 select HAVE_KRETPROBES select HAVE_KVM select HAVE_LIVEPATCH - select HAVE_PERF_REGS - select HAVE_PERF_USER_STACK_DUMP select HAVE_MEMBLOCK_PHYS_MAP - select MMU_GATHER_NO_GATHER select HAVE_MOD_ARCH_SPECIFIC + select HAVE_NMI select HAVE_NOP_MCOUNT select HAVE_OPROFILE select HAVE_PCI select HAVE_PERF_EVENTS - select MMU_GATHER_RCU_TABLE_FREE + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE select HAVE_RSEQ @@ -181,6 +187,8 @@ config S390 select HAVE_VIRT_CPU_ACCOUNTING_IDLE select IOMMU_HELPER if PCI select IOMMU_SUPPORT if PCI + select MMU_GATHER_NO_GATHER + select MMU_GATHER_RCU_TABLE_FREE select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE if PCI select NEED_SG_DMA_LENGTH if PCI @@ -190,17 +198,12 @@ config S390 select PCI_MSI if PCI select PCI_MSI_ARCH_FALLBACKS if PCI_MSI select SPARSE_IRQ + select SWIOTLB select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select TTY select VIRT_CPU_ACCOUNTING - select ARCH_HAS_SCALED_CPUTIME - select HAVE_NMI - select ARCH_HAS_FORCE_DMA_UNENCRYPTED - select SWIOTLB - select GENERIC_ALLOCATOR - imply IMA_SECURE_AND_OR_TRUSTED_BOOT - + # Note: keep the above list sorted alphabetically config SCHED_OMIT_FRAME_POINTER def_bool y -- cgit 1.4.1 From 1eda52334e6d13eb1a85f713ce06dd39342b5020 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Tue, 15 Dec 2020 10:20:30 +0100 Subject: hwmon: (pwm-fan) Ensure that calculation doesn't discard big period values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With MAX_PWM being defined to 255 the code unsigned long period; ... period = ctx->pwm->args.period; state.duty_cycle = DIV_ROUND_UP(pwm * (period - 1), MAX_PWM); calculates a too small value for duty_cycle if the configured period is big (either by discarding the 64 bit value ctx->pwm->args.period or by overflowing the multiplication). As this results in a too slow fan and so maybe an overheating machine better be safe than sorry and error out in .probe. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20201215092031.152243-1-u.kleine-koenig@pengutronix.de Signed-off-by: Guenter Roeck --- drivers/hwmon/pwm-fan.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/pwm-fan.c b/drivers/hwmon/pwm-fan.c index 777439f48c14..111a91dc6b79 100644 --- a/drivers/hwmon/pwm-fan.c +++ b/drivers/hwmon/pwm-fan.c @@ -334,8 +334,18 @@ static int pwm_fan_probe(struct platform_device *pdev) ctx->pwm_value = MAX_PWM; - /* Set duty cycle to maximum allowed and enable PWM output */ pwm_init_state(ctx->pwm, &state); + /* + * __set_pwm assumes that MAX_PWM * (period - 1) fits into an unsigned + * long. Check this here to prevent the fan running at a too low + * frequency. + */ + if (state.period > ULONG_MAX / MAX_PWM + 1) { + dev_err(dev, "Configured period too big\n"); + return -EINVAL; + } + + /* Set duty cycle to maximum allowed and enable PWM output */ state.duty_cycle = ctx->pwm->args.period - 1; state.enabled = true; -- cgit 1.4.1 From c318840fb2a42ce25febc95c4c19357acf1ae5ca Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 30 Dec 2020 11:20:44 -0500 Subject: USB: Gadget: dummy-hcd: Fix shift-out-of-bounds bug The dummy-hcd driver was written under the assumption that all the parameters in URBs sent to its root hub would be valid. With URBs sent from userspace via usbfs, that assumption can be violated. In particular, the driver doesn't fully check the port-feature values stored in the wValue entry of Clear-Port-Feature and Set-Port-Feature requests. Values that are too large can cause the driver to perform an invalid left shift of more than 32 bits. Ironically, two of those left shifts are unnecessary, because they implement Set-Port-Feature requests that hubs are not required to support, according to section 11.24.2.13 of the USB-2.0 spec. This patch adds the appropriate checks for the port feature selector values and removes the unnecessary feature settings. It also rejects requests to set the TEST feature or to set or clear the INDICATOR and C_OVERCURRENT features, as none of these are relevant to dummy-hcd's root-hub emulation. CC: Reported-and-tested-by: syzbot+5925509f78293baa7331@syzkaller.appspotmail.com Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/20201230162044.GA727759@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/dummy_hcd.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index ab5e978b5052..1a953f44183a 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -2118,9 +2118,21 @@ static int dummy_hub_control( dum_hcd->port_status &= ~USB_PORT_STAT_POWER; set_link_state(dum_hcd); break; - default: + case USB_PORT_FEAT_ENABLE: + case USB_PORT_FEAT_C_ENABLE: + case USB_PORT_FEAT_C_SUSPEND: + /* Not allowed for USB-3 */ + if (hcd->speed == HCD_USB3) + goto error; + fallthrough; + case USB_PORT_FEAT_C_CONNECTION: + case USB_PORT_FEAT_C_RESET: dum_hcd->port_status &= ~(1 << wValue); set_link_state(dum_hcd); + break; + default: + /* Disallow INDICATOR and C_OVER_CURRENT */ + goto error; } break; case GetHubDescriptor: @@ -2281,18 +2293,17 @@ static int dummy_hub_control( */ dum_hcd->re_timeout = jiffies + msecs_to_jiffies(50); fallthrough; + case USB_PORT_FEAT_C_CONNECTION: + case USB_PORT_FEAT_C_RESET: + case USB_PORT_FEAT_C_ENABLE: + case USB_PORT_FEAT_C_SUSPEND: + /* Not allowed for USB-3, and ignored for USB-2 */ + if (hcd->speed == HCD_USB3) + goto error; + break; default: - if (hcd->speed == HCD_USB3) { - if ((dum_hcd->port_status & - USB_SS_PORT_STAT_POWER) != 0) { - dum_hcd->port_status |= (1 << wValue); - } - } else - if ((dum_hcd->port_status & - USB_PORT_STAT_POWER) != 0) { - dum_hcd->port_status |= (1 << wValue); - } - set_link_state(dum_hcd); + /* Disallow TEST, INDICATOR, and C_OVER_CURRENT */ + goto error; } break; case GetPortErrorCount: -- cgit 1.4.1 From be1283454b61a1f3b089f1a74b73e20532262e32 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 29 Dec 2020 18:08:18 +0100 Subject: cpufreq: intel_pstate: Fix fast-switch fallback path When sugov_update_single_perf() falls back to the "frequency" path due to the missing scale-invariance, it will call cpufreq_driver_fast_switch() via sugov_fast_switch() and the driver's ->fast_switch() callback will be invoked, so it must not be NULL. However, after commit a365ab6b9dfb ("cpufreq: intel_pstate: Implement the ->adjust_perf() callback") intel_pstate sets ->fast_switch() to NULL when it is going to use intel_cpufreq_adjust_perf(), which is a mistake, because on x86 the scale-invariance may be turned off dynamically, so modify it to retain the original ->adjust_perf() callback pointer. Fixes: a365ab6b9dfb ("cpufreq: intel_pstate: Implement the ->adjust_perf() callback") Reported-by: Kenneth R. Crudup Tested-by: Kenneth R. Crudup Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 6e23376548ce..1a660466dd75 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3086,7 +3086,6 @@ static int __init intel_pstate_init(void) intel_pstate.attr = hwp_cpufreq_attrs; intel_cpufreq.attr = hwp_cpufreq_attrs; intel_cpufreq.flags |= CPUFREQ_NEED_UPDATE_LIMITS; - intel_cpufreq.fast_switch = NULL; intel_cpufreq.adjust_perf = intel_cpufreq_adjust_perf; if (!default_driver) default_driver = &intel_pstate; -- cgit 1.4.1 From 9cf93f056f783f986c19f40d5304d1bcffa0fc0d Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 27 Dec 2020 12:11:16 +0200 Subject: intel_idle: add SnowRidge C-state table Add C-state table for the SnowRidge SoC which is found on Intel Jacobsville platforms. The following has been changed. 1. C1E latency changed from 10us to 15us. It was measured using the open source "wult" tool (the "nic" method, 15us is the 99.99th percentile). 2. C1E power break even changed from 20us to 25us, which may result in less C1E residency in some workloads. 3. C6 latency changed from 50us to 130us. Measured the same way as C1E. The C6 C-state is supported only by some SnowRidge revisions, so add a C-state table commentary about this. On SnowRidge, C6 support is enumerated via the usual mechanism: "mwait" leaf of the "cpuid" instruction. The 'intel_idle' driver does check this leaf, so even though C6 is present in the table, the driver will only use it if the CPU does support it. Signed-off-by: Artem Bityutskiy Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index d79335506ecd..28f93b9aa51b 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -963,6 +963,39 @@ static struct cpuidle_state dnv_cstates[] __initdata = { .enter = NULL } }; +/* + * Note, depending on HW and FW revision, SnowRidge SoC may or may not support + * C6, and this is indicated in the CPUID mwait leaf. + */ +static struct cpuidle_state snr_cstates[] __initdata = { + { + .name = "C1", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00), + .exit_latency = 2, + .target_residency = 2, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C1E", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 15, + .target_residency = 25, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 130, + .target_residency = 500, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .enter = NULL } +}; + static const struct idle_cpu idle_cpu_nehalem __initconst = { .state_table = nehalem_cstates, .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, @@ -1084,6 +1117,12 @@ static const struct idle_cpu idle_cpu_dnv __initconst = { .use_acpi = true, }; +static const struct idle_cpu idle_cpu_snr __initconst = { + .state_table = snr_cstates, + .disable_promotion_to_c1e = true, + .use_acpi = true, +}; + static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &idle_cpu_nhx), X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &idle_cpu_nehalem), @@ -1122,7 +1161,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &idle_cpu_bxt), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D, &idle_cpu_dnv), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_dnv), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &idle_cpu_snr), {} }; -- cgit 1.4.1 From 1642b4450d20e31439c80c28256c8eee08684698 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 30 Dec 2020 21:34:14 +0000 Subject: io_uring: add a helper for setting a ref node Setting a new reference node to a file data is not trivial, don't repeat it, add and use a helper. Cc: stable@vger.kernel.org # 5.6+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index eb4620ff638e..6372aba8d0c2 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7231,6 +7231,16 @@ static void io_file_ref_kill(struct percpu_ref *ref) complete(&data->done); } +static void io_sqe_files_set_node(struct fixed_file_data *file_data, + struct fixed_file_ref_node *ref_node) +{ + spin_lock_bh(&file_data->lock); + file_data->node = ref_node; + list_add_tail(&ref_node->node, &file_data->ref_list); + spin_unlock_bh(&file_data->lock); + percpu_ref_get(&file_data->refs); +} + static int io_sqe_files_unregister(struct io_ring_ctx *ctx) { struct fixed_file_data *data = ctx->file_data; @@ -7758,11 +7768,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, return PTR_ERR(ref_node); } - file_data->node = ref_node; - spin_lock_bh(&file_data->lock); - list_add_tail(&ref_node->node, &file_data->ref_list); - spin_unlock_bh(&file_data->lock); - percpu_ref_get(&file_data->refs); + io_sqe_files_set_node(file_data, ref_node); return ret; out_fput: for (i = 0; i < ctx->nr_user_files; i++) { @@ -7918,11 +7924,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, if (needs_switch) { percpu_ref_kill(&data->node->refs); - spin_lock_bh(&data->lock); - list_add_tail(&ref_node->node, &data->ref_list); - data->node = ref_node; - spin_unlock_bh(&data->lock); - percpu_ref_get(&ctx->file_data->refs); + io_sqe_files_set_node(data, ref_node); } else destroy_fixed_file_ref_node(ref_node); -- cgit 1.4.1 From 1ffc54220c444774b7f09e6d2121e732f8e19b94 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 30 Dec 2020 21:34:15 +0000 Subject: io_uring: fix io_sqe_files_unregister() hangs io_sqe_files_unregister() uninterruptibly waits for enqueued ref nodes, however requests keeping them may never complete, e.g. because of some userspace dependency. Make sure it's interruptible otherwise it would hang forever. Cc: stable@vger.kernel.org # 5.6+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 6372aba8d0c2..ca46f314640b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -992,6 +992,10 @@ enum io_mem_account { ACCT_PINNED, }; +static void destroy_fixed_file_ref_node(struct fixed_file_ref_node *ref_node); +static struct fixed_file_ref_node *alloc_fixed_file_ref_node( + struct io_ring_ctx *ctx); + static void __io_complete_rw(struct io_kiocb *req, long res, long res2, struct io_comp_state *cs); static void io_cqring_fill_event(struct io_kiocb *req, long res); @@ -7244,11 +7248,15 @@ static void io_sqe_files_set_node(struct fixed_file_data *file_data, static int io_sqe_files_unregister(struct io_ring_ctx *ctx) { struct fixed_file_data *data = ctx->file_data; - struct fixed_file_ref_node *ref_node = NULL; + struct fixed_file_ref_node *backup_node, *ref_node = NULL; unsigned nr_tables, i; + int ret; if (!data) return -ENXIO; + backup_node = alloc_fixed_file_ref_node(ctx); + if (!backup_node) + return -ENOMEM; spin_lock_bh(&data->lock); ref_node = data->node; @@ -7260,7 +7268,18 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) /* wait for all refs nodes to complete */ flush_delayed_work(&ctx->file_put_work); - wait_for_completion(&data->done); + do { + ret = wait_for_completion_interruptible(&data->done); + if (!ret) + break; + ret = io_run_task_work_sig(); + if (ret < 0) { + percpu_ref_resurrect(&data->refs); + reinit_completion(&data->done); + io_sqe_files_set_node(data, backup_node); + return ret; + } + } while (1); __io_sqe_files_unregister(ctx); nr_tables = DIV_ROUND_UP(ctx->nr_user_files, IORING_MAX_FILES_TABLE); @@ -7271,6 +7290,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) kfree(data); ctx->file_data = NULL; ctx->nr_user_files = 0; + destroy_fixed_file_ref_node(backup_node); return 0; } -- cgit 1.4.1 From b1b6b5a30dce872f500dc43f067cba8e7f86fc7d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 30 Dec 2020 21:34:16 +0000 Subject: kernel/io_uring: cancel io_uring before task works For cancelling io_uring requests it needs either to be able to run currently enqueued task_works or having it shut down by that moment. Otherwise io_uring_cancel_files() may be waiting for requests that won't ever complete. Go with the first way and do cancellations before setting PF_EXITING and so before putting the task_work infrastructure into a transition state where task_work_run() would better not be called. Cc: stable@vger.kernel.org # 5.5+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/file.c | 2 -- kernel/exit.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/file.c b/fs/file.c index c0b60961c672..dab120b71e44 100644 --- a/fs/file.c +++ b/fs/file.c @@ -21,7 +21,6 @@ #include #include #include -#include unsigned int sysctl_nr_open __read_mostly = 1024*1024; unsigned int sysctl_nr_open_min = BITS_PER_LONG; @@ -428,7 +427,6 @@ void exit_files(struct task_struct *tsk) struct files_struct * files = tsk->files; if (files) { - io_uring_files_cancel(files); task_lock(tsk); tsk->files = NULL; task_unlock(tsk); diff --git a/kernel/exit.c b/kernel/exit.c index 3594291a8542..04029e35e69a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include @@ -776,6 +777,7 @@ void __noreturn do_exit(long code) schedule(); } + io_uring_files_cancel(tsk->files); exit_signals(tsk); /* sets PF_EXITING */ /* sync mm's RSS info before statistics gathering */ -- cgit 1.4.1 From f93274ef0fe972c120c96b3207f8fce376231a60 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 4 Dec 2020 09:01:36 +0100 Subject: crypto: asym_tpm: correct zero out potential secrets The function derive_pub_key() should be calling memzero_explicit() instead of memset() in case the complier decides to optimize away the call to memset() because it "knows" no one is going to touch the memory anymore. Cc: stable Reported-by: Ilil Blum Shem-Tov Tested-by: Ilil Blum Shem-Tov Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/X8ns4AfwjKudpyfe@kroah.com Signed-off-by: Greg Kroah-Hartman --- crypto/asymmetric_keys/asym_tpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c index 511932aa94a6..0959613560b9 100644 --- a/crypto/asymmetric_keys/asym_tpm.c +++ b/crypto/asymmetric_keys/asym_tpm.c @@ -354,7 +354,7 @@ static uint32_t derive_pub_key(const void *pub_key, uint32_t len, uint8_t *buf) memcpy(cur, e, sizeof(e)); cur += sizeof(e); /* Zero parameters to satisfy set_pub_key ABI. */ - memset(cur, 0, SETKEY_PARAMS_SIZE); + memzero_explicit(cur, SETKEY_PARAMS_SIZE); return cur - buf; } -- cgit 1.4.1 From 957cbca7317f7413e1bac555a6b567af06598b10 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 31 Dec 2020 15:05:46 +0000 Subject: KVM: arm64: Remove spurious semicolon in reg_to_encoding() Although not a problem right now, it flared up while working on some other aspects of the code-base. Remove the useless semicolon. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d46e7f706cb0..42ccc27fb684 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -923,7 +923,7 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, #define reg_to_encoding(x) \ sys_reg((u32)(x)->Op0, (u32)(x)->Op1, \ - (u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2); + (u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2) /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ -- cgit 1.4.1 From 4f8af077a02eed4831885048a10e04daa4e61a72 Mon Sep 17 00:00:00 2001 From: "Nícolas F. R. A. Prado" Date: Mon, 28 Dec 2020 14:46:07 +0000 Subject: docs: Fix reST markup when linking to sections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During the process of converting the documentation to reST, some links were converted using the following wrong syntax (and sometimes using %20 instead of spaces): `Display text <#section-name-in-html>`__ This syntax isn't valid according to the docutils' spec [1], but more importantly, it is specific to HTML, since it uses '#' to link to an HTML anchor. The right syntax would instead use a docutils hyperlink reference as the embedded URI to point to the section [2], that is: `Display text
`__ This syntax works in both HTML and PDF. The LaTeX toolchain doesn't mind the HTML anchor syntax when generating the pdf documentation (make pdfdocs), that is, the build succeeds but the links don't work, but that syntax causes errors when trying to build using the not-yet-merged rst2pdf: ValueError: format not resolved, probably missing URL scheme or undefined destination target for 'Forcing%20Quiescent%20States' So, use the correct syntax in order to have it work in all different output formats. [1]: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#reference-names [2]: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#embedded-uris-and-aliases Fixes: ccc9971e2147 ("docs: rcu: convert some articles from html to ReST") Fixes: c8cce10a62aa ("docs: Fix the reference labels in Locking.rst") Fixes: e548cdeffcd8 ("docs-rst: convert kernel-locking to ReST") Fixes: 7ddedebb03b7 ("ALSA: doc: ReSTize writing-an-alsa-driver document") Signed-off-by: Nícolas F. R. A. Prado Reviewed-by: Takashi Iwai Reviewed-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/20201228144537.135353-1-nfraprado@protonmail.com Signed-off-by: Jonathan Corbet --- .../Memory-Ordering/Tree-RCU-Memory-Ordering.rst | 8 ++++---- .../RCU/Design/Requirements/Requirements.rst | 20 ++++++++++---------- Documentation/kernel-hacking/locking.rst | 8 ++++---- .../sound/kernel-api/writing-an-alsa-driver.rst | 16 ++++++++-------- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst index 83ae3b79a643..a648b423ba0e 100644 --- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst +++ b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst @@ -473,7 +473,7 @@ read-side critical sections that follow the idle period (the oval near the bottom of the diagram above). Plumbing this into the full grace-period execution is described -`below <#Forcing%20Quiescent%20States>`__. +`below `__. CPU-Hotplug Interface ^^^^^^^^^^^^^^^^^^^^^ @@ -494,7 +494,7 @@ mask to detect CPUs having gone offline since the beginning of this grace period. Plumbing this into the full grace-period execution is described -`below <#Forcing%20Quiescent%20States>`__. +`below `__. Forcing Quiescent States ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -532,7 +532,7 @@ from other CPUs. | RCU. But this diagram is complex enough as it is, so simplicity | | overrode accuracy. You can think of it as poetic license, or you can | | think of it as misdirection that is resolved in the | -| `stitched-together diagram <#Putting%20It%20All%20Together>`__. | +| `stitched-together diagram `__. | +-----------------------------------------------------------------------+ Grace-Period Cleanup @@ -596,7 +596,7 @@ maintain ordering. For example, if the callback function wakes up a task that runs on some other CPU, proper ordering must in place in both the callback function and the task being awakened. To see why this is important, consider the top half of the `grace-period -cleanup <#Grace-Period%20Cleanup>`__ diagram. The callback might be +cleanup`_ diagram. The callback might be running on a CPU corresponding to the leftmost leaf ``rcu_node`` structure, and awaken a task that is to run on a CPU corresponding to the rightmost leaf ``rcu_node`` structure, and the grace-period kernel diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst index e8c84fcc0507..d4c9a016074b 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.rst +++ b/Documentation/RCU/Design/Requirements/Requirements.rst @@ -45,7 +45,7 @@ requirements: #. `Other RCU Flavors`_ #. `Possible Future Changes`_ -This is followed by a `summary <#Summary>`__, however, the answers to +This is followed by a summary_, however, the answers to each quick quiz immediately follows the quiz. Select the big white space with your mouse to see the answer. @@ -1096,7 +1096,7 @@ memory barriers. | case, voluntary context switch) within an RCU read-side critical | | section. However, sleeping locks may be used within userspace RCU | | read-side critical sections, and also within Linux-kernel sleepable | -| RCU `(SRCU) <#Sleepable%20RCU>`__ read-side critical sections. In | +| RCU `(SRCU) `__ read-side critical sections. In | | addition, the -rt patchset turns spinlocks into a sleeping locks so | | that the corresponding critical sections can be preempted, which also | | means that these sleeplockified spinlocks (but not other sleeping | @@ -1186,7 +1186,7 @@ non-preemptible (``CONFIG_PREEMPT=n``) kernels, and thus `tiny RCU `__ was born. Josh Triplett has since taken over the small-memory banner with his `Linux kernel tinification `__ -project, which resulted in `SRCU <#Sleepable%20RCU>`__ becoming optional +project, which resulted in `SRCU `__ becoming optional for those kernels not needing it. The remaining performance requirements are, for the most part, @@ -1457,8 +1457,8 @@ will vary as the value of ``HZ`` varies, and can also be changed using the relevant Kconfig options and kernel boot parameters. RCU currently does not do much sanity checking of these parameters, so please use caution when changing them. Note that these forward-progress measures -are provided only for RCU, not for `SRCU <#Sleepable%20RCU>`__ or `Tasks -RCU <#Tasks%20RCU>`__. +are provided only for RCU, not for `SRCU `__ or `Tasks +RCU`_. RCU takes the following steps in ``call_rcu()`` to encourage timely invocation of callbacks when any given non-\ ``rcu_nocbs`` CPU has @@ -1477,8 +1477,8 @@ encouragement was provided: Again, these are default values when running at ``HZ=1000``, and can be overridden. Again, these forward-progress measures are provided only for -RCU, not for `SRCU <#Sleepable%20RCU>`__ or `Tasks -RCU <#Tasks%20RCU>`__. Even for RCU, callback-invocation forward +RCU, not for `SRCU `__ or `Tasks +RCU`_. Even for RCU, callback-invocation forward progress for ``rcu_nocbs`` CPUs is much less well-developed, in part because workloads benefiting from ``rcu_nocbs`` CPUs tend to invoke ``call_rcu()`` relatively infrequently. If workloads emerge that need @@ -1920,7 +1920,7 @@ Hotplug CPU The Linux kernel supports CPU hotplug, which means that CPUs can come and go. It is of course illegal to use any RCU API member from an -offline CPU, with the exception of `SRCU <#Sleepable%20RCU>`__ read-side +offline CPU, with the exception of `SRCU `__ read-side critical sections. This requirement was present from day one in DYNIX/ptx, but on the other hand, the Linux kernel's CPU-hotplug implementation is “interesting.” @@ -2177,7 +2177,7 @@ handles these states differently: However, RCU must be reliably informed as to whether any given CPU is currently in the idle loop, and, for ``NO_HZ_FULL``, also whether that CPU is executing in usermode, as discussed -`earlier <#Energy%20Efficiency>`__. It also requires that the +`earlier `__. It also requires that the scheduling-clock interrupt be enabled when RCU needs it to be: #. If a CPU is either idle or executing in usermode, and RCU believes it @@ -2294,7 +2294,7 @@ Performance, Scalability, Response Time, and Reliability ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Expanding on the `earlier -discussion <#Performance%20and%20Scalability>`__, RCU is used heavily by +discussion `__, RCU is used heavily by hot code paths in performance-critical portions of the Linux kernel's networking, security, virtualization, and scheduling code paths. RCU must therefore use efficient implementations, especially in its diff --git a/Documentation/kernel-hacking/locking.rst b/Documentation/kernel-hacking/locking.rst index 6ed806e6061b..c3448929a824 100644 --- a/Documentation/kernel-hacking/locking.rst +++ b/Documentation/kernel-hacking/locking.rst @@ -118,11 +118,11 @@ spinlock, but you may block holding a mutex. If you can't lock a mutex, your task will suspend itself, and be woken up when the mutex is released. This means the CPU can do something else while you are waiting. There are many cases when you simply can't sleep (see -`What Functions Are Safe To Call From Interrupts? <#sleeping-things>`__), +`What Functions Are Safe To Call From Interrupts?`_), and so have to use a spinlock instead. Neither type of lock is recursive: see -`Deadlock: Simple and Advanced <#deadlock>`__. +`Deadlock: Simple and Advanced`_. Locks and Uniprocessor Kernels ------------------------------ @@ -179,7 +179,7 @@ perfect world). Note that you can also use spin_lock_irq() or spin_lock_irqsave() here, which stop hardware interrupts -as well: see `Hard IRQ Context <#hard-irq-context>`__. +as well: see `Hard IRQ Context`_. This works perfectly for UP as well: the spin lock vanishes, and this macro simply becomes local_bh_disable() @@ -230,7 +230,7 @@ The Same Softirq ~~~~~~~~~~~~~~~~ The same softirq can run on the other CPUs: you can use a per-CPU array -(see `Per-CPU Data <#per-cpu-data>`__) for better performance. If you're +(see `Per-CPU Data`_) for better performance. If you're going so far as to use a softirq, you probably care about scalable performance enough to justify the extra complexity. diff --git a/Documentation/sound/kernel-api/writing-an-alsa-driver.rst b/Documentation/sound/kernel-api/writing-an-alsa-driver.rst index 73bbd59afc33..e6365836fa8b 100644 --- a/Documentation/sound/kernel-api/writing-an-alsa-driver.rst +++ b/Documentation/sound/kernel-api/writing-an-alsa-driver.rst @@ -71,7 +71,7 @@ core/oss The codes for PCM and mixer OSS emulation modules are stored in this directory. The rawmidi OSS emulation is included in the ALSA rawmidi code since it's quite small. The sequencer code is stored in -``core/seq/oss`` directory (see `below <#core-seq-oss>`__). +``core/seq/oss`` directory (see `below `__). core/seq ~~~~~~~~ @@ -382,7 +382,7 @@ where ``enable[dev]`` is the module option. Each time the ``probe`` callback is called, check the availability of the device. If not available, simply increment the device index and returns. dev will be incremented also later (`step 7 -<#set-the-pci-driver-data-and-return-zero>`__). +<7) Set the PCI driver data and return zero._>`__). 2) Create a card instance ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -450,10 +450,10 @@ field contains the information shown in ``/proc/asound/cards``. 5) Create other components, such as mixer, MIDI, etc. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Here you define the basic components such as `PCM <#PCM-Interface>`__, -mixer (e.g. `AC97 <#API-for-AC97-Codec>`__), MIDI (e.g. -`MPU-401 <#MIDI-MPU401-UART-Interface>`__), and other interfaces. -Also, if you want a `proc file <#Proc-Interface>`__, define it here, +Here you define the basic components such as `PCM `__, +mixer (e.g. `AC97 `__), MIDI (e.g. +`MPU-401 `__), and other interfaces. +Also, if you want a `proc file `__, define it here, too. 6) Register the card instance. @@ -941,7 +941,7 @@ The allocation of an interrupt source is done like this: chip->irq = pci->irq; where :c:func:`snd_mychip_interrupt()` is the interrupt handler -defined `later <#pcm-interface-interrupt-handler>`__. Note that +defined `later `__. Note that ``chip->irq`` should be defined only when :c:func:`request_irq()` succeeded. @@ -3104,7 +3104,7 @@ processing the output stream in the irq handler. If the MPU-401 interface shares its interrupt with the other logical devices on the card, set ``MPU401_INFO_IRQ_HOOK`` (see -`below <#MIDI-Interrupt-Handler>`__). +`below `__). Usually, the port address corresponds to the command port and port + 1 corresponds to the data port. If not, you may change the ``cport`` -- cgit 1.4.1 From 81e79063004f32aae5196f0c929192e69aca1694 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 26 Dec 2020 09:44:33 -0800 Subject: Documentation: admin: early_param()s are also listed in kernel-parameters Add info that "early_param()" kernel boot parameters are also listed in kernel-parameters.txt. Signed-off-by: Randy Dunlap Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Link: https://lore.kernel.org/r/20201226174433.7885-1-rdunlap@infradead.org Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/kernel-parameters.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst index 06fb1b4aa849..682ab28b5c94 100644 --- a/Documentation/admin-guide/kernel-parameters.rst +++ b/Documentation/admin-guide/kernel-parameters.rst @@ -3,8 +3,8 @@ The kernel's command-line parameters ==================================== -The following is a consolidated list of the kernel parameters as -implemented by the __setup(), core_param() and module_param() macros +The following is a consolidated list of the kernel parameters as implemented +by the __setup(), early_param(), core_param() and module_param() macros and sorted into English Dictionary order (defined as ignoring all punctuation and sorting digits before letters in a case insensitive manner), and with descriptions where known. -- cgit 1.4.1 From c7e74b3c7b1cf4c04164ff16e6c047232fd3bcef Mon Sep 17 00:00:00 2001 From: Liao Pingfang Date: Sun, 27 Dec 2020 15:15:19 +0800 Subject: docs/mm: concepts.rst: Correct the threshold to low watermark It should be "low watermark" where we wake up kswapd daemon. Signed-off-by: Liao Pingfang Link: https://lore.kernel.org/r/1609053319-3112-1-git-send-email-winndows@163.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/mm/concepts.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/mm/concepts.rst b/Documentation/admin-guide/mm/concepts.rst index fa0974fbeae7..b966fcff993b 100644 --- a/Documentation/admin-guide/mm/concepts.rst +++ b/Documentation/admin-guide/mm/concepts.rst @@ -184,7 +184,7 @@ pages either asynchronously or synchronously, depending on the state of the system. When the system is not loaded, most of the memory is free and allocation requests will be satisfied immediately from the free pages supply. As the load increases, the amount of the free pages goes -down and when it reaches a certain threshold (high watermark), an +down and when it reaches a certain threshold (low watermark), an allocation request will awaken the ``kswapd`` daemon. It will asynchronously scan memory pages and either just free them if the data they contain is available elsewhere, or evict to the backing storage -- cgit 1.4.1 From 0be1511f516e2b9766597336cedc6dc6d19e5af1 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 28 Dec 2020 15:12:12 -0800 Subject: Documentation: doc-guide: fixes to sphinx.rst Various fixes to sphinx.rst: - eliminate a double-space between 2 words - grammar/wording - punctuation - call rows in a table 'rows' instead of 'columns' (or does Sphinx call everything a column?) - It seems that "amdfonts" should be "amsfonts". I can't find any amdfonts. Signed-off-by: Randy Dunlap Reviewed-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/20201228231212.22448-1-rdunlap@infradead.org Signed-off-by: Jonathan Corbet --- Documentation/doc-guide/sphinx.rst | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/Documentation/doc-guide/sphinx.rst b/Documentation/doc-guide/sphinx.rst index 2fb2ff297d69..36ac2166ad67 100644 --- a/Documentation/doc-guide/sphinx.rst +++ b/Documentation/doc-guide/sphinx.rst @@ -48,12 +48,12 @@ or ``virtualenv``, depending on how your distribution packaged Python 3. those versions, you should run ``pip install 'docutils==0.12'``. #) It is recommended to use the RTD theme for html output. Depending - on the Sphinx version, it should be installed in separate, + on the Sphinx version, it should be installed separately, with ``pip install sphinx_rtd_theme``. - #) Some ReST pages contain math expressions. Due to the way Sphinx work, + #) Some ReST pages contain math expressions. Due to the way Sphinx works, those expressions are written using LaTeX notation. It needs texlive - installed with amdfonts and amsmath in order to evaluate them. + installed with amsfonts and amsmath in order to evaluate them. In summary, if you want to install Sphinx version 1.7.9, you should do:: @@ -128,7 +128,7 @@ Sphinx Build ============ The usual way to generate the documentation is to run ``make htmldocs`` or -``make pdfdocs``. There are also other formats available, see the documentation +``make pdfdocs``. There are also other formats available: see the documentation section of ``make help``. The generated documentation is placed in format-specific subdirectories under ``Documentation/output``. @@ -303,17 +303,17 @@ and *targets* (e.g. a ref to ``:ref:`last row ``` / :ref:`last row - head col 3 - head col 4 - * - column 1 + * - row 1 - field 1.1 - field 1.2 with autospan - * - column 2 + * - row 2 - field 2.1 - :rspan:`1` :cspan:`1` field 2.2 - 3.3 * .. _`last row`: - - column 3 + - row 3 Rendered as: @@ -325,17 +325,17 @@ Rendered as: - head col 3 - head col 4 - * - column 1 + * - row 1 - field 1.1 - field 1.2 with autospan - * - column 2 + * - row 2 - field 2.1 - :rspan:`1` :cspan:`1` field 2.2 - 3.3 * .. _`last row`: - - column 3 + - row 3 Cross-referencing ----------------- @@ -361,7 +361,7 @@ Figures & Images If you want to add an image, you should use the ``kernel-figure`` and ``kernel-image`` directives. E.g. to insert a figure with a scalable -image format use SVG (:ref:`svg_image_example`):: +image format, use SVG (:ref:`svg_image_example`):: .. kernel-figure:: svg_image.svg :alt: simple SVG image @@ -375,7 +375,7 @@ image format use SVG (:ref:`svg_image_example`):: SVG image example -The kernel figure (and image) directive support **DOT** formatted files, see +The kernel figure (and image) directive supports **DOT** formatted files, see * DOT: http://graphviz.org/pdf/dotguide.pdf * Graphviz: http://www.graphviz.org/content/dot-language @@ -394,7 +394,7 @@ A simple example (:ref:`hello_dot_file`):: DOT's hello world example -Embed *render* markups (or languages) like Graphviz's **DOT** is provided by the +Embedded *render* markups (or languages) like Graphviz's **DOT** are provided by the ``kernel-render`` directives.:: .. kernel-render:: DOT @@ -406,7 +406,7 @@ Embed *render* markups (or languages) like Graphviz's **DOT** is provided by the } How this will be rendered depends on the installed tools. If Graphviz is -installed, you will see an vector image. If not the raw markup is inserted as +installed, you will see a vector image. If not, the raw markup is inserted as *literal-block* (:ref:`hello_dot_render`). .. _hello_dot_render: @@ -421,8 +421,8 @@ installed, you will see an vector image. If not the raw markup is inserted as The *render* directive has all the options known from the *figure* directive, plus option ``caption``. If ``caption`` has a value, a *figure* node is -inserted. If not, a *image* node is inserted. A ``caption`` is also needed, if -you want to refer it (:ref:`hello_svg_render`). +inserted. If not, an *image* node is inserted. A ``caption`` is also needed, if +you want to refer to it (:ref:`hello_svg_render`). Embedded **SVG**:: -- cgit 1.4.1 From 798ed7800e20dfc3304de1b99df5ac71ad48966b Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Sun, 20 Dec 2020 07:09:27 +0100 Subject: atomic: remove further references to atomic_ops Commit f0400a77ebdc ("atomic: Delete obsolete documentation") removed ./Documentation/core-api/atomic_ops.rst, but missed to remove further references to that file. Hence, make htmldocs warns: Documentation/core-api/index.rst:53: WARNING: toctree contains reference to nonexisting document 'core-api/atomic_ops' Also, ./scripts/get_maintainer.pl --self-test=patterns warns: warning: no file matches F: Documentation/core-api/atomic_ops.rst Remove further references to ./Documentation/core-api/atomic_ops.rst. Fixes: f0400a77ebdc ("atomic: Delete obsolete documentation") Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20201220060927.21582-1-lukas.bulwahn@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/core-api/index.rst | 1 - MAINTAINERS | 1 - 2 files changed, 2 deletions(-) diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst index 69171b1799f2..f1c9d20bd42d 100644 --- a/Documentation/core-api/index.rst +++ b/Documentation/core-api/index.rst @@ -53,7 +53,6 @@ How Linux keeps everything from happening at the same time. See .. toctree:: :maxdepth: 1 - atomic_ops refcount-vs-atomic irq/index local_ops diff --git a/MAINTAINERS b/MAINTAINERS index 546aa66428c9..ddc8e90c1224 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10260,7 +10260,6 @@ S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev F: Documentation/atomic_bitops.txt F: Documentation/atomic_t.txt -F: Documentation/core-api/atomic_ops.rst F: Documentation/core-api/refcount-vs-atomic.rst F: Documentation/litmus-tests/ F: Documentation/memory-barriers.txt -- cgit 1.4.1 From cedd1862be7e666be87ec824dabc6a2b05618f36 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 28 Dec 2020 11:40:22 -0800 Subject: depmod: handle the case of /sbin/depmod without /sbin in PATH Commit 436e980e2ed5 ("kbuild: don't hardcode depmod path") stopped hard-coding the path of depmod, but in the process caused trouble for distributions that had that /sbin location, but didn't have it in the PATH (generally because /sbin is limited to the super-user path). Work around it for now by just adding /sbin to the end of PATH in the depmod.sh script. Reported-and-tested-by: Sedat Dilek Signed-off-by: Linus Torvalds --- scripts/depmod.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/depmod.sh b/scripts/depmod.sh index e083bcae343f..3643b4f896ed 100755 --- a/scripts/depmod.sh +++ b/scripts/depmod.sh @@ -15,6 +15,8 @@ if ! test -r System.map ; then exit 0 fi +# legacy behavior: "depmod" in /sbin, no /sbin in PATH +PATH="$PATH:/sbin" if [ -z $(command -v $DEPMOD) ]; then echo "Warning: 'make modules_install' requires $DEPMOD. Please install it." >&2 echo "This is probably in the kmod package." >&2 -- cgit 1.4.1 From fd16931a2f518a32753920ff20895e5cf04c8ff1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 13 Dec 2020 15:39:29 +0100 Subject: crypto: arm/chacha-neon - add missing counter increment Commit 86cd97ec4b943af3 ("crypto: arm/chacha-neon - optimize for non-block size multiples") refactored the chacha block handling in the glue code in a way that may result in the counter increment to be omitted when calling chacha_block_xor_neon() to process a full block. This violates the skcipher API, which requires that the output IV is suitable for handling more input as long as the preceding input has been presented in round multiples of the block size. Also, the same code is exposed via the chacha library interface whose callers may actually rely on this increment to occur even for final blocks that are smaller than the chacha block size. So increment the counter after calling chacha_block_xor_neon(). Fixes: 86cd97ec4b943af3 ("crypto: arm/chacha-neon - optimize for non-block size multiples") Reported-by: Eric Biggers Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/chacha-glue.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c index 7b5cf8430c6d..cdde8fd01f8f 100644 --- a/arch/arm/crypto/chacha-glue.c +++ b/arch/arm/crypto/chacha-glue.c @@ -60,6 +60,7 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, chacha_block_xor_neon(state, d, s, nrounds); if (d != dst) memcpy(dst, buf, bytes); + state[12]++; } } -- cgit 1.4.1 From 0aa171e9b267ce7c52d3a3df7bc9c1fc0203dec5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 2 Jan 2021 14:59:09 +0100 Subject: crypto: ecdh - avoid buffer overflow in ecdh_set_secret() Pavel reports that commit 17858b140bf4 ("crypto: ecdh - avoid unaligned accesses in ecdh_set_secret()") fixes one problem but introduces another: the unconditional memcpy() introduced by that commit may overflow the target buffer if the source data is invalid, which could be the result of intentional tampering. So check params.key_size explicitly against the size of the target buffer before validating the key further. Fixes: 17858b140bf4 ("crypto: ecdh - avoid unaligned accesses in ecdh_set_secret()") Reported-by: Pavel Machek Cc: Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/ecdh.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/ecdh.c b/crypto/ecdh.c index d56b8603dec9..96f80c8f8e30 100644 --- a/crypto/ecdh.c +++ b/crypto/ecdh.c @@ -39,7 +39,8 @@ static int ecdh_set_secret(struct crypto_kpp *tfm, const void *buf, struct ecdh params; unsigned int ndigits; - if (crypto_ecdh_decode_key(buf, len, ¶ms) < 0) + if (crypto_ecdh_decode_key(buf, len, ¶ms) < 0 || + params.key_size > sizeof(ctx->private_key)) return -EINVAL; ndigits = ecdh_supported_curve(params.curve_id); -- cgit 1.4.1 From 04901aab40ea3779f6fc6383ef74d8e130e817bf Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 30 Dec 2020 21:24:18 -0800 Subject: bpf: Fix a task_iter bug caused by a merge conflict resolution Latest bpf tree has a bug for bpf_iter selftest: $ ./test_progs -n 4/25 test_bpf_sk_storage_get:PASS:bpf_iter_bpf_sk_storage_helpers__open_and_load 0 nsec test_bpf_sk_storage_get:PASS:socket 0 nsec ... do_dummy_read:PASS:read 0 nsec test_bpf_sk_storage_get:FAIL:bpf_map_lookup_elem map value wasn't set correctly (expected 1792, got -1, err=0) #4/25 bpf_sk_storage_get:FAIL #4 bpf_iter:FAIL Summary: 0/0 PASSED, 0 SKIPPED, 2 FAILED When doing merge conflict resolution, Commit 4bfc4714849d missed to save curr_task to seq_file private data. The task pointer in seq_file private data is passed to bpf program. This caused NULL-pointer task passed to bpf program which will immediately return upon checking whether task pointer is NULL. This patch added back the assignment of curr_task to seq_file private data and fixed the issue. Fixes: 4bfc4714849d ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf") Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20201231052418.577024-1-yhs@fb.com --- kernel/bpf/task_iter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 3efe38191d1c..175b7b42bfc4 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -159,6 +159,7 @@ again: } /* set info->task and info->tid */ + info->task = curr_task; if (curr_tid == info->tid) { curr_fd = info->fd; } else { -- cgit 1.4.1 From 36a106a4c1c100d55ba3d32a21ef748cfcd4fa99 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:42:39 +0100 Subject: block: rsxx: select CONFIG_CRC32 Without crc32, the driver fails to link: arm-linux-gnueabi-ld: drivers/block/rsxx/config.o: in function `rsxx_load_config': config.c:(.text+0x124): undefined reference to `crc32_le' Fixes: 8722ff8cdbfa ("block: IBM RamSan 70/80 device driver") Signed-off-by: Arnd Bergmann Signed-off-by: Jens Axboe --- drivers/block/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 262326973ee0..583b671b1d2d 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -445,6 +445,7 @@ config BLK_DEV_RBD config BLK_DEV_RSXX tristate "IBM Flash Adapter 900GB Full Height PCIe Device Driver" depends on PCI + select CRC32 help Device driver for IBM's high speed PCIe SSD storage device: Flash Adapter 900GB Full Height. -- cgit 1.4.1 From 19cd3403cb0d522dd5e10188eef85817de29e26e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:43:09 +0100 Subject: lightnvm: select CONFIG_CRC32 Without CRC32 support, this fails to link: arm-linux-gnueabi-ld: drivers/lightnvm/pblk-init.o: in function `pblk_init': pblk-init.c:(.text+0x2654): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/lightnvm/pblk-init.o: in function `pblk_exit': pblk-init.c:(.text+0x2a7c): undefined reference to `crc32_le' Fixes: a4bd217b4326 ("lightnvm: physical block device (pblk) target") Signed-off-by: Arnd Bergmann Signed-off-by: Jens Axboe --- drivers/lightnvm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig index 8f39f9ba5c80..4c2ce210c123 100644 --- a/drivers/lightnvm/Kconfig +++ b/drivers/lightnvm/Kconfig @@ -19,6 +19,7 @@ if NVM config NVM_PBLK tristate "Physical Block Device Open-Channel SSD target" + select CRC32 help Allows an open-channel SSD to be exposed as a block device to the host. The target assumes the device exposes raw flash and must be -- cgit 1.4.1 From e71ba9452f0b5b2e8dc8aa5445198cd9214a6a62 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 3 Jan 2021 15:55:30 -0800 Subject: Linux 5.11-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3d328b7ab200..8b2c3f88ee5e 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Kleptomaniac Octopus # *DOCUMENTATION* -- cgit 1.4.1 From 4f8b848788f77c7f5c3bd98febce66b7aa14785f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:43:52 +0100 Subject: zonefs: select CONFIG_CRC32 When CRC32 is disabled, zonefs cannot be linked: ld: fs/zonefs/super.o: in function `zonefs_fill_super': Add a Kconfig 'select' statement for it. Fixes: 8dcc1a9d90c1 ("fs: New zonefs file system") Signed-off-by: Arnd Bergmann Signed-off-by: Damien Le Moal --- fs/zonefs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/zonefs/Kconfig b/fs/zonefs/Kconfig index ef2697b78820..827278f937fe 100644 --- a/fs/zonefs/Kconfig +++ b/fs/zonefs/Kconfig @@ -3,6 +3,7 @@ config ZONEFS_FS depends on BLOCK depends on BLK_DEV_ZONED select FS_IOMAP + select CRC32 help zonefs is a simple file system which exposes zones of a zoned block device (e.g. host-managed or host-aware SMR disk drives) as files. -- cgit 1.4.1 From 5136bb8c8b5872676f397b27f93a30568baf3a25 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Sat, 19 Dec 2020 17:24:56 +0100 Subject: MAINTAINERS: adjust GCC PLUGINS after gcc-plugin.sh removal Commit 1e860048c53e ("gcc-plugins: simplify GCC plugin-dev capability test") removed ./scripts/gcc-plugin.sh, but missed to adjust MAINTAINERS. Hence, ./scripts/get_maintainers.pl --self-test=patterns warns: warning: no file matches F: scripts/gcc-plugin.sh Adjust entries in GGC PLUGINS section after this file removal. Signed-off-by: Lukas Bulwahn Signed-off-by: Masahiro Yamada --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6eff4f720c72..181c32c18aaf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7363,7 +7363,6 @@ L: linux-hardening@vger.kernel.org S: Maintained F: Documentation/kbuild/gcc-plugins.rst F: scripts/Makefile.gcc-plugins -F: scripts/gcc-plugin.sh F: scripts/gcc-plugins/ GCOV BASED KERNEL PROFILING -- cgit 1.4.1 From d39648eb67ac851c7918c794424c266a5d2635b9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 19 Dec 2020 09:08:05 -0800 Subject: kconfig: config script: add a little user help Give the user a clue about the problem along with the 35 lines of usage/help text. Signed-off-by: Randy Dunlap Cc: Andi Kleen Cc: Masahiro Yamada Cc: linux-kbuild@vger.kernel.org Signed-off-by: Masahiro Yamada --- scripts/config | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/config b/scripts/config index 8c8d7c3d7acc..ff88e2faefd3 100755 --- a/scripts/config +++ b/scripts/config @@ -223,6 +223,7 @@ while [ "$1" != "" ] ; do ;; *) + echo "bad command: $CMD" >&2 usage ;; esac -- cgit 1.4.1 From c0f975af1745391749e4306aa8081b9a4d2cced8 Mon Sep 17 00:00:00 2001 From: John Millikin Date: Wed, 23 Dec 2020 14:04:23 +0900 Subject: kconfig: Support building mconf with vendor sysroot ncurses Changes the final fallback path in the ncurses locator for mconf to support host compilers with a non-default sysroot. This is similar to the hardcoded search for ncurses under '/usr/include', but can support compilers that keep their default header and library directories elsewhere. For nconfig, do nothing because the only vendor compiler I'm aware of with this layout (Apple Clang) ships an ncurses version that's too old for nconfig anyway. Signed-off-by: John Millikin Signed-off-by: Masahiro Yamada --- scripts/kconfig/mconf-cfg.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/kconfig/mconf-cfg.sh b/scripts/kconfig/mconf-cfg.sh index aa68ec95620d..fcd4acd4e9cb 100755 --- a/scripts/kconfig/mconf-cfg.sh +++ b/scripts/kconfig/mconf-cfg.sh @@ -33,7 +33,9 @@ if [ -f /usr/include/ncurses/ncurses.h ]; then exit 0 fi -if [ -f /usr/include/ncurses.h ]; then +# As a final fallback before giving up, check if $HOSTCC knows of a default +# ncurses installation (e.g. from a vendor-specific sysroot). +if echo '#include ' | "${HOSTCC}" -E - >/dev/null 2>&1; then echo cflags=\"-D_GNU_SOURCE\" echo libs=\"-lncurses\" exit 0 -- cgit 1.4.1 From 0c36d88cff4d72149f94809303c5180b6f716d39 Mon Sep 17 00:00:00 2001 From: John Millikin Date: Wed, 23 Dec 2020 15:23:25 +0900 Subject: lib/raid6: Let $(UNROLL) rules work with macOS userland Older versions of BSD awk are fussy about the order of '-v' and '-f' flags, and require a space after the flag name. This causes build failures on platforms with an old awk, such as macOS and NetBSD. Since GNU awk and modern versions of BSD awk (distributed with FreeBSD/OpenBSD) are fine with either form, the definition of 'cmd_unroll' can be trivially tweaked to let the lib/raid6 Makefile work with both old and new awk flag dialects. Signed-off-by: John Millikin Signed-off-by: Masahiro Yamada --- lib/raid6/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index b4c0df6d706d..c770570bfe4f 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -48,7 +48,7 @@ endif endif quiet_cmd_unroll = UNROLL $@ - cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$* < $< > $@ + cmd_unroll = $(AWK) -v N=$* -f $(srctree)/$(src)/unroll.awk < $< > $@ targets += int1.c int2.c int4.c int8.c int16.c int32.c $(obj)/int%.c: $(src)/int.uc $(src)/unroll.awk FORCE -- cgit 1.4.1 From 9bba03d4473df0b707224d4d2067b62d1e1e2a77 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 23 Dec 2020 15:35:42 +0900 Subject: kconfig: remove 'kvmconfig' and 'xenconfig' shorthands Linux 5.10 is out. Remove the 'kvmconfig' and 'xenconfig' shorthands as previously announced. Signed-off-by: Masahiro Yamada --- scripts/kconfig/Makefile | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index e46df0a2d4f9..2c40e68853dd 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -94,16 +94,6 @@ configfiles=$(wildcard $(srctree)/kernel/configs/$@ $(srctree)/arch/$(SRCARCH)/c $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m .config $(configfiles) $(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig -PHONY += kvmconfig -kvmconfig: kvm_guest.config - @echo >&2 "WARNING: 'make $@' will be removed after Linux 5.10" - @echo >&2 " Please use 'make $<' instead." - -PHONY += xenconfig -xenconfig: xen.config - @echo >&2 "WARNING: 'make $@' will be removed after Linux 5.10" - @echo >&2 " Please use 'make $<' instead." - PHONY += tinyconfig tinyconfig: $(Q)$(MAKE) -f $(srctree)/Makefile allnoconfig tiny.config -- cgit 1.4.1 From d6c1ddd938d84a1adef7e19e8efc10e1b4df5034 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Wed, 30 Dec 2020 16:25:34 +0100 Subject: USB: serial: option: add Quectel EM160R-GL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New modem using ff/ff/30 for QCDM, ff/00/00 for AT and NMEA, and ff/ff/ff for RMNET/QMI. T: Bus=02 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=ef(misc ) Sub=02 Prot=01 MxPS= 9 #Cfgs= 1 P: Vendor=2c7c ProdID=0620 Rev= 4.09 S: Manufacturer=Quectel S: Product=EM160R-GL S: SerialNumber=e31cedc1 C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=896mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=(none) E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=88(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms Cc: stable@vger.kernel.org Signed-off-by: Bjørn Mork [ johan: add model comment ] Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 2c21e34235bb..ee726a106706 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1117,6 +1117,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0620, 0xff, 0xff, 0x30) }, /* EM160R-GL */ + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0620, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x10), -- cgit 1.4.1 From 42e85f90171a4ba59a1e1cedbbc30ce3f68f2317 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 15 Dec 2020 11:30:26 +0100 Subject: arm64/smp: Remove unused irq variable in arch_show_interrupts() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/arm64/kernel/smp.c: In function ‘arch_show_interrupts’: arch/arm64/kernel/smp.c:808:16: warning: unused variable ‘irq’ [-Wunused-variable] 808 | unsigned int irq = irq_desc_get_irq(ipi_desc[i]); | ^~~ The removal of the last user forgot to remove the variable. Fixes: 5089bc51f81f ("arm64/smp: Use irq_desc_kstat_cpu() in arch_show_interrupts()") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20201215103026.2872532-1-geert+renesas@glider.be Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 6bc3a3698c3d..376343d6f13a 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -807,7 +807,6 @@ int arch_show_interrupts(struct seq_file *p, int prec) unsigned int cpu, i; for (i = 0; i < NR_IPI; i++) { - unsigned int irq = irq_desc_get_irq(ipi_desc[i]); seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, prec >= 4 ? " " : ""); for_each_online_cpu(cpu) -- cgit 1.4.1 From b614231dec7864a338ce85032aa3d2d7ea2bc46d Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Wed, 2 Dec 2020 23:34:58 -0800 Subject: arm64: mte: remove an ISB on kernel exit This ISB is unnecessary because we will soon do an ERET. Signed-off-by: Peter Collingbourne Link: https://linux-review.googlesource.com/id/I69f1ee6bb09b1372dd744a0e01cedaf090c8d448 Link: https://lore.kernel.org/r/20201203073458.2675400-1-pcc@google.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 2a93fa5f4e49..a8c3e7aaca74 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -182,7 +182,6 @@ alternative_else_nop_endif mrs_s \tmp2, SYS_GCR_EL1 bfi \tmp2, \tmp, #0, #16 msr_s SYS_GCR_EL1, \tmp2 - isb #endif .endm @@ -194,6 +193,7 @@ alternative_else_nop_endif ldr_l \tmp, gcr_kernel_excl mte_set_gcr \tmp, \tmp2 + isb 1: #endif .endm -- cgit 1.4.1 From 095507dc1350b3a2b8b39fdc05edba0c10859eca Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Fri, 18 Dec 2020 17:33:07 +0100 Subject: arm64: mm: Fix ARCH_LOW_ADDRESS_LIMIT when !CONFIG_ZONE_DMA Systems configured with CONFIG_ZONE_DMA32, CONFIG_ZONE_NORMAL and !CONFIG_ZONE_DMA will fail to properly setup ARCH_LOW_ADDRESS_LIMIT. The limit will default to ~0ULL, effectively spanning the whole memory, which is too high for a configuration that expects low memory to be capped at 4GB. Fix ARCH_LOW_ADDRESS_LIMIT by falling back to arm64_dma32_phys_limit when arm64_dma_phys_limit isn't set. arm64_dma32_phys_limit will honour CONFIG_ZONE_DMA32, or span the entire memory when not enabled. Fixes: 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32") Signed-off-by: Nicolas Saenz Julienne Link: https://lore.kernel.org/r/20201218163307.10150-1-nsaenzjulienne@suse.de Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/processor.h | 3 ++- arch/arm64/mm/init.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index ca2cd75d3286..69ad25fbeae4 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -94,7 +94,8 @@ #endif /* CONFIG_ARM64_FORCE_52BIT */ extern phys_addr_t arm64_dma_phys_limit; -#define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1) +extern phys_addr_t arm64_dma32_phys_limit; +#define ARCH_LOW_ADDRESS_LIMIT ((arm64_dma_phys_limit ? : arm64_dma32_phys_limit) - 1) struct debug_info { #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 75addb36354a..7deddf56f7c3 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -59,7 +59,7 @@ EXPORT_SYMBOL(memstart_addr); * bit addressable memory area. */ phys_addr_t arm64_dma_phys_limit __ro_after_init; -static phys_addr_t arm64_dma32_phys_limit __ro_after_init; +phys_addr_t arm64_dma32_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE /* -- cgit 1.4.1 From 26982a89cad77c0efc1c0c79bee0e3d75e9281d4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 21 Dec 2020 22:37:58 +0000 Subject: afs: Work around strnlen() oops with CONFIG_FORTIFIED_SOURCE=y AFS has a structured layout in its directory contents (AFS dirs are downloaded as files and parsed locally by the client for lookup/readdir). The slots in the directory are defined by union afs_xdr_dirent. This, however, only directly allows a name of a length that will fit into that union. To support a longer name, the next 1-8 contiguous entries are annexed to the first one and the name flows across these. afs_dir_iterate_block() uses strnlen(), limited to the space to the end of the page, to find out how long the name is. This worked fine until 6a39e62abbaf. With that commit, the compiler determines the size of the array and asserts that the string fits inside that array. This is a problem for AFS because we *expect* it to overflow one or more arrays. A similar problem also occurs in afs_dir_scan_block() when a directory file is being locally edited to avoid the need to redownload it. There strlen() was being used safely because each page has the last byte set to 0 when the file is downloaded and validated (in afs_dir_check_page()). Fix this by changing the afs_xdr_dirent union name field to an indeterminate-length array and dropping the overflow field. (Note that whilst looking at this, I realised that the calculation of the number of slots a dirent used is non-standard and not quite right, but I'll address that in a separate patch.) The issue can be triggered by something like: touch /afs/example.com/thisisaveryveryverylongname and it generates a report that looks like: detected buffer overflow in strnlen ------------[ cut here ]------------ kernel BUG at lib/string.c:1149! ... RIP: 0010:fortify_panic+0xf/0x11 ... Call Trace: afs_dir_iterate_block+0x12b/0x35b afs_dir_iterate+0x14e/0x1ce afs_do_lookup+0x131/0x417 afs_lookup+0x24f/0x344 lookup_open.isra.0+0x1bb/0x27d open_last_lookups+0x166/0x237 path_openat+0xe0/0x159 do_filp_open+0x48/0xa4 ? kmem_cache_alloc+0xf5/0x16e ? __clear_close_on_exec+0x13/0x22 ? _raw_spin_unlock+0xa/0xb do_sys_openat2+0x72/0xde do_sys_open+0x3b/0x58 do_syscall_64+0x2d/0x3a entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 6a39e62abbaf ("lib: string.h: detect intra-object overflow in fortified string functions") Reported-by: Marc Dionne Signed-off-by: David Howells Tested-by: Marc Dionne cc: Daniel Axtens --- fs/afs/xdr_fs.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/afs/xdr_fs.h b/fs/afs/xdr_fs.h index 94f1f398eefa..c926430fd08a 100644 --- a/fs/afs/xdr_fs.h +++ b/fs/afs/xdr_fs.h @@ -54,10 +54,15 @@ union afs_xdr_dirent { __be16 hash_next; __be32 vnode; __be32 unique; - u8 name[16]; - u8 overflow[4]; /* if any char of the name (inc - * NUL) reaches here, consume - * the next dirent too */ + u8 name[]; + /* When determining the number of dirent slots needed to + * represent a directory entry, name should be assumed to be 16 + * bytes, due to a now-standardised (mis)calculation, but it is + * in fact 20 bytes in size. + * + * For names longer than (16 or) 20 bytes, extra slots should + * be annexed to this one using the extended_name format. + */ } u; u8 extended_name[32]; } __packed; -- cgit 1.4.1 From 366911cd762db02c2dd32fad1be96b72a66f205d Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 23 Dec 2020 10:39:57 +0000 Subject: afs: Fix directory entry size calculation The number of dirent records used by an AFS directory entry should be calculated using the assumption that there is a 16-byte name field in the first block, rather than a 20-byte name field (which is actually the case). This miscalculation is historic and effectively standard, so we have to use it. The calculation we need to use is: 1 + (((strlen(name) + 1) + 15) >> 5) where we are adding one to the strlen() result to account for the NUL termination. Fix this by the following means: (1) Create an inline function to do the calculation for a given name length. (2) Use the function to calculate the number of records used for a dirent in afs_dir_iterate_block(). Use this to move the over-end check out of the loop since it only needs to be done once. Further use this to only go through the loop for the 2nd+ records composing an entry. The only test there now is for if the record is allocated - and we already checked the first block at the top of the outer loop. (3) Add a max name length check in afs_dir_iterate_block(). (4) Make afs_edit_dir_add() and afs_edit_dir_remove() use the function from (1) to calculate the number of blocks rather than doing it incorrectly themselves. Fixes: 63a4681ff39c ("afs: Locally edit directory data for mkdir/create/unlink/...") Fixes: ^1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: David Howells Tested-by: Marc Dionne --- fs/afs/dir.c | 49 ++++++++++++++++++++++++---------------------- fs/afs/dir_edit.c | 6 ++---- fs/afs/xdr_fs.h | 14 ++++++++++++- include/trace/events/afs.h | 2 ++ 4 files changed, 43 insertions(+), 28 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 9068d5578a26..7bd659ad959e 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -350,7 +350,7 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, unsigned blkoff) { union afs_xdr_dirent *dire; - unsigned offset, next, curr; + unsigned offset, next, curr, nr_slots; size_t nlen; int tmp; @@ -363,13 +363,12 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, offset < AFS_DIR_SLOTS_PER_BLOCK; offset = next ) { - next = offset + 1; - /* skip entries marked unused in the bitmap */ if (!(block->hdr.bitmap[offset / 8] & (1 << (offset % 8)))) { _debug("ENT[%zu.%u]: unused", blkoff / sizeof(union afs_xdr_dir_block), offset); + next = offset + 1; if (offset >= curr) ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent); @@ -381,35 +380,39 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode, nlen = strnlen(dire->u.name, sizeof(*block) - offset * sizeof(union afs_xdr_dirent)); + if (nlen > AFSNAMEMAX - 1) { + _debug("ENT[%zu]: name too long (len %u/%zu)", + blkoff / sizeof(union afs_xdr_dir_block), + offset, nlen); + return afs_bad(dvnode, afs_file_error_dir_name_too_long); + } _debug("ENT[%zu.%u]: %s %zu \"%s\"", blkoff / sizeof(union afs_xdr_dir_block), offset, (offset < curr ? "skip" : "fill"), nlen, dire->u.name); - /* work out where the next possible entry is */ - for (tmp = nlen; tmp > 15; tmp -= sizeof(union afs_xdr_dirent)) { - if (next >= AFS_DIR_SLOTS_PER_BLOCK) { - _debug("ENT[%zu.%u]:" - " %u travelled beyond end dir block" - " (len %u/%zu)", - blkoff / sizeof(union afs_xdr_dir_block), - offset, next, tmp, nlen); - return afs_bad(dvnode, afs_file_error_dir_over_end); - } - if (!(block->hdr.bitmap[next / 8] & - (1 << (next % 8)))) { - _debug("ENT[%zu.%u]:" - " %u unmarked extension (len %u/%zu)", + nr_slots = afs_dir_calc_slots(nlen); + next = offset + nr_slots; + if (next > AFS_DIR_SLOTS_PER_BLOCK) { + _debug("ENT[%zu.%u]:" + " %u extends beyond end dir block" + " (len %zu)", + blkoff / sizeof(union afs_xdr_dir_block), + offset, next, nlen); + return afs_bad(dvnode, afs_file_error_dir_over_end); + } + + /* Check that the name-extension dirents are all allocated */ + for (tmp = 1; tmp < nr_slots; tmp++) { + unsigned int ix = offset + tmp; + if (!(block->hdr.bitmap[ix / 8] & (1 << (ix % 8)))) { + _debug("ENT[%zu.u]:" + " %u unmarked extension (%u/%u)", blkoff / sizeof(union afs_xdr_dir_block), - offset, next, tmp, nlen); + offset, tmp, nr_slots); return afs_bad(dvnode, afs_file_error_dir_unmarked_ext); } - - _debug("ENT[%zu.%u]: ext %u/%zu", - blkoff / sizeof(union afs_xdr_dir_block), - next, tmp, nlen); - next++; } /* skip if starts before the current position */ diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c index 2ffe09abae7f..f4600c1353ad 100644 --- a/fs/afs/dir_edit.c +++ b/fs/afs/dir_edit.c @@ -215,8 +215,7 @@ void afs_edit_dir_add(struct afs_vnode *vnode, } /* Work out how many slots we're going to need. */ - need_slots = round_up(12 + name->len + 1 + 4, AFS_DIR_DIRENT_SIZE); - need_slots /= AFS_DIR_DIRENT_SIZE; + need_slots = afs_dir_calc_slots(name->len); meta_page = kmap(page0); meta = &meta_page->blocks[0]; @@ -393,8 +392,7 @@ void afs_edit_dir_remove(struct afs_vnode *vnode, } /* Work out how many slots we're going to discard. */ - need_slots = round_up(12 + name->len + 1 + 4, AFS_DIR_DIRENT_SIZE); - need_slots /= AFS_DIR_DIRENT_SIZE; + need_slots = afs_dir_calc_slots(name->len); meta_page = kmap(page0); meta = &meta_page->blocks[0]; diff --git a/fs/afs/xdr_fs.h b/fs/afs/xdr_fs.h index c926430fd08a..8ca868164507 100644 --- a/fs/afs/xdr_fs.h +++ b/fs/afs/xdr_fs.h @@ -58,7 +58,8 @@ union afs_xdr_dirent { /* When determining the number of dirent slots needed to * represent a directory entry, name should be assumed to be 16 * bytes, due to a now-standardised (mis)calculation, but it is - * in fact 20 bytes in size. + * in fact 20 bytes in size. afs_dir_calc_slots() should be + * used for this. * * For names longer than (16 or) 20 bytes, extra slots should * be annexed to this one using the extended_name format. @@ -101,4 +102,15 @@ struct afs_xdr_dir_page { union afs_xdr_dir_block blocks[AFS_DIR_BLOCKS_PER_PAGE]; }; +/* + * Calculate the number of dirent slots required for any given name length. + * The calculation is made assuming the part of the name in the first slot is + * 16 bytes, rather than 20, but this miscalculation is now standardised. + */ +static inline unsigned int afs_dir_calc_slots(size_t name_len) +{ + name_len++; /* NUL-terminated */ + return 1 + ((name_len + 15) / AFS_DIR_DIRENT_SIZE); +} + #endif /* XDR_FS_H */ diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 4eef374d4413..4a5cc8c64be3 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -231,6 +231,7 @@ enum afs_file_error { afs_file_error_dir_bad_magic, afs_file_error_dir_big, afs_file_error_dir_missing_page, + afs_file_error_dir_name_too_long, afs_file_error_dir_over_end, afs_file_error_dir_small, afs_file_error_dir_unmarked_ext, @@ -488,6 +489,7 @@ enum afs_cb_break_reason { EM(afs_file_error_dir_bad_magic, "DIR_BAD_MAGIC") \ EM(afs_file_error_dir_big, "DIR_BIG") \ EM(afs_file_error_dir_missing_page, "DIR_MISSING_PAGE") \ + EM(afs_file_error_dir_name_too_long, "DIR_NAME_TOO_LONG") \ EM(afs_file_error_dir_over_end, "DIR_ENT_OVER_END") \ EM(afs_file_error_dir_small, "DIR_SMALL") \ EM(afs_file_error_dir_unmarked_ext, "DIR_UNMARKED_EXT") \ -- cgit 1.4.1 From 0bd1bf86ab79555425b9f0b63005e181defe4da6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 14:57:29 +0100 Subject: dmaengine: qcom: fix gpi undefined behavior gcc points out an incorrect error handling loop: drivers/dma/qcom/gpi.c: In function 'gpi_ch_init': drivers/dma/qcom/gpi.c:1254:15: error: iteration 2 invokes undefined behavior [-Werror=aggressive-loop-optimizations] 1254 | struct gpii *gpii = gchan->gpii; | ^~~~ drivers/dma/qcom/gpi.c:1951:2: note: within this loop 1951 | for (i = i - 1; i >= 0; i++) { | ^~~ Change the loop to correctly walk backwards through the initialized fields rather than off into the woods. Fixes: 5d0c3533a19f ("dmaengine: qcom: Add GPI dma driver") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20210103135738.3741123-1-arnd@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/qcom/gpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c index 556c070a514c..1a0bf6b0567a 100644 --- a/drivers/dma/qcom/gpi.c +++ b/drivers/dma/qcom/gpi.c @@ -1948,7 +1948,7 @@ static int gpi_ch_init(struct gchan *gchan) return ret; error_start_chan: - for (i = i - 1; i >= 0; i++) { + for (i = i - 1; i >= 0; i--) { gpi_stop_chan(&gpii->gchan[i]); gpi_send_cmd(gpii, gchan, GPI_CH_CMD_RESET); } -- cgit 1.4.1 From 99974aedbd73523969afb09f33c6e3047cd0ddae Mon Sep 17 00:00:00 2001 From: Shravya Kumbham Date: Wed, 23 Dec 2020 16:51:00 +0530 Subject: dmaengine: xilinx_dma: check dma_async_device_register return value dma_async_device_register() can return non-zero error code. Add condition to check the return value of dma_async_device_register function and handle the error path. Addresses-Coverity: Event check_return. Fixes: 9cd4360de609 ("dma: Add Xilinx AXI Video Direct Memory Access Engine driver support") Signed-off-by: Shravya Kumbham Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1608722462-29519-2-git-send-email-radhey.shyam.pandey@xilinx.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 22faea653ea8..091f1f80dcff 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -3112,7 +3112,11 @@ static int xilinx_dma_probe(struct platform_device *pdev) } /* Register the DMA engine with the core */ - dma_async_device_register(&xdev->common); + err = dma_async_device_register(&xdev->common); + if (err) { + dev_err(xdev->dev, "failed to register the dma device\n"); + goto error; + } err = of_dma_controller_register(node, of_dma_xilinx_xlate, xdev); -- cgit 1.4.1 From faeb0731be0a31e2246b21a85fa7dabbd750101d Mon Sep 17 00:00:00 2001 From: Shravya Kumbham Date: Wed, 23 Dec 2020 16:51:01 +0530 Subject: dmaengine: xilinx_dma: fix incompatible param warning in _child_probe() In xilinx_dma_child_probe function, the nr_channels variable is passed to of_property_read_u32() which expects an u32 return value pointer. Modify the nr_channels variable type from int to u32 to fix the incompatible parameter coverity warning. Addresses-Coverity: Event incompatible_param. Fixes: 1a9e7a03c761 ("dmaengine: vdma: Add support for mulit-channel dma mode") Signed-off-by: Shravya Kumbham Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1608722462-29519-3-git-send-email-radhey.shyam.pandey@xilinx.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 091f1f80dcff..3e374e8d2244 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -2900,7 +2900,8 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev, static int xilinx_dma_child_probe(struct xilinx_dma_device *xdev, struct device_node *node) { - int ret, i, nr_channels = 1; + int ret, i; + u32 nr_channels = 1; ret = of_property_read_u32(node, "dma-channels", &nr_channels); if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA && ret < 0) -- cgit 1.4.1 From 2d5efea64472469117dc1a9a39530069e95b21e9 Mon Sep 17 00:00:00 2001 From: Shravya Kumbham Date: Wed, 23 Dec 2020 16:51:02 +0530 Subject: dmaengine: xilinx_dma: fix mixed_enum_type coverity warning Typecast the fls(width -1) with (enum dmaengine_alignment) in xilinx_dma_chan_probe function to fix the coverity warning. Addresses-Coverity: Event mixed_enum_type. Fixes: 9cd4360de609 ("dma: Add Xilinx AXI Video Direct Memory Access Engine driver support") Signed-off-by: Shravya Kumbham Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1608722462-29519-4-git-send-email-radhey.shyam.pandey@xilinx.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 3e374e8d2244..79777550a6ff 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -2781,7 +2781,7 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev, has_dre = false; if (!has_dre) - xdev->common.copy_align = fls(width - 1); + xdev->common.copy_align = (enum dmaengine_alignment)fls(width - 1); if (of_device_is_compatible(node, "xlnx,axi-vdma-mm2s-channel") || of_device_is_compatible(node, "xlnx,axi-dma-mm2s-channel") || -- cgit 1.4.1 From 98bf2d3f4970179c702ef64db658e0553bc6ef3a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 22 Dec 2020 07:11:18 +0000 Subject: powerpc/32s: Fix RTAS machine check with VMAP stack When we have VMAP stack, exception prolog 1 sets r1, not r11. When it is not an RTAS machine check, don't trash r1 because it is needed by prolog 1. Fixes: da7bb43ab9da ("powerpc/32: Fix vmap stack - Properly set r1 before activating MMU") Fixes: d2e006036082 ("powerpc/32: Use SPRN_SPRG_SCRATCH2 in exception prologs") Cc: stable@vger.kernel.org # v5.10+ Signed-off-by: Christophe Leroy [mpe: Squash in fixup for RTAS machine check from Christophe] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/bc77d61d1c18940e456a2dee464f1e2eda65a3f0.1608621048.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 349bf3f0c3af..858fbc8b19f3 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -260,10 +260,19 @@ __secondary_hold_acknowledge: MachineCheck: EXCEPTION_PROLOG_0 #ifdef CONFIG_PPC_CHRP +#ifdef CONFIG_VMAP_STACK + mtspr SPRN_SPRG_SCRATCH2,r1 + mfspr r1, SPRN_SPRG_THREAD + lwz r1, RTAS_SP(r1) + cmpwi cr1, r1, 0 + bne cr1, 7f + mfspr r1, SPRN_SPRG_SCRATCH2 +#else mfspr r11, SPRN_SPRG_THREAD lwz r11, RTAS_SP(r11) cmpwi cr1, r11, 0 bne cr1, 7f +#endif #endif /* CONFIG_PPC_CHRP */ EXCEPTION_PROLOG_1 for_rtas=1 7: EXCEPTION_PROLOG_2 -- cgit 1.4.1 From c3d6eb6e54373f297313b65c1f2319d36914d579 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 30 Dec 2020 20:44:07 +0800 Subject: HID: multitouch: Enable multi-input for Synaptics pointstick/touchpad device Pointstick and its left/right buttons on HP EliteBook 850 G7 need multi-input quirk to work correctly. Signed-off-by: Kai-Heng Feng Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index d670bcd57bde..0743ef51d3b2 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -2054,6 +2054,10 @@ static const struct hid_device_id mt_devices[] = { HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0xce08) }, + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_SYNAPTICS, 0xce09) }, + /* TopSeed panels */ { .driver_data = MT_CLS_TOPSEED, MT_USB_DEVICE(USB_VENDOR_ID_TOPSEED2, -- cgit 1.4.1 From 6170d077bf92c5b3dfbe1021688d3c0404f7c9e9 Mon Sep 17 00:00:00 2001 From: Xu Yilun Date: Mon, 4 Jan 2021 09:29:09 +0800 Subject: spi: fix the divide by 0 error when calculating xfer waiting time The xfer waiting time is the result of xfer->len / xfer->speed_hz. This patch makes the assumption of 100khz xfer speed if the xfer->speed_hz is not assigned and stays 0. This avoids the divide by 0 issue and ensures a reasonable tolerant waiting time. Signed-off-by: Xu Yilun Link: https://lore.kernel.org/r/1609723749-3557-1-git-send-email-yilun.xu@intel.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index f59bf5094adb..720ab34784c1 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1108,6 +1108,7 @@ static int spi_transfer_wait(struct spi_controller *ctlr, { struct spi_statistics *statm = &ctlr->statistics; struct spi_statistics *stats = &msg->spi->statistics; + u32 speed_hz = xfer->speed_hz; unsigned long long ms; if (spi_controller_is_slave(ctlr)) { @@ -1116,8 +1117,11 @@ static int spi_transfer_wait(struct spi_controller *ctlr, return -EINTR; } } else { + if (!speed_hz) + speed_hz = 100000; + ms = 8LL * 1000LL * xfer->len; - do_div(ms, xfer->speed_hz); + do_div(ms, speed_hz); ms += ms + 200; /* some tolerance */ if (ms > UINT_MAX) -- cgit 1.4.1 From 2bf3a72b08e7f6356a2db9e1571ca65f683510bb Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 31 Dec 2020 15:23:45 +0300 Subject: dt-bindings: regulator: qcom,rpmh-regulator: add pm8009 revision PMIC pm8009 has special revision (P=1) made for sm8250 platform. The major difference is the S2 regulator which supplies 0.95 V instead of 2.848V. Add special compatibility string for this chip revision. The datasheet calls the chip just pm8009-1, so use the same name. Signed-off-by: Dmitry Baryshkov Reviewed-by: Vinod Koul Link: https://lore.kernel.org/r/20201231122348.637917-2-dmitry.baryshkov@linaro.org Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt index b8f0b7809c02..7d462b899473 100644 --- a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt +++ b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.txt @@ -44,6 +44,7 @@ First Level Nodes - PMIC Definition: Must be one of below: "qcom,pm8005-rpmh-regulators" "qcom,pm8009-rpmh-regulators" + "qcom,pm8009-1-rpmh-regulators" "qcom,pm8150-rpmh-regulators" "qcom,pm8150l-rpmh-regulators" "qcom,pm8350-rpmh-regulators" -- cgit 1.4.1 From df6b92fa40050e59ea89784294bf6d04c0c47705 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 31 Dec 2020 15:23:46 +0300 Subject: regulator: qcom-rpmh-regulator: correct hfsmps515 definition According to the datasheet pm8009's HFS515 regulators have 16mV resolution rather than declared 1.6 mV. Correct the resolution. Signed-off-by: Dmitry Baryshkov Fixes: 06369bcc15a1 ("regulator: qcom-rpmh: Add support for SM8150") Reviewed-by: Vinod Koul Link: https://lore.kernel.org/r/20201231122348.637917-3-dmitry.baryshkov@linaro.org Signed-off-by: Mark Brown --- drivers/regulator/qcom-rpmh-regulator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c index fe030ec4b7db..c395a8dda6f7 100644 --- a/drivers/regulator/qcom-rpmh-regulator.c +++ b/drivers/regulator/qcom-rpmh-regulator.c @@ -726,7 +726,7 @@ static const struct rpmh_vreg_hw_data pmic5_ftsmps510 = { static const struct rpmh_vreg_hw_data pmic5_hfsmps515 = { .regulator_type = VRM, .ops = &rpmh_regulator_vrm_ops, - .voltage_range = REGULATOR_LINEAR_RANGE(2800000, 0, 4, 1600), + .voltage_range = REGULATOR_LINEAR_RANGE(2800000, 0, 4, 16000), .n_voltages = 5, .pmic_mode_map = pmic_mode_map_pmic5_smps, .of_map_mode = rpmh_regulator_pmic4_smps_of_map_mode, -- cgit 1.4.1 From d957d1610c661e758426654de3b04bea6fb29f8b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 30 Dec 2020 15:56:23 +0100 Subject: regulator: qcom-rpmh: add QCOM_COMMAND_DB dependency A built-in regulator driver cannot link against a modular cmd_db driver: qcom-rpmh-regulator.c:(.text+0x174): undefined reference to `cmd_db_read_addr' There is already a dependency for RPMh, so add another one of this type for cmd_db. Fixes: 34c5aa2666db ("regulator: Kconfig: Fix REGULATOR_QCOM_RPMH dependencies to avoid build error") Fixes: 46fc033eba42 ("regulator: add QCOM RPMh regulator driver") Signed-off-by: Arnd Bergmann Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20201230145712.3133110-1-arnd@kernel.org Signed-off-by: Mark Brown --- drivers/regulator/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 53fa84f4d1e1..5abdd29fb9f3 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -881,6 +881,7 @@ config REGULATOR_QCOM_RPM config REGULATOR_QCOM_RPMH tristate "Qualcomm Technologies, Inc. RPMh regulator driver" depends on QCOM_RPMH || (QCOM_RPMH=n && COMPILE_TEST) + depends on QCOM_COMMAND_DB || (QCOM_COMMAND_DB=n && COMPILE_TEST) help This driver supports control of PMIC regulators via the RPMh hardware block found on Qualcomm Technologies Inc. SoCs. RPMh regulator -- cgit 1.4.1 From 4b1a60a1bb8f03d82c3f6da424adc96667b59f2a Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Mon, 4 Jan 2021 15:50:43 +0200 Subject: MAINTAINERS: Update Georgi's email address Use my kernel.org email as main address to make things a bit easier for me to handle. Link: https://lore.kernel.org/r/20210104135043.31262-1-georgi.djakov@linaro.org Signed-off-by: Georgi Djakov --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 546aa66428c9..a15e306123ef 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9240,7 +9240,7 @@ F: tools/testing/selftests/sgx/* K: \bSGX_ INTERCONNECT API -M: Georgi Djakov +M: Georgi Djakov L: linux-pm@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/interconnect/ -- cgit 1.4.1 From de30491e8bfeeba1500bba293333eb51ece529d5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 14:53:55 +0100 Subject: HID: sfh: fix address space confusion The new driver uses a phys_addr_t to store a DMA address, which does not work when the two are different size: drivers/hid/amd-sfh-hid/amd_sfh_client.c:157:11: error: incompatible pointer types passing 'phys_addr_t *' (aka 'unsigned int *') to parameter of type 'dma_addr_t *' (aka 'unsigned long long *') [-Werror,-Wincompatible-pointer-types] &cl_data->sensor_phys_addr[i], ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/dma-mapping.h:393:15: note: passing argument to parameter 'dma_handle' here dma_addr_t *dma_handle, gfp_t gfp) ^ Change both the type and the variable name to dma_addr for consistency. Fixes: 4b2c53d93a4b ("SFH:Transport Driver to add support of AMD Sensor Fusion Hub (SFH)") Signed-off-by: Arnd Bergmann Signed-off-by: Jiri Kosina --- drivers/hid/amd-sfh-hid/amd_sfh_client.c | 8 ++++---- drivers/hid/amd-sfh-hid/amd_sfh_hid.h | 2 +- drivers/hid/amd-sfh-hid/amd_sfh_pcie.c | 2 +- drivers/hid/amd-sfh-hid/amd_sfh_pcie.h | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c index 3d1ccac5d99a..2ab38b715347 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c @@ -154,7 +154,7 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) for (i = 0; i < cl_data->num_hid_devices; i++) { cl_data->sensor_virt_addr[i] = dma_alloc_coherent(dev, sizeof(int) * 8, - &cl_data->sensor_phys_addr[i], + &cl_data->sensor_dma_addr[i], GFP_KERNEL); cl_data->sensor_sts[i] = 0; cl_data->sensor_requested_cnt[i] = 0; @@ -187,7 +187,7 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) } info.period = msecs_to_jiffies(AMD_SFH_IDLE_LOOP); info.sensor_idx = cl_idx; - info.phys_address = cl_data->sensor_phys_addr[i]; + info.dma_address = cl_data->sensor_dma_addr[i]; cl_data->report_descr[i] = kzalloc(cl_data->report_descr_sz[i], GFP_KERNEL); if (!cl_data->report_descr[i]) { @@ -212,7 +212,7 @@ cleanup: if (cl_data->sensor_virt_addr[i]) { dma_free_coherent(&privdata->pdev->dev, 8 * sizeof(int), cl_data->sensor_virt_addr[i], - cl_data->sensor_phys_addr[i]); + cl_data->sensor_dma_addr[i]); } kfree(cl_data->feature_report[i]); kfree(cl_data->input_report[i]); @@ -238,7 +238,7 @@ int amd_sfh_hid_client_deinit(struct amd_mp2_dev *privdata) if (cl_data->sensor_virt_addr[i]) { dma_free_coherent(&privdata->pdev->dev, 8 * sizeof(int), cl_data->sensor_virt_addr[i], - cl_data->sensor_phys_addr[i]); + cl_data->sensor_dma_addr[i]); } } kfree(cl_data); diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_hid.h b/drivers/hid/amd-sfh-hid/amd_sfh_hid.h index 6be0783d885c..d7eac1728e31 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_hid.h +++ b/drivers/hid/amd-sfh-hid/amd_sfh_hid.h @@ -27,7 +27,7 @@ struct amdtp_cl_data { int hid_descr_size[MAX_HID_DEVICES]; phys_addr_t phys_addr_base; u32 *sensor_virt_addr[MAX_HID_DEVICES]; - phys_addr_t sensor_phys_addr[MAX_HID_DEVICES]; + dma_addr_t sensor_dma_addr[MAX_HID_DEVICES]; u32 sensor_sts[MAX_HID_DEVICES]; u32 sensor_requested_cnt[MAX_HID_DEVICES]; u8 report_type[MAX_HID_DEVICES]; diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c index a51c7b76283b..dbac16641662 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c @@ -41,7 +41,7 @@ void amd_start_sensor(struct amd_mp2_dev *privdata, struct amd_mp2_sensor_info i cmd_param.s.buf_layout = 1; cmd_param.s.buf_length = 16; - writeq(info.phys_address, privdata->mmio + AMD_C2P_MSG2); + writeq(info.dma_address, privdata->mmio + AMD_C2P_MSG2); writel(cmd_param.ul, privdata->mmio + AMD_C2P_MSG1); writel(cmd_base.ul, privdata->mmio + AMD_C2P_MSG0); } diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h index e8be94f935b7..8f8d19b2cfe5 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h @@ -67,7 +67,7 @@ struct amd_mp2_dev { struct amd_mp2_sensor_info { u8 sensor_idx; u32 period; - phys_addr_t phys_address; + dma_addr_t dma_address; }; void amd_start_sensor(struct amd_mp2_dev *privdata, struct amd_mp2_sensor_info info); -- cgit 1.4.1 From 273435a1d4e5826f039625c23ba4fe9a09f24d75 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:41:44 +0100 Subject: HID: sony: select CONFIG_CRC32 Without crc32 support, this driver fails to link: arm-linux-gnueabi-ld: drivers/hid/hid-sony.o: in function `sony_raw_event': hid-sony.c:(.text+0x8f4): undefined reference to `crc32_le' arm-linux-gnueabi-ld: hid-sony.c:(.text+0x900): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/hid/hid-sony.o:hid-sony.c:(.text+0x4408): more undefined references to `crc32_le' follow Signed-off-by: Arnd Bergmann Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 7bdda1b5b221..09fa75a2b289 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -899,6 +899,7 @@ config HID_SONY depends on NEW_LEDS depends on LEDS_CLASS select POWER_SUPPLY + select CRC32 help Support for -- cgit 1.4.1 From 0e2d6795e8dbe91c2f5473564c6b25d11df3778b Mon Sep 17 00:00:00 2001 From: Daniel Palmer Date: Sun, 27 Dec 2020 12:17:16 +0900 Subject: USB: serial: option: add LongSung M5710 module support Add a device-id entry for the LongSung M5710 module. T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2df3 ProdID=9d03 Rev= 1.00 S: Manufacturer=Marvell S: Product=Mobile Composite Device Bus S: SerialNumber= C:* #Ifs= 5 Cfg#= 1 Atr=c0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=e0(wlcon) Sub=01 Prot=03 I:* If#= 0 Alt= 0 #EPs= 1 Cls=e0(wlcon) Sub=01 Prot=03 Driver=rndis_host E: Ad=87(I) Atr=03(Int.) MxPS= 64 Ivl=4096ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0c(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0b(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=89(I) Atr=03(Int.) MxPS= 64 Ivl=4096ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=88(I) Atr=03(Int.) MxPS= 64 Ivl=4096ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0a(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Daniel Palmer https://lore.kernel.org/r/20201227031716.1343300-1-daniel@0x0f.com [ johan: drop id defines, only bind to vendor class ] Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index ee726a106706..3fe959104311 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2059,6 +2059,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff), /* Fibocom NL678 series */ .driver_info = RSVD(6) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1406, 0xff) }, /* GosunCn GM500 ECM/NCM */ -- cgit 1.4.1 From 54d0a3ab80f49f19ee916def62fe067596833403 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 4 Jan 2021 15:50:07 +0100 Subject: USB: serial: iuu_phoenix: fix DMA from stack Stack-allocated buffers cannot be used for DMA (on all architectures) so allocate the flush command buffer using kmalloc(). Fixes: 60a8fc017103 ("USB: add iuu_phoenix driver") Cc: stable # 2.6.25 Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- drivers/usb/serial/iuu_phoenix.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c index f1201d4de297..e8f06b41a503 100644 --- a/drivers/usb/serial/iuu_phoenix.c +++ b/drivers/usb/serial/iuu_phoenix.c @@ -532,23 +532,29 @@ static int iuu_uart_flush(struct usb_serial_port *port) struct device *dev = &port->dev; int i; int status; - u8 rxcmd = IUU_UART_RX; + u8 *rxcmd; struct iuu_private *priv = usb_get_serial_port_data(port); if (iuu_led(port, 0xF000, 0, 0, 0xFF) < 0) return -EIO; + rxcmd = kmalloc(1, GFP_KERNEL); + if (!rxcmd) + return -ENOMEM; + + rxcmd[0] = IUU_UART_RX; + for (i = 0; i < 2; i++) { - status = bulk_immediate(port, &rxcmd, 1); + status = bulk_immediate(port, rxcmd, 1); if (status != IUU_OPERATION_OK) { dev_dbg(dev, "%s - uart_flush_write error\n", __func__); - return status; + goto out_free; } status = read_immediate(port, &priv->len, 1); if (status != IUU_OPERATION_OK) { dev_dbg(dev, "%s - uart_flush_read error\n", __func__); - return status; + goto out_free; } if (priv->len > 0) { @@ -556,12 +562,16 @@ static int iuu_uart_flush(struct usb_serial_port *port) status = read_immediate(port, priv->buf, priv->len); if (status != IUU_OPERATION_OK) { dev_dbg(dev, "%s - uart_flush_read error\n", __func__); - return status; + goto out_free; } } } dev_dbg(dev, "%s - uart_flush_read OK!\n", __func__); iuu_led(port, 0, 0xF000, 0, 0xFF); + +out_free: + kfree(rxcmd); + return status; } -- cgit 1.4.1 From 020a1f453449294926ca548d8d5ca970926e8dfd Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 4 Jan 2021 15:53:02 +0100 Subject: USB: usblp: fix DMA to stack Stack-allocated buffers cannot be used for DMA (on all architectures). Replace the HP-channel macro with a helper function that allocates a dedicated transfer buffer so that it can continue to be used with arguments from the stack. Note that the buffer is cleared on allocation as usblp_ctrl_msg() returns success also on short transfers (the buffer is only used for debugging). Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20210104145302.2087-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usblp.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 67cbd42421be..134dc2005ce9 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -274,8 +274,25 @@ static int usblp_ctrl_msg(struct usblp *usblp, int request, int type, int dir, i #define usblp_reset(usblp)\ usblp_ctrl_msg(usblp, USBLP_REQ_RESET, USB_TYPE_CLASS, USB_DIR_OUT, USB_RECIP_OTHER, 0, NULL, 0) -#define usblp_hp_channel_change_request(usblp, channel, buffer) \ - usblp_ctrl_msg(usblp, USBLP_REQ_HP_CHANNEL_CHANGE_REQUEST, USB_TYPE_VENDOR, USB_DIR_IN, USB_RECIP_INTERFACE, channel, buffer, 1) +static int usblp_hp_channel_change_request(struct usblp *usblp, int channel, u8 *new_channel) +{ + u8 *buf; + int ret; + + buf = kzalloc(1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ret = usblp_ctrl_msg(usblp, USBLP_REQ_HP_CHANNEL_CHANGE_REQUEST, + USB_TYPE_VENDOR, USB_DIR_IN, USB_RECIP_INTERFACE, + channel, buf, 1); + if (ret == 0) + *new_channel = buf[0]; + + kfree(buf); + + return ret; +} /* * See the description for usblp_select_alts() below for the usage -- cgit 1.4.1 From 718bf42b119de652ebcc93655a1f33a9c0d04b3c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 28 Dec 2020 23:13:09 -0800 Subject: usb: usbip: vhci_hcd: protect shift size Fix shift out-of-bounds in vhci_hcd.c: UBSAN: shift-out-of-bounds in ../drivers/usb/usbip/vhci_hcd.c:399:41 shift exponent 768 is too large for 32-bit type 'int' Fixes: 03cd00d538a6 ("usbip: vhci-hcd: Set the vhci structure up to work") Signed-off-by: Randy Dunlap Reported-by: syzbot+297d20e437b79283bf6d@syzkaller.appspotmail.com Cc: Yuyang Du Cc: Shuah Khan Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Cc: stable Link: https://lore.kernel.org/r/20201229071309.18418-1-rdunlap@infradead.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vhci_hcd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 66cde5e5f796..3209b5ddd30c 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -396,6 +396,8 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, default: usbip_dbg_vhci_rh(" ClearPortFeature: default %x\n", wValue); + if (wValue >= 32) + goto error; vhci_hcd->port_status[rhport] &= ~(1 << wValue); break; } -- cgit 1.4.1 From a1383b3537a7bea1c213baa7878ccc4ecf4413b5 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Tue, 29 Dec 2020 15:00:37 -0800 Subject: usb: dwc3: gadget: Restart DWC3 gadget when enabling pullup usb_gadget_deactivate/usb_gadget_activate does not execute the UDC start operation, which may leave EP0 disabled and event IRQs disabled when re-activating the function. Move the enabling/disabling of USB EP0 and device event IRQs to be performed in the pullup routine. Fixes: ae7e86108b12 ("usb: dwc3: Stop active transfers before halting the controller") Tested-by: Michael Tretter Cc: stable Reported-by: Michael Tretter Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/1609282837-21666-1-git-send-email-wcheng@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 78cb4db8a6e4..25f654b79e48 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2083,6 +2083,7 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend) static void dwc3_gadget_disable_irq(struct dwc3 *dwc); static void __dwc3_gadget_stop(struct dwc3 *dwc); +static int __dwc3_gadget_start(struct dwc3 *dwc); static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) { @@ -2145,6 +2146,8 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) dwc->ev_buf->lpos = (dwc->ev_buf->lpos + count) % dwc->ev_buf->length; } + } else { + __dwc3_gadget_start(dwc); } ret = dwc3_gadget_run_stop(dwc, is_on, false); @@ -2319,10 +2322,6 @@ static int dwc3_gadget_start(struct usb_gadget *g, } dwc->gadget_driver = driver; - - if (pm_runtime_active(dwc->dev)) - __dwc3_gadget_start(dwc); - spin_unlock_irqrestore(&dwc->lock, flags); return 0; @@ -2348,13 +2347,6 @@ static int dwc3_gadget_stop(struct usb_gadget *g) unsigned long flags; spin_lock_irqsave(&dwc->lock, flags); - - if (pm_runtime_suspended(dwc->dev)) - goto out; - - __dwc3_gadget_stop(dwc); - -out: dwc->gadget_driver = NULL; spin_unlock_irqrestore(&dwc->lock, flags); -- cgit 1.4.1 From 64e6bbfff52db4bf6785fab9cffab850b2de6870 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Tue, 29 Dec 2020 18:53:35 +0800 Subject: usb: gadget: configfs: Fix use-after-free issue with udc_name There is a use-after-free issue, if access udc_name in function gadget_dev_desc_UDC_store after another context free udc_name in function unregister_gadget. Context 1: gadget_dev_desc_UDC_store()->unregister_gadget()-> free udc_name->set udc_name to NULL Context 2: gadget_dev_desc_UDC_show()-> access udc_name Call trace: dump_backtrace+0x0/0x340 show_stack+0x14/0x1c dump_stack+0xe4/0x134 print_address_description+0x78/0x478 __kasan_report+0x270/0x2ec kasan_report+0x10/0x18 __asan_report_load1_noabort+0x18/0x20 string+0xf4/0x138 vsnprintf+0x428/0x14d0 sprintf+0xe4/0x12c gadget_dev_desc_UDC_show+0x54/0x64 configfs_read_file+0x210/0x3a0 __vfs_read+0xf0/0x49c vfs_read+0x130/0x2b4 SyS_read+0x114/0x208 el0_svc_naked+0x34/0x38 Add mutex_lock to protect this kind of scenario. Signed-off-by: Eddie Hung Signed-off-by: Macpaul Lin Reviewed-by: Peter Chen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1609239215-21819-1-git-send-email-macpaul.lin@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/configfs.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 56051bb97349..d9743f4b56c3 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -221,9 +221,16 @@ static ssize_t gadget_dev_desc_bcdUSB_store(struct config_item *item, static ssize_t gadget_dev_desc_UDC_show(struct config_item *item, char *page) { - char *udc_name = to_gadget_info(item)->composite.gadget_driver.udc_name; + struct gadget_info *gi = to_gadget_info(item); + char *udc_name; + int ret; + + mutex_lock(&gi->lock); + udc_name = gi->composite.gadget_driver.udc_name; + ret = sprintf(page, "%s\n", udc_name ?: ""); + mutex_unlock(&gi->lock); - return sprintf(page, "%s\n", udc_name ?: ""); + return ret; } static int unregister_gadget(struct gadget_info *gi) -- cgit 1.4.1 From 7043e311a57625467b6fdb032dec8a6dea878208 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Wed, 30 Dec 2020 13:11:14 +0800 Subject: usb: gadget: core: change the comment for usb_gadget_connect The pullup does not need to be enabled at below situations: - For platforms which the hardware pullup starts after setting register even they do not see the VBUS. If the pullup is always enabled for these platforms, it will consume more power and break the USB IF compliance tests [1]. - For platforms which need to do BC 1.2 charger detection after seeing the VBUS. Pullup D+ will break the charger detection flow. - For platforms which the system suspend is allowed when the connection with host is there but the bus is not at suspend. For these platforms, it is better to disable pullup when entering the system suspend otherwise the host may confuse the device behavior after controller is in low power mode. Disable pullup is considered as a disconnection event from host side. [1] USB-IF Full and Low Speed Compliance Test Procedure F Back-voltage Testing Section 7.2.1 of the USB specification requires that no device shall supply (source) current on VBUS at its upstream facing port at any time. From VBUS on its upstream facing port, a device may only draw (sink) current. They may not provide power to the pull-up resistor on D+/D- unless VBUS is present (see Section 7.1.5). Signed-off-by: Peter Chen Link: https://lore.kernel.org/r/20201230051114.21370-1-peter.chen@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 5b5cfeb6c14a..6a62bbd01324 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -659,8 +659,7 @@ EXPORT_SYMBOL_GPL(usb_gadget_vbus_disconnect); * * Enables the D+ (or potentially D-) pullup. The host will start * enumerating this gadget when the pullup is active and a VBUS session - * is active (the link is powered). This pullup is always enabled unless - * usb_gadget_disconnect() has been used to disable it. + * is active (the link is powered). * * Returns zero on success, else negative errno. */ -- cgit 1.4.1 From d7889c2020e08caab0d7e36e947f642d91015bd0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:42:17 +0100 Subject: usb: gadget: select CONFIG_CRC32 Without crc32 support, this driver fails to link: arm-linux-gnueabi-ld: drivers/usb/gadget/function/f_eem.o: in function `eem_unwrap': f_eem.c:(.text+0x11cc): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/usb/gadget/function/f_ncm.o:f_ncm.c:(.text+0x1e40): more undefined references to `crc32_le' follow Fixes: 6d3865f9d41f ("usb: gadget: NCM: Add transmit multi-frame.") Signed-off-by: Arnd Bergmann Cc: stable Link: https://lore.kernel.org/r/20210103214224.1996535-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 7e47e6223089..2d152571a7de 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -265,6 +265,7 @@ config USB_CONFIGFS_NCM depends on NET select USB_U_ETHER select USB_F_NCM + select CRC32 help NCM is an advanced protocol for Ethernet encapsulation, allows grouping of several ethernet frames into one USB transfer and @@ -314,6 +315,7 @@ config USB_CONFIGFS_EEM depends on NET select USB_U_ETHER select USB_F_EEM + select CRC32 help CDC EEM is a newer USB standard that is somewhat simpler than CDC ECM and therefore can be supported by more hardware. Technically ECM and -- cgit 1.4.1 From 6cd0fe91387917be48e91385a572a69dfac2f3f7 Mon Sep 17 00:00:00 2001 From: Chandana Kishori Chiluveru Date: Tue, 29 Dec 2020 14:44:43 -0800 Subject: usb: gadget: configfs: Preserve function ordering after bind failure When binding the ConfigFS gadget to a UDC, the functions in each configuration are added in list order. However, if usb_add_function() fails, the failed function is put back on its configuration's func_list and purge_configs_funcs() is called to further clean up. purge_configs_funcs() iterates over the configurations and functions in forward order, calling unbind() on each of the previously added functions. But after doing so, each function gets moved to the tail of the configuration's func_list. This results in reshuffling the original order of the functions within a configuration such that the failed function now appears first even though it may have originally appeared in the middle or even end of the list. At this point if the ConfigFS gadget is attempted to re-bind to the UDC, the functions will be added in a different order than intended, with the only recourse being to remove and relink the functions all over again. An example of this as follows: ln -s functions/mass_storage.0 configs/c.1 ln -s functions/ncm.0 configs/c.1 ln -s functions/ffs.adb configs/c.1 # oops, forgot to start adbd echo "" > UDC # fails start adbd echo "" > UDC # now succeeds, but... # bind order is # "ADB", mass_storage, ncm [30133.118289] configfs-gadget gadget: adding 'Mass Storage Function'/ffffff810af87200 to config 'c'/ffffff817d6a2520 [30133.119875] configfs-gadget gadget: adding 'cdc_network'/ffffff80f48d1a00 to config 'c'/ffffff817d6a2520 [30133.119974] using random self ethernet address [30133.120002] using random host ethernet address [30133.139604] usb0: HOST MAC 3e:27:46:ba:3e:26 [30133.140015] usb0: MAC 6e:28:7e:42:66:6a [30133.140062] configfs-gadget gadget: adding 'Function FS Gadget'/ffffff80f3868438 to config 'c'/ffffff817d6a2520 [30133.140081] configfs-gadget gadget: adding 'Function FS Gadget'/ffffff80f3868438 --> -19 [30133.140098] configfs-gadget gadget: unbind function 'Mass Storage Function'/ffffff810af87200 [30133.140119] configfs-gadget gadget: unbind function 'cdc_network'/ffffff80f48d1a00 [30133.173201] configfs-gadget a600000.dwc3: failed to start g1: -19 [30136.661933] init: starting service 'adbd'... [30136.700126] read descriptors [30136.700413] read strings [30138.574484] configfs-gadget gadget: adding 'Function FS Gadget'/ffffff80f3868438 to config 'c'/ffffff817d6a2520 [30138.575497] configfs-gadget gadget: adding 'Mass Storage Function'/ffffff810af87200 to config 'c'/ffffff817d6a2520 [30138.575554] configfs-gadget gadget: adding 'cdc_network'/ffffff80f48d1a00 to config 'c'/ffffff817d6a2520 [30138.575631] using random self ethernet address [30138.575660] using random host ethernet address [30138.595338] usb0: HOST MAC 2e:cf:43:cd:ca:c8 [30138.597160] usb0: MAC 6a:f0:9f:ee:82:a0 [30138.791490] configfs-gadget gadget: super-speed config #1: c Fix this by reversing the iteration order of the functions in purge_config_funcs() when unbinding them, and adding them back to the config's func_list at the head instead of the tail. This ensures that we unbind and unwind back to the original list order. Fixes: 88af8bbe4ef7 ("usb: gadget: the start of the configfs interface") Signed-off-by: Chandana Kishori Chiluveru Signed-off-by: Jack Pham Reviewed-by: Peter Chen Link: https://lore.kernel.org/r/20201229224443.31623-1-jackp@codeaurora.org Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/configfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index d9743f4b56c3..408a5332a975 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1255,9 +1255,9 @@ static void purge_configs_funcs(struct gadget_info *gi) cfg = container_of(c, struct config_usb_cfg, c); - list_for_each_entry_safe(f, tmp, &c->functions, list) { + list_for_each_entry_safe_reverse(f, tmp, &c->functions, list) { - list_move_tail(&f->list, &cfg->func_list); + list_move(&f->list, &cfg->func_list); if (f->unbind) { dev_dbg(&gi->cdev.gadget->dev, "unbind function '%s'/%p\n", -- cgit 1.4.1 From e1263f9277bad198c2acc8092a41aea1edbea0e4 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Mon, 4 Jan 2021 15:20:45 +0100 Subject: dmaengine: stm32-mdma: fix STM32_MDMA_VERY_HIGH_PRIORITY value STM32_MDMA_VERY_HIGH_PRIORITY is b11 not 0x11, so fix it with 0x3. Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20210104142045.25583-1-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-mdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index e4637ec786d3..36ba8b43e78d 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -199,7 +199,7 @@ #define STM32_MDMA_MAX_CHANNELS 63 #define STM32_MDMA_MAX_REQUESTS 256 #define STM32_MDMA_MAX_BURST 128 -#define STM32_MDMA_VERY_HIGH_PRIORITY 0x11 +#define STM32_MDMA_VERY_HIGH_PRIORITY 0x3 enum stm32_mdma_trigger_mode { STM32_MDMA_BUFFER, -- cgit 1.4.1 From 65a4e5299739abe0888cda0938d21f8ea3b5c606 Mon Sep 17 00:00:00 2001 From: David Gow Date: Mon, 21 Dec 2020 23:39:00 -0800 Subject: kunit: tool: Force the use of the 'tty' console for UML kunit_tool relies on the UML console outputting printk() output to the tty in order to get results. Since the default console driver could change, pass 'console=tty' to the kernel. This is triggered by a change[1] to use ttynull as a fallback console driver which -- by chance or by design -- seems to have changed the default console output on UML, breaking kunit_tool. While this may be fixed, we should be less fragile to such changes in the default. [1]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=757055ae8dedf5333af17b3b5b4b70ba9bc9da4e Signed-off-by: David Gow Fixes: 757055ae8ded ("init/console: Use ttynull as a fallback when there is no console") Reported-by: Andy Shevchenko Tested-by: Andy Shevchenko Acked-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_kernel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 57c1724b7e5d..698358c9c0d6 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -198,7 +198,7 @@ class LinuxSourceTree(object): return self.validate_config(build_dir) def run_kernel(self, args=[], build_dir='', timeout=None): - args.extend(['mem=1G']) + args.extend(['mem=1G', 'console=tty']) self._ops.linux_bin(args, timeout, build_dir) outfile = get_outfile_path(build_dir) subprocess.call(['stty', 'sane']) -- cgit 1.4.1 From 3b4cf848dad5dad4bf239ba664c809c8cf29f1ed Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Thu, 17 Dec 2020 17:31:40 +0100 Subject: selftests/vDSO: add additional binaries to .gitignore Add the test binaries introduced by commit 693f5ca08ca0 ("kselftest: Extend vDSO selftest"), commit 03f55c7952c9 ("kselftest: Extend vDSO selftest to clock_getres") and commit c7e5789b24d3 ("kselftest: Move test_vdso to the vDSO test suite") to .gitignore. Signed-off-by: Tobias Klauser Signed-off-by: Shuah Khan --- tools/testing/selftests/vDSO/.gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore index 5eb64d41e541..a8dc51af5a9c 100644 --- a/tools/testing/selftests/vDSO/.gitignore +++ b/tools/testing/selftests/vDSO/.gitignore @@ -1,5 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only vdso_test +vdso_test_abi +vdso_test_clock_getres +vdso_test_correctness vdso_test_gettimeofday vdso_test_getcpu vdso_standalone_test_x86 -- cgit 1.4.1 From df00d02989024d193a6efd1a85513a5658c6a10f Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Thu, 17 Dec 2020 17:32:35 +0100 Subject: selftests/vDSO: fix -Wformat warning in vdso_test_correctness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following -Wformat warnings in vdso_test_correctness.c: vdso_test_correctness.c: In function ‘test_one_clock_gettime64’: vdso_test_correctness.c:352:21: warning: format ‘%ld’ expects argument of type ‘long int’, but argument 3 has type ‘long long int’ [-Wformat=] 352 | printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n", | ~~~~^ | | | long int | %09lld 353 | (unsigned long long)start.tv_sec, start.tv_nsec, | ~~~~~~~~~~~~~ | | | long long int vdso_test_correctness.c:352:32: warning: format ‘%ld’ expects argument of type ‘long int’, but argument 5 has type ‘long long int’ [-Wformat=] 352 | printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n", | ~~~~^ | | | long int | %09lld 353 | (unsigned long long)start.tv_sec, start.tv_nsec, 354 | (unsigned long long)vdso.tv_sec, vdso.tv_nsec, | ~~~~~~~~~~~~ | | | long long int vdso_test_correctness.c:352:43: warning: format ‘%ld’ expects argument of type ‘long int’, but argument 7 has type ‘long long int’ [-Wformat=] The tv_sec member of __kernel_timespec is long long, both in uapi/linux/time_types.h and locally in vdso_test_correctness.c. Signed-off-by: Tobias Klauser Signed-off-by: Shuah Khan --- tools/testing/selftests/vDSO/vdso_test_correctness.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c index 5029ef9b228c..c4aea794725a 100644 --- a/tools/testing/selftests/vDSO/vdso_test_correctness.c +++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c @@ -349,7 +349,7 @@ static void test_one_clock_gettime64(int clock, const char *name) return; } - printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n", + printf("\t%llu.%09lld %llu.%09lld %llu.%09lld\n", (unsigned long long)start.tv_sec, start.tv_nsec, (unsigned long long)vdso.tv_sec, vdso.tv_nsec, (unsigned long long)end.tv_sec, end.tv_nsec); -- cgit 1.4.1 From 7a6eb7c34a78498742b5f82543b7a68c1c443329 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 10 Dec 2020 18:52:33 +0000 Subject: selftests: Skip BPF seftests by default The BPF selftests have build time dependencies on cutting edge versions of tools in the BPF ecosystem including LLVM which are more involved to satisfy than more typical requirements like installing a package from your distribution. This causes issues for users looking at kselftest in as a whole who find that a default build of kselftest fails and that resolving this is time consuming and adds administrative overhead. The fast pace of BPF development and the need for a full BPF stack to do substantial development or validation work on the code mean that people working directly on it don't see a reasonable way to keep supporting older environments without causing problems with the usability of the BPF tests in BPF development so these requirements are unlikely to be relaxed in the immediate future. There is already support for skipping targets so in order to reduce the barrier to entry for people interested in kselftest as a whole let's use that to skip the BPF tests by default when people work with the top level kselftest build system. Users can still build the BPF selftests as part of the wider kselftest build by specifying SKIP_TARGETS, including setting an empty SKIP_TARGETS to build everything. They can also continue to build the BPF selftests individually in cases where they are specifically focused on BPF. This isn't ideal since it means people will need to take special steps to build the BPF tests but the dependencies mean that realistically this is already the case to some extent and it makes it easier for people to pick up and work with the other selftests which is hopefully a net win. Signed-off-by: Mark Brown Signed-off-by: Shuah Khan --- tools/testing/selftests/Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index afbab4aeef3c..8a917cb4426a 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -77,8 +77,10 @@ TARGETS += zram TARGETS_HOTPLUG = cpu-hotplug TARGETS_HOTPLUG += memory-hotplug -# User can optionally provide a TARGETS skiplist. -SKIP_TARGETS ?= +# User can optionally provide a TARGETS skiplist. By default we skip +# BPF since it has cutting edge build time dependencies which require +# more effort to install. +SKIP_TARGETS ?= bpf ifneq ($(SKIP_TARGETS),) TMP := $(filter-out $(SKIP_TARGETS), $(TARGETS)) override TARGETS := $(TMP) -- cgit 1.4.1 From 8cbebc4118b5933b3ae6351ceb433f75ac6b7c6b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 4 Jan 2021 16:50:16 +0000 Subject: KVM: arm64: Replace KVM_ARM_PMU with HW_PERF_EVENTS KVM_ARM_PMU only existed for the benefit of 32bit ARM hosts, and makes no sense now that we are 64bit only. Get rid of it. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/Kconfig | 8 -------- arch/arm64/kvm/Makefile | 2 +- include/kvm/arm_pmu.h | 2 +- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 043756db8f6e..3964acf5451e 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -49,14 +49,6 @@ if KVM source "virt/kvm/Kconfig" -config KVM_ARM_PMU - bool "Virtual Performance Monitoring Unit (PMU) support" - depends on HW_PERF_EVENTS - default y - help - Adds support for a virtual Performance Monitoring Unit (PMU) in - virtual machines. - endif # KVM endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 60fd181df624..13b017284bf9 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -24,4 +24,4 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \ vgic/vgic-its.o vgic/vgic-debug.o -kvm-$(CONFIG_KVM_ARM_PMU) += pmu-emul.o +kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index fc85f50fa0e9..8dcb3e1477bc 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -13,7 +13,7 @@ #define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1) #define ARMV8_PMU_MAX_COUNTER_PAIRS ((ARMV8_PMU_MAX_COUNTERS + 1) >> 1) -#ifdef CONFIG_KVM_ARM_PMU +#ifdef CONFIG_HW_PERF_EVENTS struct kvm_pmc { u8 idx; /* index into the pmu->pmc array */ -- cgit 1.4.1 From 0b884fe71f9ee6a5df35e677154256ea2099ebb8 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Mon, 14 Dec 2020 12:58:50 +0800 Subject: i2c: sprd: use a specific timeout to avoid system hang up issue If the i2c device SCL bus being pulled up due to some exception before message transfer done, the system cannot receive the completing interrupt signal any more, it would not exit waiting loop until MAX_SCHEDULE_TIMEOUT jiffies eclipse, that would make the system seemed hang up. To avoid that happen, this patch adds a specific timeout for message transfer. Fixes: 8b9ec0719834 ("i2c: Add Spreadtrum I2C controller driver") Signed-off-by: Linhua Xu Signed-off-by: Chunyan Zhang [wsa: changed errno to ETIMEDOUT] Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-sprd.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-sprd.c b/drivers/i2c/busses/i2c-sprd.c index 19cda6742423..2917fecf6c80 100644 --- a/drivers/i2c/busses/i2c-sprd.c +++ b/drivers/i2c/busses/i2c-sprd.c @@ -72,6 +72,8 @@ /* timeout (ms) for pm runtime autosuspend */ #define SPRD_I2C_PM_TIMEOUT 1000 +/* timeout (ms) for transfer message */ +#define I2C_XFER_TIMEOUT 1000 /* SPRD i2c data structure */ struct sprd_i2c { @@ -244,6 +246,7 @@ static int sprd_i2c_handle_msg(struct i2c_adapter *i2c_adap, struct i2c_msg *msg, bool is_last_msg) { struct sprd_i2c *i2c_dev = i2c_adap->algo_data; + unsigned long time_left; i2c_dev->msg = msg; i2c_dev->buf = msg->buf; @@ -273,7 +276,10 @@ static int sprd_i2c_handle_msg(struct i2c_adapter *i2c_adap, sprd_i2c_opt_start(i2c_dev); - wait_for_completion(&i2c_dev->complete); + time_left = wait_for_completion_timeout(&i2c_dev->complete, + msecs_to_jiffies(I2C_XFER_TIMEOUT)); + if (!time_left) + return -ETIMEDOUT; return i2c_dev->err; } -- cgit 1.4.1 From 0b3ea2a06de1f52ea30865e227e109a5fd3b6214 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 21 Dec 2020 14:42:25 +0100 Subject: i2c: i801: Fix the i2c-mux gpiod_lookup_table not being properly terminated gpiod_add_lookup_table() expects the gpiod_lookup_table->table passed to it to be terminated with a zero-ed out entry. So we need to allocate one more entry then we will use. Fixes: d308dfbf62ef ("i2c: mux/i801: Switch to use descriptor passing") Signed-off-by: Hans de Goede Reviewed-by: Mika Westerberg Acked-by: Jean Delvare Reviewed-by: Linus Walleij Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-i801.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index ae90713443fa..877fe3733a42 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1449,7 +1449,7 @@ static int i801_add_mux(struct i801_priv *priv) /* Register GPIO descriptor lookup table */ lookup = devm_kzalloc(dev, - struct_size(lookup, table, mux_config->n_gpios), + struct_size(lookup, table, mux_config->n_gpios + 1), GFP_KERNEL); if (!lookup) return -ENOMEM; -- cgit 1.4.1 From cc07d72bf350b77faeffee1c37bc52197171473f Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 24 Sep 2020 13:14:52 -0400 Subject: dm raid: fix discard limits for raid1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Block core warned that discard_granularity was 0 for dm-raid with personality of raid1. Reason is that raid_io_hints() was incorrectly special-casing raid1 rather than raid0. Fix raid_io_hints() by removing discard limits settings for raid1. Check for raid0 instead. Fixes: 61697a6abd24a ("dm: eliminate 'split_discard_bios' flag from DM target interface") Cc: stable@vger.kernel.org Reported-by: Zdenek Kabelac Reported-by: Mikulas Patocka Reported-by: Stephan Bärwolf Signed-off-by: Mike Snitzer --- drivers/md/dm-raid.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 23c38777e8f6..cab12b2251ba 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3729,10 +3729,10 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits) blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs)); /* - * RAID1 and RAID10 personalities require bio splitting, - * RAID0/4/5/6 don't and process large discard bios properly. + * RAID0 and RAID10 personalities require bio splitting, + * RAID1/4/5/6 don't and process large discard bios properly. */ - if (rs_is_raid1(rs) || rs_is_raid10(rs)) { + if (rs_is_raid0(rs) || rs_is_raid10(rs)) { limits->discard_granularity = chunk_size_bytes; limits->max_discard_sectors = rs->md.chunk_sectors; } -- cgit 1.4.1 From f7b347acb5f6c29d9229bb64893d8b6a2c7949fb Mon Sep 17 00:00:00 2001 From: Anthony Iliopoulos Date: Mon, 14 Dec 2020 18:18:11 +0100 Subject: dm integrity: select CRYPTO_SKCIPHER The integrity target relies on skcipher for encryption/decryption, but certain kernel configurations may not enable CRYPTO_SKCIPHER, leading to compilation errors due to unresolved symbols. Explicitly select CRYPTO_SKCIPHER for DM_INTEGRITY, since it is unconditionally dependent on it. Signed-off-by: Anthony Iliopoulos Signed-off-by: Mike Snitzer --- drivers/md/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index b7e2d9666614..a378a8967094 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -605,6 +605,7 @@ config DM_INTEGRITY select BLK_DEV_INTEGRITY select DM_BUFIO select CRYPTO + select CRYPTO_SKCIPHER select ASYNC_XOR help This device-mapper target emulates a block device that has -- cgit 1.4.1 From b690bd546b227c32b860dae985a18bed8aa946fe Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:40:51 +0100 Subject: dm zoned: select CONFIG_CRC32 Without crc32 support, this driver fails to link: arm-linux-gnueabi-ld: drivers/md/dm-zoned-metadata.o: in function `dmz_write_sb': dm-zoned-metadata.c:(.text+0xe98): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/md/dm-zoned-metadata.o: in function `dmz_check_sb': dm-zoned-metadata.c:(.text+0x7978): undefined reference to `crc32_le' Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target") Signed-off-by: Arnd Bergmann Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index a378a8967094..9e44c09f6410 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -623,6 +623,7 @@ config DM_ZONED tristate "Drive-managed zoned block device target support" depends on BLK_DEV_DM depends on BLK_DEV_ZONED + select CRC32 help This device-mapper target takes a host-managed or host-aware zoned block device and exposes most of its capacity as a regular block -- cgit 1.4.1 From 8abec36d1274bbd5ae8f36f3658b9abb3db56c31 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Mon, 4 Jan 2021 14:59:47 +0000 Subject: dm crypt: do not wait for backlogged crypto request completion in softirq Commit 39d42fa96ba1 ("dm crypt: add flags to optionally bypass kcryptd workqueues") made it possible for some code paths in dm-crypt to be executed in softirq context, when the underlying driver processes IO requests in interrupt/softirq context. When Crypto API backlogs a crypto request, dm-crypt uses wait_for_completion to avoid sending further requests to an already overloaded crypto driver. However, if the code is executing in softirq context, we might get the following stacktrace: [ 210.235213][ C0] BUG: scheduling while atomic: fio/2602/0x00000102 [ 210.236701][ C0] Modules linked in: [ 210.237566][ C0] CPU: 0 PID: 2602 Comm: fio Tainted: G W 5.10.0+ #50 [ 210.239292][ C0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015 [ 210.241233][ C0] Call Trace: [ 210.241946][ C0] [ 210.242561][ C0] dump_stack+0x7d/0xa3 [ 210.243466][ C0] __schedule_bug.cold+0xb3/0xc2 [ 210.244539][ C0] __schedule+0x156f/0x20d0 [ 210.245518][ C0] ? io_schedule_timeout+0x140/0x140 [ 210.246660][ C0] schedule+0xd0/0x270 [ 210.247541][ C0] schedule_timeout+0x1fb/0x280 [ 210.248586][ C0] ? usleep_range+0x150/0x150 [ 210.249624][ C0] ? unpoison_range+0x3a/0x60 [ 210.250632][ C0] ? ____kasan_kmalloc.constprop.0+0x82/0xa0 [ 210.251949][ C0] ? unpoison_range+0x3a/0x60 [ 210.252958][ C0] ? __prepare_to_swait+0xa7/0x190 [ 210.254067][ C0] do_wait_for_common+0x2ab/0x370 [ 210.255158][ C0] ? usleep_range+0x150/0x150 [ 210.256192][ C0] ? bit_wait_io_timeout+0x160/0x160 [ 210.257358][ C0] ? blk_update_request+0x757/0x1150 [ 210.258582][ C0] ? _raw_spin_lock_irq+0x82/0xd0 [ 210.259674][ C0] ? _raw_read_unlock_irqrestore+0x30/0x30 [ 210.260917][ C0] wait_for_completion+0x4c/0x90 [ 210.261971][ C0] crypt_convert+0x19a6/0x4c00 [ 210.263033][ C0] ? _raw_spin_lock_irqsave+0x87/0xe0 [ 210.264193][ C0] ? kasan_set_track+0x1c/0x30 [ 210.265191][ C0] ? crypt_iv_tcw_ctr+0x4a0/0x4a0 [ 210.266283][ C0] ? kmem_cache_free+0x104/0x470 [ 210.267363][ C0] ? crypt_endio+0x91/0x180 [ 210.268327][ C0] kcryptd_crypt_read_convert+0x30e/0x420 [ 210.269565][ C0] blk_update_request+0x757/0x1150 [ 210.270563][ C0] blk_mq_end_request+0x4b/0x480 [ 210.271680][ C0] blk_done_softirq+0x21d/0x340 [ 210.272775][ C0] ? _raw_spin_lock+0x81/0xd0 [ 210.273847][ C0] ? blk_mq_stop_hw_queue+0x30/0x30 [ 210.275031][ C0] ? _raw_read_lock_irq+0x40/0x40 [ 210.276182][ C0] __do_softirq+0x190/0x611 [ 210.277203][ C0] ? handle_edge_irq+0x221/0xb60 [ 210.278340][ C0] asm_call_irq_on_stack+0x12/0x20 [ 210.279514][ C0] [ 210.280164][ C0] do_softirq_own_stack+0x37/0x40 [ 210.281281][ C0] irq_exit_rcu+0x110/0x1b0 [ 210.282286][ C0] common_interrupt+0x74/0x120 [ 210.283376][ C0] asm_common_interrupt+0x1e/0x40 [ 210.284496][ C0] RIP: 0010:_aesni_enc1+0x65/0xb0 Fix this by making crypt_convert function reentrant from the point of a single bio and make dm-crypt defer further bio processing to a workqueue, if Crypto API backlogs a request in interrupt context. Fixes: 39d42fa96ba1 ("dm crypt: add flags to optionally bypass kcryptd workqueues") Cc: stable@vger.kernel.org # v5.9+ Signed-off-by: Ignat Korchagin Acked-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 98 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 53791138d78b..527b1475b7a0 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1529,13 +1529,19 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_ * Encrypt / decrypt data from one bio to another one (can be the same one) */ static blk_status_t crypt_convert(struct crypt_config *cc, - struct convert_context *ctx, bool atomic) + struct convert_context *ctx, bool atomic, bool reset_pending) { unsigned int tag_offset = 0; unsigned int sector_step = cc->sector_size >> SECTOR_SHIFT; int r; - atomic_set(&ctx->cc_pending, 1); + /* + * if reset_pending is set we are dealing with the bio for the first time, + * else we're continuing to work on the previous bio, so don't mess with + * the cc_pending counter + */ + if (reset_pending) + atomic_set(&ctx->cc_pending, 1); while (ctx->iter_in.bi_size && ctx->iter_out.bi_size) { @@ -1553,7 +1559,25 @@ static blk_status_t crypt_convert(struct crypt_config *cc, * but the driver request queue is full, let's wait. */ case -EBUSY: - wait_for_completion(&ctx->restart); + if (in_interrupt()) { + if (try_wait_for_completion(&ctx->restart)) { + /* + * we don't have to block to wait for completion, + * so proceed + */ + } else { + /* + * we can't wait for completion without blocking + * exit and continue processing in a workqueue + */ + ctx->r.req = NULL; + ctx->cc_sector += sector_step; + tag_offset++; + return BLK_STS_DEV_RESOURCE; + } + } else { + wait_for_completion(&ctx->restart); + } reinit_completion(&ctx->restart); fallthrough; /* @@ -1945,6 +1969,37 @@ static bool kcryptd_crypt_write_inline(struct crypt_config *cc, } } +static void kcryptd_crypt_write_continue(struct work_struct *work) +{ + struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); + struct crypt_config *cc = io->cc; + struct convert_context *ctx = &io->ctx; + int crypt_finished; + sector_t sector = io->sector; + blk_status_t r; + + wait_for_completion(&ctx->restart); + reinit_completion(&ctx->restart); + + r = crypt_convert(cc, &io->ctx, true, false); + if (r) + io->error = r; + crypt_finished = atomic_dec_and_test(&ctx->cc_pending); + if (!crypt_finished && kcryptd_crypt_write_inline(cc, ctx)) { + /* Wait for completion signaled by kcryptd_async_done() */ + wait_for_completion(&ctx->restart); + crypt_finished = 1; + } + + /* Encryption was already finished, submit io now */ + if (crypt_finished) { + kcryptd_crypt_write_io_submit(io, 0); + io->sector = sector; + } + + crypt_dec_pending(io); +} + static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; @@ -1973,7 +2028,17 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) crypt_inc_pending(io); r = crypt_convert(cc, ctx, - test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags)); + test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags), true); + /* + * Crypto API backlogged the request, because its queue was full + * and we're in softirq context, so continue from a workqueue + * (TODO: is it actually possible to be in softirq in the write path?) + */ + if (r == BLK_STS_DEV_RESOURCE) { + INIT_WORK(&io->work, kcryptd_crypt_write_continue); + queue_work(cc->crypt_queue, &io->work); + return; + } if (r) io->error = r; crypt_finished = atomic_dec_and_test(&ctx->cc_pending); @@ -1998,6 +2063,25 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io) crypt_dec_pending(io); } +static void kcryptd_crypt_read_continue(struct work_struct *work) +{ + struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); + struct crypt_config *cc = io->cc; + blk_status_t r; + + wait_for_completion(&io->ctx.restart); + reinit_completion(&io->ctx.restart); + + r = crypt_convert(cc, &io->ctx, true, false); + if (r) + io->error = r; + + if (atomic_dec_and_test(&io->ctx.cc_pending)) + kcryptd_crypt_read_done(io); + + crypt_dec_pending(io); +} + static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; @@ -2009,7 +2093,16 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) io->sector); r = crypt_convert(cc, &io->ctx, - test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)); + test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + /* + * Crypto API backlogged the request, because its queue was full + * and we're in softirq context, so continue from a workqueue + */ + if (r == BLK_STS_DEV_RESOURCE) { + INIT_WORK(&io->work, kcryptd_crypt_read_continue); + queue_work(cc->crypt_queue, &io->work); + return; + } if (r) io->error = r; -- cgit 1.4.1 From d68b29584c25dbacd01ed44a3e45abb35353f1de Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Mon, 4 Jan 2021 14:59:48 +0000 Subject: dm crypt: use GFP_ATOMIC when allocating crypto requests from softirq Commit 39d42fa96ba1 ("dm crypt: add flags to optionally bypass kcryptd workqueues") made it possible for some code paths in dm-crypt to be executed in softirq context, when the underlying driver processes IO requests in interrupt/softirq context. In this case sometimes when allocating a new crypto request we may get a stacktrace like below: [ 210.103008][ C0] BUG: sleeping function called from invalid context at mm/mempool.c:381 [ 210.104746][ C0] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 2602, name: fio [ 210.106599][ C0] CPU: 0 PID: 2602 Comm: fio Tainted: G W 5.10.0+ #50 [ 210.108331][ C0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015 [ 210.110212][ C0] Call Trace: [ 210.110921][ C0] [ 210.111527][ C0] dump_stack+0x7d/0xa3 [ 210.112411][ C0] ___might_sleep.cold+0x122/0x151 [ 210.113527][ C0] mempool_alloc+0x16b/0x2f0 [ 210.114524][ C0] ? __queue_work+0x515/0xde0 [ 210.115553][ C0] ? mempool_resize+0x700/0x700 [ 210.116586][ C0] ? crypt_endio+0x91/0x180 [ 210.117479][ C0] ? blk_update_request+0x757/0x1150 [ 210.118513][ C0] ? blk_mq_end_request+0x4b/0x480 [ 210.119572][ C0] ? blk_done_softirq+0x21d/0x340 [ 210.120628][ C0] ? __do_softirq+0x190/0x611 [ 210.121626][ C0] crypt_convert+0x29f9/0x4c00 [ 210.122668][ C0] ? _raw_spin_lock_irqsave+0x87/0xe0 [ 210.123824][ C0] ? kasan_set_track+0x1c/0x30 [ 210.124858][ C0] ? crypt_iv_tcw_ctr+0x4a0/0x4a0 [ 210.125930][ C0] ? kmem_cache_free+0x104/0x470 [ 210.126973][ C0] ? crypt_endio+0x91/0x180 [ 210.127947][ C0] kcryptd_crypt_read_convert+0x30e/0x420 [ 210.129165][ C0] blk_update_request+0x757/0x1150 [ 210.130231][ C0] blk_mq_end_request+0x4b/0x480 [ 210.131294][ C0] blk_done_softirq+0x21d/0x340 [ 210.132332][ C0] ? _raw_spin_lock+0x81/0xd0 [ 210.133289][ C0] ? blk_mq_stop_hw_queue+0x30/0x30 [ 210.134399][ C0] ? _raw_read_lock_irq+0x40/0x40 [ 210.135458][ C0] __do_softirq+0x190/0x611 [ 210.136409][ C0] ? handle_edge_irq+0x221/0xb60 [ 210.137447][ C0] asm_call_irq_on_stack+0x12/0x20 [ 210.138507][ C0] [ 210.139118][ C0] do_softirq_own_stack+0x37/0x40 [ 210.140191][ C0] irq_exit_rcu+0x110/0x1b0 [ 210.141151][ C0] common_interrupt+0x74/0x120 [ 210.142171][ C0] asm_common_interrupt+0x1e/0x40 Fix this by allocating crypto requests with GFP_ATOMIC mask in interrupt context. Fixes: 39d42fa96ba1 ("dm crypt: add flags to optionally bypass kcryptd workqueues") Cc: stable@vger.kernel.org # v5.9+ Reported-by: Maciej S. Szmigiero Signed-off-by: Ignat Korchagin Acked-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 527b1475b7a0..d2d6d3000f5b 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1454,13 +1454,16 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc, static void kcryptd_async_done(struct crypto_async_request *async_req, int error); -static void crypt_alloc_req_skcipher(struct crypt_config *cc, +static int crypt_alloc_req_skcipher(struct crypt_config *cc, struct convert_context *ctx) { unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1); - if (!ctx->r.req) - ctx->r.req = mempool_alloc(&cc->req_pool, GFP_NOIO); + if (!ctx->r.req) { + ctx->r.req = mempool_alloc(&cc->req_pool, in_interrupt() ? GFP_ATOMIC : GFP_NOIO); + if (!ctx->r.req) + return -ENOMEM; + } skcipher_request_set_tfm(ctx->r.req, cc->cipher_tfm.tfms[key_index]); @@ -1471,13 +1474,18 @@ static void crypt_alloc_req_skcipher(struct crypt_config *cc, skcipher_request_set_callback(ctx->r.req, CRYPTO_TFM_REQ_MAY_BACKLOG, kcryptd_async_done, dmreq_of_req(cc, ctx->r.req)); + + return 0; } -static void crypt_alloc_req_aead(struct crypt_config *cc, +static int crypt_alloc_req_aead(struct crypt_config *cc, struct convert_context *ctx) { - if (!ctx->r.req_aead) - ctx->r.req_aead = mempool_alloc(&cc->req_pool, GFP_NOIO); + if (!ctx->r.req) { + ctx->r.req = mempool_alloc(&cc->req_pool, in_interrupt() ? GFP_ATOMIC : GFP_NOIO); + if (!ctx->r.req) + return -ENOMEM; + } aead_request_set_tfm(ctx->r.req_aead, cc->cipher_tfm.tfms_aead[0]); @@ -1488,15 +1496,17 @@ static void crypt_alloc_req_aead(struct crypt_config *cc, aead_request_set_callback(ctx->r.req_aead, CRYPTO_TFM_REQ_MAY_BACKLOG, kcryptd_async_done, dmreq_of_req(cc, ctx->r.req_aead)); + + return 0; } -static void crypt_alloc_req(struct crypt_config *cc, +static int crypt_alloc_req(struct crypt_config *cc, struct convert_context *ctx) { if (crypt_integrity_aead(cc)) - crypt_alloc_req_aead(cc, ctx); + return crypt_alloc_req_aead(cc, ctx); else - crypt_alloc_req_skcipher(cc, ctx); + return crypt_alloc_req_skcipher(cc, ctx); } static void crypt_free_req_skcipher(struct crypt_config *cc, @@ -1545,7 +1555,12 @@ static blk_status_t crypt_convert(struct crypt_config *cc, while (ctx->iter_in.bi_size && ctx->iter_out.bi_size) { - crypt_alloc_req(cc, ctx); + r = crypt_alloc_req(cc, ctx); + if (r) { + complete(&ctx->restart); + return BLK_STS_DEV_RESOURCE; + } + atomic_inc(&ctx->cc_pending); if (crypt_integrity_aead(cc)) -- cgit 1.4.1 From a0a6df9afcaf439a6b4c88a3b522e3d05fdef46f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 4 Jan 2021 15:25:34 -0500 Subject: umount(2): move the flag validity checks first Unfortunately, there's userland code that used to rely upon these checks being done before anything else to check for UMOUNT_NOFOLLOW support. That broke in 41525f56e256 ("fs: refactor ksys_umount"). Separate those from the rest of checks and move them to ksys_umount(); unlike everything else in there, this can be sanely done there. Reported-by: Sargun Dhillon Fixes: 41525f56e256 ("fs: refactor ksys_umount") Signed-off-by: Al Viro --- fs/namespace.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index d2db7dfe232b..9d33909d0f9e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1713,8 +1713,6 @@ static int can_umount(const struct path *path, int flags) { struct mount *mnt = real_mount(path->mnt); - if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) - return -EINVAL; if (!may_mount()) return -EPERM; if (path->dentry != path->mnt->mnt_root) @@ -1728,6 +1726,7 @@ static int can_umount(const struct path *path, int flags) return 0; } +// caller is responsible for flags being sane int path_umount(struct path *path, int flags) { struct mount *mnt = real_mount(path->mnt); @@ -1749,6 +1748,10 @@ static int ksys_umount(char __user *name, int flags) struct path path; int ret; + // basic validity checks done first + if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) + return -EINVAL; + if (!(flags & UMOUNT_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; ret = user_path_at(AT_FDCWD, name, lookup_flags, &path); -- cgit 1.4.1 From d9e44981739a96f1a468c13bbbd54ace378caf1c Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 28 Dec 2020 15:21:36 +0000 Subject: bareudp: set NETIF_F_LLTX flag Like other tunneling interfaces, the bareudp doesn't need TXLOCK. So, It is good to set the NETIF_F_LLTX flag to improve performance and to avoid lockdep's false-positive warning. Test commands: ip netns add A ip netns add B ip link add veth0 netns A type veth peer name veth1 netns B ip netns exec A ip link set veth0 up ip netns exec A ip a a 10.0.0.1/24 dev veth0 ip netns exec B ip link set veth1 up ip netns exec B ip a a 10.0.0.2/24 dev veth1 for i in {2..1} do let A=$i-1 ip netns exec A ip link add bareudp$i type bareudp \ dstport $i ethertype ip ip netns exec A ip link set bareudp$i up ip netns exec A ip a a 10.0.$i.1/24 dev bareudp$i ip netns exec A ip r a 10.0.$i.2 encap ip src 10.0.$A.1 \ dst 10.0.$A.2 via 10.0.$i.2 dev bareudp$i ip netns exec B ip link add bareudp$i type bareudp \ dstport $i ethertype ip ip netns exec B ip link set bareudp$i up ip netns exec B ip a a 10.0.$i.2/24 dev bareudp$i ip netns exec B ip r a 10.0.$i.1 encap ip src 10.0.$A.2 \ dst 10.0.$A.1 via 10.0.$i.1 dev bareudp$i done ip netns exec A ping 10.0.2.2 Splat looks like: [ 96.992803][ T822] ============================================ [ 96.993954][ T822] WARNING: possible recursive locking detected [ 96.995102][ T822] 5.10.0+ #819 Not tainted [ 96.995927][ T822] -------------------------------------------- [ 96.997091][ T822] ping/822 is trying to acquire lock: [ 96.998083][ T822] ffff88810f753898 (_xmit_NONE#2){+.-.}-{2:2}, at: __dev_queue_xmit+0x1f52/0x2960 [ 96.999813][ T822] [ 96.999813][ T822] but task is already holding lock: [ 97.001192][ T822] ffff88810c385498 (_xmit_NONE#2){+.-.}-{2:2}, at: __dev_queue_xmit+0x1f52/0x2960 [ 97.002908][ T822] [ 97.002908][ T822] other info that might help us debug this: [ 97.004401][ T822] Possible unsafe locking scenario: [ 97.004401][ T822] [ 97.005784][ T822] CPU0 [ 97.006407][ T822] ---- [ 97.007010][ T822] lock(_xmit_NONE#2); [ 97.007779][ T822] lock(_xmit_NONE#2); [ 97.008550][ T822] [ 97.008550][ T822] *** DEADLOCK *** [ 97.008550][ T822] [ 97.010057][ T822] May be due to missing lock nesting notation [ 97.010057][ T822] [ 97.011594][ T822] 7 locks held by ping/822: [ 97.012426][ T822] #0: ffff888109a144f0 (sk_lock-AF_INET){+.+.}-{0:0}, at: raw_sendmsg+0x12f7/0x2b00 [ 97.014191][ T822] #1: ffffffffbce2f5a0 (rcu_read_lock_bh){....}-{1:2}, at: ip_finish_output2+0x249/0x2020 [ 97.016045][ T822] #2: ffffffffbce2f5a0 (rcu_read_lock_bh){....}-{1:2}, at: __dev_queue_xmit+0x1fd/0x2960 [ 97.017897][ T822] #3: ffff88810c385498 (_xmit_NONE#2){+.-.}-{2:2}, at: __dev_queue_xmit+0x1f52/0x2960 [ 97.019684][ T822] #4: ffffffffbce2f600 (rcu_read_lock){....}-{1:2}, at: bareudp_xmit+0x31b/0x3690 [bareudp] [ 97.021573][ T822] #5: ffffffffbce2f5a0 (rcu_read_lock_bh){....}-{1:2}, at: ip_finish_output2+0x249/0x2020 [ 97.023424][ T822] #6: ffffffffbce2f5a0 (rcu_read_lock_bh){....}-{1:2}, at: __dev_queue_xmit+0x1fd/0x2960 [ 97.025259][ T822] [ 97.025259][ T822] stack backtrace: [ 97.026349][ T822] CPU: 3 PID: 822 Comm: ping Not tainted 5.10.0+ #819 [ 97.027609][ T822] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 [ 97.029407][ T822] Call Trace: [ 97.030015][ T822] dump_stack+0x99/0xcb [ 97.030783][ T822] __lock_acquire.cold.77+0x149/0x3a9 [ 97.031773][ T822] ? stack_trace_save+0x81/0xa0 [ 97.032661][ T822] ? register_lock_class+0x1910/0x1910 [ 97.033673][ T822] ? register_lock_class+0x1910/0x1910 [ 97.034679][ T822] ? rcu_read_lock_sched_held+0x91/0xc0 [ 97.035697][ T822] ? rcu_read_lock_bh_held+0xa0/0xa0 [ 97.036690][ T822] lock_acquire+0x1b2/0x730 [ 97.037515][ T822] ? __dev_queue_xmit+0x1f52/0x2960 [ 97.038466][ T822] ? check_flags+0x50/0x50 [ 97.039277][ T822] ? netif_skb_features+0x296/0x9c0 [ 97.040226][ T822] ? validate_xmit_skb+0x29/0xb10 [ 97.041151][ T822] _raw_spin_lock+0x30/0x70 [ 97.041977][ T822] ? __dev_queue_xmit+0x1f52/0x2960 [ 97.042927][ T822] __dev_queue_xmit+0x1f52/0x2960 [ 97.043852][ T822] ? netdev_core_pick_tx+0x290/0x290 [ 97.044824][ T822] ? mark_held_locks+0xb7/0x120 [ 97.045712][ T822] ? lockdep_hardirqs_on_prepare+0x12c/0x3e0 [ 97.046824][ T822] ? __local_bh_enable_ip+0xa5/0xf0 [ 97.047771][ T822] ? ___neigh_create+0x12a8/0x1eb0 [ 97.048710][ T822] ? trace_hardirqs_on+0x41/0x120 [ 97.049626][ T822] ? ___neigh_create+0x12a8/0x1eb0 [ 97.050556][ T822] ? __local_bh_enable_ip+0xa5/0xf0 [ 97.051509][ T822] ? ___neigh_create+0x12a8/0x1eb0 [ 97.052443][ T822] ? check_chain_key+0x244/0x5f0 [ 97.053352][ T822] ? rcu_read_lock_bh_held+0x56/0xa0 [ 97.054317][ T822] ? ip_finish_output2+0x6ea/0x2020 [ 97.055263][ T822] ? pneigh_lookup+0x410/0x410 [ 97.056135][ T822] ip_finish_output2+0x6ea/0x2020 [ ... ] Acked-by: Guillaume Nault Fixes: 571912c69f0e ("net: UDP tunnel encapsulation module for tunnelling different protocols like MPLS, IP, NSH etc.") Signed-off-by: Taehee Yoo Link: https://lore.kernel.org/r/20201228152136.24215-1-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/bareudp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 85ebd2b7e446..aea10196c222 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -534,6 +534,7 @@ static void bareudp_setup(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &bareudp_type); dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; dev->features |= NETIF_F_RXCSUM; + dev->features |= NETIF_F_LLTX; dev->features |= NETIF_F_GSO_SOFTWARE; dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; dev->hw_features |= NETIF_F_GSO_SOFTWARE; -- cgit 1.4.1 From 10ad3e998fa0c25315f27cf3002ff8b02dc31c38 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 28 Dec 2020 15:21:46 +0000 Subject: bareudp: Fix use of incorrect min_headroom size In the bareudp6_xmit_skb(), it calculates min_headroom. At that point, it uses struct iphdr, but it's not correct. So panic could occur. The struct ipv6hdr should be used. Test commands: ip netns add A ip netns add B ip link add veth0 netns A type veth peer name veth1 netns B ip netns exec A ip link set veth0 up ip netns exec A ip a a 2001:db8:0::1/64 dev veth0 ip netns exec B ip link set veth1 up ip netns exec B ip a a 2001:db8:0::2/64 dev veth1 for i in {10..1} do let A=$i-1 ip netns exec A ip link add bareudp$i type bareudp dstport $i \ ethertype 0x86dd ip netns exec A ip link set bareudp$i up ip netns exec A ip -6 a a 2001:db8:$i::1/64 dev bareudp$i ip netns exec A ip -6 r a 2001:db8:$i::2 encap ip6 src \ 2001:db8:$A::1 dst 2001:db8:$A::2 via 2001:db8:$i::2 \ dev bareudp$i ip netns exec B ip link add bareudp$i type bareudp dstport $i \ ethertype 0x86dd ip netns exec B ip link set bareudp$i up ip netns exec B ip -6 a a 2001:db8:$i::2/64 dev bareudp$i ip netns exec B ip -6 r a 2001:db8:$i::1 encap ip6 src \ 2001:db8:$A::2 dst 2001:db8:$A::1 via 2001:db8:$i::1 \ dev bareudp$i done ip netns exec A ping 2001:db8:7::2 Splat looks like: [ 66.436679][ C2] skbuff: skb_under_panic: text:ffffffff928614c8 len:454 put:14 head:ffff88810abb4000 data:ffff88810abb3ffa tail:0x1c0 end:0x3ec0 dev:veth0 [ 66.441626][ C2] ------------[ cut here ]------------ [ 66.443458][ C2] kernel BUG at net/core/skbuff.c:109! [ 66.445313][ C2] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 66.447606][ C2] CPU: 2 PID: 913 Comm: ping Not tainted 5.10.0+ #819 [ 66.450251][ C2] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 [ 66.453713][ C2] RIP: 0010:skb_panic+0x15d/0x15f [ 66.455345][ C2] Code: 98 fe 4c 8b 4c 24 10 53 8b 4d 70 45 89 e0 48 c7 c7 60 8b 78 93 41 57 41 56 41 55 48 8b 54 24 20 48 8b 74 24 28 e8 b5 40 f9 ff <0f> 0b 48 8b 6c 24 20 89 34 24 e8 08 c9 98 fe 8b 34 24 48 c7 c1 80 [ 66.462314][ C2] RSP: 0018:ffff888119209648 EFLAGS: 00010286 [ 66.464281][ C2] RAX: 0000000000000089 RBX: ffff888003159000 RCX: 0000000000000000 [ 66.467216][ C2] RDX: 0000000000000089 RSI: 0000000000000008 RDI: ffffed10232412c0 [ 66.469768][ C2] RBP: ffff88810a53d440 R08: ffffed102328018d R09: ffffed102328018d [ 66.472297][ C2] R10: ffff888119400c67 R11: ffffed102328018c R12: 000000000000000e [ 66.474833][ C2] R13: ffff88810abb3ffa R14: 00000000000001c0 R15: 0000000000003ec0 [ 66.477361][ C2] FS: 00007f37c0c72f00(0000) GS:ffff888119200000(0000) knlGS:0000000000000000 [ 66.480214][ C2] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 66.482296][ C2] CR2: 000055a058808570 CR3: 000000011039e002 CR4: 00000000003706e0 [ 66.484811][ C2] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 66.487793][ C2] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 66.490424][ C2] Call Trace: [ 66.491469][ C2] [ 66.492374][ C2] ? eth_header+0x28/0x190 [ 66.494054][ C2] ? eth_header+0x28/0x190 [ 66.495401][ C2] skb_push.cold.99+0x22/0x22 [ 66.496700][ C2] eth_header+0x28/0x190 [ 66.497867][ C2] neigh_resolve_output+0x3de/0x720 [ 66.499615][ C2] ? __neigh_update+0x7e8/0x20a0 [ 66.501176][ C2] __neigh_update+0x8bd/0x20a0 [ 66.502749][ C2] ndisc_update+0x34/0xc0 [ 66.504010][ C2] ndisc_recv_na+0x8da/0xb80 [ 66.505041][ C2] ? pndisc_redo+0x20/0x20 [ 66.505888][ C2] ? rcu_read_lock_sched_held+0xc0/0xc0 [ 66.506965][ C2] ndisc_rcv+0x3a0/0x470 [ 66.507797][ C2] icmpv6_rcv+0xad9/0x1b00 [ 66.508645][ C2] ip6_protocol_deliver_rcu+0xcd6/0x1560 [ 66.509719][ C2] ip6_input_finish+0x5b/0xf0 [ 66.510615][ C2] ip6_input+0xcd/0x2d0 [ 66.511406][ C2] ? ip6_input_finish+0xf0/0xf0 [ 66.512327][ C2] ? rcu_read_lock_held+0x91/0xa0 [ 66.513279][ C2] ? ip6_protocol_deliver_rcu+0x1560/0x1560 [ 66.514414][ C2] ipv6_rcv+0xe8/0x300 [ ... ] Acked-by: Guillaume Nault Fixes: 571912c69f0e ("net: UDP tunnel encapsulation module for tunnelling different protocols like MPLS, IP, NSH etc.") Signed-off-by: Taehee Yoo Link: https://lore.kernel.org/r/20201228152146.24270-1-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/bareudp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index aea10196c222..708171c0d628 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -380,7 +380,7 @@ static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev, goto free_dst; min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len + - BAREUDP_BASE_HLEN + info->options_len + sizeof(struct iphdr); + BAREUDP_BASE_HLEN + info->options_len + sizeof(struct ipv6hdr); err = skb_cow_head(skb, min_headroom); if (unlikely(err)) -- cgit 1.4.1 From 01e31bea7e622f1890c274f4aaaaf8bccd296aa5 Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Tue, 29 Dec 2020 10:01:48 +0800 Subject: vhost_net: fix ubuf refcount incorrectly when sendmsg fails Currently the vhost_zerocopy_callback() maybe be called to decrease the refcount when sendmsg fails in tun. The error handling in vhost handle_tx_zerocopy() will try to decrease the same refcount again. This is wrong. To fix this issue, we only call vhost_net_ubuf_put() when vq->heads[nvq->desc].len == VHOST_DMA_IN_PROGRESS. Fixes: bab632d69ee4 ("vhost: vhost TX zero-copy support") Signed-off-by: Yunjian Wang Acked-by: Willem de Bruijn Acked-by: Michael S. Tsirkin Acked-by: Jason Wang Link: https://lore.kernel.org/r/1609207308-20544-1-git-send-email-wangyunjian@huawei.com Signed-off-by: Jakub Kicinski --- drivers/vhost/net.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 531a00d703cd..c8784dfafdd7 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -863,6 +863,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) size_t len, total_len = 0; int err; struct vhost_net_ubuf_ref *ubufs; + struct ubuf_info *ubuf; bool zcopy_used; int sent_pkts = 0; @@ -895,9 +896,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) /* use msg_control to pass vhost zerocopy ubuf info to skb */ if (zcopy_used) { - struct ubuf_info *ubuf; ubuf = nvq->ubuf_info + nvq->upend_idx; - vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head); vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS; ubuf->callback = vhost_zerocopy_callback; @@ -927,7 +926,8 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) err = sock->ops->sendmsg(sock, &msg, len); if (unlikely(err < 0)) { if (zcopy_used) { - vhost_net_ubuf_put(ubufs); + if (vq->heads[ubuf->desc].len == VHOST_DMA_IN_PROGRESS) + vhost_net_ubuf_put(ubufs); nvq->upend_idx = ((unsigned)nvq->upend_idx - 1) % UIO_MAXIOV; } -- cgit 1.4.1 From 17e94567c57df3d9609e6bacaed9247c4f2629e2 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Tue, 29 Dec 2020 11:08:38 +0200 Subject: docs: networking: packet_mmap: fix formatting for C macros The citation of macro definitions should appear in a code block. Signed-off-by: Baruch Siach Link: https://lore.kernel.org/r/5cb47005e7a59b64299e038827e295822193384c.1609232919.git.baruch@tkos.co.il Signed-off-by: Jakub Kicinski --- Documentation/networking/packet_mmap.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/packet_mmap.rst b/Documentation/networking/packet_mmap.rst index 6c009ceb1183..f3646c80b019 100644 --- a/Documentation/networking/packet_mmap.rst +++ b/Documentation/networking/packet_mmap.rst @@ -437,7 +437,7 @@ and the following flags apply: Capture process ^^^^^^^^^^^^^^^ - from include/linux/if_packet.h +From include/linux/if_packet.h:: #define TP_STATUS_COPY (1 << 1) #define TP_STATUS_LOSING (1 << 2) -- cgit 1.4.1 From e4da63cda51f17fa1e86a10e84d47d692932530d Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Tue, 29 Dec 2020 11:08:39 +0200 Subject: docs: networking: packet_mmap: fix old config reference Before commit 889b8f964f2f ("packet: Kill CONFIG_PACKET_MMAP.") there used to be a CONFIG_PACKET_MMAP config symbol that depended on CONFIG_PACKET. The text still implies that PACKET_MMAP can be disabled. Remove that from the text, as well as reference to old kernel versions. Also, drop reference to broken link to information for pre 2.6.5 kernels. Make a slight working improvement (s/In/On/) while at it. Signed-off-by: Baruch Siach Link: https://lore.kernel.org/r/80089f3783372c8fd7833f28ce774a171b2ef252.1609232919.git.baruch@tkos.co.il Signed-off-by: Jakub Kicinski --- Documentation/networking/packet_mmap.rst | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Documentation/networking/packet_mmap.rst b/Documentation/networking/packet_mmap.rst index f3646c80b019..500ef60b1b82 100644 --- a/Documentation/networking/packet_mmap.rst +++ b/Documentation/networking/packet_mmap.rst @@ -8,7 +8,7 @@ Abstract ======== This file documents the mmap() facility available with the PACKET -socket interface on 2.4/2.6/3.x kernels. This type of sockets is used for +socket interface. This type of sockets is used for i) capture network traffic with utilities like tcpdump, ii) transmit network traffic, or any other that needs raw @@ -25,12 +25,12 @@ Please send your comments to Why use PACKET_MMAP =================== -In Linux 2.4/2.6/3.x if PACKET_MMAP is not enabled, the capture process is very +Non PACKET_MMAP capture process (plain AF_PACKET) is very inefficient. It uses very limited buffers and requires one system call to capture each packet, it requires two if you want to get packet's timestamp (like libpcap always does). -In the other hand PACKET_MMAP is very efficient. PACKET_MMAP provides a size +On the other hand PACKET_MMAP is very efficient. PACKET_MMAP provides a size configurable circular buffer mapped in user space that can be used to either send or receive packets. This way reading packets just needs to wait for them, most of the time there is no need to issue a single system call. Concerning @@ -252,8 +252,7 @@ PACKET_MMAP setting constraints In kernel versions prior to 2.4.26 (for the 2.4 branch) and 2.6.5 (2.6 branch), the PACKET_MMAP buffer could hold only 32768 frames in a 32 bit architecture or -16384 in a 64 bit architecture. For information on these kernel versions -see http://pusa.uv.es/~ulisses/packet_mmap/packet_mmap.pre-2.4.26_2.6.5.txt +16384 in a 64 bit architecture. Block size limit ---------------- -- cgit 1.4.1 From 862aecbd9569e563b979c0e23a908b43cda4b0b9 Mon Sep 17 00:00:00 2001 From: YANG LI Date: Wed, 30 Dec 2020 15:23:14 +0800 Subject: ibmvnic: fix: NULL pointer dereference. The error is due to dereference a null pointer in function reset_one_sub_crq_queue(): if (!scrq) { netdev_dbg(adapter->netdev, "Invalid scrq reset. irq (%d) or msgs(%p).\n", scrq->irq, scrq->msgs); return -EINVAL; } If the expression is true, scrq must be a null pointer and cannot dereference. Fixes: 9281cf2d5840 ("ibmvnic: avoid memset null scrq msgs") Signed-off-by: YANG LI Reported-by: Abaci Acked-by: Lijun Pan Link: https://lore.kernel.org/r/1609312994-121032-1-git-send-email-abaci-bugfix@linux.alibaba.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 91ebcde30292..9778c83150f1 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2981,9 +2981,7 @@ static int reset_one_sub_crq_queue(struct ibmvnic_adapter *adapter, int rc; if (!scrq) { - netdev_dbg(adapter->netdev, - "Invalid scrq reset. irq (%d) or msgs (%p).\n", - scrq->irq, scrq->msgs); + netdev_dbg(adapter->netdev, "Invalid scrq reset.\n"); return -EINVAL; } -- cgit 1.4.1 From 1d0d561ad1d7606bb745c1ed9478e7206860e56e Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 4 Jan 2021 10:38:02 +0000 Subject: net: macb: Correct usage of MACB_CAPS_CLK_HW_CHG flag A new flag MACB_CAPS_CLK_HW_CHG was added and all callers of macb_set_tx_clk were gated on the presence of this flag. - if (!clk) + if (!bp->tx_clk || !(bp->caps & MACB_CAPS_CLK_HW_CHG)) However the flag was not added to anything other than the new sama7g5_gem, turning that function call into a no op for all other systems. This breaks the networking on Zynq. The commit message adding this states: a new capability so that macb_set_tx_clock() to not be called for IPs having this capability This strongly implies that present of the flag was intended to skip the function not absence of the flag. Update the if statement to this effect, which repairs the existing users. Fixes: daafa1d33cc9 ("net: macb: add capability to not set the clock rate") Suggested-by: Andrew Lunn Signed-off-by: Charles Keepax Reviewed-by: Claudiu Beznea Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20210104103802.13091-1-ckeepax@opensource.cirrus.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index d5d910916c2e..814a5b10141d 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -467,7 +467,7 @@ static void macb_set_tx_clk(struct macb *bp, int speed) { long ferr, rate, rate_rounded; - if (!bp->tx_clk || !(bp->caps & MACB_CAPS_CLK_HW_CHG)) + if (!bp->tx_clk || (bp->caps & MACB_CAPS_CLK_HW_CHG)) return; switch (speed) { -- cgit 1.4.1 From 2ff2c7e274392871bfdee00ff2adbb8ebae5d240 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 30 Dec 2020 13:42:51 +0200 Subject: selftests: mlxsw: Set headroom size of correct port The test was setting the headroom size of the wrong port. This was not visible because of a firmware bug that canceled this bug. Set the headroom size of the correct port, so that the test will pass with both old and new firmware versions. Fixes: bfa804784e32 ("selftests: mlxsw: Add a PFC test") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Link: https://lore.kernel.org/r/20201230114251.394009-1-idosch@idosch.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh index 4d900bc1f76c..5c7700212f75 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh @@ -230,7 +230,7 @@ switch_create() __mlnx_qos -i $swp4 --pfc=0,1,0,0,0,0,0,0 >/dev/null # PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which # is (-2*MTU) about 80K of delay provision. - __mlnx_qos -i $swp3 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp4 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null # bridges # ------- -- cgit 1.4.1 From cfd82dfc9799c53ef109343a23af006a0f6860a9 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Wed, 30 Dec 2020 16:24:51 +0100 Subject: net: usb: qmi_wwan: add Quectel EM160R-GL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New modem using ff/ff/30 for QCDM, ff/00/00 for AT and NMEA, and ff/ff/ff for RMNET/QMI. T: Bus=02 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=ef(misc ) Sub=02 Prot=01 MxPS= 9 #Cfgs= 1 P: Vendor=2c7c ProdID=0620 Rev= 4.09 S: Manufacturer=Quectel S: Product=EM160R-GL S: SerialNumber=e31cedc1 C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=896mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=(none) E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=88(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms Signed-off-by: Bjørn Mork Link: https://lore.kernel.org/r/20201230152451.245271-1-bjorn@mork.no Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index d166c321ee9b..af19513a9f75 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1013,6 +1013,7 @@ static const struct usb_device_id products[] = { {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0125)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0306)}, /* Quectel EP06/EG06/EM06 */ {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0512)}, /* Quectel EG12/EM12 */ + {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0620)}, /* Quectel EM160R-GL */ {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0800)}, /* Quectel RM500Q-GL */ /* 3. Combined interface devices matching on interface number */ -- cgit 1.4.1 From e80bd76fbf563cc7ed8c9e9f3bbcdf59b0897f69 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 30 Dec 2020 19:33:34 +0100 Subject: r8169: work around power-saving bug on some chip versions A user reported failing network with RTL8168dp (a quite rare chip version). Realtek confirmed that few chip versions suffer from a PLL power-down hw bug. Fixes: 07df5bd874f0 ("r8169: power down chip in probe") Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/a1c39460-d533-7f9e-fa9d-2b8990b02426@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 46d8510b2fe2..a569abe7f5ef 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2207,7 +2207,8 @@ static void rtl_pll_power_down(struct rtl8169_private *tp) } switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_33: + case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_26: + case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_33: case RTL_GIGA_MAC_VER_37: case RTL_GIGA_MAC_VER_39: case RTL_GIGA_MAC_VER_43: @@ -2233,7 +2234,8 @@ static void rtl_pll_power_down(struct rtl8169_private *tp) static void rtl_pll_power_up(struct rtl8169_private *tp) { switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_33: + case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_26: + case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_33: case RTL_GIGA_MAC_VER_37: case RTL_GIGA_MAC_VER_39: case RTL_GIGA_MAC_VER_43: -- cgit 1.4.1 From b40f97b91a3b167ab22c9e9f1ef00b1615ff01e9 Mon Sep 17 00:00:00 2001 From: Xie He Date: Thu, 31 Dec 2020 09:43:31 -0800 Subject: net: lapb: Decrease the refcount of "struct lapb_cb" in lapb_device_event In lapb_device_event, lapb_devtostruct is called to get a reference to an object of "struct lapb_cb". lapb_devtostruct increases the refcount of the object and returns a pointer to it. However, we didn't decrease the refcount after we finished using the pointer. This patch fixes this problem. Fixes: a4989fa91110 ("net/lapb: support netdev events") Cc: Martin Schiller Signed-off-by: Xie He Link: https://lore.kernel.org/r/20201231174331.64539-1-xie.he.0141@gmail.com Signed-off-by: Jakub Kicinski --- net/lapb/lapb_iface.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c index 213ea7abc9ab..40961889e9c0 100644 --- a/net/lapb/lapb_iface.c +++ b/net/lapb/lapb_iface.c @@ -489,6 +489,7 @@ static int lapb_device_event(struct notifier_block *this, unsigned long event, break; } + lapb_put(lapb); return NOTIFY_DONE; } -- cgit 1.4.1 From c1a9ec7e5d577a9391660800c806c53287fca991 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 3 Jan 2021 02:25:43 +0100 Subject: net: dsa: lantiq_gswip: Enable GSWIP_MII_CFG_EN also for internal PHYs Enable GSWIP_MII_CFG_EN also for internal PHYs to make traffic flow. Without this the PHY link is detected properly and ethtool statistics for TX are increasing but there's no RX traffic coming in. Fixes: 14fceff4771e51 ("net: dsa: Add Lantiq / Intel DSA driver for vrx200") Suggested-by: Hauke Mehrtens Signed-off-by: Martin Blumenstingl Acked-by: Hauke Mehrtens Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq_gswip.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 09701c17f3f6..5d378c8026f0 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -1541,9 +1541,7 @@ static void gswip_phylink_mac_link_up(struct dsa_switch *ds, int port, { struct gswip_priv *priv = ds->priv; - /* Enable the xMII interface only for the external PHY */ - if (interface != PHY_INTERFACE_MODE_INTERNAL) - gswip_mii_mask_cfg(priv, 0, GSWIP_MII_CFG_EN, port); + gswip_mii_mask_cfg(priv, 0, GSWIP_MII_CFG_EN, port); } static void gswip_get_strings(struct dsa_switch *ds, int port, u32 stringset, -- cgit 1.4.1 From 709a3c9dff2a639966ae7d8ba6239d2b8aba036d Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sun, 3 Jan 2021 02:25:44 +0100 Subject: net: dsa: lantiq_gswip: Fix GSWIP_MII_CFG(p) register access There is one GSWIP_MII_CFG register for each switch-port except the CPU port. The register offset for the first port is 0x0, 0x02 for the second, 0x04 for the third and so on. Update the driver to not only restrict the GSWIP_MII_CFG registers to ports 0, 1 and 5. Handle ports 0..5 instead but skip the CPU port. This means we are not overwriting the configuration for the third port (port two since we start counting from zero) with the settings for the sixth port (with number five) anymore. The GSWIP_MII_PCDU(p) registers are not updated because there's really only three (one for each of the following ports: 0, 1, 5). Fixes: 14fceff4771e51 ("net: dsa: Add Lantiq / Intel DSA driver for vrx200") Signed-off-by: Martin Blumenstingl Acked-by: Hauke Mehrtens Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq_gswip.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 5d378c8026f0..4b36d89bec06 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -92,9 +92,7 @@ GSWIP_MDIO_PHY_FDUP_MASK) /* GSWIP MII Registers */ -#define GSWIP_MII_CFG0 0x00 -#define GSWIP_MII_CFG1 0x02 -#define GSWIP_MII_CFG5 0x04 +#define GSWIP_MII_CFGp(p) (0x2 * (p)) #define GSWIP_MII_CFG_EN BIT(14) #define GSWIP_MII_CFG_LDCLKDIS BIT(12) #define GSWIP_MII_CFG_MODE_MIIP 0x0 @@ -392,17 +390,9 @@ static void gswip_mii_mask(struct gswip_priv *priv, u32 clear, u32 set, static void gswip_mii_mask_cfg(struct gswip_priv *priv, u32 clear, u32 set, int port) { - switch (port) { - case 0: - gswip_mii_mask(priv, clear, set, GSWIP_MII_CFG0); - break; - case 1: - gswip_mii_mask(priv, clear, set, GSWIP_MII_CFG1); - break; - case 5: - gswip_mii_mask(priv, clear, set, GSWIP_MII_CFG5); - break; - } + /* There's no MII_CFG register for the CPU port */ + if (!dsa_is_cpu_port(priv->ds, port)) + gswip_mii_mask(priv, clear, set, GSWIP_MII_CFGp(port)); } static void gswip_mii_mask_pcdu(struct gswip_priv *priv, u32 clear, u32 set, @@ -822,9 +812,8 @@ static int gswip_setup(struct dsa_switch *ds) gswip_mdio_mask(priv, 0xff, 0x09, GSWIP_MDIO_MDC_CFG1); /* Disable the xMII link */ - gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, 0); - gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, 1); - gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, 5); + for (i = 0; i < priv->hw_info->max_ports; i++) + gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, i); /* enable special tag insertion on cpu port */ gswip_switch_mask(priv, 0, GSWIP_FDMA_PCTRL_STEN, -- cgit 1.4.1 From 81b6d05ccad4f3d8a9dfb091fb46ad6978ee40e4 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 4 Jan 2021 20:36:35 +0000 Subject: io_uring: synchronise IOPOLL on task_submit fail io_req_task_submit() might be called for IOPOLL, do the fail path under uring_lock to comply with IOPOLL synchronisation based solely on it. Cc: stable@vger.kernel.org # 5.5+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index ca46f314640b..5be33fd8b6bc 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2126,15 +2126,16 @@ static void io_req_task_cancel(struct callback_head *cb) static void __io_req_task_submit(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; + bool fail; - if (!__io_sq_thread_acquire_mm(ctx) && - !__io_sq_thread_acquire_files(ctx)) { - mutex_lock(&ctx->uring_lock); + fail = __io_sq_thread_acquire_mm(ctx) || + __io_sq_thread_acquire_files(ctx); + mutex_lock(&ctx->uring_lock); + if (!fail) __io_queue_sqe(req, NULL); - mutex_unlock(&ctx->uring_lock); - } else { + else __io_req_task_cancel(req, -EFAULT); - } + mutex_unlock(&ctx->uring_lock); } static void io_req_task_submit(struct callback_head *cb) -- cgit 1.4.1 From 6c503150ae33ee19036255cfda0998463613352c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 4 Jan 2021 20:36:36 +0000 Subject: io_uring: patch up IOPOLL overflow_flush sync IOPOLL skips completion locking but keeps it under uring_lock, thus io_cqring_overflow_flush() and so io_cqring_events() need additional locking with uring_lock in some cases for IOPOLL. Remove __io_cqring_overflow_flush() from io_cqring_events(), introduce a wrapper around flush doing needed synchronisation and call it by hand. Cc: stable@vger.kernel.org # 5.5+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 78 +++++++++++++++++++++++++++++++---------------------------- 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5be33fd8b6bc..445035b24a50 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1713,9 +1713,9 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) } /* Returns true if there are no backlogged entries after the flush */ -static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, - struct task_struct *tsk, - struct files_struct *files) +static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, + struct task_struct *tsk, + struct files_struct *files) { struct io_rings *rings = ctx->rings; struct io_kiocb *req, *tmp; @@ -1768,6 +1768,20 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, return all_flushed; } +static void io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, + struct task_struct *tsk, + struct files_struct *files) +{ + if (test_bit(0, &ctx->cq_check_overflow)) { + /* iopoll syncs against uring_lock, not completion_lock */ + if (ctx->flags & IORING_SETUP_IOPOLL) + mutex_lock(&ctx->uring_lock); + __io_cqring_overflow_flush(ctx, force, tsk, files); + if (ctx->flags & IORING_SETUP_IOPOLL) + mutex_unlock(&ctx->uring_lock); + } +} + static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags) { struct io_ring_ctx *ctx = req->ctx; @@ -2314,20 +2328,8 @@ static void io_double_put_req(struct io_kiocb *req) io_free_req(req); } -static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush) +static unsigned io_cqring_events(struct io_ring_ctx *ctx) { - if (test_bit(0, &ctx->cq_check_overflow)) { - /* - * noflush == true is from the waitqueue handler, just ensure - * we wake up the task, and the next invocation will flush the - * entries. We cannot safely to it from here. - */ - if (noflush) - return -1U; - - io_cqring_overflow_flush(ctx, false, NULL, NULL); - } - /* See comment at the top of this file */ smp_rmb(); return __io_cqring_events(ctx); @@ -2552,7 +2554,9 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min) * If we do, we can potentially be spinning for commands that * already triggered a CQE (eg in error). */ - if (io_cqring_events(ctx, false)) + if (test_bit(0, &ctx->cq_check_overflow)) + __io_cqring_overflow_flush(ctx, false, NULL, NULL); + if (io_cqring_events(ctx)) break; /* @@ -6827,7 +6831,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) /* if we have a backlog and couldn't flush it all, return BUSY */ if (test_bit(0, &ctx->sq_check_overflow)) { - if (!io_cqring_overflow_flush(ctx, false, NULL, NULL)) + if (!__io_cqring_overflow_flush(ctx, false, NULL, NULL)) return -EBUSY; } @@ -7090,7 +7094,7 @@ struct io_wait_queue { unsigned nr_timeouts; }; -static inline bool io_should_wake(struct io_wait_queue *iowq, bool noflush) +static inline bool io_should_wake(struct io_wait_queue *iowq) { struct io_ring_ctx *ctx = iowq->ctx; @@ -7099,7 +7103,7 @@ static inline bool io_should_wake(struct io_wait_queue *iowq, bool noflush) * started waiting. For timeouts, we always want to return to userspace, * regardless of event count. */ - return io_cqring_events(ctx, noflush) >= iowq->to_wait || + return io_cqring_events(ctx) >= iowq->to_wait || atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts; } @@ -7109,11 +7113,13 @@ static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode, struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue, wq); - /* use noflush == true, as we can't safely rely on locking context */ - if (!io_should_wake(iowq, true)) - return -1; - - return autoremove_wake_function(curr, mode, wake_flags, key); + /* + * Cannot safely flush overflowed CQEs from here, ensure we wake up + * the task, and the next invocation will do it. + */ + if (io_should_wake(iowq) || test_bit(0, &iowq->ctx->cq_check_overflow)) + return autoremove_wake_function(curr, mode, wake_flags, key); + return -1; } static int io_run_task_work_sig(void) @@ -7150,7 +7156,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, int ret = 0; do { - if (io_cqring_events(ctx, false) >= min_events) + io_cqring_overflow_flush(ctx, false, NULL, NULL); + if (io_cqring_events(ctx) >= min_events) return 0; if (!io_run_task_work()) break; @@ -7178,6 +7185,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts); trace_io_uring_cqring_wait(ctx, min_events); do { + io_cqring_overflow_flush(ctx, false, NULL, NULL); prepare_to_wait_exclusive(&ctx->wait, &iowq.wq, TASK_INTERRUPTIBLE); /* make sure we run task_work before checking for signals */ @@ -7186,8 +7194,10 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, continue; else if (ret < 0) break; - if (io_should_wake(&iowq, false)) + if (io_should_wake(&iowq)) break; + if (test_bit(0, &ctx->cq_check_overflow)) + continue; if (uts) { timeout = schedule_timeout(timeout); if (timeout == 0) { @@ -8625,7 +8635,8 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait) smp_rmb(); if (!io_sqring_full(ctx)) mask |= EPOLLOUT | EPOLLWRNORM; - if (io_cqring_events(ctx, false)) + io_cqring_overflow_flush(ctx, false, NULL, NULL); + if (io_cqring_events(ctx)) mask |= EPOLLIN | EPOLLRDNORM; return mask; @@ -8683,7 +8694,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) /* if force is set, the ring is going away. always drop after that */ ctx->cq_overflow_flushed = 1; if (ctx->rings) - io_cqring_overflow_flush(ctx, true, NULL, NULL); + __io_cqring_overflow_flush(ctx, true, NULL, NULL); mutex_unlock(&ctx->uring_lock); io_kill_timeouts(ctx, NULL, NULL); @@ -8857,9 +8868,7 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, } io_cancel_defer_files(ctx, task, files); - io_ring_submit_lock(ctx, (ctx->flags & IORING_SETUP_IOPOLL)); io_cqring_overflow_flush(ctx, true, task, files); - io_ring_submit_unlock(ctx, (ctx->flags & IORING_SETUP_IOPOLL)); if (!files) __io_uring_cancel_task_requests(ctx, task); @@ -9195,13 +9204,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, */ ret = 0; if (ctx->flags & IORING_SETUP_SQPOLL) { - if (!list_empty_careful(&ctx->cq_overflow_list)) { - bool needs_lock = ctx->flags & IORING_SETUP_IOPOLL; + io_cqring_overflow_flush(ctx, false, NULL, NULL); - io_ring_submit_lock(ctx, needs_lock); - io_cqring_overflow_flush(ctx, false, NULL, NULL); - io_ring_submit_unlock(ctx, needs_lock); - } if (flags & IORING_ENTER_SQ_WAKEUP) wake_up(&ctx->sq_data->wait); if (flags & IORING_ENTER_SQ_WAIT) -- cgit 1.4.1 From de7f1d9e99d8b99e4e494ad8fcd91f0c4c5c9357 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 4 Jan 2021 20:43:29 +0000 Subject: io_uring: drop file refs after task cancel io_uring fds marked O_CLOEXEC and we explicitly cancel all requests before going through exec, so we don't want to leave task's file references to not our anymore io_uring instances. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 445035b24a50..85de42c42433 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8958,6 +8958,15 @@ static void io_uring_attempt_task_drop(struct file *file) io_uring_del_task_file(file); } +static void io_uring_remove_task_files(struct io_uring_task *tctx) +{ + struct file *file; + unsigned long index; + + xa_for_each(&tctx->xa, index, file) + io_uring_del_task_file(file); +} + void __io_uring_files_cancel(struct files_struct *files) { struct io_uring_task *tctx = current->io_uring; @@ -8966,16 +8975,12 @@ void __io_uring_files_cancel(struct files_struct *files) /* make sure overflow events are dropped */ atomic_inc(&tctx->in_idle); - - xa_for_each(&tctx->xa, index, file) { - struct io_ring_ctx *ctx = file->private_data; - - io_uring_cancel_task_requests(ctx, files); - if (files) - io_uring_del_task_file(file); - } - + xa_for_each(&tctx->xa, index, file) + io_uring_cancel_task_requests(file->private_data, files); atomic_dec(&tctx->in_idle); + + if (files) + io_uring_remove_task_files(tctx); } static s64 tctx_inflight(struct io_uring_task *tctx) @@ -9038,6 +9043,8 @@ void __io_uring_task_cancel(void) } while (1); atomic_dec(&tctx->in_idle); + + io_uring_remove_task_files(tctx); } static int io_uring_flush(struct file *file, void *data) -- cgit 1.4.1 From 90df08538c07b7135703358a0c8c08d97889a704 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 4 Jan 2021 20:43:30 +0000 Subject: io_uring: cancel more aggressively in exit_work While io_ring_exit_work() is running new requests of all sorts may be issued, so it should do a bit more to cancel them, otherwise they may just get stuck. e.g. in io-wq, in poll lists, etc. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 85de42c42433..5bccb235271f 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -992,6 +992,9 @@ enum io_mem_account { ACCT_PINNED, }; +static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx, + struct task_struct *task); + static void destroy_fixed_file_ref_node(struct fixed_file_ref_node *ref_node); static struct fixed_file_ref_node *alloc_fixed_file_ref_node( struct io_ring_ctx *ctx); @@ -8675,7 +8678,7 @@ static void io_ring_exit_work(struct work_struct *work) * as nobody else will be looking for them. */ do { - io_iopoll_try_reap_events(ctx); + __io_uring_cancel_task_requests(ctx, NULL); } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20)); io_ring_ctx_free(ctx); } @@ -8830,9 +8833,11 @@ static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx, enum io_wq_cancel cret; bool ret = false; - cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true); - if (cret != IO_WQ_CANCEL_NOTFOUND) - ret = true; + if (ctx->io_wq) { + cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, + &cancel, true); + ret |= (cret != IO_WQ_CANCEL_NOTFOUND); + } /* SQPOLL thread does its own polling */ if (!(ctx->flags & IORING_SETUP_SQPOLL)) { -- cgit 1.4.1 From 75353bcd2184010f08a3ed2f0da019bd9d604e1e Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 24 Dec 2020 15:13:57 +0000 Subject: drm/i915: clear the shadow batch The shadow batch is an internal object, which doesn't have any page clearing, and since the batch_len can be smaller than the object, we should take care to clear it. Testcase: igt/gen9_exec_parse/shadow-peek Fixes: 4f7af1948abc ("drm/i915: Support ro ppgtt mapped cmdparser shadow buffers") Signed-off-by: Matthew Auld Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20201224151358.401345-1-matthew.auld@intel.com Cc: stable@vger.kernel.org (cherry picked from commit eeb52ee6c4a429ec301faf1dc48988744960786e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_cmd_parser.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 93265951fdbb..b0899b665e85 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1166,7 +1166,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, } } if (IS_ERR(src)) { - unsigned long x, n; + unsigned long x, n, remain; void *ptr; /* @@ -1177,14 +1177,15 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, * We don't care about copying too much here as we only * validate up to the end of the batch. */ + remain = length; if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) - length = round_up(length, + remain = round_up(remain, boot_cpu_data.x86_clflush_size); ptr = dst; x = offset_in_page(offset); - for (n = offset >> PAGE_SHIFT; length; n++) { - int len = min(length, PAGE_SIZE - x); + for (n = offset >> PAGE_SHIFT; remain; n++) { + int len = min(remain, PAGE_SIZE - x); src = kmap_atomic(i915_gem_object_get_page(src_obj, n)); if (needs_clflush) @@ -1193,13 +1194,15 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, kunmap_atomic(src); ptr += len; - length -= len; + remain -= len; x = 0; } } i915_gem_object_unpin_pages(src_obj); + memset32(dst + length, 0, (dst_obj->base.size - length) / sizeof(u32)); + /* dst_obj is returned with vmap pinned */ return dst; } @@ -1392,11 +1395,6 @@ static unsigned long *alloc_whitelist(u32 batch_length) #define LENGTH_BIAS 2 -static bool shadow_needs_clflush(struct drm_i915_gem_object *obj) -{ - return !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE); -} - /** * intel_engine_cmd_parser() - parse a batch buffer for privilege violations * @engine: the engine on which the batch is to execute @@ -1538,16 +1536,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, ret = 0; /* allow execution */ } } - - if (shadow_needs_clflush(shadow->obj)) - drm_clflush_virt_range(batch_end, 8); } - if (shadow_needs_clflush(shadow->obj)) { - void *ptr = page_mask_bits(shadow->obj->mm.mapping); - - drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr); - } + i915_gem_object_flush_map(shadow->obj); if (!IS_ERR_OR_NULL(jump_whitelist)) kfree(jump_whitelist); -- cgit 1.4.1 From 641382e9b44fba81a0778e1914ee35b8471121f9 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 24 Dec 2020 15:13:58 +0000 Subject: drm/i915: clear the gpu reloc batch The reloc batch is short lived but can exist in the user visible ppGTT, and since it's backed by an internal object, which lacks page clearing, we should take care to clear it upfront. Signed-off-by: Matthew Auld Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20201224151358.401345-2-matthew.auld@intel.com Cc: stable@vger.kernel.org (cherry picked from commit 26ebc511e799f621357982ccc37a7987a56a00f4) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index bcc80f428172..bd3046e5a934 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1046,7 +1046,7 @@ static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cach GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32)); cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END; - __i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1)); + i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); intel_gt_chipset_flush(cache->rq->engine->gt); @@ -1296,6 +1296,8 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, goto err_pool; } + memset32(cmd, 0, pool->obj->base.size / sizeof(u32)); + batch = i915_vma_instance(pool->obj, vma->vm, NULL); if (IS_ERR(batch)) { err = PTR_ERR(batch); -- cgit 1.4.1 From 557862535c2cad6de6f6fb12312b7a6d09c06407 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 29 Dec 2020 12:08:28 +0000 Subject: drm/i915/gt: Define guc firmware blob for older Cometlakes When splitting the Coffeelake define to also identify Cometlakes, I missed the double fw_def for Coffeelake. That is only newer Cometlakes use the cml specific guc firmware, older Cometlakes should use kbl firmware. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2859 Fixes: 5f4ae2704d59 ("drm/i915: Identify Cometlake platform") Signed-off-by: Chris Wilson Cc: # v5.9+ Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20201229120828.29931-1-chris@chris-wilson.co.uk (cherry picked from commit 70960ab27542d8dc322f909f516391f331fbd3f1) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 180c23e2e25e..602f1a0bc587 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -53,6 +53,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, fw_def(ELKHARTLAKE, 0, guc_def(ehl, 49, 0, 1), huc_def(ehl, 9, 0, 0)) \ fw_def(ICELAKE, 0, guc_def(icl, 49, 0, 1), huc_def(icl, 9, 0, 0)) \ fw_def(COMETLAKE, 5, guc_def(cml, 49, 0, 1), huc_def(cml, 4, 0, 0)) \ + fw_def(COMETLAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ fw_def(COFFEELAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ fw_def(GEMINILAKE, 0, guc_def(glk, 49, 0, 1), huc_def(glk, 4, 0, 0)) \ fw_def(KABYLAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ -- cgit 1.4.1 From 9397d66212cdf7a21c66523f1583e5d63a609e84 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 30 Dec 2020 20:23:09 +0000 Subject: drm/i915/dp: Track pm_qos per connector MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since multiple connectors may run intel_dp_aux_xfer conncurrently, a single global pm_qos does not suffice. (One connector may disable the dma-latency boost prematurely while the second is still depending on it.) Instead of a single global pm_qos, track the pm_qos request for each intel_dp. v2: Move the pm_qos setup/teardown to intel_dp_aux_init/fini Fixes: 9ee32fea5fe8 ("drm/i915: irq-drive the dp aux communication") Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Imre Deak Reviewed-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20201230202309.23982-1-chris@chris-wilson.co.uk (cherry picked from commit b3304591f14b437b6bccd8dbff06006c11837031) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_types.h | 3 +++ drivers/gpu/drm/i915/display/intel_dp.c | 8 ++++++-- drivers/gpu/drm/i915/i915_drv.c | 5 ----- drivers/gpu/drm/i915/i915_drv.h | 3 --- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index ce82d654d0f2..34d78c654df3 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1436,6 +1436,9 @@ struct intel_dp { bool ycbcr_444_to_420; } dfp; + /* To control wakeup latency, e.g. for irq-driven dp aux transfers. */ + struct pm_qos_request pm_qos; + /* Display stream compression testing */ bool force_dsc_en; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 2165398d2c7c..37f1a10fd021 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1489,7 +1489,7 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp, * lowest possible wakeup latency and so prevent the cpu from going into * deep sleep states. */ - cpu_latency_qos_update_request(&i915->pm_qos, 0); + cpu_latency_qos_update_request(&intel_dp->pm_qos, 0); intel_dp_check_edp(intel_dp); @@ -1622,7 +1622,7 @@ done: ret = recv_bytes; out: - cpu_latency_qos_update_request(&i915->pm_qos, PM_QOS_DEFAULT_VALUE); + cpu_latency_qos_update_request(&intel_dp->pm_qos, PM_QOS_DEFAULT_VALUE); if (vdd) edp_panel_vdd_off(intel_dp, false); @@ -1898,6 +1898,9 @@ static i915_reg_t tgl_aux_data_reg(struct intel_dp *intel_dp, int index) static void intel_dp_aux_fini(struct intel_dp *intel_dp) { + if (cpu_latency_qos_request_active(&intel_dp->pm_qos)) + cpu_latency_qos_remove_request(&intel_dp->pm_qos); + kfree(intel_dp->aux.name); } @@ -1950,6 +1953,7 @@ intel_dp_aux_init(struct intel_dp *intel_dp) encoder->base.name); intel_dp->aux.transfer = intel_dp_aux_transfer; + cpu_latency_qos_add_request(&intel_dp->pm_qos, PM_QOS_DEFAULT_VALUE); } bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 320856b665a1..88ad754962af 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -578,8 +578,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) pci_set_master(pdev); - cpu_latency_qos_add_request(&dev_priv->pm_qos, PM_QOS_DEFAULT_VALUE); - intel_gt_init_workarounds(dev_priv); /* On the 945G/GM, the chipset reports the MSI capability on the @@ -626,7 +624,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) err_msi: if (pdev->msi_enabled) pci_disable_msi(pdev); - cpu_latency_qos_remove_request(&dev_priv->pm_qos); err_mem_regions: intel_memory_regions_driver_release(dev_priv); err_ggtt: @@ -648,8 +645,6 @@ static void i915_driver_hw_remove(struct drm_i915_private *dev_priv) if (pdev->msi_enabled) pci_disable_msi(pdev); - - cpu_latency_qos_remove_request(&dev_priv->pm_qos); } /** diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0a3ee4f9dc0a..632c713227dc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -891,9 +891,6 @@ struct drm_i915_private { bool display_irqs_enabled; - /* To control wakeup latency, e.g. for irq-driven dp aux transfers. */ - struct pm_qos_request pm_qos; - /* Sideband mailbox protection */ struct mutex sb_lock; struct pm_qos_request sb_qos; -- cgit 1.4.1 From 05f6f7271a38c482c5021967433f7b698e102c45 Mon Sep 17 00:00:00 2001 From: Qii Wang Date: Thu, 24 Dec 2020 20:26:07 +0800 Subject: i2c: mediatek: Fix apdma and i2c hand-shake timeout With the apdma remove hand-shake signal, it requirs special operation timing to reset i2c manually, otherwise the interrupt will not be triggered, i2c transmission will be timeout. Fixes: 8426fe70cfa4("i2c: mediatek: Add apdma sync in i2c driver") Signed-off-by: Qii Wang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mt65xx.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index 33de99b7bc20..0818d3e50734 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -38,6 +38,7 @@ #define I2C_IO_CONFIG_OPEN_DRAIN 0x0003 #define I2C_IO_CONFIG_PUSH_PULL 0x0000 #define I2C_SOFT_RST 0x0001 +#define I2C_HANDSHAKE_RST 0x0020 #define I2C_FIFO_ADDR_CLR 0x0001 #define I2C_DELAY_LEN 0x0002 #define I2C_TIME_CLR_VALUE 0x0000 @@ -45,6 +46,7 @@ #define I2C_WRRD_TRANAC_VALUE 0x0002 #define I2C_RD_TRANAC_VALUE 0x0001 #define I2C_SCL_MIS_COMP_VALUE 0x0000 +#define I2C_CHN_CLR_FLAG 0x0000 #define I2C_DMA_CON_TX 0x0000 #define I2C_DMA_CON_RX 0x0001 @@ -54,7 +56,9 @@ #define I2C_DMA_START_EN 0x0001 #define I2C_DMA_INT_FLAG_NONE 0x0000 #define I2C_DMA_CLR_FLAG 0x0000 +#define I2C_DMA_WARM_RST 0x0001 #define I2C_DMA_HARD_RST 0x0002 +#define I2C_DMA_HANDSHAKE_RST 0x0004 #define MAX_SAMPLE_CNT_DIV 8 #define MAX_STEP_CNT_DIV 64 @@ -475,11 +479,24 @@ static void mtk_i2c_init_hw(struct mtk_i2c *i2c) { u16 control_reg; - writel(I2C_DMA_HARD_RST, i2c->pdmabase + OFFSET_RST); - udelay(50); - writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST); - - mtk_i2c_writew(i2c, I2C_SOFT_RST, OFFSET_SOFTRESET); + if (i2c->dev_comp->dma_sync) { + writel(I2C_DMA_WARM_RST, i2c->pdmabase + OFFSET_RST); + udelay(10); + writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST); + udelay(10); + writel(I2C_DMA_HANDSHAKE_RST | I2C_DMA_HARD_RST, + i2c->pdmabase + OFFSET_RST); + mtk_i2c_writew(i2c, I2C_HANDSHAKE_RST | I2C_SOFT_RST, + OFFSET_SOFTRESET); + udelay(10); + writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST); + mtk_i2c_writew(i2c, I2C_CHN_CLR_FLAG, OFFSET_SOFTRESET); + } else { + writel(I2C_DMA_HARD_RST, i2c->pdmabase + OFFSET_RST); + udelay(50); + writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST); + mtk_i2c_writew(i2c, I2C_SOFT_RST, OFFSET_SOFTRESET); + } /* Set ioconfig */ if (i2c->use_push_pull) -- cgit 1.4.1 From d1c5246e08eb64991001d97a3bd119c93edbc79a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 2 Dec 2020 22:28:12 -0800 Subject: x86/mm: Fix leak of pmd ptlock Commit 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces") introduced a new location where a pmd was released, but neglected to run the pmd page destructor. In fact, this happened previously for a different pmd release path and was fixed by commit: c283610e44ec ("x86, mm: do not leak page->ptl for pmd page tables"). This issue was hidden until recently because the failure mode is silent, but commit: b2b29d6d0119 ("mm: account PMD tables like PTE tables") turns the failure mode into this signature: BUG: Bad page state in process lt-pmem-ns pfn:15943d page:000000007262ed7b refcount:0 mapcount:-1024 mapping:0000000000000000 index:0x0 pfn:0x15943d flags: 0xaffff800000000() raw: 00affff800000000 dead000000000100 0000000000000000 0000000000000000 raw: 0000000000000000 ffff913a029bcc08 00000000fffffbff 0000000000000000 page dumped because: nonzero mapcount [..] dump_stack+0x8b/0xb0 bad_page.cold+0x63/0x94 free_pcp_prepare+0x224/0x270 free_unref_page+0x18/0xd0 pud_free_pmd_page+0x146/0x160 ioremap_pud_range+0xe3/0x350 ioremap_page_range+0x108/0x160 __ioremap_caller.constprop.0+0x174/0x2b0 ? memremap+0x7a/0x110 memremap+0x7a/0x110 devm_memremap+0x53/0xa0 pmem_attach_disk+0x4ed/0x530 [nd_pmem] ? __devm_release_region+0x52/0x80 nvdimm_bus_probe+0x85/0x210 [libnvdimm] Given this is a repeat occurrence it seemed prudent to look for other places where this destructor might be missing and whether a better helper is needed. try_to_free_pmd_page() looks like a candidate, but testing with setting up and tearing down pmd mappings via the dax unit tests is thus far not triggering the failure. As for a better helper pmd_free() is close, but it is a messy fit due to requiring an @mm arg. Also, ___pmd_free_tlb() wants to call paravirt_tlb_remove_table() instead of free_page(), so open-coded pgtable_pmd_page_dtor() seems the best way forward for now. Debugged together with Matthew Wilcox . Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces") Signed-off-by: Dan Williams Signed-off-by: Borislav Petkov Tested-by: Yi Zhang Acked-by: Peter Zijlstra (Intel) Cc: Link: https://lkml.kernel.org/r/160697689204.605323.17629854984697045602.stgit@dwillia2-desk3.amr.corp.intel.com --- arch/x86/mm/pgtable.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index dfd82f51ba66..f6a9e2e36642 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -829,6 +829,8 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr) } free_page((unsigned long)pmd_sv); + + pgtable_pmd_page_dtor(virt_to_page(pmd)); free_page((unsigned long)pmd); return 1; -- cgit 1.4.1 From 311bea3cb9ee20ef150ca76fc60a592bf6b159f5 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 17 Dec 2020 16:24:32 -0800 Subject: arm64: link with -z norelro for LLD or aarch64-elf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With GNU binutils 2.35+, linking with BFD produces warnings for vmlinux: aarch64-linux-gnu-ld: warning: -z norelro ignored BFD can produce this warning when the target emulation mode does not support RELRO program headers, and -z relro or -z norelro is passed. Alan Modra clarifies: The default linker emulation for an aarch64-linux ld.bfd is -maarch64linux, the default for an aarch64-elf linker is -maarch64elf. They are not equivalent. If you choose -maarch64elf you get an emulation that doesn't support -z relro. The ARCH=arm64 kernel prefers -maarch64elf, but may fall back to -maarch64linux based on the toolchain configuration. LLD will always create RELRO program header regardless of target emulation. To avoid the above warning when linking with BFD, pass -z norelro only when linking with LLD or with -maarch64linux. Fixes: 3b92fa7485eb ("arm64: link with -z norelro regardless of CONFIG_RELOCATABLE") Fixes: 3bbd3db86470 ("arm64: relocatable: fix inconsistencies in linker script and options") Cc: # 5.0.x- Reported-by: kernelci.org bot Reported-by: Quentin Perret Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Acked-by: Ard Biesheuvel Cc: Alan Modra Cc: Fāng-ruì Sòng Link: https://lore.kernel.org/r/20201218002432.788499-1-ndesaulniers@google.com Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 6be9b3750250..90309208bb28 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -10,7 +10,7 @@ # # Copyright (C) 1995-2001 by Russell King -LDFLAGS_vmlinux :=--no-undefined -X -z norelro +LDFLAGS_vmlinux :=--no-undefined -X ifeq ($(CONFIG_RELOCATABLE), y) # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour @@ -115,16 +115,20 @@ KBUILD_CPPFLAGS += -mbig-endian CHECKFLAGS += -D__AARCH64EB__ # Prefer the baremetal ELF build target, but not all toolchains include # it so fall back to the standard linux version if needed. -KBUILD_LDFLAGS += -EB $(call ld-option, -maarch64elfb, -maarch64linuxb) +KBUILD_LDFLAGS += -EB $(call ld-option, -maarch64elfb, -maarch64linuxb -z norelro) UTS_MACHINE := aarch64_be else KBUILD_CPPFLAGS += -mlittle-endian CHECKFLAGS += -D__AARCH64EL__ # Same as above, prefer ELF but fall back to linux target if needed. -KBUILD_LDFLAGS += -EL $(call ld-option, -maarch64elf, -maarch64linux) +KBUILD_LDFLAGS += -EL $(call ld-option, -maarch64elf, -maarch64linux -z norelro) UTS_MACHINE := aarch64 endif +ifeq ($(CONFIG_LD_IS_LLD), y) +KBUILD_LDFLAGS += -z norelro +endif + CHECKFLAGS += -D__aarch64__ ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_REGS),y) -- cgit 1.4.1 From 96ebc9c871d8a28fb22aa758dd9188a4732df482 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Mon, 4 Jan 2021 20:07:15 -0800 Subject: usb: uas: Add PNY USB Portable SSD to unusual_uas Here's another variant PNY Pro Elite USB 3.1 Gen 2 portable SSD that hangs and doesn't respond to ATA_1x pass-through commands. If it doesn't support these commands, it should respond properly to the host. Add it to the unusual uas list to be able to move forward with other operations. Cc: stable@vger.kernel.org Reviewed-by: Hans de Goede Acked-by: Oliver Neukum Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/2edc7af892d0913bf06f5b35e49ec463f03d5ed8.1609819418.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_uas.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index 870e9cf3d5dc..f9677a5ec31b 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -90,6 +90,13 @@ UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_BROKEN_FUA), +/* Reported-by: Thinh Nguyen */ +UNUSUAL_DEV(0x154b, 0xf00b, 0x0000, 0x9999, + "PNY", + "Pro Elite SSD", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_ATA_1X), + /* Reported-by: Thinh Nguyen */ UNUSUAL_DEV(0x154b, 0xf00d, 0x0000, 0x9999, "PNY", -- cgit 1.4.1 From 45ba7b195a369f35cb39094fdb32efe5908b34ad Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Mon, 4 Jan 2021 19:38:44 +0800 Subject: arm64: cpufeature: remove non-exist CONFIG_KVM_ARM_HOST Commit d82755b2e781 ("KVM: arm64: Kill off CONFIG_KVM_ARM_HOST") deletes CONFIG_KVM_ARM_HOST option, it should use CONFIG_KVM instead. Just remove CONFIG_KVM_ARM_HOST here. Fixes: d82755b2e781 ("KVM: arm64: Kill off CONFIG_KVM_ARM_HOST") Signed-off-by: Shannon Zhao Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1609760324-92271-1-git-send-email-shannon.zhao@linux.alibaba.com --- arch/arm64/kernel/cpufeature.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d96f4554282d..bc3549663957 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2558,7 +2558,7 @@ static void verify_hyp_capabilities(void) int parange, ipa_max; unsigned int safe_vmid_bits, vmid_bits; - if (!IS_ENABLED(CONFIG_KVM) || !IS_ENABLED(CONFIG_KVM_ARM_HOST)) + if (!IS_ENABLED(CONFIG_KVM)) return; safe_mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); -- cgit 1.4.1 From c9c48bb701ba78df7d4652146b12bcf3ad716507 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Tue, 22 Dec 2020 02:47:56 +0100 Subject: speakup: Add github repository URL and bug tracker We have set up a repository for users to try newer releases more easily, and keep records of known bugs. Signed-off-by: Samuel Thibault Link: https://lore.kernel.org/r/20201222014756.ov5vi6fywylbp5n6@function Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 546aa66428c9..62934e770646 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16710,6 +16710,8 @@ M: Samuel Thibault L: speakup@linux-speakup.org S: Odd Fixes W: http://www.linux-speakup.org/ +W: https://github.com/linux-speakup/speakup +B: https://github.com/linux-speakup/speakup/issues F: drivers/accessibility/speakup/ SPEAR CLOCK FRAMEWORK SUPPORT -- cgit 1.4.1 From f6bcb4c7f366905b66ce8ffca7190118244bb642 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 5 Jan 2021 14:42:29 +0300 Subject: regmap: debugfs: Fix a reversed if statement in regmap_debugfs_init() This code will leak "map->debugfs_name" because the if statement is reversed so it only frees NULL pointers instead of non-NULL. In fact the if statement is not required and should just be removed because kfree() accepts NULL pointers. Fixes: cffa4b2122f5 ("regmap: debugfs: Fix a memory leak when calling regmap_attach_dev") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/X/RQpfAwRdLg0GqQ@mwanda Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-debugfs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c index bf03cd343be2..ff2ee87987c7 100644 --- a/drivers/base/regmap/regmap-debugfs.c +++ b/drivers/base/regmap/regmap-debugfs.c @@ -594,9 +594,7 @@ void regmap_debugfs_init(struct regmap *map) } if (!strcmp(name, "dummy")) { - if (!map->debugfs_name) - kfree(map->debugfs_name); - + kfree(map->debugfs_name); map->debugfs_name = kasprintf(GFP_KERNEL, "dummy%d", dummy_index); if (!map->debugfs_name) -- cgit 1.4.1 From dfe94d4086e40e92b1926bddcefa629b791e9b28 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 21 Dec 2020 22:55:41 -0800 Subject: x86/hyperv: Fix kexec panic/hang issues Currently the kexec kernel can panic or hang due to 2 causes: 1) hv_cpu_die() is not called upon kexec, so the hypervisor corrupts the old VP Assist Pages when the kexec kernel runs. The same issue is fixed for hibernation in commit 421f090c819d ("x86/hyperv: Suspend/resume the VP assist page for hibernation"). Now fix it for kexec. 2) hyperv_cleanup() is called too early. In the kexec path, the other CPUs are stopped in hv_machine_shutdown() -> native_machine_shutdown(), so between hv_kexec_handler() and native_machine_shutdown(), the other CPUs can still try to access the hypercall page and cause panic. The workaround "hv_hypercall_pg = NULL;" in hyperv_cleanup() is unreliabe. Move hyperv_cleanup() to a better place. Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20201222065541.24312-1-decui@microsoft.com Signed-off-by: Wei Liu --- arch/x86/hyperv/hv_init.c | 4 ++++ arch/x86/include/asm/mshyperv.h | 2 ++ arch/x86/kernel/cpu/mshyperv.c | 18 ++++++++++++++++++ drivers/hv/vmbus_drv.c | 2 -- 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index e04d90af4c27..4638a52d8eae 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +27,8 @@ #include #include +int hyperv_init_cpuhp; + void *hv_hypercall_pg; EXPORT_SYMBOL_GPL(hv_hypercall_pg); @@ -401,6 +404,7 @@ void __init hyperv_init(void) register_syscore_ops(&hv_syscore_ops); + hyperv_init_cpuhp = cpuhp; return; remove_cpuhp_state: diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index ffc289992d1b..30f76b966857 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -74,6 +74,8 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {} #if IS_ENABLED(CONFIG_HYPERV) +extern int hyperv_init_cpuhp; + extern void *hv_hypercall_pg; extern void __percpu **hyperv_pcpu_input_arg; diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index f628e3dc150f..43b54bef5448 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -135,14 +135,32 @@ static void hv_machine_shutdown(void) { if (kexec_in_progress && hv_kexec_handler) hv_kexec_handler(); + + /* + * Call hv_cpu_die() on all the CPUs, otherwise later the hypervisor + * corrupts the old VP Assist Pages and can crash the kexec kernel. + */ + if (kexec_in_progress && hyperv_init_cpuhp > 0) + cpuhp_remove_state(hyperv_init_cpuhp); + + /* The function calls stop_other_cpus(). */ native_machine_shutdown(); + + /* Disable the hypercall page when there is only 1 active CPU. */ + if (kexec_in_progress) + hyperv_cleanup(); } static void hv_machine_crash_shutdown(struct pt_regs *regs) { if (hv_crash_handler) hv_crash_handler(regs); + + /* The function calls crash_smp_send_stop(). */ native_machine_crash_shutdown(regs); + + /* Disable the hypercall page when there is only 1 active CPU. */ + hyperv_cleanup(); } #endif /* CONFIG_KEXEC_CORE */ #endif /* CONFIG_HYPERV */ diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 502f8cd95f6d..d491fdcee61f 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2550,7 +2550,6 @@ static void hv_kexec_handler(void) /* Make sure conn_state is set as hv_synic_cleanup checks for it */ mb(); cpuhp_remove_state(hyperv_cpuhp_online); - hyperv_cleanup(); }; static void hv_crash_handler(struct pt_regs *regs) @@ -2566,7 +2565,6 @@ static void hv_crash_handler(struct pt_regs *regs) cpu = smp_processor_id(); hv_stimer_cleanup(cpu); hv_synic_disable_regs(cpu); - hyperv_cleanup(); }; static int hv_synic_suspend(void) -- cgit 1.4.1 From 3fb6819f411b5a89afb5726afafacf0c4b62844f Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Mon, 28 Dec 2020 15:05:08 +0800 Subject: arm64: traps: remove duplicate include statement asm/exception.h is included more than once. Remove the one that isn't necessary. Signed-off-by: Tian Tao Link: https://lore.kernel.org/r/1609139108-10819-1-git-send-email-tiantao6@hisilicon.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/traps.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 08156be75569..6895ce777e7f 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include -- cgit 1.4.1 From e2bba5f92354488c331b7821d873db7c388e31aa Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Wed, 30 Dec 2020 14:19:54 -0800 Subject: arm64: vdso: disable .eh_frame_hdr via /DISCARD/ instead of --no-eh-frame-hdr Currently with ld.lld we emit an empty .eh_frame_hdr section (and a corresponding program header) into the vDSO. With ld.bfd the section is not emitted but the program header is, with p_vaddr set to 0. This can lead to unwinders attempting to interpret the data at whichever location the program header happens to point to as an unwind info header. This happens to be mostly harmless as long as the byte at that location (interpreted as a version number) has a value other than 1, causing both libgcc and LLVM libunwind to ignore the section (in libunwind's case, after printing an error message to stderr), but it could lead to worse problems if the byte happened to be 1 or the program header points to non-readable memory (e.g. if the empty section was placed at a page boundary). Instead of disabling .eh_frame_hdr via --no-eh-frame-hdr (which also has the downside of being unsupported by older versions of GNU binutils), disable it by discarding the section, and stop emitting the program header that points to it. I understand that we intend to emit valid unwind info for the vDSO at some point. Once that happens this patch can be reverted. Signed-off-by: Peter Collingbourne Acked-by: Ard Biesheuvel Acked-by: Vincenzo Frascino Link: https://linux-review.googlesource.com/id/If745fd9cadcb31b4010acbf5693727fe111b0863 Link: https://lore.kernel.org/r/20201230221954.2007257-1-pcc@google.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso/Makefile | 3 +-- arch/arm64/kernel/vdso/vdso.lds.S | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index a8f8e409e2bf..cd9c3fa25902 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -24,8 +24,7 @@ btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti # routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so # preparation in build-time C")). ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ - -Bsymbolic $(call ld-option, --no-eh-frame-hdr) --build-id=sha1 -n \ - $(btildflags-y) -T + -Bsymbolic --build-id=sha1 -n $(btildflags-y) -T ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index d808ad31e01f..61dbb4c838ef 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -40,9 +40,6 @@ SECTIONS PROVIDE (_etext = .); PROVIDE (etext = .); - .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr - .eh_frame : { KEEP (*(.eh_frame)) } :text - .dynamic : { *(.dynamic) } :text :dynamic .rodata : { *(.rodata*) } :text @@ -54,6 +51,7 @@ SECTIONS *(.note.GNU-stack) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) + *(.eh_frame .eh_frame_hdr) } } @@ -66,7 +64,6 @@ PHDRS text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ note PT_NOTE FLAGS(4); /* PF_R */ - eh_frame_hdr PT_GNU_EH_FRAME; } /* -- cgit 1.4.1 From f34d93f30d6a72f6b15ba24b6994b746df0c30de Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 4 Jan 2021 12:15:41 +0000 Subject: arm64: kasan: Set TCR_EL1.TBID1 when KASAN_HW_TAGS is enabled Commit 49b3cf035edc ("kasan: arm64: set TCR_EL1.TBID1 when enabled") set the TBID1 bit for the KASAN_SW_TAGS configuration, freeing up 8 bits to be used by PAC. With in-kernel MTE now in mainline, also set this bit for the KASAN_HW_TAGS configuration. Signed-off-by: Catalin Marinas Cc: Peter Collingbourne Acked-by: Vincenzo Frascino Acked-by: Andrey Konovalov --- arch/arm64/mm/proc.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 37a54b57178a..1f7ee8c8b7b8 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -46,7 +46,7 @@ #endif #ifdef CONFIG_KASAN_HW_TAGS -#define TCR_KASAN_HW_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 +#define TCR_KASAN_HW_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1 #else #define TCR_KASAN_HW_FLAGS 0 #endif -- cgit 1.4.1 From a8f7e08a81708920a928664a865208fdf451c49f Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Tue, 5 Jan 2021 08:33:11 -0800 Subject: x86/sev-es: Fix SEV-ES OUT/IN immediate opcode vc handling The IN and OUT instructions with port address as an immediate operand only use an 8-bit immediate (imm8). The current VC handler uses the entire 32-bit immediate value but these instructions only set the first bytes. Cast the operand to an u8 for that. [ bp: Massage commit message. ] Fixes: 25189d08e5168 ("x86/sev-es: Add support for handling IOIO exceptions") Signed-off-by: Peter Gonda Signed-off-by: Borislav Petkov Acked-by: David Rientjes Link: https://lkml.kernel.org/r/20210105163311.221490-1-pgonda@google.com --- arch/x86/kernel/sev-es-shared.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c index 7d04b356d44d..cdc04d091242 100644 --- a/arch/x86/kernel/sev-es-shared.c +++ b/arch/x86/kernel/sev-es-shared.c @@ -305,14 +305,14 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) case 0xe4: case 0xe5: *exitinfo |= IOIO_TYPE_IN; - *exitinfo |= (u64)insn->immediate.value << 16; + *exitinfo |= (u8)insn->immediate.value << 16; break; /* OUT immediate opcodes */ case 0xe6: case 0xe7: *exitinfo |= IOIO_TYPE_OUT; - *exitinfo |= (u64)insn->immediate.value << 16; + *exitinfo |= (u8)insn->immediate.value << 16; break; /* IN register opcodes */ -- cgit 1.4.1 From d16baa3f1453c14d680c5fee01cd122a22d0e0ce Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 5 Jan 2021 12:37:23 -0500 Subject: blk-iocost: fix NULL iocg deref from racing against initialization When initializing iocost for a queue, its rqos should be registered before the blkcg policy is activated to allow policy data initiailization to lookup the associated ioc. This unfortunately means that the rqos methods can be called on bios before iocgs are attached to all existing blkgs. While the race is theoretically possible on ioc_rqos_throttle(), it mostly happened in ioc_rqos_merge() due to the difference in how they lookup ioc. The former determines it from the passed in @rqos and then bails before dereferencing iocg if the looked up ioc is disabled, which most likely is the case if initialization is still in progress. The latter looked up ioc by dereferencing the possibly NULL iocg making it a lot more prone to actually triggering the bug. * Make ioc_rqos_merge() use the same method as ioc_rqos_throttle() to look up ioc for consistency. * Make ioc_rqos_throttle() and ioc_rqos_merge() test for NULL iocg before dereferencing it. * Explain the danger of NULL iocgs in blk_iocost_init(). Signed-off-by: Tejun Heo Reported-by: Jonathan Lemon Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Jens Axboe --- block/blk-iocost.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index ac6078a34939..98d656bdb42b 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2551,8 +2551,8 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio) bool use_debt, ioc_locked; unsigned long flags; - /* bypass IOs if disabled or for root cgroup */ - if (!ioc->enabled || !iocg->level) + /* bypass IOs if disabled, still initializing, or for root cgroup */ + if (!ioc->enabled || !iocg || !iocg->level) return; /* calculate the absolute vtime cost */ @@ -2679,14 +2679,14 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq, struct bio *bio) { struct ioc_gq *iocg = blkg_to_iocg(bio->bi_blkg); - struct ioc *ioc = iocg->ioc; + struct ioc *ioc = rqos_to_ioc(rqos); sector_t bio_end = bio_end_sector(bio); struct ioc_now now; u64 vtime, abs_cost, cost; unsigned long flags; - /* bypass if disabled or for root cgroup */ - if (!ioc->enabled || !iocg->level) + /* bypass if disabled, still initializing, or for root cgroup */ + if (!ioc->enabled || !iocg || !iocg->level) return; abs_cost = calc_vtime_cost(bio, iocg, true); @@ -2863,6 +2863,12 @@ static int blk_iocost_init(struct request_queue *q) ioc_refresh_params(ioc, true); spin_unlock_irq(&ioc->lock); + /* + * rqos must be added before activation to allow iocg_pd_init() to + * lookup the ioc from q. This means that the rqos methods may get + * called before policy activation completion, can't assume that the + * target bio has an iocg associated and need to test for NULL iocg. + */ rq_qos_add(q, rqos); ret = blkcg_activate_policy(q, &blkcg_policy_iocost); if (ret) { -- cgit 1.4.1 From 6d4d273588378c65915acaf7b2ee74e9dd9c130a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 10 Dec 2020 10:44:33 +0100 Subject: bfq: Fix computation of shallow depth BFQ computes number of tags it allows to be allocated for each request type based on tag bitmap. However it uses 1 << bitmap.shift as number of available tags which is wrong. 'shift' is just an internal bitmap value containing logarithm of how many bits bitmap uses in each bitmap word. Thus number of tags allowed for some request types can be far to low. Use proper bitmap.depth which has the number of tags instead. Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 9e81d1052091..9e4eb0fc1c16 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -6332,13 +6332,13 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd, * limit 'something'. */ /* no more than 50% of tags for async I/O */ - bfqd->word_depths[0][0] = max((1U << bt->sb.shift) >> 1, 1U); + bfqd->word_depths[0][0] = max(bt->sb.depth >> 1, 1U); /* * no more than 75% of tags for sync writes (25% extra tags * w.r.t. async I/O, to prevent async I/O from starving sync * writes) */ - bfqd->word_depths[0][1] = max(((1U << bt->sb.shift) * 3) >> 2, 1U); + bfqd->word_depths[0][1] = max((bt->sb.depth * 3) >> 2, 1U); /* * In-word depths in case some bfq_queue is being weight- @@ -6348,9 +6348,9 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd, * shortage. */ /* no more than ~18% of tags for async I/O */ - bfqd->word_depths[1][0] = max(((1U << bt->sb.shift) * 3) >> 4, 1U); + bfqd->word_depths[1][0] = max((bt->sb.depth * 3) >> 4, 1U); /* no more than ~37% of tags for sync writes (~20% extra tags) */ - bfqd->word_depths[1][1] = max(((1U << bt->sb.shift) * 6) >> 4, 1U); + bfqd->word_depths[1][1] = max((bt->sb.depth * 6) >> 4, 1U); for (i = 0; i < 2; i++) for (j = 0; j < 2; j++) -- cgit 1.4.1 From 170b3bbda08852277b97f4f0516df0785c939764 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Tue, 5 Jan 2021 21:53:40 +0800 Subject: io_uring: Delete useless variable ‘id’ in io_prep_async_work MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix follow warning: fs/io_uring.c:1523:22: warning: variable ‘id’ set but not used [-Wunused-but-set-variable] struct io_identity *id; ^~ Reported-by: Hulk Robot Signed-off-by: Ye Bin Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5bccb235271f..dc92ca5090a3 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1523,10 +1523,8 @@ static void io_prep_async_work(struct io_kiocb *req) { const struct io_op_def *def = &io_op_defs[req->opcode]; struct io_ring_ctx *ctx = req->ctx; - struct io_identity *id; io_req_init_async(req); - id = req->work.identity; if (req->flags & REQ_F_FORCE_ASYNC) req->work.flags |= IO_WQ_WORK_CONCURRENT; -- cgit 1.4.1 From aebf5db917055b38f4945ed6d621d9f07a44ff30 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 21 Dec 2020 12:33:35 +0800 Subject: block: fix use-after-free in disk_part_iter_next Make sure that bdgrab() is done on the 'block_device' instance before referring to it for avoiding use-after-free. Cc: Reported-by: syzbot+825f0f9657d4e528046e@syzkaller.appspotmail.com Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/genhd.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index 73faec438e49..419548e92d82 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -246,15 +246,18 @@ struct block_device *disk_part_iter_next(struct disk_part_iter *piter) part = rcu_dereference(ptbl->part[piter->idx]); if (!part) continue; + piter->part = bdgrab(part); + if (!piter->part) + continue; if (!bdev_nr_sectors(part) && !(piter->flags & DISK_PITER_INCL_EMPTY) && !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && - piter->idx == 0)) + piter->idx == 0)) { + bdput(piter->part); + piter->part = NULL; continue; + } - piter->part = bdgrab(part); - if (!piter->part) - continue; piter->idx += inc; break; } -- cgit 1.4.1 From 6775ae901ffd130d0be9c32837f88d1f9d560189 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 22 Dec 2020 17:42:32 +0100 Subject: iommu/iova: fix 'domain' typos Replace misspelled 'doamin' with 'domain' in several comments. Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201222164232.88795-1-sgarzare@redhat.com Signed-off-by: Will Deacon --- drivers/iommu/iova.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 4bb3293ae4d7..d20b8b333d30 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -358,7 +358,7 @@ static void private_free_iova(struct iova_domain *iovad, struct iova *iova) * @iovad: - iova domain in question. * @pfn: - page frame number * This function finds and returns an iova belonging to the - * given doamin which matches the given pfn. + * given domain which matches the given pfn. */ struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) { @@ -601,7 +601,7 @@ void queue_iova(struct iova_domain *iovad, EXPORT_SYMBOL_GPL(queue_iova); /** - * put_iova_domain - destroys the iova doamin + * put_iova_domain - destroys the iova domain * @iovad: - iova domain in question. * All the iova's in that domain are destroyed. */ @@ -712,9 +712,9 @@ EXPORT_SYMBOL_GPL(reserve_iova); /** * copy_reserved_iova - copies the reserved between domains - * @from: - source doamin from where to copy + * @from: - source domain from where to copy * @to: - destination domin where to copy - * This function copies reserved iova's from one doamin to + * This function copies reserved iova's from one domain to * other. */ void -- cgit 1.4.1 From ff2b46d7cff80d27d82f7f3252711f4ca1666129 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Tue, 5 Jan 2021 13:18:37 +0800 Subject: iommu/intel: Fix memleak in intel_irq_remapping_alloc When irq_domain_get_irq_data() or irqd_cfg() fails at i == 0, data allocated by kzalloc() has not been freed before returning, which leads to memleak. Fixes: b106ee63abcc ("irq_remapping/vt-d: Enhance Intel IR driver to support hierarchical irqdomains") Signed-off-by: Dinghao Liu Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20210105051837.32118-1-dinghao.liu@zju.edu.cn Signed-off-by: Will Deacon --- drivers/iommu/intel/irq_remapping.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index aeffda92b10b..685200a5cff0 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -1353,6 +1353,8 @@ static int intel_irq_remapping_alloc(struct irq_domain *domain, irq_data = irq_domain_get_irq_data(domain, virq + i); irq_cfg = irqd_cfg(irq_data); if (!irq_data || !irq_cfg) { + if (!i) + kfree(data); ret = -EINVAL; goto out_free_data; } -- cgit 1.4.1 From 12bc4570c14e24e6244d66466aeda994f805634b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 5 Jan 2021 01:32:51 +0000 Subject: iommu/amd: Set iommu->int_enabled consistently when interrupts are set up When I made the INTCAPXT support stop gratuitously pretending to be MSI, I missed the fact that iommu_setup_msi() also sets the ->int_enabled flag. I missed this in the iommu_setup_intcapxt() code path, which means that a resume from suspend will try to allocate the IRQ domains again, accidentally re-enabling interrupts as it does, resulting in much sadness. Lift out the bit which sets iommu->int_enabled into the iommu_init_irq() function which is also where it gets checked. Link: https://lore.kernel.org/r/20210104132250.GE32151@zn.tnic/ Fixes: d1adcfbb520c ("iommu/amd: Fix IOMMU interrupt generation in X2APIC mode") Reported-by: Borislav Petkov Signed-off-by: David Woodhouse Tested-by: Borislav Petkov Link: https://lore.kernel.org/r/50cd5f55be8ead0937ac315cd2f5b89364f6a9a5.camel@infradead.org Signed-off-by: Will Deacon --- drivers/iommu/amd/init.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index f54cd79b43e4..6a1f7048dacc 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1973,8 +1973,6 @@ static int iommu_setup_msi(struct amd_iommu *iommu) return r; } - iommu->int_enabled = true; - return 0; } @@ -2169,6 +2167,7 @@ static int iommu_init_irq(struct amd_iommu *iommu) if (ret) return ret; + iommu->int_enabled = true; enable_faults: iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); -- cgit 1.4.1 From b34f10c2dc5961021850c3c15f46a84b56a0c0e8 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 5 Jan 2021 01:36:13 +0000 Subject: iommu/amd: Stop irq_remapping_select() matching when remapping is disabled The AMD IOMMU initialisation registers the IRQ remapping domain for each IOMMU before doing the final sanity check that every I/OAPIC is covered. This means that the AMD irq_remapping_select() function gets invoked even when IRQ remapping has been disabled, eventually leading to a NULL pointer dereference in alloc_irq_table(). Unfortunately, the IVRS isn't fully parsed early enough that the sanity check can be done in time to registering the IRQ domain altogether. Doing that would be nice, but is a larger and more error-prone task. The simple fix is just for irq_remapping_select() to refuse to report a match when IRQ remapping has disabled. Link: https://lore.kernel.org/lkml/ed4be9b4-24ac-7128-c522-7ef359e8185d@gmx.at Fixes: a1a785b57242 ("iommu/amd: Implement select() method on remapping irqdomain") Reported-by: Johnathan Smithinovic Signed-off-by: David Woodhouse Link: https://lore.kernel.org/r/04bbe8bca87f81a3cfa93ec4299e53f47e00e5b3.camel@infradead.org Signed-off-by: Will Deacon --- drivers/iommu/amd/iommu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 7e2c445a1fae..f0adbc48fd17 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -3854,6 +3854,9 @@ static int irq_remapping_select(struct irq_domain *d, struct irq_fwspec *fwspec, struct amd_iommu *iommu; int devid = -1; + if (!amd_iommu_irq_remap) + return 0; + if (x86_fwspec_is_ioapic(fwspec)) devid = get_ioapic_devid(fwspec->param[0]); else if (x86_fwspec_is_hpet(fwspec)) -- cgit 1.4.1 From c2407cf7d22d0c0d94cf20342b3b8f06f1d904e7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 5 Jan 2021 11:33:00 -0800 Subject: mm: make wait_on_page_writeback() wait for multiple pending writebacks Ever since commit 2a9127fcf229 ("mm: rewrite wait_on_page_bit_common() logic") we've had some very occasional reports of BUG_ON(PageWriteback) in write_cache_pages(), which we thought we already fixed in commit 073861ed77b6 ("mm: fix VM_BUG_ON(PageTail) and BUG_ON(PageWriteback)"). But syzbot just reported another one, even with that commit in place. And it turns out that there's a simpler way to trigger the BUG_ON() than the one Hugh found with page re-use. It all boils down to the fact that the page writeback is ostensibly serialized by the page lock, but that isn't actually really true. Yes, the people _setting_ writeback all do so under the page lock, but the actual clearing of the bit - and waking up any waiters - happens without any page lock. This gives us this fairly simple race condition: CPU1 = end previous writeback CPU2 = start new writeback under page lock CPU3 = write_cache_pages() CPU1 CPU2 CPU3 ---- ---- ---- end_page_writeback() test_clear_page_writeback(page) ... delayed... lock_page(); set_page_writeback() unlock_page() lock_page() wait_on_page_writeback(); wake_up_page(page, PG_writeback); .. wakes up CPU3 .. BUG_ON(PageWriteback(page)); where the BUG_ON() happens because we woke up the PG_writeback bit becasue of the _previous_ writeback, but a new one had already been started because the clearing of the bit wasn't actually atomic wrt the actual wakeup or serialized by the page lock. The reason this didn't use to happen was that the old logic in waiting on a page bit would just loop if it ever saw the bit set again. The nice proper fix would probably be to get rid of the whole "wait for writeback to clear, and then set it" logic in the writeback path, and replace it with an atomic "wait-to-set" (ie the same as we have for page locking: we set the page lock bit with a single "lock_page()", not with "wait for lock bit to clear and then set it"). However, out current model for writeback is that the waiting for the writeback bit is done by the generic VFS code (ie write_cache_pages()), but the actual setting of the writeback bit is done much later by the filesystem ".writepages()" function. IOW, to make the writeback bit have that same kind of "wait-to-set" behavior as we have for page locking, we'd have to change our roughly ~50 different writeback functions. Painful. Instead, just make "wait_on_page_writeback()" loop on the very unlikely situation that the PG_writeback bit is still set, basically re-instating the old behavior. This is very non-optimal in case of contention, but since we only ever set the bit under the page lock, that situation is controlled. Reported-by: syzbot+2fc0712f8f8b8b8fa0ef@syzkaller.appspotmail.com Fixes: 2a9127fcf229 ("mm: rewrite wait_on_page_bit_common() logic") Acked-by: Hugh Dickins Cc: Andrew Morton Cc: Matthew Wilcox Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- mm/page-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 586042472ac9..eb34d204d4ee 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2826,7 +2826,7 @@ EXPORT_SYMBOL(__test_set_page_writeback); */ void wait_on_page_writeback(struct page *page) { - if (PageWriteback(page)) { + while (PageWriteback(page)) { trace_wait_on_page_writeback(page, page_mapping(page)); wait_on_page_bit(page, PG_writeback); } -- cgit 1.4.1 From 8a48c0a3360bf2bf4f40c980d0ec216e770e58ee Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 4 Jan 2021 19:44:53 -0800 Subject: arch/arc: add copy_user_page() to to fix build error on ARC fs/dax.c uses copy_user_page() but ARC does not provide that interface, resulting in a build error. Provide copy_user_page() in . ../fs/dax.c: In function 'copy_cow_page_dax': ../fs/dax.c:702:2: error: implicit declaration of function 'copy_user_page'; did you mean 'copy_to_user_page'? [-Werror=implicit-function-declaration] Reported-by: kernel test robot Signed-off-by: Randy Dunlap Cc: Vineet Gupta Cc: linux-snps-arc@lists.infradead.org Cc: Dan Williams #Acked-by: Vineet Gupta # v1 Cc: Andrew Morton Cc: Matthew Wilcox Cc: Jan Kara Cc: linux-fsdevel@vger.kernel.org Cc: linux-nvdimm@lists.01.org #Reviewed-by: Ira Weiny # v2 Signed-off-by: Vineet Gupta --- arch/arc/include/asm/page.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 23e41e890eda..ad9b7fe4dba3 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -10,6 +10,7 @@ #ifndef __ASSEMBLY__ #define clear_page(paddr) memset((paddr), 0, PAGE_SIZE) +#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) #define copy_page(to, from) memcpy((to), (from), PAGE_SIZE) struct vm_area_struct; -- cgit 1.4.1 From f4d9359de8ac0fb64a5ecc9c34833705eb53327b Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 5 Nov 2020 13:22:10 -0800 Subject: include/soc: remove headers for EZChip NPS NPS platform has been removed from ARC port and there are no in-tree user of it now . So RIP ! Signed-off-by: Vineet Gupta --- include/soc/nps/common.h | 172 ----------------------------------------------- include/soc/nps/mtm.h | 59 ---------------- 2 files changed, 231 deletions(-) delete mode 100644 include/soc/nps/common.h delete mode 100644 include/soc/nps/mtm.h diff --git a/include/soc/nps/common.h b/include/soc/nps/common.h deleted file mode 100644 index 8c18dc6d3fde..000000000000 --- a/include/soc/nps/common.h +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) 2016, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef SOC_NPS_COMMON_H -#define SOC_NPS_COMMON_H - -#ifdef CONFIG_SMP -#define NPS_IPI_IRQ 5 -#endif - -#define NPS_HOST_REG_BASE 0xF6000000 - -#define NPS_MSU_BLKID 0x018 - -#define CTOP_INST_RSPI_GIC_0_R12 0x3C56117E -#define CTOP_INST_MOV2B_FLIP_R3_B1_B2_INST 0x5B60 -#define CTOP_INST_MOV2B_FLIP_R3_B1_B2_LIMM 0x00010422 - -#ifndef AUX_IENABLE -#define AUX_IENABLE 0x40c -#endif - -#define CTOP_AUX_IACK (0xFFFFF800 + 0x088) - -#ifndef __ASSEMBLY__ - -/* In order to increase compilation test coverage */ -#ifdef CONFIG_ARC -static inline void nps_ack_gic(void) -{ - __asm__ __volatile__ ( - " .word %0\n" - : - : "i"(CTOP_INST_RSPI_GIC_0_R12) - : "memory"); -} -#else -static inline void nps_ack_gic(void) { } -#define write_aux_reg(r, v) -#define read_aux_reg(r) 0 -#endif - -/* CPU global ID */ -struct global_id { - union { - struct { -#ifdef CONFIG_EZNPS_MTM_EXT - u32 __reserved:20, cluster:4, core:4, thread:4; -#else - u32 __reserved:24, cluster:4, core:4; -#endif - }; - u32 value; - }; -}; - -/* - * Convert logical to physical CPU IDs - * - * The conversion swap bits 1 and 2 of cluster id (out of 4 bits) - * Now quad of logical clusters id's are adjacent physically, - * and not like the id's physically came with each cluster. - * Below table is 4x4 mesh of core clusters as it layout on chip. - * Cluster ids are in format: logical (physical) - * - * ----------------- ------------------ - * 3 | 5 (3) 7 (7) | | 13 (11) 15 (15)| - * - * 2 | 4 (2) 6 (6) | | 12 (10) 14 (14)| - * ----------------- ------------------ - * 1 | 1 (1) 3 (5) | | 9 (9) 11 (13)| - * - * 0 | 0 (0) 2 (4) | | 8 (8) 10 (12)| - * ----------------- ------------------ - * 0 1 2 3 - */ -static inline int nps_cluster_logic_to_phys(int cluster) -{ -#ifdef __arc__ - __asm__ __volatile__( - " mov r3,%0\n" - " .short %1\n" - " .word %2\n" - " mov %0,r3\n" - : "+r"(cluster) - : "i"(CTOP_INST_MOV2B_FLIP_R3_B1_B2_INST), - "i"(CTOP_INST_MOV2B_FLIP_R3_B1_B2_LIMM) - : "r3"); -#endif - - return cluster; -} - -#define NPS_CPU_TO_CLUSTER_NUM(cpu) \ - ({ struct global_id gid; gid.value = cpu; \ - nps_cluster_logic_to_phys(gid.cluster); }) - -struct nps_host_reg_address { - union { - struct { - u32 base:8, cl_x:4, cl_y:4, - blkid:6, reg:8, __reserved:2; - }; - u32 value; - }; -}; - -struct nps_host_reg_address_non_cl { - union { - struct { - u32 base:7, blkid:11, reg:12, __reserved:2; - }; - u32 value; - }; -}; - -static inline void *nps_host_reg_non_cl(u32 blkid, u32 reg) -{ - struct nps_host_reg_address_non_cl reg_address; - - reg_address.value = NPS_HOST_REG_BASE; - reg_address.blkid = blkid; - reg_address.reg = reg; - - return (void *)reg_address.value; -} - -static inline void *nps_host_reg(u32 cpu, u32 blkid, u32 reg) -{ - struct nps_host_reg_address reg_address; - u32 cl = NPS_CPU_TO_CLUSTER_NUM(cpu); - - reg_address.value = NPS_HOST_REG_BASE; - reg_address.cl_x = (cl >> 2) & 0x3; - reg_address.cl_y = cl & 0x3; - reg_address.blkid = blkid; - reg_address.reg = reg; - - return (void *)reg_address.value; -} -#endif /* __ASSEMBLY__ */ - -#endif /* SOC_NPS_COMMON_H */ diff --git a/include/soc/nps/mtm.h b/include/soc/nps/mtm.h deleted file mode 100644 index d2f5e7e3703e..000000000000 --- a/include/soc/nps/mtm.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef SOC_NPS_MTM_H -#define SOC_NPS_MTM_H - -#define CTOP_INST_HWSCHD_OFF_R3 0x3B6F00BF -#define CTOP_INST_HWSCHD_RESTORE_R3 0x3E6F70C3 - -static inline void hw_schd_save(unsigned int *flags) -{ - __asm__ __volatile__( - " .word %1\n" - " st r3,[%0]\n" - : - : "r"(flags), "i"(CTOP_INST_HWSCHD_OFF_R3) - : "r3", "memory"); -} - -static inline void hw_schd_restore(unsigned int flags) -{ - __asm__ __volatile__( - " mov r3, %0\n" - " .word %1\n" - : - : "r"(flags), "i"(CTOP_INST_HWSCHD_RESTORE_R3) - : "r3"); -} - -#endif /* SOC_NPS_MTM_H */ -- cgit 1.4.1 From 2860d45a589818dd8ffd90cdc4bcf77f36a5a6be Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:36:17 +0100 Subject: qed: select CONFIG_CRC32 Without this, the driver fails to link: lpc_eth.c:(.text+0x1934): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/net/ethernet/qlogic/qed/qed_debug.o: in function `qed_grc_dump': qed_debug.c:(.text+0x4068): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/net/ethernet/qlogic/qed/qed_debug.o: in function `qed_idle_chk_dump': qed_debug.c:(.text+0x51fc): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/net/ethernet/qlogic/qed/qed_debug.o: in function `qed_mcp_trace_dump': qed_debug.c:(.text+0x6000): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/net/ethernet/qlogic/qed/qed_debug.o: in function `qed_dbg_reg_fifo_dump': qed_debug.c:(.text+0x66cc): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/net/ethernet/qlogic/qed/qed_debug.o:qed_debug.c:(.text+0x6aa4): more undefined references to `crc32_le' follow Fixes: 7a4b21b7d1f0 ("qed: Add nvram selftest") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig index 4366c7a8de95..6b5ddb07ee83 100644 --- a/drivers/net/ethernet/qlogic/Kconfig +++ b/drivers/net/ethernet/qlogic/Kconfig @@ -78,6 +78,7 @@ config QED depends on PCI select ZLIB_INFLATE select CRC8 + select CRC32 select NET_DEVLINK help This enables the support for Marvell FastLinQ adapters family. -- cgit 1.4.1 From f9d6f94132f01d2a552dcbab54fa56496638186d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:36:18 +0100 Subject: phy: dp83640: select CONFIG_CRC32 Without crc32, this driver fails to link: arm-linux-gnueabi-ld: drivers/net/phy/dp83640.o: in function `match': dp83640.c:(.text+0x476c): undefined reference to `crc32_le' Fixes: 539e44d26855 ("dp83640: Include hash in timestamp/packet matching") Signed-off-by: Arnd Bergmann Reviewed-by: Andrew Lunn Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/ptp/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index 476d7c7fe70a..d2bf05ccbbe2 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -64,6 +64,7 @@ config DP83640_PHY depends on NETWORK_PHY_TIMESTAMPING depends on PHYLIB depends on PTP_1588_CLOCK + select CRC32 help Supports the DP83640 PHYTER with IEEE 1588 features. -- cgit 1.4.1 From 1d48595c786b1b9dc6be301e8d7f6fc74e9882aa Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:36:19 +0100 Subject: can: kvaser_pciefd: select CONFIG_CRC32 Without crc32, this driver fails to link: arm-linux-gnueabi-ld: drivers/net/can/kvaser_pciefd.o: in function `kvaser_pciefd_probe': kvaser_pciefd.c:(.text+0x2b0): undefined reference to `crc32_be' Fixes: 26ad340e582d ("can: kvaser_pciefd: Add driver for Kvaser PCIEcan devices") Signed-off-by: Arnd Bergmann Acked-by: Marc Kleine-Budde Signed-off-by: David S. Miller --- drivers/net/can/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index 424970939fd4..1c28eade6bec 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -123,6 +123,7 @@ config CAN_JANZ_ICAN3 config CAN_KVASER_PCIEFD depends on PCI tristate "Kvaser PCIe FD cards" + select CRC32 help This is a driver for the Kvaser PCI Express CAN FD family. -- cgit 1.4.1 From e186620d7bf11b274b985b839c38266d7918cc05 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:36:20 +0100 Subject: wil6210: select CONFIG_CRC32 Without crc32, the driver fails to link: arm-linux-gnueabi-ld: drivers/net/wireless/ath/wil6210/fw.o: in function `wil_fw_verify': fw.c:(.text+0x74c): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/net/wireless/ath/wil6210/fw.o:fw.c:(.text+0x758): more undefined references to `crc32_le' follow Fixes: 151a9706503f ("wil6210: firmware download") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/wireless/ath/wil6210/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/wil6210/Kconfig b/drivers/net/wireless/ath/wil6210/Kconfig index 6a95b199bf62..f074e9c31aa2 100644 --- a/drivers/net/wireless/ath/wil6210/Kconfig +++ b/drivers/net/wireless/ath/wil6210/Kconfig @@ -2,6 +2,7 @@ config WIL6210 tristate "Wilocity 60g WiFi card wil6210 support" select WANT_DEV_COREDUMP + select CRC32 depends on CFG80211 depends on PCI default n -- cgit 1.4.1 From 152a8a6c017bfdeda7f6d052fbc6e151891bd9b6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:36:21 +0100 Subject: cfg80211: select CONFIG_CRC32 Without crc32 support, this fails to link: arm-linux-gnueabi-ld: net/wireless/scan.o: in function `cfg80211_scan_6ghz': scan.c:(.text+0x928): undefined reference to `crc32_le' Fixes: c8cb5b854b40 ("nl80211/cfg80211: support 6 GHz scanning") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- net/wireless/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 27026f587fa6..f620acd2a0f5 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -21,6 +21,7 @@ config CFG80211 tristate "cfg80211 - wireless configuration API" depends on RFKILL || !RFKILL select FW_LOADER + select CRC32 # may need to update this when certificates are changed and are # using a different algorithm, though right now they shouldn't # (this is here rather than below to allow it to be a module) -- cgit 1.4.1 From 51049bd903a81307f751babe15a1df8d197884e8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:36:22 +0100 Subject: misdn: dsp: select CONFIG_BITREVERSE Without this, we run into a link error arm-linux-gnueabi-ld: drivers/isdn/mISDN/dsp_audio.o: in function `dsp_audio_generate_law_tables': (.text+0x30c): undefined reference to `byte_rev_table' arm-linux-gnueabi-ld: drivers/isdn/mISDN/dsp_audio.o:(.text+0x5e4): more undefined references to `byte_rev_table' follow Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/isdn/mISDN/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/isdn/mISDN/Kconfig b/drivers/isdn/mISDN/Kconfig index 26cf0ac9c4ad..c9a53c222472 100644 --- a/drivers/isdn/mISDN/Kconfig +++ b/drivers/isdn/mISDN/Kconfig @@ -13,6 +13,7 @@ if MISDN != n config MISDN_DSP tristate "Digital Audio Processing of transparent data" depends on MISDN + select BITREVERSE help Enable support for digital audio processing capability. -- cgit 1.4.1 From 69931e11288520c250152180ecf9b6ac5e6e40ed Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:36:23 +0100 Subject: wan: ds26522: select CONFIG_BITREVERSE Without this, the driver runs into a link failure arm-linux-gnueabi-ld: drivers/net/wan/slic_ds26522.o: in function `slic_ds26522_probe': slic_ds26522.c:(.text+0x100c): undefined reference to `byte_rev_table' arm-linux-gnueabi-ld: slic_ds26522.c:(.text+0x1cdc): undefined reference to `byte_rev_table' arm-linux-gnueabi-ld: drivers/net/wan/slic_ds26522.o: in function `slic_write': slic_ds26522.c:(.text+0x1e4c): undefined reference to `byte_rev_table' Fixes: c37d4a0085c5 ("Maxim/driver: Add driver for maxim ds26522") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/wan/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig index 4029fde71a9e..83c9481995dd 100644 --- a/drivers/net/wan/Kconfig +++ b/drivers/net/wan/Kconfig @@ -282,6 +282,7 @@ config SLIC_DS26522 tristate "Slic Maxim ds26522 card support" depends on SPI depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE || COMPILE_TEST + select BITREVERSE help This module initializes and configures the slic maxim card in T1 or E1 mode. -- cgit 1.4.1 From 0f7ba7bc46fa0b574ccacf5672991b321e028492 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 3 Jan 2021 11:26:26 +1100 Subject: net/sonic: Fix some resource leaks in error handling paths A call to dma_alloc_coherent() is wrapped by sonic_alloc_descriptors(). This is correctly freed in the remove function, but not in the error handling path of the probe function. Fix this by adding the missing dma_free_coherent() call. While at it, rename a label in order to be slightly more informative. Cc: Christophe JAILLET Cc: Thomas Bogendoerfer Cc: Chris Zankel References: commit 10e3cc180e64 ("net/sonic: Fix a resource leak in an error handling path in 'jazz_sonic_probe()'") Fixes: 74f2a5f0ef64 ("xtensa: Add support for the Sonic Ethernet device for the XT2000 board.") Fixes: efcce839360f ("[PATCH] macsonic/jazzsonic network drivers update") Signed-off-by: Christophe JAILLET Signed-off-by: Finn Thain Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/macsonic.c | 12 ++++++++++-- drivers/net/ethernet/natsemi/xtsonic.c | 7 +++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c index 776b7d264dc3..2289e1fe3741 100644 --- a/drivers/net/ethernet/natsemi/macsonic.c +++ b/drivers/net/ethernet/natsemi/macsonic.c @@ -506,10 +506,14 @@ static int mac_sonic_platform_probe(struct platform_device *pdev) err = register_netdev(dev); if (err) - goto out; + goto undo_probe; return 0; +undo_probe: + dma_free_coherent(lp->device, + SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode), + lp->descriptors, lp->descriptors_laddr); out: free_netdev(dev); @@ -584,12 +588,16 @@ static int mac_sonic_nubus_probe(struct nubus_board *board) err = register_netdev(ndev); if (err) - goto out; + goto undo_probe; nubus_set_drvdata(board, ndev); return 0; +undo_probe: + dma_free_coherent(lp->device, + SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode), + lp->descriptors, lp->descriptors_laddr); out: free_netdev(ndev); return err; diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c index afa166ff7aef..28d9e98db81a 100644 --- a/drivers/net/ethernet/natsemi/xtsonic.c +++ b/drivers/net/ethernet/natsemi/xtsonic.c @@ -229,11 +229,14 @@ int xtsonic_probe(struct platform_device *pdev) sonic_msg_init(dev); if ((err = register_netdev(dev))) - goto out1; + goto undo_probe1; return 0; -out1: +undo_probe1: + dma_free_coherent(lp->device, + SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode), + lp->descriptors, lp->descriptors_laddr); release_region(dev->base_addr, SONIC_MEM_SIZE); out: free_netdev(dev); -- cgit 1.4.1 From cf0720697143f3eaa0779cca5a6602d8557d1c6f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 30 Dec 2020 19:37:53 -0800 Subject: net: suggest L2 discards be counted towards rx_dropped From the existing definitions it's unclear which stat to use to report filtering based on L2 dst addr in old broadcast-medium Ethernet. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 874cc12a34d9..82708c6db432 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -75,8 +75,9 @@ struct rtnl_link_stats { * * @rx_dropped: Number of packets received but not processed, * e.g. due to lack of resources or unsupported protocol. - * For hardware interfaces this counter should not include packets - * dropped by the device which are counted separately in + * For hardware interfaces this counter may include packets discarded + * due to L2 address filtering but should not include packets dropped + * by the device due to buffer exhaustion which are counted separately in * @rx_missed_errors (since procfs folds those two counters together). * * @tx_dropped: Number of packets dropped on their way to transmission, -- cgit 1.4.1 From 55b7ab1178cbf41f979ff83236d3321ad35ed2ad Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 30 Dec 2020 19:40:27 -0800 Subject: net: vlan: avoid leaks on register_vlan_dev() failures VLAN checks for NETREG_UNINITIALIZED to distinguish between registration failure and unregistration in progress. Since commit cb626bf566eb ("net-sysfs: Fix reference count leak") registration failure may, however, result in NETREG_UNREGISTERED as well as NETREG_UNINITIALIZED. This fix is similer to cebb69754f37 ("rtnetlink: Fix memory(net_device) leak when ->newlink fails") Fixes: cb626bf566eb ("net-sysfs: Fix reference count leak") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/8021q/vlan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index f292e0267bb9..15bbfaf943fd 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -284,7 +284,8 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) return 0; out_free_newdev: - if (new_dev->reg_state == NETREG_UNINITIALIZED) + if (new_dev->reg_state == NETREG_UNINITIALIZED || + new_dev->reg_state == NETREG_UNREGISTERED) free_netdev(new_dev); return err; } -- cgit 1.4.1 From 7eeecc4b1f480c7ba1932cb9a7693f8c452640f2 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 3 Jan 2021 05:17:41 -0600 Subject: net: stmmac: dwmac-sun8i: Fix probe error handling stmmac_pltfr_remove does three things in one function, making it inapproprate for unwinding the steps in the probe function. Currently, a failure before the call to stmmac_dvr_probe would leak OF node references due to missing a call to stmmac_remove_config_dt. And an error in stmmac_dvr_probe would cause the driver to attempt to remove a netdevice that was never added. Fix these by reordering the init and splitting out the error handling steps. Fixes: 9f93ac8d4085 ("net-next: stmmac: Add dwmac-sun8i") Fixes: 40a1dcee2d18 ("net: ethernet: dwmac-sun8i: Use the correct function in exit path") Signed-off-by: Samuel Holland Reviewed-by: Chen-Yu Tsai Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 25 ++++++++++++++--------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 58e0511badba..b20f261fce5b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -1134,10 +1134,6 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); - if (IS_ERR(plat_dat)) - return PTR_ERR(plat_dat); - gmac = devm_kzalloc(dev, sizeof(*gmac), GFP_KERNEL); if (!gmac) return -ENOMEM; @@ -1201,11 +1197,15 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) ret = of_get_phy_mode(dev->of_node, &interface); if (ret) return -EINVAL; - plat_dat->interface = interface; + + plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + if (IS_ERR(plat_dat)) + return PTR_ERR(plat_dat); /* platform data specifying hardware features and callbacks. * hardware features were copied from Allwinner drivers. */ + plat_dat->interface = interface; plat_dat->rx_coe = STMMAC_RX_COE_TYPE2; plat_dat->tx_coe = 1; plat_dat->has_sun8i = true; @@ -1216,7 +1216,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) ret = sun8i_dwmac_init(pdev, plat_dat->bsp_priv); if (ret) - return ret; + goto dwmac_deconfig; ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) @@ -1230,7 +1230,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) if (gmac->variant->soc_has_internal_phy) { ret = get_ephy_nodes(priv); if (ret) - goto dwmac_exit; + goto dwmac_remove; ret = sun8i_dwmac_register_mdio_mux(priv); if (ret) { dev_err(&pdev->dev, "Failed to register mux\n"); @@ -1239,15 +1239,20 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) } else { ret = sun8i_dwmac_reset(priv); if (ret) - goto dwmac_exit; + goto dwmac_remove; } return ret; dwmac_mux: sun8i_dwmac_unset_syscon(gmac); +dwmac_remove: + stmmac_dvr_remove(&pdev->dev); dwmac_exit: - stmmac_pltfr_remove(pdev); -return ret; + sun8i_dwmac_exit(pdev, gmac); +dwmac_deconfig: + stmmac_remove_config_dt(pdev, plat_dat); + + return ret; } static const struct of_device_id sun8i_dwmac_match[] = { -- cgit 1.4.1 From 529254216773acd5039c07aa18cf06fd1f9fccdd Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 3 Jan 2021 05:17:42 -0600 Subject: net: stmmac: dwmac-sun8i: Balance internal PHY resource references While stmmac_pltfr_remove calls sun8i_dwmac_exit, the sun8i_dwmac_init and sun8i_dwmac_exit functions are also called by the stmmac_platform suspend/resume callbacks. They may be called many times during the device's lifetime and should not release resources used by the driver. Furthermore, there was no error handling in case registering the MDIO mux failed during probe, and the EPHY clock was never released at all. Fix all of these issues by moving the deinitialization code to a driver removal callback. Also ensure the EPHY is powered down before removal. Fixes: 634db83b8265 ("net: stmmac: dwmac-sun8i: Handle integrated/external MDIOs") Signed-off-by: Samuel Holland Reviewed-by: Chen-Yu Tsai Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 27 ++++++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index b20f261fce5b..a05dee5d4584 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -1004,17 +1004,12 @@ static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv) struct sunxi_priv_data *gmac = priv; if (gmac->variant->soc_has_internal_phy) { - /* sun8i_dwmac_exit could be called with mdiomux uninit */ - if (gmac->mux_handle) - mdio_mux_uninit(gmac->mux_handle); if (gmac->internal_phy_powered) sun8i_dwmac_unpower_internal_phy(gmac); } sun8i_dwmac_unset_syscon(gmac); - reset_control_put(gmac->rst_ephy); - clk_disable_unprepare(gmac->tx_clk); if (gmac->regulator) @@ -1244,6 +1239,8 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) return ret; dwmac_mux: + reset_control_put(gmac->rst_ephy); + clk_put(gmac->ephy_clk); sun8i_dwmac_unset_syscon(gmac); dwmac_remove: stmmac_dvr_remove(&pdev->dev); @@ -1255,6 +1252,24 @@ dwmac_deconfig: return ret; } +static int sun8i_dwmac_remove(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + struct stmmac_priv *priv = netdev_priv(ndev); + struct sunxi_priv_data *gmac = priv->plat->bsp_priv; + + if (gmac->variant->soc_has_internal_phy) { + mdio_mux_uninit(gmac->mux_handle); + sun8i_dwmac_unpower_internal_phy(gmac); + reset_control_put(gmac->rst_ephy); + clk_put(gmac->ephy_clk); + } + + stmmac_pltfr_remove(pdev); + + return 0; +} + static const struct of_device_id sun8i_dwmac_match[] = { { .compatible = "allwinner,sun8i-h3-emac", .data = &emac_variant_h3 }, @@ -1274,7 +1289,7 @@ MODULE_DEVICE_TABLE(of, sun8i_dwmac_match); static struct platform_driver sun8i_dwmac_driver = { .probe = sun8i_dwmac_probe, - .remove = stmmac_pltfr_remove, + .remove = sun8i_dwmac_remove, .driver = { .name = "dwmac-sun8i", .pm = &stmmac_pltfr_pm_ops, -- cgit 1.4.1 From b8239638853e3e37b287e4bd4d57b41f14c78550 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 3 Jan 2021 05:17:43 -0600 Subject: net: stmmac: dwmac-sun8i: Balance internal PHY power sun8i_dwmac_exit calls sun8i_dwmac_unpower_internal_phy, but sun8i_dwmac_init did not call sun8i_dwmac_power_internal_phy. This caused PHY power to remain off after a suspend/resume cycle. Fix this by recording if PHY power should be restored, and if so, restoring it. Fixes: 634db83b8265 ("net: stmmac: dwmac-sun8i: Handle integrated/external MDIOs") Signed-off-by: Samuel Holland Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 31 +++++++++++++++++------ 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index a05dee5d4584..e2c25c1c702a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -64,6 +64,7 @@ struct emac_variant { * @variant: reference to the current board variant * @regmap: regmap for using the syscon * @internal_phy_powered: Does the internal PHY is enabled + * @use_internal_phy: Is the internal PHY selected for use * @mux_handle: Internal pointer used by mdio-mux lib */ struct sunxi_priv_data { @@ -74,6 +75,7 @@ struct sunxi_priv_data { const struct emac_variant *variant; struct regmap_field *regmap_field; bool internal_phy_powered; + bool use_internal_phy; void *mux_handle; }; @@ -539,8 +541,11 @@ static const struct stmmac_dma_ops sun8i_dwmac_dma_ops = { .dma_interrupt = sun8i_dwmac_dma_interrupt, }; +static int sun8i_dwmac_power_internal_phy(struct stmmac_priv *priv); + static int sun8i_dwmac_init(struct platform_device *pdev, void *priv) { + struct net_device *ndev = platform_get_drvdata(pdev); struct sunxi_priv_data *gmac = priv; int ret; @@ -554,13 +559,25 @@ static int sun8i_dwmac_init(struct platform_device *pdev, void *priv) ret = clk_prepare_enable(gmac->tx_clk); if (ret) { - if (gmac->regulator) - regulator_disable(gmac->regulator); dev_err(&pdev->dev, "Could not enable AHB clock\n"); - return ret; + goto err_disable_regulator; + } + + if (gmac->use_internal_phy) { + ret = sun8i_dwmac_power_internal_phy(netdev_priv(ndev)); + if (ret) + goto err_disable_clk; } return 0; + +err_disable_clk: + clk_disable_unprepare(gmac->tx_clk); +err_disable_regulator: + if (gmac->regulator) + regulator_disable(gmac->regulator); + + return ret; } static void sun8i_dwmac_core_init(struct mac_device_info *hw, @@ -831,7 +848,6 @@ static int mdio_mux_syscon_switch_fn(int current_child, int desired_child, struct sunxi_priv_data *gmac = priv->plat->bsp_priv; u32 reg, val; int ret = 0; - bool need_power_ephy = false; if (current_child ^ desired_child) { regmap_field_read(gmac->regmap_field, ®); @@ -839,13 +855,12 @@ static int mdio_mux_syscon_switch_fn(int current_child, int desired_child, case DWMAC_SUN8I_MDIO_MUX_INTERNAL_ID: dev_info(priv->device, "Switch mux to internal PHY"); val = (reg & ~H3_EPHY_MUX_MASK) | H3_EPHY_SELECT; - - need_power_ephy = true; + gmac->use_internal_phy = true; break; case DWMAC_SUN8I_MDIO_MUX_EXTERNAL_ID: dev_info(priv->device, "Switch mux to external PHY"); val = (reg & ~H3_EPHY_MUX_MASK) | H3_EPHY_SHUTDOWN; - need_power_ephy = false; + gmac->use_internal_phy = false; break; default: dev_err(priv->device, "Invalid child ID %x\n", @@ -853,7 +868,7 @@ static int mdio_mux_syscon_switch_fn(int current_child, int desired_child, return -EINVAL; } regmap_field_write(gmac->regmap_field, val); - if (need_power_ephy) { + if (gmac->use_internal_phy) { ret = sun8i_dwmac_power_internal_phy(priv); if (ret) return ret; -- cgit 1.4.1 From 9b1e39cf5dd81f33186cdb950fcf75a121f1a9a7 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 3 Jan 2021 05:17:44 -0600 Subject: net: stmmac: dwmac-sun8i: Balance syscon (de)initialization Previously, sun8i_dwmac_set_syscon was called from a chain of functions in several different files: sun8i_dwmac_probe stmmac_dvr_probe stmmac_hw_init stmmac_hwif_init sun8i_dwmac_setup sun8i_dwmac_set_syscon which made the lifetime of the syscon values hard to reason about. Part of the problem is that there is no similar platform driver callback from stmmac_dvr_remove. As a result, the driver unset the syscon value in sun8i_dwmac_exit, but this leaves it uninitialized after a suspend/ resume cycle. It was also unset a second time (outside sun8i_dwmac_exit) in the probe error path. Move the init to the earliest available place in sun8i_dwmac_probe (after stmmac_probe_config_dt, which initializes plat_dat), and the deinit to the corresponding position in the cleanup order. Since priv is not filled in until stmmac_dvr_probe, this requires changing the sun8i_dwmac_set_syscon parameters to priv's two relevant members. Fixes: 9f93ac8d4085 ("net-next: stmmac: Add dwmac-sun8i") Fixes: 634db83b8265 ("net: stmmac: dwmac-sun8i: Handle integrated/external MDIOs") Signed-off-by: Samuel Holland Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 50 +++++++++++------------ 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index e2c25c1c702a..a5e0eff4a387 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -898,22 +898,23 @@ static int sun8i_dwmac_register_mdio_mux(struct stmmac_priv *priv) return ret; } -static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) +static int sun8i_dwmac_set_syscon(struct device *dev, + struct plat_stmmacenet_data *plat) { - struct sunxi_priv_data *gmac = priv->plat->bsp_priv; - struct device_node *node = priv->device->of_node; + struct sunxi_priv_data *gmac = plat->bsp_priv; + struct device_node *node = dev->of_node; int ret; u32 reg, val; ret = regmap_field_read(gmac->regmap_field, &val); if (ret) { - dev_err(priv->device, "Fail to read from regmap field.\n"); + dev_err(dev, "Fail to read from regmap field.\n"); return ret; } reg = gmac->variant->default_syscon_value; if (reg != val) - dev_warn(priv->device, + dev_warn(dev, "Current syscon value is not the default %x (expect %x)\n", val, reg); @@ -926,9 +927,9 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) /* Force EPHY xtal frequency to 24MHz. */ reg |= H3_EPHY_CLK_SEL; - ret = of_mdio_parse_addr(priv->device, priv->plat->phy_node); + ret = of_mdio_parse_addr(dev, plat->phy_node); if (ret < 0) { - dev_err(priv->device, "Could not parse MDIO addr\n"); + dev_err(dev, "Could not parse MDIO addr\n"); return ret; } /* of_mdio_parse_addr returns a valid (0 ~ 31) PHY @@ -944,17 +945,17 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) if (!of_property_read_u32(node, "allwinner,tx-delay-ps", &val)) { if (val % 100) { - dev_err(priv->device, "tx-delay must be a multiple of 100\n"); + dev_err(dev, "tx-delay must be a multiple of 100\n"); return -EINVAL; } val /= 100; - dev_dbg(priv->device, "set tx-delay to %x\n", val); + dev_dbg(dev, "set tx-delay to %x\n", val); if (val <= gmac->variant->tx_delay_max) { reg &= ~(gmac->variant->tx_delay_max << SYSCON_ETXDC_SHIFT); reg |= (val << SYSCON_ETXDC_SHIFT); } else { - dev_err(priv->device, "Invalid TX clock delay: %d\n", + dev_err(dev, "Invalid TX clock delay: %d\n", val); return -EINVAL; } @@ -962,17 +963,17 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) if (!of_property_read_u32(node, "allwinner,rx-delay-ps", &val)) { if (val % 100) { - dev_err(priv->device, "rx-delay must be a multiple of 100\n"); + dev_err(dev, "rx-delay must be a multiple of 100\n"); return -EINVAL; } val /= 100; - dev_dbg(priv->device, "set rx-delay to %x\n", val); + dev_dbg(dev, "set rx-delay to %x\n", val); if (val <= gmac->variant->rx_delay_max) { reg &= ~(gmac->variant->rx_delay_max << SYSCON_ERXDC_SHIFT); reg |= (val << SYSCON_ERXDC_SHIFT); } else { - dev_err(priv->device, "Invalid RX clock delay: %d\n", + dev_err(dev, "Invalid RX clock delay: %d\n", val); return -EINVAL; } @@ -983,7 +984,7 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) if (gmac->variant->support_rmii) reg &= ~SYSCON_RMII_EN; - switch (priv->plat->interface) { + switch (plat->interface) { case PHY_INTERFACE_MODE_MII: /* default */ break; @@ -997,8 +998,8 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) reg |= SYSCON_RMII_EN | SYSCON_ETCS_EXT_GMII; break; default: - dev_err(priv->device, "Unsupported interface mode: %s", - phy_modes(priv->plat->interface)); + dev_err(dev, "Unsupported interface mode: %s", + phy_modes(plat->interface)); return -EINVAL; } @@ -1023,8 +1024,6 @@ static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv) sun8i_dwmac_unpower_internal_phy(gmac); } - sun8i_dwmac_unset_syscon(gmac); - clk_disable_unprepare(gmac->tx_clk); if (gmac->regulator) @@ -1059,16 +1058,11 @@ static struct mac_device_info *sun8i_dwmac_setup(void *ppriv) { struct mac_device_info *mac; struct stmmac_priv *priv = ppriv; - int ret; mac = devm_kzalloc(priv->device, sizeof(*mac), GFP_KERNEL); if (!mac) return NULL; - ret = sun8i_dwmac_set_syscon(priv); - if (ret) - return NULL; - mac->pcsr = priv->ioaddr; mac->mac = &sun8i_dwmac_ops; mac->dma = &sun8i_dwmac_dma_ops; @@ -1224,10 +1218,14 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) plat_dat->exit = sun8i_dwmac_exit; plat_dat->setup = sun8i_dwmac_setup; - ret = sun8i_dwmac_init(pdev, plat_dat->bsp_priv); + ret = sun8i_dwmac_set_syscon(&pdev->dev, plat_dat); if (ret) goto dwmac_deconfig; + ret = sun8i_dwmac_init(pdev, plat_dat->bsp_priv); + if (ret) + goto dwmac_syscon; + ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) goto dwmac_exit; @@ -1256,11 +1254,12 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) dwmac_mux: reset_control_put(gmac->rst_ephy); clk_put(gmac->ephy_clk); - sun8i_dwmac_unset_syscon(gmac); dwmac_remove: stmmac_dvr_remove(&pdev->dev); dwmac_exit: sun8i_dwmac_exit(pdev, gmac); +dwmac_syscon: + sun8i_dwmac_unset_syscon(gmac); dwmac_deconfig: stmmac_remove_config_dt(pdev, plat_dat); @@ -1281,6 +1280,7 @@ static int sun8i_dwmac_remove(struct platform_device *pdev) } stmmac_pltfr_remove(pdev); + sun8i_dwmac_unset_syscon(gmac); return 0; } -- cgit 1.4.1 From 9f9d41f03bb07069e6e83ff4720cfea74a63898d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 4 Jan 2021 17:22:24 -0800 Subject: docs: net: fix documentation on .ndo_get_stats Fix calling context. Signed-off-by: Jakub Kicinski Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- Documentation/networking/netdevices.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst index 5a85fcc80c76..e65665c5ab50 100644 --- a/Documentation/networking/netdevices.rst +++ b/Documentation/networking/netdevices.rst @@ -64,8 +64,8 @@ ndo_do_ioctl: Context: process ndo_get_stats: - Synchronization: dev_base_lock rwlock. - Context: nominally process, but don't sleep inside an rwlock + Synchronization: rtnl_lock() semaphore, dev_base_lock rwlock, or RCU. + Context: atomic (can't sleep under rwlock or RCU) ndo_start_xmit: Synchronization: __netif_tx_lock spinlock. -- cgit 1.4.1 From f04bbcbf1e38d192e94bbfa126731a52332c40b1 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Tue, 5 Jan 2021 11:37:26 +0800 Subject: net: hns3: fix a phy loopback fail issue When phy driver does not implement the set_loopback interface, phy loopback test will return -EOPNOTSUPP, and the loopback test will fail. So when phy driver does not implement the set_loopback interface, don't do phy loopback test. Fixes: c9765a89d142 ("net: hns3: add phy selftest function") Signed-off-by: Yonglong Liu Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index e6f37f91c489..135bd0a0dcaf 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -752,7 +752,8 @@ static int hclge_get_sset_count(struct hnae3_handle *handle, int stringset) handle->flags |= HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK; handle->flags |= HNAE3_SUPPORT_SERDES_PARALLEL_LOOPBACK; - if (hdev->hw.mac.phydev) { + if (hdev->hw.mac.phydev && hdev->hw.mac.phydev->drv && + hdev->hw.mac.phydev->drv->set_loopback) { count += 1; handle->flags |= HNAE3_SUPPORT_PHY_LOOPBACK; } -- cgit 1.4.1 From 65e61e3c2a619c4d4b873885b2d5394025ed117b Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Tue, 5 Jan 2021 11:37:27 +0800 Subject: net: hns3: fix the number of queues actually used by ARQ HCLGE_MBX_MAX_ARQ_MSG_NUM is used to apply memory for the number of queues used by ARQ(Asynchronous Receive Queue), so the head and tail pointers should also use this macro. Fixes: 07a0556a3a73 ("net: hns3: Changes to support ARQ(Asynchronous Receive Queue)") Signed-off-by: Yufeng Mo Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h index fb5e8842983c..33defa4c180a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h +++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h @@ -169,7 +169,7 @@ struct hclgevf_mbx_arq_ring { #define hclge_mbx_ring_ptr_move_crq(crq) \ (crq->next_to_use = (crq->next_to_use + 1) % crq->desc_num) #define hclge_mbx_tail_ptr_move_arq(arq) \ - (arq.tail = (arq.tail + 1) % HCLGE_MBX_MAX_ARQ_MSG_SIZE) + (arq.tail = (arq.tail + 1) % HCLGE_MBX_MAX_ARQ_MSG_NUM) #define hclge_mbx_head_ptr_move_arq(arq) \ - (arq.head = (arq.head + 1) % HCLGE_MBX_MAX_ARQ_MSG_SIZE) + (arq.head = (arq.head + 1) % HCLGE_MBX_MAX_ARQ_MSG_NUM) #endif -- cgit 1.4.1 From ab6e32d2913a594bc8f822ce4a75c400190b2ecc Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Tue, 5 Jan 2021 11:37:28 +0800 Subject: net: hns3: fix incorrect handling of sctp6 rss tuple For DEVICE_VERSION_V2, the hardware only supports src-ip, dst-ip and verification-tag for rss tuple set of sctp6 packet. For DEVICE_VERSION_V3, the hardware supports src-port and dst-port as well. Currently, when user queries the sctp6 rss tuples info, some unsupported information will be showed on V2. So add a check for hardware version when initializing and queries sctp6 rss tuple to fix this issue. Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support") Signed-off-by: Jian Shen Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 6 ++++-- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 2 ++ drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 9 ++++++--- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 2 ++ 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 135bd0a0dcaf..c242883fea5d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -4538,8 +4538,8 @@ static int hclge_set_rss_tuple(struct hnae3_handle *handle, req->ipv4_sctp_en = tuple_sets; break; case SCTP_V6_FLOW: - if ((nfc->data & RXH_L4_B_0_1) || - (nfc->data & RXH_L4_B_2_3)) + if (hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 && + (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3))) return -EINVAL; req->ipv6_sctp_en = tuple_sets; @@ -4731,6 +4731,8 @@ static void hclge_rss_init_cfg(struct hclge_dev *hdev) vport[i].rss_tuple_sets.ipv6_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; vport[i].rss_tuple_sets.ipv6_sctp_en = + hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 ? + HCLGE_RSS_INPUT_TUPLE_SCTP_NO_PORT : HCLGE_RSS_INPUT_TUPLE_SCTP; vport[i].rss_tuple_sets.ipv6_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 50a294dfaff5..ca46bc9110d7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -107,6 +107,8 @@ #define HCLGE_D_IP_BIT BIT(2) #define HCLGE_S_IP_BIT BIT(3) #define HCLGE_V_TAG_BIT BIT(4) +#define HCLGE_RSS_INPUT_TUPLE_SCTP_NO_PORT \ + (HCLGE_D_IP_BIT | HCLGE_S_IP_BIT | HCLGE_V_TAG_BIT) #define HCLGE_RSS_TC_SIZE_0 1 #define HCLGE_RSS_TC_SIZE_1 2 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 145757cb70f9..674b3a22e91f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -917,8 +917,8 @@ static int hclgevf_set_rss_tuple(struct hnae3_handle *handle, req->ipv4_sctp_en = tuple_sets; break; case SCTP_V6_FLOW: - if ((nfc->data & RXH_L4_B_0_1) || - (nfc->data & RXH_L4_B_2_3)) + if (hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 && + (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3))) return -EINVAL; req->ipv6_sctp_en = tuple_sets; @@ -2502,7 +2502,10 @@ static void hclgevf_rss_init_cfg(struct hclgevf_dev *hdev) tuple_sets->ipv4_fragment_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER; tuple_sets->ipv6_tcp_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER; tuple_sets->ipv6_udp_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER; - tuple_sets->ipv6_sctp_en = HCLGEVF_RSS_INPUT_TUPLE_SCTP; + tuple_sets->ipv6_sctp_en = + hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 ? + HCLGEVF_RSS_INPUT_TUPLE_SCTP_NO_PORT : + HCLGEVF_RSS_INPUT_TUPLE_SCTP; tuple_sets->ipv6_fragment_en = HCLGEVF_RSS_INPUT_TUPLE_OTHER; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 1b183bc35604..f6d817a3edcb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -122,6 +122,8 @@ #define HCLGEVF_D_IP_BIT BIT(2) #define HCLGEVF_S_IP_BIT BIT(3) #define HCLGEVF_V_TAG_BIT BIT(4) +#define HCLGEVF_RSS_INPUT_TUPLE_SCTP_NO_PORT \ + (HCLGEVF_D_IP_BIT | HCLGEVF_S_IP_BIT | HCLGEVF_V_TAG_BIT) #define HCLGEVF_STATS_TIMER_INTERVAL 36U -- cgit 1.4.1 From 7a68d725e4ea384977445e0bcaed3d7de83ab5b3 Mon Sep 17 00:00:00 2001 From: "Jouni K. Seppänen" Date: Tue, 5 Jan 2021 06:52:49 +0200 Subject: net: cdc_ncm: correct overhead in delayed_ndp_size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aligning to tx_ndp_modulus is not sufficient because the next align call can be cdc_ncm_align_tail, which can add up to ctx->tx_modulus + ctx->tx_remainder - 1 bytes. This used to lead to occasional crashes on a Huawei 909s-120 LTE module as follows: - the condition marked /* if there is a remaining skb [...] */ is true so the swaps happen - skb_out is set from ctx->tx_curr_skb - skb_out->len is exactly 0x3f52 - ctx->tx_curr_size is 0x4000 and delayed_ndp_size is 0xac (note that the sum of skb_out->len and delayed_ndp_size is 0x3ffe) - the for loop over n is executed once - the cdc_ncm_align_tail call marked /* align beginning of next frame */ increases skb_out->len to 0x3f56 (the sum is now 0x4002) - the condition marked /* check if we had enough room left [...] */ is false so we break out of the loop - the condition marked /* If requested, put NDP at end of frame. */ is true so the NDP is written into skb_out - now skb_out->len is 0x4002, so padding_count is minus two interpreted as an unsigned number, which is used as the length argument to memset, leading to a crash with various symptoms but usually including > Call Trace: > > cdc_ncm_fill_tx_frame+0x83a/0x970 [cdc_ncm] > cdc_mbim_tx_fixup+0x1d9/0x240 [cdc_mbim] > usbnet_start_xmit+0x5d/0x720 [usbnet] The cdc_ncm_align_tail call first aligns on a ctx->tx_modulus boundary (adding at most ctx->tx_modulus-1 bytes), then adds ctx->tx_remainder bytes. Alternatively, the next alignment call can occur in cdc_ncm_ndp16 or cdc_ncm_ndp32, in which case at most ctx->tx_ndp_modulus-1 bytes are added. A similar problem has occurred before, and the code is nontrivial to reason about, so add a guard before the crashing call. By that time it is too late to prevent any memory corruption (we'll have written past the end of the buffer already) but we can at least try to get a warning written into an on-disk log by avoiding the hard crash caused by padding past the buffer with a huge number of zeros. Signed-off-by: Jouni K. Seppänen Fixes: 4a0e3e989d66 ("cdc_ncm: Add support for moving NDP to end of NCM frame") Link: https://bugzilla.kernel.org/show_bug.cgi?id=209407 Reported-by: kernel test robot Reviewed-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 3b816a4731f2..5a78848db93f 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1199,7 +1199,10 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) * accordingly. Otherwise, we should check here. */ if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) - delayed_ndp_size = ALIGN(ctx->max_ndp_size, ctx->tx_ndp_modulus); + delayed_ndp_size = ctx->max_ndp_size + + max_t(u32, + ctx->tx_ndp_modulus, + ctx->tx_modulus + ctx->tx_remainder) - 1; else delayed_ndp_size = 0; @@ -1410,7 +1413,8 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) if (!(dev->driver_info->flags & FLAG_SEND_ZLP) && skb_out->len > ctx->min_tx_pkt) { padding_count = ctx->tx_curr_size - skb_out->len; - skb_put_zero(skb_out, padding_count); + if (!WARN_ON(padding_count > ctx->tx_curr_size)) + skb_put_zero(skb_out, padding_count); } else if (skb_out->len < ctx->tx_curr_size && (skb_out->len % dev->maxpacket) == 0) { skb_put_u8(skb_out, 0); /* force short packet */ -- cgit 1.4.1 From 4beb17e553b49c3dd74505c9f361e756aaae653e Mon Sep 17 00:00:00 2001 From: Qinglang Miao Date: Tue, 5 Jan 2021 13:57:54 +0800 Subject: net: qrtr: fix null-ptr-deref in qrtr_ns_remove A null-ptr-deref bug is reported by Hulk Robot like this: -------------- KASAN: null-ptr-deref in range [0x0000000000000128-0x000000000000012f] Call Trace: qrtr_ns_remove+0x22/0x40 [ns] qrtr_proto_fini+0xa/0x31 [qrtr] __x64_sys_delete_module+0x337/0x4e0 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x468ded -------------- When qrtr_ns_init fails in qrtr_proto_init, qrtr_ns_remove which would be called later on would raise a null-ptr-deref because qrtr_ns.workqueue has been destroyed. Fix it by making qrtr_ns_init have a return value and adding a check in qrtr_proto_init. Reported-by: Hulk Robot Signed-off-by: Qinglang Miao Signed-off-by: David S. Miller --- net/qrtr/ns.c | 7 ++++--- net/qrtr/qrtr.c | 16 +++++++++++----- net/qrtr/qrtr.h | 2 +- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index 56aaf8cb6527..8d00dfe8139e 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -755,7 +755,7 @@ static void qrtr_ns_data_ready(struct sock *sk) queue_work(qrtr_ns.workqueue, &qrtr_ns.work); } -void qrtr_ns_init(void) +int qrtr_ns_init(void) { struct sockaddr_qrtr sq; int ret; @@ -766,7 +766,7 @@ void qrtr_ns_init(void) ret = sock_create_kern(&init_net, AF_QIPCRTR, SOCK_DGRAM, PF_QIPCRTR, &qrtr_ns.sock); if (ret < 0) - return; + return ret; ret = kernel_getsockname(qrtr_ns.sock, (struct sockaddr *)&sq); if (ret < 0) { @@ -797,12 +797,13 @@ void qrtr_ns_init(void) if (ret < 0) goto err_wq; - return; + return 0; err_wq: destroy_workqueue(qrtr_ns.workqueue); err_sock: sock_release(qrtr_ns.sock); + return ret; } EXPORT_SYMBOL_GPL(qrtr_ns_init); diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index f4ab3ca6d73b..b34358282f37 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -1287,13 +1287,19 @@ static int __init qrtr_proto_init(void) return rc; rc = sock_register(&qrtr_family); - if (rc) { - proto_unregister(&qrtr_proto); - return rc; - } + if (rc) + goto err_proto; - qrtr_ns_init(); + rc = qrtr_ns_init(); + if (rc) + goto err_sock; + return 0; + +err_sock: + sock_unregister(qrtr_family.family); +err_proto: + proto_unregister(&qrtr_proto); return rc; } postcore_initcall(qrtr_proto_init); diff --git a/net/qrtr/qrtr.h b/net/qrtr/qrtr.h index dc2b67f17927..3f2d28696062 100644 --- a/net/qrtr/qrtr.h +++ b/net/qrtr/qrtr.h @@ -29,7 +29,7 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep); int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len); -void qrtr_ns_init(void); +int qrtr_ns_init(void); void qrtr_ns_remove(void); -- cgit 1.4.1 From 445c6198fe7be03b7d38e66fe8d4b3187bc251d4 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 5 Jan 2021 20:15:15 +1100 Subject: net: ethernet: fs_enet: Add missing MODULE_LICENSE Since commit 1d6cd3929360 ("modpost: turn missing MODULE_LICENSE() into error") the ppc32_allmodconfig build fails with: ERROR: modpost: missing MODULE_LICENSE() in drivers/net/ethernet/freescale/fs_enet/mii-fec.o ERROR: modpost: missing MODULE_LICENSE() in drivers/net/ethernet/freescale/fs_enet/mii-bitbang.o Add the missing MODULE_LICENSEs to fix the build. Both files include a copyright header indicating they are GPL v2. Signed-off-by: Michael Ellerman Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c | 1 + drivers/net/ethernet/freescale/fs_enet/mii-fec.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c index c8e5d889bd81..21de56345503 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c +++ b/drivers/net/ethernet/freescale/fs_enet/mii-bitbang.c @@ -223,3 +223,4 @@ static struct platform_driver fs_enet_bb_mdio_driver = { }; module_platform_driver(fs_enet_bb_mdio_driver); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c index 8b51ee142fa3..152f4d83765a 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c @@ -224,3 +224,4 @@ static struct platform_driver fs_enet_fec_mdio_driver = { }; module_platform_driver(fs_enet_fec_mdio_driver); +MODULE_LICENSE("GPL"); -- cgit 1.4.1 From 3503ee6c0bec5f173d606359e6384a5ef85492fb Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Tue, 5 Jan 2021 18:17:40 +0800 Subject: selftests: fix the return value for UDP GRO test The udpgro.sh will always return 0 (unless the bpf selftest was not build first) even if there are some failed sub test-cases. Therefore the kselftest framework will report this case is OK. Check and return the exit status of each test to make it easier to spot real failures. Signed-off-by: Po-Hsu Lin Signed-off-by: David S. Miller --- tools/testing/selftests/net/udpgro.sh | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index ac2a30be9b32..f8a19f548ae9 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -5,6 +5,14 @@ readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +# set global exit status, but never reset nonzero one. +check_err() +{ + if [ $ret -eq 0 ]; then + ret=$1 + fi +} + cleanup() { local -r jobs="$(jobs -p)" local -r ns="$(ip netns list|grep $PEER_NS)" @@ -44,7 +52,9 @@ run_one() { # Hack: let bg programs complete the startup sleep 0.1 ./udpgso_bench_tx ${tx_args} + ret=$? wait $(jobs -p) + return $ret } run_test() { @@ -87,8 +97,10 @@ run_one_nat() { sleep 0.1 ./udpgso_bench_tx ${tx_args} + ret=$? kill -INT $pid wait $(jobs -p) + return $ret } run_one_2sock() { @@ -110,7 +122,9 @@ run_one_2sock() { sleep 0.1 # first UDP GSO socket should be closed at this point ./udpgso_bench_tx ${tx_args} + ret=$? wait $(jobs -p) + return $ret } run_nat_test() { @@ -131,36 +145,54 @@ run_all() { local -r core_args="-l 4" local -r ipv4_args="${core_args} -4 -D 192.168.1.1" local -r ipv6_args="${core_args} -6 -D 2001:db8::1" + ret=0 echo "ipv4" run_test "no GRO" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400" + check_err $? # explicitly check we are not receiving UDP_SEGMENT cmsg (-S -1) # when GRO does not take place run_test "no GRO chk cmsg" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400 -S -1" + check_err $? # the GSO packets are aggregated because: # * veth schedule napi after each xmit # * segmentation happens in BH context, veth napi poll is delayed after # the transmission of the last segment run_test "GRO" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720" + check_err $? run_test "GRO chk cmsg" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472" + check_err $? run_test "GRO with custom segment size" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720" + check_err $? run_test "GRO with custom segment size cmsg" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720 -S 500" + check_err $? run_nat_test "bad GRO lookup" "${ipv4_args} -M 1 -s 14720 -S 0" "-n 10 -l 1472" + check_err $? run_2sock_test "multiple GRO socks" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472" + check_err $? echo "ipv6" run_test "no GRO" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400" + check_err $? run_test "no GRO chk cmsg" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400 -S -1" + check_err $? run_test "GRO" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520" + check_err $? run_test "GRO chk cmsg" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520 -S 1452" + check_err $? run_test "GRO with custom segment size" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520" + check_err $? run_test "GRO with custom segment size cmsg" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520 -S 500" + check_err $? run_nat_test "bad GRO lookup" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 10 -l 1452" + check_err $? run_2sock_test "multiple GRO socks" "${ipv6_args} -M 1 -s 14520 -S 0 " "-n 1 -l 14520 -S 1452" + check_err $? + return $ret } if [ ! -f ../bpf/xdp_dummy.o ]; then @@ -180,3 +212,5 @@ elif [[ $1 == "__subprocess_2sock" ]]; then shift run_one_2sock $@ fi + +exit $? -- cgit 1.4.1 From 67208692802ce3cacfa00fe586dc0cb1bef0a51c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 6 Jan 2021 00:42:19 +0100 Subject: tools/resolve_btfids: Warn when having multiple IDs for single type The kernel image can contain multiple types (structs/unions) with the same name. This causes distinct type hierarchies in BTF data and makes resolve_btfids fail with error like: BTFIDS vmlinux FAILED unresolved symbol udp6_sock as reported by Qais Yousef [1]. This change adds warning when multiple types of the same name are detected: BTFIDS vmlinux WARN: multiple IDs found for 'file': 526, 113351 - using 526 WARN: multiple IDs found for 'sk_buff': 2744, 113958 - using 2744 We keep the lower ID for the given type instance and let the build continue. Also changing the 'nr' variable name to 'nr_types' to avoid confusion. [1] https://lore.kernel.org/lkml/20201229151352.6hzmjvu3qh6p2qgg@e107158-lin/ Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210105234219.970039-1-jolsa@kernel.org --- tools/bpf/resolve_btfids/main.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index e3ea569ee125..7409d7860aa6 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -139,6 +139,8 @@ int eprintf(int level, int var, const char *fmt, ...) #define pr_debug2(fmt, ...) pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__) #define pr_err(fmt, ...) \ eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__) +#define pr_info(fmt, ...) \ + eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__) static bool is_btf_id(const char *name) { @@ -472,7 +474,7 @@ static int symbols_resolve(struct object *obj) int nr_funcs = obj->nr_funcs; int err, type_id; struct btf *btf; - __u32 nr; + __u32 nr_types; btf = btf__parse(obj->btf ?: obj->path, NULL); err = libbpf_get_error(btf); @@ -483,12 +485,12 @@ static int symbols_resolve(struct object *obj) } err = -1; - nr = btf__get_nr_types(btf); + nr_types = btf__get_nr_types(btf); /* * Iterate all the BTF types and search for collected symbol IDs. */ - for (type_id = 1; type_id <= nr; type_id++) { + for (type_id = 1; type_id <= nr_types; type_id++) { const struct btf_type *type; struct rb_root *root; struct btf_id *id; @@ -526,8 +528,13 @@ static int symbols_resolve(struct object *obj) id = btf_id__find(root, str); if (id) { - id->id = type_id; - (*nr)--; + if (id->id) { + pr_info("WARN: multiple IDs found for '%s': %d, %d - using %d\n", + str, id->id, type_id, id->id); + } else { + id->id = type_id; + (*nr)--; + } } } -- cgit 1.4.1 From 1d53864c3617f5235f891ca0fbe9347c4cd35d46 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Tue, 22 Dec 2020 15:29:04 +0800 Subject: scsi: ufs: Fix possible power drain during system suspend Currently if device needs to do flush or BKOP operations, the device VCC power is kept during runtime-suspend period. However, if system suspend is happening while device is runtime-suspended, such power may not be disabled successfully. The reasons may be, 1. If current PM level is the same as SPM level, device will keep runtime-suspended by ufshcd_system_suspend(). 2. Flush recheck work may not be scheduled successfully during system suspend period. If it can wake up the system, this is also not the intention of the recheck work. To fix this issue, simply runtime-resume the device if the flush is allowed during runtime suspend period. Flush capability will be disabled while leaving runtime suspend, and also not be allowed in system suspend period. Link: https://lore.kernel.org/r/20201222072905.32221-2-stanley.chu@mediatek.com Fixes: 51dd905bd2f6 ("scsi: ufs: Fix WriteBooster flush during runtime suspend") Reviewed-by: Chaotian Jing Reviewed-by: Can Guo Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 82ad31781bc9..9db348650f17 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -8938,7 +8938,8 @@ int ufshcd_system_suspend(struct ufs_hba *hba) if ((ufs_get_pm_lvl_to_dev_pwr_mode(hba->spm_lvl) == hba->curr_dev_pwr_mode) && (ufs_get_pm_lvl_to_link_pwr_state(hba->spm_lvl) == - hba->uic_link_state)) + hba->uic_link_state) && + !hba->dev_info.b_rpm_dev_flush_capable) goto out; if (pm_runtime_suspended(hba->dev)) { -- cgit 1.4.1 From 21acf4601cc63cf564c6fc1a74d81b191313c929 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Tue, 22 Dec 2020 15:29:05 +0800 Subject: scsi: ufs: Relax the condition of UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL is intended to skip enabling fWriteBoosterBufferFlushEn while WriteBooster is initializing. Therefore it is better to apply the checking during WriteBooster initialization only. Link: https://lore.kernel.org/r/20201222072905.32221-3-stanley.chu@mediatek.com Reviewed-by: Can Guo Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 9db348650f17..04ab6f2ccac6 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -289,7 +289,8 @@ static inline void ufshcd_wb_config(struct ufs_hba *hba) if (ret) dev_err(hba->dev, "%s: En WB flush during H8: failed: %d\n", __func__, ret); - ufshcd_wb_toggle_flush(hba, true); + if (!(hba->quirks & UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL)) + ufshcd_wb_toggle_flush(hba, true); } static void ufshcd_scsi_unblock_requests(struct ufs_hba *hba) @@ -5436,9 +5437,6 @@ static int ufshcd_wb_toggle_flush_during_h8(struct ufs_hba *hba, bool set) static inline void ufshcd_wb_toggle_flush(struct ufs_hba *hba, bool enable) { - if (hba->quirks & UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL) - return; - if (enable) ufshcd_wb_buf_flush_enable(hba); else -- cgit 1.4.1 From 4ceb06e7c336f4a8d3f3b6ac9a4fea2e9c97dc07 Mon Sep 17 00:00:00 2001 From: Colin Xu Date: Tue, 1 Dec 2020 14:03:29 +0800 Subject: drm/i915/gvt: Fix vfio_edid issue for BXT/APL BXT/APL has different isr/irr/hpd regs compared with other GEN9. If not setting these regs bits correctly according to the emulated monitor (currently a DP on PORT_B), although gvt still triggers a virtual HPD event, the guest driver won't detect a valid HPD pulse thus no full display detection will be executed to read the updated EDID. With this patch, the vfio_edid is enabled again on BXT/APL, which is previously disabled. Fixes: 642403e3599e ("drm/i915/gvt: Temporarily disable vfio_edid for BXT/APL") Signed-off-by: Colin Xu Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20201201060329.142375-1-colin.xu@intel.com Reviewed-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 81 +++++++++++++++++++++++++++----------- drivers/gpu/drm/i915/gvt/vgpu.c | 5 +-- 2 files changed, 59 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index a15f87539657..62a5b0dd2003 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -217,6 +217,15 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) DDI_BUF_CTL_ENABLE); vgpu_vreg_t(vgpu, DDI_BUF_CTL(port)) |= DDI_BUF_IS_IDLE; } + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= + ~(PORTA_HOTPLUG_ENABLE | PORTA_HOTPLUG_STATUS_MASK); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= + ~(PORTB_HOTPLUG_ENABLE | PORTB_HOTPLUG_STATUS_MASK); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= + ~(PORTC_HOTPLUG_ENABLE | PORTC_HOTPLUG_STATUS_MASK); + /* No hpd_invert set in vgpu vbt, need to clear invert mask */ + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= ~BXT_DDI_HPD_INVERT_MASK; + vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= ~BXT_DE_PORT_HOTPLUG_MASK; vgpu_vreg_t(vgpu, BXT_P_CR_GT_DISP_PWRON) &= ~(BIT(0) | BIT(1)); vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY0)) &= @@ -273,6 +282,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_EDP)) |= (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | TRANS_DDI_FUNC_ENABLE); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTA_HOTPLUG_ENABLE; vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= GEN8_DE_PORT_HOTPLUG(HPD_PORT_A); } @@ -301,6 +312,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | (PORT_B << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTB_HOTPLUG_ENABLE; vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= GEN8_DE_PORT_HOTPLUG(HPD_PORT_B); } @@ -329,6 +342,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | (PORT_B << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTC_HOTPLUG_ENABLE; vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= GEN8_DE_PORT_HOTPLUG(HPD_PORT_C); } @@ -661,44 +676,62 @@ void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected) PORTD_HOTPLUG_STATUS_MASK; intel_vgpu_trigger_virtual_event(vgpu, DP_D_HOTPLUG); } else if (IS_BROXTON(i915)) { - if (connected) { - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) { + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) { + if (connected) { vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= GEN8_DE_PORT_HOTPLUG(HPD_PORT_A); + } else { + vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= + ~GEN8_DE_PORT_HOTPLUG(HPD_PORT_A); } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { - vgpu_vreg_t(vgpu, SFUSE_STRAP) |= - SFUSE_STRAP_DDIB_DETECTED; + vgpu_vreg_t(vgpu, GEN8_DE_PORT_IIR) |= + GEN8_DE_PORT_HOTPLUG(HPD_PORT_A); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= + ~PORTA_HOTPLUG_STATUS_MASK; + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTA_HOTPLUG_LONG_DETECT; + intel_vgpu_trigger_virtual_event(vgpu, DP_A_HOTPLUG); + } + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { + if (connected) { vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= GEN8_DE_PORT_HOTPLUG(HPD_PORT_B); - } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) { vgpu_vreg_t(vgpu, SFUSE_STRAP) |= - SFUSE_STRAP_DDIC_DETECTED; - vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= - GEN8_DE_PORT_HOTPLUG(HPD_PORT_C); - } - } else { - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) { + SFUSE_STRAP_DDIB_DETECTED; + } else { vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= - ~GEN8_DE_PORT_HOTPLUG(HPD_PORT_A); - } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { + ~GEN8_DE_PORT_HOTPLUG(HPD_PORT_B); vgpu_vreg_t(vgpu, SFUSE_STRAP) &= ~SFUSE_STRAP_DDIB_DETECTED; - vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= - ~GEN8_DE_PORT_HOTPLUG(HPD_PORT_B); } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) { - vgpu_vreg_t(vgpu, SFUSE_STRAP) &= - ~SFUSE_STRAP_DDIC_DETECTED; + vgpu_vreg_t(vgpu, GEN8_DE_PORT_IIR) |= + GEN8_DE_PORT_HOTPLUG(HPD_PORT_B); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= + ~PORTB_HOTPLUG_STATUS_MASK; + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTB_HOTPLUG_LONG_DETECT; + intel_vgpu_trigger_virtual_event(vgpu, DP_B_HOTPLUG); + } + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) { + if (connected) { + vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= + GEN8_DE_PORT_HOTPLUG(HPD_PORT_C); + vgpu_vreg_t(vgpu, SFUSE_STRAP) |= + SFUSE_STRAP_DDIC_DETECTED; + } else { vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= ~GEN8_DE_PORT_HOTPLUG(HPD_PORT_C); + vgpu_vreg_t(vgpu, SFUSE_STRAP) &= + ~SFUSE_STRAP_DDIC_DETECTED; } + vgpu_vreg_t(vgpu, GEN8_DE_PORT_IIR) |= + GEN8_DE_PORT_HOTPLUG(HPD_PORT_C); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= + ~PORTC_HOTPLUG_STATUS_MASK; + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTC_HOTPLUG_LONG_DETECT; + intel_vgpu_trigger_virtual_event(vgpu, DP_C_HOTPLUG); } - vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= - PORTB_HOTPLUG_STATUS_MASK; - intel_vgpu_trigger_virtual_event(vgpu, DP_B_HOTPLUG); } } diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index e49944fde333..cbe5931906e0 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -437,10 +437,9 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_clean_sched_policy; - if (IS_BROADWELL(dev_priv)) + if (IS_BROADWELL(dev_priv) || IS_BROXTON(dev_priv)) ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_B); - /* FixMe: Re-enable APL/BXT once vfio_edid enabled */ - else if (!IS_BROXTON(dev_priv)) + else ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_D); if (ret) goto out_clean_sched_policy; -- cgit 1.4.1 From 6948a96a0d69b7e8203758f44849ce4ab06ff788 Mon Sep 17 00:00:00 2001 From: Kiwoong Kim Date: Sat, 19 Dec 2020 15:40:39 +0900 Subject: scsi: ufs: Relocate flush of exceptional event The current flush location does not guarantee disabling BKOPS for the case of requesting device power off. 1) The exceptional event handler is queued 2) ufs suspend starts with a request of device power off 3) BKOPS is disabled in ufs suspend 4) The queued work for the handler is done and BKOPS is re-enabled Relocate the flush statement to ensure BKOPS remain disabled. Link: https://lore.kernel.org/r/1608360039-16390-1-git-send-email-kwmad.kim@samsung.com Reviewed-by: Can Guo Reviewed-by: Stanley Chu Signed-off-by: Kiwoong Kim Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 04ab6f2ccac6..00ee8cabf8e0 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -8696,6 +8696,8 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) ufshcd_wb_need_flush(hba)); } + flush_work(&hba->eeh_work); + if (req_dev_pwr_mode != hba->curr_dev_pwr_mode) { if (!ufshcd_is_runtime_pm(pm_op)) /* ensure that bkops is disabled */ @@ -8708,8 +8710,6 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) } } - flush_work(&hba->eeh_work); - /* * In the case of DeepSleep, the device is expected to remain powered * with the link off, so do not check for bkops. -- cgit 1.4.1 From 35fc4cd34426c242ab015ef280853b7bff101f48 Mon Sep 17 00:00:00 2001 From: Can Guo Date: Mon, 28 Dec 2020 04:04:36 -0800 Subject: scsi: ufs: Correct the LUN used in eh_device_reset_handler() callback Users can initiate resets to specific SCSI device/target/host through IOCTL. When this happens, the SCSI cmd passed to eh_device/target/host _reset_handler() callbacks is initialized with a request whose tag is -1. In this case it is not right for eh_device_reset_handler() callback to count on the LUN get from hba->lrb[-1]. Fix it by getting LUN from the SCSI device associated with the SCSI cmd. Link: https://lore.kernel.org/r/1609157080-26283-1-git-send-email-cang@codeaurora.org Reviewed-by: Avri Altman Reviewed-by: Stanley Chu Signed-off-by: Can Guo Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 00ee8cabf8e0..e31d2c5c7b23 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -6659,19 +6659,16 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd) { struct Scsi_Host *host; struct ufs_hba *hba; - unsigned int tag; u32 pos; int err; - u8 resp = 0xF; - struct ufshcd_lrb *lrbp; + u8 resp = 0xF, lun; unsigned long flags; host = cmd->device->host; hba = shost_priv(host); - tag = cmd->request->tag; - lrbp = &hba->lrb[tag]; - err = ufshcd_issue_tm_cmd(hba, lrbp->lun, 0, UFS_LOGICAL_RESET, &resp); + lun = ufshcd_scsi_to_upiu_lun(cmd->device->lun); + err = ufshcd_issue_tm_cmd(hba, lun, 0, UFS_LOGICAL_RESET, &resp); if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) { if (!err) err = resp; @@ -6680,7 +6677,7 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd) /* clear the commands that were pending for corresponding LUN */ for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs) { - if (hba->lrb[pos].lun == lrbp->lun) { + if (hba->lrb[pos].lun == lun) { err = ufshcd_clear_cmd(hba, pos); if (err) break; -- cgit 1.4.1 From d50c7986fbf0e2167279e110a2ed5bd8e811c660 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Thu, 17 Dec 2020 02:51:44 -0800 Subject: scsi: qedi: Correct max length of CHAP secret The CHAP secret displayed garbage characters causing iSCSI login authentication failure. Correct the CHAP password max length. Link: https://lore.kernel.org/r/20201217105144.8055-1-njavali@marvell.com Reviewed-by: Lee Duncan Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index f5fc7f518f8a..47ad64b06623 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -2245,7 +2245,7 @@ qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type, chap_name); break; case ISCSI_BOOT_TGT_CHAP_SECRET: - rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN, chap_secret); break; case ISCSI_BOOT_TGT_REV_CHAP_NAME: @@ -2253,7 +2253,7 @@ qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type, mchap_name); break; case ISCSI_BOOT_TGT_REV_CHAP_SECRET: - rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN, mchap_secret); break; case ISCSI_BOOT_TGT_FLAGS: -- cgit 1.4.1 From 39718fe7adb1a79f78be23f058299bc038cbe161 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 17 Dec 2020 17:20:19 +0000 Subject: scsi: mpt3sas: Fix spelling mistake in Kconfig "compatiblity" -> "compatibility" There is a spelling mistake in the Kconfig help text. Fix it. Link: https://lore.kernel.org/r/20201217172019.57768-1-colin.king@canonical.com Signed-off-by: Colin Ian King Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/Kconfig b/drivers/scsi/mpt3sas/Kconfig index 86209455172d..c299f7e078fb 100644 --- a/drivers/scsi/mpt3sas/Kconfig +++ b/drivers/scsi/mpt3sas/Kconfig @@ -79,5 +79,5 @@ config SCSI_MPT2SAS select SCSI_MPT3SAS depends on PCI && SCSI help - Dummy config option for backwards compatiblity: configure the MPT3SAS + Dummy config option for backwards compatibility: configure the MPT3SAS driver instead. -- cgit 1.4.1 From 3b01d7ea4dae907d34fa0eeb3f17bacd714c6d0c Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sat, 26 Dec 2020 14:15:03 +0800 Subject: scsi: scsi_debug: Fix memleak in scsi_debug_init() When sdeb_zbc_model does not match BLK_ZONED_NONE, BLK_ZONED_HA or BLK_ZONED_HM, we should free sdebug_q_arr to prevent memleak. Also there is no need to execute sdebug_erase_store() on failure of sdeb_zbc_model_str(). Link: https://lore.kernel.org/r/20201226061503.20050-1-dinghao.liu@zju.edu.cn Acked-by: Douglas Gilbert Signed-off-by: Dinghao Liu Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 24c0f7ec0351..4a08c450b756 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -6740,7 +6740,7 @@ static int __init scsi_debug_init(void) k = sdeb_zbc_model_str(sdeb_zbc_model_s); if (k < 0) { ret = k; - goto free_vm; + goto free_q_arr; } sdeb_zbc_model = k; switch (sdeb_zbc_model) { @@ -6753,7 +6753,8 @@ static int __init scsi_debug_init(void) break; default: pr_err("Invalid ZBC model\n"); - return -EINVAL; + ret = -EINVAL; + goto free_q_arr; } } if (sdeb_zbc_model != BLK_ZONED_NONE) { -- cgit 1.4.1 From e5cc9002caafacbaa8dab878d17a313192c3b03b Mon Sep 17 00:00:00 2001 From: "Ewan D. Milne" Date: Mon, 7 Dec 2020 17:10:21 -0500 Subject: scsi: sd: Suppress spurious errors when WRITE SAME is being disabled The block layer code will split a large zeroout request into multiple bios and if WRITE SAME is disabled because the storage device reports that it does not support it (or support the length used), we can get an error message from the block layer despite the setting of RQF_QUIET on the first request. This is because more than one request may have already been submitted. Fix this by setting RQF_QUIET when BLK_STS_TARGET is returned to fail the request early, we don't need to log a message because we did not actually submit the command to the device, and the block layer code will handle the error by submitting individual write bios. Link: https://lore.kernel.org/r/20201207221021.28243-1-emilne@redhat.com Reviewed-by: Christoph Hellwig Signed-off-by: Ewan D. Milne Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 679c2c025047..b766ad54e4a5 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -984,8 +984,10 @@ static blk_status_t sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd) } } - if (sdp->no_write_same) + if (sdp->no_write_same) { + rq->rq_flags |= RQF_QUIET; return BLK_STS_TARGET; + } if (sdkp->ws16 || lba > 0xffffffff || nr_blocks > 0xffff) return sd_setup_write_same16_cmnd(cmd, false); -- cgit 1.4.1 From be2553358cd40c0db11d1aa96f819c07413b2aae Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 14 Dec 2020 10:54:24 +0100 Subject: scsi: sd: Remove obsolete variable in sd_remove() Commit 996e509bbc95 ("sd: use __register_blkdev to avoid a modprobe for an unregistered dev_t") removed blk_register_region(devt, ...) in sd_remove() and since then, devt is unused in sd_remove(). Hence, make W=1 warns: drivers/scsi/sd.c:3516:8: warning: variable 'devt' set but not used [-Wunused-but-set-variable] Simply remove this obsolete variable. [mkp: fixed commit sha] Link: https://lore.kernel.org/r/20201214095424.12479-1-lukas.bulwahn@gmail.com Reviewed-by: Christoph Hellwig Reviewed-by: Nathan Chancellor Acked-by: Martin K. Petersen Signed-off-by: Lukas Bulwahn Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index b766ad54e4a5..a3d2d4bc4a3d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3512,10 +3512,8 @@ static int sd_probe(struct device *dev) static int sd_remove(struct device *dev) { struct scsi_disk *sdkp; - dev_t devt; sdkp = dev_get_drvdata(dev); - devt = disk_devt(sdkp->disk); scsi_autopm_get_device(sdkp->device); async_synchronize_full_domain(&scsi_sd_pm_domain); -- cgit 1.4.1 From 19fce0470f05031e6af36e49ce222d0f0050d432 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 1 Dec 2020 17:52:43 -0800 Subject: nvme-fc: avoid calling _nvme_fc_abort_outstanding_ios from interrupt context Recent patches changed calling sequences. nvme_fc_abort_outstanding_ios used to be called from a timeout or work context. Now it is being called in an io completion context, which can be an interrupt handler. Unfortunately, the abort outstanding ios routine attempts to stop nvme queues and nested routines that may try to sleep, which is in conflict with the interrupt handler. Correct replacing the direct call with a work element scheduling, and the abort outstanding ios routine will be called in the work element. Fixes: 95ced8a2c72d ("nvme-fc: eliminate terminate_io use by nvme_fc_error_recovery") Signed-off-by: James Smart Reported-by: Daniel Wagner Tested-by: Daniel Wagner Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 38373a0e86ef..5f36cfa8136c 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -166,6 +166,7 @@ struct nvme_fc_ctrl { struct blk_mq_tag_set admin_tag_set; struct blk_mq_tag_set tag_set; + struct work_struct ioerr_work; struct delayed_work connect_work; struct kref ref; @@ -1888,6 +1889,15 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, } } +static void +nvme_fc_ctrl_ioerr_work(struct work_struct *work) +{ + struct nvme_fc_ctrl *ctrl = + container_of(work, struct nvme_fc_ctrl, ioerr_work); + + nvme_fc_error_recovery(ctrl, "transport detected io error"); +} + static void nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) { @@ -2046,7 +2056,7 @@ done: check_error: if (terminate_assoc) - nvme_fc_error_recovery(ctrl, "transport detected io error"); + queue_work(nvme_reset_wq, &ctrl->ioerr_work); } static int @@ -3233,6 +3243,7 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); + cancel_work_sync(&ctrl->ioerr_work); cancel_delayed_work_sync(&ctrl->connect_work); /* * kill the association on the link side. this will block @@ -3449,6 +3460,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work); INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); + INIT_WORK(&ctrl->ioerr_work, nvme_fc_ctrl_ioerr_work); spin_lock_init(&ctrl->lock); /* io queue count */ @@ -3540,6 +3552,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, fail_ctrl: nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING); + cancel_work_sync(&ctrl->ioerr_work); cancel_work_sync(&ctrl->ctrl.reset_work); cancel_delayed_work_sync(&ctrl->connect_work); -- cgit 1.4.1 From 2b54996b7d56badc563755840838614f2fa9c4de Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 7 Dec 2020 12:29:40 -0800 Subject: nvme-fcloop: Fix sscanf type and list_first_entry_or_null warnings Kernel robot had the following warnings: >> fcloop.c:1506:6: warning: %x in format string (no. 1) requires >> 'unsigned int *' but the argument type is 'signed int *'. >> [invalidScanfArgType_int] >> if (sscanf(buf, "%x:%d:%d", &opcode, &starting, &amount) != 3) >> ^ Resolve by changing opcode from and int to an unsigned int and >> fcloop.c:1632:32: warning: Uninitialized variable: lport [uninitvar] >> ret = __wait_localport_unreg(lport); >> ^ >> fcloop.c:1615:28: warning: Uninitialized variable: nport [uninitvar] >> ret = __remoteport_unreg(nport, rport); >> ^ These aren't actual issues as the values are assigned prior to use. It appears the tool doesn't understand list_first_entry_or_null(). Regardless, quiet the tool by initializing the pointers to NULL at declaration. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fcloop.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 733d9363900e..68213f0a052b 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -1501,7 +1501,8 @@ static ssize_t fcloop_set_cmd_drop(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - int opcode, starting, amount; + unsigned int opcode; + int starting, amount; if (sscanf(buf, "%x:%d:%d", &opcode, &starting, &amount) != 3) return -EBADRQC; @@ -1588,8 +1589,8 @@ out_destroy_class: static void __exit fcloop_exit(void) { - struct fcloop_lport *lport; - struct fcloop_nport *nport; + struct fcloop_lport *lport = NULL; + struct fcloop_nport *nport = NULL; struct fcloop_tport *tport; struct fcloop_rport *rport; unsigned long flags; -- cgit 1.4.1 From 7ee5c78ca3895d44e918c38332921983ed678be0 Mon Sep 17 00:00:00 2001 From: Gopal Tiwari Date: Fri, 4 Dec 2020 21:46:57 +0530 Subject: nvme-pci: mark Samsung PM1725a as IGNORE_DEV_SUBNQN A system with more than one of these SSDs will only have one usable. Hence the kernel fails to detect nvme devices due to duplicate cntlids. [ 6.274554] nvme nvme1: Duplicate cntlid 33 with nvme0, rejecting [ 6.274566] nvme nvme1: Removing after probe failure status: -22 Adding the NVME_QUIRK_IGNORE_DEV_SUBNQN quirk to resolves the issue. Signed-off-by: Gopal Tiwari Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b4385cb0ff60..553871e6962b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3196,7 +3196,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x144d, 0xa821), /* Samsung PM1725 */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */ - .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1d1d, 0x1f1f), /* LighNVM qemu device */ .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */ -- cgit 1.4.1 From 5c11f7d9f843bdd24cd29b95401938bc3f168070 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 21 Dec 2020 00:03:39 -0800 Subject: nvme-tcp: Fix possible race of io_work and direct send We may send a request (with or without its data) from two paths: 1. From our I/O context nvme_tcp_io_work which is triggered from: - queue_rq - r2t reception - socket data_ready and write_space callbacks 2. Directly from queue_rq if the send_list is empty (because we want to save the context switch associated with scheduling our io_work). However, given that now we have the send_mutex, we may run into a race condition where none of these contexts will send the pending payload to the controller. Both io_work send path and queue_rq send path opportunistically attempt to acquire the send_mutex however queue_rq only attempts to send a single request, and if io_work context fails to acquire the send_mutex it will complete without rescheduling itself. The race can trigger with the following sequence: 1. queue_rq sends request (no incapsule data) and blocks 2. RX path receives r2t - prepares data PDU to send, adds h2cdata PDU to the send_list and schedules io_work 3. io_work triggers and cannot acquire the send_mutex - because of (1), ends without self rescheduling 4. queue_rq completes the send, and completes ==> no context will send the h2cdata - timeout. Fix this by having queue_rq sending as much as it can from the send_list such that if it still has any left, its because the socket buffer is full and the socket write_space callback will trigger, thus guaranteeing that a context will be scheduled to send the h2cdata PDU. Fixes: db5ad6b7f8cd ("nvme-tcp: try to send request in queue_rq context") Reported-by: Potnuri Bharat Teja Reported-by: Samuel Jones Signed-off-by: Sagi Grimberg Tested-by: Potnuri Bharat Teja Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 1ba659927442..979ee31b8dd1 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -262,6 +262,16 @@ static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req, } } +static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue) +{ + int ret; + + /* drain the send queue as much as we can... */ + do { + ret = nvme_tcp_try_send(queue); + } while (ret > 0); +} + static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, bool sync, bool last) { @@ -279,7 +289,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, if (queue->io_cpu == smp_processor_id() && sync && empty && mutex_trylock(&queue->send_mutex)) { queue->more_requests = !last; - nvme_tcp_try_send(queue); + nvme_tcp_send_all(queue); queue->more_requests = false; mutex_unlock(&queue->send_mutex); } else if (last) { -- cgit 1.4.1 From 62df80165d7f197c9c0652e7416164f294a96661 Mon Sep 17 00:00:00 2001 From: Lalithambika Krishnakumar Date: Wed, 23 Dec 2020 14:09:00 -0800 Subject: nvme: avoid possible double fetch in handling CQE While handling the completion queue, keep a local copy of the command id from the DMA-accessible completion entry. This silences a time-of-check to time-of-use (TOCTOU) warning from KF/x[1], with respect to a Thunderclap[2] vulnerability analysis. The double-read impact appears benign. There may be a theoretical window for @command_id to be used as an adversary-controlled array-index-value for mounting a speculative execution attack, but that mitigation is saved for a potential follow-on. A man-in-the-middle attack on the data payload is out of scope for this analysis and is hopefully mitigated by filesystem integrity mechanisms. [1] https://github.com/intel/kernel-fuzzer-for-xen-project [2] http://thunderclap.io/thunderclap-paper-ndss2019.pdf Signed-off-by: Lalithambika Krishna Kumar Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 553871e6962b..50d9a20568a2 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -967,6 +967,7 @@ static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq) static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) { struct nvme_completion *cqe = &nvmeq->cqes[idx]; + __u16 command_id = READ_ONCE(cqe->command_id); struct request *req; /* @@ -975,17 +976,17 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) * aborts. We don't even bother to allocate a struct request * for them but rather special case them here. */ - if (unlikely(nvme_is_aen_req(nvmeq->qid, cqe->command_id))) { + if (unlikely(nvme_is_aen_req(nvmeq->qid, command_id))) { nvme_complete_async_event(&nvmeq->dev->ctrl, cqe->status, &cqe->result); return; } - req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id); + req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), command_id); if (unlikely(!req)) { dev_warn(nvmeq->dev->ctrl.device, "invalid id %d completed on queue %d\n", - cqe->command_id, le16_to_cpu(cqe->sq_id)); + command_id, le16_to_cpu(cqe->sq_id)); return; } -- cgit 1.4.1 From 9b66fc02bec0ca613bc6d4c1d0049f727a95567d Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Wed, 30 Dec 2020 20:22:44 +0900 Subject: nvme: unexport functions with no external caller There are no callers for nvme_reset_ctrl_sync() and nvme_alloc_request_qid() so that we keep the symbols exported. Unexport those functions, mark them static and update the header file respectively. Signed-off-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 6 ++---- drivers/nvme/host/nvme.h | 3 --- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ce1b61519441..70a63d7c1d02 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -179,7 +179,7 @@ int nvme_reset_ctrl(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_reset_ctrl); -int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) +static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) { int ret; @@ -192,7 +192,6 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) return ret; } -EXPORT_SYMBOL_GPL(nvme_reset_ctrl_sync); static void nvme_do_delete_ctrl(struct nvme_ctrl *ctrl) { @@ -578,7 +577,7 @@ struct request *nvme_alloc_request(struct request_queue *q, } EXPORT_SYMBOL_GPL(nvme_alloc_request); -struct request *nvme_alloc_request_qid(struct request_queue *q, +static struct request *nvme_alloc_request_qid(struct request_queue *q, struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid) { struct request *req; @@ -589,7 +588,6 @@ struct request *nvme_alloc_request_qid(struct request_queue *q, nvme_init_request(req, cmd); return req; } -EXPORT_SYMBOL_GPL(nvme_alloc_request_qid); static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 7e49f61f81df..9c4fbfe44c00 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -610,8 +610,6 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl); #define NVME_QID_ANY -1 struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, blk_mq_req_flags_t flags); -struct request *nvme_alloc_request_qid(struct request_queue *q, - struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid); void nvme_cleanup_cmd(struct request *req); blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, struct nvme_command *cmd); @@ -630,7 +628,6 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid, int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); -int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl); int nvme_try_sched_reset(struct nvme_ctrl *ctrl); int nvme_delete_ctrl(struct nvme_ctrl *ctrl); -- cgit 1.4.1 From 9ceb7863537748c67fa43ac4f2f565819bbd36e4 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 5 Jan 2021 10:46:54 +0200 Subject: nvmet-rdma: Fix list_del corruption on queue establishment failure When a queue is in NVMET_RDMA_Q_CONNECTING state, it may has some requests at rsp_wait_list. In case a disconnect occurs at this state, no one will empty this list and will return the requests to free_rsps list. Normally nvmet_rdma_queue_established() free those requests after moving the queue to NVMET_RDMA_Q_LIVE state, but in this case __nvmet_rdma_queue_disconnect() is called before. The crash happens at nvmet_rdma_free_rsps() when calling list_del(&rsp->free_list), because the request exists only at the wait list. To fix the issue, simply clear rsp_wait_list when destroying the queue. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 5c1e7cb7fe0d..bdfc22eb2a10 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1641,6 +1641,16 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) spin_lock_irqsave(&queue->state_lock, flags); switch (queue->state) { case NVMET_RDMA_Q_CONNECTING: + while (!list_empty(&queue->rsp_wait_list)) { + struct nvmet_rdma_rsp *rsp; + + rsp = list_first_entry(&queue->rsp_wait_list, + struct nvmet_rdma_rsp, + wait_list); + list_del(&rsp->wait_list); + nvmet_rdma_put_rsp(rsp); + } + fallthrough; case NVMET_RDMA_Q_LIVE: queue->state = NVMET_RDMA_Q_DISCONNECTING; disconnect = true; -- cgit 1.4.1 From 2b59787a223b79228fed9ade1bf6936194ddb8cd Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 5 Jan 2021 10:34:02 +0000 Subject: nvme: remove the unused status argument from nvme_trace_bio_complete The only used argument in this function is the "req". Signed-off-by: Max Gurtovoy Reviewed-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/nvme.h | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 70a63d7c1d02..f320273fc672 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -330,7 +330,7 @@ static inline void nvme_end_req(struct request *req) req->__sector = nvme_lba_to_sect(req->q->queuedata, le64_to_cpu(nvme_req(req)->result.u64)); - nvme_trace_bio_complete(req, status); + nvme_trace_bio_complete(req); blk_mq_end_request(req, status); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 9c4fbfe44c00..88a6b97247f5 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -672,8 +672,7 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) kblockd_schedule_work(&head->requeue_work); } -static inline void nvme_trace_bio_complete(struct request *req, - blk_status_t status) +static inline void nvme_trace_bio_complete(struct request *req) { struct nvme_ns *ns = req->q->queuedata; @@ -728,8 +727,7 @@ static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) { } -static inline void nvme_trace_bio_complete(struct request *req, - blk_status_t status) +static inline void nvme_trace_bio_complete(struct request *req) { } static inline int nvme_mpath_init(struct nvme_ctrl *ctrl, -- cgit 1.4.1 From 3ce47d95b7346dcafd9bed3556a8d072cb2b8571 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 4 Jan 2021 13:59:53 -0700 Subject: powerpc: Handle .text.{hot,unlikely}.* in linker script Commit eff8728fe698 ("vmlinux.lds.h: Add PGO and AutoFDO input sections") added ".text.unlikely.*" and ".text.hot.*" due to an LLVM change [1]. After another LLVM change [2], these sections are seen in some PowerPC builds, where there is a orphan section warning then build failure: $ make -skj"$(nproc)" \ ARCH=powerpc CROSS_COMPILE=powerpc64le-linux-gnu- LLVM=1 O=out \ distclean powernv_defconfig zImage.epapr ld.lld: warning: kernel/built-in.a(panic.o):(.text.unlikely.) is being placed in '.text.unlikely.' ... ld.lld: warning: address (0xc000000000009314) of section .text is not a multiple of alignment (256) ... ERROR: start_text address is c000000000009400, should be c000000000008000 ERROR: try to enable LD_HEAD_STUB_CATCH config option ERROR: see comments in arch/powerpc/tools/head_check.sh ... Explicitly handle these sections like in the main linker script so there is no more build failure. [1]: https://reviews.llvm.org/D79600 [2]: https://reviews.llvm.org/D92493 Fixes: 83a092cf95f2 ("powerpc: Link warning for orphan sections") Cc: stable@vger.kernel.org Signed-off-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://github.com/ClangBuiltLinux/linux/issues/1218 Link: https://lore.kernel.org/r/20210104205952.1399409-1-natechancellor@gmail.com --- arch/powerpc/kernel/vmlinux.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 0318ba436f34..8e0b1298bf19 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -85,7 +85,7 @@ SECTIONS ALIGN_FUNCTION(); #endif /* careful! __ftr_alt_* sections need to be close to .text */ - *(.text.hot TEXT_MAIN .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text); + *(.text.hot .text.hot.* TEXT_MAIN .text.fixup .text.unlikely .text.unlikely.* .fixup __ftr_alt_* .ref.text); #ifdef CONFIG_PPC64 *(.tramp.ftrace.text); #endif -- cgit 1.4.1 From ad0a6bad44758afa3b440c254a24999a0c7e35d5 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Tue, 5 Jan 2021 17:50:43 +0000 Subject: x86/hyperv: check cpu mask after interrupt has been disabled We've observed crashes due to an empty cpu mask in hyperv_flush_tlb_others. Obviously the cpu mask in question is changed between the cpumask_empty call at the beginning of the function and when it is actually used later. One theory is that an interrupt comes in between and a code path ends up changing the mask. Move the check after interrupt has been disabled to see if it fixes the issue. Signed-off-by: Wei Liu Cc: stable@kernel.org Link: https://lore.kernel.org/r/20210105175043.28325-1-wei.liu@kernel.org Reviewed-by: Michael Kelley --- arch/x86/hyperv/mmu.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 5208ba49c89a..2c87350c1fb0 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -66,11 +66,17 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, if (!hv_hypercall_pg) goto do_native; - if (cpumask_empty(cpus)) - return; - local_irq_save(flags); + /* + * Only check the mask _after_ interrupt has been disabled to avoid the + * mask changing under our feet. + */ + if (cpumask_empty(cpus)) { + local_irq_restore(flags); + return; + } + flush_pcpu = (struct hv_tlb_flush **) this_cpu_ptr(hyperv_pcpu_input_arg); -- cgit 1.4.1 From cb7f4a8b1fb426a175d1708f05581939c61329d4 Mon Sep 17 00:00:00 2001 From: Ying-Tsun Huang Date: Tue, 15 Dec 2020 15:07:20 +0800 Subject: x86/mtrr: Correct the range check before performing MTRR type lookups In mtrr_type_lookup(), if the input memory address region is not in the MTRR, over 4GB, and not over the top of memory, a write-back attribute is returned. These condition checks are for ensuring the input memory address region is actually mapped to the physical memory. However, if the end address is just aligned with the top of memory, the condition check treats the address is over the top of memory, and write-back attribute is not returned. And this hits in a real use case with NVDIMM: the nd_pmem module tries to map NVDIMMs as cacheable memories when NVDIMMs are connected. If a NVDIMM is the last of the DIMMs, the performance of this NVDIMM becomes very low since it is aligned with the top of memory and its memory type is uncached-minus. Move the input end address change to inclusive up into mtrr_type_lookup(), before checking for the top of memory in either mtrr_type_lookup_{variable,fixed}() helpers. [ bp: Massage commit message. ] Fixes: 0cc705f56e40 ("x86/mm/mtrr: Clean up mtrr_type_lookup()") Signed-off-by: Ying-Tsun Huang Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20201215070721.4349-1-ying-tsun.huang@amd.com --- arch/x86/kernel/cpu/mtrr/generic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 23ad8e953dfb..a29997e6cf9e 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -167,9 +167,6 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end, *repeat = 0; *uniform = 1; - /* Make end inclusive instead of exclusive */ - end--; - prev_match = MTRR_TYPE_INVALID; for (i = 0; i < num_var_ranges; ++i) { unsigned short start_state, end_state, inclusive; @@ -261,6 +258,9 @@ u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform) int repeat; u64 partial_end; + /* Make end inclusive instead of exclusive */ + end--; + if (!mtrr_state_set) return MTRR_TYPE_INVALID; -- cgit 1.4.1 From 3e2224c5867fead6c0b94b84727cc676ac6353a3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 6 Jan 2021 16:09:26 +0000 Subject: io_uring: Fix return value from alloc_fixed_file_ref_node alloc_fixed_file_ref_node() currently returns an ERR_PTR on failure. io_sqe_files_unregister() expects it to return NULL and since it can only return -ENOMEM, it makes more sense to change alloc_fixed_file_ref_node() to behave that way. Fixes: 1ffc54220c44 ("io_uring: fix io_sqe_files_unregister() hangs") Reported-by: Dan Carpenter Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Jens Axboe --- fs/io_uring.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index dc92ca5090a3..27a8c226abf8 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7696,12 +7696,12 @@ static struct fixed_file_ref_node *alloc_fixed_file_ref_node( ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL); if (!ref_node) - return ERR_PTR(-ENOMEM); + return NULL; if (percpu_ref_init(&ref_node->refs, io_file_data_ref_zero, 0, GFP_KERNEL)) { kfree(ref_node); - return ERR_PTR(-ENOMEM); + return NULL; } INIT_LIST_HEAD(&ref_node->node); INIT_LIST_HEAD(&ref_node->file_list); @@ -7795,9 +7795,9 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, } ref_node = alloc_fixed_file_ref_node(ctx); - if (IS_ERR(ref_node)) { + if (!ref_node) { io_sqe_files_unregister(ctx); - return PTR_ERR(ref_node); + return -ENOMEM; } io_sqe_files_set_node(file_data, ref_node); @@ -7897,8 +7897,8 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, return -EINVAL; ref_node = alloc_fixed_file_ref_node(ctx); - if (IS_ERR(ref_node)) - return PTR_ERR(ref_node); + if (!ref_node) + return -ENOMEM; done = 0; fds = u64_to_user_ptr(up->fds); -- cgit 1.4.1 From 00b8c557d096f0930d5c07df618223d3d06902d6 Mon Sep 17 00:00:00 2001 From: Matthias Maennich Date: Wed, 6 Jan 2021 15:52:01 +0000 Subject: staging: ION: remove some references to CONFIG_ION With commit e722a295cf49 ("staging: ion: remove from the tree"), ION and its corresponding config CONFIG_ION is gone. Remove stale references from drivers/staging/media/atomisp/pci and from the recommended Android kernel config. Fixes: e722a295cf49 ("staging: ion: remove from the tree") Cc: Hridya Valsaraju Cc: Rob Herring Cc: linux-media@vger.kernel.org Cc: devel@driverdev.osuosl.org Signed-off-by: Matthias Maennich Link: https://lore.kernel.org/r/20210106155201.2845319-1-maennich@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/media/atomisp/pci/atomisp_subdev.c | 20 -------------------- kernel/configs/android-recommended.config | 1 - 2 files changed, 21 deletions(-) diff --git a/drivers/staging/media/atomisp/pci/atomisp_subdev.c b/drivers/staging/media/atomisp/pci/atomisp_subdev.c index 52b9fb18c87f..b666cb23e5ca 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_subdev.c +++ b/drivers/staging/media/atomisp/pci/atomisp_subdev.c @@ -1062,26 +1062,6 @@ static const struct v4l2_ctrl_config ctrl_select_isp_version = { .def = 0, }; -#if 0 /* #ifdef CONFIG_ION */ -/* - * Control for ISP ion device fd - * - * userspace will open ion device and pass the fd to kernel. - * this fd will be used to map shared fd to buffer. - */ -/* V4L2_CID_ATOMISP_ION_DEVICE_FD is not defined */ -static const struct v4l2_ctrl_config ctrl_ion_dev_fd = { - .ops = &ctrl_ops, - .id = V4L2_CID_ATOMISP_ION_DEVICE_FD, - .type = V4L2_CTRL_TYPE_INTEGER, - .name = "Ion Device Fd", - .min = -1, - .max = 1024, - .step = 1, - .def = ION_FD_UNSET -}; -#endif - static void atomisp_init_subdev_pipe(struct atomisp_sub_device *asd, struct atomisp_video_pipe *pipe, enum v4l2_buf_type buf_type) { diff --git a/kernel/configs/android-recommended.config b/kernel/configs/android-recommended.config index 53d688bdd894..eb0029c9a6a6 100644 --- a/kernel/configs/android-recommended.config +++ b/kernel/configs/android-recommended.config @@ -81,7 +81,6 @@ CONFIG_INPUT_JOYSTICK=y CONFIG_INPUT_MISC=y CONFIG_INPUT_TABLET=y CONFIG_INPUT_UINPUT=y -CONFIG_ION=y CONFIG_JOYSTICK_XPAD=y CONFIG_JOYSTICK_XPAD_FF=y CONFIG_JOYSTICK_XPAD_LEDS=y -- cgit 1.4.1 From 3d1a90ab0ed93362ec8ac85cf291243c87260c21 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Fri, 11 Dec 2020 05:12:51 -0500 Subject: NFS4: Fix use-after-free in trace_event_raw_event_nfs4_set_lock It is only safe to call the tracepoint before rpc_put_task() because 'data' is freed inside nfs4_lock_release (rpc_release). Fixes: 48c9579a1afe ("Adding stateid information to tracepoints") Signed-off-by: Dave Wysochanski Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0ce04e0e5d82..14acd2f79107 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7111,9 +7111,9 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f data->arg.new_lock_owner, ret); } else data->cancelled = true; + trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret); rpc_put_task(task); dprintk("%s: done, ret = %d!\n", __func__, ret); - trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret); return ret; } -- cgit 1.4.1 From 0e61f09af48beb41be0954e7be7d3ba2d18c9946 Mon Sep 17 00:00:00 2001 From: Xiaojian Du Date: Mon, 14 Dec 2020 17:05:55 +0800 Subject: drm/amd/pm: correct the sensor value of power for vangogh This patch is to correct the sensor value of power for vangogh. Signed-off-by: Xiaojian Du Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 8cb4fcee9a2c..5c1482d4ca43 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -252,7 +252,8 @@ static int vangogh_get_smu_metrics_data(struct smu_context *smu, *value = metrics->UvdActivity; break; case METRICS_AVERAGE_SOCKETPOWER: - *value = metrics->CurrentSocketPower; + *value = (metrics->CurrentSocketPower << 8) / + 1000 ; break; case METRICS_TEMPERATURE_EDGE: *value = metrics->GfxTemperature / 100 * -- cgit 1.4.1 From 37030aba0f362cf8b16eb2347c7430b2e9ef719e Mon Sep 17 00:00:00 2001 From: Xiaojian Du Date: Fri, 18 Dec 2020 14:32:02 +0800 Subject: drm/amd/pm: improve the fine grain tuning function for RV/RV2/PCO This patch is to improve the fine grain tuning function for RV/RV2/PCO. This patch adds two new commands: "restore" and "commit". This function uses the pp_od_clk_voltage sysfs file to configure the min and max value of gfx clock frequency manually or restore the default value. Command guide: echo "s level value" > pp_od_clk_voltage "s" - set the sclk frequency "level" - 0 or 1, "0" represents the min value, "1" represents the max value "value" - the target value of sclk frequency, it should be limited in the safe range echo "r" > pp_od_clk_voltage "r" - reset the sclk frequency, restore the default value instantly echo "c" > pp_od_clk_voltage "c" - commit the min and max value of sclk frequency to the system only after the commit command, the target values set by "s" command will take effect. Example: 1)change power profile from "auto" to "manual" $ cat power_dpm_force_performance_level auto $ echo "manual" > power_dpm_force_performance_level $ cat power_dpm_force_performance_level manual 2)check the default sclk frequency $ cat pp_od_clk_voltage OD_SCLK: 0: 200Mhz 1: 1400Mhz OD_RANGE: SCLK: 200MHz 1400MHz 3)use "s" -- set command to configure the min and max sclk frequency $ echo "s 0 600" > pp_od_clk_voltage $ echo "s 1 1000" > pp_od_clk_voltage $ echo "c" > pp_od_clk_voltage $ cat pp_od_clk_voltage OD_SCLK: 0: 600Mhz 1: 1000Mhz OD_RANGE: SCLK: 200MHz 1400MHz 4)use "r" -- reset command to restore the min or max sclk frequency $ echo "r" > pp_od_clk_voltage $ cat pp_od_clk_voltage OD_SCLK: 0: 200Mhz 1: 1400Mhz OD_RANGE: SCLK: 200MHz 1400MHz Signed-off-by: Xiaojian Du Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- .../gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c | 114 ++++++++++++++++++--- .../gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h | 1 + 2 files changed, 98 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c index e57e64bbacdc..0cf899566e31 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c @@ -251,7 +251,7 @@ static int smu10_set_hard_min_gfxclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t cl smu10_data->gfx_actual_soft_min_freq = clock; smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, - smu10_data->gfx_actual_soft_min_freq, + clock, NULL); } return 0; @@ -948,6 +948,8 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr, struct smu10_voltage_dependency_table *mclk_table = data->clock_vol_info.vdd_dep_on_fclk; uint32_t i, now, size = 0; + uint32_t min_freq, max_freq = 0; + uint32_t ret = 0; switch (type) { case PP_SCLK: @@ -983,18 +985,28 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr, break; case OD_SCLK: if (hwmgr->od_enabled) { - size = sprintf(buf, "%s:\n", "OD_SCLK"); + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq); + if (ret) + return ret; + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq); + if (ret) + return ret; + size = sprintf(buf, "%s:\n", "OD_SCLK"); size += sprintf(buf + size, "0: %10uMhz\n", - (data->gfx_actual_soft_min_freq > 0) ? data->gfx_actual_soft_min_freq : data->gfx_min_freq_limit/100); - size += sprintf(buf + size, "1: %10uMhz\n", data->gfx_max_freq_limit/100); + (data->gfx_actual_soft_min_freq > 0) ? data->gfx_actual_soft_min_freq : min_freq); + size += sprintf(buf + size, "1: %10uMhz\n", + (data->gfx_actual_soft_max_freq > 0) ? data->gfx_actual_soft_max_freq : max_freq); } break; case OD_RANGE: if (hwmgr->od_enabled) { - uint32_t min_freq, max_freq = 0; - smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq); - smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq); + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq); + if (ret) + return ret; + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq); + if (ret) + return ret; size = sprintf(buf, "%s:\n", "OD_RANGE"); size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n", @@ -1414,23 +1426,91 @@ static int smu10_set_fine_grain_clk_vol(struct pp_hwmgr *hwmgr, enum PP_OD_DPM_TABLE_COMMAND type, long *input, uint32_t size) { + uint32_t min_freq, max_freq = 0; + struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); + int ret = 0; + if (!hwmgr->od_enabled) { pr_err("Fine grain not support\n"); return -EINVAL; } - if (size != 2) { - pr_err("Input parameter number not correct\n"); - return -EINVAL; - } - if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) { - if (input[0] == 0) - smu10_set_hard_min_gfxclk_by_freq(hwmgr, input[1]); - else if (input[0] == 1) - smu10_set_soft_max_gfxclk_by_freq(hwmgr, input[1]); - else + if (size != 2) { + pr_err("Input parameter number not correct\n"); + return -EINVAL; + } + + if (input[0] == 0) { + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq); + if (input[1] < min_freq) { + pr_err("Fine grain setting minimum sclk (%ld) MHz is less than the minimum allowed (%d) MHz\n", + input[1], min_freq); + return -EINVAL; + } + smu10_data->gfx_actual_soft_min_freq = input[1]; + } else if (input[0] == 1) { + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq); + if (input[1] > max_freq) { + pr_err("Fine grain setting maximum sclk (%ld) MHz is greater than the maximum allowed (%d) MHz\n", + input[1], max_freq); + return -EINVAL; + } + smu10_data->gfx_actual_soft_max_freq = input[1]; + } else { + return -EINVAL; + } + } else if (type == PP_OD_RESTORE_DEFAULT_TABLE) { + if (size != 0) { + pr_err("Input parameter number not correct\n"); + return -EINVAL; + } + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq); + + smu10_data->gfx_actual_soft_min_freq = min_freq; + smu10_data->gfx_actual_soft_max_freq = max_freq; + + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinGfxClk, + min_freq, + NULL); + if (ret) + return ret; + + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxGfxClk, + max_freq, + NULL); + if (ret) + return ret; + } else if (type == PP_OD_COMMIT_DPM_TABLE) { + if (size != 0) { + pr_err("Input parameter number not correct\n"); + return -EINVAL; + } + + if (smu10_data->gfx_actual_soft_min_freq > smu10_data->gfx_actual_soft_max_freq) { + pr_err("The setting minimun sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n", + smu10_data->gfx_actual_soft_min_freq, smu10_data->gfx_actual_soft_max_freq); return -EINVAL; + } + + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinGfxClk, + smu10_data->gfx_actual_soft_min_freq, + NULL); + if (ret) + return ret; + + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxGfxClk, + smu10_data->gfx_actual_soft_max_freq, + NULL); + if (ret) + return ret; + } else { + return -EINVAL; } return 0; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h index 6c9b5f060902..28d86d354d50 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h @@ -283,6 +283,7 @@ struct smu10_hwmgr { uint32_t vclk_soft_min; uint32_t dclk_soft_min; uint32_t gfx_actual_soft_min_freq; + uint32_t gfx_actual_soft_max_freq; uint32_t gfx_min_freq_limit; uint32_t gfx_max_freq_limit; /* in 10Khz*/ -- cgit 1.4.1 From fc996f952df1c63b57e3a08ac612db53bf8abadc Mon Sep 17 00:00:00 2001 From: John Clements Date: Fri, 25 Dec 2020 12:22:51 +0800 Subject: drm/amd/pm: updated PM to I2C controller port on sienna cichlid sienna cichlid interfaces with RAS eeprom on I2C controller port 1 Reviewed-by: Hawking Zhang Signed-off-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 9608745d732f..12b36eb0ff6a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -2372,7 +2372,7 @@ static void sienna_cichlid_fill_i2c_req(SwI2cRequest_t *req, bool write, { int i; - req->I2CcontrollerPort = 0; + req->I2CcontrollerPort = 1; req->I2CSpeed = 2; req->SlaveAddress = address; req->NumCmds = numbytes; -- cgit 1.4.1 From a7b5d9dd57298333e6e9f4c167f01385d922bbfb Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Tue, 29 Dec 2020 14:10:28 +0800 Subject: drm/amd/display: fix sysfs amdgpu_current_backlight_pwm NULL pointer issue fix NULL pointer issue when read sysfs amdgpu_current_backlight_pwm sysfs node. Call Trace: [ 248.273833] BUG: kernel NULL pointer dereference, address: 0000000000000130 [ 248.273930] #PF: supervisor read access in kernel mode [ 248.273993] #PF: error_code(0x0000) - not-present page [ 248.274054] PGD 0 P4D 0 [ 248.274092] Oops: 0000 [#1] SMP PTI [ 248.274138] CPU: 2 PID: 1377 Comm: cat Tainted: G OE 5.9.0-rc5-drm-next-5.9+ #1 [ 248.274233] Hardware name: System manufacturer System Product Name/Z170-A, BIOS 3802 03/15/2018 [ 248.274641] RIP: 0010:dc_link_get_backlight_level+0x5/0x70 [amdgpu] [ 248.274718] Code: 67 ff ff ff 41 b9 03 00 00 00 e9 45 ff ff ff d1 ea e9 55 ff ff ff 0f 1f 44 00 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 <48> 8b 87 30 01 00 00 48 8b 00 48 8b 88 88 03 00 00 48 8d 81 e8 01 [ 248.274919] RSP: 0018:ffffb5ad809b3df0 EFLAGS: 00010203 [ 248.274982] RAX: ffffa0f77d1c0010 RBX: ffffa0f793ae9168 RCX: 0000000000000001 [ 248.275064] RDX: ffffa0f79753db00 RSI: 0000000000000001 RDI: 0000000000000000 [ 248.275145] RBP: ffffb5ad809b3e00 R08: ffffb5ad809b3da0 R09: 0000000000000000 [ 248.275225] R10: ffffb5ad809b3e68 R11: 0000000000000000 R12: ffffa0f793ae9190 [ 248.275306] R13: ffffb5ad809b3ef0 R14: 0000000000000001 R15: ffffa0f793ae9168 [ 248.275388] FS: 00007f5f1ec4d540(0000) GS:ffffa0f79ec80000(0000) knlGS:0000000000000000 [ 248.275480] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 248.275547] CR2: 0000000000000130 CR3: 000000042a03c005 CR4: 00000000003706e0 [ 248.275628] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 248.275708] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 248.275789] Call Trace: [ 248.276124] ? current_backlight_read+0x24/0x40 [amdgpu] [ 248.276194] seq_read+0xc3/0x3f0 [ 248.276240] full_proxy_read+0x5c/0x90 [ 248.276290] vfs_read+0xa7/0x190 [ 248.276334] ksys_read+0xa7/0xe0 [ 248.276379] __x64_sys_read+0x1a/0x20 [ 248.276429] do_syscall_64+0x37/0x80 [ 248.276477] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 248.276538] RIP: 0033:0x7f5f1e75c191 [ 248.276585] Code: fe ff ff 48 8d 3d b7 9d 0a 00 48 83 ec 08 e8 46 4d 02 00 66 0f 1f 44 00 00 48 8d 05 71 07 2e 00 8b 00 85 c0 75 13 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 57 f3 c3 0f 1f 44 00 00 41 54 55 49 89 d4 53Hw [ 248.276784] RSP: 002b:00007ffcb1fc3f38 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 248.276872] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f5f1e75c191 [ 248.276953] RDX: 0000000000020000 RSI: 00007f5f1ec2b000 RDI: 0000000000000003 [ 248.277034] RBP: 0000000000020000 R08: 00000000ffffffff R09: 0000000000000000 [ 248.277115] R10: 0000000000000022 R11: 0000000000000246 R12: 00007f5f1ec2b000 [ 248.277195] R13: 0000000000000003 R14: 00007f5f1ec2b00f R15: 0000000000020000 [ 248.277279] Modules linked in: amdgpu(OE) iommu_v2 gpu_sched ttm(OE) drm_kms_helper cec drm i2c_algo_bit fb_sys_fops syscopyarea sysfillrect sysimgblt rpcsec_gss_krb5 auth_rpcgss nfsv4 nfs lockd grace fscache nls_iso8859_1 snd_hda_codec_realtek snd_hda_codec_hdmi snd_hda_codec_generic ledtrig_audio intel_rapl_msr intel_rapl_common snd_hda_intel snd_intel_dspcfg x86_pkg_temp_thermal intel_powerclamp snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_seq_midi snd_seq_midi_event mei_hdcp coretemp snd_rawmidi snd_seq kvm_intel kvm snd_seq_device snd_timer irqbypass joydev snd input_leds soundcore crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel crypto_simd cryptd glue_helper rapl intel_cstate mac_hid mei_me serio_raw mei eeepc_wmi wmi_bmof asus_wmi mxm_wmi intel_wmi_thunderbolt acpi_pad sparse_keymap efi_pstore sch_fq_codel parport_pc ppdev lp parport sunrpc ip_tables x_tables autofs4 hid_logitech_hidpp hid_logitech_dj hid_generic usbhid hid e1000e psmouse ahci libahci wmi video [ 248.278211] CR2: 0000000000000130 [ 248.278221] ---[ end trace 1fbe72fe6f91091d ]--- [ 248.357226] RIP: 0010:dc_link_get_backlight_level+0x5/0x70 [amdgpu] [ 248.357272] Code: 67 ff ff ff 41 b9 03 00 00 00 e9 45 ff ff ff d1 ea e9 55 ff ff ff 0f 1f 44 00 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 <48> 8b 87 30 01 00 00 48 8b 00 48 8b 88 88 03 00 00 48 8d 81 e8 01 Signed-off-by: Kevin Wang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 9e1071b2181f..f4a2088ab179 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2487,9 +2487,14 @@ enum dc_status dc_link_validate_mode_timing( static struct abm *get_abm_from_stream_res(const struct dc_link *link) { int i; - struct dc *dc = link->ctx->dc; + struct dc *dc = NULL; struct abm *abm = NULL; + if (!link || !link->ctx) + return NULL; + + dc = link->ctx->dc; + for (i = 0; i < MAX_PIPES; i++) { struct pipe_ctx pipe_ctx = dc->current_state->res_ctx.pipe_ctx[i]; struct dc_stream_state *stream = pipe_ctx.stream; -- cgit 1.4.1 From 8ae291cc95e49011b736b641b0cfad502b7a1526 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 5 Jan 2021 13:13:27 +0200 Subject: RDMA/ucma: Do not miss ctx destruction steps in some cases The destruction flow is very complicated here because the cm_id can be destroyed from the event handler at any time if the device is hot-removed. This leaves behind a partial ctx with no cm_id in the xarray, and will let user space leak memory. Make everything consistent in this flow in all places: - Return the xarray back to XA_ZERO_ENTRY before beginning any destruction. The thread that reaches this first is responsible to kfree, everyone else does nothing. - Test the xarray during the special hot-removal case to block the queue_work, this has much simpler locking and doesn't require a 'destroying' - Fix the ref initialization so that it is only positive if cm_id != NULL, then rely on that to guide the destruction process in all cases. Now the new ucma_destroy_private_ctx() can be called in all places that want to free the ctx, including all the error unwinds, and none of the details are missed. Fixes: a1d33b70dbbc ("RDMA/ucma: Rework how new connections are passed through event delivery") Link: https://lore.kernel.org/r/20210105111327.230270-1-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/ucma.c | 135 ++++++++++++++++++++++------------------- 1 file changed, 72 insertions(+), 63 deletions(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 7dab9a27a145..da2512c30ffd 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -95,8 +95,6 @@ struct ucma_context { u64 uid; struct list_head list; - /* sync between removal event and id destroy, protected by file mut */ - int destroying; struct work_struct close_work; }; @@ -122,7 +120,7 @@ static DEFINE_XARRAY_ALLOC(ctx_table); static DEFINE_XARRAY_ALLOC(multicast_table); static const struct file_operations ucma_fops; -static int __destroy_id(struct ucma_context *ctx); +static int ucma_destroy_private_ctx(struct ucma_context *ctx); static inline struct ucma_context *_ucma_find_context(int id, struct ucma_file *file) @@ -179,19 +177,14 @@ static void ucma_close_id(struct work_struct *work) /* once all inflight tasks are finished, we close all underlying * resources. The context is still alive till its explicit destryoing - * by its creator. + * by its creator. This puts back the xarray's reference. */ ucma_put_ctx(ctx); wait_for_completion(&ctx->comp); /* No new events will be generated after destroying the id. */ rdma_destroy_id(ctx->cm_id); - /* - * At this point ctx->ref is zero so the only place the ctx can be is in - * a uevent or in __destroy_id(). Since the former doesn't touch - * ctx->cm_id and the latter sync cancels this, there is no races with - * this store. - */ + /* Reading the cm_id without holding a positive ref is not allowed */ ctx->cm_id = NULL; } @@ -204,7 +197,6 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) return NULL; INIT_WORK(&ctx->close_work, ucma_close_id); - refcount_set(&ctx->ref, 1); init_completion(&ctx->comp); /* So list_del() will work if we don't do ucma_finish_ctx() */ INIT_LIST_HEAD(&ctx->list); @@ -218,6 +210,13 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) return ctx; } +static void ucma_set_ctx_cm_id(struct ucma_context *ctx, + struct rdma_cm_id *cm_id) +{ + refcount_set(&ctx->ref, 1); + ctx->cm_id = cm_id; +} + static void ucma_finish_ctx(struct ucma_context *ctx) { lockdep_assert_held(&ctx->file->mut); @@ -303,7 +302,7 @@ static int ucma_connect_event_handler(struct rdma_cm_id *cm_id, ctx = ucma_alloc_ctx(listen_ctx->file); if (!ctx) goto err_backlog; - ctx->cm_id = cm_id; + ucma_set_ctx_cm_id(ctx, cm_id); uevent = ucma_create_uevent(listen_ctx, event); if (!uevent) @@ -321,8 +320,7 @@ static int ucma_connect_event_handler(struct rdma_cm_id *cm_id, return 0; err_alloc: - xa_erase(&ctx_table, ctx->id); - kfree(ctx); + ucma_destroy_private_ctx(ctx); err_backlog: atomic_inc(&listen_ctx->backlog); /* Returning error causes the new ID to be destroyed */ @@ -356,8 +354,12 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, wake_up_interruptible(&ctx->file->poll_wait); } - if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL && !ctx->destroying) - queue_work(system_unbound_wq, &ctx->close_work); + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) { + xa_lock(&ctx_table); + if (xa_load(&ctx_table, ctx->id) == ctx) + queue_work(system_unbound_wq, &ctx->close_work); + xa_unlock(&ctx_table); + } return 0; } @@ -461,13 +463,12 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, ret = PTR_ERR(cm_id); goto err1; } - ctx->cm_id = cm_id; + ucma_set_ctx_cm_id(ctx, cm_id); resp.id = ctx->id; if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) { - xa_erase(&ctx_table, ctx->id); - __destroy_id(ctx); + ucma_destroy_private_ctx(ctx); return -EFAULT; } @@ -477,8 +478,7 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, return 0; err1: - xa_erase(&ctx_table, ctx->id); - kfree(ctx); + ucma_destroy_private_ctx(ctx); return ret; } @@ -516,68 +516,73 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc) rdma_unlock_handler(mc->ctx->cm_id); } -/* - * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At - * this point, no new events will be reported from the hardware. However, we - * still need to cleanup the UCMA context for this ID. Specifically, there - * might be events that have not yet been consumed by the user space software. - * mutex. After that we release them as needed. - */ -static int ucma_free_ctx(struct ucma_context *ctx) +static int ucma_cleanup_ctx_events(struct ucma_context *ctx) { int events_reported; struct ucma_event *uevent, *tmp; LIST_HEAD(list); - ucma_cleanup_multicast(ctx); - - /* Cleanup events not yet reported to the user. */ + /* Cleanup events not yet reported to the user.*/ mutex_lock(&ctx->file->mut); list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) { - if (uevent->ctx == ctx || uevent->conn_req_ctx == ctx) + if (uevent->ctx != ctx) + continue; + + if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST && + xa_cmpxchg(&ctx_table, uevent->conn_req_ctx->id, + uevent->conn_req_ctx, XA_ZERO_ENTRY, + GFP_KERNEL) == uevent->conn_req_ctx) { list_move_tail(&uevent->list, &list); + continue; + } + list_del(&uevent->list); + kfree(uevent); } list_del(&ctx->list); events_reported = ctx->events_reported; mutex_unlock(&ctx->file->mut); /* - * If this was a listening ID then any connections spawned from it - * that have not been delivered to userspace are cleaned up too. - * Must be done outside any locks. + * If this was a listening ID then any connections spawned from it that + * have not been delivered to userspace are cleaned up too. Must be done + * outside any locks. */ list_for_each_entry_safe(uevent, tmp, &list, list) { - list_del(&uevent->list); - if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST && - uevent->conn_req_ctx != ctx) - __destroy_id(uevent->conn_req_ctx); + ucma_destroy_private_ctx(uevent->conn_req_ctx); kfree(uevent); } - - mutex_destroy(&ctx->mutex); - kfree(ctx); return events_reported; } -static int __destroy_id(struct ucma_context *ctx) +/* + * When this is called the xarray must have a XA_ZERO_ENTRY in the ctx->id (ie + * the ctx is not public to the user). This either because: + * - ucma_finish_ctx() hasn't been called + * - xa_cmpxchg() succeed to remove the entry (only one thread can succeed) + */ +static int ucma_destroy_private_ctx(struct ucma_context *ctx) { + int events_reported; + /* - * If the refcount is already 0 then ucma_close_id() has already - * destroyed the cm_id, otherwise holding the refcount keeps cm_id - * valid. Prevent queue_work() from being called. + * Destroy the underlying cm_id. New work queuing is prevented now by + * the removal from the xarray. Once the work is cancled ref will either + * be 0 because the work ran to completion and consumed the ref from the + * xarray, or it will be positive because we still have the ref from the + * xarray. This can also be 0 in cases where cm_id was never set */ - if (refcount_inc_not_zero(&ctx->ref)) { - rdma_lock_handler(ctx->cm_id); - ctx->destroying = 1; - rdma_unlock_handler(ctx->cm_id); - ucma_put_ctx(ctx); - } - cancel_work_sync(&ctx->close_work); - /* At this point it's guaranteed that there is no inflight closing task */ - if (ctx->cm_id) + if (refcount_read(&ctx->ref)) ucma_close_id(&ctx->close_work); - return ucma_free_ctx(ctx); + + events_reported = ucma_cleanup_ctx_events(ctx); + ucma_cleanup_multicast(ctx); + + WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, XA_ZERO_ENTRY, NULL, + GFP_KERNEL) != NULL); + mutex_destroy(&ctx->mutex); + kfree(ctx); + return events_reported; } static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, @@ -596,14 +601,17 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, xa_lock(&ctx_table); ctx = _ucma_find_context(cmd.id, file); - if (!IS_ERR(ctx)) - __xa_erase(&ctx_table, ctx->id); + if (!IS_ERR(ctx)) { + if (__xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY, + GFP_KERNEL) != ctx) + ctx = ERR_PTR(-ENOENT); + } xa_unlock(&ctx_table); if (IS_ERR(ctx)) return PTR_ERR(ctx); - resp.events_reported = __destroy_id(ctx); + resp.events_reported = ucma_destroy_private_ctx(ctx); if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) ret = -EFAULT; @@ -1777,15 +1785,16 @@ static int ucma_close(struct inode *inode, struct file *filp) * prevented by this being a FD release function. The list_add_tail() in * ucma_connect_event_handler() can run concurrently, however it only * adds to the list *after* a listening ID. By only reading the first of - * the list, and relying on __destroy_id() to block + * the list, and relying on ucma_destroy_private_ctx() to block * ucma_connect_event_handler(), no additional locking is needed. */ while (!list_empty(&file->ctx_list)) { struct ucma_context *ctx = list_first_entry( &file->ctx_list, struct ucma_context, list); - xa_erase(&ctx_table, ctx->id); - __destroy_id(ctx); + WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY, + GFP_KERNEL) != ctx); + ucma_destroy_private_ctx(ctx); } kfree(file); return 0; -- cgit 1.4.1 From ed1df58585632dff96cc01e14857175dfdf67376 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Thu, 31 Dec 2020 13:05:09 +0800 Subject: drm/amdgpu: switched to cached noretry setting for vangogh global noretry setting is cached to gmc.noretry Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index b72c8e4ca36b..07104a1de308 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -310,7 +310,7 @@ static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev) /* Send no-retry XNACK on fault to suppress VM fault storm. */ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, - !amdgpu_noretry); + !adev->gmc.noretry); WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i * hub->ctx_distance, tmp); WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, -- cgit 1.4.1 From 9a029a3facc4d333100308a8e283d9210a36b94c Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Wed, 30 Dec 2020 10:27:42 +0800 Subject: drm/amdgpu: fix a memory protection fault when remove amdgpu device ASD and TA share the same firmware in SIENNA_CICHLID and only TA firmware is requested during boot, so only need release TA firmware when remove device. [ 83.877150] general protection fault, probably for non-canonical address 0x1269f97e6ed04095: 0000 [#1] SMP PTI [ 83.888076] CPU: 0 PID: 1312 Comm: modprobe Tainted: G W OE 5.9.0-rc5-deli-amd-vangogh-0.0.6.6-114-gdd99d5669a96-dirty #2 [ 83.901160] Hardware name: System manufacturer System Product Name/TUF Z370-PLUS GAMING II, BIOS 0411 09/21/2018 [ 83.912353] RIP: 0010:free_fw_priv+0xc/0x120 [ 83.917531] Code: e8 99 cd b0 ff b8 a1 ff ff ff eb 9f 4c 89 f7 e8 8a cd b0 ff b8 f4 ff ff ff eb 90 0f 1f 00 0f 1f 44 00 00 55 48 89 e5 41 54 53 <4c> 8b 67 18 48 89 fb 4c 89 e7 e8 45 94 41 00 b8 ff ff ff ff f0 0f [ 83.937576] RSP: 0018:ffffbc34c13a3ce0 EFLAGS: 00010206 [ 83.943699] RAX: ffffffffbb681850 RBX: ffffa047f117eb60 RCX: 0000000080800055 [ 83.951879] RDX: ffffbc34c1d5f000 RSI: 0000000080800055 RDI: 1269f97e6ed04095 [ 83.959955] RBP: ffffbc34c13a3cf0 R08: 0000000000000000 R09: 0000000000000001 [ 83.968107] R10: ffffbc34c13a3cc8 R11: 00000000ffffff00 R12: ffffa047d6b23378 [ 83.976166] R13: ffffa047d6b23338 R14: ffffa047d6b240c8 R15: 0000000000000000 [ 83.984295] FS: 00007f74f6712540(0000) GS:ffffa047fbe00000(0000) knlGS:0000000000000000 [ 83.993323] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 84.000056] CR2: 0000556a1cca4e18 CR3: 000000021faa8004 CR4: 00000000003706f0 [ 84.008128] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 84.016155] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 84.024174] Call Trace: [ 84.027514] release_firmware.part.11+0x4b/0x70 [ 84.033017] release_firmware+0x13/0x20 [ 84.037803] psp_sw_fini+0x77/0xb0 [amdgpu] [ 84.042857] amdgpu_device_fini+0x38c/0x5d0 [amdgpu] [ 84.048815] amdgpu_driver_unload_kms+0x43/0x70 [amdgpu] [ 84.055055] drm_dev_unregister+0x73/0xb0 [drm] [ 84.060499] drm_dev_unplug+0x28/0x30 [drm] [ 84.065598] amdgpu_dev_uninit+0x1b/0x40 [amdgpu] [ 84.071223] amdgpu_pci_remove+0x4e/0x70 [amdgpu] [ 84.076835] pci_device_remove+0x3e/0xc0 [ 84.081609] device_release_driver_internal+0xfb/0x1c0 [ 84.087558] driver_detach+0x4d/0xa0 [ 84.092041] bus_remove_driver+0x5f/0xe0 [ 84.096854] driver_unregister+0x2f/0x50 [ 84.101594] pci_unregister_driver+0x22/0xa0 [ 84.106806] amdgpu_exit+0x15/0x2b [amdgpu] Signed-off-by: Dennis Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 523d22db094b..5d6fc369e32c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -563,7 +563,7 @@ static int psp_asd_load(struct psp_context *psp) * add workaround to bypass it for sriov now. * TODO: add version check to make it common */ - if (amdgpu_sriov_vf(psp->adev) || !psp->asd_fw) + if (amdgpu_sriov_vf(psp->adev) || !psp->asd_ucode_size) return 0; cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); @@ -2589,11 +2589,10 @@ static int parse_ta_bin_descriptor(struct psp_context *psp, switch (desc->fw_type) { case TA_FW_TYPE_PSP_ASD: - psp->asd_fw_version = le32_to_cpu(desc->fw_version); + psp->asd_fw_version = le32_to_cpu(desc->fw_version); psp->asd_feature_version = le32_to_cpu(desc->fw_version); - psp->asd_ucode_size = le32_to_cpu(desc->size_bytes); + psp->asd_ucode_size = le32_to_cpu(desc->size_bytes); psp->asd_start_addr = ucode_start_addr; - psp->asd_fw = psp->ta_fw; break; case TA_FW_TYPE_PSP_XGMI: psp->ta_xgmi_ucode_version = le32_to_cpu(desc->fw_version); -- cgit 1.4.1 From 88e21af1b3f887d217f2fb14fc7e7d3cd87ebf57 Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Wed, 30 Dec 2020 19:45:15 +0800 Subject: drm/amdgpu: fix a GPU hang issue when remove device When GFXOFF is enabled and GPU is idle, driver will fail to access some registers. Therefore change to disable power gating before all access registers with MMIO. Dmesg log is as following: amdgpu 0000:03:00.0: amdgpu: amdgpu: finishing device. amdgpu: cp queue pipe 4 queue 0 preemption failed amdgpu 0000:03:00.0: amdgpu: failed to write reg 2890 wait reg 28a2 amdgpu 0000:03:00.0: amdgpu: failed to write reg 1a6f4 wait reg 1a706 amdgpu 0000:03:00.0: amdgpu: failed to write reg 2890 wait reg 28a2 amdgpu 0000:03:00.0: amdgpu: failed to write reg 1a6f4 wait reg 1a706 Signed-off-by: Dennis Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1cb7d73f7317..b69c34074d8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2548,11 +2548,11 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) if (adev->gmc.xgmi.num_physical_nodes > 1) amdgpu_xgmi_remove_device(adev); - amdgpu_amdkfd_device_fini(adev); - amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); + amdgpu_amdkfd_device_fini(adev); + /* need to disable SMC first */ for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.hw) -- cgit 1.4.1 From 44cb39e19a05ca711bcb6e776e0a4399223204a0 Mon Sep 17 00:00:00 2001 From: Xiaojian Du Date: Wed, 30 Dec 2020 18:08:23 +0800 Subject: drm/amd/pm: fix the failure when change power profile for renoir This patch is to fix the failure when change power profile to "profile_peak" for renoir. Signed-off-by: Xiaojian Du Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 1 + drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index dc75db8af371..f743685a20e8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -188,6 +188,7 @@ static int renoir_get_dpm_clk_limited(struct smu_context *smu, enum smu_clk_type return -EINVAL; *freq = clk_table->SocClocks[dpm_level].Freq; break; + case SMU_UCLK: case SMU_MCLK: if (dpm_level >= NUM_FCLK_DPM_LEVELS) return -EINVAL; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c index 522d55004655..06abf2a7ce9e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c @@ -225,6 +225,7 @@ int smu_v12_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_ break; case SMU_FCLK: case SMU_MCLK: + case SMU_UCLK: ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinFclkByFreq, min, NULL); if (ret) return ret; -- cgit 1.4.1 From 98b64762080b96b0f8608da5fe161f1a7ab6f5de Mon Sep 17 00:00:00 2001 From: Xiaojian Du Date: Tue, 29 Dec 2020 17:19:37 +0800 Subject: drm/amd/pm: improve the fine grain tuning function for RV/RV2/PCO This patch is to improve the fine grain tuning function for RV/RV2/PCO. The fine grain tuning function uses the sysfs node -- pp_od_clk_voltage to config gfxclk. Meanwhile, another sysfs node -- power_dpm_force_perfomance_level also affects the gfx clk. It will cause confusion when these two sysfs nodes works together. So this patch adds one flag to avoid this confusion, the flag will make these two sysfs nodes work separately. The flag is set as "disabled" by default, so the fine grain tuning function will be disabled by default. Only when power_dpm_force_perfomance_level is changed to "manual" mode, the flag will be set as "enabled", and the fine grain tuning function will be enabled. In other profile modes, including "auto", "high", "low", "profile_peak", "profile_standard", "profile_min_sclk", "profile_min_mclk", the flag will be set as "disabled", and the od range of fine grain tuning function will be restored default value. Signed-off-by: Xiaojian Du Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- .../gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c | 58 +++++++++++++++++++++- .../gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h | 2 + 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c index 0cf899566e31..88322781e447 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c @@ -558,7 +558,8 @@ static int smu10_hwmgr_backend_init(struct pp_hwmgr *hwmgr) /* enable the pp_od_clk_voltage sysfs file */ hwmgr->od_enabled = 1; - + /* disabled fine grain tuning function by default */ + data->fine_grain_enabled = 0; return result; } @@ -597,6 +598,7 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, uint32_t min_mclk = hwmgr->display_config->min_mem_set_clock/100; uint32_t index_fclk = data->clock_vol_info.vdd_dep_on_fclk->count - 1; uint32_t index_socclk = data->clock_vol_info.vdd_dep_on_socclk->count - 1; + uint32_t fine_grain_min_freq = 0, fine_grain_max_freq = 0; if (hwmgr->smu_version < 0x1E3700) { pr_info("smu firmware version too old, can not set dpm level\n"); @@ -613,6 +615,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, switch (level) { case AMD_DPM_FORCED_LEVEL_HIGH: case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: + data->fine_grain_enabled = 0; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, data->gfx_max_freq_limit/100, @@ -648,6 +658,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: + data->fine_grain_enabled = 0; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, min_sclk, @@ -658,6 +676,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK: + data->fine_grain_enabled = 0; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinFclkByFreq, min_mclk, @@ -668,6 +694,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: + data->fine_grain_enabled = 0; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, SMU10_UMD_PSTATE_GFXCLK, @@ -703,6 +737,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_AUTO: + data->fine_grain_enabled = 0; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, min_sclk, @@ -741,6 +783,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_LOW: + data->fine_grain_enabled = 0; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, data->gfx_min_freq_limit/100, @@ -759,6 +809,7 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_MANUAL: + data->fine_grain_enabled = 1; case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT: default: break; @@ -1435,6 +1486,11 @@ static int smu10_set_fine_grain_clk_vol(struct pp_hwmgr *hwmgr, return -EINVAL; } + if (!smu10_data->fine_grain_enabled) { + pr_err("Fine grain not started\n"); + return -EINVAL; + } + if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) { if (size != 2) { pr_err("Input parameter number not correct\n"); diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h index 28d86d354d50..808e0ecbe1f0 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h @@ -300,6 +300,8 @@ struct smu10_hwmgr { bool need_min_deep_sleep_dcefclk; uint32_t deep_sleep_dcefclk; uint32_t num_active_display; + + bool fine_grain_enabled; }; struct pp_hwmgr; -- cgit 1.4.1 From 4f6a05501eb9c57fb4c9efed70840aee523a393b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 15:02:32 +0100 Subject: drm/amd/display: Fix unused variable warning Some of the newly added code is hidden inside of #ifdef blocks, but one variable is unused when debugfs is disabled: drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:8370:8: error: unused variable 'configure_crc' [-Werror,-Wunused-variable] Change the #ifdef to an if(IS_ENABLED()) check to fix the warning and avoid adding more #ifdefs. Fixes: c920888c604d ("drm/amd/display: Expose new CRC window property") Reviewed-by: Wayne Lin Signed-off-by: Arnd Bergmann Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 +--- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 519080e9a233..3fb6baf9b0ba 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8379,8 +8379,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) acrtc->dm_irq_params.stream = dm_new_crtc_state->stream; manage_dm_interrupts(adev, acrtc, true); } -#ifdef CONFIG_DEBUG_FS - if (new_crtc_state->active && + if (IS_ENABLED(CONFIG_DEBUG_FS) && new_crtc_state->active && amdgpu_dm_is_valid_crc_source(dm_new_crtc_state->crc_src)) { /** * Frontend may have changed so reapply the CRC capture @@ -8401,7 +8400,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) amdgpu_dm_crtc_configure_crc_source( crtc, dm_new_crtc_state, dm_new_crtc_state->crc_src); } -#endif } for_each_new_crtc_in_state(state, crtc, new_crtc_state, j) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h index 0235bfb246e5..eba2f1d35d07 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h @@ -46,13 +46,13 @@ static inline bool amdgpu_dm_is_valid_crc_source(enum amdgpu_dm_pipe_crc_source } /* amdgpu_dm_crc.c */ -#ifdef CONFIG_DEBUG_FS bool amdgpu_dm_crc_window_is_default(struct dm_crtc_state *dm_crtc_state); bool amdgpu_dm_crc_window_changed(struct dm_crtc_state *dm_new_crtc_state, struct dm_crtc_state *dm_old_crtc_state); int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, struct dm_crtc_state *dm_crtc_state, enum amdgpu_dm_pipe_crc_source source); +#ifdef CONFIG_DEBUG_FS int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name); int amdgpu_dm_crtc_verify_crc_source(struct drm_crtc *crtc, const char *src_name, -- cgit 1.4.1 From e6d5c64efaa34aae3815a9afeb1314a976142e83 Mon Sep 17 00:00:00 2001 From: Jiawei Gu Date: Tue, 29 Dec 2020 20:35:33 +0800 Subject: drm/amdgpu: fix potential memory leak during navi12 deinitialization Navi12 HDCP & DTM deinitialization needs continue to free bo if already created though initialized flag is not set. Reviewed-by: Alex Deucher Signed-off-by: Jiawei Gu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 5d6fc369e32c..347fec669424 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1315,8 +1315,12 @@ static int psp_hdcp_terminate(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->hdcp_context.hdcp_initialized) - return 0; + if (!psp->hdcp_context.hdcp_initialized) { + if (psp->hdcp_context.hdcp_shared_buf) + goto out; + else + return 0; + } ret = psp_hdcp_unload(psp); if (ret) @@ -1324,6 +1328,7 @@ static int psp_hdcp_terminate(struct psp_context *psp) psp->hdcp_context.hdcp_initialized = false; +out: /* free hdcp shared memory */ amdgpu_bo_free_kernel(&psp->hdcp_context.hdcp_shared_bo, &psp->hdcp_context.hdcp_shared_mc_addr, @@ -1462,8 +1467,12 @@ static int psp_dtm_terminate(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->dtm_context.dtm_initialized) - return 0; + if (!psp->dtm_context.dtm_initialized) { + if (psp->dtm_context.dtm_shared_buf) + goto out; + else + return 0; + } ret = psp_dtm_unload(psp); if (ret) @@ -1471,6 +1480,7 @@ static int psp_dtm_terminate(struct psp_context *psp) psp->dtm_context.dtm_initialized = false; +out: /* free hdcp shared memory */ amdgpu_bo_free_kernel(&psp->dtm_context.dtm_shared_bo, &psp->dtm_context.dtm_shared_mc_addr, -- cgit 1.4.1 From 8a82b347e8732fd2b68d26a6e9f0d9a1c397560d Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Tue, 5 Jan 2021 08:37:21 +0800 Subject: drm/amdgpu: fix no bad_pages issue after umc ue injection old code wrongly used the bad page status as the function return value, which cause amdgpu_ras_badpages_read always return failed. Signed-off-by: Dennis Li Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index c136bd449744..82e952696d24 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1518,7 +1518,7 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data; int i = 0; - int ret = 0; + int ret = 0, status; if (!con || !con->eh_data || !bps || !count) return -EINVAL; @@ -1543,12 +1543,12 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, .size = AMDGPU_GPU_PAGE_SIZE, .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED, }; - ret = amdgpu_vram_mgr_query_page_status( + status = amdgpu_vram_mgr_query_page_status( ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM), data->bps[i].retired_page); - if (ret == -EBUSY) + if (status == -EBUSY) (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING; - else if (ret == -ENOENT) + else if (status == -ENOENT) (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT; } -- cgit 1.4.1 From 3851c90b7aa8f0c275d14636f0e7ccca69a2bf84 Mon Sep 17 00:00:00 2001 From: John Clements Date: Tue, 5 Jan 2021 14:53:14 +0800 Subject: drm/amdgpu: enable ras eeprom support for sienna cichlid added I2C address and asic support flag Reviewed-by: Hawking Zhang Signed-off-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 1dd040166c63..19d9aa76cfbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -30,6 +30,7 @@ #define EEPROM_I2C_TARGET_ADDR_VEGA20 0xA0 #define EEPROM_I2C_TARGET_ADDR_ARCTURUS 0xA8 #define EEPROM_I2C_TARGET_ADDR_ARCTURUS_D342 0xA0 +#define EEPROM_I2C_TARGET_ADDR_SIENNA_CICHLID 0xA0 /* * The 2 macros bellow represent the actual size in bytes that @@ -62,7 +63,8 @@ static bool __is_ras_eeprom_supported(struct amdgpu_device *adev) { if ((adev->asic_type == CHIP_VEGA20) || - (adev->asic_type == CHIP_ARCTURUS)) + (adev->asic_type == CHIP_ARCTURUS) || + (adev->asic_type == CHIP_SIENNA_CICHLID)) return true; return false; @@ -100,6 +102,10 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, case CHIP_ARCTURUS: return __get_eeprom_i2c_addr_arct(adev, i2c_addr); + case CHIP_SIENNA_CICHLID: + *i2c_addr = EEPROM_I2C_TARGET_ADDR_SIENNA_CICHLID; + break; + default: return false; } -- cgit 1.4.1 From c241ed2f0ea549c18cff62a3708b43846b84dae3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 4 Jan 2021 11:24:20 -0500 Subject: drm/amdgpu/display: drop DCN support for aarch64 From Ard: "Simply disabling -mgeneral-regs-only left and right is risky, given that the standard AArch64 ABI permits the use of FP/SIMD registers anywhere, and GCC is known to use SIMD registers for spilling, and may invent other uses of the FP/SIMD register file that have nothing to do with the floating point code in question. Note that putting kernel_neon_begin() and kernel_neon_end() around the code that does use FP is not sufficient here, the problem is in all the other code that may be emitted with references to SIMD registers in it. So the only way to do this properly is to put all floating point code in a separate compilation unit, and only compile that unit with -mgeneral-regs-only." Disable support until the code can be properly refactored to support this properly on aarch64. Acked-by: Will Deacon Reported-by: Ard Biesheuvel Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/Kconfig | 2 +- drivers/gpu/drm/amd/display/dc/calcs/Makefile | 4 ---- drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile | 21 --------------------- drivers/gpu/drm/amd/display/dc/dcn10/Makefile | 7 ------- .../gpu/drm/amd/display/dc/dcn10/dcn10_resource.c | 7 ------- drivers/gpu/drm/amd/display/dc/dcn20/Makefile | 4 ---- drivers/gpu/drm/amd/display/dc/dcn21/Makefile | 4 ---- drivers/gpu/drm/amd/display/dc/dcn30/Makefile | 5 ----- drivers/gpu/drm/amd/display/dc/dcn301/Makefile | 4 ---- drivers/gpu/drm/amd/display/dc/dcn302/Makefile | 4 ---- drivers/gpu/drm/amd/display/dc/dml/Makefile | 4 ---- drivers/gpu/drm/amd/display/dc/dsc/Makefile | 4 ---- drivers/gpu/drm/amd/display/dc/os_types.h | 4 ---- 13 files changed, 1 insertion(+), 73 deletions(-) diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 797b5d4b43e5..e509a175ed17 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -6,7 +6,7 @@ config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y select SND_HDA_COMPONENT if SND_HDA_CORE - select DRM_AMD_DC_DCN if (X86 || PPC64 || (ARM64 && KERNEL_MODE_NEON)) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) + select DRM_AMD_DC_DCN if (X86 || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) help Choose this option if you want to use the new display engine support for AMDGPU. This adds required support for Vega and diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile index 64f515d74410..f3c00f479e1c 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile @@ -33,10 +33,6 @@ ifdef CONFIG_PPC64 calcs_ccflags := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -calcs_rcflags := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index d59b380e7b7f..ff96bee57bfc 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -104,13 +104,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) endif -# prevent build errors: -# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types -# this file is unused on arm64, just like on ppc64 -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := -mgeneral-regs-only -endif - AMD_DAL_CLK_MGR_DCN21 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn21/,$(CLK_MGR_DCN21)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21) @@ -125,13 +118,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) endif -# prevent build errors: -# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types -# this file is unused on arm64, just like on ppc64 -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := -mgeneral-regs-only -endif - AMD_DAL_CLK_MGR_DCN30 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn30/,$(CLK_MGR_DCN30)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30) @@ -146,13 +132,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn301/vg_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) endif -# prevent build errors: -# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types -# this file is unused on arm64, just like on ppc64 -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn301/vg_clk_mgr.o := -mgeneral-regs-only -endif - AMD_DAL_CLK_MGR_DCN301 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn301/,$(CLK_MGR_DCN301)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN301) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile index 733e6e6e43bd..62ad1a11bff9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile @@ -31,11 +31,4 @@ DCN10 = dcn10_init.o dcn10_resource.o dcn10_ipp.o dcn10_hw_sequencer.o \ AMD_DAL_DCN10 = $(addprefix $(AMDDALPATH)/dc/dcn10/,$(DCN10)) -# fix: -# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types -# aarch64 does not support soft-float, so use hard-float and handle this in code -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn10/dcn10_resource.o := -mgeneral-regs-only -endif - AMD_DISPLAY_FILES += $(AMD_DAL_DCN10) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index bdc37831535e..36745193c391 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -1534,15 +1534,8 @@ static bool dcn10_resource_construct( memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults)); memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults)); -#if defined(CONFIG_ARM64) - /* Aarch64 does not support -msoft-float/-mfloat-abi=soft */ - DC_FP_START(); - dcn10_resource_construct_fp(dc); - DC_FP_END(); -#else /* Other architectures we build for build this with soft-float */ dcn10_resource_construct_fp(dc); -#endif pool->base.pp_smu = dcn10_pp_smu_create(ctx); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index 624cb1341ef1..5fcaf78334ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -17,10 +17,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile index 1ee5fc03b7b3..bb8c95141082 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile @@ -13,10 +13,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index 248c2711aace..c20331eb62e0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -41,11 +41,6 @@ CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mhard-float -maltivec CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mgeneral-regs-only -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile index 2fd5d34e4ba6..3ca7d911d25c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile @@ -21,10 +21,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/dcn301/dcn301_resource.o := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn301/dcn301_resource.o := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile index 36e44e1b07fa..8d4924b7dc22 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile @@ -20,10 +20,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index a02a33dcd70b..6bb7f2905821 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -33,10 +33,6 @@ ifdef CONFIG_PPC64 dml_ccflags := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -dml_rcflags := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile index f2624a1156e5..8d31eb75c6a6 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile @@ -10,10 +10,6 @@ ifdef CONFIG_PPC64 dsc_ccflags := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -dsc_rcflags := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h index 95cb56929e79..126c2f3a4dd3 100644 --- a/drivers/gpu/drm/amd/display/dc/os_types.h +++ b/drivers/gpu/drm/amd/display/dc/os_types.h @@ -55,10 +55,6 @@ #include #define DC_FP_START() kernel_fpu_begin() #define DC_FP_END() kernel_fpu_end() -#elif defined(CONFIG_ARM64) -#include -#define DC_FP_START() kernel_neon_begin() -#define DC_FP_END() kernel_neon_end() #elif defined(CONFIG_PPC64) #include #include -- cgit 1.4.1 From 5efc1f4b454c6179d35e7b0c3eda0ad5763a00fc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 5 Jan 2021 11:42:08 -0500 Subject: Revert "drm/amd/display: Fix memory leaks in S3 resume" This reverts commit a135a1b4c4db1f3b8cbed9676a40ede39feb3362. This leads to blank screens on some boards after replugging a display. Revert until we understand the root cause and can fix both the leak and the blank screen after replug. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211033 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1427 Cc: Stylon Wang Cc: Harry Wentland Cc: Nicholas Kazlauskas Cc: Andre Tomt Cc: Oleksandr Natalenko Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3fb6baf9b0ba..146486071d01 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2386,8 +2386,7 @@ void amdgpu_dm_update_connector_after_detect( drm_connector_update_edid_property(connector, aconnector->edid); - aconnector->num_modes = drm_add_edid_modes(connector, aconnector->edid); - drm_connector_list_update(connector); + drm_add_edid_modes(connector, aconnector->edid); if (aconnector->dc_link->aux_mode) drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux, -- cgit 1.4.1 From 67a5a68013056cbcf0a647e36cb6f4622fb6a470 Mon Sep 17 00:00:00 2001 From: Valdis Klētnieks Date: Sat, 26 Dec 2020 13:21:58 -0500 Subject: gcc-plugins: fix gcc 11 indigestion with plugins... Fedora Rawhide has started including gcc 11,and the g++ compiler throws a wobbly when it hits scripts/gcc-plugins: HOSTCXX scripts/gcc-plugins/latent_entropy_plugin.so In file included from /usr/include/c++/11/type_traits:35, from /usr/lib/gcc/x86_64-redhat-linux/11/plugin/include/system.h:244, from /usr/lib/gcc/x86_64-redhat-linux/11/plugin/include/gcc-plugin.h:28, from scripts/gcc-plugins/gcc-common.h:7, from scripts/gcc-plugins/latent_entropy_plugin.c:78: /usr/include/c++/11/bits/c++0x_warning.h:32:2: error: #error This file requires compiler and library support for the ISO C++ 2011 standard. This support must be enabled with the -std=c++11 or -std=gnu++11 compiler options. 32 | #error This file requires compiler and library support \ In fact, it works just fine with c++11, which has been in gcc since 4.8, and we now require 4.9 as a minimum. Signed-off-by: Valdis Kletnieks Acked-by: Josh Poimboeuf Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/82487.1609006918@turing-police --- scripts/gcc-plugins/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/gcc-plugins/Makefile b/scripts/gcc-plugins/Makefile index d66949bfeba4..b5487cce69e8 100644 --- a/scripts/gcc-plugins/Makefile +++ b/scripts/gcc-plugins/Makefile @@ -22,9 +22,9 @@ always-y += $(GCC_PLUGIN) GCC_PLUGINS_DIR = $(shell $(CC) -print-file-name=plugin) plugin_cxxflags = -Wp,-MMD,$(depfile) $(KBUILD_HOSTCXXFLAGS) -fPIC \ - -I $(GCC_PLUGINS_DIR)/include -I $(obj) -std=gnu++98 \ + -I $(GCC_PLUGINS_DIR)/include -I $(obj) -std=gnu++11 \ -fno-rtti -fno-exceptions -fasynchronous-unwind-tables \ - -ggdb -Wno-narrowing -Wno-unused-variable -Wno-c++11-compat \ + -ggdb -Wno-narrowing -Wno-unused-variable \ -Wno-format-diag plugin_ldflags = -shared -- cgit 1.4.1 From 6f02b540d7597f357bc6ee711346761045d4e108 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Wed, 6 Jan 2021 15:59:06 +0000 Subject: bpftool: Fix compilation failure for net.o with older glibc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For older glibc ~2.17, #include'ing both linux/if.h and net/if.h fails due to complaints about redefinition of interface flags: CC net.o In file included from net.c:13:0: /usr/include/linux/if.h:71:2: error: redeclaration of enumerator ‘IFF_UP’ IFF_UP = 1<<0, /* sysfs */ ^ /usr/include/net/if.h:44:5: note: previous definition of ‘IFF_UP’ was here IFF_UP = 0x1, /* Interface is up. */ The issue was fixed in kernel headers in [1], but since compilation of net.c picks up system headers the problem can recur. Dropping #include resolves the issue and it is not needed for compilation anyhow. [1] https://lore.kernel.org/netdev/1461512707-23058-1-git-send-email-mikko.rapeli__34748.27880641$1462831734$gmane$org@iki.fi/ Fixes: f6f3bac08ff9 ("tools/bpf: bpftool: add net support") Signed-off-by: Alan Maguire Signed-off-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/1609948746-15369-1-git-send-email-alan.maguire@oracle.com --- tools/bpf/bpftool/net.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 3fae61ef6339..ff3aa0cf3997 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include -- cgit 1.4.1 From fcc42338375a1e67b8568dbb558f8b784d0f3b01 Mon Sep 17 00:00:00 2001 From: Akilesh Kailash Date: Mon, 28 Dec 2020 07:14:07 +0000 Subject: dm snapshot: flush merged data before committing metadata If the origin device has a volatile write-back cache and the following events occur: 1: After finishing merge operation of one set of exceptions, merge_callback() is invoked. 2: Update the metadata in COW device tracking the merge completion. This update to COW device is flushed cleanly. 3: System crashes and the origin device's cache where the recent merge was completed has not been flushed. During the next cycle when we read the metadata from the COW device, we will skip reading those metadata whose merge was completed in step (1). This will lead to data loss/corruption. To address this, flush the origin device post merge IO before updating the metadata. Cc: stable@vger.kernel.org Signed-off-by: Akilesh Kailash Signed-off-by: Mike Snitzer --- drivers/md/dm-snap.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 4668b2cd98f4..11890db71f3f 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -141,6 +141,11 @@ struct dm_snapshot { * for them to be committed. */ struct bio_list bios_queued_during_merge; + + /* + * Flush data after merge. + */ + struct bio flush_bio; }; /* @@ -1121,6 +1126,17 @@ shut: static void error_bios(struct bio *bio); +static int flush_data(struct dm_snapshot *s) +{ + struct bio *flush_bio = &s->flush_bio; + + bio_reset(flush_bio); + bio_set_dev(flush_bio, s->origin->bdev); + flush_bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; + + return submit_bio_wait(flush_bio); +} + static void merge_callback(int read_err, unsigned long write_err, void *context) { struct dm_snapshot *s = context; @@ -1134,6 +1150,11 @@ static void merge_callback(int read_err, unsigned long write_err, void *context) goto shut; } + if (flush_data(s) < 0) { + DMERR("Flush after merge failed: shutting down merge"); + goto shut; + } + if (s->store->type->commit_merge(s->store, s->num_merging_chunks) < 0) { DMERR("Write error in exception store: shutting down merge"); @@ -1318,6 +1339,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) s->first_merging_chunk = 0; s->num_merging_chunks = 0; bio_list_init(&s->bios_queued_during_merge); + bio_init(&s->flush_bio, NULL, 0); /* Allocate hash table for COW data */ if (init_hash_tables(s)) { @@ -1504,6 +1526,8 @@ static void snapshot_dtr(struct dm_target *ti) dm_exception_store_destroy(s->store); + bio_uninit(&s->flush_bio); + dm_put_device(ti, s->cow); dm_put_device(ti, s->origin); -- cgit 1.4.1 From 0d136f5cd9a7ba6ded7f8ff17e8b1ba680f37625 Mon Sep 17 00:00:00 2001 From: Marek Behún Date: Tue, 5 Jan 2021 18:23:33 +0100 Subject: net: mvneta: fix error message when MTU too large for XDP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The error message says that "Jumbo frames are not supported on XDP", but the code checks for mtu > MVNETA_MAX_RX_BUF_SIZE, not mtu > 1500. Fix this error message. Signed-off-by: Marek Behún Fixes: 0db51da7a8e9 ("net: mvneta: add basic XDP support") Cc: Lorenzo Bianconi Cc: Thomas Petazzoni Link: https://lore.kernel.org/r/20210105172333.21613-1-kabel@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 3369ec717a51..bc4d8d144401 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4432,7 +4432,7 @@ static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog, struct bpf_prog *old_prog; if (prog && dev->mtu > MVNETA_MAX_RX_BUF_SIZE) { - NL_SET_ERR_MSG_MOD(extack, "Jumbo frames not supported on XDP"); + NL_SET_ERR_MSG_MOD(extack, "MTU too large for XDP"); return -EOPNOTSUPP; } -- cgit 1.4.1 From 94bcfdbff0c210b17b27615f4952cc6ece7d5f5f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Jan 2021 11:07:25 -0800 Subject: net: bareudp: add missing error handling for bareudp_link_config() .dellink does not get called after .newlink fails, bareudp_newlink() must undo what bareudp_configure() has done if bareudp_link_config() fails. v2: call bareudp_dellink(), like bareudp_dev_create() does Fixes: 571912c69f0e ("net: UDP tunnel encapsulation module for tunnelling different protocols like MPLS, IP, NSH etc.") Link: https://lore.kernel.org/r/20210105190725.1736246-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/bareudp.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 708171c0d628..85de5f96c02b 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -645,11 +645,20 @@ static int bareudp_link_config(struct net_device *dev, return 0; } +static void bareudp_dellink(struct net_device *dev, struct list_head *head) +{ + struct bareudp_dev *bareudp = netdev_priv(dev); + + list_del(&bareudp->next); + unregister_netdevice_queue(dev, head); +} + static int bareudp_newlink(struct net *net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct bareudp_conf conf; + LIST_HEAD(list_kill); int err; err = bareudp2info(data, &conf, extack); @@ -662,17 +671,14 @@ static int bareudp_newlink(struct net *net, struct net_device *dev, err = bareudp_link_config(dev, tb); if (err) - return err; + goto err_unconfig; return 0; -} - -static void bareudp_dellink(struct net_device *dev, struct list_head *head) -{ - struct bareudp_dev *bareudp = netdev_priv(dev); - list_del(&bareudp->next); - unregister_netdevice_queue(dev, head); +err_unconfig: + bareudp_dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); + return err; } static size_t bareudp_get_size(const struct net_device *dev) -- cgit 1.4.1 From 7f847db3040897f3ee25ce97265c545b5561f6c2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 5 Jan 2021 18:18:15 -0800 Subject: net: dsa: fix led_classdev build errors Fix build errors when LEDS_CLASS=m and NET_DSA_HIRSCHMANN_HELLCREEK=y. This limits the latter to =m when LEDS_CLASS=m. microblaze-linux-ld: drivers/net/dsa/hirschmann/hellcreek_ptp.o: in function `hellcreek_ptp_setup': (.text+0xf80): undefined reference to `led_classdev_register_ext' microblaze-linux-ld: (.text+0xf94): undefined reference to `led_classdev_register_ext' microblaze-linux-ld: drivers/net/dsa/hirschmann/hellcreek_ptp.o: in function `hellcreek_ptp_free': (.text+0x1018): undefined reference to `led_classdev_unregister' microblaze-linux-ld: (.text+0x1024): undefined reference to `led_classdev_unregister' Signed-off-by: Randy Dunlap Reported-by: kernel test robot Link: lore.kernel.org/r/202101060655.iUvMJqS2-lkp@intel.com Cc: Kurt Kanzenbach Link: https://lore.kernel.org/r/20210106021815.31796-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- drivers/net/dsa/hirschmann/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/hirschmann/Kconfig b/drivers/net/dsa/hirschmann/Kconfig index 222dd35e2c9d..e01191107a4b 100644 --- a/drivers/net/dsa/hirschmann/Kconfig +++ b/drivers/net/dsa/hirschmann/Kconfig @@ -4,6 +4,7 @@ config NET_DSA_HIRSCHMANN_HELLCREEK depends on HAS_IOMEM depends on NET_DSA depends on PTP_1588_CLOCK + depends on LEDS_CLASS select NET_DSA_TAG_HELLCREEK help This driver adds support for Hirschmann Hellcreek TSN switches. -- cgit 1.4.1 From 1f685e6adbbe3c7b1bd9053be771b898d9efa655 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 5 Jan 2021 20:25:31 -0800 Subject: ptp: ptp_ines: prevent build when HAS_IOMEM is not set ptp_ines.c uses devm_platform_ioremap_resource(), which is only built/available when CONFIG_HAS_IOMEM is enabled. CONFIG_HAS_IOMEM is not enabled for arch/s390/, so builds on S390 have a build error: s390-linux-ld: drivers/ptp/ptp_ines.o: in function `ines_ptp_ctrl_probe': ptp_ines.c:(.text+0x17e6): undefined reference to `devm_platform_ioremap_resource' Prevent builds of ptp_ines.c when HAS_IOMEM is not set. Fixes: bad1eaa6ac31 ("ptp: Add a driver for InES time stamping IP core.") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Link: lore.kernel.org/r/202101031125.ZEFCUiKi-lkp@intel.com Acked-by: Richard Cochran Link: https://lore.kernel.org/r/20210106042531.1351-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- drivers/ptp/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index d2bf05ccbbe2..f2edef0df40f 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -79,6 +79,7 @@ config DP83640_PHY config PTP_1588_CLOCK_INES tristate "ZHAW InES PTP time stamping IP core" depends on NETWORK_PHY_TIMESTAMPING + depends on HAS_IOMEM depends on PHYLIB depends on PTP_1588_CLOCK help -- cgit 1.4.1 From c4aec381ab98c9189d47b935832541d520f1f67f Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 15 Dec 2020 11:32:37 +0100 Subject: can: m_can: m_can_class_unregister(): remove erroneous m_can_clk_stop() In m_can_class_register() the clock is started, but stopped on exit. When calling m_can_class_unregister(), the clock is stopped a second time. This patch removes the erroneous m_can_clk_stop() in m_can_class_unregister(). Fixes: f524f829b75a ("can: m_can: Create a m_can platform framework") Cc: Dan Murphy Cc: Sriram Dash Reviewed-by: Sean Nyekjaer Link: https://lore.kernel.org/r/20201215103238.524029-2-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 2c9f12401276..da551fd0f502 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1852,8 +1852,6 @@ EXPORT_SYMBOL_GPL(m_can_class_register); void m_can_class_unregister(struct m_can_classdev *cdev) { unregister_candev(cdev->net); - - m_can_clk_stop(cdev); } EXPORT_SYMBOL_GPL(m_can_class_unregister); -- cgit 1.4.1 From aee2b3ccc8a63d1cd7da6a8a153d1f3712d40826 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 15 Dec 2020 11:32:38 +0100 Subject: can: tcan4x5x: fix bittiming const, use common bittiming from m_can driver According to the TCAN4550 datasheet "SLLSF91 - DECEMBER 2018" the tcan4x5x has the same bittiming constants as a m_can revision 3.2.x/3.3.0. The tcan4x5x chip I'm using identifies itself as m_can revision 3.2.1, so remove the tcan4x5x specific bittiming values and rely on the values in the m_can driver, which are selected according to core revision. Fixes: 5443c226ba91 ("can: tcan4x5x: Add tcan4x5x driver to the kernel") Cc: Dan Murphy Reviewed-by: Sean Nyekjaer Link: https://lore.kernel.org/r/20201215103238.524029-3-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/tcan4x5x.c | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/drivers/net/can/m_can/tcan4x5x.c b/drivers/net/can/m_can/tcan4x5x.c index 24c737c4fc44..970f0e9d19bf 100644 --- a/drivers/net/can/m_can/tcan4x5x.c +++ b/drivers/net/can/m_can/tcan4x5x.c @@ -131,30 +131,6 @@ static inline struct tcan4x5x_priv *cdev_to_priv(struct m_can_classdev *cdev) } -static struct can_bittiming_const tcan4x5x_bittiming_const = { - .name = DEVICE_NAME, - .tseg1_min = 2, - .tseg1_max = 31, - .tseg2_min = 2, - .tseg2_max = 16, - .sjw_max = 16, - .brp_min = 1, - .brp_max = 32, - .brp_inc = 1, -}; - -static struct can_bittiming_const tcan4x5x_data_bittiming_const = { - .name = DEVICE_NAME, - .tseg1_min = 1, - .tseg1_max = 32, - .tseg2_min = 1, - .tseg2_max = 16, - .sjw_max = 16, - .brp_min = 1, - .brp_max = 32, - .brp_inc = 1, -}; - static void tcan4x5x_check_wake(struct tcan4x5x_priv *priv) { int wake_state = 0; @@ -469,8 +445,6 @@ static int tcan4x5x_can_probe(struct spi_device *spi) mcan_class->dev = &spi->dev; mcan_class->ops = &tcan4x5x_ops; mcan_class->is_peripheral = true; - mcan_class->bit_timing = &tcan4x5x_bittiming_const; - mcan_class->data_timing = &tcan4x5x_data_bittiming_const; mcan_class->net->irq = spi->irq; spi_set_drvdata(spi, priv); -- cgit 1.4.1 From a876e7e2a8e62712425be178d483ffdff09f0853 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 15 Dec 2020 06:54:54 -0800 Subject: HID: uclogic: remove h from printk format specifier See Documentation/core-api/printk-formats.rst. h should no longer be used in the format specifier for printk. Signed-off-by: Tom Rix Signed-off-by: Jiri Kosina --- drivers/hid/hid-uclogic-params.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-uclogic-params.c b/drivers/hid/hid-uclogic-params.c index d26d8cd98efc..56406cee401f 100644 --- a/drivers/hid/hid-uclogic-params.c +++ b/drivers/hid/hid-uclogic-params.c @@ -90,7 +90,7 @@ static int uclogic_params_get_str_desc(__u8 **pbuf, struct hid_device *hdev, goto cleanup; } else if (rc < 0) { hid_err(hdev, - "failed retrieving string descriptor #%hhu: %d\n", + "failed retrieving string descriptor #%u: %d\n", idx, rc); goto cleanup; } -- cgit 1.4.1 From 4d2b71634b5ad142617e430bc6ef659331a576d0 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 15 Dec 2020 06:59:28 -0800 Subject: HID: wiimote: remove h from printk format specifier See Documentation/core-api/printk-formats.rst. h should no longer be used in the format specifier for printk. Signed-off-by: Tom Rix Signed-off-by: Jiri Kosina --- drivers/hid/hid-wiimote-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-wiimote-core.c b/drivers/hid/hid-wiimote-core.c index 41012681cafd..4399d6c6afef 100644 --- a/drivers/hid/hid-wiimote-core.c +++ b/drivers/hid/hid-wiimote-core.c @@ -1482,7 +1482,7 @@ static void handler_return(struct wiimote_data *wdata, const __u8 *payload) wdata->state.cmd_err = err; wiimote_cmd_complete(wdata); } else if (err) { - hid_warn(wdata->hdev, "Remote error %hhu on req %hhu\n", err, + hid_warn(wdata->hdev, "Remote error %u on req %u\n", err, cmd); } } -- cgit 1.4.1 From 6086f02a18aeae795a61a3fc6566920891ea3b52 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 5 Jan 2021 22:41:37 +0100 Subject: can: mcp251xfd: mcp251xfd_handle_tefif(): fix TEF vs. TX race condition The mcp251xfd driver uses a TX FIFO for sending CAN frames and a TX Event FIFO (TEF) for completed TX-requests. The TEF event handling in the mcp251xfd_handle_tefif() function has a race condition. It first increments the tx-ring's tail counter to signal that there's room in the TX and TEF FIFO, then it increments the TEF FIFO in hardware. A running mcp251xfd_start_xmit() on a different CPU might not stop the txqueue (as the tx-ring still shows free space). The next mcp251xfd_start_xmit() will push a message into the chip and the TX complete event might overflow the TEF FIFO. This patch changes the order to fix the problem. Fixes: 68c0c1c7f966 ("can: mcp251xfd: tef-path: reduce number of SPI core requests to set UINC bit") Link: https://lore.kernel.org/r/20210105214138.3150886-2-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 77129d5f410b..85a1a8b7c0e7 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1368,13 +1368,10 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv) struct mcp251xfd_tx_ring *tx_ring = priv->tx; struct spi_transfer *last_xfer; - tx_ring->tail += len; - /* Increment the TEF FIFO tail pointer 'len' times in * a single SPI message. - */ - - /* Note: + * + * Note: * * "cs_change == 1" on the last transfer results in an * active chip select after the complete SPI @@ -1391,6 +1388,8 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv) if (err) return err; + tx_ring->tail += len; + err = mcp251xfd_check_tef_tail(priv); if (err) return err; -- cgit 1.4.1 From 2fbb397f584077e3c90abd06829f5a1f66fdd5f4 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 5 Jan 2021 22:41:38 +0100 Subject: can: mcp251xfd: mcp251xfd_handle_rxif_ring(): first increment RX tail pointer in HW, then in driver The previous patch fixes a TEF vs. TX race condition, by first updating the TEF tail pointer in hardware, and then updating the driver internal pointer. The same pattern exists in the RX-path, too. This should be no problem, as the driver accesses the RX-FIFO from the interrupt handler only, thus the access is properly serialized. Fix the order here, too, so that the TEF- and RX-path look similar. Fixes: 1f652bb6bae7 ("can: mcp25xxfd: rx-path: reduce number of SPI core requests to set UINC bit") Link: https://lore.kernel.org/r/20210105214138.3150886-3-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 85a1a8b7c0e7..36235afb0bc6 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1552,10 +1552,8 @@ mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv, /* Increment the RX FIFO tail pointer 'len' times in a * single SPI message. - */ - ring->tail += len; - - /* Note: + * + * Note: * * "cs_change == 1" on the last transfer results in an * active chip select after the complete SPI @@ -1571,6 +1569,8 @@ mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv, last_xfer->cs_change = 1; if (err) return err; + + ring->tail += len; } return 0; -- cgit 1.4.1 From 1169ec8f5d71044082a9898bbd1f1bf4a690c5a4 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 4 Jan 2021 09:03:27 +0000 Subject: can: rcar: Kconfig: update help description for CAN_RCAR config The rcar_can driver also supports RZ/G SoC's, update the description to reflect this. Signed-off-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20210104090327.6547-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rcar/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/rcar/Kconfig b/drivers/net/can/rcar/Kconfig index 8d36101b78e3..29cabc20109e 100644 --- a/drivers/net/can/rcar/Kconfig +++ b/drivers/net/can/rcar/Kconfig @@ -1,10 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 config CAN_RCAR - tristate "Renesas R-Car CAN controller" + tristate "Renesas R-Car and RZ/G CAN controller" depends on ARCH_RENESAS || ARM help Say Y here if you want to use CAN controller found on Renesas R-Car - SoCs. + or RZ/G SoCs. To compile this driver as a module, choose M here: the module will be called rcar_can. -- cgit 1.4.1 From 6ee49118f87cf02b36f68812bc49855b7b627a2b Mon Sep 17 00:00:00 2001 From: Sriram Dash Date: Mon, 4 Jan 2021 18:01:34 +0530 Subject: MAINTAINERS: Update MCAN MMIO device driver maintainer Update Pankaj Sharma as maintainer for mcan mmio device driver as I will be moving to a different role. Signed-off-by: Sriram Dash Acked-by: Pankaj Sharma Link: https://lore.kernel.org/r/20210104123134.16930-1-sriram.dash@samsung.com Signed-off-by: Marc Kleine-Budde --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7c1e45c416b1..b15514a770e3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10847,7 +10847,7 @@ F: drivers/media/radio/radio-maxiradio* MCAN MMIO DEVICE DRIVER M: Dan Murphy -M: Sriram Dash +M: Pankaj Sharma L: linux-can@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/can/bosch,m_can.yaml -- cgit 1.4.1 From 83b5bd628f65e6b4d1924b307d6a88a57827bdb0 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 4 Jan 2021 12:36:14 +0000 Subject: arm64: Move PSTATE.TCO setting to separate functions For consistency with __uaccess_{disable,enable}_hw_pan(), move the PSTATE.TCO setting into dedicated __uaccess_{disable,enable}_tco() functions. Signed-off-by: Catalin Marinas Acked-by: Vincenzo Frascino Acked-by: Mark Rutland --- arch/arm64/include/asm/uaccess.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 6f986e09a781..f0fe0cc6abe0 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -176,10 +176,21 @@ static inline void __uaccess_enable_hw_pan(void) * The Tag check override (TCO) bit disables temporarily the tag checking * preventing the issue. */ -static inline void uaccess_disable_privileged(void) +static inline void __uaccess_disable_tco(void) { asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(0), ARM64_MTE, CONFIG_KASAN_HW_TAGS)); +} + +static inline void __uaccess_enable_tco(void) +{ + asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(1), + ARM64_MTE, CONFIG_KASAN_HW_TAGS)); +} + +static inline void uaccess_disable_privileged(void) +{ + __uaccess_disable_tco(); if (uaccess_ttbr0_disable()) return; @@ -189,8 +200,7 @@ static inline void uaccess_disable_privileged(void) static inline void uaccess_enable_privileged(void) { - asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(1), - ARM64_MTE, CONFIG_KASAN_HW_TAGS)); + __uaccess_enable_tco(); if (uaccess_ttbr0_enable()) return; -- cgit 1.4.1 From 05cd84691eafcd7959a1e120d5e72c0dd98c5d91 Mon Sep 17 00:00:00 2001 From: Charan Teja Reddy Date: Tue, 5 Jan 2021 20:06:39 +0530 Subject: dmabuf: fix use-after-free of dmabuf's file->f_inode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is observed 'use-after-free' on the dmabuf's file->f_inode with the race between closing the dmabuf file and reading the dmabuf's debug info. Consider the below scenario where P1 is closing the dma_buf file and P2 is reading the dma_buf's debug info in the system: P1 P2 dma_buf_debug_show() dma_buf_put() __fput() file->f_op->release() dput() .... dentry_unlink_inode() iput(dentry->d_inode) (where the inode is freed) mutex_lock(&db_list.lock) read 'dma_buf->file->f_inode' (the same inode is freed by P1) mutex_unlock(&db_list.lock) dentry->d_op->d_release()--> dma_buf_release() ..... mutex_lock(&db_list.lock) removes the dmabuf from the list mutex_unlock(&db_list.lock) In the above scenario, when dma_buf_put() is called on a dma_buf, it first frees the dma_buf's file->f_inode(=dentry->d_inode) and then removes this dma_buf from the system db_list. In between P2 traversing the db_list tries to access this dma_buf's file->f_inode that was freed by P1 which is a use-after-free case. Since, __fput() calls f_op->release first and then later calls the d_op->d_release, move the dma_buf's db_list removal from d_release() to f_op->release(). This ensures that dma_buf's file->f_inode is not accessed after it is released. Cc: # 5.4.x- Fixes: 4ab59c3c638c ("dma-buf: Move dma_buf_release() from fops to dentry_ops") Acked-by: Christian König Signed-off-by: Charan Teja Reddy Signed-off-by: Sumit Semwal Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/1609857399-31549-1-git-send-email-charante@codeaurora.org --- drivers/dma-buf/dma-buf.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index e63684d4cd90..9ad6397aaa97 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -76,10 +76,6 @@ static void dma_buf_release(struct dentry *dentry) dmabuf->ops->release(dmabuf); - mutex_lock(&db_list.lock); - list_del(&dmabuf->list_node); - mutex_unlock(&db_list.lock); - if (dmabuf->resv == (struct dma_resv *)&dmabuf[1]) dma_resv_fini(dmabuf->resv); @@ -88,6 +84,22 @@ static void dma_buf_release(struct dentry *dentry) kfree(dmabuf); } +static int dma_buf_file_release(struct inode *inode, struct file *file) +{ + struct dma_buf *dmabuf; + + if (!is_dma_buf_file(file)) + return -EINVAL; + + dmabuf = file->private_data; + + mutex_lock(&db_list.lock); + list_del(&dmabuf->list_node); + mutex_unlock(&db_list.lock); + + return 0; +} + static const struct dentry_operations dma_buf_dentry_ops = { .d_dname = dmabuffs_dname, .d_release = dma_buf_release, @@ -413,6 +425,7 @@ static void dma_buf_show_fdinfo(struct seq_file *m, struct file *file) } static const struct file_operations dma_buf_fops = { + .release = dma_buf_file_release, .mmap = dma_buf_mmap_internal, .llseek = dma_buf_llseek, .poll = dma_buf_poll, -- cgit 1.4.1 From e89eed02a5f1b864fa5abafc8e8e71bd9fd66d1f Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 5 Jan 2021 20:53:42 +0100 Subject: kcov, usb: hide in_serving_softirq checks in __usb_hcd_giveback_urb Done opencode in_serving_softirq() checks in in_serving_softirq() to avoid cluttering the code, hide them in kcov helpers instead. Fixes: aee9ddb1d371 ("kcov, usb: only collect coverage from __usb_hcd_giveback_urb in softirq") Signed-off-by: Andrey Konovalov Link: https://lore.kernel.org/r/aeb430c5bb90b0ccdf1ec302c70831c1a47b9c45.1609876340.git.andreyknvl@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 8 +++----- include/linux/kcov.h | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 60886a7464c3..ad5a0f405a75 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1649,14 +1649,12 @@ static void __usb_hcd_giveback_urb(struct urb *urb) urb->status = status; /* * This function can be called in task context inside another remote - * coverage collection section, but KCOV doesn't support that kind of + * coverage collection section, but kcov doesn't support that kind of * recursion yet. Only collect coverage in softirq context for now. */ - if (in_serving_softirq()) - kcov_remote_start_usb((u64)urb->dev->bus->busnum); + kcov_remote_start_usb_softirq((u64)urb->dev->bus->busnum); urb->complete(urb); - if (in_serving_softirq()) - kcov_remote_stop(); + kcov_remote_stop_softirq(); usb_anchor_resume_wakeups(anchor); atomic_dec(&urb->use_count); diff --git a/include/linux/kcov.h b/include/linux/kcov.h index a10e84707d82..4e3037dc1204 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -52,6 +52,25 @@ static inline void kcov_remote_start_usb(u64 id) kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_USB, id)); } +/* + * The softirq flavor of kcov_remote_*() functions is introduced as a temporary + * work around for kcov's lack of nested remote coverage sections support in + * task context. Adding suport for nested sections is tracked in: + * https://bugzilla.kernel.org/show_bug.cgi?id=210337 + */ + +static inline void kcov_remote_start_usb_softirq(u64 id) +{ + if (in_serving_softirq()) + kcov_remote_start_usb(id); +} + +static inline void kcov_remote_stop_softirq(void) +{ + if (in_serving_softirq()) + kcov_remote_stop(); +} + #else static inline void kcov_task_init(struct task_struct *t) {} @@ -66,6 +85,8 @@ static inline u64 kcov_common_handle(void) } static inline void kcov_remote_start_common(u64 id) {} static inline void kcov_remote_start_usb(u64 id) {} +static inline void kcov_remote_start_usb_softirq(u64 id) {} +static inline void kcov_remote_stop_softirq(void) {} #endif /* CONFIG_KCOV */ #endif /* _LINUX_KCOV_H */ -- cgit 1.4.1 From e2459108b5a0604c4b472cae2b3cb8d3444c77fb Mon Sep 17 00:00:00 2001 From: "taehyun.cho" Date: Thu, 7 Jan 2021 00:46:25 +0900 Subject: usb: gadget: enable super speed plus Enable Super speed plus in configfs to support USB3.1 Gen2. This ensures that when a USB gadget is plugged in, it is enumerated as Gen 2 and connected at 10 Gbps if the host and cable are capable of it. Many in-tree gadget functions (fs, midi, acm, ncm, mass_storage, etc.) already have SuperSpeed Plus support. Tested: plugged gadget into Linux host and saw: [284907.385986] usb 8-2: new SuperSpeedPlus Gen 2 USB device number 3 using xhci_hcd Tested-by: Lorenzo Colitti Acked-by: Felipe Balbi Signed-off-by: taehyun.cho Signed-off-by: Lorenzo Colitti Link: https://lore.kernel.org/r/20210106154625.2801030-1-lorenzo@google.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/configfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 408a5332a975..36ffb43f9c1a 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1543,7 +1543,7 @@ static const struct usb_gadget_driver configfs_driver_template = { .suspend = configfs_composite_suspend, .resume = configfs_composite_resume, - .max_speed = USB_SPEED_SUPER, + .max_speed = USB_SPEED_SUPER_PLUS, .driver = { .owner = THIS_MODULE, .name = "configfs-gadget", @@ -1583,7 +1583,7 @@ static struct config_group *gadgets_make( gi->composite.unbind = configfs_do_nothing; gi->composite.suspend = NULL; gi->composite.resume = NULL; - gi->composite.max_speed = USB_SPEED_SUPER; + gi->composite.max_speed = USB_SPEED_SUPER_PLUS; spin_lock_init(&gi->spinlock); mutex_init(&gi->lock); -- cgit 1.4.1 From 41952a66015466c3208aac96b14ffd92e0943589 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Wed, 6 Jan 2021 00:16:05 +0000 Subject: usb: typec: Fix copy paste error for NVIDIA alt-mode description The name of the module for the NVIDIA alt-mode is incorrect as it looks to be a copy-paste error from the entry above, update it to the correct typec_nvidia module name. Cc: Ajay Gupta Cc: Heikki Krogerus Signed-off-by: Peter Robinson Link: https://lore.kernel.org/r/20210106001605.167917-1-pbrobinson@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/altmodes/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/altmodes/Kconfig b/drivers/usb/typec/altmodes/Kconfig index 187690fd1a5b..60d375e9c3c7 100644 --- a/drivers/usb/typec/altmodes/Kconfig +++ b/drivers/usb/typec/altmodes/Kconfig @@ -20,6 +20,6 @@ config TYPEC_NVIDIA_ALTMODE to enable support for VirtualLink devices with NVIDIA GPUs. To compile this driver as a module, choose M here: the - module will be called typec_displayport. + module will be called typec_nvidia. endmenu -- cgit 1.4.1 From 6c75c2bad36cfb43b144e6a0a76a69993c72097f Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Wed, 6 Jan 2021 19:49:04 -0800 Subject: usb: typec: Send uevent for num_altmodes update Generate a change uevent when the "number_of_alternate_modes" sysfs file for partners and plugs is updated by a port driver. Cc: Heikki Krogerus Cc: Benson Leung Signed-off-by: Prashant Malani Link: https://lore.kernel.org/r/20210107034904.4112029-1-pmalani@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/class.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index ebfd3113a9a8..8f77669f9cf4 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -766,6 +766,7 @@ int typec_partner_set_num_altmodes(struct typec_partner *partner, int num_altmod return ret; sysfs_notify(&partner->dev.kobj, NULL, "number_of_alternate_modes"); + kobject_uevent(&partner->dev.kobj, KOBJ_CHANGE); return 0; } @@ -923,6 +924,7 @@ int typec_plug_set_num_altmodes(struct typec_plug *plug, int num_altmodes) return ret; sysfs_notify(&plug->dev.kobj, NULL, "number_of_alternate_modes"); + kobject_uevent(&plug->dev.kobj, KOBJ_CHANGE); return 0; } -- cgit 1.4.1 From a5c7682aaaa10e42928d73de1c9e1e02d2b14c2e Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Mon, 4 Jan 2021 22:42:39 -0800 Subject: usb: dwc3: gadget: Clear wait flag on dequeue If an active transfer is dequeued, then the endpoint is freed to start a new transfer. Make sure to clear the endpoint's transfer wait flag for this case. Fixes: e0d19563eb6c ("usb: dwc3: gadget: Wait for transfer completion") Cc: stable@vger.kernel.org Acked-by: Felipe Balbi Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/b81cd5b5281cfbfdadb002c4bcf5c9be7c017cfd.1609828485.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 25f654b79e48..ee44321fee38 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1763,6 +1763,8 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, list_for_each_entry_safe(r, t, &dep->started_list, list) dwc3_gadget_move_cancelled_request(r); + dep->flags &= ~DWC3_EP_WAIT_TRANSFER_COMPLETE; + goto out; } } -- cgit 1.4.1 From e0658f970a7f3d85431c6803b7d5169444fb11b0 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 5 Jan 2021 18:55:47 +0100 Subject: drm/radeon: stop re-init the TTM page pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drivers are not supposed to init the page pool directly any more. Signed-off-by: Christian König Reviewed-by: Huang Rui Link: https://patchwork.freedesktop.org/patch/412153/ --- drivers/gpu/drm/radeon/radeon_ttm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index d59ef6e92a40..23195d5d4e91 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -730,9 +730,6 @@ int radeon_ttm_init(struct radeon_device *rdev) } rdev->mman.initialized = true; - ttm_pool_init(&rdev->mman.bdev.pool, rdev->dev, rdev->need_swiotlb, - dma_addressing_limited(&rdev->pdev->dev)); - r = radeon_ttm_init_vram(rdev); if (r) { DRM_ERROR("Failed initializing VRAM heap.\n"); -- cgit 1.4.1 From a73858ef4d5e1d425e171f0f6a52864176a6a979 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 5 Jan 2021 18:56:56 +0100 Subject: drm/ttm: unexport ttm_pool_init/fini MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drivers are not supposed to use this directly any more. Signed-off-by: Christian König Reviewed-by: Huang Rui Link: https://patchwork.freedesktop.org/patch/412156/ --- drivers/gpu/drm/ttm/ttm_pool.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 7b2f60616750..a00b7ab9c14c 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -507,7 +507,6 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, ttm_pool_type_init(&pool->caching[i].orders[j], pool, i, j); } -EXPORT_SYMBOL(ttm_pool_init); /** * ttm_pool_fini - Cleanup a pool @@ -525,7 +524,6 @@ void ttm_pool_fini(struct ttm_pool *pool) for (j = 0; j < MAX_ORDER; ++j) ttm_pool_type_fini(&pool->caching[i].orders[j]); } -EXPORT_SYMBOL(ttm_pool_fini); #ifdef CONFIG_DEBUG_FS /* Count the number of pages available in a pool_type */ -- cgit 1.4.1 From 1efd17e7acb6692bffc6c58718f41f27fdfd62f5 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 31 Dec 2020 08:53:19 +0800 Subject: iommu/vt-d: Fix misuse of ALIGN in qi_flush_piotlb() Use IS_ALIGNED() instead. Otherwise, an unaligned address will be ignored. Fixes: 33cd6e642d6a ("iommu/vt-d: Flush PASID-based iotlb for iova over first level") Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20201231005323.2178523-1-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/dmar.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index b46dbfa6d0ed..004feaed3c72 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1461,8 +1461,8 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, int mask = ilog2(__roundup_pow_of_two(npages)); unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask)); - if (WARN_ON_ONCE(!ALIGN(addr, align))) - addr &= ~(align - 1); + if (WARN_ON_ONCE(!IS_ALIGNED(addr, align))) + addr = ALIGN_DOWN(addr, align); desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) | -- cgit 1.4.1 From 4df7b2268ad81a74168130e1fb04550a8bc980e1 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 31 Dec 2020 08:53:22 +0800 Subject: Revert "iommu: Add quirk for Intel graphic devices in map_sg" This reverts commit 65f746e8285f0a67d43517d86fedb9e29ead49f2. As commit 8a473dbadccf ("drm/i915: Fix DMA mapped scatterlist walks") and commit 934941ed5a30 ("drm/i915: Fix DMA mapped scatterlist lookup") fixed the DMA scatterlist limitations in the i915 driver, remove this temporary workaround. Cc: Tvrtko Ursulin Cc: Tom Murphy Cc: Logan Gunthorpe Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20201231005323.2178523-4-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/dma-iommu.c | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index f0305e6aac1b..4078358ed66e 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -863,33 +863,6 @@ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev); int i, count = 0; - /* - * The Intel graphic driver is used to assume that the returned - * sg list is not combound. This blocks the efforts of converting - * Intel IOMMU driver to dma-iommu api's. Add this quirk to make the - * device driver work and should be removed once it's fixed in i915 - * driver. - */ - if (IS_ENABLED(CONFIG_DRM_I915) && dev_is_pci(dev) && - to_pci_dev(dev)->vendor == PCI_VENDOR_ID_INTEL && - (to_pci_dev(dev)->class >> 16) == PCI_BASE_CLASS_DISPLAY) { - for_each_sg(sg, s, nents, i) { - unsigned int s_iova_off = sg_dma_address(s); - unsigned int s_length = sg_dma_len(s); - unsigned int s_iova_len = s->length; - - s->offset += s_iova_off; - s->length = s_length; - sg_dma_address(s) = dma_addr + s_iova_off; - sg_dma_len(s) = s_length; - dma_addr += s_iova_len; - - pr_info_once("sg combining disabled due to i915 driver\n"); - } - - return nents; - } - for_each_sg(sg, s, nents, i) { /* Restore this segment's original unaligned fields first */ unsigned int s_iova_off = sg_dma_address(s); -- cgit 1.4.1 From 420d42f6f9db27d88bc4f83e3e668fcdacbf7e29 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 31 Dec 2020 08:53:23 +0800 Subject: iommu/vt-d: Fix lockdep splat in sva bind()/unbind() Lock(&iommu->lock) without disabling irq causes lockdep warnings. ======================================================== WARNING: possible irq lock inversion dependency detected 5.11.0-rc1+ #828 Not tainted -------------------------------------------------------- kworker/0:1H/120 just changed the state of lock: ffffffffad9ea1b8 (device_domain_lock){..-.}-{2:2}, at: iommu_flush_dev_iotlb.part.0+0x32/0x120 but this lock took another, SOFTIRQ-unsafe lock in the past: (&iommu->lock){+.+.}-{2:2} and interrupts could create inverse lock ordering between them. other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&iommu->lock); local_irq_disable(); lock(device_domain_lock); lock(&iommu->lock); lock(device_domain_lock); *** DEADLOCK *** Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20201231005323.2178523-5-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/svm.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 4fa248b98031..9bcedd360235 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -281,6 +281,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, struct dmar_domain *dmar_domain; struct device_domain_info *info; struct intel_svm *svm = NULL; + unsigned long iflags; int ret = 0; if (WARN_ON(!iommu) || !data) @@ -381,12 +382,12 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, * each bind of a new device even with an existing PASID, we need to * call the nested mode setup function here. */ - spin_lock(&iommu->lock); + spin_lock_irqsave(&iommu->lock, iflags); ret = intel_pasid_setup_nested(iommu, dev, (pgd_t *)(uintptr_t)data->gpgd, data->hpasid, &data->vendor.vtd, dmar_domain, data->addr_width); - spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&iommu->lock, iflags); if (ret) { dev_err_ratelimited(dev, "Failed to set up PASID %llu in nested mode, Err %d\n", data->hpasid, ret); @@ -486,6 +487,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, struct device_domain_info *info; struct intel_svm_dev *sdev; struct intel_svm *svm = NULL; + unsigned long iflags; int pasid_max; int ret; @@ -605,14 +607,14 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, } } - spin_lock(&iommu->lock); + spin_lock_irqsave(&iommu->lock, iflags); ret = intel_pasid_setup_first_level(iommu, dev, mm ? mm->pgd : init_mm.pgd, svm->pasid, FLPT_DEFAULT_DID, (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | (cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0)); - spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&iommu->lock, iflags); if (ret) { if (mm) mmu_notifier_unregister(&svm->notifier, mm); @@ -632,14 +634,14 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, * Binding a new device with existing PASID, need to setup * the PASID entry. */ - spin_lock(&iommu->lock); + spin_lock_irqsave(&iommu->lock, iflags); ret = intel_pasid_setup_first_level(iommu, dev, mm ? mm->pgd : init_mm.pgd, svm->pasid, FLPT_DEFAULT_DID, (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | (cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0)); - spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&iommu->lock, iflags); if (ret) { kfree(sdev); goto out; -- cgit 1.4.1 From aded8c7c2b72f846a07a2c736b8e75bb8cf50a87 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 5 Jan 2021 16:50:38 -0800 Subject: iommu/arm-smmu-qcom: Initialize SCTLR of the bypass context On SM8150 it's occasionally observed that the boot hangs in between the writing of SMEs and context banks in arm_smmu_device_reset(). The problem seems to coincide with a display refresh happening after updating the stream mapping, but before clearing - and there by disabling translation - the context bank picked to emulate translation bypass. Resolve this by explicitly disabling the bypass context already in cfg_probe. Fixes: f9081b8ff593 ("iommu/arm-smmu-qcom: Implement S2CR quirk") Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20210106005038.4152731-1-bjorn.andersson@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 5dff7ffbef11..1b83d140742f 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -196,6 +196,8 @@ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu) set_bit(qsmmu->bypass_cbndx, smmu->context_map); + arm_smmu_cb_write(smmu, qsmmu->bypass_cbndx, ARM_SMMU_CB_SCTLR, 0); + reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, CBAR_TYPE_S1_TRANS_S2_BYPASS); arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(qsmmu->bypass_cbndx), reg); } -- cgit 1.4.1 From 9ad9f45b3b91162b33abfe175ae75ab65718dbf5 Mon Sep 17 00:00:00 2001 From: Liu Yi L Date: Thu, 7 Jan 2021 00:03:55 +0800 Subject: iommu/vt-d: Move intel_iommu info from struct intel_svm to struct intel_svm_dev 'struct intel_svm' is shared by all devices bound to a give process, but records only a single pointer to a 'struct intel_iommu'. Consequently, cache invalidations may only be applied to a single DMAR unit, and are erroneously skipped for the other devices. In preparation for fixing this, rework the structures so that the iommu pointer resides in 'struct intel_svm_dev', allowing 'struct intel_svm' to track them in its device list. Fixes: 1c4f88b7f1f9 ("iommu/vt-d: Shared virtual address in scalable mode") Cc: Lu Baolu Cc: Jacob Pan Cc: Raj Ashok Cc: David Woodhouse Reported-by: Guo Kaijie Reported-by: Xin Zeng Signed-off-by: Guo Kaijie Signed-off-by: Xin Zeng Signed-off-by: Liu Yi L Tested-by: Guo Kaijie Cc: stable@vger.kernel.org # v5.0+ Acked-by: Lu Baolu Link: https://lore.kernel.org/r/1609949037-25291-2-git-send-email-yi.l.liu@intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/svm.c | 9 +++++---- include/linux/intel-iommu.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 9bcedd360235..790ef3497e7e 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -142,7 +142,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d } desc.qw2 = 0; desc.qw3 = 0; - qi_submit_sync(svm->iommu, &desc, 1, 0); + qi_submit_sync(sdev->iommu, &desc, 1, 0); if (sdev->dev_iotlb) { desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) | @@ -166,7 +166,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d } desc.qw2 = 0; desc.qw3 = 0; - qi_submit_sync(svm->iommu, &desc, 1, 0); + qi_submit_sync(sdev->iommu, &desc, 1, 0); } } @@ -211,7 +211,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) */ rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) - intel_pasid_tear_down_entry(svm->iommu, sdev->dev, + intel_pasid_tear_down_entry(sdev->iommu, sdev->dev, svm->pasid, true); rcu_read_unlock(); @@ -364,6 +364,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, } sdev->dev = dev; sdev->sid = PCI_DEVID(info->bus, info->devfn); + sdev->iommu = iommu; /* Only count users if device has aux domains */ if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)) @@ -548,6 +549,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, goto out; } sdev->dev = dev; + sdev->iommu = iommu; ret = intel_iommu_enable_pasid(iommu, dev); if (ret) { @@ -577,7 +579,6 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, kfree(sdev); goto out; } - svm->iommu = iommu; if (pasid_max > intel_pasid_max_id) pasid_max = intel_pasid_max_id; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index d956987ed032..94522685a0d9 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -758,6 +758,7 @@ struct intel_svm_dev { struct list_head list; struct rcu_head rcu; struct device *dev; + struct intel_iommu *iommu; struct svm_dev_ops *ops; struct iommu_sva sva; u32 pasid; @@ -771,7 +772,6 @@ struct intel_svm { struct mmu_notifier notifier; struct mm_struct *mm; - struct intel_iommu *iommu; unsigned int flags; u32 pasid; int gpasid; /* In case that guest PASID is different from host PASID */ -- cgit 1.4.1 From 18abda7a2d555783d28ea1701f3ec95e96237a86 Mon Sep 17 00:00:00 2001 From: Liu Yi L Date: Thu, 7 Jan 2021 00:03:56 +0800 Subject: iommu/vt-d: Fix general protection fault in aux_detach_device() The aux-domain attach/detach are not tracked, some data structures might be used after free. This causes general protection faults when multiple subdevices are created and assigned to a same guest machine: | general protection fault, probably for non-canonical address 0xdead000000000100: 0000 [#1] SMP NOPTI | RIP: 0010:intel_iommu_aux_detach_device+0x12a/0x1f0 | [...] | Call Trace: | iommu_aux_detach_device+0x24/0x70 | vfio_mdev_detach_domain+0x3b/0x60 | ? vfio_mdev_set_domain+0x50/0x50 | iommu_group_for_each_dev+0x4f/0x80 | vfio_iommu_detach_group.isra.0+0x22/0x30 | vfio_iommu_type1_detach_group.cold+0x71/0x211 | ? find_exported_symbol_in_section+0x4a/0xd0 | ? each_symbol_section+0x28/0x50 | __vfio_group_unset_container+0x4d/0x150 | vfio_group_try_dissolve_container+0x25/0x30 | vfio_group_put_external_user+0x13/0x20 | kvm_vfio_group_put_external_user+0x27/0x40 [kvm] | kvm_vfio_destroy+0x45/0xb0 [kvm] | kvm_put_kvm+0x1bb/0x2e0 [kvm] | kvm_vm_release+0x22/0x30 [kvm] | __fput+0xcc/0x260 | ____fput+0xe/0x10 | task_work_run+0x8f/0xb0 | do_exit+0x358/0xaf0 | ? wake_up_state+0x10/0x20 | ? signal_wake_up_state+0x1a/0x30 | do_group_exit+0x47/0xb0 | __x64_sys_exit_group+0x18/0x20 | do_syscall_64+0x57/0x1d0 | entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fix the crash by tracking the subdevices when attaching and detaching aux-domains. Fixes: 67b8e02b5e76 ("iommu/vt-d: Aux-domain specific domain attach/detach") Co-developed-by: Xin Zeng Signed-off-by: Xin Zeng Signed-off-by: Liu Yi L Acked-by: Lu Baolu Link: https://lore.kernel.org/r/1609949037-25291-3-git-send-email-yi.l.liu@intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/iommu.c | 95 +++++++++++++++++++++++++++++++++------------ include/linux/intel-iommu.h | 16 +++++--- 2 files changed, 82 insertions(+), 29 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 788119c5b021..d7720a836268 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1877,6 +1877,7 @@ static struct dmar_domain *alloc_domain(int flags) domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL; domain->has_iotlb_device = false; INIT_LIST_HEAD(&domain->devices); + INIT_LIST_HEAD(&domain->subdevices); return domain; } @@ -2547,7 +2548,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, info->iommu = iommu; info->pasid_table = NULL; info->auxd_enabled = 0; - INIT_LIST_HEAD(&info->auxiliary_domains); + INIT_LIST_HEAD(&info->subdevices); if (dev && dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(info->dev); @@ -4475,33 +4476,61 @@ is_aux_domain(struct device *dev, struct iommu_domain *domain) domain->type == IOMMU_DOMAIN_UNMANAGED; } -static void auxiliary_link_device(struct dmar_domain *domain, - struct device *dev) +static inline struct subdev_domain_info * +lookup_subdev_info(struct dmar_domain *domain, struct device *dev) +{ + struct subdev_domain_info *sinfo; + + if (!list_empty(&domain->subdevices)) { + list_for_each_entry(sinfo, &domain->subdevices, link_domain) { + if (sinfo->pdev == dev) + return sinfo; + } + } + + return NULL; +} + +static int auxiliary_link_device(struct dmar_domain *domain, + struct device *dev) { struct device_domain_info *info = get_domain_info(dev); + struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev); assert_spin_locked(&device_domain_lock); if (WARN_ON(!info)) - return; + return -EINVAL; + + if (!sinfo) { + sinfo = kzalloc(sizeof(*sinfo), GFP_ATOMIC); + sinfo->domain = domain; + sinfo->pdev = dev; + list_add(&sinfo->link_phys, &info->subdevices); + list_add(&sinfo->link_domain, &domain->subdevices); + } - domain->auxd_refcnt++; - list_add(&domain->auxd, &info->auxiliary_domains); + return ++sinfo->users; } -static void auxiliary_unlink_device(struct dmar_domain *domain, - struct device *dev) +static int auxiliary_unlink_device(struct dmar_domain *domain, + struct device *dev) { struct device_domain_info *info = get_domain_info(dev); + struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev); + int ret; assert_spin_locked(&device_domain_lock); - if (WARN_ON(!info)) - return; + if (WARN_ON(!info || !sinfo || sinfo->users <= 0)) + return -EINVAL; - list_del(&domain->auxd); - domain->auxd_refcnt--; + ret = --sinfo->users; + if (!ret) { + list_del(&sinfo->link_phys); + list_del(&sinfo->link_domain); + kfree(sinfo); + } - if (!domain->auxd_refcnt && domain->default_pasid > 0) - ioasid_put(domain->default_pasid); + return ret; } static int aux_domain_add_dev(struct dmar_domain *domain, @@ -4530,6 +4559,19 @@ static int aux_domain_add_dev(struct dmar_domain *domain, } spin_lock_irqsave(&device_domain_lock, flags); + ret = auxiliary_link_device(domain, dev); + if (ret <= 0) + goto link_failed; + + /* + * Subdevices from the same physical device can be attached to the + * same domain. For such cases, only the first subdevice attachment + * needs to go through the full steps in this function. So if ret > + * 1, just goto out. + */ + if (ret > 1) + goto out; + /* * iommu->lock must be held to attach domain to iommu and setup the * pasid entry for second level translation. @@ -4548,10 +4590,9 @@ static int aux_domain_add_dev(struct dmar_domain *domain, domain->default_pasid); if (ret) goto table_failed; - spin_unlock(&iommu->lock); - - auxiliary_link_device(domain, dev); + spin_unlock(&iommu->lock); +out: spin_unlock_irqrestore(&device_domain_lock, flags); return 0; @@ -4560,8 +4601,10 @@ table_failed: domain_detach_iommu(domain, iommu); attach_failed: spin_unlock(&iommu->lock); + auxiliary_unlink_device(domain, dev); +link_failed: spin_unlock_irqrestore(&device_domain_lock, flags); - if (!domain->auxd_refcnt && domain->default_pasid > 0) + if (list_empty(&domain->subdevices) && domain->default_pasid > 0) ioasid_put(domain->default_pasid); return ret; @@ -4581,14 +4624,18 @@ static void aux_domain_remove_dev(struct dmar_domain *domain, info = get_domain_info(dev); iommu = info->iommu; - auxiliary_unlink_device(domain, dev); - - spin_lock(&iommu->lock); - intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid, false); - domain_detach_iommu(domain, iommu); - spin_unlock(&iommu->lock); + if (!auxiliary_unlink_device(domain, dev)) { + spin_lock(&iommu->lock); + intel_pasid_tear_down_entry(iommu, dev, + domain->default_pasid, false); + domain_detach_iommu(domain, iommu); + spin_unlock(&iommu->lock); + } spin_unlock_irqrestore(&device_domain_lock, flags); + + if (list_empty(&domain->subdevices) && domain->default_pasid > 0) + ioasid_put(domain->default_pasid); } static int prepare_domain_attach_device(struct iommu_domain *domain, diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 94522685a0d9..09c6a0bf3892 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -533,11 +533,10 @@ struct dmar_domain { /* Domain ids per IOMMU. Use u16 since * domain ids are 16 bit wide according * to VT-d spec, section 9.3 */ - unsigned int auxd_refcnt; /* Refcount of auxiliary attaching */ bool has_iotlb_device; struct list_head devices; /* all devices' list */ - struct list_head auxd; /* link to device's auxiliary list */ + struct list_head subdevices; /* all subdevices' list */ struct iova_domain iovad; /* iova's that belong to this domain */ struct dma_pte *pgd; /* virtual address */ @@ -610,14 +609,21 @@ struct intel_iommu { struct dmar_drhd_unit *drhd; }; +/* Per subdevice private data */ +struct subdev_domain_info { + struct list_head link_phys; /* link to phys device siblings */ + struct list_head link_domain; /* link to domain siblings */ + struct device *pdev; /* physical device derived from */ + struct dmar_domain *domain; /* aux-domain */ + int users; /* user count */ +}; + /* PCI domain-device relationship */ struct device_domain_info { struct list_head link; /* link to domain siblings */ struct list_head global; /* link to global list */ struct list_head table; /* link to pasid table */ - struct list_head auxiliary_domains; /* auxiliary domains - * attached to this device - */ + struct list_head subdevices; /* subdevices sibling */ u32 segment; /* PCI segment number */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ -- cgit 1.4.1 From 7c29ada5e70083805bc3a68daa23441df421fbee Mon Sep 17 00:00:00 2001 From: Liu Yi L Date: Thu, 7 Jan 2021 00:03:57 +0800 Subject: iommu/vt-d: Fix ineffective devTLB invalidation for subdevices iommu_flush_dev_iotlb() is called to invalidate caches on a device but only loops over the devices which are fully-attached to the domain. For sub-devices, this is ineffective and can result in invalid caching entries left on the device. Fix the missing invalidation by adding a loop over the subdevices and ensuring that 'domain->has_iotlb_device' is updated when attaching to subdevices. Fixes: 67b8e02b5e76 ("iommu/vt-d: Aux-domain specific domain attach/detach") Signed-off-by: Liu Yi L Acked-by: Lu Baolu Link: https://lore.kernel.org/r/1609949037-25291-4-git-send-email-yi.l.liu@intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/iommu.c | 53 +++++++++++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index d7720a836268..65cf06d70bf4 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -719,6 +719,8 @@ static int domain_update_device_node(struct dmar_domain *domain) return nid; } +static void domain_update_iotlb(struct dmar_domain *domain); + /* Some capabilities may be different across iommus */ static void domain_update_iommu_cap(struct dmar_domain *domain) { @@ -744,6 +746,8 @@ static void domain_update_iommu_cap(struct dmar_domain *domain) domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1); else domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw); + + domain_update_iotlb(domain); } struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, @@ -1464,17 +1468,22 @@ static void domain_update_iotlb(struct dmar_domain *domain) assert_spin_locked(&device_domain_lock); - list_for_each_entry(info, &domain->devices, link) { - struct pci_dev *pdev; - - if (!info->dev || !dev_is_pci(info->dev)) - continue; - - pdev = to_pci_dev(info->dev); - if (pdev->ats_enabled) { + list_for_each_entry(info, &domain->devices, link) + if (info->ats_enabled) { has_iotlb_device = true; break; } + + if (!has_iotlb_device) { + struct subdev_domain_info *sinfo; + + list_for_each_entry(sinfo, &domain->subdevices, link_domain) { + info = get_domain_info(sinfo->pdev); + if (info && info->ats_enabled) { + has_iotlb_device = true; + break; + } + } } domain->has_iotlb_device = has_iotlb_device; @@ -1555,25 +1564,37 @@ static void iommu_disable_dev_iotlb(struct device_domain_info *info) #endif } +static void __iommu_flush_dev_iotlb(struct device_domain_info *info, + u64 addr, unsigned int mask) +{ + u16 sid, qdep; + + if (!info || !info->ats_enabled) + return; + + sid = info->bus << 8 | info->devfn; + qdep = info->ats_qdep; + qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, + qdep, addr, mask); +} + static void iommu_flush_dev_iotlb(struct dmar_domain *domain, u64 addr, unsigned mask) { - u16 sid, qdep; unsigned long flags; struct device_domain_info *info; + struct subdev_domain_info *sinfo; if (!domain->has_iotlb_device) return; spin_lock_irqsave(&device_domain_lock, flags); - list_for_each_entry(info, &domain->devices, link) { - if (!info->ats_enabled) - continue; + list_for_each_entry(info, &domain->devices, link) + __iommu_flush_dev_iotlb(info, addr, mask); - sid = info->bus << 8 | info->devfn; - qdep = info->ats_qdep; - qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, - qdep, addr, mask); + list_for_each_entry(sinfo, &domain->subdevices, link_domain) { + info = get_domain_info(sinfo->pdev); + __iommu_flush_dev_iotlb(info, addr, mask); } spin_unlock_irqrestore(&device_domain_lock, flags); } -- cgit 1.4.1 From 80c18e4ac20c9cde420cb3ffab48c936147cf07d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 7 Jan 2021 03:15:41 +0000 Subject: io_uring: trigger eventfd for IOPOLL Make sure io_iopoll_complete() tries to wake up eventfd, which currently is skipped together with io_cqring_ev_posted() for non-SQPOLL IOPOLL. Add an iopoll version of io_cqring_ev_posted(), duplicates a bit of code, but they actually use different sets of wait queues may be for better. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 27a8c226abf8..91e517ad1421 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1713,6 +1713,16 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) eventfd_signal(ctx->cq_ev_fd, 1); } +static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) +{ + if (ctx->flags & IORING_SETUP_SQPOLL) { + if (waitqueue_active(&ctx->wait)) + wake_up(&ctx->wait); + } + if (io_should_trigger_evfd(ctx)) + eventfd_signal(ctx->cq_ev_fd, 1); +} + /* Returns true if there are no backlogged entries after the flush */ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, struct task_struct *tsk, @@ -2428,8 +2438,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, } io_commit_cqring(ctx); - if (ctx->flags & IORING_SETUP_SQPOLL) - io_cqring_ev_posted(ctx); + io_cqring_ev_posted_iopoll(ctx); io_req_free_batch_finish(ctx, &rb); if (!list_empty(&again)) -- cgit 1.4.1 From 4aa84f2ffa81f71e15e5cffc2cc6090dbee78f8e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 7 Jan 2021 03:15:42 +0000 Subject: io_uring: dont kill fasync under completion_lock CPU0 CPU1 ---- ---- lock(&new->fa_lock); local_irq_disable(); lock(&ctx->completion_lock); lock(&new->fa_lock); lock(&ctx->completion_lock); *** DEADLOCK *** Move kill_fasync() out of io_commit_cqring() to io_cqring_ev_posted(), so it doesn't hold completion_lock while doing it. That saves from the reported deadlock, and it's just nice to shorten the locking time and untangle nested locks (compl_lock -> wq_head::lock). Reported-by: syzbot+91ca3f25bd7f795f019c@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 91e517ad1421..401316fe2ae2 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1345,11 +1345,6 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx) /* order cqe stores with ring update */ smp_store_release(&rings->cq.tail, ctx->cached_cq_tail); - - if (wq_has_sleeper(&ctx->cq_wait)) { - wake_up_interruptible(&ctx->cq_wait); - kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN); - } } static void io_put_identity(struct io_uring_task *tctx, struct io_kiocb *req) @@ -1711,6 +1706,10 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) wake_up(&ctx->sq_data->wait); if (io_should_trigger_evfd(ctx)) eventfd_signal(ctx->cq_ev_fd, 1); + if (wq_has_sleeper(&ctx->cq_wait)) { + wake_up_interruptible(&ctx->cq_wait); + kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN); + } } static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) @@ -1721,6 +1720,10 @@ static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) } if (io_should_trigger_evfd(ctx)) eventfd_signal(ctx->cq_ev_fd, 1); + if (wq_has_sleeper(&ctx->cq_wait)) { + wake_up_interruptible(&ctx->cq_wait); + kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN); + } } /* Returns true if there are no backlogged entries after the flush */ -- cgit 1.4.1 From b1445e59cc9a10fdb8f83810ae1f4feb941ab36b Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 7 Jan 2021 03:15:43 +0000 Subject: io_uring: synchronise ev_posted() with waitqueues waitqueue_active() needs smp_mb() to be in sync with waitqueues modification, but we miss it in io_cqring_ev_posted*() apart from cq_wait() case. Take an smb_mb() out of wq_has_sleeper() making it waitqueue_active(), and place it a few lines before, so it can synchronise other waitqueue_active() as well. The patch doesn't add any additional overhead, so even if there are no problems currently, it's just safer to have it this way. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 401316fe2ae2..cb57e0360fcb 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1700,13 +1700,16 @@ static inline unsigned __io_cqring_events(struct io_ring_ctx *ctx) static void io_cqring_ev_posted(struct io_ring_ctx *ctx) { + /* see waitqueue_active() comment */ + smp_mb(); + if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); if (ctx->sq_data && waitqueue_active(&ctx->sq_data->wait)) wake_up(&ctx->sq_data->wait); if (io_should_trigger_evfd(ctx)) eventfd_signal(ctx->cq_ev_fd, 1); - if (wq_has_sleeper(&ctx->cq_wait)) { + if (waitqueue_active(&ctx->cq_wait)) { wake_up_interruptible(&ctx->cq_wait); kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN); } @@ -1714,13 +1717,16 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) { + /* see waitqueue_active() comment */ + smp_mb(); + if (ctx->flags & IORING_SETUP_SQPOLL) { if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); } if (io_should_trigger_evfd(ctx)) eventfd_signal(ctx->cq_ev_fd, 1); - if (wq_has_sleeper(&ctx->cq_wait)) { + if (waitqueue_active(&ctx->cq_wait)) { wake_up_interruptible(&ctx->cq_wait); kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN); } -- cgit 1.4.1 From 71008734d27f2276fcef23a5e546d358430f2d52 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 16 Dec 2020 11:18:44 -0500 Subject: btrfs: print the actual offset in btrfs_root_name We're supposed to print the root_key.offset in btrfs_root_name in the case of a reloc root, not the objectid. Fix this helper to take the key so we have access to the offset when we need it. Fixes: 457f1864b569 ("btrfs: pretty print leaked root name") Reviewed-by: Qu Wenruo Reviewed-by: Nikolay Borisov Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/print-tree.c | 10 +++++----- fs/btrfs/print-tree.h | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1dfd4b2d0e1e..6b35b7e88136 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1457,7 +1457,7 @@ void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info) root = list_first_entry(&fs_info->allocated_roots, struct btrfs_root, leak_list); btrfs_err(fs_info, "leaked root %s refcount %d", - btrfs_root_name(root->root_key.objectid, buf), + btrfs_root_name(&root->root_key, buf), refcount_read(&root->refs)); while (refcount_read(&root->refs) > 1) btrfs_put_root(root); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index fe5e0026129d..aae1027bd76a 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -26,22 +26,22 @@ static const struct root_name_map root_map[] = { { BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" }, }; -const char *btrfs_root_name(u64 objectid, char *buf) +const char *btrfs_root_name(const struct btrfs_key *key, char *buf) { int i; - if (objectid == BTRFS_TREE_RELOC_OBJECTID) { + if (key->objectid == BTRFS_TREE_RELOC_OBJECTID) { snprintf(buf, BTRFS_ROOT_NAME_BUF_LEN, - "TREE_RELOC offset=%llu", objectid); + "TREE_RELOC offset=%llu", key->offset); return buf; } for (i = 0; i < ARRAY_SIZE(root_map); i++) { - if (root_map[i].id == objectid) + if (root_map[i].id == key->objectid) return root_map[i].name; } - snprintf(buf, BTRFS_ROOT_NAME_BUF_LEN, "%llu", objectid); + snprintf(buf, BTRFS_ROOT_NAME_BUF_LEN, "%llu", key->objectid); return buf; } diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 78b99385a503..8c3e9319ec4e 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -11,6 +11,6 @@ void btrfs_print_leaf(struct extent_buffer *l); void btrfs_print_tree(struct extent_buffer *c, bool follow); -const char *btrfs_root_name(u64 objectid, char *buf); +const char *btrfs_root_name(const struct btrfs_key *key, char *buf); #endif -- cgit 1.4.1 From 29b665cc51e8b602bf2a275734349494776e3dbc Mon Sep 17 00:00:00 2001 From: Su Yue Date: Sun, 3 Jan 2021 17:28:03 +0800 Subject: btrfs: prevent NULL pointer dereference in extent_io_tree_panic Some extent io trees are initialized with NULL private member (e.g. btrfs_device::alloc_state and btrfs_fs_info::excluded_extents). Dereference of a NULL tree->private as inode pointer will cause panic. Pass tree->fs_info as it's known to be valid in all cases. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=208929 Fixes: 05912a3c04eb ("btrfs: drop extent_io_ops::tree_fs_info callback") CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Anand Jain Signed-off-by: Su Yue Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 6e3b72e63e42..c9cee458e001 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -676,9 +676,7 @@ alloc_extent_state_atomic(struct extent_state *prealloc) static void extent_io_tree_panic(struct extent_io_tree *tree, int err) { - struct inode *inode = tree->private_data; - - btrfs_panic(btrfs_sb(inode->i_sb), err, + btrfs_panic(tree->fs_info, err, "locking error: extent tree was modified by another thread while locked"); } -- cgit 1.4.1 From 347fb0cfc9bab5195c6701e62eda488310d7938f Mon Sep 17 00:00:00 2001 From: Su Yue Date: Sun, 3 Jan 2021 17:28:04 +0800 Subject: btrfs: tree-checker: check if chunk item end overflows While mounting a crafted image provided by user, kernel panics due to the invalid chunk item whose end is less than start. [66.387422] loop: module loaded [66.389773] loop0: detected capacity change from 262144 to 0 [66.427708] BTRFS: device fsid a62e00e8-e94e-4200-8217-12444de93c2e devid 1 transid 12 /dev/loop0 scanned by mount (613) [66.431061] BTRFS info (device loop0): disk space caching is enabled [66.431078] BTRFS info (device loop0): has skinny extents [66.437101] BTRFS error: insert state: end < start 29360127 37748736 [66.437136] ------------[ cut here ]------------ [66.437140] WARNING: CPU: 16 PID: 613 at fs/btrfs/extent_io.c:557 insert_state.cold+0x1a/0x46 [btrfs] [66.437369] CPU: 16 PID: 613 Comm: mount Tainted: G O 5.11.0-rc1-custom #45 [66.437374] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ArchLinux 1.14.0-1 04/01/2014 [66.437378] RIP: 0010:insert_state.cold+0x1a/0x46 [btrfs] [66.437420] RSP: 0018:ffff93e5414c3908 EFLAGS: 00010286 [66.437427] RAX: 0000000000000000 RBX: 0000000001bfffff RCX: 0000000000000000 [66.437431] RDX: 0000000000000000 RSI: ffffffffb90d4660 RDI: 00000000ffffffff [66.437434] RBP: ffff93e5414c3938 R08: 0000000000000001 R09: 0000000000000001 [66.437438] R10: ffff93e5414c3658 R11: 0000000000000000 R12: ffff8ec782d72aa0 [66.437441] R13: ffff8ec78bc71628 R14: 0000000000000000 R15: 0000000002400000 [66.437447] FS: 00007f01386a8580(0000) GS:ffff8ec809000000(0000) knlGS:0000000000000000 [66.437451] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [66.437455] CR2: 00007f01382fa000 CR3: 0000000109a34000 CR4: 0000000000750ee0 [66.437460] PKRU: 55555554 [66.437464] Call Trace: [66.437475] set_extent_bit+0x652/0x740 [btrfs] [66.437539] set_extent_bits_nowait+0x1d/0x20 [btrfs] [66.437576] add_extent_mapping+0x1e0/0x2f0 [btrfs] [66.437621] read_one_chunk+0x33c/0x420 [btrfs] [66.437674] btrfs_read_chunk_tree+0x6a4/0x870 [btrfs] [66.437708] ? kvm_sched_clock_read+0x18/0x40 [66.437739] open_ctree+0xb32/0x1734 [btrfs] [66.437781] ? bdi_register_va+0x1b/0x20 [66.437788] ? super_setup_bdi_name+0x79/0xd0 [66.437810] btrfs_mount_root.cold+0x12/0xeb [btrfs] [66.437854] ? __kmalloc_track_caller+0x217/0x3b0 [66.437873] legacy_get_tree+0x34/0x60 [66.437880] vfs_get_tree+0x2d/0xc0 [66.437888] vfs_kern_mount.part.0+0x78/0xc0 [66.437897] vfs_kern_mount+0x13/0x20 [66.437902] btrfs_mount+0x11f/0x3c0 [btrfs] [66.437940] ? kfree+0x5ff/0x670 [66.437944] ? __kmalloc_track_caller+0x217/0x3b0 [66.437962] legacy_get_tree+0x34/0x60 [66.437974] vfs_get_tree+0x2d/0xc0 [66.437983] path_mount+0x48c/0xd30 [66.437998] __x64_sys_mount+0x108/0x140 [66.438011] do_syscall_64+0x38/0x50 [66.438018] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [66.438023] RIP: 0033:0x7f0138827f6e [66.438033] RSP: 002b:00007ffecd79edf8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 [66.438040] RAX: ffffffffffffffda RBX: 00007f013894c264 RCX: 00007f0138827f6e [66.438044] RDX: 00005593a4a41360 RSI: 00005593a4a33690 RDI: 00005593a4a3a6c0 [66.438047] RBP: 00005593a4a33440 R08: 0000000000000000 R09: 0000000000000001 [66.438050] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 [66.438054] R13: 00005593a4a3a6c0 R14: 00005593a4a41360 R15: 00005593a4a33440 [66.438078] irq event stamp: 18169 [66.438082] hardirqs last enabled at (18175): [] console_unlock+0x4ff/0x5f0 [66.438088] hardirqs last disabled at (18180): [] console_unlock+0x467/0x5f0 [66.438092] softirqs last enabled at (16910): [] asm_call_irq_on_stack+0x12/0x20 [66.438097] softirqs last disabled at (16905): [] asm_call_irq_on_stack+0x12/0x20 [66.438103] ---[ end trace e114b111db64298b ]--- [66.438107] BTRFS error: found node 12582912 29360127 on insert of 37748736 29360127 [66.438127] BTRFS critical: panic in extent_io_tree_panic:679: locking error: extent tree was modified by another thread while locked (errno=-17 Object already exists) [66.441069] ------------[ cut here ]------------ [66.441072] kernel BUG at fs/btrfs/extent_io.c:679! [66.442064] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [66.443018] CPU: 16 PID: 613 Comm: mount Tainted: G W O 5.11.0-rc1-custom #45 [66.444538] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ArchLinux 1.14.0-1 04/01/2014 [66.446223] RIP: 0010:extent_io_tree_panic.isra.0+0x23/0x25 [btrfs] [66.450878] RSP: 0018:ffff93e5414c3948 EFLAGS: 00010246 [66.451840] RAX: 0000000000000000 RBX: 0000000001bfffff RCX: 0000000000000000 [66.453141] RDX: 0000000000000000 RSI: ffffffffb90d4660 RDI: 00000000ffffffff [66.454445] RBP: ffff93e5414c3948 R08: 0000000000000001 R09: 0000000000000001 [66.455743] R10: ffff93e5414c3658 R11: 0000000000000000 R12: ffff8ec782d728c0 [66.457055] R13: ffff8ec78bc71628 R14: ffff8ec782d72aa0 R15: 0000000002400000 [66.458356] FS: 00007f01386a8580(0000) GS:ffff8ec809000000(0000) knlGS:0000000000000000 [66.459841] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [66.460895] CR2: 00007f01382fa000 CR3: 0000000109a34000 CR4: 0000000000750ee0 [66.462196] PKRU: 55555554 [66.462692] Call Trace: [66.463139] set_extent_bit.cold+0x30/0x98 [btrfs] [66.464049] set_extent_bits_nowait+0x1d/0x20 [btrfs] [66.490466] add_extent_mapping+0x1e0/0x2f0 [btrfs] [66.514097] read_one_chunk+0x33c/0x420 [btrfs] [66.534976] btrfs_read_chunk_tree+0x6a4/0x870 [btrfs] [66.555718] ? kvm_sched_clock_read+0x18/0x40 [66.575758] open_ctree+0xb32/0x1734 [btrfs] [66.595272] ? bdi_register_va+0x1b/0x20 [66.614638] ? super_setup_bdi_name+0x79/0xd0 [66.633809] btrfs_mount_root.cold+0x12/0xeb [btrfs] [66.652938] ? __kmalloc_track_caller+0x217/0x3b0 [66.671925] legacy_get_tree+0x34/0x60 [66.690300] vfs_get_tree+0x2d/0xc0 [66.708221] vfs_kern_mount.part.0+0x78/0xc0 [66.725808] vfs_kern_mount+0x13/0x20 [66.742730] btrfs_mount+0x11f/0x3c0 [btrfs] [66.759350] ? kfree+0x5ff/0x670 [66.775441] ? __kmalloc_track_caller+0x217/0x3b0 [66.791750] legacy_get_tree+0x34/0x60 [66.807494] vfs_get_tree+0x2d/0xc0 [66.823349] path_mount+0x48c/0xd30 [66.838753] __x64_sys_mount+0x108/0x140 [66.854412] do_syscall_64+0x38/0x50 [66.869673] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [66.885093] RIP: 0033:0x7f0138827f6e [66.945613] RSP: 002b:00007ffecd79edf8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 [66.977214] RAX: ffffffffffffffda RBX: 00007f013894c264 RCX: 00007f0138827f6e [66.994266] RDX: 00005593a4a41360 RSI: 00005593a4a33690 RDI: 00005593a4a3a6c0 [67.011544] RBP: 00005593a4a33440 R08: 0000000000000000 R09: 0000000000000001 [67.028836] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 [67.045812] R13: 00005593a4a3a6c0 R14: 00005593a4a41360 R15: 00005593a4a33440 [67.216138] ---[ end trace e114b111db64298c ]--- [67.237089] RIP: 0010:extent_io_tree_panic.isra.0+0x23/0x25 [btrfs] [67.325317] RSP: 0018:ffff93e5414c3948 EFLAGS: 00010246 [67.347946] RAX: 0000000000000000 RBX: 0000000001bfffff RCX: 0000000000000000 [67.371343] RDX: 0000000000000000 RSI: ffffffffb90d4660 RDI: 00000000ffffffff [67.394757] RBP: ffff93e5414c3948 R08: 0000000000000001 R09: 0000000000000001 [67.418409] R10: ffff93e5414c3658 R11: 0000000000000000 R12: ffff8ec782d728c0 [67.441906] R13: ffff8ec78bc71628 R14: ffff8ec782d72aa0 R15: 0000000002400000 [67.465436] FS: 00007f01386a8580(0000) GS:ffff8ec809000000(0000) knlGS:0000000000000000 [67.511660] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [67.535047] CR2: 00007f01382fa000 CR3: 0000000109a34000 CR4: 0000000000750ee0 [67.558449] PKRU: 55555554 [67.581146] note: mount[613] exited with preempt_count 2 The image has a chunk item which has a logical start 37748736 and length 18446744073701163008 (-8M). The calculated end 29360127 overflows. EEXIST was caught by insert_state() because of the duplicate end and extent_io_tree_panic() was called. Add overflow check of chunk item end to tree checker so it can be detected early at mount time. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=208929 CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Anand Jain Signed-off-by: Su Yue Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 028e733e42f3..582061c7b547 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -760,6 +760,7 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf, { struct btrfs_fs_info *fs_info = leaf->fs_info; u64 length; + u64 chunk_end; u64 stripe_len; u16 num_stripes; u16 sub_stripes; @@ -814,6 +815,12 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf, "invalid chunk length, have %llu", length); return -EUCLEAN; } + if (unlikely(check_add_overflow(logical, length, &chunk_end))) { + chunk_err(leaf, chunk, logical, +"invalid chunk logical start and length, have logical start %llu length %llu", + logical, length); + return -EUCLEAN; + } if (unlikely(!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN)) { chunk_err(leaf, chunk, logical, "invalid chunk stripe length: %llu", -- cgit 1.4.1 From 50e31ef486afe60f128d42fb9620e2a63172c15c Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 29 Dec 2020 21:29:34 +0800 Subject: btrfs: reloc: fix wrong file extent type check to avoid false ENOENT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [BUG] There are several bug reports about recent kernel unable to relocate certain data block groups. Sometimes the error just goes away, but there is one reporter who can reproduce it reliably. The dmesg would look like: [438.260483] BTRFS info (device dm-10): balance: start -dvrange=34625344765952..34625344765953 [438.269018] BTRFS info (device dm-10): relocating block group 34625344765952 flags data|raid1 [450.439609] BTRFS info (device dm-10): found 167 extents, stage: move data extents [463.501781] BTRFS info (device dm-10): balance: ended with status: -2 [CAUSE] The ENOENT error is returned from the following call chain: add_data_references() |- delete_v1_space_cache(); |- if (!found) return -ENOENT; The variable @found is set to true if we find a data extent whose disk bytenr matches parameter @data_bytes. With extra debugging, the offending tree block looks like this: leaf bytenr = 42676709441536, data_bytenr = 34626327621632 ctime 1567904822.739884119 (2019-09-08 03:07:02) mtime 0.0 (1970-01-01 01:00:00) otime 0.0 (1970-01-01 01:00:00) item 27 key (51933 EXTENT_DATA 0) itemoff 9854 itemsize 53 generation 1517381 type 2 (prealloc) prealloc data disk byte 34626327621632 nr 262144 <<< prealloc data offset 0 nr 262144 item 28 key (52262 ROOT_ITEM 0) itemoff 9415 itemsize 439 generation 2618893 root_dirid 256 bytenr 42677048360960 level 3 refs 1 lastsnap 2618893 byte_limit 0 bytes_used 5557338112 flags 0x0(none) uuid d0d4361f-d231-6d40-8901-fe506e4b2b53 Although item 27 has disk bytenr 34626327621632, which matches the data_bytenr, its type is prealloc, not reg. This makes the existing code skip that item, and return ENOENT. [FIX] The code is modified in commit 19b546d7a1b2 ("btrfs: relocation: Use btrfs_find_all_leafs to locate data extent parent tree leaves"), before that commit, we use something like "if (type == BTRFS_FILE_EXTENT_INLINE) continue;" But in that offending commit, we use (type == BTRFS_FILE_EXTENT_REG), ignoring BTRFS_FILE_EXTENT_PREALLOC. Fix it by also checking BTRFS_FILE_EXTENT_PREALLOC. Reported-by: Stéphane Lesimple Link: https://lore.kernel.org/linux-btrfs/505cabfa88575ed6dbe7cb922d8914fb@lesimple.fr Fixes: 19b546d7a1b2 ("btrfs: relocation: Use btrfs_find_all_leafs to locate data extent parent tree leaves") CC: stable@vger.kernel.org # 5.6+ Tested-By: Stéphane Lesimple Reviewed-by: Su Yue Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 19b7db8b2117..df63ef64c5c0 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2975,11 +2975,16 @@ static int delete_v1_space_cache(struct extent_buffer *leaf, return 0; for (i = 0; i < btrfs_header_nritems(leaf); i++) { + u8 type; + btrfs_item_key_to_cpu(leaf, &key, i); if (key.type != BTRFS_EXTENT_DATA_KEY) continue; ei = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(leaf, ei) == BTRFS_FILE_EXTENT_REG && + type = btrfs_file_extent_type(leaf, ei); + + if ((type == BTRFS_FILE_EXTENT_REG || + type == BTRFS_FILE_EXTENT_PREALLOC) && btrfs_file_extent_disk_bytenr(leaf, ei) == data_bytenr) { found = true; space_cache_ino = key.objectid; -- cgit 1.4.1 From 04a6a536bc3fd1436fc78c546c6b3ecdccbfaf6d Mon Sep 17 00:00:00 2001 From: Satya Tangirala Date: Thu, 24 Dec 2020 04:49:54 +0000 Subject: fs: Fix freeze_bdev()/thaw_bdev() accounting of bd_fsfreeze_sb freeze/thaw_bdev() currently use bdev->bd_fsfreeze_count to infer whether or not bdev->bd_fsfreeze_sb is valid (it's valid iff bd_fsfreeze_count is non-zero). thaw_bdev() doesn't nullify bd_fsfreeze_sb. But this means a freeze_bdev() call followed by a thaw_bdev() call can leave bd_fsfreeze_sb with a non-null value, while bd_fsfreeze_count is zero. If freeze_bdev() is called again, and this time get_active_super() returns NULL (e.g. because the FS is unmounted), we'll end up with bd_fsfreeze_count > 0, but bd_fsfreeze_sb is *untouched* - it stays the same (now garbage) value. A subsequent thaw_bdev() will decide that the bd_fsfreeze_sb value is legitimate (since bd_fsfreeze_count > 0), and attempt to use it. Fix this by always setting bd_fsfreeze_sb to NULL when bd_fsfreeze_count is successfully decremented to 0 in thaw_sb(). Alternatively, we could set bd_fsfreeze_sb to whatever get_active_super() returns in freeze_bdev() whenever bd_fsfreeze_count is successfully incremented to 1 from 0 (which can be achieved cleanly by moving the line currently setting bd_fsfreeze_sb to immediately after the "sync:" label, but it might be a little too subtle/easily overlooked in future). This fixes the currently panicking xfstests generic/085. Fixes: 040f04bd2e82 ("fs: simplify freeze_bdev/thaw_bdev") Signed-off-by: Satya Tangirala Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Jens Axboe --- fs/block_dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/block_dev.c b/fs/block_dev.c index 3e5b02f6606c..e454c5a81043 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -605,6 +605,8 @@ int thaw_bdev(struct block_device *bdev) error = thaw_super(sb); if (error) bdev->bd_fsfreeze_count++; + else + bdev->bd_fsfreeze_sb = NULL; out: mutex_unlock(&bdev->bd_fsfreeze_mutex); return error; -- cgit 1.4.1 From 17ffd35809c34b9564edb10727d02eb62958ba5c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 5 Jan 2021 19:20:29 +0100 Subject: cpufreq: intel_pstate: Use HWP capabilities in intel_cpufreq_adjust_perf() If turbo P-states cannot be used, either due to the configuration of the processor, or because intel_pstate is not allowed to used them, the maximum available P-state with HWP enabled corresponds to the HWP_CAP.GUARANTEED value which is not static. It can be adjusted by an out-of-band agent or during an Intel Speed Select performance level change, so long as it remains less than or equal to HWP_CAP.MAX. However, if turbo P-states cannot be used, intel_cpufreq_adjust_perf() always uses pstate.max_pstate (set during the initialization of the driver only) as the maximum available P-state, so it may miss a change of the HWP_CAP.GUARANTEED value. Prevent that from happening by modifyig intel_cpufreq_adjust_perf() to always read the "guaranteed" and "maximum turbo" performance levels from the cached HWP_CAP value. Fixes: a365ab6b9dfb ("cpufreq: intel_pstate: Implement the ->adjust_perf() callback") Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada --- drivers/cpufreq/intel_pstate.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 1a660466dd75..32bc11851b8d 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2653,12 +2653,13 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum, unsigned long capacity) { struct cpudata *cpu = all_cpu_data[cpunum]; + u64 hwp_cap = READ_ONCE(cpu->hwp_cap_cached); int old_pstate = cpu->pstate.current_pstate; int cap_pstate, min_pstate, max_pstate, target_pstate; update_turbo_state(); - cap_pstate = global.turbo_disabled ? cpu->pstate.max_pstate : - cpu->pstate.turbo_pstate; + cap_pstate = global.turbo_disabled ? HWP_GUARANTEED_PERF(hwp_cap) : + HWP_HIGHEST_PERF(hwp_cap); /* Optimization: Avoid unnecessary divisions. */ -- cgit 1.4.1 From 943bdd0cecad06da8392a33093230e30e501eccc Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 5 Jan 2021 10:19:57 +0000 Subject: cpufreq: powernow-k8: pass policy rather than use cpufreq_cpu_get() Currently there is an unlikely case where cpufreq_cpu_get() returns a NULL policy and this will cause a NULL pointer dereference later on. Fix this by passing the policy to transition_frequency_fidvid() from the caller and hence eliminating the need for the cpufreq_cpu_get() and cpufreq_cpu_put(). Thanks to Viresh Kumar for suggesting the fix. Addresses-Coverity: ("Dereference null return") Fixes: b43a7ffbf33b ("cpufreq: Notify all policy->cpus in cpufreq_notify_transition()") Suggested-by: Viresh Kumar Signed-off-by: Colin Ian King Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/powernow-k8.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index 0acc9e241cd7..b9ccb6a3dad9 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -878,9 +878,9 @@ static int get_transition_latency(struct powernow_k8_data *data) /* Take a frequency, and issue the fid/vid transition command */ static int transition_frequency_fidvid(struct powernow_k8_data *data, - unsigned int index) + unsigned int index, + struct cpufreq_policy *policy) { - struct cpufreq_policy *policy; u32 fid = 0; u32 vid = 0; int res; @@ -912,9 +912,6 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, freqs.old = find_khz_freq_from_fid(data->currfid); freqs.new = find_khz_freq_from_fid(fid); - policy = cpufreq_cpu_get(smp_processor_id()); - cpufreq_cpu_put(policy); - cpufreq_freq_transition_begin(policy, &freqs); res = transition_fid_vid(data, fid, vid); cpufreq_freq_transition_end(policy, &freqs, res); @@ -969,7 +966,7 @@ static long powernowk8_target_fn(void *arg) powernow_k8_acpi_pst_values(data, newstate); - ret = transition_frequency_fidvid(data, newstate); + ret = transition_frequency_fidvid(data, newstate, pol); if (ret) { pr_err("transition frequency failed\n"); -- cgit 1.4.1 From aa7a1bb02bb44399be69b0a1cbb6495d9eec29fc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 5 Jan 2021 19:19:18 +0100 Subject: ACPI: PM: s2idle: Drop unused local variables and related code Two local variables in drivers/acpi/x86/s2idle.c are never read, so drop them along with the code updating their values (in vain). Fixes: fef98671194b ("ACPI: PM: s2idle: Move x86-specific code to the x86 directory") Signed-off-by: Rafael J. Wysocki --- drivers/acpi/x86/s2idle.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c index 25fea34b544c..2b69536cdccb 100644 --- a/drivers/acpi/x86/s2idle.c +++ b/drivers/acpi/x86/s2idle.c @@ -105,18 +105,8 @@ static void lpi_device_get_constraints_amd(void) for (i = 0; i < out_obj->package.count; i++) { union acpi_object *package = &out_obj->package.elements[i]; - struct lpi_device_info_amd info = { }; - if (package->type == ACPI_TYPE_INTEGER) { - switch (i) { - case 0: - info.revision = package->integer.value; - break; - case 1: - info.count = package->integer.value; - break; - } - } else if (package->type == ACPI_TYPE_PACKAGE) { + if (package->type == ACPI_TYPE_PACKAGE) { lpi_constraints_table = kcalloc(package->package.count, sizeof(*lpi_constraints_table), GFP_KERNEL); @@ -135,12 +125,10 @@ static void lpi_device_get_constraints_amd(void) for (k = 0; k < info_obj->package.count; ++k) { union acpi_object *obj = &info_obj->package.elements[k]; - union acpi_object *obj_new; list = &lpi_constraints_table[lpi_constraints_table_size]; list->min_dstate = -1; - obj_new = &obj[k]; switch (k) { case 0: dev_info.enabled = obj->integer.value; -- cgit 1.4.1 From ee61cfd955a64a58ed35cbcfc54068fcbd486945 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 31 Dec 2020 19:35:25 +0800 Subject: ACPI: scan: add stub acpi_create_platform_device() for !CONFIG_ACPI It adds a stub acpi_create_platform_device() for !CONFIG_ACPI build, so that caller doesn't have to deal with !CONFIG_ACPI build issue. Reported-by: kernel test robot Signed-off-by: Shawn Guo Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 2630c2e953f7..053bf05fb1f7 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -885,6 +885,13 @@ static inline int acpi_device_modalias(struct device *dev, return -ENODEV; } +static inline struct platform_device * +acpi_create_platform_device(struct acpi_device *adev, + struct property_entry *properties) +{ + return NULL; +} + static inline bool acpi_dma_supported(struct acpi_device *adev) { return false; -- cgit 1.4.1 From 240bdc605e6a9d0309bd003de3413f6f729eca18 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Tue, 29 Dec 2020 11:17:59 +0000 Subject: ACPI: Update Kconfig help text for items that are no longer modular The CONTAINER and HOTPLUG_MEMORY options mention modules but are bool only, so if selected are always built in. Drop the help text about modules. Signed-off-by: Peter Robinson Signed-off-by: Rafael J. Wysocki --- drivers/acpi/Kconfig | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index edf1558c1105..ebcf534514be 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -395,9 +395,6 @@ config ACPI_CONTAINER This helps support hotplug of nodes, CPUs, and memory. - To compile this driver as a module, choose M here: - the module will be called container. - config ACPI_HOTPLUG_MEMORY bool "Memory Hotplug" depends on MEMORY_HOTPLUG @@ -411,9 +408,6 @@ config ACPI_HOTPLUG_MEMORY removing memory devices at runtime, you need not enable this driver. - To compile this driver as a module, choose M here: - the module will be called acpi_memhotplug. - config ACPI_HOTPLUG_IOAPIC bool depends on PCI -- cgit 1.4.1 From 47f4469970d8861bc06d2d4d45ac8200ff07c693 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Tue, 5 Jan 2021 17:11:45 +0800 Subject: Revert "device property: Keep secondary firmware node secondary by type" While commit d5dcce0c414f ("device property: Keep secondary firmware node secondary by type") describes everything correct in its commit message, the change it made does the opposite and original commit c15e1bdda436 ("device property: Fix the secondary firmware node handling in set_primary_fwnode()") was fully correct. Revert the former one here and improve documentation in the next patch. Fixes: d5dcce0c414f ("device property: Keep secondary firmware node secondary by type") Signed-off-by: Bard Liao Reviewed-by: Andy Shevchenko Reviewed-by: Heikki Krogerus Cc: 5.10+ # 5.10+ Signed-off-by: Rafael J. Wysocki --- drivers/base/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 25e08e5f40bd..51b9545a050b 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -4433,7 +4433,7 @@ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) if (fwnode_is_primary(fn)) { dev->fwnode = fn->secondary; if (!(parent && fn == parent->fwnode)) - fn->secondary = ERR_PTR(-ENODEV); + fn->secondary = NULL; } else { dev->fwnode = NULL; } -- cgit 1.4.1 From 3f7bddaf5d5a83aa2eb1e6d72db221d3ec43c813 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Tue, 5 Jan 2021 17:11:46 +0800 Subject: device property: add description of fwnode cases There are only four valid fwnode cases which are - primary --> secondary --> -ENODEV - primary --> NULL - secondary --> -ENODEV - NULL dev->fwnode should be converted between the 4 cases above no matter how/when set_primary_fwnode() and set_secondary_fwnode() are called. Describe it in the code so people will keep it in mind. Signed-off-by: Bard Liao Reviewed-by: Andy Shevchenko Reviewed-by: Heikki Krogerus [ rjw: Comment edit ] Signed-off-by: Rafael J. Wysocki --- drivers/base/core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/base/core.c b/drivers/base/core.c index 51b9545a050b..14f165816742 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -4414,6 +4414,12 @@ static inline bool fwnode_is_primary(struct fwnode_handle *fwnode) * * Set the device's firmware node pointer to @fwnode, but if a secondary * firmware node of the device is present, preserve it. + * + * Valid fwnode cases are: + * - primary --> secondary --> -ENODEV + * - primary --> NULL + * - secondary --> -ENODEV + * - NULL */ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) { @@ -4432,6 +4438,7 @@ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) } else { if (fwnode_is_primary(fn)) { dev->fwnode = fn->secondary; + /* Set fn->secondary = NULL, so fn remains the primary fwnode */ if (!(parent && fn == parent->fwnode)) fn->secondary = NULL; } else { -- cgit 1.4.1 From 2b5f09cadfc576817c0450e01d454f750909b103 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Fri, 18 Dec 2020 09:53:40 -0800 Subject: drm/msm/dp: postpone irq_hpd event during connection pending state irq_hpd event can only be executed at connected state. Therefore irq_hpd event should be postponed if it happened at connection pending state. This patch also make sure both link rate and lane are valid before start link training. Signed-off-by: Kuogee Hsieh Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dp/dp_display.c | 7 +++++++ drivers/gpu/drm/msm/dp/dp_panel.c | 12 +++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 6e971d552911..3bc7ed21de28 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -693,6 +693,13 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data) return 0; } + if (state == ST_CONNECT_PENDING) { + /* wait until ST_CONNECTED */ + dp_add_event(dp, EV_IRQ_HPD_INT, 0, 1); /* delay = 1 */ + mutex_unlock(&dp->event_mutex); + return 0; + } + ret = dp_display_usbpd_attention_cb(&dp->pdev->dev); if (ret == -ECONNRESET) { /* cable unplugged */ dp->core_initialized = false; diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c index 97dca3e378b7..d1780bcac8cc 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.c +++ b/drivers/gpu/drm/msm/dp/dp_panel.c @@ -167,12 +167,18 @@ int dp_panel_read_sink_caps(struct dp_panel *dp_panel, panel = container_of(dp_panel, struct dp_panel_private, dp_panel); rc = dp_panel_read_dpcd(dp_panel); + if (rc) { + DRM_ERROR("read dpcd failed %d\n", rc); + return rc; + } + bw_code = drm_dp_link_rate_to_bw_code(dp_panel->link_info.rate); - if (rc || !is_link_rate_valid(bw_code) || + if (!is_link_rate_valid(bw_code) || !is_lane_count_valid(dp_panel->link_info.num_lanes) || (bw_code > dp_panel->max_bw_code)) { - DRM_ERROR("read dpcd failed %d\n", rc); - return rc; + DRM_ERROR("Illegal link rate=%d lane=%d\n", dp_panel->link_info.rate, + dp_panel->link_info.num_lanes); + return -EINVAL; } if (dp_panel->dfp_present) { -- cgit 1.4.1 From d863f0c7b536288e2bd40cbc01c10465dd226b11 Mon Sep 17 00:00:00 2001 From: Craig Tatlor Date: Wed, 30 Dec 2020 17:29:42 +0200 Subject: drm/msm: Call msm_init_vram before binding the gpu vram.size is needed when binding a gpu without an iommu and is defined in msm_init_vram(), so run that before binding it. Signed-off-by: Craig Tatlor Reviewed-by: Brian Masney Tested-by: Alexey Minnekhanov Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_drv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index ce9bb6e929c2..549ffb60e9ca 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -457,14 +457,14 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv) drm_mode_config_init(ddev); - /* Bind all our sub-components: */ - ret = component_bind_all(dev, ddev); + ret = msm_init_vram(ddev); if (ret) goto err_destroy_mdss; - ret = msm_init_vram(ddev); + /* Bind all our sub-components: */ + ret = component_bind_all(dev, ddev); if (ret) - goto err_msm_uninit; + goto err_destroy_mdss; dma_set_max_seg_size(dev, UINT_MAX); -- cgit 1.4.1 From 3f7759e7b7585a0bffda06d4eddc6b0b850ef6c3 Mon Sep 17 00:00:00 2001 From: Iskren Chernev Date: Wed, 30 Dec 2020 17:29:43 +0200 Subject: drm/msm: Add modparam to allow vram carveout Using the GPU with a VRAM Carveout is a security vulnerability. Nevertheless it is sometimes required, especially when no IOMMU implementation is available for a certain platform. Signed-off-by: Iskren Chernev Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a2xx_gpu.c | 6 ++++-- drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 6 ++++-- drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 6 ++++-- drivers/gpu/drm/msm/adreno/adreno_device.c | 4 ++++ drivers/gpu/drm/msm/adreno/adreno_gpu.h | 1 + 5 files changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c index 7e82c41a85f1..bdc989183c64 100644 --- a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c @@ -534,8 +534,10 @@ struct msm_gpu *a2xx_gpu_init(struct drm_device *dev) if (!gpu->aspace) { dev_err(dev->dev, "No memory protection without MMU\n"); - ret = -ENXIO; - goto fail; + if (!allow_vram_carveout) { + ret = -ENXIO; + goto fail; + } } return gpu; diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 93da6683a866..4534633fe7cd 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -564,8 +564,10 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) * implement a cmdstream validator. */ DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n"); - ret = -ENXIO; - goto fail; + if (!allow_vram_carveout) { + ret = -ENXIO; + goto fail; + } } icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem"); diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index c0be3a0f36b2..82bebb40234d 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -692,8 +692,10 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev) * implement a cmdstream validator. */ DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n"); - ret = -ENXIO; - goto fail; + if (!allow_vram_carveout) { + ret = -ENXIO; + goto fail; + } } icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem"); diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 87c8b033ad1a..12e75ba360f9 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -18,6 +18,10 @@ bool snapshot_debugbus = false; MODULE_PARM_DESC(snapshot_debugbus, "Include debugbus sections in GPU devcoredump (if not fused off)"); module_param_named(snapshot_debugbus, snapshot_debugbus, bool, 0600); +bool allow_vram_carveout = false; +MODULE_PARM_DESC(allow_vram_carveout, "Allow using VRAM Carveout, in place of IOMMU"); +module_param_named(allow_vram_carveout, allow_vram_carveout, bool, 0600); + static const struct adreno_info gpulist[] = { { .rev = ADRENO_REV(2, 0, 0, 0), diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index c3775f79525a..fe5444a1482a 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -18,6 +18,7 @@ #include "adreno_pm4.xml.h" extern bool snapshot_debugbus; +extern bool allow_vram_carveout; enum { ADRENO_FW_PM4 = 0, -- cgit 1.4.1 From c4151604f0603d5700072183a05828ff87d764e4 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 21 Dec 2020 06:13:20 +0100 Subject: cpufreq: intel_pstate: remove obsolete functions percent_fp() was used in intel_pstate_pid_reset(), which was removed in commit 9d0ef7af1f2d ("cpufreq: intel_pstate: Do not use PID-based P-state selection") and hence, percent_fp() is unused since then. percent_ext_fp() was last used in intel_pstate_update_perf_limits(), which was refactored in commit 1a4fe38add8b ("cpufreq: intel_pstate: Remove max/min fractions to limit performance"), and hence, percent_ext_fp() is unused since then. make CC=clang W=1 points us those unused functions: drivers/cpufreq/intel_pstate.c:79:23: warning: unused function 'percent_fp' [-Wunused-function] static inline int32_t percent_fp(int percent) ^ drivers/cpufreq/intel_pstate.c:94:23: warning: unused function 'percent_ext_fp' [-Wunused-function] static inline int32_t percent_ext_fp(int percent) ^ Remove those obsolete functions. Signed-off-by: Lukas Bulwahn Reviewed-by: Nathan Chancellor Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 32bc11851b8d..be05e038d956 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -76,11 +76,6 @@ static inline int ceiling_fp(int32_t x) return ret; } -static inline int32_t percent_fp(int percent) -{ - return div_fp(percent, 100); -} - static inline u64 mul_ext_fp(u64 x, u64 y) { return (x * y) >> EXT_FRAC_BITS; @@ -91,11 +86,6 @@ static inline u64 div_ext_fp(u64 x, u64 y) return div64_u64(x << EXT_FRAC_BITS, y); } -static inline int32_t percent_ext_fp(int percent) -{ - return div_ext_fp(percent, 100); -} - /** * struct sample - Store performance sample * @core_avg_perf: Ratio of APERF/MPERF which is the actual average -- cgit 1.4.1 From 00fd44a1a4700718d5d962432b55c09820f7e709 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 4 Jan 2021 20:30:41 +0100 Subject: drm/msm: Only enable A6xx LLCC code on A6xx Using this code on A5xx (and probably older too) causes a smmu bug. Fixes: 474dadb8b0d5 ("drm/msm/a6xx: Add support for using system cache(LLC)") Signed-off-by: Konrad Dybcio Tested-by: AngeloGioacchino Del Regno Reviewed-by: Jordan Crouse Reviewed-by: Sai Prakash Ranjan Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 21 ++++++++++++--------- drivers/gpu/drm/msm/adreno/adreno_gpu.h | 5 +++++ 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 6cf9975e951e..f09175698827 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -191,8 +191,6 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); - struct io_pgtable_domain_attr pgtbl_cfg; struct iommu_domain *iommu; struct msm_mmu *mmu; struct msm_gem_address_space *aspace; @@ -202,13 +200,18 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu, if (!iommu) return NULL; - /* - * This allows GPU to set the bus attributes required to use system - * cache on behalf of the iommu page table walker. - */ - if (!IS_ERR(a6xx_gpu->htw_llc_slice)) { - pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_ARM_OUTER_WBWA; - iommu_domain_set_attr(iommu, DOMAIN_ATTR_IO_PGTABLE_CFG, &pgtbl_cfg); + + if (adreno_is_a6xx(adreno_gpu)) { + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct io_pgtable_domain_attr pgtbl_cfg; + /* + * This allows GPU to set the bus attributes required to use system + * cache on behalf of the iommu page table walker. + */ + if (!IS_ERR(a6xx_gpu->htw_llc_slice)) { + pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_ARM_OUTER_WBWA; + iommu_domain_set_attr(iommu, DOMAIN_ATTR_IO_PGTABLE_CFG, &pgtbl_cfg); + } } mmu = msm_iommu_new(&pdev->dev, iommu); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index fe5444a1482a..b3d9a333591b 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -212,6 +212,11 @@ static inline int adreno_is_a540(struct adreno_gpu *gpu) return gpu->revn == 540; } +static inline bool adreno_is_a6xx(struct adreno_gpu *gpu) +{ + return ((gpu->revn < 700 && gpu->revn > 599)); +} + static inline int adreno_is_a618(struct adreno_gpu *gpu) { return gpu->revn == 618; -- cgit 1.4.1 From 3c638cdb8ecc0442552156e0fed8708dd2c7f35b Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 16 Dec 2020 12:07:53 +0200 Subject: RDMA/restrack: Don't treat as an error allocation ID wrapping xa_alloc_cyclic() call returns positive number if ID allocation succeeded but wrapped. It is not an error, so normalize the "ret" variable to zero as marker of not-an-error. drivers/infiniband/core/restrack.c:261 rdma_restrack_add() warn: 'ret' can be either negative or positive Fixes: fd47c2f99f04 ("RDMA/restrack: Convert internal DB from hash to XArray") Link: https://lore.kernel.org/r/20201216100753.1127638-1-leon@kernel.org Reported-by: Dan Carpenter Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/restrack.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index e0a41c867002..ff1551b3cf61 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -254,6 +254,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res) } else { ret = xa_alloc_cyclic(&rt->xa, &res->id, res, xa_limit_32b, &rt->next_id, GFP_KERNEL); + ret = (ret < 0) ? ret : 0; } out: -- cgit 1.4.1 From afded6d83aa7b35dab675c730528109cc58d6847 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 28 Dec 2020 20:43:13 +0200 Subject: misc: pvpanic: Check devm_ioport_map() for NULL Inconveniently devm_ioport_map() and devm_ioremap_resource() return errors differently, i.e. former uses simply NULL pointer, while the latter an error pointer. Due to this, we have to check each of them separately. Fixes: f104060813fe ("misc: pvpanic: Combine ACPI and platform drivers") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20201228184313.57610-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/pvpanic.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/misc/pvpanic.c b/drivers/misc/pvpanic.c index 951b37da5e3c..41cab297d66e 100644 --- a/drivers/misc/pvpanic.c +++ b/drivers/misc/pvpanic.c @@ -55,12 +55,23 @@ static int pvpanic_mmio_probe(struct platform_device *pdev) struct resource *res; res = platform_get_mem_or_io(pdev, 0); - if (res && resource_type(res) == IORESOURCE_IO) + if (!res) + return -EINVAL; + + switch (resource_type(res)) { + case IORESOURCE_IO: base = devm_ioport_map(dev, res->start, resource_size(res)); - else + if (!base) + return -ENOMEM; + break; + case IORESOURCE_MEM: base = devm_ioremap_resource(dev, res); - if (IS_ERR(base)) - return PTR_ERR(base); + if (IS_ERR(base)) + return PTR_ERR(base); + break; + default: + return -EINVAL; + } atomic_notifier_chain_register(&panic_notifier_list, &pvpanic_panic_nb); -- cgit 1.4.1 From d8f5c29653c3f6995e8979be5623d263e92f6b86 Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Tue, 5 Jan 2021 16:22:25 -0800 Subject: net: ipv6: fib: flush exceptions when purging route Route removal is handled by two code paths. The main removal path is via fib6_del_route() which will handle purging any PMTU exceptions from the cache, removing all per-cpu copies of the DST entry used by the route, and releasing the fib6_info struct. The second removal location is during fib6_add_rt2node() during a route replacement operation. This path also calls fib6_purge_rt() to handle cleaning up the per-cpu copies of the DST entries and releasing the fib6_info associated with the older route, but it does not flush any PMTU exceptions that the older route had. Since the older route is removed from the tree during the replacement, we lose any way of accessing it again. As these lingering DSTs and the fib6_info struct are holding references to the underlying netdevice struct as well, unregistering that device from the kernel can never complete. Fixes: 2b760fcf5cfb3 ("ipv6: hook up exception table to store dst cache") Signed-off-by: Sean Tranchetti Reviewed-by: David Ahern Link: https://lore.kernel.org/r/1609892546-11389-1-git-send-email-stranche@quicinc.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_fib.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 605cdd38a919..f43e27555725 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1025,6 +1025,8 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, { struct fib6_table *table = rt->fib6_table; + /* Flush all cached dst in exception table */ + rt6_flush_exceptions(rt); fib6_drop_pcpu_from(rt, table); if (rt->nh && !list_empty(&rt->nh_list)) @@ -1927,9 +1929,6 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, net->ipv6.rt6_stats->fib_rt_entries--; net->ipv6.rt6_stats->fib_discarded_routes++; - /* Flush all cached dst in exception table */ - rt6_flush_exceptions(rt); - /* Reset round-robin state, if necessary */ if (rcu_access_pointer(fn->rr_ptr) == rt) fn->rr_ptr = NULL; -- cgit 1.4.1 From 5316a7c0130acf09bfc8bb0092407006010fcccc Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Tue, 5 Jan 2021 16:22:26 -0800 Subject: tools: selftests: add test for changing routes with PTMU exceptions Adds new 2 new tests to the PTMU script: pmtu_ipv4/6_route_change. These tests explicitly test for a recently discovered problem in the IPv6 routing framework where PMTU exceptions were not properly released when replacing a route via "ip route change ...". After creating PMTU exceptions, the route from the device A to R1 will be replaced with a new route, then device A will be deleted. If the PMTU exceptions were properly cleaned up by the kernel, this device deletion will succeed. Otherwise, the unregistration of the device will stall, and messages such as the following will be logged in dmesg: unregister_netdevice: waiting for veth_A-R1 to become free. Usage count = 4 Signed-off-by: Sean Tranchetti Reviewed-by: David Ahern Link: https://lore.kernel.org/r/1609892546-11389-2-git-send-email-stranche@quicinc.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/pmtu.sh | 71 +++++++++++++++++++++++++++++++++++-- 1 file changed, 69 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 464e31eabc73..64cd2e23c568 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -162,7 +162,15 @@ # - list_flush_ipv6_exception # Using the same topology as in pmtu_ipv6, create exceptions, and check # they are shown when listing exception caches, gone after flushing them - +# +# - pmtu_ipv4_route_change +# Use the same topology as in pmtu_ipv4, but issue a route replacement +# command and delete the corresponding device afterward. This tests for +# proper cleanup of the PMTU exceptions by the route replacement path. +# Device unregistration should complete successfully +# +# - pmtu_ipv6_route_change +# Same as above but with IPv6 # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 @@ -224,7 +232,9 @@ tests=" cleanup_ipv4_exception ipv4: cleanup of cached exceptions 1 cleanup_ipv6_exception ipv6: cleanup of cached exceptions 1 list_flush_ipv4_exception ipv4: list and flush cached exceptions 1 - list_flush_ipv6_exception ipv6: list and flush cached exceptions 1" + list_flush_ipv6_exception ipv6: list and flush cached exceptions 1 + pmtu_ipv4_route_change ipv4: PMTU exception w/route replace 1 + pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1" NS_A="ns-A" NS_B="ns-B" @@ -1782,6 +1792,63 @@ test_list_flush_ipv6_exception() { return ${fail} } +test_pmtu_ipvX_route_change() { + family=${1} + + setup namespaces routing || return 2 + trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ + "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ + "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 + + if [ ${family} -eq 4 ]; then + ping=ping + dst1="${prefix4}.${b_r1}.1" + dst2="${prefix4}.${b_r2}.1" + gw="${prefix4}.${a_r1}.2" + else + ping=${ping6} + dst1="${prefix6}:${b_r1}::1" + dst2="${prefix6}:${b_r2}::1" + gw="${prefix6}:${a_r1}::2" + fi + + # Set up initial MTU values + mtu "${ns_a}" veth_A-R1 2000 + mtu "${ns_r1}" veth_R1-A 2000 + mtu "${ns_r1}" veth_R1-B 1400 + mtu "${ns_b}" veth_B-R1 1400 + + mtu "${ns_a}" veth_A-R2 2000 + mtu "${ns_r2}" veth_R2-A 2000 + mtu "${ns_r2}" veth_R2-B 1500 + mtu "${ns_b}" veth_B-R2 1500 + + # Create route exceptions + run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1} + run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2} + + # Check that exceptions have been created with the correct PMTU + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})" + check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 + + # Replace the route from A to R1 + run_cmd ${ns_a} ip route change default via ${gw} + + # Delete the device in A + run_cmd ${ns_a} ip link del "veth_A-R1" +} + +test_pmtu_ipv4_route_change() { + test_pmtu_ipvX_route_change 4 +} + +test_pmtu_ipv6_route_change() { + test_pmtu_ipvX_route_change 6 +} + usage() { echo echo "$0 [OPTIONS] [TEST]..." -- cgit 1.4.1 From 384b77fd48fd683a82760bc88bef8611cba997fc Mon Sep 17 00:00:00 2001 From: Amanoel Dawod Date: Sat, 26 Dec 2020 18:58:40 -0500 Subject: Fonts: font_ter16x32: Update font with new upstream Terminus release This is just a maintenance patch to update font_ter16x32.c with changes and minor fixes added in new upstream Terminus v4.49. >From release notes of new version 4.49, this brings: - Altered ascii grave in some sizes to be more useful as a back quote. - Fixed 21B5, added 21B2 and 21B3. Just as my initial submission of the font, above changes were obtained from new ter-i32b.psf font source. Terminus font sources are available for download at SourceForge: https://sourceforge.net/projects/terminus-font/files/terminus-font-4.49/ Simply running `make` in source directory will build the .psf font files. Signed-off-by: Amanoel Dawod Link: https://lore.kernel.org/r/20201226235840.26290-1-kernel@amanoeldawod.com Signed-off-by: Greg Kroah-Hartman --- lib/fonts/font_ter16x32.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/fonts/font_ter16x32.c b/lib/fonts/font_ter16x32.c index 1955d624177c..5baedc573dd6 100644 --- a/lib/fonts/font_ter16x32.c +++ b/lib/fonts/font_ter16x32.c @@ -774,8 +774,8 @@ static const struct font_data fontdata_ter16x32 = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xfc, 0x7f, 0xfc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 95 */ - 0x00, 0x00, 0x1c, 0x00, 0x0e, 0x00, 0x07, 0x00, - 0x03, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x0e, 0x00, + 0x07, 0x00, 0x03, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -1169,7 +1169,7 @@ static const struct font_data fontdata_ter16x32 = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x7f, 0xf8, 0x7f, 0xfc, 0x03, 0x9e, 0x03, 0x8e, + 0x7e, 0xf8, 0x7f, 0xfc, 0x03, 0x9e, 0x03, 0x8e, 0x03, 0x8e, 0x3f, 0x8e, 0x7f, 0xfe, 0xf3, 0xfe, 0xe3, 0x80, 0xe3, 0x80, 0xe3, 0x80, 0xf3, 0xce, 0x7f, 0xfe, 0x3e, 0xfc, 0x00, 0x00, 0x00, 0x00, -- cgit 1.4.1 From a306aba9c8d869b1fdfc8ad9237f1ed718ea55e6 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sat, 26 Dec 2020 15:42:48 +0800 Subject: RDMA/usnic: Fix memleak in find_free_vf_and_create_qp_grp If usnic_ib_qp_grp_create() fails at the first call, dev_list will not be freed on error, which leads to memleak. Fixes: e3cf00d0a87f ("IB/usnic: Add Cisco VIC low-level hardware driver") Link: https://lore.kernel.org/r/20201226074248.2893-1-dinghao.liu@zju.edu.cn Signed-off-by: Dinghao Liu Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 38a37770c016..3705c6b8b223 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -214,6 +214,7 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev, } usnic_uiom_free_dev_list(dev_list); + dev_list = NULL; } /* Try to find resources on an unused vf */ @@ -239,6 +240,8 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev, qp_grp_check: if (IS_ERR_OR_NULL(qp_grp)) { usnic_err("Failed to allocate qp_grp\n"); + if (usnic_ib_share_vf) + usnic_uiom_free_dev_list(dev_list); return ERR_PTR(qp_grp ? PTR_ERR(qp_grp) : -ENOMEM); } return qp_grp; -- cgit 1.4.1 From abf8ef953a43e74aac3c54a94975f21bd483199b Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 30 Nov 2020 04:38:11 +0200 Subject: net/mlx5: Check if lag is supported before creating one MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes a memleak issue by preventing to create a lag and add PFs if lag is not supported. comm “python3”, pid 349349, jiffies 4296985507 (age 1446.976s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ……………. 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ……………. backtrace: [<000000005b216ae7>] mlx5_lag_add+0x1d5/0×3f0 [mlx5_core] [<000000000445aa55>] mlx5e_nic_enable+0x66/0×1b0 [mlx5_core] [<00000000c56734c3>] mlx5e_attach_netdev+0x16e/0×200 [mlx5_core] [<0000000030439d1f>] mlx5e_attach+0x5c/0×90 [mlx5_core] [<0000000018fd8615>] mlx5e_add+0x1a4/0×410 [mlx5_core] [<0000000068bc504b>] mlx5_add_device+0x72/0×120 [mlx5_core] [<000000009fce51f9>] mlx5_register_device+0x77/0xb0 [mlx5_core] [<00000000d0d81ff3>] mlx5_load_one+0xc58/0×1eb0 [mlx5_core] [<0000000045077adc>] init_one+0x3ea/0×920 [mlx5_core] [<0000000043287674>] pci_device_probe+0xcd/0×150 [<00000000dafd3279>] really_probe+0x1c9/0×4b0 [<00000000f06bdd84>] driver_probe_device+0x5d/0×140 [<00000000e3d508b6>] device_driver_attach+0x4f/0×60 [<0000000084fba0f0>] bind_store+0xbf/0×120 [<00000000bf6622b3>] kernfs_fop_write+0x114/0×1b0 Fixes: 9b412cc35f00 ("net/mlx5e: Add LAG warning if bond slave is not lag master") Signed-off-by: Mark Zhang Reviewed-by: Leon Romanovsky Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index f3d45ef082cd..83a05371e2aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -564,7 +564,9 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) struct mlx5_core_dev *tmp_dev; int i, err; - if (!MLX5_CAP_GEN(dev, vport_group_manager)) + if (!MLX5_CAP_GEN(dev, vport_group_manager) || + !MLX5_CAP_GEN(dev, lag_master) || + MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS) return; tmp_dev = mlx5_get_next_phys_dev(dev); @@ -582,12 +584,9 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) if (mlx5_lag_dev_add_pf(ldev, dev, netdev) < 0) return; - for (i = 0; i < MLX5_MAX_PORTS; i++) { - tmp_dev = ldev->pf[i].dev; - if (!tmp_dev || !MLX5_CAP_GEN(tmp_dev, lag_master) || - MLX5_CAP_GEN(tmp_dev, num_lag_ports) != MLX5_MAX_PORTS) + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (!ldev->pf[i].dev) break; - } if (i >= MLX5_MAX_PORTS) ldev->flags |= MLX5_LAG_FLAG_READY; -- cgit 1.4.1 From 9c9be85f6b59d80efe4705109c0396df18d4e11d Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 24 Nov 2020 22:16:23 +0200 Subject: net/mlx5e: Add missing capability check for uplink follow Expose firmware indication that it supports setting eswitch uplink state to follow (follow the physical link). Condition setting the eswitch uplink admin-state with this capability bit. Older FW may not support the uplink state setting. Fixes: 7d0314b11cdd ("net/mlx5e: Modify uplink state on interface up/down") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 ++- include/linux/mlx5/mlx5_ifc.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 7a79d330c075..6a852b4901aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3161,7 +3161,8 @@ static void mlx5e_modify_admin_state(struct mlx5_core_dev *mdev, mlx5_set_port_admin_status(mdev, state); - if (mlx5_eswitch_mode(mdev) != MLX5_ESWITCH_LEGACY) + if (mlx5_eswitch_mode(mdev) == MLX5_ESWITCH_OFFLOADS || + !MLX5_CAP_GEN(mdev, uplink_follow)) return; if (state == MLX5_PORT_UP) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 8fbddec26eb8..442c0160caab 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1280,7 +1280,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 ece_support[0x1]; u8 reserved_at_a4[0x7]; u8 log_max_srq[0x5]; - u8 reserved_at_b0[0x2]; + u8 reserved_at_b0[0x1]; + u8 uplink_follow[0x1]; u8 ts_cqe_to_dest_cqn[0x1]; u8 reserved_at_b3[0xd]; -- cgit 1.4.1 From 0f2dcade69f2af56b74bce432e48ff3957830ce2 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 14 Dec 2020 03:38:40 +0200 Subject: net/mlx5: Use port_num 1 instead of 0 when delete a RoCE address In multi-port mode, FW reports syndrome 0x2ea48 (invalid vhca_port_number) if the port_num is not 1 or 2. Fixes: 80f09dfc237f ("net/mlx5: Eswitch, enable RoCE loopback traffic") Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index 0fc7de4aa572..8e0dddc6383f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -116,7 +116,7 @@ free: static void mlx5_rdma_del_roce_addr(struct mlx5_core_dev *dev) { mlx5_core_roce_gid_set(dev, 0, 0, 0, - NULL, NULL, false, 0, 0); + NULL, NULL, false, 0, 1); } static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid *gid) -- cgit 1.4.1 From eed38eeee734756596e2cc163bdc7dac3be501b1 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Mon, 7 Dec 2020 08:15:18 +0000 Subject: net/mlx5e: CT: Use per flow counter when CT flow accounting is enabled Connection counters may be shared for both directions when the counter is used for connection aging purposes. However, if TC flow accounting is enabled then a unique counter is required per direction. Instantiate a unique counter per direction if the conntrack accounting extension is enabled. Use a shared counter when the connection accounting extension is disabled. Fixes: 1edae2335adf ("net/mlx5e: CT: Use the same counter for both directions") Signed-off-by: Oz Shlomo Reported-by: Marcelo Ricardo Leitner Reviewed-by: Roi Dayan Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 77 ++++++++++++++-------- 1 file changed, 49 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index e521254d886e..072363e73f1c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -118,16 +118,17 @@ struct mlx5_ct_tuple { u16 zone; }; -struct mlx5_ct_shared_counter { +struct mlx5_ct_counter { struct mlx5_fc *counter; refcount_t refcount; + bool is_shared; }; struct mlx5_ct_entry { struct rhash_head node; struct rhash_head tuple_node; struct rhash_head tuple_nat_node; - struct mlx5_ct_shared_counter *shared_counter; + struct mlx5_ct_counter *counter; unsigned long cookie; unsigned long restore_cookie; struct mlx5_ct_tuple tuple; @@ -394,13 +395,14 @@ mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, } static void -mlx5_tc_ct_shared_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry) +mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry) { - if (!refcount_dec_and_test(&entry->shared_counter->refcount)) + if (entry->counter->is_shared && + !refcount_dec_and_test(&entry->counter->refcount)) return; - mlx5_fc_destroy(ct_priv->dev, entry->shared_counter->counter); - kfree(entry->shared_counter); + mlx5_fc_destroy(ct_priv->dev, entry->counter->counter); + kfree(entry->counter); } static void @@ -699,7 +701,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, attr->dest_ft = ct_priv->post_ct; attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct; attr->outer_match_level = MLX5_MATCH_L4; - attr->counter = entry->shared_counter->counter; + attr->counter = entry->counter->counter; attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT; mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule); @@ -732,13 +734,34 @@ err_attr: return err; } -static struct mlx5_ct_shared_counter * +static struct mlx5_ct_counter * +mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv) +{ + struct mlx5_ct_counter *counter; + int ret; + + counter = kzalloc(sizeof(*counter), GFP_KERNEL); + if (!counter) + return ERR_PTR(-ENOMEM); + + counter->is_shared = false; + counter->counter = mlx5_fc_create(ct_priv->dev, true); + if (IS_ERR(counter->counter)) { + ct_dbg("Failed to create counter for ct entry"); + ret = PTR_ERR(counter->counter); + kfree(counter); + return ERR_PTR(ret); + } + + return counter; +} + +static struct mlx5_ct_counter * mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry) { struct mlx5_ct_tuple rev_tuple = entry->tuple; - struct mlx5_ct_shared_counter *shared_counter; - struct mlx5_core_dev *dev = ct_priv->dev; + struct mlx5_ct_counter *shared_counter; struct mlx5_ct_entry *rev_entry; __be16 tmp_port; int ret; @@ -767,25 +790,20 @@ mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv, rev_entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &rev_tuple, tuples_ht_params); if (rev_entry) { - if (refcount_inc_not_zero(&rev_entry->shared_counter->refcount)) { + if (refcount_inc_not_zero(&rev_entry->counter->refcount)) { mutex_unlock(&ct_priv->shared_counter_lock); - return rev_entry->shared_counter; + return rev_entry->counter; } } mutex_unlock(&ct_priv->shared_counter_lock); - shared_counter = kzalloc(sizeof(*shared_counter), GFP_KERNEL); - if (!shared_counter) - return ERR_PTR(-ENOMEM); - - shared_counter->counter = mlx5_fc_create(dev, true); - if (IS_ERR(shared_counter->counter)) { - ct_dbg("Failed to create counter for ct entry"); - ret = PTR_ERR(shared_counter->counter); - kfree(shared_counter); + shared_counter = mlx5_tc_ct_counter_create(ct_priv); + if (IS_ERR(shared_counter)) { + ret = PTR_ERR(shared_counter); return ERR_PTR(ret); } + shared_counter->is_shared = true; refcount_set(&shared_counter->refcount, 1); return shared_counter; } @@ -798,10 +816,13 @@ mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv, { int err; - entry->shared_counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry); - if (IS_ERR(entry->shared_counter)) { - err = PTR_ERR(entry->shared_counter); - ct_dbg("Failed to create counter for ct entry"); + if (nf_ct_acct_enabled(dev_net(ct_priv->netdev))) + entry->counter = mlx5_tc_ct_counter_create(ct_priv); + else + entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry); + + if (IS_ERR(entry->counter)) { + err = PTR_ERR(entry->counter); return err; } @@ -820,7 +841,7 @@ mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv, err_nat: mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); err_orig: - mlx5_tc_ct_shared_counter_put(ct_priv, entry); + mlx5_tc_ct_counter_put(ct_priv, entry); return err; } @@ -918,7 +939,7 @@ mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv, rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node, tuples_ht_params); mutex_unlock(&ct_priv->shared_counter_lock); - mlx5_tc_ct_shared_counter_put(ct_priv, entry); + mlx5_tc_ct_counter_put(ct_priv, entry); } @@ -956,7 +977,7 @@ mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft, if (!entry) return -ENOENT; - mlx5_fc_query_cached(entry->shared_counter->counter, &bytes, &packets, &lastuse); + mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse); flow_stats_update(&f->stats, bytes, packets, 0, lastuse, FLOW_ACTION_HW_STATS_DELAYED); -- cgit 1.4.1 From b544011f0e58ce43c40105468d6dc67f980a0c7a Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Fri, 13 Nov 2020 06:06:28 +0200 Subject: net/mlx5e: Fix SWP offsets when vlan inserted by driver In case WQE includes inline header the vlan is inserted by driver even if vlan offload is set. On geneve over vlan interface where software parser is used the SWP offsets should be updated according to the added vlan. Fixes: e3cfc7e6b7bd ("net/mlx5e: TX, Add geneve tunnel stateless offload support") Signed-off-by: Moshe Shemesh Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 9 +++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h | 8 +++++--- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 9 +++++---- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 7943eb30b837..4880f2179273 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -371,6 +371,15 @@ struct mlx5e_swp_spec { u8 tun_l4_proto; }; +static inline void mlx5e_eseg_swp_offsets_add_vlan(struct mlx5_wqe_eth_seg *eseg) +{ + /* SWP offsets are in 2-bytes words */ + eseg->swp_outer_l3_offset += VLAN_HLEN / 2; + eseg->swp_outer_l4_offset += VLAN_HLEN / 2; + eseg->swp_inner_l3_offset += VLAN_HLEN / 2; + eseg->swp_inner_l4_offset += VLAN_HLEN / 2; +} + static inline void mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, struct mlx5e_swp_spec *swp_spec) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index 899b98aca0d3..1fae7fab8297 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -51,7 +51,7 @@ static inline bool mlx5_geneve_tx_allowed(struct mlx5_core_dev *mdev) } static inline void -mlx5e_tx_tunnel_accel(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) +mlx5e_tx_tunnel_accel(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, u16 ihs) { struct mlx5e_swp_spec swp_spec = {}; unsigned int offset = 0; @@ -85,6 +85,8 @@ mlx5e_tx_tunnel_accel(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) } mlx5e_set_eseg_swp(skb, eseg, &swp_spec); + if (skb_vlan_tag_present(skb) && ihs) + mlx5e_eseg_swp_offsets_add_vlan(eseg); } #else @@ -163,7 +165,7 @@ static inline unsigned int mlx5e_accel_tx_ids_len(struct mlx5e_txqsq *sq, static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv, struct sk_buff *skb, - struct mlx5_wqe_eth_seg *eseg) + struct mlx5_wqe_eth_seg *eseg, u16 ihs) { #ifdef CONFIG_MLX5_EN_IPSEC if (xfrm_offload(skb)) @@ -172,7 +174,7 @@ static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv, #if IS_ENABLED(CONFIG_GENEVE) if (skb->encapsulation) - mlx5e_tx_tunnel_accel(skb, eseg); + mlx5e_tx_tunnel_accel(skb, eseg, ihs); #endif return true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index e47e2a0059d0..61ed671fe741 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -682,9 +682,9 @@ void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq) static bool mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_accel_tx_state *accel, - struct mlx5_wqe_eth_seg *eseg) + struct mlx5_wqe_eth_seg *eseg, u16 ihs) { - if (unlikely(!mlx5e_accel_tx_eseg(priv, skb, eseg))) + if (unlikely(!mlx5e_accel_tx_eseg(priv, skb, eseg, ihs))) return false; mlx5e_txwqe_build_eseg_csum(sq, skb, accel, eseg); @@ -714,7 +714,8 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) if (mlx5e_tx_skb_supports_mpwqe(skb, &attr)) { struct mlx5_wqe_eth_seg eseg = {}; - if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg))) + if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg, + attr.ihs))) return NETDEV_TX_OK; mlx5e_sq_xmit_mpwqe(sq, skb, &eseg, netdev_xmit_more()); @@ -731,7 +732,7 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) /* May update the WQE, but may not post other WQEs. */ mlx5e_accel_tx_finish(sq, wqe, &accel, (struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl)); - if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth))) + if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth, attr.ihs))) return NETDEV_TX_OK; mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, netdev_xmit_more()); -- cgit 1.4.1 From 25c904b59aaf4816337acd415514b0c47715f604 Mon Sep 17 00:00:00 2001 From: Alaa Hleihel Date: Mon, 4 Jan 2021 12:54:40 +0200 Subject: net/mlx5: E-Switch, fix changing vf VLANID Adding vf VLANID for the first time, or after having cleared previously defined VLANID works fine, however, attempting to change an existing vf VLANID clears the rules on the firmware, but does not add new rules for the new vf VLANID. Fix this by changing the logic in function esw_acl_egress_lgcy_setup() so that it will always configure egress rules. Fixes: ea651a86d468 ("net/mlx5: E-Switch, Refactor eswitch egress acl codes") Signed-off-by: Alaa Hleihel Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/esw/acl/egress_lgcy.c | 27 +++++++++++----------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c index 2b85d4777303..3e19b1721303 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c @@ -95,22 +95,21 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, return 0; } - if (!IS_ERR_OR_NULL(vport->egress.acl)) - return 0; - - vport->egress.acl = esw_acl_table_create(esw, vport->vport, - MLX5_FLOW_NAMESPACE_ESW_EGRESS, - table_size); - if (IS_ERR(vport->egress.acl)) { - err = PTR_ERR(vport->egress.acl); - vport->egress.acl = NULL; - goto out; + if (!vport->egress.acl) { + vport->egress.acl = esw_acl_table_create(esw, vport->vport, + MLX5_FLOW_NAMESPACE_ESW_EGRESS, + table_size); + if (IS_ERR(vport->egress.acl)) { + err = PTR_ERR(vport->egress.acl); + vport->egress.acl = NULL; + goto out; + } + + err = esw_acl_egress_lgcy_groups_create(esw, vport); + if (err) + goto out; } - err = esw_acl_egress_lgcy_groups_create(esw, vport); - if (err) - goto out; - esw_debug(esw->dev, "vport[%d] configure egress rules, vlan(%d) qos(%d)\n", vport->vport, vport->info.vlan, vport->info.qos); -- cgit 1.4.1 From e13ed0ac064dd6ee964155ba9fdc2f3c3785934c Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Mon, 14 Dec 2020 13:53:03 +0200 Subject: net/mlx5e: In skb build skip setting mark in switchdev mode sop_drop_qpn field in the cqe is used by two features, in SWITCHDEV mode to restore the chain id in case of a miss and in LEGACY mode to support skbedit mark action. In build RX skb, the skb mark field is set regardless of the configured mode which cause a corruption of the mark field in case of switchdev mode. Fix by overriding the mark value back to 0 in the representor tc update skb flow. Fixes: 8f1e0b97cc70 ("net/mlx5: E-Switch, Mark miss packets with new chain id mapping") Signed-off-by: Maor Dickman Reviewed-by: Raed Salem Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index d29af7b9c695..76177f7c5ec2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -626,6 +626,11 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe, if (!reg_c0) return true; + /* If reg_c0 is not equal to the default flow tag then skb->mark + * is not supported and must be reset back to 0. + */ + skb->mark = 0; + priv = netdev_priv(skb->dev); esw = priv->mdev->priv.eswitch; -- cgit 1.4.1 From b1c0aca3d3ddeebeec57ada9c2df9ed647939249 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 27 Dec 2020 16:33:19 +0200 Subject: net/mlx5e: ethtool, Fix restriction of autoneg with 56G Prior to this patch, configuring speed to 50G with autoneg off over devices supporting 50G per lane failed. Support for 50G per lane introduced a new set of link-modes, on which driver always performed a speed validation as if only legacy link-modes were configured. Fix driver speed validation to force setting autoneg over 56G only if in legacy link-mode. Fixes: 3d7cadae51f1 ("net/mlx5e: ethtool, Fix analysis of speed setting") Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 24 ++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index d9076d543104..2d37742a888c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1010,6 +1010,22 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings); } +static int mlx5e_speed_validate(struct net_device *netdev, bool ext, + const unsigned long link_modes, u8 autoneg) +{ + /* Extended link-mode has no speed limitations. */ + if (ext) + return 0; + + if ((link_modes & MLX5E_PROT_MASK(MLX5E_56GBASE_R4)) && + autoneg != AUTONEG_ENABLE) { + netdev_err(netdev, "%s: 56G link speed requires autoneg enabled\n", + __func__); + return -EINVAL; + } + return 0; +} + static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes) { u32 i, ptys_modes = 0; @@ -1103,13 +1119,9 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, link_modes = autoneg == AUTONEG_ENABLE ? ethtool2ptys_adver_func(adver) : mlx5e_port_speed2linkmodes(mdev, speed, !ext); - if ((link_modes & MLX5E_PROT_MASK(MLX5E_56GBASE_R4)) && - autoneg != AUTONEG_ENABLE) { - netdev_err(priv->netdev, "%s: 56G link speed requires autoneg enabled\n", - __func__); - err = -EINVAL; + err = mlx5e_speed_validate(priv->netdev, ext, link_modes, autoneg); + if (err) goto out; - } link_modes = link_modes & eproto.cap; if (!link_modes) { -- cgit 1.4.1 From 4d8be21112f6fa2ac4b8a13f35866ad65b11d48c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 4 Jan 2021 10:08:36 +0200 Subject: net/mlx5: Release devlink object if adev fails Add missed freeing previously allocated devlink object. Fixes: a925b5e309c9 ("net/mlx5: Register mlx5 devices to auxiliary virtual bus") Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c08315b51fd3..ca6f2fc39ea0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1368,8 +1368,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) MLX5_COREDEV_VF : MLX5_COREDEV_PF; dev->priv.adev_idx = mlx5_adev_idx_alloc(); - if (dev->priv.adev_idx < 0) - return dev->priv.adev_idx; + if (dev->priv.adev_idx < 0) { + err = dev->priv.adev_idx; + goto adev_init_err; + } err = mlx5_mdev_init(dev, prof_sel); if (err) @@ -1403,6 +1405,7 @@ pci_init_err: mlx5_mdev_uninit(dev); mdev_init_err: mlx5_adev_idx_free(dev->priv.adev_idx); +adev_init_err: mlx5_devlink_free(devlink); return err; -- cgit 1.4.1 From 7a6eb072a9548492ead086f3e820e9aac71c7138 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Mon, 28 Dec 2020 16:48:40 +0800 Subject: net/mlx5e: Fix two double free cases mlx5e_create_ttc_table_groups() frees ft->g on failure of kvzalloc(), but such failure will be caught by its caller in mlx5e_create_ttc_table() and ft->g will be freed again in mlx5e_destroy_flow_table(). The same issue also occurs in mlx5e_create_ttc_table_groups(). Set ft->g to NULL after kfree() to avoid double free. Fixes: 7b3722fa9ef6 ("net/mlx5e: Support RSS for GRE tunneled packets") Fixes: 33cfaaa8f36f ("net/mlx5e: Split the main flow steering table") Signed-off-by: Dinghao Liu Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index fa8149f6eb08..44a2dfbc3853 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -942,6 +942,7 @@ static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc, in = kvzalloc(inlen, GFP_KERNEL); if (!in) { kfree(ft->g); + ft->g = NULL; return -ENOMEM; } @@ -1087,6 +1088,7 @@ static int mlx5e_create_inner_ttc_table_groups(struct mlx5e_ttc_table *ttc) in = kvzalloc(inlen, GFP_KERNEL); if (!in) { kfree(ft->g); + ft->g = NULL; return -ENOMEM; } -- cgit 1.4.1 From 5b0bb12c58ac7d22e05b5bfdaa30a116c8c32e32 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Mon, 21 Dec 2020 19:27:31 +0800 Subject: net/mlx5e: Fix memleak in mlx5e_create_l2_table_groups When mlx5_create_flow_group() fails, ft->g should be freed just like when kvzalloc() fails. The caller of mlx5e_create_l2_table_groups() does not catch this issue on failure, which leads to memleak. Fixes: 33cfaaa8f36f ("net/mlx5e: Split the main flow steering table") Signed-off-by: Dinghao Liu Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 44a2dfbc3853..e02e5895703d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -1392,6 +1392,7 @@ err_destroy_groups: ft->g[ft->num_groups] = NULL; mlx5e_destroy_groups(ft); kvfree(in); + kfree(ft->g); return err; } -- cgit 1.4.1 From f2bc3af6353cb2a33dfa9d270d999d839eef54cb Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 29 Dec 2020 18:46:53 -0800 Subject: RDMA/ocrdma: Fix use after free in ocrdma_dealloc_ucontext_pd() In ocrdma_dealloc_ucontext_pd() uctx->cntxt_pd is assigned to the variable pd and then after uctx->cntxt_pd is freed, the variable pd is passed to function _ocrdma_dealloc_pd() which dereferences pd directly or through its call to ocrdma_mbx_dealloc_pd(). Reorder the free using the variable pd. Cc: stable@vger.kernel.org Fixes: 21a428a019c9 ("RDMA: Handle PD allocations by IB/core") Link: https://lore.kernel.org/r/20201230024653.1516495-1-trix@redhat.com Signed-off-by: Tom Rix Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index bc98bd950d99..3acb5c10b155 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -434,9 +434,9 @@ static void ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx) pr_err("%s(%d) Freeing in use pdid=0x%x.\n", __func__, dev->id, pd->id); } - kfree(uctx->cntxt_pd); uctx->cntxt_pd = NULL; _ocrdma_dealloc_pd(dev, pd); + kfree(pd); } static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx) -- cgit 1.4.1 From f3562f5e00bbae2a6b292941ec76a9140aa3b7dd Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 6 Jan 2021 17:17:35 +0100 Subject: docs: octeontx2: tune rst markup Commit 80b9414832a1 ("docs: octeontx2: Add Documentation for NPA health reporters") added new documentation with improper formatting for rst, and caused a few new warnings for make htmldocs in octeontx2.rst:169--202. Tune markup and formatting for better presentation in the HTML view. Signed-off-by: Lukas Bulwahn Acked-by: Randy Dunlap Acked-by: George Cherian Link: https://lore.kernel.org/r/20210106161735.21751-1-lukas.bulwahn@gmail.com Signed-off-by: Jakub Kicinski --- .../device_drivers/ethernet/marvell/octeontx2.rst | 62 +++++++++++++--------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst index d3fcf536d14e..61e850460e18 100644 --- a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst +++ b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst @@ -164,46 +164,56 @@ Devlink health reporters NPA Reporters ------------- -The NPA reporters are responsible for reporting and recovering the following group of errors +The NPA reporters are responsible for reporting and recovering the following group of errors: + 1. GENERAL events + - Error due to operation of unmapped PF. - Error due to disabled alloc/free for other HW blocks (NIX, SSO, TIM, DPI and AURA). + 2. ERROR events + - Fault due to NPA_AQ_INST_S read or NPA_AQ_RES_S write. - AQ Doorbell Error. + 3. RAS events + - RAS Error Reporting for NPA_AQ_INST_S/NPA_AQ_RES_S. + 4. RVU events + - Error due to unmapped slot. -Sample Output -------------- -~# devlink health -pci/0002:01:00.0: - reporter hw_npa_intr - state healthy error 2872 recover 2872 last_dump_date 2020-12-10 last_dump_time 09:39:09 grace_period 0 auto_recover true auto_dump true - reporter hw_npa_gen - state healthy error 2872 recover 2872 last_dump_date 2020-12-11 last_dump_time 04:43:04 grace_period 0 auto_recover true auto_dump true - reporter hw_npa_err - state healthy error 2871 recover 2871 last_dump_date 2020-12-10 last_dump_time 09:39:17 grace_period 0 auto_recover true auto_dump true - reporter hw_npa_ras - state healthy error 0 recover 0 last_dump_date 2020-12-10 last_dump_time 09:32:40 grace_period 0 auto_recover true auto_dump true +Sample Output:: + + ~# devlink health + pci/0002:01:00.0: + reporter hw_npa_intr + state healthy error 2872 recover 2872 last_dump_date 2020-12-10 last_dump_time 09:39:09 grace_period 0 auto_recover true auto_dump true + reporter hw_npa_gen + state healthy error 2872 recover 2872 last_dump_date 2020-12-11 last_dump_time 04:43:04 grace_period 0 auto_recover true auto_dump true + reporter hw_npa_err + state healthy error 2871 recover 2871 last_dump_date 2020-12-10 last_dump_time 09:39:17 grace_period 0 auto_recover true auto_dump true + reporter hw_npa_ras + state healthy error 0 recover 0 last_dump_date 2020-12-10 last_dump_time 09:32:40 grace_period 0 auto_recover true auto_dump true Each reporter dumps the + - Error Type - Error Register value - Reason in words -For eg: -~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_gen - NPA_AF_GENERAL: - NPA General Interrupt Reg : 1 - NIX0: free disabled RX -~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_intr - NPA_AF_RVU: - NPA RVU Interrupt Reg : 1 - Unmap Slot Error -~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_err - NPA_AF_ERR: - NPA Error Interrupt Reg : 4096 - AQ Doorbell Error +For example:: + + ~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_gen + NPA_AF_GENERAL: + NPA General Interrupt Reg : 1 + NIX0: free disabled RX + ~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_intr + NPA_AF_RVU: + NPA RVU Interrupt Reg : 1 + Unmap Slot Error + ~# devlink health dump show pci/0002:01:00.0 reporter hw_npa_err + NPA_AF_ERR: + NPA Error Interrupt Reg : 4096 + AQ Doorbell Error -- cgit 1.4.1 From 0ef597c3ac49a62e1a2c1c10f88dd76fde1e1636 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Tue, 5 Jan 2021 06:58:15 +0100 Subject: docs: remove mention of ENABLE_MUST_CHECK We removed ENABLE_MUST_CHECK in 196793946264 ("Compiler Attributes: remove CONFIG_ENABLE_MUST_CHECK"), so let's remove docs' mentions. At the same time, fix the outdated text related to ENABLE_WARN_DEPRECATED that wasn't removed in 3337d5cfe5e08 ("configs: get rid of obsolete CONFIG_ENABLE_WARN_DEPRECATED"). Finally, reflow the paragraph. Signed-off-by: Miguel Ojeda Reviewed-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/r/20210105055815.GA5173@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/process/4.Coding.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/process/4.Coding.rst b/Documentation/process/4.Coding.rst index c27e59d2f702..0825dc496f22 100644 --- a/Documentation/process/4.Coding.rst +++ b/Documentation/process/4.Coding.rst @@ -249,10 +249,8 @@ features; most of these are found in the "kernel hacking" submenu. Several of these options should be turned on for any kernel used for development or testing purposes. In particular, you should turn on: - - ENABLE_MUST_CHECK and FRAME_WARN to get an - extra set of warnings for problems like the use of deprecated interfaces - or ignoring an important return value from a function. The output - generated by these warnings can be verbose, but one need not worry about + - FRAME_WARN to get warnings for stack frames larger than a given amount. + The output generated can be verbose, but one need not worry about warnings from other parts of the kernel. - DEBUG_OBJECTS will add code to track the lifetime of various objects -- cgit 1.4.1 From a734a7235ef3768dd3c9b7034f663ae6b260375f Mon Sep 17 00:00:00 2001 From: Jonathan Neuschäfer Date: Fri, 1 Jan 2021 22:14:47 +0100 Subject: docs: binfmt-misc: Fix .rst formatting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "name below" is not part of the /proc path and should not be formatted in monospace. "doesn``t" is rendered in HTML with a double backtick. Revert it back to "doesn't". Signed-off-by: Jonathan Neuschäfer Link: https://lore.kernel.org/r/20210101211447.1021412-1-j.neuschaefer@gmx.net Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/binfmt-misc.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/binfmt-misc.rst b/Documentation/admin-guide/binfmt-misc.rst index 7a864131e5ea..59cd902e3549 100644 --- a/Documentation/admin-guide/binfmt-misc.rst +++ b/Documentation/admin-guide/binfmt-misc.rst @@ -23,7 +23,7 @@ Here is what the fields mean: - ``name`` is an identifier string. A new /proc file will be created with this - ``name below /proc/sys/fs/binfmt_misc``; cannot contain slashes ``/`` for + name below ``/proc/sys/fs/binfmt_misc``; cannot contain slashes ``/`` for obvious reasons. - ``type`` is the type of recognition. Give ``M`` for magic and ``E`` for extension. @@ -83,7 +83,7 @@ Here is what the fields mean: ``F`` - fix binary The usual behaviour of binfmt_misc is to spawn the binary lazily when the misc format file is invoked. However, - this doesn``t work very well in the face of mount namespaces and + this doesn't work very well in the face of mount namespaces and changeroots, so the ``F`` mode opens the binary as soon as the emulation is installed and uses the opened image to spawn the emulator, meaning it is always available once installed, -- cgit 1.4.1 From 25942e5ecbac33918ec2f0869ca9a374dbb023f2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 31 Dec 2020 20:08:31 -0800 Subject: Documentation/admin-guide: kernel-parameters: hyphenate comma-separated Hyphenate "comma separated" when it is used as a compound adjective. hyphenated. Signed-off-by: Randy Dunlap Link: https://lore.kernel.org/r/20210101040831.4148-1-rdunlap@infradead.org Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/kernel-parameters.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index c722ec19cd00..9e3cdb271d06 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1385,7 +1385,7 @@ ftrace_filter=[function-list] [FTRACE] Limit the functions traced by the function - tracer at boot up. function-list is a comma separated + tracer at boot up. function-list is a comma-separated list of functions. This list can be changed at run time by the set_ftrace_filter file in the debugfs tracing directory. @@ -1399,13 +1399,13 @@ ftrace_graph_filter=[function-list] [FTRACE] Limit the top level callers functions traced by the function graph tracer at boot up. - function-list is a comma separated list of functions + function-list is a comma-separated list of functions that can be changed at run time by the set_graph_function file in the debugfs tracing directory. ftrace_graph_notrace=[function-list] [FTRACE] Do not trace from the functions specified in - function-list. This list is a comma separated list of + function-list. This list is a comma-separated list of functions that can be changed at run time by the set_graph_notrace file in the debugfs tracing directory. @@ -2421,7 +2421,7 @@ when set. Format: - libata.force= [LIBATA] Force configurations. The format is comma + libata.force= [LIBATA] Force configurations. The format is comma- separated list of "[ID:]VAL" where ID is PORT[.DEVICE]. PORT and DEVICE are decimal numbers matching port, link or device. Basically, it matches @@ -5145,7 +5145,7 @@ stacktrace_filter=[function-list] [FTRACE] Limit the functions that the stack tracer - will trace at boot up. function-list is a comma separated + will trace at boot up. function-list is a comma-separated list of functions. This list can be changed at run time by the stack_trace_filter file in the debugfs tracing directory. Note, this enables stack tracing @@ -5348,7 +5348,7 @@ trace_event=[event-list] [FTRACE] Set and start specified trace events in order to facilitate early boot debugging. The event-list is a - comma separated list of trace events to enable. See + comma-separated list of trace events to enable. See also Documentation/trace/events.rst trace_options=[option-list] -- cgit 1.4.1 From 9d54ee78aef62c29b15ae2f58a70b1d1cd63a8f0 Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Thu, 7 Jan 2021 18:26:10 +0530 Subject: docs: admin-guide: bootconfig: Fix feils to fails s/feils/fails/p Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20210107125610.1576368-1-unixbhaskar@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/bootconfig.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/bootconfig.rst b/Documentation/admin-guide/bootconfig.rst index 9b90efcc3a35..452b7dcd7f6b 100644 --- a/Documentation/admin-guide/bootconfig.rst +++ b/Documentation/admin-guide/bootconfig.rst @@ -154,7 +154,7 @@ get the boot configuration data. Because of this "piggyback" method, there is no need to change or update the boot loader and the kernel image itself as long as the boot loader passes the correct initrd file size. If by any chance, the boot -loader passes a longer size, the kernel feils to find the bootconfig data. +loader passes a longer size, the kernel fails to find the bootconfig data. To do this operation, Linux kernel provides "bootconfig" command under tools/bootconfig, which allows admin to apply or delete the config file -- cgit 1.4.1 From bb12433bf56e76789c6b08b36c546f745a6aa6e1 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 6 Jan 2021 12:34:36 -0800 Subject: ARC: unbork 5.11 bootup: fix snafu in _TIF_NOTIFY_SIGNAL handling Linux 5.11.rcX was failing to boot on ARC HSDK board. Turns out we have a couple of issues, this being the first one, and I'm to blame as I didn't pay attention during review. TIF_NOTIFY_SIGNAL support requires checking multiple TIF_* bits in kernel return code path. Old code only needed to check a single bit so BBIT0 worked. New code needs to check multiple bits so AND instruction. So needs to use bit mask variant _TIF_SIGPENDING Cc: Jens Axboe Fixes: 53855e12588743ea128 ("arc: add support for TIF_NOTIFY_SIGNAL") Link: https://github.com/foss-for-synopsys-dwc-arc-processors/linux/issues/34 Signed-off-by: Vineet Gupta --- arch/arc/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 1f5308abf36d..1743506081da 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -307,7 +307,7 @@ resume_user_mode_begin: mov r0, sp ; pt_regs for arg to do_signal()/do_notify_resume() GET_CURR_THR_INFO_FLAGS r9 - and.f 0, r9, TIF_SIGPENDING|TIF_NOTIFY_SIGNAL + and.f 0, r9, _TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL bz .Lchk_notify_resume ; Normal Trap/IRQ entry only saves Scratch (caller-saved) regs -- cgit 1.4.1 From 9e7a67dee27902fedab880b9af909bd4acd0fba9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 Jan 2021 00:15:21 +0100 Subject: selftests: netfilter: add selftest for ipip pmtu discovery with enabled connection tracking Convert Christians bug description into a reproducer. Cc: Shuah Khan Reported-by: Christian Perle Signed-off-by: Florian Westphal Acked-by: Pablo Neira Ayuso Signed-off-by: Jakub Kicinski --- tools/testing/selftests/netfilter/Makefile | 3 +- .../selftests/netfilter/ipip-conntrack-mtu.sh | 206 +++++++++++++++++++++ 2 files changed, 208 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index a374e10ef506..3006a8e5b41a 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -4,7 +4,8 @@ TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ - nft_queue.sh nft_meta.sh + nft_queue.sh nft_meta.sh \ + ipip-conntrack-mtu.sh LDLIBS = -lmnl TEST_GEN_FILES = nf-queue diff --git a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh new file mode 100755 index 000000000000..4a6f5c3b3215 --- /dev/null +++ b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh @@ -0,0 +1,206 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +# Conntrack needs to reassemble fragments in order to have complete +# packets for rule matching. Reassembly can lead to packet loss. + +# Consider the following setup: +# +--------+ +---------+ +--------+ +# |Router A|-------|Wanrouter|-------|Router B| +# | |.IPIP..| |..IPIP.| | +# +--------+ +---------+ +--------+ +# / mtu 1400 \ +# / \ +#+--------+ +--------+ +#|Client A| |Client B| +#| | | | +#+--------+ +--------+ + +# Router A and Router B use IPIP tunnel interfaces to tunnel traffic +# between Client A and Client B over WAN. Wanrouter has MTU 1400 set +# on its interfaces. + +rnd=$(mktemp -u XXXXXXXX) +rx=$(mktemp) + +r_a="ns-ra-$rnd" +r_b="ns-rb-$rnd" +r_w="ns-rw-$rnd" +c_a="ns-ca-$rnd" +c_b="ns-cb-$rnd" + +checktool (){ + if ! $1 > /dev/null 2>&1; then + echo "SKIP: Could not $2" + exit $ksft_skip + fi +} + +checktool "iptables --version" "run test without iptables" +checktool "ip -Version" "run test without ip tool" +checktool "which nc" "run test without nc (netcat)" +checktool "ip netns add ${r_a}" "create net namespace" + +for n in ${r_b} ${r_w} ${c_a} ${c_b};do + ip netns add ${n} +done + +cleanup() { + for n in ${r_a} ${r_b} ${r_w} ${c_a} ${c_b};do + ip netns del ${n} + done + rm -f ${rx} +} + +trap cleanup EXIT + +test_path() { + msg="$1" + + ip netns exec ${c_b} nc -n -w 3 -q 3 -u -l -p 5000 > ${rx} < /dev/null & + + sleep 1 + for i in 1 2 3; do + head -c1400 /dev/zero | tr "\000" "a" | ip netns exec ${c_a} nc -n -w 1 -u 192.168.20.2 5000 + done + + wait + + bytes=$(wc -c < ${rx}) + + if [ $bytes -eq 1400 ];then + echo "OK: PMTU $msg connection tracking" + else + echo "FAIL: PMTU $msg connection tracking: got $bytes, expected 1400" + exit 1 + fi +} + +# Detailed setup for Router A +# --------------------------- +# Interfaces: +# eth0: 10.2.2.1/24 +# eth1: 192.168.10.1/24 +# ipip0: No IP address, local 10.2.2.1 remote 10.4.4.1 +# Routes: +# 192.168.20.0/24 dev ipip0 (192.168.20.0/24 is subnet of Client B) +# 10.4.4.1 via 10.2.2.254 (Router B via Wanrouter) +# No iptables rules at all. + +ip link add veth0 netns ${r_a} type veth peer name veth0 netns ${r_w} +ip link add veth1 netns ${r_a} type veth peer name veth0 netns ${c_a} + +l_addr="10.2.2.1" +r_addr="10.4.4.1" +ip netns exec ${r_a} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip + +for dev in lo veth0 veth1 ipip0; do + ip -net ${r_a} link set $dev up +done + +ip -net ${r_a} addr add 10.2.2.1/24 dev veth0 +ip -net ${r_a} addr add 192.168.10.1/24 dev veth1 + +ip -net ${r_a} route add 192.168.20.0/24 dev ipip0 +ip -net ${r_a} route add 10.4.4.0/24 via 10.2.2.254 + +ip netns exec ${r_a} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null + +# Detailed setup for Router B +# --------------------------- +# Interfaces: +# eth0: 10.4.4.1/24 +# eth1: 192.168.20.1/24 +# ipip0: No IP address, local 10.4.4.1 remote 10.2.2.1 +# Routes: +# 192.168.10.0/24 dev ipip0 (192.168.10.0/24 is subnet of Client A) +# 10.2.2.1 via 10.4.4.254 (Router A via Wanrouter) +# No iptables rules at all. + +ip link add veth0 netns ${r_b} type veth peer name veth1 netns ${r_w} +ip link add veth1 netns ${r_b} type veth peer name veth0 netns ${c_b} + +l_addr="10.4.4.1" +r_addr="10.2.2.1" + +ip netns exec ${r_b} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip + +for dev in lo veth0 veth1 ipip0; do + ip -net ${r_b} link set $dev up +done + +ip -net ${r_b} addr add 10.4.4.1/24 dev veth0 +ip -net ${r_b} addr add 192.168.20.1/24 dev veth1 + +ip -net ${r_b} route add 192.168.10.0/24 dev ipip0 +ip -net ${r_b} route add 10.2.2.0/24 via 10.4.4.254 +ip netns exec ${r_b} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null + +# Client A +ip -net ${c_a} addr add 192.168.10.2/24 dev veth0 +ip -net ${c_a} link set dev lo up +ip -net ${c_a} link set dev veth0 up +ip -net ${c_a} route add default via 192.168.10.1 + +# Client A +ip -net ${c_b} addr add 192.168.20.2/24 dev veth0 +ip -net ${c_b} link set dev veth0 up +ip -net ${c_b} link set dev lo up +ip -net ${c_b} route add default via 192.168.20.1 + +# Wan +ip -net ${r_w} addr add 10.2.2.254/24 dev veth0 +ip -net ${r_w} addr add 10.4.4.254/24 dev veth1 + +ip -net ${r_w} link set dev lo up +ip -net ${r_w} link set dev veth0 up mtu 1400 +ip -net ${r_w} link set dev veth1 up mtu 1400 + +ip -net ${r_a} link set dev veth0 mtu 1400 +ip -net ${r_b} link set dev veth0 mtu 1400 + +ip netns exec ${r_w} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null + +# Path MTU discovery +# ------------------ +# Running tracepath from Client A to Client B shows PMTU discovery is working +# as expected: +# +# clienta:~# tracepath 192.168.20.2 +# 1?: [LOCALHOST] pmtu 1500 +# 1: 192.168.10.1 0.867ms +# 1: 192.168.10.1 0.302ms +# 2: 192.168.10.1 0.312ms pmtu 1480 +# 2: no reply +# 3: 192.168.10.1 0.510ms pmtu 1380 +# 3: 192.168.20.2 2.320ms reached +# Resume: pmtu 1380 hops 3 back 3 + +# ip netns exec ${c_a} traceroute --mtu 192.168.20.2 + +# Router A has learned PMTU (1400) to Router B from Wanrouter. +# Client A has learned PMTU (1400 - IPIP overhead = 1380) to Client B +# from Router A. + +#Send large UDP packet +#--------------------- +#Now we send a 1400 bytes UDP packet from Client A to Client B: + +# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | nc -u 192.168.20.2 5000 +test_path "without" + +# The IPv4 stack on Client A already knows the PMTU to Client B, so the +# UDP packet is sent as two fragments (1380 + 20). Router A forwards the +# fragments between eth1 and ipip0. The fragments fit into the tunnel and +# reach their destination. + +#When sending the large UDP packet again, Router A now reassembles the +#fragments before routing the packet over ipip0. The resulting IPIP +#packet is too big (1400) for the tunnel PMTU (1380) to Router B, it is +#dropped on Router A before sending. + +ip netns exec ${r_a} iptables -A FORWARD -m conntrack --ctstate NEW +test_path "with" -- cgit 1.4.1 From 50c661670f6a3908c273503dfa206dfc7aa54c07 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 Jan 2021 00:15:22 +0100 Subject: net: fix pmtu check in nopmtudisc mode For some reason ip_tunnel insist on setting the DF bit anyway when the inner header has the DF bit set, EVEN if the tunnel was configured with 'nopmtudisc'. This means that the script added in the previous commit cannot be made to work by adding the 'nopmtudisc' flag to the ip tunnel configuration. Doing so breaks connectivity even for the without-conntrack/netfilter scenario. When nopmtudisc is set, the tunnel will skip the mtu check, so no icmp error is sent to client. Then, because inner header has DF set, the outer header gets added with DF bit set as well. IP stack then sends an error to itself because the packet exceeds the device MTU. Fixes: 23a3647bc4f93 ("ip_tunnels: Use skb-len to PMTU check.") Cc: Stefano Brivio Signed-off-by: Florian Westphal Acked-by: Pablo Neira Ayuso Signed-off-by: Jakub Kicinski --- net/ipv4/ip_tunnel.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index ee65c9225178..64594aa755f0 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -759,8 +759,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph, - 0, 0, false)) { + df = tnl_params->frag_off; + if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df) + df |= (inner_iph->frag_off & htons(IP_DF)); + + if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) { ip_rt_put(rt); goto tx_error; } @@ -788,10 +791,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ttl = ip4_dst_hoplimit(&rt->dst); } - df = tnl_params->frag_off; - if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df) - df |= (inner_iph->frag_off&htons(IP_DF)); - max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + rt->dst.header_len + ip_encap_hlen(&tunnel->encap); if (max_headroom > dev->needed_headroom) -- cgit 1.4.1 From bb4cc1a18856a73f0ff5137df0c2a31f4c50f6cf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 Jan 2021 00:15:23 +0100 Subject: net: ip: always refragment ip defragmented packets Conntrack reassembly records the largest fragment size seen in IPCB. However, when this gets forwarded/transmitted, fragmentation will only be forced if one of the fragmented packets had the DF bit set. In that case, a flag in IPCB will force fragmentation even if the MTU is large enough. This should work fine, but this breaks with ip tunnels. Consider client that sends a UDP datagram of size X to another host. The client fragments the datagram, so two packets, of size y and z, are sent. DF bit is not set on any of these packets. Middlebox netfilter reassembles those packets back to single size-X packet, before routing decision. packet-size-vs-mtu checks in ip_forward are irrelevant, because DF bit isn't set. At output time, ip refragmentation is skipped as well because x is still smaller than the mtu of the output device. If ttransmit device is an ip tunnel, the packet size increases to x+overhead. Also, tunnel might be configured to force DF bit on outer header. In this case, packet will be dropped (exceeds MTU) and an ICMP error is generated back to sender. But sender already respects the announced MTU, all the packets that it sent did fit the announced mtu. Force refragmentation as per original sizes unconditionally so ip tunnel will encapsulate the fragments instead. The only other solution I see is to place ip refragmentation in the ip_tunnel code to handle this case. Fixes: d6b915e29f4ad ("ip_fragment: don't forward defragmented DF packet") Reported-by: Christian Perle Signed-off-by: Florian Westphal Acked-by: Pablo Neira Ayuso Signed-off-by: Jakub Kicinski --- net/ipv4/ip_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 89fff5f59eea..2ed0b01f72f0 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -302,7 +302,7 @@ static int __ip_finish_output(struct net *net, struct sock *sk, struct sk_buff * if (skb_is_gso(skb)) return ip_finish_output_gso(net, sk, skb, mtu); - if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU)) + if (skb->len > mtu || IPCB(skb)->frag_max_size) return ip_fragment(net, sk, skb, mtu, ip_finish_output2); return ip_finish_output2(net, sk, skb); -- cgit 1.4.1 From 2aa078932ff6c66bf10cc5b3144440dbfa7d813d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 17 Dec 2020 16:31:36 -0800 Subject: KVM: x86/mmu: Use -1 to flag an undefined spte in get_mmio_spte() Return -1 from the get_walk() helpers if the shadow walk doesn't fill at least one spte, which can theoretically happen if the walk hits a not-present PDPTR. Returning the root level in such a case will cause get_mmio_spte() to return garbage (uninitialized stack data). In practice, such a scenario should be impossible as KVM shouldn't get a reserved-bit page fault with a not-present PDPTR. Note, using mmu->root_level in get_walk() is wrong for other reasons, too, but that's now a moot point. Fixes: 95fb5b0258b7 ("kvm: x86/mmu: Support MMIO in the TDP MMU") Cc: Ben Gardon Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20201218003139.2167891-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 7 ++++++- arch/x86/kvm/mmu/tdp_mmu.c | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 7a6ae9e90bd7..a48cd12c01d7 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3488,7 +3488,7 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes) { struct kvm_shadow_walk_iterator iterator; - int leaf = vcpu->arch.mmu->root_level; + int leaf = -1; u64 spte; @@ -3532,6 +3532,11 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) else leaf = get_walk(vcpu, addr, sptes); + if (unlikely(leaf < 0)) { + *sptep = 0ull; + return reserved; + } + rsvd_check = &vcpu->arch.mmu->shadow_zero_check; for (level = root; level >= leaf; level--) { diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 84c8f06bec26..50cec7a15ddb 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1152,8 +1152,8 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes) { struct tdp_iter iter; struct kvm_mmu *mmu = vcpu->arch.mmu; - int leaf = vcpu->arch.mmu->shadow_root_level; gfn_t gfn = addr >> PAGE_SHIFT; + int leaf = -1; tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { leaf = iter.level; -- cgit 1.4.1 From 39b4d43e6003cee51cd119596d3c33d0449eb44c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 17 Dec 2020 16:31:37 -0800 Subject: KVM: x86/mmu: Get root level from walkers when retrieving MMIO SPTE Get the so called "root" level from the low level shadow page table walkers instead of manually attempting to calculate it higher up the stack, e.g. in get_mmio_spte(). When KVM is using PAE shadow paging, the starting level of the walk, from the callers perspective, is not the CR3 root but rather the PDPTR "root". Checking for reserved bits from the CR3 root causes get_mmio_spte() to consume uninitialized stack data due to indexing into sptes[] for a level that was not filled by get_walk(). This can result in false positives and/or negatives depending on what garbage happens to be on the stack. Opportunistically nuke a few extra newlines. Fixes: 95fb5b0258b7 ("kvm: x86/mmu: Support MMIO in the TDP MMU") Reported-by: Richard Herbert Cc: Ben Gardon Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20201218003139.2167891-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 15 ++++++--------- arch/x86/kvm/mmu/tdp_mmu.c | 5 ++++- arch/x86/kvm/mmu/tdp_mmu.h | 4 +++- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index a48cd12c01d7..52f36c879086 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3485,16 +3485,16 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) * Return the level of the lowest level SPTE added to sptes. * That SPTE may be non-present. */ -static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes) +static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level) { struct kvm_shadow_walk_iterator iterator; int leaf = -1; u64 spte; - walk_shadow_page_lockless_begin(vcpu); - for (shadow_walk_init(&iterator, vcpu, addr); + for (shadow_walk_init(&iterator, vcpu, addr), + *root_level = iterator.level; shadow_walk_okay(&iterator); __shadow_walk_next(&iterator, spte)) { leaf = iterator.level; @@ -3504,7 +3504,6 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes) if (!is_shadow_present_pte(spte)) break; - } walk_shadow_page_lockless_end(vcpu); @@ -3517,9 +3516,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { u64 sptes[PT64_ROOT_MAX_LEVEL]; struct rsvd_bits_validate *rsvd_check; - int root = vcpu->arch.mmu->shadow_root_level; - int leaf; - int level; + int root, leaf, level; bool reserved = false; if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) { @@ -3528,9 +3525,9 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) } if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa)) - leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes); + leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root); else - leaf = get_walk(vcpu, addr, sptes); + leaf = get_walk(vcpu, addr, sptes, &root); if (unlikely(leaf < 0)) { *sptep = 0ull; diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 50cec7a15ddb..a4f9447f8327 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1148,13 +1148,16 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm, * Return the level of the lowest level SPTE added to sptes. * That SPTE may be non-present. */ -int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes) +int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, + int *root_level) { struct tdp_iter iter; struct kvm_mmu *mmu = vcpu->arch.mmu; gfn_t gfn = addr >> PAGE_SHIFT; int leaf = -1; + *root_level = vcpu->arch.mmu->shadow_root_level; + tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { leaf = iter.level; sptes[leaf - 1] = iter.old_spte; diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index 556e065503f6..cbbdbadd1526 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -44,5 +44,7 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm, bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn); -int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes); +int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, + int *root_level); + #endif /* __KVM_X86_MMU_TDP_MMU_H */ -- cgit 1.4.1 From dde81f9477d018a96fba991c5928c6ab8cc109f8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 17 Dec 2020 16:31:38 -0800 Subject: KVM: x86/mmu: Use raw level to index into MMIO walks' sptes array Bump the size of the sptes array by one and use the raw level of the SPTE to index into the sptes array. Using the SPTE level directly improves readability by eliminating the need to reason out why the level is being adjusted when indexing the array. The array is on the stack and is not explicitly initialized; bumping its size is nothing more than a superficial adjustment to the stack frame. Signed-off-by: Sean Christopherson Message-Id: <20201218003139.2167891-4-seanjc@google.com> Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 15 +++++++-------- arch/x86/kvm/mmu/tdp_mmu.c | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 52f36c879086..4798a4472066 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3500,7 +3500,7 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level leaf = iterator.level; spte = mmu_spte_get_lockless(iterator.sptep); - sptes[leaf - 1] = spte; + sptes[leaf] = spte; if (!is_shadow_present_pte(spte)) break; @@ -3514,7 +3514,7 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level /* return true if reserved bit is detected on spte. */ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { - u64 sptes[PT64_ROOT_MAX_LEVEL]; + u64 sptes[PT64_ROOT_MAX_LEVEL + 1]; struct rsvd_bits_validate *rsvd_check; int root, leaf, level; bool reserved = false; @@ -3537,16 +3537,15 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) rsvd_check = &vcpu->arch.mmu->shadow_zero_check; for (level = root; level >= leaf; level--) { - if (!is_shadow_present_pte(sptes[level - 1])) + if (!is_shadow_present_pte(sptes[level])) break; /* * Use a bitwise-OR instead of a logical-OR to aggregate the * reserved bit and EPT's invalid memtype/XWR checks to avoid * adding a Jcc in the loop. */ - reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level - 1]) | - __is_rsvd_bits_set(rsvd_check, sptes[level - 1], - level); + reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level]) | + __is_rsvd_bits_set(rsvd_check, sptes[level], level); } if (reserved) { @@ -3554,10 +3553,10 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) __func__, addr); for (level = root; level >= leaf; level--) pr_err("------ spte 0x%llx level %d.\n", - sptes[level - 1], level); + sptes[level], level); } - *sptep = sptes[leaf - 1]; + *sptep = sptes[leaf]; return reserved; } diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index a4f9447f8327..efef571806ad 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1160,7 +1160,7 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { leaf = iter.level; - sptes[leaf - 1] = iter.old_spte; + sptes[leaf] = iter.old_spte; } return leaf; -- cgit 1.4.1 From 9aa418792f5f11ef5d6f72265e1f8ae07efd5784 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 17 Dec 2020 16:31:39 -0800 Subject: KVM: x86/mmu: Optimize not-present/MMIO SPTE check in get_mmio_spte() Check only the terminal leaf for a "!PRESENT || MMIO" SPTE when looking for reserved bits on valid, non-MMIO SPTEs. The get_walk() helpers terminate their walks if a not-present or MMIO SPTE is encountered, i.e. the non-terminal SPTEs have already been verified to be regular SPTEs. This eliminates an extra check-and-branch in a relatively hot loop. Signed-off-by: Sean Christopherson Message-Id: <20201218003139.2167891-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 4798a4472066..769855f5f0a1 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3511,7 +3511,7 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level return leaf; } -/* return true if reserved bit is detected on spte. */ +/* return true if reserved bit(s) are detected on a valid, non-MMIO SPTE. */ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { u64 sptes[PT64_ROOT_MAX_LEVEL + 1]; @@ -3534,11 +3534,20 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) return reserved; } + *sptep = sptes[leaf]; + + /* + * Skip reserved bits checks on the terminal leaf if it's not a valid + * SPTE. Note, this also (intentionally) skips MMIO SPTEs, which, by + * design, always have reserved bits set. The purpose of the checks is + * to detect reserved bits on non-MMIO SPTEs. i.e. buggy SPTEs. + */ + if (!is_shadow_present_pte(sptes[leaf])) + leaf++; + rsvd_check = &vcpu->arch.mmu->shadow_zero_check; - for (level = root; level >= leaf; level--) { - if (!is_shadow_present_pte(sptes[level])) - break; + for (level = root; level >= leaf; level--) /* * Use a bitwise-OR instead of a logical-OR to aggregate the * reserved bit and EPT's invalid memtype/XWR checks to avoid @@ -3546,7 +3555,6 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) */ reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level]) | __is_rsvd_bits_set(rsvd_check, sptes[level], level); - } if (reserved) { pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n", @@ -3556,8 +3564,6 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) sptes[level], level); } - *sptep = sptes[leaf]; - return reserved; } -- cgit 1.4.1 From f65cf84ee769767536dc367acc9568ddb6e4c9f4 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 18 Dec 2020 23:37:11 -0700 Subject: KVM: SVM: Add register operand to vmsave call in sev_es_vcpu_load When using LLVM's integrated assembler (LLVM_IAS=1) while building x86_64_defconfig + CONFIG_KVM=y + CONFIG_KVM_AMD=y, the following build error occurs: $ make LLVM=1 LLVM_IAS=1 arch/x86/kvm/svm/sev.o arch/x86/kvm/svm/sev.c:2004:15: error: too few operands for instruction asm volatile(__ex("vmsave") : : "a" (__sme_page_pa(sd->save_area)) : "memory"); ^ arch/x86/kvm/svm/sev.c:28:17: note: expanded from macro '__ex' #define __ex(x) __kvm_handle_fault_on_reboot(x) ^ ./arch/x86/include/asm/kvm_host.h:1646:10: note: expanded from macro '__kvm_handle_fault_on_reboot' "666: \n\t" \ ^ :2:2: note: instantiated into assembly here vmsave ^ 1 error generated. This happens because LLVM currently does not support calling vmsave without the fixed register operand (%rax for 64-bit and %eax for 32-bit). This will be fixed in LLVM 12 but the kernel currently supports LLVM 10.0.1 and newer so this needs to be handled. Add the proper register using the _ASM_AX macro, which matches the vmsave call in vmenter.S. Fixes: 861377730aa9 ("KVM: SVM: Provide support for SEV-ES vCPU loading") Link: https://reviews.llvm.org/D93524 Link: https://github.com/ClangBuiltLinux/linux/issues/1216 Signed-off-by: Nathan Chancellor Message-Id: <20201219063711.3526947-1-natechancellor@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 9858d5ae9ddd..563ced07b0b8 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2001,7 +2001,7 @@ void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu) * of which one step is to perform a VMLOAD. Since hardware does not * perform a VMSAVE on VMRUN, the host savearea must be updated. */ - asm volatile(__ex("vmsave") : : "a" (__sme_page_pa(sd->save_area)) : "memory"); + asm volatile(__ex("vmsave %0") : : "a" (__sme_page_pa(sd->save_area)) : "memory"); /* * Certain MSRs are restored on VMEXIT, only save ones that aren't -- cgit 1.4.1 From 52782d5b63725a6c4bf642557c83507430064110 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Sun, 20 Dec 2020 21:03:39 +0100 Subject: KVM/SVM: Remove leftover __svm_vcpu_run prototype from svm.c Commit 16809ecdc1e8a moved __svm_vcpu_run the prototype to svm.h, but forgot to remove the original from svm.c. Fixes: 16809ecdc1e8a ("KVM: SVM: Provide an updated VMRUN invocation for SEV-ES guests") Cc: Tom Lendacky Cc: Paolo Bonzini Signed-off-by: Uros Bizjak Message-Id: <20201220200339.65115-1-ubizjak@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index cce0143a6f80..6824d611dc5d 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3677,8 +3677,6 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) return EXIT_FASTPATH_NONE; } -void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs); - static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, struct vcpu_svm *svm) { -- cgit 1.4.1 From e42ac777d661e878c3b9bac56df11e226cab3010 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 18 Dec 2020 15:17:32 +0100 Subject: KVM: selftests: Factor out guest mode code demand_paging_test, dirty_log_test, and dirty_log_perf_test have redundant guest mode code. Factor it out. Also, while adding a new include, remove the ones we don't need. Reviewed-by: Ben Gardon Reviewed-by: Peter Xu Signed-off-by: Andrew Jones Message-Id: <20201218141734.54359-2-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 2 +- tools/testing/selftests/kvm/demand_paging_test.c | 107 +++++------------- tools/testing/selftests/kvm/dirty_log_perf_test.c | 121 ++++++--------------- tools/testing/selftests/kvm/dirty_log_test.c | 125 +++++++--------------- tools/testing/selftests/kvm/include/guest_modes.h | 21 ++++ tools/testing/selftests/kvm/lib/guest_modes.c | 70 ++++++++++++ 6 files changed, 189 insertions(+), 257 deletions(-) create mode 100644 tools/testing/selftests/kvm/include/guest_modes.h create mode 100644 tools/testing/selftests/kvm/lib/guest_modes.c diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index c7ca4faba272..a7286a08c3ae 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -33,7 +33,7 @@ ifeq ($(ARCH),s390) UNAME_M := s390x endif -LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c +LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 3d96a7bfaff3..946161a9ce2d 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -7,23 +7,20 @@ * Copyright (C) 2019, Google, Inc. */ -#define _GNU_SOURCE /* for program_invocation_name */ +#define _GNU_SOURCE /* for program_invocation_name and pipe2 */ #include #include -#include -#include -#include #include #include #include -#include -#include #include +#include -#include "perf_test_util.h" -#include "processor.h" +#include "kvm_util.h" #include "test_util.h" +#include "perf_test_util.h" +#include "guest_modes.h" #ifdef __NR_userfaultfd @@ -248,9 +245,14 @@ static int setup_demand_paging(struct kvm_vm *vm, return 0; } -static void run_test(enum vm_guest_mode mode, bool use_uffd, - useconds_t uffd_delay) +struct test_params { + bool use_uffd; + useconds_t uffd_delay; +}; + +static void run_test(enum vm_guest_mode mode, void *arg) { + struct test_params *p = arg; pthread_t *vcpu_threads; pthread_t *uffd_handler_threads = NULL; struct uffd_handler_args *uffd_args = NULL; @@ -275,7 +277,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); - if (use_uffd) { + if (p->use_uffd) { uffd_handler_threads = malloc(nr_vcpus * sizeof(*uffd_handler_threads)); TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); @@ -308,7 +310,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, r = setup_demand_paging(vm, &uffd_handler_threads[vcpu_id], pipefds[vcpu_id * 2], - uffd_delay, &uffd_args[vcpu_id], + p->uffd_delay, &uffd_args[vcpu_id], vcpu_hva, guest_percpu_mem_size); if (r < 0) exit(-r); @@ -339,7 +341,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, pr_info("All vCPU threads joined\n"); - if (use_uffd) { + if (p->use_uffd) { char c; /* Tell the user fault fd handler threads to quit */ @@ -362,38 +364,19 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, free(guest_data_prototype); free(vcpu_threads); - if (use_uffd) { + if (p->use_uffd) { free(uffd_handler_threads); free(uffd_args); free(pipefds); } } -struct guest_mode { - bool supported; - bool enabled; -}; -static struct guest_mode guest_modes[NUM_VM_MODES]; - -#define guest_mode_init(mode, supported, enabled) ({ \ - guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ -}) - static void help(char *name) { - int i; - puts(""); printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n" " [-b memory] [-v vcpus]\n", name); - printf(" -m: specify the guest mode ID to test\n" - " (default: test all supported modes)\n" - " This option may be used multiple times.\n" - " Guest mode IDs:\n"); - for (i = 0; i < NUM_VM_MODES; ++i) { - printf(" %d: %s%s\n", i, vm_guest_mode_string(i), - guest_modes[i].supported ? " (supported)" : ""); - } + guest_modes_help(); printf(" -u: use User Fault FD to handle vCPU page\n" " faults.\n"); printf(" -d: add a delay in usec to the User Fault\n" @@ -410,53 +393,22 @@ static void help(char *name) int main(int argc, char *argv[]) { int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); - bool mode_selected = false; - unsigned int mode; - int opt, i; - bool use_uffd = false; - useconds_t uffd_delay = 0; - -#ifdef __x86_64__ - guest_mode_init(VM_MODE_PXXV48_4K, true, true); -#endif -#ifdef __aarch64__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); - guest_mode_init(VM_MODE_P40V48_64K, true, true); - { - unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - - if (limit >= 52) - guest_mode_init(VM_MODE_P52V48_64K, true, true); - if (limit >= 48) { - guest_mode_init(VM_MODE_P48V48_4K, true, true); - guest_mode_init(VM_MODE_P48V48_64K, true, true); - } - } -#endif -#ifdef __s390x__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); -#endif + struct test_params p = {}; + int opt; + + guest_modes_append_default(); while ((opt = getopt(argc, argv, "hm:ud:b:v:")) != -1) { switch (opt) { case 'm': - if (!mode_selected) { - for (i = 0; i < NUM_VM_MODES; ++i) - guest_modes[i].enabled = false; - mode_selected = true; - } - mode = strtoul(optarg, NULL, 10); - TEST_ASSERT(mode < NUM_VM_MODES, - "Guest mode ID %d too big", mode); - guest_modes[mode].enabled = true; + guest_modes_cmdline(optarg); break; case 'u': - use_uffd = true; + p.use_uffd = true; break; case 'd': - uffd_delay = strtoul(optarg, NULL, 0); - TEST_ASSERT(uffd_delay >= 0, - "A negative UFFD delay is not supported."); + p.uffd_delay = strtoul(optarg, NULL, 0); + TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported."); break; case 'b': guest_percpu_mem_size = parse_size(optarg); @@ -473,14 +425,7 @@ int main(int argc, char *argv[]) } } - for (i = 0; i < NUM_VM_MODES; ++i) { - if (!guest_modes[i].enabled) - continue; - TEST_ASSERT(guest_modes[i].supported, - "Guest mode ID %d (%s) not supported.", - i, vm_guest_mode_string(i)); - run_test(i, use_uffd, uffd_delay); - } + for_each_guest_mode(run_test, &p); return 0; } diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 9c6a7be31e03..506741eb5d7f 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -12,16 +12,14 @@ #include #include -#include #include #include #include -#include #include "kvm_util.h" -#include "perf_test_util.h" -#include "processor.h" #include "test_util.h" +#include "perf_test_util.h" +#include "guest_modes.h" /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ #define TEST_HOST_LOOP_N 2UL @@ -89,9 +87,15 @@ static void *vcpu_worker(void *data) return NULL; } -static void run_test(enum vm_guest_mode mode, unsigned long iterations, - uint64_t phys_offset, int wr_fract) +struct test_params { + unsigned long iterations; + uint64_t phys_offset; + int wr_fract; +}; + +static void run_test(enum vm_guest_mode mode, void *arg) { + struct test_params *p = arg; pthread_t *vcpu_threads; struct kvm_vm *vm; unsigned long *bmap; @@ -108,7 +112,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); - perf_test_args.wr_fract = wr_fract; + perf_test_args.wr_fract = p->wr_fract; guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm); guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); @@ -156,7 +160,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, pr_info("Enabling dirty logging time: %ld.%.9lds\n\n", ts_diff.tv_sec, ts_diff.tv_nsec); - while (iteration < iterations) { + while (iteration < p->iterations) { /* * Incrementing the iteration number will start the vCPUs * dirtying memory again. @@ -210,15 +214,15 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, pr_info("Disabling dirty logging time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); - avg = timespec_div(get_dirty_log_total, iterations); + avg = timespec_div(get_dirty_log_total, p->iterations); pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", - iterations, get_dirty_log_total.tv_sec, + p->iterations, get_dirty_log_total.tv_sec, get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); if (dirty_log_manual_caps) { - avg = timespec_div(clear_dirty_log_total, iterations); + avg = timespec_div(clear_dirty_log_total, p->iterations); pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", - iterations, clear_dirty_log_total.tv_sec, + p->iterations, clear_dirty_log_total.tv_sec, clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); } @@ -228,20 +232,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, kvm_vm_free(vm); } -struct guest_mode { - bool supported; - bool enabled; -}; -static struct guest_mode guest_modes[NUM_VM_MODES]; - -#define guest_mode_init(mode, supported, enabled) ({ \ - guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ -}) - static void help(char *name) { - int i; - puts(""); printf("usage: %s [-h] [-i iterations] [-p offset] " "[-m mode] [-b vcpu bytes] [-v vcpus]\n", name); @@ -250,14 +242,7 @@ static void help(char *name) TEST_HOST_LOOP_N); printf(" -p: specify guest physical test memory offset\n" " Warning: a low offset can conflict with the loaded test code.\n"); - printf(" -m: specify the guest mode ID to test " - "(default: test all supported modes)\n" - " This option may be used multiple times.\n" - " Guest mode IDs:\n"); - for (i = 0; i < NUM_VM_MODES; ++i) { - printf(" %d: %s%s\n", i, vm_guest_mode_string(i), - guest_modes[i].supported ? " (supported)" : ""); - } + guest_modes_help(); printf(" -b: specify the size of the memory region which should be\n" " dirtied by each vCPU. e.g. 10M or 3G.\n" " (default: 1G)\n"); @@ -272,74 +257,43 @@ static void help(char *name) int main(int argc, char *argv[]) { - unsigned long iterations = TEST_HOST_LOOP_N; - bool mode_selected = false; - uint64_t phys_offset = 0; - unsigned int mode; - int opt, i; - int wr_fract = 1; + int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); + struct test_params p = { + .iterations = TEST_HOST_LOOP_N, + .wr_fract = 1, + }; + int opt; dirty_log_manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | KVM_DIRTY_LOG_INITIALLY_SET); -#ifdef __x86_64__ - guest_mode_init(VM_MODE_PXXV48_4K, true, true); -#endif -#ifdef __aarch64__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); - guest_mode_init(VM_MODE_P40V48_64K, true, true); - - { - unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - - if (limit >= 52) - guest_mode_init(VM_MODE_P52V48_64K, true, true); - if (limit >= 48) { - guest_mode_init(VM_MODE_P48V48_4K, true, true); - guest_mode_init(VM_MODE_P48V48_64K, true, true); - } - } -#endif -#ifdef __s390x__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); -#endif + guest_modes_append_default(); while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:")) != -1) { switch (opt) { case 'i': - iterations = strtol(optarg, NULL, 10); + p.iterations = strtol(optarg, NULL, 10); break; case 'p': - phys_offset = strtoull(optarg, NULL, 0); + p.phys_offset = strtoull(optarg, NULL, 0); break; case 'm': - if (!mode_selected) { - for (i = 0; i < NUM_VM_MODES; ++i) - guest_modes[i].enabled = false; - mode_selected = true; - } - mode = strtoul(optarg, NULL, 10); - TEST_ASSERT(mode < NUM_VM_MODES, - "Guest mode ID %d too big", mode); - guest_modes[mode].enabled = true; + guest_modes_cmdline(optarg); break; case 'b': guest_percpu_mem_size = parse_size(optarg); break; case 'f': - wr_fract = atoi(optarg); - TEST_ASSERT(wr_fract >= 1, + p.wr_fract = atoi(optarg); + TEST_ASSERT(p.wr_fract >= 1, "Write fraction cannot be less than one"); break; case 'v': nr_vcpus = atoi(optarg); - TEST_ASSERT(nr_vcpus > 0, - "Must have a positive number of vCPUs"); - TEST_ASSERT(nr_vcpus <= MAX_VCPUS, - "This test does not currently support\n" - "more than %d vCPUs.", MAX_VCPUS); + TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; case 'h': default: @@ -348,18 +302,11 @@ int main(int argc, char *argv[]) } } - TEST_ASSERT(iterations >= 2, "The test should have at least two iterations"); + TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations"); - pr_info("Test iterations: %"PRIu64"\n", iterations); + pr_info("Test iterations: %"PRIu64"\n", p.iterations); - for (i = 0; i < NUM_VM_MODES; ++i) { - if (!guest_modes[i].enabled) - continue; - TEST_ASSERT(guest_modes[i].supported, - "Guest mode ID %d (%s) not supported.", - i, vm_guest_mode_string(i)); - run_test(i, iterations, phys_offset, wr_fract); - } + for_each_guest_mode(run_test, &p); return 0; } diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 471baecb7772..bb2752d78fe3 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -9,8 +9,6 @@ #include #include -#include -#include #include #include #include @@ -20,8 +18,9 @@ #include #include -#include "test_util.h" #include "kvm_util.h" +#include "test_util.h" +#include "guest_modes.h" #include "processor.h" #define VCPU_ID 1 @@ -673,9 +672,15 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, #define DIRTY_MEM_BITS 30 /* 1G */ #define PAGE_SHIFT_4K 12 -static void run_test(enum vm_guest_mode mode, unsigned long iterations, - unsigned long interval, uint64_t phys_offset) +struct test_params { + unsigned long iterations; + unsigned long interval; + uint64_t phys_offset; +}; + +static void run_test(enum vm_guest_mode mode, void *arg) { + struct test_params *p = arg; struct kvm_vm *vm; unsigned long *bmap; @@ -709,12 +714,12 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, host_page_size = getpagesize(); host_num_pages = vm_num_host_pages(mode, guest_num_pages); - if (!phys_offset) { + if (!p->phys_offset) { guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * guest_page_size; guest_test_phys_mem &= ~(host_page_size - 1); } else { - guest_test_phys_mem = phys_offset; + guest_test_phys_mem = p->phys_offset; } #ifdef __s390x__ @@ -758,9 +763,9 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, pthread_create(&vcpu_thread, NULL, vcpu_worker, vm); - while (iteration < iterations) { + while (iteration < p->iterations) { /* Give the vcpu thread some time to dirty some pages */ - usleep(interval * 1000); + usleep(p->interval * 1000); log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX, bmap, host_num_pages); vm_dirty_log_verify(mode, bmap); @@ -783,20 +788,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, kvm_vm_free(vm); } -struct guest_mode { - bool supported; - bool enabled; -}; -static struct guest_mode guest_modes[NUM_VM_MODES]; - -#define guest_mode_init(mode, supported, enabled) ({ \ - guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ -}) - static void help(char *name) { - int i; - puts(""); printf("usage: %s [-h] [-i iterations] [-I interval] " "[-p offset] [-m mode]\n", name); @@ -813,51 +806,23 @@ static void help(char *name) printf(" -M: specify the host logging mode " "(default: run all log modes). Supported modes: \n\t"); log_modes_dump(); - printf(" -m: specify the guest mode ID to test " - "(default: test all supported modes)\n" - " This option may be used multiple times.\n" - " Guest mode IDs:\n"); - for (i = 0; i < NUM_VM_MODES; ++i) { - printf(" %d: %s%s\n", i, vm_guest_mode_string(i), - guest_modes[i].supported ? " (supported)" : ""); - } + guest_modes_help(); puts(""); exit(0); } int main(int argc, char *argv[]) { - unsigned long iterations = TEST_HOST_LOOP_N; - unsigned long interval = TEST_HOST_LOOP_INTERVAL; - bool mode_selected = false; - uint64_t phys_offset = 0; - unsigned int mode; - int opt, i, j; + struct test_params p = { + .iterations = TEST_HOST_LOOP_N, + .interval = TEST_HOST_LOOP_INTERVAL, + }; + int opt, i; sem_init(&dirty_ring_vcpu_stop, 0, 0); sem_init(&dirty_ring_vcpu_cont, 0, 0); -#ifdef __x86_64__ - guest_mode_init(VM_MODE_PXXV48_4K, true, true); -#endif -#ifdef __aarch64__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); - guest_mode_init(VM_MODE_P40V48_64K, true, true); - - { - unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - - if (limit >= 52) - guest_mode_init(VM_MODE_P52V48_64K, true, true); - if (limit >= 48) { - guest_mode_init(VM_MODE_P48V48_4K, true, true); - guest_mode_init(VM_MODE_P48V48_64K, true, true); - } - } -#endif -#ifdef __s390x__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); -#endif + guest_modes_append_default(); while ((opt = getopt(argc, argv, "c:hi:I:p:m:M:")) != -1) { switch (opt) { @@ -865,24 +830,16 @@ int main(int argc, char *argv[]) test_dirty_ring_count = strtol(optarg, NULL, 10); break; case 'i': - iterations = strtol(optarg, NULL, 10); + p.iterations = strtol(optarg, NULL, 10); break; case 'I': - interval = strtol(optarg, NULL, 10); + p.interval = strtol(optarg, NULL, 10); break; case 'p': - phys_offset = strtoull(optarg, NULL, 0); + p.phys_offset = strtoull(optarg, NULL, 0); break; case 'm': - if (!mode_selected) { - for (i = 0; i < NUM_VM_MODES; ++i) - guest_modes[i].enabled = false; - mode_selected = true; - } - mode = strtoul(optarg, NULL, 10); - TEST_ASSERT(mode < NUM_VM_MODES, - "Guest mode ID %d too big", mode); - guest_modes[mode].enabled = true; + guest_modes_cmdline(optarg); break; case 'M': if (!strcmp(optarg, "all")) { @@ -911,32 +868,24 @@ int main(int argc, char *argv[]) } } - TEST_ASSERT(iterations > 2, "Iterations must be greater than two"); - TEST_ASSERT(interval > 0, "Interval must be greater than zero"); + TEST_ASSERT(p.iterations > 2, "Iterations must be greater than two"); + TEST_ASSERT(p.interval > 0, "Interval must be greater than zero"); pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n", - iterations, interval); + p.iterations, p.interval); srandom(time(0)); - for (i = 0; i < NUM_VM_MODES; ++i) { - if (!guest_modes[i].enabled) - continue; - TEST_ASSERT(guest_modes[i].supported, - "Guest mode ID %d (%s) not supported.", - i, vm_guest_mode_string(i)); - if (host_log_mode_option == LOG_MODE_ALL) { - /* Run each log mode */ - for (j = 0; j < LOG_MODE_NUM; j++) { - pr_info("Testing Log Mode '%s'\n", - log_modes[j].name); - host_log_mode = j; - run_test(i, iterations, interval, phys_offset); - } - } else { - host_log_mode = host_log_mode_option; - run_test(i, iterations, interval, phys_offset); + if (host_log_mode_option == LOG_MODE_ALL) { + /* Run each log mode */ + for (i = 0; i < LOG_MODE_NUM; i++) { + pr_info("Testing Log Mode '%s'\n", log_modes[i].name); + host_log_mode = i; + for_each_guest_mode(run_test, &p); } + } else { + host_log_mode = host_log_mode_option; + for_each_guest_mode(run_test, &p); } return 0; diff --git a/tools/testing/selftests/kvm/include/guest_modes.h b/tools/testing/selftests/kvm/include/guest_modes.h new file mode 100644 index 000000000000..b691df33e64e --- /dev/null +++ b/tools/testing/selftests/kvm/include/guest_modes.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020, Red Hat, Inc. + */ +#include "kvm_util.h" + +struct guest_mode { + bool supported; + bool enabled; +}; + +extern struct guest_mode guest_modes[NUM_VM_MODES]; + +#define guest_mode_append(mode, supported, enabled) ({ \ + guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ +}) + +void guest_modes_append_default(void); +void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg); +void guest_modes_help(void); +void guest_modes_cmdline(const char *arg); diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c new file mode 100644 index 000000000000..25bff307c71f --- /dev/null +++ b/tools/testing/selftests/kvm/lib/guest_modes.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Red Hat, Inc. + */ +#include "guest_modes.h" + +struct guest_mode guest_modes[NUM_VM_MODES]; + +void guest_modes_append_default(void) +{ + guest_mode_append(VM_MODE_DEFAULT, true, true); + +#ifdef __aarch64__ + guest_mode_append(VM_MODE_P40V48_64K, true, true); + { + unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); + if (limit >= 52) + guest_mode_append(VM_MODE_P52V48_64K, true, true); + if (limit >= 48) { + guest_mode_append(VM_MODE_P48V48_4K, true, true); + guest_mode_append(VM_MODE_P48V48_64K, true, true); + } + } +#endif +} + +void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg) +{ + int i; + + for (i = 0; i < NUM_VM_MODES; ++i) { + if (!guest_modes[i].enabled) + continue; + TEST_ASSERT(guest_modes[i].supported, + "Guest mode ID %d (%s) not supported.", + i, vm_guest_mode_string(i)); + func(i, arg); + } +} + +void guest_modes_help(void) +{ + int i; + + printf(" -m: specify the guest mode ID to test\n" + " (default: test all supported modes)\n" + " This option may be used multiple times.\n" + " Guest mode IDs:\n"); + for (i = 0; i < NUM_VM_MODES; ++i) { + printf(" %d: %s%s\n", i, vm_guest_mode_string(i), + guest_modes[i].supported ? " (supported)" : ""); + } +} + +void guest_modes_cmdline(const char *arg) +{ + static bool mode_selected; + unsigned int mode; + int i; + + if (!mode_selected) { + for (i = 0; i < NUM_VM_MODES; ++i) + guest_modes[i].enabled = false; + mode_selected = true; + } + + mode = strtoul(optarg, NULL, 10); + TEST_ASSERT(mode < NUM_VM_MODES, "Guest mode ID %d too big", mode); + guest_modes[mode].enabled = true; +} -- cgit 1.4.1 From 1133e17ea7c9929ff7b90e81d8926f9e870748e9 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 18 Dec 2020 15:17:33 +0100 Subject: KVM: selftests: Use vm_create_with_vcpus in create_vm Reviewed-by: Ben Gardon Signed-off-by: Andrew Jones Message-Id: <20201218141734.54359-3-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/demand_paging_test.c | 2 +- tools/testing/selftests/kvm/dirty_log_perf_test.c | 2 - tools/testing/selftests/kvm/include/kvm_util.h | 8 ++++ .../testing/selftests/kvm/include/perf_test_util.h | 47 +++++----------------- tools/testing/selftests/kvm/lib/kvm_util.c | 9 +---- 5 files changed, 21 insertions(+), 47 deletions(-) diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 946161a9ce2d..b0c41de32e9b 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -7,7 +7,7 @@ * Copyright (C) 2019, Google, Inc. */ -#define _GNU_SOURCE /* for program_invocation_name and pipe2 */ +#define _GNU_SOURCE /* for pipe2 */ #include #include diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 506741eb5d7f..36bea75a8d6f 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -8,8 +8,6 @@ * Copyright (C) 2020, Google, Inc. */ -#define _GNU_SOURCE /* for program_invocation_name */ - #include #include #include diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index dfa9d369e8fc..149766ecd68b 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -70,6 +70,14 @@ enum vm_guest_mode { #define vm_guest_mode_string(m) vm_guest_mode_string[m] extern const char * const vm_guest_mode_string[]; +struct vm_guest_mode_params { + unsigned int pa_bits; + unsigned int va_bits; + unsigned int page_size; + unsigned int page_shift; +}; +extern const struct vm_guest_mode_params vm_guest_mode_params[]; + enum vm_mem_backing_src_type { VM_MEM_SRC_ANONYMOUS, VM_MEM_SRC_ANONYMOUS_THP, diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 239421e4f6b8..cd4c258f458d 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -13,9 +13,6 @@ #define MAX_VCPUS 512 -#define PAGE_SHIFT_4K 12 -#define PTES_PER_4K_PT 512 - #define TEST_MEM_SLOT_INDEX 1 /* Default guest test virtual memory offset */ @@ -94,41 +91,26 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, uint64_t vcpu_memory_bytes) { struct kvm_vm *vm; - uint64_t pages = DEFAULT_GUEST_PHY_PAGES; uint64_t guest_num_pages; - /* Account for a few pages per-vCPU for stacks */ - pages += DEFAULT_STACK_PGS * vcpus; - - /* - * Reserve twice the ammount of memory needed to map the test region and - * the page table / stacks region, at 4k, for page tables. Do the - * calculation with 4K page size: the smallest of all archs. (e.g., 64K - * page size guest will need even less memory for page tables). - */ - pages += (2 * pages) / PTES_PER_4K_PT; - pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) / - PTES_PER_4K_PT; - pages = vm_adjust_num_guest_pages(mode, pages); - pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - vm = vm_create(mode, pages, O_RDWR); - kvm_vm_elf_load(vm, program_invocation_name, 0, 0); -#ifdef __x86_64__ - vm_create_irqchip(vm); -#endif - - perf_test_args.vm = vm; - perf_test_args.guest_page_size = vm_get_page_size(vm); perf_test_args.host_page_size = getpagesize(); + perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size; + guest_num_pages = vm_adjust_num_guest_pages(mode, + (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size); + + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, + "Guest memory size is not host page size aligned."); TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, "Guest memory size is not guest page size aligned."); - guest_num_pages = (vcpus * vcpu_memory_bytes) / - perf_test_args.guest_page_size; - guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); + vm = vm_create_with_vcpus(mode, vcpus, + (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size, + 0, guest_code, NULL); + + perf_test_args.vm = vm; /* * If there should be more memory in the guest test region than there @@ -140,18 +122,13 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, guest_num_pages, vm_get_max_gfn(vm), vcpus, vcpu_memory_bytes); - TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, - "Guest memory size is not host page size aligned."); - guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * perf_test_args.guest_page_size; guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); - #ifdef __s390x__ /* Align to 1M (segment size) */ guest_test_phys_mem &= ~((1 << 20) - 1); #endif - pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); /* Add an extra memory slot for testing */ @@ -177,8 +154,6 @@ static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes) for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; - vm_vcpu_add_default(vm, vcpu_id, guest_code); - vcpu_args->vcpu_id = vcpu_id; vcpu_args->gva = guest_test_virt_mem + (vcpu_id * vcpu_memory_bytes); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 88ef7067f1e6..fa5a90e6c6f0 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -153,14 +153,7 @@ const char * const vm_guest_mode_string[] = { _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES, "Missing new mode strings?"); -struct vm_guest_mode_params { - unsigned int pa_bits; - unsigned int va_bits; - unsigned int page_size; - unsigned int page_shift; -}; - -static const struct vm_guest_mode_params vm_guest_mode_params[] = { +const struct vm_guest_mode_params vm_guest_mode_params[] = { { 52, 48, 0x1000, 12 }, { 52, 48, 0x10000, 16 }, { 48, 48, 0x1000, 12 }, -- cgit 1.4.1 From b268b6f0bd36322358accb15c45683a9e1220231 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 18 Dec 2020 15:17:34 +0100 Subject: KVM: selftests: Implement perf_test_util more conventionally It's not conventional C to put non-inline functions in header files. Create a source file for the functions instead. Also reduce the amount of globals and rename the functions to something less generic. Reviewed-by: Ben Gardon Reviewed-by: Peter Xu Signed-off-by: Andrew Jones Message-Id: <20201218141734.54359-4-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 2 +- tools/testing/selftests/kvm/demand_paging_test.c | 11 +- tools/testing/selftests/kvm/dirty_log_perf_test.c | 22 ++-- tools/testing/selftests/kvm/include/kvm_util.h | 1 + .../testing/selftests/kvm/include/perf_test_util.h | 142 ++------------------- tools/testing/selftests/kvm/lib/perf_test_util.c | 134 +++++++++++++++++++ 6 files changed, 166 insertions(+), 146 deletions(-) create mode 100644 tools/testing/selftests/kvm/lib/perf_test_util.c diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index a7286a08c3ae..fe41c6a0fa67 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -33,7 +33,7 @@ ifeq ($(ARCH),s390) UNAME_M := s390x endif -LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c +LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index b0c41de32e9b..cdad1eca72f7 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -36,12 +36,14 @@ #define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) #endif +static int nr_vcpus = 1; +static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static char *guest_data_prototype; static void *vcpu_worker(void *data) { int ret; - struct vcpu_args *vcpu_args = (struct vcpu_args *)data; + struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data; int vcpu_id = vcpu_args->vcpu_id; struct kvm_vm *vm = perf_test_args.vm; struct kvm_run *run; @@ -263,7 +265,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) int vcpu_id; int r; - vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); + vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size); perf_test_args.wr_fract = 1; @@ -275,7 +277,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); - add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); + perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size); if (p->use_uffd) { uffd_handler_threads = @@ -359,8 +361,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) perf_test_args.vcpu_args[0].pages * nr_vcpus / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); - ucall_uninit(vm); - kvm_vm_free(vm); + perf_test_destroy_vm(vm); free(guest_data_prototype); free(vcpu_threads); diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 36bea75a8d6f..2283a0ec74a9 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -22,11 +22,14 @@ /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ #define TEST_HOST_LOOP_N 2UL +static int nr_vcpus = 1; +static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; + /* Host variables */ static u64 dirty_log_manual_caps; static bool host_quit; static uint64_t iteration; -static uint64_t vcpu_last_completed_iteration[MAX_VCPUS]; +static uint64_t vcpu_last_completed_iteration[KVM_MAX_VCPUS]; static void *vcpu_worker(void *data) { @@ -38,7 +41,7 @@ static void *vcpu_worker(void *data) struct timespec ts_diff; struct timespec total = (struct timespec){0}; struct timespec avg; - struct vcpu_args *vcpu_args = (struct vcpu_args *)data; + struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data; int vcpu_id = vcpu_args->vcpu_id; vcpu_args_set(vm, vcpu_id, 1, vcpu_id); @@ -108,7 +111,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct kvm_enable_cap cap = {}; struct timespec clear_dirty_log_total = (struct timespec){0}; - vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); + vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size); perf_test_args.wr_fract = p->wr_fract; @@ -126,7 +129,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); - add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); + perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size); sync_global_to_guest(vm, perf_test_args); @@ -152,7 +155,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Enable dirty logging */ clock_gettime(CLOCK_MONOTONIC, &start); - vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, + vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX, KVM_MEM_LOG_DIRTY_PAGES); ts_diff = timespec_diff_now(start); pr_info("Enabling dirty logging time: %ld.%.9lds\n\n", @@ -179,7 +182,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) iteration, ts_diff.tv_sec, ts_diff.tv_nsec); clock_gettime(CLOCK_MONOTONIC, &start); - kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); + kvm_vm_get_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap); ts_diff = timespec_diff_now(start); get_dirty_log_total = timespec_add(get_dirty_log_total, @@ -189,7 +192,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) if (dirty_log_manual_caps) { clock_gettime(CLOCK_MONOTONIC, &start); - kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, + kvm_vm_clear_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap, 0, host_num_pages); ts_diff = timespec_diff_now(start); @@ -207,7 +210,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Disable dirty logging */ clock_gettime(CLOCK_MONOTONIC, &start); - vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0); + vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX, 0); ts_diff = timespec_diff_now(start); pr_info("Disabling dirty logging time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); @@ -226,8 +229,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) free(bmap); free(vcpu_threads); - ucall_uninit(vm); - kvm_vm_free(vm); + perf_test_destroy_vm(vm); } static void help(char *name) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 149766ecd68b..5cbb861525ed 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -16,6 +16,7 @@ #include "sparsebit.h" +#define KVM_MAX_VCPUS 512 /* * Callers of kvm_util only have an incomplete/opaque description of the diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index cd4c258f458d..b1188823c31b 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -9,35 +9,15 @@ #define SELFTEST_KVM_PERF_TEST_UTIL_H #include "kvm_util.h" -#include "processor.h" - -#define MAX_VCPUS 512 - -#define TEST_MEM_SLOT_INDEX 1 /* Default guest test virtual memory offset */ #define DEFAULT_GUEST_TEST_MEM 0xc0000000 #define DEFAULT_PER_VCPU_MEM_SIZE (1 << 30) /* 1G */ -/* - * Guest physical memory offset of the testing memory slot. - * This will be set to the topmost valid physical address minus - * the test memory size. - */ -static uint64_t guest_test_phys_mem; - -/* - * Guest virtual memory offset of the testing memory slot. - * Must not conflict with identity mapped test code. - */ -static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; -static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; - -/* Number of VCPUs for the test */ -static int nr_vcpus = 1; +#define PERF_TEST_MEM_SLOT_INDEX 1 -struct vcpu_args { +struct perf_test_vcpu_args { uint64_t gva; uint64_t pages; @@ -51,119 +31,21 @@ struct perf_test_args { uint64_t guest_page_size; int wr_fract; - struct vcpu_args vcpu_args[MAX_VCPUS]; + struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS]; }; -static struct perf_test_args perf_test_args; +extern struct perf_test_args perf_test_args; /* - * Continuously write to the first 8 bytes of each page in the - * specified region. + * Guest physical memory offset of the testing memory slot. + * This will be set to the topmost valid physical address minus + * the test memory size. */ -static void guest_code(uint32_t vcpu_id) -{ - struct vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; - uint64_t gva; - uint64_t pages; - int i; - - /* Make sure vCPU args data structure is not corrupt. */ - GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id); - - gva = vcpu_args->gva; - pages = vcpu_args->pages; - - while (true) { - for (i = 0; i < pages; i++) { - uint64_t addr = gva + (i * perf_test_args.guest_page_size); - - if (i % perf_test_args.wr_fract == 0) - *(uint64_t *)addr = 0x0123456789ABCDEF; - else - READ_ONCE(*(uint64_t *)addr); - } - - GUEST_SYNC(1); - } -} - -static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, - uint64_t vcpu_memory_bytes) -{ - struct kvm_vm *vm; - uint64_t guest_num_pages; - - pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - - perf_test_args.host_page_size = getpagesize(); - perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size; - - guest_num_pages = vm_adjust_num_guest_pages(mode, - (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size); - - TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, - "Guest memory size is not host page size aligned."); - TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, - "Guest memory size is not guest page size aligned."); - - vm = vm_create_with_vcpus(mode, vcpus, - (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size, - 0, guest_code, NULL); - - perf_test_args.vm = vm; - - /* - * If there should be more memory in the guest test region than there - * can be pages in the guest, it will definitely cause problems. - */ - TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), - "Requested more guest memory than address space allows.\n" - " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", - guest_num_pages, vm_get_max_gfn(vm), vcpus, - vcpu_memory_bytes); - - guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * - perf_test_args.guest_page_size; - guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); -#ifdef __s390x__ - /* Align to 1M (segment size) */ - guest_test_phys_mem &= ~((1 << 20) - 1); -#endif - pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); - - /* Add an extra memory slot for testing */ - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - guest_test_phys_mem, - TEST_MEM_SLOT_INDEX, - guest_num_pages, 0); - - /* Do mapping for the demand paging memory slot */ - virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); - - ucall_init(vm, NULL); - - return vm; -} - -static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes) -{ - vm_paddr_t vcpu_gpa; - struct vcpu_args *vcpu_args; - int vcpu_id; - - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { - vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; - - vcpu_args->vcpu_id = vcpu_id; - vcpu_args->gva = guest_test_virt_mem + - (vcpu_id * vcpu_memory_bytes); - vcpu_args->pages = vcpu_memory_bytes / - perf_test_args.guest_page_size; +extern uint64_t guest_test_phys_mem; - vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); - pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", - vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); - } -} +struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, + uint64_t vcpu_memory_bytes); +void perf_test_destroy_vm(struct kvm_vm *vm); +void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes); #endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c new file mode 100644 index 000000000000..9be1944c2d1c --- /dev/null +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Google LLC. + */ + +#include "kvm_util.h" +#include "perf_test_util.h" +#include "processor.h" + +struct perf_test_args perf_test_args; + +uint64_t guest_test_phys_mem; + +/* + * Guest virtual memory offset of the testing memory slot. + * Must not conflict with identity mapped test code. + */ +static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; + +/* + * Continuously write to the first 8 bytes of each page in the + * specified region. + */ +static void guest_code(uint32_t vcpu_id) +{ + struct perf_test_vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + uint64_t gva; + uint64_t pages; + int i; + + /* Make sure vCPU args data structure is not corrupt. */ + GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id); + + gva = vcpu_args->gva; + pages = vcpu_args->pages; + + while (true) { + for (i = 0; i < pages; i++) { + uint64_t addr = gva + (i * perf_test_args.guest_page_size); + + if (i % perf_test_args.wr_fract == 0) + *(uint64_t *)addr = 0x0123456789ABCDEF; + else + READ_ONCE(*(uint64_t *)addr); + } + + GUEST_SYNC(1); + } +} + +struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, + uint64_t vcpu_memory_bytes) +{ + struct kvm_vm *vm; + uint64_t guest_num_pages; + + pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + + perf_test_args.host_page_size = getpagesize(); + perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size; + + guest_num_pages = vm_adjust_num_guest_pages(mode, + (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size); + + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, + "Guest memory size is not host page size aligned."); + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, + "Guest memory size is not guest page size aligned."); + + vm = vm_create_with_vcpus(mode, vcpus, + (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size, + 0, guest_code, NULL); + + perf_test_args.vm = vm; + + /* + * If there should be more memory in the guest test region than there + * can be pages in the guest, it will definitely cause problems. + */ + TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), + "Requested more guest memory than address space allows.\n" + " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", + guest_num_pages, vm_get_max_gfn(vm), vcpus, + vcpu_memory_bytes); + + guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * + perf_test_args.guest_page_size; + guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); +#ifdef __s390x__ + /* Align to 1M (segment size) */ + guest_test_phys_mem &= ~((1 << 20) - 1); +#endif + pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); + + /* Add an extra memory slot for testing */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + guest_test_phys_mem, + PERF_TEST_MEM_SLOT_INDEX, + guest_num_pages, 0); + + /* Do mapping for the demand paging memory slot */ + virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); + + ucall_init(vm, NULL); + + return vm; +} + +void perf_test_destroy_vm(struct kvm_vm *vm) +{ + ucall_uninit(vm); + kvm_vm_free(vm); +} + +void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes) +{ + vm_paddr_t vcpu_gpa; + struct perf_test_vcpu_args *vcpu_args; + int vcpu_id; + + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + + vcpu_args->vcpu_id = vcpu_id; + vcpu_args->gva = guest_test_virt_mem + + (vcpu_id * vcpu_memory_bytes); + vcpu_args->pages = vcpu_memory_bytes / + perf_test_args.guest_page_size; + + vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); + pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", + vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); + } +} -- cgit 1.4.1 From 2f80d502d627f30257ba7e3655e71c373b7d1a5a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 22 Dec 2020 05:20:43 -0500 Subject: KVM: x86: fix shift out of bounds reported by UBSAN Since we know that e >= s, we can reassociate the left shift, changing the shifted number from 1 to 2 in exchange for decreasing the right hand side by 1. Reported-by: syzbot+e87846c48bf72bc85311@syzkaller.appspotmail.com Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 9c4a9c8e43d9..581925e476d6 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -49,7 +49,7 @@ static inline u64 rsvd_bits(int s, int e) if (e < s) return 0; - return ((1ULL << (e - s + 1)) - 1) << s; + return ((2ULL << (e - s)) - 1) << s; } void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask); -- cgit 1.4.1 From 7f0c1f1a8277de906a242a6ef907476149f006de Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 6 Jan 2021 10:29:16 -0800 Subject: MAINTAINERS: Really update email address for Sean Christopherson Use my @google.com address in MAINTAINERS, somehow only the .mailmap entry was added when the original update patch was applied. Fixes: c2b1209d852f ("MAINTAINERS: Update email address for Sean Christopherson") Cc: kvm@vger.kernel.org Reported-by: Nathan Chancellor Signed-off-by: Sean Christopherson Message-Id: <20210106182916.331743-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 281de213ef47..9d8b77332ae3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9672,7 +9672,7 @@ F: tools/testing/selftests/kvm/s390x/ KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86) M: Paolo Bonzini -R: Sean Christopherson +R: Sean Christopherson R: Vitaly Kuznetsov R: Wanpeng Li R: Jim Mattson -- cgit 1.4.1 From de7860c8a388e4cb757c7da26889b9e2641ffcfe Mon Sep 17 00:00:00 2001 From: Stephen Zhang Date: Fri, 18 Dec 2020 15:51:37 +0800 Subject: KVM: x86: change in pv_eoi_get_pending() to make code more readable Signed-off-by: Stephen Zhang Message-Id: <1608277897-1932-1-git-send-email-stephenzhangzsd@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3136e05831cf..78823227c592 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -674,7 +674,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) (unsigned long long)vcpu->arch.pv_eoi.msr_val); return false; } - return val & 0x1; + return val & KVM_PV_EOI_ENABLED; } static void pv_eoi_set_pending(struct kvm_vcpu *vcpu) -- cgit 1.4.1 From 88bf56d04bc3564542049ec4ec168a8b60d0b48c Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 17 Dec 2020 23:41:18 +0800 Subject: kvm: check tlbs_dirty directly In kvm_mmu_notifier_invalidate_range_start(), tlbs_dirty is used as: need_tlb_flush |= kvm->tlbs_dirty; with need_tlb_flush's type being int and tlbs_dirty's type being long. It means that tlbs_dirty is always used as int and the higher 32 bits is useless. We need to check tlbs_dirty in a correct way and this change checks it directly without propagating it to need_tlb_flush. Note: it's _extremely_ unlikely this neglecting of higher 32 bits can cause problems in practice. It would require encountering tlbs_dirty on a 4 billion count boundary, and KVM would need to be using shadow paging or be running a nested guest. Cc: stable@vger.kernel.org Fixes: a4ee1ca4a36e ("KVM: MMU: delay flush all tlbs on sync_page path") Signed-off-by: Lai Jiangshan Message-Id: <20201217154118.16497-1-jiangshanlai@gmail.com> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3abcb2ce5b7d..19dae28904f7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -485,9 +485,8 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, kvm->mmu_notifier_count++; need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end, range->flags); - need_tlb_flush |= kvm->tlbs_dirty; /* we've to flush the tlb before the pages can be freed */ - if (need_tlb_flush) + if (need_tlb_flush || kvm->tlbs_dirty) kvm_flush_remote_tlbs(kvm); spin_unlock(&kvm->mmu_lock); -- cgit 1.4.1 From a889ea54b3daa63ee1463dc19ed699407d61458b Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Wed, 6 Jan 2021 16:19:34 -0800 Subject: KVM: x86/mmu: Ensure TDP MMU roots are freed after yield Many TDP MMU functions which need to perform some action on all TDP MMU roots hold a reference on that root so that they can safely drop the MMU lock in order to yield to other threads. However, when releasing the reference on the root, there is a bug: the root will not be freed even if its reference count (root_count) is reduced to 0. To simplify acquiring and releasing references on TDP MMU root pages, and to ensure that these roots are properly freed, move the get/put operations into another TDP MMU root iterator macro. Moving the get/put operations into an iterator macro also helps simplify control flow when a root does need to be freed. Note that using the list_for_each_entry_safe macro would not have been appropriate in this situation because it could keep a pointer to the next root across an MMU lock release + reacquire, during which time that root could be freed. Reported-by: Maciej S. Szmigiero Suggested-by: Paolo Bonzini Fixes: faaf05b00aec ("kvm: x86/mmu: Support zapping SPTEs in the TDP MMU") Fixes: 063afacd8730 ("kvm: x86/mmu: Support invalidate range MMU notifier for TDP MMU") Fixes: a6a0b05da9f3 ("kvm: x86/mmu: Support dirty logging for the TDP MMU") Fixes: 14881998566d ("kvm: x86/mmu: Support disabling dirty logging for the tdp MMU") Signed-off-by: Ben Gardon Message-Id: <20210107001935.3732070-1-bgardon@google.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 104 +++++++++++++++++++++------------------------ 1 file changed, 48 insertions(+), 56 deletions(-) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 6574af2d0994..2ef8615f9dba 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -44,7 +44,48 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); } -#define for_each_tdp_mmu_root(_kvm, _root) \ +static void tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root) +{ + if (kvm_mmu_put_root(kvm, root)) + kvm_tdp_mmu_free_root(kvm, root); +} + +static inline bool tdp_mmu_next_root_valid(struct kvm *kvm, + struct kvm_mmu_page *root) +{ + lockdep_assert_held(&kvm->mmu_lock); + + if (list_entry_is_head(root, &kvm->arch.tdp_mmu_roots, link)) + return false; + + kvm_mmu_get_root(kvm, root); + return true; + +} + +static inline struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm, + struct kvm_mmu_page *root) +{ + struct kvm_mmu_page *next_root; + + next_root = list_next_entry(root, link); + tdp_mmu_put_root(kvm, root); + return next_root; +} + +/* + * Note: this iterator gets and puts references to the roots it iterates over. + * This makes it safe to release the MMU lock and yield within the loop, but + * if exiting the loop early, the caller must drop the reference to the most + * recent root. (Unless keeping a live reference is desirable.) + */ +#define for_each_tdp_mmu_root_yield_safe(_kvm, _root) \ + for (_root = list_first_entry(&_kvm->arch.tdp_mmu_roots, \ + typeof(*_root), link); \ + tdp_mmu_next_root_valid(_kvm, _root); \ + _root = tdp_mmu_next_root(_kvm, _root)) + +#define for_each_tdp_mmu_root(_kvm, _root) \ list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link) bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa) @@ -447,18 +488,9 @@ bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end) struct kvm_mmu_page *root; bool flush = false; - for_each_tdp_mmu_root(kvm, root) { - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. - */ - kvm_mmu_get_root(kvm, root); - + for_each_tdp_mmu_root_yield_safe(kvm, root) flush |= zap_gfn_range(kvm, root, start, end, true); - kvm_mmu_put_root(kvm, root); - } - return flush; } @@ -619,13 +651,7 @@ static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start, int ret = 0; int as_id; - for_each_tdp_mmu_root(kvm, root) { - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. - */ - kvm_mmu_get_root(kvm, root); - + for_each_tdp_mmu_root_yield_safe(kvm, root) { as_id = kvm_mmu_page_as_id(root); slots = __kvm_memslots(kvm, as_id); kvm_for_each_memslot(memslot, slots) { @@ -647,8 +673,6 @@ static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start, ret |= handler(kvm, memslot, root, gfn_start, gfn_end, data); } - - kvm_mmu_put_root(kvm, root); } return ret; @@ -838,21 +862,13 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot, int root_as_id; bool spte_set = false; - for_each_tdp_mmu_root(kvm, root) { + for_each_tdp_mmu_root_yield_safe(kvm, root) { root_as_id = kvm_mmu_page_as_id(root); if (root_as_id != slot->as_id) continue; - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. - */ - kvm_mmu_get_root(kvm, root); - spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages, min_level); - - kvm_mmu_put_root(kvm, root); } return spte_set; @@ -906,21 +922,13 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, struct kvm_memory_slot *slot) int root_as_id; bool spte_set = false; - for_each_tdp_mmu_root(kvm, root) { + for_each_tdp_mmu_root_yield_safe(kvm, root) { root_as_id = kvm_mmu_page_as_id(root); if (root_as_id != slot->as_id) continue; - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. - */ - kvm_mmu_get_root(kvm, root); - spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages); - - kvm_mmu_put_root(kvm, root); } return spte_set; @@ -1029,21 +1037,13 @@ bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot) int root_as_id; bool spte_set = false; - for_each_tdp_mmu_root(kvm, root) { + for_each_tdp_mmu_root_yield_safe(kvm, root) { root_as_id = kvm_mmu_page_as_id(root); if (root_as_id != slot->as_id) continue; - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. - */ - kvm_mmu_get_root(kvm, root); - spte_set |= set_dirty_gfn_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages); - - kvm_mmu_put_root(kvm, root); } return spte_set; } @@ -1089,21 +1089,13 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm, struct kvm_mmu_page *root; int root_as_id; - for_each_tdp_mmu_root(kvm, root) { + for_each_tdp_mmu_root_yield_safe(kvm, root) { root_as_id = kvm_mmu_page_as_id(root); if (root_as_id != slot->as_id) continue; - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. - */ - kvm_mmu_get_root(kvm, root); - zap_collapsible_spte_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages); - - kvm_mmu_put_root(kvm, root); } } -- cgit 1.4.1 From c0dba6e46825716db15c4b3a8f05c85b4a59edda Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Wed, 6 Jan 2021 16:19:35 -0800 Subject: KVM: x86/mmu: Clarify TDP MMU page list invariants The tdp_mmu_roots and tdp_mmu_pages in struct kvm_arch should only contain pages with tdp_mmu_page set to true. tdp_mmu_pages should not contain any pages with a non-zero root_count and tdp_mmu_roots should only contain pages with a positive root_count, unless a thread holds the MMU lock and is in the process of modifying the list. Various functions expect these invariants to be maintained, but they are not explictily documented. Add to the comments on both fields to document the above invariants. Signed-off-by: Ben Gardon Message-Id: <20210107001935.3732070-2-bgardon@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3ab7b46087b7..afed3da3b3a0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1010,9 +1010,21 @@ struct kvm_arch { */ bool tdp_mmu_enabled; - /* List of struct tdp_mmu_pages being used as roots */ + /* + * List of struct kvmp_mmu_pages being used as roots. + * All struct kvm_mmu_pages in the list should have + * tdp_mmu_page set. + * All struct kvm_mmu_pages in the list should have a positive + * root_count except when a thread holds the MMU lock and is removing + * an entry from the list. + */ struct list_head tdp_mmu_roots; - /* List of struct tdp_mmu_pages not being used as roots */ + + /* + * List of struct kvmp_mmu_pages not being used as roots. + * All struct kvm_mmu_pages in the list should have + * tdp_mmu_page set and a root_count of 0. + */ struct list_head tdp_mmu_pages; }; -- cgit 1.4.1 From 81f76adad560dfc39cb9625cf1e00a7e2b7b88df Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 7 Jan 2021 11:38:52 +0200 Subject: KVM: nSVM: correctly restore nested_run_pending on migration The code to store it on the migration exists, but no code was restoring it. One of the side effects of fixing this is that L1->L2 injected events are no longer lost when migration happens with nested run pending. Signed-off-by: Maxim Levitsky Message-Id: <20210107093854.882483-3-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index b0b667456b2e..a466336aab43 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1194,6 +1194,10 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, * in the registers, the save area of the nested state instead * contains saved L1 state. */ + + svm->nested.nested_run_pending = + !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); + copy_vmcb_control_area(&hsave->control, &svm->vmcb->control); hsave->save = *save; -- cgit 1.4.1 From 56fe28de8c4f0167275c411c0daa5709e9a47bd7 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 7 Jan 2021 11:38:54 +0200 Subject: KVM: nSVM: mark vmcb as dirty when forcingly leaving the guest mode We overwrite most of vmcb fields while doing so, so we must mark it as dirty. Signed-off-by: Maxim Levitsky Message-Id: <20210107093854.882483-5-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index a466336aab43..a622e63739b4 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -754,6 +754,7 @@ void svm_leave_nested(struct vcpu_svm *svm) leave_guest_mode(&svm->vcpu); copy_vmcb_control_area(&vmcb->control, &hsave->control); nested_svm_uninit_mmu_context(&svm->vcpu); + vmcb_mark_all_dirty(svm->vmcb); } kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, &svm->vcpu); -- cgit 1.4.1 From f2c7ef3ba9556d62a7e2bb23b563c6510007d55c Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 7 Jan 2021 11:38:51 +0200 Subject: KVM: nSVM: cancel KVM_REQ_GET_NESTED_STATE_PAGES on nested vmexit It is possible to exit the nested guest mode, entered by svm_set_nested_state prior to first vm entry to it (e.g due to pending event) if the nested run was not pending during the migration. In this case we must not switch to the nested msr permission bitmap. Also add a warning to catch similar cases in the future. Fixes: a7d5c7ce41ac1 ("KVM: nSVM: delay MSR permission processing to first nested VM run") Signed-off-by: Maxim Levitsky Message-Id: <20210107093854.882483-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 3 +++ arch/x86/kvm/vmx/nested.c | 2 ++ arch/x86/kvm/x86.c | 4 +++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index a622e63739b4..cb4c6ee10029 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -199,6 +199,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + if (!nested_svm_vmrun_msrpm(svm)) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = @@ -595,6 +596,8 @@ int nested_svm_vmexit(struct vcpu_svm *svm) svm->nested.vmcb12_gpa = 0; WARN_ON_ONCE(svm->nested.nested_run_pending); + kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, &svm->vcpu); + /* in case we halted in L2 */ svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index e2f26564a12d..0fbb46990dfc 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4442,6 +4442,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, /* trying to cancel vmlaunch/vmresume is a bug */ WARN_ON_ONCE(vmx->nested.nested_run_pending); + kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); + /* Service the TLB flush request for L2 before switching to L1. */ if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) kvm_vcpu_flush_tlb_current(vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e28ab76b80dc..f6e7b25c40e2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8789,7 +8789,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) { - if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) { + if (WARN_ON_ONCE(!is_guest_mode(vcpu))) + ; + else if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) { r = 0; goto out; } -- cgit 1.4.1 From 647daca25d24fb6eadc7b6cd680ad3e6eed0f3d5 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 4 Jan 2021 14:20:01 -0600 Subject: KVM: SVM: Add support for booting APs in an SEV-ES guest Typically under KVM, an AP is booted using the INIT-SIPI-SIPI sequence, where the guest vCPU register state is updated and then the vCPU is VMRUN to begin execution of the AP. For an SEV-ES guest, this won't work because the guest register state is encrypted. Following the GHCB specification, the hypervisor must not alter the guest register state, so KVM must track an AP/vCPU boot. Should the guest want to park the AP, it must use the AP Reset Hold exit event in place of, for example, a HLT loop. First AP boot (first INIT-SIPI-SIPI sequence): Execute the AP (vCPU) as it was initialized and measured by the SEV-ES support. It is up to the guest to transfer control of the AP to the proper location. Subsequent AP boot: KVM will expect to receive an AP Reset Hold exit event indicating that the vCPU is being parked and will require an INIT-SIPI-SIPI sequence to awaken it. When the AP Reset Hold exit event is received, KVM will place the vCPU into a simulated HLT mode. Upon receiving the INIT-SIPI-SIPI sequence, KVM will make the vCPU runnable. It is again up to the guest to then transfer control of the AP to the proper location. To differentiate between an actual HLT and an AP Reset Hold, a new MP state is introduced, KVM_MP_STATE_AP_RESET_HOLD, which the vCPU is placed in upon receiving the AP Reset Hold exit event. Additionally, to communicate the AP Reset Hold exit event up to userspace (if needed), a new exit reason is introduced, KVM_EXIT_AP_RESET_HOLD. A new x86 ops function is introduced, vcpu_deliver_sipi_vector, in order to accomplish AP booting. For VMX, vcpu_deliver_sipi_vector is set to the original SIPI delivery function, kvm_vcpu_deliver_sipi_vector(). SVM adds a new function that, for non SEV-ES guests, invokes the original SIPI delivery function, kvm_vcpu_deliver_sipi_vector(), but for SEV-ES guests, implements the logic above. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +++ arch/x86/kvm/lapic.c | 2 +- arch/x86/kvm/svm/sev.c | 22 ++++++++++++++++++++++ arch/x86/kvm/svm/svm.c | 10 ++++++++++ arch/x86/kvm/svm/svm.h | 2 ++ arch/x86/kvm/vmx/vmx.c | 2 ++ arch/x86/kvm/x86.c | 26 +++++++++++++++++++++----- include/uapi/linux/kvm.h | 2 ++ 8 files changed, 63 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index afed3da3b3a0..3d6616f6f6ef 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1299,6 +1299,8 @@ struct kvm_x86_ops { void (*migrate_timers)(struct kvm_vcpu *vcpu); void (*msr_filter_changed)(struct kvm_vcpu *vcpu); int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err); + + void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector); }; struct kvm_x86_nested_ops { @@ -1480,6 +1482,7 @@ int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in); int kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); int kvm_vcpu_halt(struct kvm_vcpu *vcpu); +int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu); int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 78823227c592..43cceadd073e 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2898,7 +2898,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) /* evaluate pending_events before reading the vector */ smp_rmb(); sipi_vector = apic->sipi_vector; - kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector); + kvm_x86_ops.vcpu_deliver_sipi_vector(vcpu, sipi_vector); vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; } } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 563ced07b0b8..c8ffdbc81709 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1563,6 +1563,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) goto vmgexit_err; break; case SVM_VMGEXIT_NMI_COMPLETE: + case SVM_VMGEXIT_AP_HLT_LOOP: case SVM_VMGEXIT_AP_JUMP_TABLE: case SVM_VMGEXIT_UNSUPPORTED_EVENT: break; @@ -1888,6 +1889,9 @@ int sev_handle_vmgexit(struct vcpu_svm *svm) case SVM_VMGEXIT_NMI_COMPLETE: ret = svm_invoke_exit_handler(svm, SVM_EXIT_IRET); break; + case SVM_VMGEXIT_AP_HLT_LOOP: + ret = kvm_emulate_ap_reset_hold(&svm->vcpu); + break; case SVM_VMGEXIT_AP_JUMP_TABLE: { struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info; @@ -2040,3 +2044,21 @@ void sev_es_vcpu_put(struct vcpu_svm *svm) wrmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]); } } + +void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + /* First SIPI: Use the values as initially set by the VMM */ + if (!svm->received_first_sipi) { + svm->received_first_sipi = true; + return; + } + + /* + * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where + * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a + * non-zero value. + */ + ghcb_set_sw_exit_info_2(svm->ghcb, 1); +} diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 6824d611dc5d..7ef171790d02 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4382,6 +4382,14 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu) (vmcb_is_intercept(&svm->vmcb->control, INTERCEPT_INIT)); } +static void svm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) +{ + if (!sev_es_guest(vcpu->kvm)) + return kvm_vcpu_deliver_sipi_vector(vcpu, vector); + + sev_vcpu_deliver_sipi_vector(vcpu, vector); +} + static void svm_vm_destroy(struct kvm *kvm) { avic_vm_destroy(kvm); @@ -4524,6 +4532,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .msr_filter_changed = svm_msr_filter_changed, .complete_emulated_msr = svm_complete_emulated_msr, + + .vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector, }; static struct kvm_x86_init_ops svm_init_ops __initdata = { diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 5431e6335e2e..0fe874ae5498 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -185,6 +185,7 @@ struct vcpu_svm { struct vmcb_save_area *vmsa; struct ghcb *ghcb; struct kvm_host_map ghcb_map; + bool received_first_sipi; /* SEV-ES scratch area support */ void *ghcb_sa; @@ -591,6 +592,7 @@ void sev_es_init_vmcb(struct vcpu_svm *svm); void sev_es_create_vcpu(struct vcpu_svm *svm); void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu); void sev_es_vcpu_put(struct vcpu_svm *svm); +void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); /* vmenter.S */ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 75c9c6a0a3a4..2af05d3b0590 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7707,6 +7707,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .msr_filter_changed = vmx_msr_filter_changed, .complete_emulated_msr = kvm_complete_insn_gp, .cpu_dirty_log_size = vmx_cpu_dirty_log_size, + + .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector, }; static __init int hardware_setup(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f6e7b25c40e2..0287840b93e0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7976,17 +7976,22 @@ void kvm_arch_exit(void) kmem_cache_destroy(x86_fpu_cache); } -int kvm_vcpu_halt(struct kvm_vcpu *vcpu) +int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason) { ++vcpu->stat.halt_exits; if (lapic_in_kernel(vcpu)) { - vcpu->arch.mp_state = KVM_MP_STATE_HALTED; + vcpu->arch.mp_state = state; return 1; } else { - vcpu->run->exit_reason = KVM_EXIT_HLT; + vcpu->run->exit_reason = reason; return 0; } } + +int kvm_vcpu_halt(struct kvm_vcpu *vcpu) +{ + return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT); +} EXPORT_SYMBOL_GPL(kvm_vcpu_halt); int kvm_emulate_halt(struct kvm_vcpu *vcpu) @@ -8000,6 +8005,14 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_halt); +int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu) +{ + int ret = kvm_skip_emulated_instruction(vcpu); + + return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD, KVM_EXIT_AP_RESET_HOLD) && ret; +} +EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold); + #ifdef CONFIG_X86_64 static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, unsigned long clock_type) @@ -9096,6 +9109,7 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) kvm_apic_accept_events(vcpu); switch(vcpu->arch.mp_state) { case KVM_MP_STATE_HALTED: + case KVM_MP_STATE_AP_RESET_HOLD: vcpu->arch.pv.pv_unhalted = false; vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; @@ -9522,8 +9536,9 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, kvm_load_guest_fpu(vcpu); kvm_apic_accept_events(vcpu); - if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED && - vcpu->arch.pv.pv_unhalted) + if ((vcpu->arch.mp_state == KVM_MP_STATE_HALTED || + vcpu->arch.mp_state == KVM_MP_STATE_AP_RESET_HOLD) && + vcpu->arch.pv.pv_unhalted) mp_state->mp_state = KVM_MP_STATE_RUNNABLE; else mp_state->mp_state = vcpu->arch.mp_state; @@ -10154,6 +10169,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) kvm_set_segment(vcpu, &cs, VCPU_SREG_CS); kvm_rip_write(vcpu, 0); } +EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector); int kvm_arch_hardware_enable(void) { diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 886802b8ffba..374c67875cdb 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -251,6 +251,7 @@ struct kvm_hyperv_exit { #define KVM_EXIT_X86_RDMSR 29 #define KVM_EXIT_X86_WRMSR 30 #define KVM_EXIT_DIRTY_RING_FULL 31 +#define KVM_EXIT_AP_RESET_HOLD 32 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -573,6 +574,7 @@ struct kvm_vapic_addr { #define KVM_MP_STATE_CHECK_STOP 6 #define KVM_MP_STATE_OPERATING 7 #define KVM_MP_STATE_LOAD 8 +#define KVM_MP_STATE_AP_RESET_HOLD 9 struct kvm_mp_state { __u32 mp_state; -- cgit 1.4.1 From 717df0f4cdc9044c415431a3522b3e9ccca5b4a3 Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Wed, 6 Jan 2021 09:59:06 +0530 Subject: chtls: Fix hardware tid leak send_abort_rpl() is not calculating cpl_abort_req_rss offset and ends up sending wrong TID with abort_rpl WR causng tid leaks. Replaced send_abort_rpl() with chtls_send_abort_rpl() as it is redundant. Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Rohit Maheshwari Signed-off-by: Ayush Sawal Signed-off-by: Jakub Kicinski --- .../chelsio/inline_crypto/chtls/chtls_cm.c | 39 ++-------------------- 1 file changed, 3 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index a0e0d8a83681..561b5f2273af 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1997,39 +1997,6 @@ static void t4_defer_reply(struct sk_buff *skb, struct chtls_dev *cdev, spin_unlock_bh(&cdev->deferq.lock); } -static void send_abort_rpl(struct sock *sk, struct sk_buff *skb, - struct chtls_dev *cdev, int status, int queue) -{ - struct cpl_abort_req_rss *req = cplhdr(skb); - struct sk_buff *reply_skb; - struct chtls_sock *csk; - - csk = rcu_dereference_sk_user_data(sk); - - reply_skb = alloc_skb(sizeof(struct cpl_abort_rpl), - GFP_KERNEL); - - if (!reply_skb) { - req->status = (queue << 1); - t4_defer_reply(skb, cdev, send_defer_abort_rpl); - return; - } - - set_abort_rpl_wr(reply_skb, GET_TID(req), status); - kfree_skb(skb); - - set_wr_txq(reply_skb, CPL_PRIORITY_DATA, queue); - if (csk_conn_inline(csk)) { - struct l2t_entry *e = csk->l2t_entry; - - if (e && sk->sk_state != TCP_SYN_RECV) { - cxgb4_l2t_send(csk->egress_dev, reply_skb, e); - return; - } - } - cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb); -} - static void chtls_send_abort_rpl(struct sock *sk, struct sk_buff *skb, struct chtls_dev *cdev, int status, int queue) @@ -2079,8 +2046,8 @@ static void bl_abort_syn_rcv(struct sock *lsk, struct sk_buff *skb) skb->sk = NULL; do_abort_syn_rcv(child, lsk); - send_abort_rpl(child, skb, BLOG_SKB_CB(skb)->cdev, - CPL_ABORT_NO_RST, queue); + chtls_send_abort_rpl(child, skb, BLOG_SKB_CB(skb)->cdev, + CPL_ABORT_NO_RST, queue); } static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb) @@ -2111,7 +2078,7 @@ static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb) int queue = csk->txq_idx; do_abort_syn_rcv(sk, psk); - send_abort_rpl(sk, skb, cdev, CPL_ABORT_NO_RST, queue); + chtls_send_abort_rpl(sk, skb, cdev, CPL_ABORT_NO_RST, queue); } else { skb->sk = sk; BLOG_SKB_CB(skb)->backlog_rcv = bl_abort_syn_rcv; -- cgit 1.4.1 From 827d329105bfde6701f0077e34a09c4a86e27145 Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Wed, 6 Jan 2021 09:59:07 +0530 Subject: chtls: Remove invalid set_tcb call At the time of SYN_RECV, connection information is not initialized at FW, updating tcb flag over uninitialized connection causes adapter crash. We don't need to update the flag during SYN_RECV state, so avoid this. Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Rohit Maheshwari Signed-off-by: Ayush Sawal Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 561b5f2273af..431d1e3844ab 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -2096,9 +2096,6 @@ static void chtls_abort_req_rss(struct sock *sk, struct sk_buff *skb) int queue = csk->txq_idx; if (is_neg_adv(req->status)) { - if (sk->sk_state == TCP_SYN_RECV) - chtls_set_tcb_tflag(sk, 0, 0); - kfree_skb(skb); return; } -- cgit 1.4.1 From 5a5fac9966bb6d513198634b0b1357be7e8447d2 Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Wed, 6 Jan 2021 09:59:08 +0530 Subject: chtls: Fix panic when route to peer not configured If route to peer is not configured, we might get non tls devices from dst_neigh_lookup() which is invalid, adding a check to avoid it. Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Rohit Maheshwari Signed-off-by: Ayush Sawal Signed-off-by: Jakub Kicinski --- .../net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 431d1e3844ab..04a8bd5af3b9 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1109,6 +1109,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk, const struct cpl_pass_accept_req *req, struct chtls_dev *cdev) { + struct adapter *adap = pci_get_drvdata(cdev->pdev); struct neighbour *n = NULL; struct inet_sock *newinet; const struct iphdr *iph; @@ -1118,9 +1119,10 @@ static struct sock *chtls_recv_sock(struct sock *lsk, struct dst_entry *dst; struct tcp_sock *tp; struct sock *newsk; + bool found = false; u16 port_id; int rxq_idx; - int step; + int step, i; iph = (const struct iphdr *)network_hdr; newsk = tcp_create_openreq_child(lsk, oreq, cdev->askb); @@ -1152,7 +1154,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk, n = dst_neigh_lookup(dst, &ip6h->saddr); #endif } - if (!n) + if (!n || !n->dev) goto free_sk; ndev = n->dev; @@ -1161,6 +1163,13 @@ static struct sock *chtls_recv_sock(struct sock *lsk, if (is_vlan_dev(ndev)) ndev = vlan_dev_real_dev(ndev); + for_each_port(adap, i) + if (cdev->ports[i] == ndev) + found = true; + + if (!found) + goto free_dst; + port_id = cxgb4_port_idx(ndev); csk = chtls_sock_create(cdev); @@ -1238,6 +1247,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk, free_csk: chtls_sock_release(&csk->kref); free_dst: + neigh_release(n); dst_release(dst); free_sk: inet_csk_prepare_forced_close(newsk); -- cgit 1.4.1 From f8d15d29d6e6b32704c8fce9229716ca145a0de2 Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Wed, 6 Jan 2021 09:59:09 +0530 Subject: chtls: Avoid unnecessary freeing of oreq pointer In chtls_pass_accept_request(), removing the chtls_reqsk_free() call to avoid oreq freeing twice. Here oreq is the pointer to struct request_sock. Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Rohit Maheshwari Signed-off-by: Ayush Sawal Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 04a8bd5af3b9..3022c802d09a 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1397,7 +1397,7 @@ static void chtls_pass_accept_request(struct sock *sk, newsk = chtls_recv_sock(sk, oreq, network_hdr, req, cdev); if (!newsk) - goto free_oreq; + goto reject; if (chtls_get_module(newsk)) goto reject; @@ -1413,8 +1413,6 @@ static void chtls_pass_accept_request(struct sock *sk, kfree_skb(skb); return; -free_oreq: - chtls_reqsk_free(oreq); reject: mk_tid_release(reply_skb, 0, tid); cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb); -- cgit 1.4.1 From a84b2c0d5fa23da6d6c8c0d5f5c93184a2744d3e Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Wed, 6 Jan 2021 09:59:10 +0530 Subject: chtls: Replace skb_dequeue with skb_peek The skb is unlinked twice, one in __skb_dequeue in function chtls_reset_synq() and another in cleanup_syn_rcv_conn(). So in this patch using skb_peek() instead of __skb_dequeue(), so that unlink will be handled only in cleanup_syn_rcv_conn(). Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Vinay Kumar Yadav Signed-off-by: Ayush Sawal Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 3022c802d09a..ff3969a24d74 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -621,7 +621,7 @@ static void chtls_reset_synq(struct listen_ctx *listen_ctx) while (!skb_queue_empty(&listen_ctx->synq)) { struct chtls_sock *csk = - container_of((struct synq *)__skb_dequeue + container_of((struct synq *)skb_peek (&listen_ctx->synq), struct chtls_sock, synq); struct sock *child = csk->sk; -- cgit 1.4.1 From eade1e0a4fb31d48eeb1589d9bb859ae4dd6181d Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Wed, 6 Jan 2021 09:59:11 +0530 Subject: chtls: Added a check to avoid NULL pointer dereference In case of server removal lookup_stid() may return NULL pointer, which is used as listen_ctx. So added a check before accessing this pointer. Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Vinay Kumar Yadav Signed-off-by: Ayush Sawal Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index ff3969a24d74..1c6d3c93a0c8 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1597,6 +1597,11 @@ static int chtls_pass_establish(struct chtls_dev *cdev, struct sk_buff *skb) sk_wake_async(sk, 0, POLL_OUT); data = lookup_stid(cdev->tids, stid); + if (!data) { + /* listening server close */ + kfree_skb(skb); + goto unlock; + } lsk = ((struct listen_ctx *)data)->lsk; bh_lock_sock(lsk); -- cgit 1.4.1 From 15ef6b0e30b354253e2c10b3836bc59767eb162b Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Wed, 6 Jan 2021 09:59:12 +0530 Subject: chtls: Fix chtls resources release sequence CPL_ABORT_RPL is sent after releasing the resources by calling chtls_release_resources(sk); and chtls_conn_done(sk); eventually causing kernel panic. Fixing it by calling release in appropriate order. Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Vinay Kumar Yadav Signed-off-by: Ayush Sawal Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 1c6d3c93a0c8..51dd030b3b36 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -2058,9 +2058,9 @@ static void bl_abort_syn_rcv(struct sock *lsk, struct sk_buff *skb) queue = csk->txq_idx; skb->sk = NULL; - do_abort_syn_rcv(child, lsk); chtls_send_abort_rpl(child, skb, BLOG_SKB_CB(skb)->cdev, CPL_ABORT_NO_RST, queue); + do_abort_syn_rcv(child, lsk); } static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb) @@ -2090,8 +2090,8 @@ static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb) if (!sock_owned_by_user(psk)) { int queue = csk->txq_idx; - do_abort_syn_rcv(sk, psk); chtls_send_abort_rpl(sk, skb, cdev, CPL_ABORT_NO_RST, queue); + do_abort_syn_rcv(sk, psk); } else { skb->sk = sk; BLOG_SKB_CB(skb)->backlog_rcv = bl_abort_syn_rcv; @@ -2135,12 +2135,12 @@ static void chtls_abort_req_rss(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_SYN_RECV && !abort_syn_rcv(sk, skb)) return; - chtls_release_resources(sk); - chtls_conn_done(sk); } chtls_send_abort_rpl(sk, skb, BLOG_SKB_CB(skb)->cdev, rst_status, queue); + chtls_release_resources(sk); + chtls_conn_done(sk); } static void chtls_abort_rpl_rss(struct sock *sk, struct sk_buff *skb) -- cgit 1.4.1 From cf7b2ae4d70432fa94ebba3fbaab825481ae7189 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Mon, 21 Dec 2020 23:52:00 +0100 Subject: riscv: return -ENOSYS for syscall -1 Properly return -ENOSYS for syscall -1 instead of leaving the return value uninitialized. This fixes the strace teststuite. Fixes: 5340627e3fe0 ("riscv: add support for SECCOMP and SECCOMP_FILTER") Cc: stable@vger.kernel.org Signed-off-by: Andreas Schwab Reviewed-by: Tycho Andersen Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/entry.S | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 524d918f3601..d07763001eb0 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -186,14 +186,7 @@ check_syscall_nr: * Syscall number held in a7. * If syscall number is above allowed value, redirect to ni_syscall. */ - bge a7, t0, 1f - /* - * Check if syscall is rejected by tracer, i.e., a7 == -1. - * If yes, we pretend it was executed. - */ - li t1, -1 - beq a7, t1, ret_from_syscall_rejected - blt a7, t1, 1f + bgeu a7, t0, 1f /* Call syscall */ la s0, sys_call_table slli t0, a7, RISCV_LGPTR -- cgit 1.4.1 From 11f4c2e940e2f317c9d8fb5a79702f2a4a02ff98 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sun, 13 Dec 2020 22:50:34 +0900 Subject: riscv: Fix kernel time_init() If of_clk_init() is not called in time_init(), clock providers defined in the system device tree are not initialized, resulting in failures for other devices to initialize due to missing clocks. Similarly to other architectures and to the default kernel time_init() implementation, call of_clk_init() before executing timer_probe() in time_init(). Signed-off-by: Damien Le Moal Acked-by: Stephen Boyd Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/time.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c index 4d3a1048ad8b..8a5cf99c0776 100644 --- a/arch/riscv/kernel/time.c +++ b/arch/riscv/kernel/time.c @@ -4,6 +4,7 @@ * Copyright (C) 2017 SiFive */ +#include #include #include #include @@ -24,6 +25,8 @@ void __init time_init(void) riscv_timebase = prop; lpj_fine = riscv_timebase / HZ; + + of_clk_init(NULL); timer_probe(); } -- cgit 1.4.1 From 1f1496a923b6ba16679074fe77100e1b53cdb880 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sun, 13 Dec 2020 22:50:35 +0900 Subject: riscv: Fix sifive serial driver Setup the port uartclk in sifive_serial_probe() so that the base baud rate is correctly printed during device probe instead of always showing "0". I.e. the probe message is changed from 38000000.serial: ttySIF0 at MMIO 0x38000000 (irq = 1, base_baud = 0) is a SiFive UART v0 to the correct: 38000000.serial: ttySIF0 at MMIO 0x38000000 (irq = 1, base_baud = 115200) is a SiFive UART v0 Signed-off-by: Damien Le Moal Reviewed-by: Palmer Dabbelt Acked-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- drivers/tty/serial/sifive.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/sifive.c b/drivers/tty/serial/sifive.c index 1066eebe3b28..328d5a78792f 100644 --- a/drivers/tty/serial/sifive.c +++ b/drivers/tty/serial/sifive.c @@ -1000,6 +1000,7 @@ static int sifive_serial_probe(struct platform_device *pdev) /* Set up clock divider */ ssp->clkin_rate = clk_get_rate(ssp->clk); ssp->baud_rate = SIFIVE_DEFAULT_BAUD_RATE; + ssp->port.uartclk = ssp->baud_rate * 16; __ssp_update_div(ssp); platform_set_drvdata(pdev, ssp); -- cgit 1.4.1 From 643437b996bac9267785e0bd528332e2d5811067 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sun, 13 Dec 2020 22:50:36 +0900 Subject: riscv: Enable interrupts during syscalls with M-Mode When running is M-Mode (no MMU config), MPIE does not get set. This results in all syscalls being executed with interrupts disabled as handle_exception never sets SR_IE as it always sees SR_PIE being cleared. Fix this by always force enabling interrupts in handle_syscall when CONFIG_RISCV_M_MODE is enabled. Signed-off-by: Damien Le Moal Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/entry.S | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index d07763001eb0..48de316c68c1 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -155,6 +155,15 @@ skip_context_tracking: tail do_trap_unknown handle_syscall: +#ifdef CONFIG_RISCV_M_MODE + /* + * When running is M-Mode (no MMU config), MPIE does not get set. + * As a result, we need to force enable interrupts here because + * handle_exception did not do set SR_IE as it always sees SR_PIE + * being cleared. + */ + csrs CSR_STATUS, SR_IE +#endif #if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING) /* Recover a0 - a7 for system calls */ REG_L a0, PT_A0(sp) -- cgit 1.4.1 From ac7996d680d8b4a51bb99bbdcee3dc838b985498 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 7 Jan 2021 12:39:16 +0000 Subject: octeontx2-af: fix memory leak of lmac and lmac->name Currently the error return paths don't kfree lmac and lmac->name leading to some memory leaks. Fix this by adding two error return paths that kfree these objects Addresses-Coverity: ("Resource leak") Fixes: 1463f382f58d ("octeontx2-af: Add support for CGX link management") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20210107123916.189748-1-colin.king@canonical.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 7d0f96290943..1a8f5a039d50 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -871,8 +871,10 @@ static int cgx_lmac_init(struct cgx *cgx) if (!lmac) return -ENOMEM; lmac->name = kcalloc(1, sizeof("cgx_fwi_xxx_yyy"), GFP_KERNEL); - if (!lmac->name) - return -ENOMEM; + if (!lmac->name) { + err = -ENOMEM; + goto err_lmac_free; + } sprintf(lmac->name, "cgx_fwi_%d_%d", cgx->cgx_id, i); lmac->lmac_id = i; lmac->cgx = cgx; @@ -883,7 +885,7 @@ static int cgx_lmac_init(struct cgx *cgx) CGX_LMAC_FWI + i * 9), cgx_fwi_event_handler, 0, lmac->name, lmac); if (err) - return err; + goto err_irq; /* Enable interrupt */ cgx_write(cgx, lmac->lmac_id, CGXX_CMRX_INT_ENA_W1S, @@ -895,6 +897,12 @@ static int cgx_lmac_init(struct cgx *cgx) } return cgx_lmac_verify_fwi_version(cgx); + +err_irq: + kfree(lmac->name); +err_lmac_free: + kfree(lmac); + return err; } static int cgx_lmac_exit(struct cgx *cgx) -- cgit 1.4.1 From 07e61a979ca4dddb3661f59328b3cd109f6b0070 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 7 Jan 2021 16:48:21 +0200 Subject: nexthop: Fix off-by-one error in error path A reference was not taken for the current nexthop entry, so do not try to put it in the error path. Fixes: 430a049190de ("nexthop: Add support for nexthop groups") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 5e1b22d4f939..f8035cfa9c20 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1459,7 +1459,7 @@ static struct nexthop *nexthop_create_group(struct net *net, return nh; out_no_nh: - for (; i >= 0; --i) + for (i--; i >= 0; --i) nexthop_put(nhg->nh_entries[i].nh); kfree(nhg->spare); -- cgit 1.4.1 From 7b01e53eee6dce7a8a6736e06b99b68cd0cc7a27 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 7 Jan 2021 16:48:22 +0200 Subject: nexthop: Unlink nexthop group entry in error path In case of error, remove the nexthop group entry from the list to which it was previously added. Fixes: 430a049190de ("nexthop: Add support for nexthop groups") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index f8035cfa9c20..712cdc061cde 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1459,8 +1459,10 @@ static struct nexthop *nexthop_create_group(struct net *net, return nh; out_no_nh: - for (i--; i >= 0; --i) + for (i--; i >= 0; --i) { + list_del(&nhg->nh_entries[i].nh_list); nexthop_put(nhg->nh_entries[i].nh); + } kfree(nhg->spare); kfree(nhg); -- cgit 1.4.1 From b19218b27f3477316d296e8bcf4446aaf017aa69 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 7 Jan 2021 16:48:23 +0200 Subject: nexthop: Bounce NHA_GATEWAY in FDB nexthop groups The function nh_check_attr_group() is called to validate nexthop groups. The intention of that code seems to have been to bounce all attributes above NHA_GROUP_TYPE except for NHA_FDB. However instead it bounces all these attributes except when NHA_FDB attribute is present--then it accepts them. NHA_FDB validation that takes place before, in rtm_to_nh_config(), already bounces NHA_OIF, NHA_BLACKHOLE, NHA_ENCAP and NHA_ENCAP_TYPE. Yet further back, NHA_GROUPS and NHA_MASTER are bounced unconditionally. But that still leaves NHA_GATEWAY as an attribute that would be accepted in FDB nexthop groups (with no meaning), so long as it keeps the address family as unspecified: # ip nexthop add id 1 fdb via 127.0.0.1 # ip nexthop add id 10 fdb via default group 1 The nexthop code is still relatively new and likely not used very broadly, and the FDB bits are newer still. Even though there is a reproducer out there, it relies on an improbable gateway arguments "via default", "via all" or "via any". Given all this, I believe it is OK to reformulate the condition to do the right thing and bounce NHA_GATEWAY. Fixes: 38428d68719c ("nexthop: support for fdb ecmp nexthops") Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 712cdc061cde..e53e43aef785 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -627,7 +627,7 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], for (i = NHA_GROUP_TYPE + 1; i < __NHA_MAX; ++i) { if (!tb[i]) continue; - if (tb[NHA_FDB]) + if (i == NHA_FDB) continue; NL_SET_ERR_MSG(extack, "No other attributes can be set in nexthop groups"); -- cgit 1.4.1 From a5c9ca76a1c61fb5e4c35de8eb25aa925b03c9e4 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 7 Jan 2021 16:48:24 +0200 Subject: selftests: fib_nexthops: Fix wrong mausezahn invocation For IPv6 traffic, mausezahn needs to be invoked with '-6'. Otherwise an error is returned: # ip netns exec me mausezahn veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" Failed to set source IPv4 address. Please check if source is set to a valid IPv4 address. Invalid command line parameters! Fixes: 7c741868ceab ("selftests: Add torture tests to nexthop tests") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_nexthops.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index eb693a3b7b4a..4c7d33618437 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -869,7 +869,7 @@ ipv6_torture() pid3=$! ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 & pid4=$! - ip netns exec me mausezahn veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + ip netns exec me mausezahn -6 veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & pid5=$! sleep 300 -- cgit 1.4.1 From 0b9902c1fcc59ba75268386c0420a554f8844168 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 7 Jan 2021 18:24:40 +0100 Subject: s390/qeth: fix deadlock during recovery When qeth_dev_layer2_store() - holding the discipline_mutex - waits inside qeth_l*_remove_device() for a qeth_do_reset() thread to complete, we can hit a deadlock if qeth_do_reset() concurrently calls qeth_set_online() and thus tries to aquire the discipline_mutex. Move the discipline_mutex locking outside of qeth_set_online() and qeth_set_offline(), and turn the discipline into a parameter so that callers understand the dependency. To fix the deadlock, we can now relax the locking: As already established, qeth_l*_remove_device() waits for qeth_do_reset() to complete. So qeth_do_reset() itself is under no risk of having card->discipline ripped out while it's running, and thus doesn't need to take the discipline_mutex. Fixes: 9dc48ccc68b9 ("qeth: serialize sysfs-triggered device configurations") Signed-off-by: Julian Wiedmann Reviewed-by: Alexandra Winter Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_core.h | 3 ++- drivers/s390/net/qeth_core_main.c | 35 ++++++++++++++++++++++------------- drivers/s390/net/qeth_l2_main.c | 7 +++++-- drivers/s390/net/qeth_l3_main.c | 7 +++++-- 4 files changed, 34 insertions(+), 18 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 6f5ddc3eab8c..28f637042d44 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -1079,7 +1079,8 @@ struct qeth_card *qeth_get_card_by_busid(char *bus_id); void qeth_set_allowed_threads(struct qeth_card *card, unsigned long threads, int clear_start_mask); int qeth_threads_running(struct qeth_card *, unsigned long); -int qeth_set_offline(struct qeth_card *card, bool resetting); +int qeth_set_offline(struct qeth_card *card, const struct qeth_discipline *disc, + bool resetting); int qeth_send_ipa_cmd(struct qeth_card *, struct qeth_cmd_buffer *, int (*reply_cb) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index f4b60294a969..d45e223fc521 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5507,12 +5507,12 @@ out: return rc; } -static int qeth_set_online(struct qeth_card *card) +static int qeth_set_online(struct qeth_card *card, + const struct qeth_discipline *disc) { bool carrier_ok; int rc; - mutex_lock(&card->discipline_mutex); mutex_lock(&card->conf_mutex); QETH_CARD_TEXT(card, 2, "setonlin"); @@ -5529,7 +5529,7 @@ static int qeth_set_online(struct qeth_card *card) /* no need for locking / error handling at this early stage: */ qeth_set_real_num_tx_queues(card, qeth_tx_actual_queues(card)); - rc = card->discipline->set_online(card, carrier_ok); + rc = disc->set_online(card, carrier_ok); if (rc) goto err_online; @@ -5537,7 +5537,6 @@ static int qeth_set_online(struct qeth_card *card) kobject_uevent(&card->gdev->dev.kobj, KOBJ_CHANGE); mutex_unlock(&card->conf_mutex); - mutex_unlock(&card->discipline_mutex); return 0; err_online: @@ -5552,15 +5551,14 @@ err_hardsetup: qdio_free(CARD_DDEV(card)); mutex_unlock(&card->conf_mutex); - mutex_unlock(&card->discipline_mutex); return rc; } -int qeth_set_offline(struct qeth_card *card, bool resetting) +int qeth_set_offline(struct qeth_card *card, const struct qeth_discipline *disc, + bool resetting) { int rc, rc2, rc3; - mutex_lock(&card->discipline_mutex); mutex_lock(&card->conf_mutex); QETH_CARD_TEXT(card, 3, "setoffl"); @@ -5581,7 +5579,7 @@ int qeth_set_offline(struct qeth_card *card, bool resetting) cancel_work_sync(&card->rx_mode_work); - card->discipline->set_offline(card); + disc->set_offline(card); qeth_qdio_clear_card(card, 0); qeth_drain_output_queues(card); @@ -5602,16 +5600,19 @@ int qeth_set_offline(struct qeth_card *card, bool resetting) kobject_uevent(&card->gdev->dev.kobj, KOBJ_CHANGE); mutex_unlock(&card->conf_mutex); - mutex_unlock(&card->discipline_mutex); return 0; } EXPORT_SYMBOL_GPL(qeth_set_offline); static int qeth_do_reset(void *data) { + const struct qeth_discipline *disc; struct qeth_card *card = data; int rc; + /* Lock-free, other users will block until we are done. */ + disc = card->discipline; + QETH_CARD_TEXT(card, 2, "recover1"); if (!qeth_do_run_thread(card, QETH_RECOVER_THREAD)) return 0; @@ -5619,8 +5620,8 @@ static int qeth_do_reset(void *data) dev_warn(&card->gdev->dev, "A recovery process has been started for the device\n"); - qeth_set_offline(card, true); - rc = qeth_set_online(card); + qeth_set_offline(card, disc, true); + rc = qeth_set_online(card, disc); if (!rc) { dev_info(&card->gdev->dev, "Device successfully recovered!\n"); @@ -6647,7 +6648,10 @@ static int qeth_core_set_online(struct ccwgroup_device *gdev) } } - rc = qeth_set_online(card); + mutex_lock(&card->discipline_mutex); + rc = qeth_set_online(card, card->discipline); + mutex_unlock(&card->discipline_mutex); + err: return rc; } @@ -6655,8 +6659,13 @@ err: static int qeth_core_set_offline(struct ccwgroup_device *gdev) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); + int rc; - return qeth_set_offline(card, false); + mutex_lock(&card->discipline_mutex); + rc = qeth_set_offline(card, card->discipline, false); + mutex_unlock(&card->discipline_mutex); + + return rc; } static void qeth_core_shutdown(struct ccwgroup_device *gdev) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 4ed0fb0705a5..37279b1e29f6 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -2207,8 +2207,11 @@ static void qeth_l2_remove_device(struct ccwgroup_device *gdev) qeth_set_allowed_threads(card, 0, 1); wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); - if (gdev->state == CCWGROUP_ONLINE) - qeth_set_offline(card, false); + if (gdev->state == CCWGROUP_ONLINE) { + mutex_lock(&card->discipline_mutex); + qeth_set_offline(card, card->discipline, false); + mutex_unlock(&card->discipline_mutex); + } cancel_work_sync(&card->close_dev_work); if (card->dev->reg_state == NETREG_REGISTERED) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index d138ac432d01..8d474179ce98 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1970,8 +1970,11 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev) qeth_set_allowed_threads(card, 0, 1); wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); - if (cgdev->state == CCWGROUP_ONLINE) - qeth_set_offline(card, false); + if (cgdev->state == CCWGROUP_ONLINE) { + mutex_lock(&card->discipline_mutex); + qeth_set_offline(card, card->discipline, false); + mutex_unlock(&card->discipline_mutex); + } cancel_work_sync(&card->close_dev_work); if (card->dev->reg_state == NETREG_REGISTERED) -- cgit 1.4.1 From b41b554c1ee75070a14c02a88496b1f231c7eacc Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 7 Jan 2021 18:24:41 +0100 Subject: s390/qeth: fix locking for discipline setup / removal Due to insufficient locking, qeth_core_set_online() and qeth_dev_layer2_store() can run in parallel, both attempting to load & setup the discipline (and stepping on each other toes along the way). A similar race can also occur between qeth_core_remove_device() and qeth_dev_layer2_store(). Access to .discipline is meant to be protected by the discipline_mutex, so add/expand the locking in qeth_core_remove_device() and qeth_core_set_online(). Adjust the locking in qeth_l*_remove_device() accordingly, as it's now handled by the callers in a consistent manner. Based on an initial patch by Ursula Braun. Fixes: 9dc48ccc68b9 ("qeth: serialize sysfs-triggered device configurations") Signed-off-by: Julian Wiedmann Reviewed-by: Alexandra Winter Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_core_main.c | 7 +++++-- drivers/s390/net/qeth_l2_main.c | 5 +---- drivers/s390/net/qeth_l3_main.c | 5 +---- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index d45e223fc521..cf18d87da41e 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -6585,6 +6585,7 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev) break; default: card->info.layer_enforced = true; + /* It's so early that we don't need the discipline_mutex yet. */ rc = qeth_core_load_discipline(card, enforced_disc); if (rc) goto err_load; @@ -6617,10 +6618,12 @@ static void qeth_core_remove_device(struct ccwgroup_device *gdev) QETH_CARD_TEXT(card, 2, "removedv"); + mutex_lock(&card->discipline_mutex); if (card->discipline) { card->discipline->remove(gdev); qeth_core_free_discipline(card); } + mutex_unlock(&card->discipline_mutex); qeth_free_qdio_queues(card); @@ -6635,6 +6638,7 @@ static int qeth_core_set_online(struct ccwgroup_device *gdev) int rc = 0; enum qeth_discipline_id def_discipline; + mutex_lock(&card->discipline_mutex); if (!card->discipline) { def_discipline = IS_IQD(card) ? QETH_DISCIPLINE_LAYER3 : QETH_DISCIPLINE_LAYER2; @@ -6648,11 +6652,10 @@ static int qeth_core_set_online(struct ccwgroup_device *gdev) } } - mutex_lock(&card->discipline_mutex); rc = qeth_set_online(card, card->discipline); - mutex_unlock(&card->discipline_mutex); err: + mutex_unlock(&card->discipline_mutex); return rc; } diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 37279b1e29f6..4254caf1d9b6 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -2207,11 +2207,8 @@ static void qeth_l2_remove_device(struct ccwgroup_device *gdev) qeth_set_allowed_threads(card, 0, 1); wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); - if (gdev->state == CCWGROUP_ONLINE) { - mutex_lock(&card->discipline_mutex); + if (gdev->state == CCWGROUP_ONLINE) qeth_set_offline(card, card->discipline, false); - mutex_unlock(&card->discipline_mutex); - } cancel_work_sync(&card->close_dev_work); if (card->dev->reg_state == NETREG_REGISTERED) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 8d474179ce98..6970597bc885 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1970,11 +1970,8 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev) qeth_set_allowed_threads(card, 0, 1); wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); - if (cgdev->state == CCWGROUP_ONLINE) { - mutex_lock(&card->discipline_mutex); + if (cgdev->state == CCWGROUP_ONLINE) qeth_set_offline(card, card->discipline, false); - mutex_unlock(&card->discipline_mutex); - } cancel_work_sync(&card->close_dev_work); if (card->dev->reg_state == NETREG_REGISTERED) -- cgit 1.4.1 From f9c4845385c8f6631ebd5dddfb019ea7a285fba4 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 7 Jan 2021 18:24:42 +0100 Subject: s390/qeth: fix L2 header access in qeth_l3_osa_features_check() ip_finish_output_gso() may call .ndo_features_check() even before the skb has a L2 header. This conflicts with qeth_get_ip_version()'s attempt to inspect the L2 header via vlan_eth_hdr(). Switch to vlan_get_protocol(), as already used further down in the common qeth_features_check() path. Fixes: f13ade199391 ("s390/qeth: run non-offload L3 traffic over common xmit path") Signed-off-by: Julian Wiedmann Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_l3_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 6970597bc885..4c2cae7ae9a7 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1813,7 +1813,7 @@ static netdev_features_t qeth_l3_osa_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { - if (qeth_get_ip_version(skb) != 4) + if (vlan_get_protocol(skb) != htons(ETH_P_IP)) features &= ~NETIF_F_HW_VLAN_CTAG_TX; return qeth_features_check(skb, dev, features); } -- cgit 1.4.1 From 3545454c7801e391b0d966f82c98614d45394770 Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Thu, 7 Jan 2021 20:58:18 +0100 Subject: net: dsa: lantiq_gswip: Exclude RMII from modes that report 1 GbE Exclude RMII from modes that report 1 GbE support. Reduced MII supports up to 100 MbE. Fixes: 14fceff4771e ("net: dsa: Add Lantiq / Intel DSA driver for vrx200") Signed-off-by: Aleksander Jan Bajkowski Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20210107195818.3878-1-olek2@wp.pl Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lantiq_gswip.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 4b36d89bec06..662e68a0e7e6 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -1436,11 +1436,12 @@ static void gswip_phylink_validate(struct dsa_switch *ds, int port, phylink_set(mask, Pause); phylink_set(mask, Asym_Pause); - /* With the exclusion of MII and Reverse MII, we support Gigabit, - * including Half duplex + /* With the exclusion of MII, Reverse MII and Reduced MII, we + * support Gigabit, including Half duplex */ if (state->interface != PHY_INTERFACE_MODE_MII && - state->interface != PHY_INTERFACE_MODE_REVMII) { + state->interface != PHY_INTERFACE_MODE_REVMII && + state->interface != PHY_INTERFACE_MODE_RMII) { phylink_set(mask, 1000baseT_Full); phylink_set(mask, 1000baseT_Half); } -- cgit 1.4.1 From 2d2f6f1b4799428d160c021dd652bc3e3593945e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Jan 2021 19:36:40 +0100 Subject: block: pre-initialize struct block_device in bdev_alloc_inode bdev_evict_inode and bdev_free_inode are also called for the root inode of bdevfs, for which bdev_alloc is never called. Move the zeroing o f struct block_device and the initialization of the bd_bdi field into bdev_alloc_inode to make sure they are initialized for the root inode as well. Fixes: e6cb53827ed6 ("block: initialize struct block_device in bdev_alloc") Reported-by: Alexey Kardashevskiy Tested-by: Alexey Kardashevskiy Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/block_dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index e454c5a81043..3b8963e228a1 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -776,8 +776,11 @@ static struct kmem_cache * bdev_cachep __read_mostly; static struct inode *bdev_alloc_inode(struct super_block *sb) { struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL); + if (!ei) return NULL; + memset(&ei->bdev, 0, sizeof(ei->bdev)); + ei->bdev.bd_bdi = &noop_backing_dev_info; return &ei->vfs_inode; } @@ -871,14 +874,12 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) mapping_set_gfp_mask(&inode->i_data, GFP_USER); bdev = I_BDEV(inode); - memset(bdev, 0, sizeof(*bdev)); mutex_init(&bdev->bd_mutex); mutex_init(&bdev->bd_fsfreeze_mutex); spin_lock_init(&bdev->bd_size_lock); bdev->bd_disk = disk; bdev->bd_partno = partno; bdev->bd_inode = inode; - bdev->bd_bdi = &noop_backing_dev_info; #ifdef CONFIG_SYSFS INIT_LIST_HEAD(&bdev->bd_holder_disks); #endif -- cgit 1.4.1 From ae28d1aae48a1258bd09a6f707ebb4231d79a761 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 17 Dec 2020 14:31:18 -0800 Subject: x86/resctrl: Use an IPI instead of task_work_add() to update PQR_ASSOC MSR Currently, when moving a task to a resource group the PQR_ASSOC MSR is updated with the new closid and rmid in an added task callback. If the task is running, the work is run as soon as possible. If the task is not running, the work is executed later in the kernel exit path when the kernel returns to the task again. Updating the PQR_ASSOC MSR as soon as possible on the CPU a moved task is running is the right thing to do. Queueing work for a task that is not running is unnecessary (the PQR_ASSOC MSR is already updated when the task is scheduled in) and causing system resource waste with the way in which it is implemented: Work to update the PQR_ASSOC register is queued every time the user writes a task id to the "tasks" file, even if the task already belongs to the resource group. This could result in multiple pending work items associated with a single task even if they are all identical and even though only a single update with most recent values is needed. Specifically, even if a task is moved between different resource groups while it is sleeping then it is only the last move that is relevant but yet a work item is queued during each move. This unnecessary queueing of work items could result in significant system resource waste, especially on tasks sleeping for a long time. For example, as demonstrated by Shakeel Butt in [1] writing the same task id to the "tasks" file can quickly consume significant memory. The same problem (wasted system resources) occurs when moving a task between different resource groups. As pointed out by Valentin Schneider in [2] there is an additional issue with the way in which the queueing of work is done in that the task_struct update is currently done after the work is queued, resulting in a race with the register update possibly done before the data needed by the update is available. To solve these issues, update the PQR_ASSOC MSR in a synchronous way right after the new closid and rmid are ready during the task movement, only if the task is running. If a moved task is not running nothing is done since the PQR_ASSOC MSR will be updated next time the task is scheduled. This is the same way used to update the register when tasks are moved as part of resource group removal. [1] https://lore.kernel.org/lkml/CALvZod7E9zzHwenzf7objzGKsdBmVwTgEJ0nPgs0LUFU3SN5Pw@mail.gmail.com/ [2] https://lore.kernel.org/lkml/20201123022433.17905-1-valentin.schneider@arm.com [ bp: Massage commit message and drop the two update_task_closid_rmid() variants. ] Fixes: e02737d5b826 ("x86/intel_rdt: Add tasks files") Reported-by: Shakeel Butt Reported-by: Valentin Schneider Signed-off-by: Fenghua Yu Signed-off-by: Reinette Chatre Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: James Morse Reviewed-by: Valentin Schneider Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/17aa2fb38fc12ce7bb710106b3e7c7b45acb9e94.1608243147.git.reinette.chatre@intel.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 112 +++++++++++++-------------------- 1 file changed, 43 insertions(+), 69 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 29ffb95b25ff..1c6f8a60ac52 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -525,89 +525,63 @@ static void rdtgroup_remove(struct rdtgroup *rdtgrp) kfree(rdtgrp); } -struct task_move_callback { - struct callback_head work; - struct rdtgroup *rdtgrp; -}; - -static void move_myself(struct callback_head *head) +static void _update_task_closid_rmid(void *task) { - struct task_move_callback *callback; - struct rdtgroup *rdtgrp; - - callback = container_of(head, struct task_move_callback, work); - rdtgrp = callback->rdtgrp; - /* - * If resource group was deleted before this task work callback - * was invoked, then assign the task to root group and free the - * resource group. + * If the task is still current on this CPU, update PQR_ASSOC MSR. + * Otherwise, the MSR is updated when the task is scheduled in. */ - if (atomic_dec_and_test(&rdtgrp->waitcount) && - (rdtgrp->flags & RDT_DELETED)) { - current->closid = 0; - current->rmid = 0; - rdtgroup_remove(rdtgrp); - } - - if (unlikely(current->flags & PF_EXITING)) - goto out; - - preempt_disable(); - /* update PQR_ASSOC MSR to make resource group go into effect */ - resctrl_sched_in(); - preempt_enable(); + if (task == current) + resctrl_sched_in(); +} -out: - kfree(callback); +static void update_task_closid_rmid(struct task_struct *t) +{ + if (IS_ENABLED(CONFIG_SMP) && task_curr(t)) + smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1); + else + _update_task_closid_rmid(t); } static int __rdtgroup_move_task(struct task_struct *tsk, struct rdtgroup *rdtgrp) { - struct task_move_callback *callback; - int ret; - - callback = kzalloc(sizeof(*callback), GFP_KERNEL); - if (!callback) - return -ENOMEM; - callback->work.func = move_myself; - callback->rdtgrp = rdtgrp; - /* - * Take a refcount, so rdtgrp cannot be freed before the - * callback has been invoked. + * Set the task's closid/rmid before the PQR_ASSOC MSR can be + * updated by them. + * + * For ctrl_mon groups, move both closid and rmid. + * For monitor groups, can move the tasks only from + * their parent CTRL group. */ - atomic_inc(&rdtgrp->waitcount); - ret = task_work_add(tsk, &callback->work, TWA_RESUME); - if (ret) { - /* - * Task is exiting. Drop the refcount and free the callback. - * No need to check the refcount as the group cannot be - * deleted before the write function unlocks rdtgroup_mutex. - */ - atomic_dec(&rdtgrp->waitcount); - kfree(callback); - rdt_last_cmd_puts("Task exited\n"); - } else { - /* - * For ctrl_mon groups move both closid and rmid. - * For monitor groups, can move the tasks only from - * their parent CTRL group. - */ - if (rdtgrp->type == RDTCTRL_GROUP) { - tsk->closid = rdtgrp->closid; + + if (rdtgrp->type == RDTCTRL_GROUP) { + tsk->closid = rdtgrp->closid; + tsk->rmid = rdtgrp->mon.rmid; + } else if (rdtgrp->type == RDTMON_GROUP) { + if (rdtgrp->mon.parent->closid == tsk->closid) { tsk->rmid = rdtgrp->mon.rmid; - } else if (rdtgrp->type == RDTMON_GROUP) { - if (rdtgrp->mon.parent->closid == tsk->closid) { - tsk->rmid = rdtgrp->mon.rmid; - } else { - rdt_last_cmd_puts("Can't move task to different control group\n"); - ret = -EINVAL; - } + } else { + rdt_last_cmd_puts("Can't move task to different control group\n"); + return -EINVAL; } } - return ret; + + /* + * Ensure the task's closid and rmid are written before determining if + * the task is current that will decide if it will be interrupted. + */ + barrier(); + + /* + * By now, the task's closid and rmid are set. If the task is current + * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource + * group go into effect. If the task is not current, the MSR will be + * updated when the task is scheduled in. + */ + update_task_closid_rmid(tsk); + + return 0; } static bool is_closid_match(struct task_struct *t, struct rdtgroup *r) -- cgit 1.4.1 From a0195f314a25582b38993bf30db11c300f4f4611 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 17 Dec 2020 14:31:19 -0800 Subject: x86/resctrl: Don't move a task to the same resource group Shakeel Butt reported in [1] that a user can request a task to be moved to a resource group even if the task is already in the group. It just wastes time to do the move operation which could be costly to send IPI to a different CPU. Add a sanity check to ensure that the move operation only happens when the task is not already in the resource group. [1] https://lore.kernel.org/lkml/CALvZod7E9zzHwenzf7objzGKsdBmVwTgEJ0nPgs0LUFU3SN5Pw@mail.gmail.com/ Fixes: e02737d5b826 ("x86/intel_rdt: Add tasks files") Reported-by: Shakeel Butt Signed-off-by: Fenghua Yu Signed-off-by: Reinette Chatre Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/962ede65d8e95be793cb61102cca37f7bb018e66.1608243147.git.reinette.chatre@intel.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 1c6f8a60ac52..460f3e0df106 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -546,6 +546,13 @@ static void update_task_closid_rmid(struct task_struct *t) static int __rdtgroup_move_task(struct task_struct *tsk, struct rdtgroup *rdtgrp) { + /* If the task is already in rdtgrp, no need to move the task. */ + if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid && + tsk->rmid == rdtgrp->mon.rmid) || + (rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid && + tsk->closid == rdtgrp->mon.parent->closid)) + return 0; + /* * Set the task's closid/rmid before the PQR_ASSOC MSR can be * updated by them. -- cgit 1.4.1 From 872f36eb0b0f4f0e3a81ea1e51a6bdf58ccfdc6e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 8 Jan 2021 05:54:44 -0500 Subject: KVM: x86: __kvm_vcpu_halt can be static Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0287840b93e0..a480804ae27a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7976,7 +7976,7 @@ void kvm_arch_exit(void) kmem_cache_destroy(x86_fpu_cache); } -int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason) +static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason) { ++vcpu->stat.halt_exits; if (lapic_in_kernel(vcpu)) { -- cgit 1.4.1 From e400071a805d6229223a98899e9da8c6233704a1 Mon Sep 17 00:00:00 2001 From: Filipe Laíns Date: Mon, 4 Jan 2021 20:47:17 +0000 Subject: HID: logitech-dj: add the G602 receiver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tested. The device gets correctly exported to userspace and I can see mouse and keyboard events. Signed-off-by: Filipe Laíns Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-dj.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index 1ffcfc9a1e03..45e7e0bdd382 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -1869,6 +1869,10 @@ static const struct hid_device_id logi_dj_receivers[] = { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xc531), .driver_data = recvr_type_gaming_hidpp}, + { /* Logitech G602 receiver (0xc537) */ + HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, + 0xc537), + .driver_data = recvr_type_gaming_hidpp}, { /* Logitech lightspeed receiver (0xc539) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1), -- cgit 1.4.1 From 74acfa996b2aec2a4ea8587104c7e2f8d4c6aec2 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Fri, 8 Jan 2021 15:36:30 +0100 Subject: block/rnbd: Select SG_POOL for RNBD_CLIENT lkp reboot following build error: drivers/block/rnbd/rnbd-clt.c: In function 'rnbd_softirq_done_fn': >> drivers/block/rnbd/rnbd-clt.c:387:2: error: implicit declaration of function 'sg_free_table_chained' [-Werror=implicit-function-declaration] 387 | sg_free_table_chained(&iu->sgt, RNBD_INLINE_SG_CNT); | ^~~~~~~~~~~~~~~~~~~~~ The reason is CONFIG_SG_POOL is not enabled in the config, to avoid such failure, select SG_POOL in Kconfig for RNBD_CLIENT. Fixes: 5a1328d0c3a7 ("block/rnbd-clt: Dynamically allocate sglist for rnbd_iu") Reported-by: kernel test robot Signed-off-by: Jack Wang Signed-off-by: Jens Axboe --- drivers/block/rnbd/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/rnbd/Kconfig b/drivers/block/rnbd/Kconfig index 4b6d3d816d1f..2ff05a0d2646 100644 --- a/drivers/block/rnbd/Kconfig +++ b/drivers/block/rnbd/Kconfig @@ -7,6 +7,7 @@ config BLK_DEV_RNBD_CLIENT tristate "RDMA Network Block Device driver client" depends on INFINIBAND_RTRS_CLIENT select BLK_DEV_RNBD + select SG_POOL help RNBD client is a network block device driver using rdma transport. -- cgit 1.4.1 From 1a84e7c629f8f288e02236bc799f9b0be1cab4a7 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Fri, 8 Jan 2021 15:36:31 +0100 Subject: block/rnbd-srv: Fix use after free in rnbd_srv_sess_dev_force_close KASAN detect following BUG: [ 778.215311] ================================================================== [ 778.216696] BUG: KASAN: use-after-free in rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server] [ 778.219037] Read of size 8 at addr ffff88b1d6516c28 by task tee/8842 [ 778.220500] CPU: 37 PID: 8842 Comm: tee Kdump: loaded Not tainted 5.10.0-pserver #5.10.0-1+feature+linux+next+20201214.1025+0910d71 [ 778.220529] Hardware name: Supermicro Super Server/X11DDW-L, BIOS 3.3 02/21/2020 [ 778.220555] Call Trace: [ 778.220609] dump_stack+0x99/0xcb [ 778.220667] ? rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server] [ 778.220715] print_address_description.constprop.7+0x1e/0x230 [ 778.220750] ? freeze_kernel_threads+0x73/0x73 [ 778.220896] ? rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server] [ 778.220932] ? rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server] [ 778.220994] kasan_report.cold.9+0x37/0x7c [ 778.221066] ? kobject_put+0x80/0x270 [ 778.221102] ? rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server] [ 778.221184] rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server] [ 778.221240] rnbd_srv_dev_session_force_close_store+0x6a/0xc0 [rnbd_server] [ 778.221304] ? sysfs_file_ops+0x90/0x90 [ 778.221353] kernfs_fop_write+0x141/0x240 [ 778.221451] vfs_write+0x142/0x4d0 [ 778.221553] ksys_write+0xc0/0x160 [ 778.221602] ? __ia32_sys_read+0x50/0x50 [ 778.221684] ? lockdep_hardirqs_on_prepare+0x13d/0x210 [ 778.221718] ? syscall_enter_from_user_mode+0x1c/0x50 [ 778.221821] do_syscall_64+0x33/0x40 [ 778.221862] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 778.221896] RIP: 0033:0x7f4affdd9504 [ 778.221928] Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b3 0f 1f 80 00 00 00 00 48 8d 05 f9 61 0d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 778.221956] RSP: 002b:00007fffebb36b28 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 778.222011] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f4affdd9504 [ 778.222038] RDX: 0000000000000002 RSI: 00007fffebb36c50 RDI: 0000000000000003 [ 778.222066] RBP: 00007fffebb36c50 R08: 0000556a151aa600 R09: 00007f4affeb1540 [ 778.222094] R10: fffffffffffffc19 R11: 0000000000000246 R12: 0000556a151aa520 [ 778.222121] R13: 0000000000000002 R14: 00007f4affea6760 R15: 0000000000000002 [ 778.222764] Allocated by task 3212: [ 778.223285] kasan_save_stack+0x19/0x40 [ 778.223316] __kasan_kmalloc.constprop.7+0xc1/0xd0 [ 778.223347] kmem_cache_alloc_trace+0x186/0x350 [ 778.223382] rnbd_srv_rdma_ev+0xf16/0x1690 [rnbd_server] [ 778.223422] process_io_req+0x4d1/0x670 [rtrs_server] [ 778.223573] __ib_process_cq+0x10a/0x350 [ib_core] [ 778.223709] ib_cq_poll_work+0x31/0xb0 [ib_core] [ 778.223743] process_one_work+0x521/0xa90 [ 778.223773] worker_thread+0x65/0x5b0 [ 778.223802] kthread+0x1f2/0x210 [ 778.223833] ret_from_fork+0x22/0x30 [ 778.224296] Freed by task 8842: [ 778.224800] kasan_save_stack+0x19/0x40 [ 778.224829] kasan_set_track+0x1c/0x30 [ 778.224860] kasan_set_free_info+0x1b/0x30 [ 778.224889] __kasan_slab_free+0x108/0x150 [ 778.224919] slab_free_freelist_hook+0x64/0x190 [ 778.224947] kfree+0xe2/0x650 [ 778.224982] rnbd_destroy_sess_dev+0x2fa/0x3b0 [rnbd_server] [ 778.225011] kobject_put+0xda/0x270 [ 778.225046] rnbd_srv_sess_dev_force_close+0x30/0x60 [rnbd_server] [ 778.225081] rnbd_srv_dev_session_force_close_store+0x6a/0xc0 [rnbd_server] [ 778.225111] kernfs_fop_write+0x141/0x240 [ 778.225140] vfs_write+0x142/0x4d0 [ 778.225169] ksys_write+0xc0/0x160 [ 778.225198] do_syscall_64+0x33/0x40 [ 778.225227] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 778.226506] The buggy address belongs to the object at ffff88b1d6516c00 which belongs to the cache kmalloc-512 of size 512 [ 778.227464] The buggy address is located 40 bytes inside of 512-byte region [ffff88b1d6516c00, ffff88b1d6516e00) The problem is in the sess_dev release function we call rnbd_destroy_sess_dev, and could free the sess_dev already, but we still set the keep_id in rnbd_srv_sess_dev_force_close, which lead to use after free. To fix it, move the keep_id before the sysfs removal, and cache the rnbd_srv_session for lock accessing, Fixes: 786998050cbc ("block/rnbd-srv: close a mapped device from server side.") Signed-off-by: Jack Wang Reviewed-by: Guoqing Jiang Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-srv.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index b8e44331e494..a6a68d44f517 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -338,10 +338,12 @@ static int rnbd_srv_link_ev(struct rtrs_srv *rtrs, void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev) { - mutex_lock(&sess_dev->sess->lock); - rnbd_srv_destroy_dev_session_sysfs(sess_dev); - mutex_unlock(&sess_dev->sess->lock); + struct rnbd_srv_session *sess = sess_dev->sess; + sess_dev->keep_id = true; + mutex_lock(&sess->lock); + rnbd_srv_destroy_dev_session_sysfs(sess_dev); + mutex_unlock(&sess->lock); } static int process_msg_close(struct rtrs_srv *rtrs, -- cgit 1.4.1 From 80f99093d81370c5cec37fca3b5a6bdf6bddf0f6 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 8 Jan 2021 15:36:32 +0100 Subject: block/rnbd-clt: Fix sg table use after free Since dynamically allocate sglist is used for rnbd_iu, we can't free sg table after send_usr_msg since the callback function (cqe.done) could still access the sglist. Otherwise KASAN reports UAF issue: [ 4856.600257] BUG: KASAN: use-after-free in dma_direct_unmap_sg+0x53/0x290 [ 4856.600772] Read of size 4 at addr ffff888206af3a98 by task swapper/1/0 [ 4856.601729] CPU: 1 PID: 0 Comm: swapper/1 Kdump: loaded Tainted: G W 5.10.0-pserver #5.10.0-1+feature+linux+next+20201214.1025+0910d71 [ 4856.601748] Hardware name: Supermicro Super Server/X11DDW-L, BIOS 3.3 02/21/2020 [ 4856.601766] Call Trace: [ 4856.601785] [ 4856.601822] dump_stack+0x99/0xcb [ 4856.601856] ? dma_direct_unmap_sg+0x53/0x290 [ 4856.601888] print_address_description.constprop.7+0x1e/0x230 [ 4856.601913] ? freeze_kernel_threads+0x73/0x73 [ 4856.601965] ? mark_held_locks+0x29/0xa0 [ 4856.602019] ? dma_direct_unmap_sg+0x53/0x290 [ 4856.602039] ? dma_direct_unmap_sg+0x53/0x290 [ 4856.602079] kasan_report.cold.9+0x37/0x7c [ 4856.602188] ? mlx5_ib_post_recv+0x430/0x520 [mlx5_ib] [ 4856.602209] ? dma_direct_unmap_sg+0x53/0x290 [ 4856.602256] dma_direct_unmap_sg+0x53/0x290 [ 4856.602366] complete_rdma_req+0x188/0x4b0 [rtrs_client] [ 4856.602451] ? rtrs_clt_close+0x80/0x80 [rtrs_client] [ 4856.602535] ? mlx5_ib_poll_cq+0x48b/0x16e0 [mlx5_ib] [ 4856.602589] ? radix_tree_insert+0x3a0/0x3a0 [ 4856.602610] ? do_raw_spin_lock+0x119/0x1d0 [ 4856.602647] ? rwlock_bug.part.1+0x60/0x60 [ 4856.602740] rtrs_clt_rdma_done+0x3f7/0x670 [rtrs_client] [ 4856.602804] ? rtrs_clt_rdma_cm_handler+0xda0/0xda0 [rtrs_client] [ 4856.602857] ? check_flags.part.31+0x6c/0x1f0 [ 4856.602927] ? rcu_read_lock_sched_held+0xaf/0xe0 [ 4856.602963] ? rcu_read_lock_bh_held+0xc0/0xc0 [ 4856.603137] __ib_process_cq+0x10a/0x350 [ib_core] [ 4856.603309] ib_poll_handler+0x41/0x1c0 [ib_core] [ 4856.603358] irq_poll_softirq+0xe6/0x280 [ 4856.603392] ? lockdep_hardirqs_on_prepare+0x111/0x210 [ 4856.603446] __do_softirq+0x10d/0x646 [ 4856.603540] asm_call_irq_on_stack+0x12/0x20 [ 4856.603563] [ 4856.605096] Allocated by task 8914: [ 4856.605510] kasan_save_stack+0x19/0x40 [ 4856.605532] __kasan_kmalloc.constprop.7+0xc1/0xd0 [ 4856.605552] __kmalloc+0x155/0x320 [ 4856.605574] __sg_alloc_table+0x155/0x1c0 [ 4856.605594] sg_alloc_table+0x1f/0x50 [ 4856.605620] send_msg_sess_info+0x119/0x2e0 [rnbd_client] [ 4856.605646] remap_devs+0x71/0x210 [rnbd_client] [ 4856.605676] init_sess+0xad8/0xe10 [rtrs_client] [ 4856.605706] rtrs_clt_reconnect_work+0xd6/0x170 [rtrs_client] [ 4856.605728] process_one_work+0x521/0xa90 [ 4856.605748] worker_thread+0x65/0x5b0 [ 4856.605769] kthread+0x1f2/0x210 [ 4856.605789] ret_from_fork+0x22/0x30 [ 4856.606159] Freed by task 8914: [ 4856.606559] kasan_save_stack+0x19/0x40 [ 4856.606580] kasan_set_track+0x1c/0x30 [ 4856.606601] kasan_set_free_info+0x1b/0x30 [ 4856.606622] __kasan_slab_free+0x108/0x150 [ 4856.606642] slab_free_freelist_hook+0x64/0x190 [ 4856.606661] kfree+0xe2/0x650 [ 4856.606681] __sg_free_table+0xa4/0x100 [ 4856.606707] send_msg_sess_info+0x1d6/0x2e0 [rnbd_client] [ 4856.606733] remap_devs+0x71/0x210 [rnbd_client] [ 4856.606763] init_sess+0xad8/0xe10 [rtrs_client] [ 4856.606792] rtrs_clt_reconnect_work+0xd6/0x170 [rtrs_client] [ 4856.606813] process_one_work+0x521/0xa90 [ 4856.606833] worker_thread+0x65/0x5b0 [ 4856.606853] kthread+0x1f2/0x210 [ 4856.606872] ret_from_fork+0x22/0x30 The solution is to free iu's sgtable after the iu is not used anymore. And also move sg_alloc_table into rnbd_get_iu accordingly. Fixes: 5a1328d0c3a7 ("block/rnbd-clt: Dynamically allocate sglist for rnbd_iu") Signed-off-by: Guoqing Jiang Signed-off-by: Jack Wang Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-clt.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 96e3f9fe8241..506e9094487f 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -375,12 +375,19 @@ static struct rnbd_iu *rnbd_get_iu(struct rnbd_clt_session *sess, init_waitqueue_head(&iu->comp.wait); iu->comp.errno = INT_MAX; + if (sg_alloc_table(&iu->sgt, 1, GFP_KERNEL)) { + rnbd_put_permit(sess, permit); + kfree(iu); + return NULL; + } + return iu; } static void rnbd_put_iu(struct rnbd_clt_session *sess, struct rnbd_iu *iu) { if (atomic_dec_and_test(&iu->refcount)) { + sg_free_table(&iu->sgt); rnbd_put_permit(sess, iu->permit); kfree(iu); } @@ -487,8 +494,6 @@ static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id, bool wait) iu->buf = NULL; iu->dev = dev; - sg_alloc_table(&iu->sgt, 1, GFP_KERNEL); - msg.hdr.type = cpu_to_le16(RNBD_MSG_CLOSE); msg.device_id = cpu_to_le32(device_id); @@ -502,7 +507,6 @@ static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id, bool wait) err = errno; } - sg_free_table(&iu->sgt); rnbd_put_iu(sess, iu); return err; } @@ -575,7 +579,6 @@ static int send_msg_open(struct rnbd_clt_dev *dev, bool wait) iu->buf = rsp; iu->dev = dev; - sg_alloc_table(&iu->sgt, 1, GFP_KERNEL); sg_init_one(iu->sgt.sgl, rsp, sizeof(*rsp)); msg.hdr.type = cpu_to_le16(RNBD_MSG_OPEN); @@ -594,7 +597,6 @@ static int send_msg_open(struct rnbd_clt_dev *dev, bool wait) err = errno; } - sg_free_table(&iu->sgt); rnbd_put_iu(sess, iu); return err; } @@ -622,8 +624,6 @@ static int send_msg_sess_info(struct rnbd_clt_session *sess, bool wait) iu->buf = rsp; iu->sess = sess; - - sg_alloc_table(&iu->sgt, 1, GFP_KERNEL); sg_init_one(iu->sgt.sgl, rsp, sizeof(*rsp)); msg.hdr.type = cpu_to_le16(RNBD_MSG_SESS_INFO); @@ -650,7 +650,6 @@ put_iu: } else { err = errno; } - sg_free_table(&iu->sgt); rnbd_put_iu(sess, iu); return err; } -- cgit 1.4.1 From ef8048dd2345d070c41bc7df16763fd4d8fac296 Mon Sep 17 00:00:00 2001 From: Swapnil Ingle Date: Fri, 8 Jan 2021 15:36:33 +0100 Subject: block/rnbd: Adding name to the Contributors List Adding name to the Contributors List Signed-off-by: Swapnil Ingle Acked-by: Jack Wang Acked-by: Danil Kipnis Signed-off-by: Jack Wang Signed-off-by: Jens Axboe --- drivers/block/rnbd/README | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/rnbd/README b/drivers/block/rnbd/README index 1773c0aa0bd4..080f58a5400a 100644 --- a/drivers/block/rnbd/README +++ b/drivers/block/rnbd/README @@ -90,3 +90,4 @@ Kleber Souza Lutz Pogrell Milind Dumbare Roman Penyaev +Swapnil Ingle -- cgit 1.4.1 From 3a21777c6ee99749bac10727b3c17e5bcfebe5c1 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Fri, 8 Jan 2021 15:36:34 +0100 Subject: block/rnbd-clt: avoid module unload race with close confirmation We had kernel panic, it is caused by unload module and last close confirmation. call trace: [1196029.743127] free_sess+0x15/0x50 [rtrs_client] [1196029.743128] rtrs_clt_close+0x4c/0x70 [rtrs_client] [1196029.743129] ? rnbd_clt_unmap_device+0x1b0/0x1b0 [rnbd_client] [1196029.743130] close_rtrs+0x25/0x50 [rnbd_client] [1196029.743131] rnbd_client_exit+0x93/0xb99 [rnbd_client] [1196029.743132] __x64_sys_delete_module+0x190/0x260 And in the crashdump confirmation kworker is also running. PID: 6943 TASK: ffff9e2ac8098000 CPU: 4 COMMAND: "kworker/4:2" #0 [ffffb206cf337c30] __schedule at ffffffff9f93f891 #1 [ffffb206cf337cc8] schedule at ffffffff9f93fe98 #2 [ffffb206cf337cd0] schedule_timeout at ffffffff9f943938 #3 [ffffb206cf337d50] wait_for_completion at ffffffff9f9410a7 #4 [ffffb206cf337da0] __flush_work at ffffffff9f08ce0e #5 [ffffb206cf337e20] rtrs_clt_close_conns at ffffffffc0d5f668 [rtrs_client] #6 [ffffb206cf337e48] rtrs_clt_close at ffffffffc0d5f801 [rtrs_client] #7 [ffffb206cf337e68] close_rtrs at ffffffffc0d26255 [rnbd_client] #8 [ffffb206cf337e78] free_sess at ffffffffc0d262ad [rnbd_client] #9 [ffffb206cf337e88] rnbd_clt_put_dev at ffffffffc0d266a7 [rnbd_client] The problem is both code path try to close same session, which lead to panic. To fix it, just skip the sess if the refcount already drop to 0. Fixes: f7a7a5c228d4 ("block/rnbd: client: main functionality") Signed-off-by: Jack Wang Reviewed-by: Gioh Kim Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-clt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 506e9094487f..45a470076652 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1697,7 +1697,8 @@ static void rnbd_destroy_sessions(void) */ list_for_each_entry_safe(sess, sn, &sess_list, list) { - WARN_ON(!rnbd_clt_get_sess(sess)); + if (!rnbd_clt_get_sess(sess)) + continue; close_rtrs(sess); list_for_each_entry_safe(dev, tn, &sess->devs_list, list) { /* -- cgit 1.4.1 From 02f938e9fed1681791605ca8b96c2d9da9355f6a Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 8 Jan 2021 16:55:37 +0800 Subject: blk-mq-debugfs: Add decode for BLK_MQ_F_TAG_HCTX_SHARED Showing the hctx flags for when BLK_MQ_F_TAG_HCTX_SHARED is set gives something like: root@debian:/home/john# more /sys/kernel/debug/block/sda/hctx0/flags alloc_policy=FIFO SHOULD_MERGE|TAG_QUEUE_SHARED|3 Add the decoding for that flag. Fixes: 32bc15afed04b ("blk-mq: Facilitate a shared sbitmap per tagset") Signed-off-by: John Garry Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index e21eed20a155..712a95f74ccb 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -246,6 +246,7 @@ static const char *const hctx_flag_name[] = { HCTX_FLAG_NAME(BLOCKING), HCTX_FLAG_NAME(NO_SCHED), HCTX_FLAG_NAME(STACKING), + HCTX_FLAG_NAME(TAG_HCTX_SHARED), }; #undef HCTX_FLAG_NAME -- cgit 1.4.1 From bac717171971176b78c72d15a8b6961764ab197f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 30 Dec 2020 16:20:05 +0100 Subject: ARM: picoxcell: fix missing interrupt-parent properties dtc points out that the interrupts for some devices are not parsable: picoxcell-pc3x2.dtsi:45.19-49.5: Warning (interrupts_property): /paxi/gem@30000: Missing interrupt-parent picoxcell-pc3x2.dtsi:51.21-55.5: Warning (interrupts_property): /paxi/dmac@40000: Missing interrupt-parent picoxcell-pc3x2.dtsi:57.21-61.5: Warning (interrupts_property): /paxi/dmac@50000: Missing interrupt-parent picoxcell-pc3x2.dtsi:233.21-237.5: Warning (interrupts_property): /rwid-axi/axi2pico@c0000000: Missing interrupt-parent There are two VIC instances, so it's not clear which one needs to be used. I found the BSP sources that reference VIC0, so use that: https://github.com/r1mikey/meta-picoxcell/blob/master/recipes-kernel/linux/linux-picochip-3.0/0001-picoxcell-support-for-Picochip-picoXcell-SoC.patch Acked-by: Jamie Iles Link: https://lore.kernel.org/r/20201230152010.3914962-1-arnd@kernel.org' Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/picoxcell-pc3x2.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/picoxcell-pc3x2.dtsi b/arch/arm/boot/dts/picoxcell-pc3x2.dtsi index c4c6c7e9e37b..5898879a3038 100644 --- a/arch/arm/boot/dts/picoxcell-pc3x2.dtsi +++ b/arch/arm/boot/dts/picoxcell-pc3x2.dtsi @@ -45,18 +45,21 @@ emac: gem@30000 { compatible = "cadence,gem"; reg = <0x30000 0x10000>; + interrupt-parent = <&vic0>; interrupts = <31>; }; dmac1: dmac@40000 { compatible = "snps,dw-dmac"; reg = <0x40000 0x10000>; + interrupt-parent = <&vic0>; interrupts = <25>; }; dmac2: dmac@50000 { compatible = "snps,dw-dmac"; reg = <0x50000 0x10000>; + interrupt-parent = <&vic0>; interrupts = <26>; }; @@ -233,6 +236,7 @@ axi2pico@c0000000 { compatible = "picochip,axi2pico-pc3x2"; reg = <0xc0000000 0x10000>; + interrupt-parent = <&vic0>; interrupts = <13 14 15 16 17 18 19 20 21>; }; }; -- cgit 1.4.1 From 84e261553e6f919bf0b4d65244599ab2b41f1da5 Mon Sep 17 00:00:00 2001 From: David Arcari Date: Thu, 7 Jan 2021 09:47:07 -0500 Subject: hwmon: (amd_energy) fix allocation of hwmon_channel_info config hwmon, specifically hwmon_num_channel_attrs, expects the config array in the hwmon_channel_info structure to be terminated by a zero entry. amd_energy does not honor this convention. As result, a KASAN warning is possible. Fix this by adding an additional entry and setting it to zero. Fixes: 8abee9566b7e ("hwmon: Add amd_energy driver to report energy counters") Signed-off-by: David Arcari Cc: Naveen Krishna Chatradhi Cc: Jean Delvare Cc: Guenter Roeck Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: David Arcari Acked-by: Naveen Krishna Chatradhi Link: https://lore.kernel.org/r/20210107144707.6927-1-darcari@redhat.com Signed-off-by: Guenter Roeck --- drivers/hwmon/amd_energy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c index 9b306448b7a0..822c2e74b98d 100644 --- a/drivers/hwmon/amd_energy.c +++ b/drivers/hwmon/amd_energy.c @@ -222,7 +222,7 @@ static int amd_create_sensor(struct device *dev, */ cpus = num_present_cpus() / num_siblings; - s_config = devm_kcalloc(dev, cpus + sockets, + s_config = devm_kcalloc(dev, cpus + sockets + 1, sizeof(u32), GFP_KERNEL); if (!s_config) return -ENOMEM; @@ -254,6 +254,7 @@ static int amd_create_sensor(struct device *dev, scnprintf(label_l[i], 10, "Esocket%u", (i - cpus)); } + s_config[i] = 0; return 0; } -- cgit 1.4.1 From e076ab2a2ca70a0270232067cd49f76cd92efe64 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 7 Jan 2021 17:08:30 -0500 Subject: btrfs: shrink delalloc pages instead of full inodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 38d715f494f2 ("btrfs: use btrfs_start_delalloc_roots in shrink_delalloc") cleaned up how we do delalloc shrinking by utilizing some infrastructure we have in place to flush inodes that we use for device replace and snapshot. However this introduced a pretty serious performance regression. To reproduce the user untarred the source tarball of Firefox (360MiB xz compressed/1.5GiB uncompressed), and would see it take anywhere from 5 to 20 times as long to untar in 5.10 compared to 5.9. This was observed on fast devices (SSD and better) and not on HDD. The root cause is because before we would generally use the normal writeback path to reclaim delalloc space, and for this we would provide it with the number of pages we wanted to flush. The referenced commit changed this to flush that many inodes, which drastically increased the amount of space we were flushing in certain cases, which severely affected performance. We cannot revert this patch unfortunately because of 3d45f221ce62 ("btrfs: fix deadlock when cloning inline extent and low on free metadata space") which requires the ability to skip flushing inodes that are being cloned in certain scenarios, which means we need to keep using our flushing infrastructure or risk re-introducing the deadlock. Instead to fix this problem we can go back to providing btrfs_start_delalloc_roots with a number of pages to flush, and then set up a writeback_control and utilize sync_inode() to handle the flushing for us. This gives us the same behavior we had prior to the fix, while still allowing us to avoid the deadlock that was fixed by Filipe. I redid the users original test and got the following results on one of our test machines (256GiB of ram, 56 cores, 2TiB Intel NVMe drive) 5.9 0m54.258s 5.10 1m26.212s 5.10+patch 0m38.800s 5.10+patch is significantly faster than plain 5.9 because of my patch series "Change data reservations to use the ticketing infra" which contained the patch that introduced the regression, but generally improved the overall ENOSPC flushing mechanisms. Additional testing on consumer-grade SSD (8GiB ram, 8 CPU) confirm the results: 5.10.5 4m00s 5.10.5+patch 1m08s 5.11-rc2 5m14s 5.11-rc2+patch 1m30s Reported-by: René Rebe Fixes: 38d715f494f2 ("btrfs: use btrfs_start_delalloc_roots in shrink_delalloc") CC: stable@vger.kernel.org # 5.10 Signed-off-by: Josef Bacik Tested-by: David Sterba Reviewed-by: David Sterba [ add my test results ] Signed-off-by: David Sterba --- fs/btrfs/inode.c | 60 ++++++++++++++++++++++++++++++++++++--------------- fs/btrfs/space-info.c | 4 +++- 2 files changed, 46 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 070716650df8..a8e0a6b038d3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9390,7 +9390,8 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode * some fairly slow code that needs optimization. This walks the list * of all the inodes with pending delalloc and forces them to disk. */ -static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot, +static int start_delalloc_inodes(struct btrfs_root *root, + struct writeback_control *wbc, bool snapshot, bool in_reclaim_context) { struct btrfs_inode *binode; @@ -9399,6 +9400,7 @@ static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot struct list_head works; struct list_head splice; int ret = 0; + bool full_flush = wbc->nr_to_write == LONG_MAX; INIT_LIST_HEAD(&works); INIT_LIST_HEAD(&splice); @@ -9427,18 +9429,24 @@ static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot if (snapshot) set_bit(BTRFS_INODE_SNAPSHOT_FLUSH, &binode->runtime_flags); - work = btrfs_alloc_delalloc_work(inode); - if (!work) { - iput(inode); - ret = -ENOMEM; - goto out; - } - list_add_tail(&work->list, &works); - btrfs_queue_work(root->fs_info->flush_workers, - &work->work); - if (*nr != U64_MAX) { - (*nr)--; - if (*nr == 0) + if (full_flush) { + work = btrfs_alloc_delalloc_work(inode); + if (!work) { + iput(inode); + ret = -ENOMEM; + goto out; + } + list_add_tail(&work->list, &works); + btrfs_queue_work(root->fs_info->flush_workers, + &work->work); + } else { + ret = sync_inode(inode, wbc); + if (!ret && + test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, + &BTRFS_I(inode)->runtime_flags)) + ret = sync_inode(inode, wbc); + btrfs_add_delayed_iput(inode); + if (ret || wbc->nr_to_write <= 0) goto out; } cond_resched(); @@ -9464,18 +9472,29 @@ out: int btrfs_start_delalloc_snapshot(struct btrfs_root *root) { + struct writeback_control wbc = { + .nr_to_write = LONG_MAX, + .sync_mode = WB_SYNC_NONE, + .range_start = 0, + .range_end = LLONG_MAX, + }; struct btrfs_fs_info *fs_info = root->fs_info; - u64 nr = U64_MAX; if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) return -EROFS; - return start_delalloc_inodes(root, &nr, true, false); + return start_delalloc_inodes(root, &wbc, true, false); } int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr, bool in_reclaim_context) { + struct writeback_control wbc = { + .nr_to_write = (nr == U64_MAX) ? LONG_MAX : (unsigned long)nr, + .sync_mode = WB_SYNC_NONE, + .range_start = 0, + .range_end = LLONG_MAX, + }; struct btrfs_root *root; struct list_head splice; int ret; @@ -9489,6 +9508,13 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr, spin_lock(&fs_info->delalloc_root_lock); list_splice_init(&fs_info->delalloc_roots, &splice); while (!list_empty(&splice) && nr) { + /* + * Reset nr_to_write here so we know that we're doing a full + * flush. + */ + if (nr == U64_MAX) + wbc.nr_to_write = LONG_MAX; + root = list_first_entry(&splice, struct btrfs_root, delalloc_root); root = btrfs_grab_root(root); @@ -9497,9 +9523,9 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr, &fs_info->delalloc_roots); spin_unlock(&fs_info->delalloc_root_lock); - ret = start_delalloc_inodes(root, &nr, false, in_reclaim_context); + ret = start_delalloc_inodes(root, &wbc, false, in_reclaim_context); btrfs_put_root(root); - if (ret < 0) + if (ret < 0 || wbc.nr_to_write <= 0) goto out; spin_lock(&fs_info->delalloc_root_lock); } diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 67e55c5479b8..e8347461c8dd 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -532,7 +532,9 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, loops = 0; while ((delalloc_bytes || dio_bytes) && loops < 3) { - btrfs_start_delalloc_roots(fs_info, items, true); + u64 nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT; + + btrfs_start_delalloc_roots(fs_info, nr_pages, true); loops++; if (wait_ordered && !trans) { -- cgit 1.4.1 From 7c38e769d5c508939ce5dc26df72602f3c902342 Mon Sep 17 00:00:00 2001 From: Seth Miller Date: Mon, 4 Jan 2021 22:58:12 -0600 Subject: HID: Ignore battery for Elan touchscreen on ASUS UX550 Battery status is being reported for the Elan touchscreen on ASUS UX550 laptops despite not having a batter. It always shows either 0 or 1%. Signed-off-by: Seth Miller Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-input.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 4c5f23640f9c..5ba0aa1d2335 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -389,6 +389,7 @@ #define USB_DEVICE_ID_TOSHIBA_CLICK_L9W 0x0401 #define USB_DEVICE_ID_HP_X2 0x074d #define USB_DEVICE_ID_HP_X2_10_COVER 0x0755 +#define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN 0x2706 #define USB_VENDOR_ID_ELECOM 0x056e #define USB_DEVICE_ID_ELECOM_BM084 0x0061 diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index dc7f6b4a775c..f23027d2795b 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -322,6 +322,8 @@ static const struct hid_device_id hid_battery_quirks[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD), HID_BATTERY_QUIRK_IGNORE }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN), + HID_BATTERY_QUIRK_IGNORE }, {} }; -- cgit 1.4.1 From a91bd6223ecd46addc71ee6fcd432206d39365d2 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 8 Jan 2021 12:48:47 +0100 Subject: Revert "init/console: Use ttynull as a fallback when there is no console" This reverts commit 757055ae8dedf5333af17b3b5b4b70ba9bc9da4e. The commit caused that ttynull was used as the default console on several systems[1][2][3]. As a result, the console was blank even when a better alternative existed. It happened when there was no console configured on the command line and ttynull_init() was the first initcall calling register_console(). Or it happened when /dev/ did not exist when console_on_rootfs() was called. It was not able to open /dev/console even though a console driver was registered. It tried to add ttynull console but it obviously did not help. But ttynull became the preferred console and was used by /dev/console when it was available later. The commit tried to fix a historical problem that have been there for ages. The primary motivation was the commit 3cffa06aeef7ece30f6 ("printk/console: Allow to disable console output by using console="" or console=null"). It provided a clean solution for a workaround that was widely used and worked only by chance. This revert causes that the console="" or console=null command line options will again work only by chance. These options will cause that a particular console will be preferred and the default (tty) ones will not get enabled. There will be no console registered at all. As a result there won't be stdin, stdout, and stderr for the init process. But it worked exactly this way even before. The proper solution has to fulfill many conditions: + Register ttynull only when explicitly required or as the ultimate fallback. + ttynull should get associated with /dev/console but it must not become preferred console when used as a fallback. Especially, it must still be possible to replace it by a better console later. Such a change requires clean up of the register_console() code. Otherwise, it would be even harder to follow. Especially, the use of has_preferred_console and CON_CONSDEV flag is tricky. The clean up is risky. The ordering of consoles is not well defined. And any changes tend to break existing user settings. Do the revert at the least risky solution for now. [1] https://lore.kernel.org/linux-kselftest/20201221144302.GR4077@smile.fi.intel.com/ [2] https://lore.kernel.org/lkml/d2a3b3c0-e548-7dd1-730f-59bc5c04e191@synopsys.com/ [3] https://patchwork.ozlabs.org/project/linux-um/patch/20210105120128.10854-1-thomas@m3y3r.de/ Reported-by: Andy Shevchenko Reported-by: Vineet Gupta Reported-by: Thomas Meyer Signed-off-by: Petr Mladek Acked-by: Greg Kroah-Hartman Acked-by: Sergey Senozhatsky Signed-off-by: Linus Torvalds --- drivers/tty/Kconfig | 14 ++++++++++++++ drivers/tty/Makefile | 3 ++- drivers/tty/ttynull.c | 18 ------------------ include/linux/console.h | 3 --- init/main.c | 10 ++-------- 5 files changed, 18 insertions(+), 30 deletions(-) diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig index 47a6e42f0d04..e15cd6b5bb99 100644 --- a/drivers/tty/Kconfig +++ b/drivers/tty/Kconfig @@ -401,6 +401,20 @@ config MIPS_EJTAG_FDC_KGDB_CHAN help FDC channel number to use for KGDB. +config NULL_TTY + tristate "NULL TTY driver" + help + Say Y here if you want a NULL TTY which simply discards messages. + + This is useful to allow userspace applications which expect a console + device to work without modifications even when no console is + available or desired. + + In order to use this driver, you should redirect the console to this + TTY, or boot the kernel with console=ttynull. + + If unsure, say N. + config TRACE_ROUTER tristate "Trace data router for MIPI P1149.7 cJTAG standard" depends on TRACE_SINK diff --git a/drivers/tty/Makefile b/drivers/tty/Makefile index 3c1c5a9240a7..b3ccae932660 100644 --- a/drivers/tty/Makefile +++ b/drivers/tty/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_TTY) += tty_io.o n_tty.o tty_ioctl.o tty_ldisc.o \ tty_buffer.o tty_port.o tty_mutex.o \ tty_ldsem.o tty_baudrate.o tty_jobctrl.o \ - n_null.o ttynull.o + n_null.o obj-$(CONFIG_LEGACY_PTYS) += pty.o obj-$(CONFIG_UNIX98_PTYS) += pty.o obj-$(CONFIG_AUDIT) += tty_audit.o @@ -25,6 +25,7 @@ obj-$(CONFIG_ISI) += isicom.o obj-$(CONFIG_MOXA_INTELLIO) += moxa.o obj-$(CONFIG_MOXA_SMARTIO) += mxser.o obj-$(CONFIG_NOZOMI) += nozomi.o +obj-$(CONFIG_NULL_TTY) += ttynull.o obj-$(CONFIG_ROCKETPORT) += rocket.o obj-$(CONFIG_SYNCLINK_GT) += synclink_gt.o obj-$(CONFIG_PPC_EPAPR_HV_BYTECHAN) += ehv_bytechan.o diff --git a/drivers/tty/ttynull.c b/drivers/tty/ttynull.c index eced70ec54e1..17f05b7eb6d3 100644 --- a/drivers/tty/ttynull.c +++ b/drivers/tty/ttynull.c @@ -2,13 +2,6 @@ /* * Copyright (C) 2019 Axis Communications AB * - * The console is useful for userspace applications which expect a console - * device to work without modifications even when no console is available - * or desired. - * - * In order to use this driver, you should redirect the console to this - * TTY, or boot the kernel with console=ttynull. - * * Based on ttyprintk.c: * Copyright (C) 2010 Samo Pogacnik */ @@ -66,17 +59,6 @@ static struct console ttynull_console = { .device = ttynull_device, }; -void __init register_ttynull_console(void) -{ - if (!ttynull_driver) - return; - - if (add_preferred_console(ttynull_console.name, 0, NULL)) - return; - - register_console(&ttynull_console); -} - static int __init ttynull_init(void) { struct tty_driver *driver; diff --git a/include/linux/console.h b/include/linux/console.h index dbe78e8e2602..20874db50bc8 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -186,12 +186,9 @@ extern int braille_register_console(struct console *, int index, extern int braille_unregister_console(struct console *); #ifdef CONFIG_TTY extern void console_sysfs_notify(void); -extern void register_ttynull_console(void); #else static inline void console_sysfs_notify(void) { } -static inline void register_ttynull_console(void) -{ } #endif extern bool console_suspend_enabled; diff --git a/init/main.c b/init/main.c index 421640fca375..c68d784376ca 100644 --- a/init/main.c +++ b/init/main.c @@ -1480,14 +1480,8 @@ void __init console_on_rootfs(void) struct file *file = filp_open("/dev/console", O_RDWR, 0); if (IS_ERR(file)) { - pr_err("Warning: unable to open an initial console. Fallback to ttynull.\n"); - register_ttynull_console(); - - file = filp_open("/dev/console", O_RDWR, 0); - if (IS_ERR(file)) { - pr_err("Warning: Failed to add ttynull console. No stdin, stdout, and stderr for the init process!\n"); - return; - } + pr_err("Warning: unable to open an initial console.\n"); + return; } init_dup(file); init_dup(file); -- cgit 1.4.1 From ef0ba05538299f1391cbe097de36895bb36ecfe6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 7 Jan 2021 09:43:54 -0800 Subject: poll: fix performance regression due to out-of-line __put_user() The kernel test robot reported a -5.8% performance regression on the "poll2" test of will-it-scale, and bisected it to commit d55564cfc222 ("x86: Make __put_user() generate an out-of-line call"). I didn't expect an out-of-line __put_user() to matter, because no normal core code should use that non-checking legacy version of user access any more. But I had overlooked the very odd poll() usage, which does a __put_user() to update the 'revents' values of the poll array. Now, Al Viro correctly points out that instead of updating just the 'revents' field, it would be much simpler to just copy the _whole_ pollfd entry, and then we could just use "copy_to_user()" on the whole array of entries, the same way we use "copy_from_user()" a few lines earlier to get the original values. But that is not what we've traditionally done, and I worry that threaded applications might be concurrently modifying the other fields of the pollfd array. So while Al's suggestion is simpler - and perhaps worth trying in the future - this instead keeps the "just update revents" model. To fix the performance regression, use the modern "unsafe_put_user()" instead of __put_user(), with the proper "user_write_access_begin()" guarding in place. This improves code generation enormously. Link: https://lore.kernel.org/lkml/20210107134723.GA28532@xsang-OptiPlex-9020/ Reported-by: kernel test robot Tested-by: Oliver Sang Cc: Al Viro Cc: David Laight Cc: Peter Zijlstra Signed-off-by: Linus Torvalds --- fs/select.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/select.c b/fs/select.c index ebfebdfe5c69..37aaa8317f3a 100644 --- a/fs/select.c +++ b/fs/select.c @@ -1011,14 +1011,17 @@ static int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, fdcount = do_poll(head, &table, end_time); poll_freewait(&table); + if (!user_write_access_begin(ufds, nfds * sizeof(*ufds))) + goto out_fds; + for (walk = head; walk; walk = walk->next) { struct pollfd *fds = walk->entries; int j; - for (j = 0; j < walk->len; j++, ufds++) - if (__put_user(fds[j].revents, &ufds->revents)) - goto out_fds; + for (j = walk->len; j; fds++, ufds++, j--) + unsafe_put_user(fds->revents, &ufds->revents, Efault); } + user_write_access_end(); err = fdcount; out_fds: @@ -1030,6 +1033,11 @@ out_fds: } return err; + +Efault: + user_write_access_end(); + err = -EFAULT; + goto out_fds; } static long do_restart_poll(struct restart_block *restart_block) -- cgit 1.4.1 From 0378c625afe80eb3f212adae42cc33c9f6f31abf Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 6 Jan 2021 18:19:05 -0500 Subject: dm: eliminate potential source of excessive kernel log noise There wasn't ever a real need to log an error in the kernel log for ioctls issued with insufficient permissions. Simply return an error and if an admin/user is sufficiently motivated they can enable DM's dynamic debugging to see an explanation for why the ioctls were disallowed. Reported-by: Nir Soffer Fixes: e980f62353c6 ("dm: don't allow ioctls to targets that don't map to whole devices") Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index b3c3c8b4cb42..7bac564f3faa 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -562,7 +562,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, * subset of the parent bdev; require extra privileges. */ if (!capable(CAP_SYS_RAWIO)) { - DMWARN_LIMIT( + DMDEBUG_LIMIT( "%s: sending ioctl %x to DM device without required privilege.", current->comm, cmd); r = -ENOIOCTLCMD; -- cgit 1.4.1 From 9b5948267adc9e689da609eb61cf7ed49cae5fa8 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 8 Jan 2021 11:15:56 -0500 Subject: dm integrity: fix flush with external metadata device With external metadata device, flush requests are not passed down to the data device. Fix this by submitting the flush request in dm_integrity_flush_buffers. In order to not degrade performance, we overlap the data device flush with the metadata device flush. Reported-by: Lukas Straub Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer --- drivers/md/dm-bufio.c | 6 +++++ drivers/md/dm-integrity.c | 60 ++++++++++++++++++++++++++++++++++++++--------- include/linux/dm-bufio.h | 1 + 3 files changed, 56 insertions(+), 11 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 9c1a86bde658..fce4cbf9529d 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1534,6 +1534,12 @@ sector_t dm_bufio_get_device_size(struct dm_bufio_client *c) } EXPORT_SYMBOL_GPL(dm_bufio_get_device_size); +struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c) +{ + return c->dm_io; +} +EXPORT_SYMBOL_GPL(dm_bufio_get_dm_io_client); + sector_t dm_bufio_get_block_number(struct dm_buffer *b) { return b->block; diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 5a7a1b90e671..11c7c538f7a9 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1379,12 +1379,52 @@ thorough_test: #undef MAY_BE_HASH } -static void dm_integrity_flush_buffers(struct dm_integrity_c *ic) +struct flush_request { + struct dm_io_request io_req; + struct dm_io_region io_reg; + struct dm_integrity_c *ic; + struct completion comp; +}; + +static void flush_notify(unsigned long error, void *fr_) +{ + struct flush_request *fr = fr_; + if (unlikely(error != 0)) + dm_integrity_io_error(fr->ic, "flusing disk cache", -EIO); + complete(&fr->comp); +} + +static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_data) { int r; + + struct flush_request fr; + + if (!ic->meta_dev) + flush_data = false; + if (flush_data) { + fr.io_req.bi_op = REQ_OP_WRITE, + fr.io_req.bi_op_flags = REQ_PREFLUSH | REQ_SYNC, + fr.io_req.mem.type = DM_IO_KMEM, + fr.io_req.mem.ptr.addr = NULL, + fr.io_req.notify.fn = flush_notify, + fr.io_req.notify.context = &fr; + fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio), + fr.io_reg.bdev = ic->dev->bdev, + fr.io_reg.sector = 0, + fr.io_reg.count = 0, + fr.ic = ic; + init_completion(&fr.comp); + r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL); + BUG_ON(r); + } + r = dm_bufio_write_dirty_buffers(ic->bufio); if (unlikely(r)) dm_integrity_io_error(ic, "writing tags", r); + + if (flush_data) + wait_for_completion(&fr.comp); } static void sleep_on_endio_wait(struct dm_integrity_c *ic) @@ -2110,7 +2150,7 @@ offload_to_thread: if (unlikely(dio->op == REQ_OP_DISCARD) && likely(ic->mode != 'D')) { integrity_metadata(&dio->work); - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, false); dio->in_flight = (atomic_t)ATOMIC_INIT(1); dio->completion = NULL; @@ -2195,7 +2235,7 @@ static void integrity_commit(struct work_struct *w) flushes = bio_list_get(&ic->flush_bio_list); if (unlikely(ic->mode != 'J')) { spin_unlock_irq(&ic->endio_wait.lock); - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, true); goto release_flush_bios; } @@ -2409,7 +2449,7 @@ skip_io: complete_journal_op(&comp); wait_for_completion_io(&comp.comp); - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, true); } static void integrity_writer(struct work_struct *w) @@ -2451,7 +2491,7 @@ static void recalc_write_super(struct dm_integrity_c *ic) { int r; - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, false); if (dm_integrity_failed(ic)) return; @@ -2654,7 +2694,7 @@ static void bitmap_flush_work(struct work_struct *work) unsigned long limit; struct bio *bio; - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, false); range.logical_sector = 0; range.n_sectors = ic->provided_data_sectors; @@ -2663,9 +2703,7 @@ static void bitmap_flush_work(struct work_struct *work) add_new_range_and_wait(ic, &range); spin_unlock_irq(&ic->endio_wait.lock); - dm_integrity_flush_buffers(ic); - if (ic->meta_dev) - blkdev_issue_flush(ic->dev->bdev, GFP_NOIO); + dm_integrity_flush_buffers(ic, true); limit = ic->provided_data_sectors; if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { @@ -2934,11 +2972,11 @@ static void dm_integrity_postsuspend(struct dm_target *ti) if (ic->meta_dev) queue_work(ic->writer_wq, &ic->writer_work); drain_workqueue(ic->writer_wq); - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, true); } if (ic->mode == 'B') { - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, true); #if 1 /* set to 0 to test bitmap replay code */ init_journal(ic, 0, ic->journal_sections, 0); diff --git a/include/linux/dm-bufio.h b/include/linux/dm-bufio.h index 29d255fdd5d6..90bd558a17f5 100644 --- a/include/linux/dm-bufio.h +++ b/include/linux/dm-bufio.h @@ -150,6 +150,7 @@ void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n); unsigned dm_bufio_get_block_size(struct dm_bufio_client *c); sector_t dm_bufio_get_device_size(struct dm_bufio_client *c); +struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c); sector_t dm_bufio_get_block_number(struct dm_buffer *b); void *dm_bufio_get_block_data(struct dm_buffer *b); void *dm_bufio_get_aux_data(struct dm_buffer *b); -- cgit 1.4.1 From e8deee4f1543eda9b75278f63322f412cad52f6a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 8 Jan 2021 13:46:55 -0800 Subject: ARC: [hsdk]: Enable FPU_SAVE_RESTORE HSDK has hardware floating point and the common use case is with glibc+hf so enable that as default. Signed-off-by: Vineet Gupta --- arch/arc/plat-hsdk/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arc/plat-hsdk/Kconfig b/arch/arc/plat-hsdk/Kconfig index 6b5c54576f54..a2d10c29fbcc 100644 --- a/arch/arc/plat-hsdk/Kconfig +++ b/arch/arc/plat-hsdk/Kconfig @@ -7,6 +7,7 @@ menuconfig ARC_SOC_HSDK depends on ISA_ARCV2 select ARC_HAS_ACCL_REGS select ARC_IRQ_NO_AUTOSAVE + select ARC_FPU_SAVE_RESTORE select CLK_HSDK select RESET_CONTROLLER select RESET_HSDK -- cgit 1.4.1 From afba9dc1f3a5390475006061c0bdc5ad4915878e Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 6 Jan 2021 11:07:55 +0100 Subject: net: ipa: modem: add missing SET_NETDEV_DEV() for proper sysfs links At the moment it is quite hard to identify the network interface provided by IPA in userspace components: The network interface is created as virtual device, without any link to the IPA device. The interface name ("rmnet_ipa%d") is the only indication that the network interface belongs to IPA, but this is not very reliable. Add SET_NETDEV_DEV() to associate the network interface with the IPA parent device. This allows userspace services like ModemManager to properly identify that this network interface is provided by IPA and belongs to the modem. Cc: Alex Elder Fixes: a646d6ec9098 ("soc: qcom: ipa: modem and microcontroller") Signed-off-by: Stephan Gerhold Link: https://lore.kernel.org/r/20210106100755.56800-1-stephan@gerhold.net Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_modem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ipa/ipa_modem.c b/drivers/net/ipa/ipa_modem.c index e34fe2d77324..9b08eb823984 100644 --- a/drivers/net/ipa/ipa_modem.c +++ b/drivers/net/ipa/ipa_modem.c @@ -216,6 +216,7 @@ int ipa_modem_start(struct ipa *ipa) ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev; ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev; + SET_NETDEV_DEV(netdev, &ipa->pdev->dev); priv = netdev_priv(netdev); priv->ipa = ipa; -- cgit 1.4.1 From 53475c5dd856212e91538a9501162e821cc1f791 Mon Sep 17 00:00:00 2001 From: Dongseok Yi Date: Fri, 8 Jan 2021 11:28:38 +0900 Subject: net: fix use-after-free when UDP GRO with shared fraglist skbs in fraglist could be shared by a BPF filter loaded at TC. If TC writes, it will call skb_ensure_writable -> pskb_expand_head to create a private linear section for the head_skb. And then call skb_clone_fraglist -> skb_get on each skb in the fraglist. skb_segment_list overwrites part of the skb linear section of each fragment itself. Even after skb_clone, the frag_skbs share their linear section with their clone in PF_PACKET. Both sk_receive_queue of PF_PACKET and PF_INET (or PF_INET6) can have a link for the same frag_skbs chain. If a new skb (not frags) is queued to one of the sk_receive_queue, multiple ptypes can see and release this. It causes use-after-free. [ 4443.426215] ------------[ cut here ]------------ [ 4443.426222] refcount_t: underflow; use-after-free. [ 4443.426291] WARNING: CPU: 7 PID: 28161 at lib/refcount.c:190 refcount_dec_and_test_checked+0xa4/0xc8 [ 4443.426726] pstate: 60400005 (nZCv daif +PAN -UAO) [ 4443.426732] pc : refcount_dec_and_test_checked+0xa4/0xc8 [ 4443.426737] lr : refcount_dec_and_test_checked+0xa0/0xc8 [ 4443.426808] Call trace: [ 4443.426813] refcount_dec_and_test_checked+0xa4/0xc8 [ 4443.426823] skb_release_data+0x144/0x264 [ 4443.426828] kfree_skb+0x58/0xc4 [ 4443.426832] skb_queue_purge+0x64/0x9c [ 4443.426844] packet_set_ring+0x5f0/0x820 [ 4443.426849] packet_setsockopt+0x5a4/0xcd0 [ 4443.426853] __sys_setsockopt+0x188/0x278 [ 4443.426858] __arm64_sys_setsockopt+0x28/0x38 [ 4443.426869] el0_svc_common+0xf0/0x1d0 [ 4443.426873] el0_svc_handler+0x74/0x98 [ 4443.426880] el0_svc+0x8/0xc Fixes: 3a1296a38d0c (net: Support GRO/GSO fraglist chaining.) Signed-off-by: Dongseok Yi Acked-by: Willem de Bruijn Acked-by: Daniel Borkmann Link: https://lore.kernel.org/r/1610072918-174177-1-git-send-email-dseok.yi@samsung.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f62cae3f75d8..b6f2b520a9b7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3655,7 +3655,8 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, unsigned int delta_truesize = 0; unsigned int delta_len = 0; struct sk_buff *tail = NULL; - struct sk_buff *nskb; + struct sk_buff *nskb, *tmp; + int err; skb_push(skb, -skb_network_offset(skb) + offset); @@ -3665,11 +3666,28 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, nskb = list_skb; list_skb = list_skb->next; + err = 0; + if (skb_shared(nskb)) { + tmp = skb_clone(nskb, GFP_ATOMIC); + if (tmp) { + consume_skb(nskb); + nskb = tmp; + err = skb_unclone(nskb, GFP_ATOMIC); + } else { + err = -ENOMEM; + } + } + if (!tail) skb->next = nskb; else tail->next = nskb; + if (unlikely(err)) { + nskb->next = list_skb; + goto err_linearize; + } + tail = nskb; delta_len += nskb->len; -- cgit 1.4.1 From fd2ddef043592e7de80af53f47fa46fd3573086e Mon Sep 17 00:00:00 2001 From: Baptiste Lepers Date: Thu, 7 Jan 2021 16:11:10 +1100 Subject: udp: Prevent reuseport_select_sock from reading uninitialized socks reuse->socks[] is modified concurrently by reuseport_add_sock. To prevent reading values that have not been fully initialized, only read the array up until the last known safe index instead of incorrectly re-reading the last index of the array. Fixes: acdcecc61285f ("udp: correct reuseport selection with connected sockets") Signed-off-by: Baptiste Lepers Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/20210107051110.12247-1-baptiste.lepers@gmail.com Signed-off-by: Jakub Kicinski --- net/core/sock_reuseport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index bbdd3c7b6cb5..b065f0a103ed 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -293,7 +293,7 @@ select_by_hash: i = j = reciprocal_scale(hash, socks); while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) { i++; - if (i >= reuse->num_socks) + if (i >= socks) i = 0; if (i == j) goto out; -- cgit 1.4.1 From c1787ffd0d24eb93eefac2dbba0eac5700da9ff1 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Thu, 7 Jan 2021 18:13:15 +0000 Subject: ppp: fix refcount underflow on channel unbridge When setting up a channel bridge, ppp_bridge_channels sets the pch->bridge field before taking the associated reference on the bridge file instance. This opens up a refcount underflow bug if ppp_bridge_channels called via. iotcl runs concurrently with ppp_unbridge_channels executing via. file release. The bug is triggered by ppp_bridge_channels taking the error path through the 'err_unset' label. In this scenario, pch->bridge is set, but the reference on the bridged channel will not be taken because the function errors out. If ppp_unbridge_channels observes pch->bridge before it is unset by the error path, it will erroneously drop the reference on the bridged channel and cause a refcount underflow. To avoid this, ensure that ppp_bridge_channels holds a reference on each channel in advance of setting the bridge pointers. Signed-off-by: Tom Parkin Fixes: 4cf476ced45d ("ppp: add PPPIOCBRIDGECHAN and PPPIOCUNBRIDGECHAN ioctls") Acked-by: Guillaume Nault Link: https://lore.kernel.org/r/20210107181315.3128-1-tparkin@katalix.com Signed-off-by: Jakub Kicinski --- drivers/net/ppp/ppp_generic.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 09c27f7773f9..d445ecb1d0c7 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -623,6 +623,7 @@ static int ppp_bridge_channels(struct channel *pch, struct channel *pchb) write_unlock_bh(&pch->upl); return -EALREADY; } + refcount_inc(&pchb->file.refcnt); rcu_assign_pointer(pch->bridge, pchb); write_unlock_bh(&pch->upl); @@ -632,19 +633,24 @@ static int ppp_bridge_channels(struct channel *pch, struct channel *pchb) write_unlock_bh(&pchb->upl); goto err_unset; } + refcount_inc(&pch->file.refcnt); rcu_assign_pointer(pchb->bridge, pch); write_unlock_bh(&pchb->upl); - refcount_inc(&pch->file.refcnt); - refcount_inc(&pchb->file.refcnt); - return 0; err_unset: write_lock_bh(&pch->upl); + /* Re-read pch->bridge with upl held in case it was modified concurrently */ + pchb = rcu_dereference_protected(pch->bridge, lockdep_is_held(&pch->upl)); RCU_INIT_POINTER(pch->bridge, NULL); write_unlock_bh(&pch->upl); synchronize_rcu(); + + if (pchb) + if (refcount_dec_and_test(&pchb->file.refcnt)) + ppp_destroy_channel(pchb); + return -EALREADY; } -- cgit 1.4.1 From 2b446e650b418f9a9e75f99852e2f2560cabfa17 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 Jan 2021 10:40:05 -0800 Subject: docs: net: explain struct net_device lifetime Explain the two basic flows of struct net_device's operation. Signed-off-by: Jakub Kicinski --- Documentation/networking/netdevices.rst | 171 ++++++++++++++++++++++++++++++-- net/core/rtnetlink.c | 2 +- 2 files changed, 166 insertions(+), 7 deletions(-) diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst index e65665c5ab50..17bdcb746dcf 100644 --- a/Documentation/networking/netdevices.rst +++ b/Documentation/networking/netdevices.rst @@ -10,18 +10,177 @@ Introduction The following is a random collection of documentation regarding network devices. -struct net_device allocation rules -================================== +struct net_device lifetime rules +================================ Network device structures need to persist even after module is unloaded and must be allocated with alloc_netdev_mqs() and friends. If device has registered successfully, it will be freed on last use -by free_netdev(). This is required to handle the pathologic case cleanly -(example: rmmod mydriver needs_free_netdev = true; + } + + static void my_destructor(struct net_device *dev) + { + some_obj_destroy(priv->obj); + some_uninit(priv); + } + + int create_link() + { + struct my_device_priv *priv; + int err; + + ASSERT_RTNL(); + + dev = alloc_netdev(sizeof(*priv), "net%d", NET_NAME_UNKNOWN, my_setup); + if (!dev) + return -ENOMEM; + priv = netdev_priv(dev); + + /* Implicit constructor */ + err = some_init(priv); + if (err) + goto err_free_dev; + + priv->obj = some_obj_create(); + if (!priv->obj) { + err = -ENOMEM; + goto err_some_uninit; + } + /* End of constructor, set the destructor: */ + dev->priv_destructor = my_destructor; + + err = register_netdevice(dev); + if (err) + /* register_netdevice() calls destructor on failure */ + goto err_free_dev; + + /* If anything fails now unregister_netdevice() (or unregister_netdev()) + * will take care of calling my_destructor and free_netdev(). + */ + + return 0; + + err_some_uninit: + some_uninit(priv); + err_free_dev: + free_netdev(dev); + return err; + } + +If struct net_device.priv_destructor is set it will be called by the core +some time after unregister_netdevice(), it will also be called if +register_netdevice() fails. The callback may be invoked with or without +``rtnl_lock`` held. + +There is no explicit constructor callback, driver "constructs" the private +netdev state after allocating it and before registration. + +Setting struct net_device.needs_free_netdev makes core call free_netdevice() +automatically after unregister_netdevice() when all references to the device +are gone. It only takes effect after a successful call to register_netdevice() +so if register_netdevice() fails driver is responsible for calling +free_netdev(). + +free_netdev() is safe to call on error paths right after unregister_netdevice() +or when register_netdevice() fails. Parts of netdev (de)registration process +happen after ``rtnl_lock`` is released, therefore in those cases free_netdev() +will defer some of the processing until ``rtnl_lock`` is released. + +Devices spawned from struct rtnl_link_ops should never free the +struct net_device directly. + +.ndo_init and .ndo_uninit +~~~~~~~~~~~~~~~~~~~~~~~~~ + +``.ndo_init`` and ``.ndo_uninit`` callbacks are called during net_device +registration and de-registration, under ``rtnl_lock``. Drivers can use +those e.g. when parts of their init process need to run under ``rtnl_lock``. + +``.ndo_init`` runs before device is visible in the system, ``.ndo_uninit`` +runs during de-registering after device is closed but other subsystems +may still have outstanding references to the netdevice. MTU === diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index bb0596c41b3e..79f514afb17d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3441,7 +3441,7 @@ replay: if (ops->newlink) { err = ops->newlink(link_net ? : net, dev, tb, data, extack); - /* Drivers should call free_netdev() in ->destructor + /* Drivers should set dev->needs_free_netdev * and unregister it on failure after registration * so that device could be finally freed in rtnl_unlock. */ -- cgit 1.4.1 From c269a24ce057abfc31130960e96ab197ef6ab196 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 Jan 2021 10:40:06 -0800 Subject: net: make free_netdev() more lenient with unregistering devices There are two flavors of handling netdev registration: - ones called without holding rtnl_lock: register_netdev() and unregister_netdev(); and - those called with rtnl_lock held: register_netdevice() and unregister_netdevice(). While the semantics of the former are pretty clear, the same can't be said about the latter. The netdev_todo mechanism is utilized to perform some of the device unregistering tasks and it hooks into rtnl_unlock() so the locked variants can't actually finish the work. In general free_netdev() does not mix well with locked calls. Most drivers operating under rtnl_lock set dev->needs_free_netdev to true and expect core to make the free_netdev() call some time later. The part where this becomes most problematic is error paths. There is no way to unwind the state cleanly after a call to register_netdevice(), since unreg can't be performed fully without dropping locks. Make free_netdev() more lenient, and defer the freeing if device is being unregistered. This allows error paths to simply call free_netdev() both after register_netdevice() failed, and after a call to unregister_netdevice() but before dropping rtnl_lock. Simplify the error paths which are currently doing gymnastics around free_netdev() handling. Signed-off-by: Jakub Kicinski --- net/8021q/vlan.c | 4 +--- net/core/dev.c | 11 +++++++++++ net/core/rtnetlink.c | 23 ++++++----------------- 3 files changed, 18 insertions(+), 20 deletions(-) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 15bbfaf943fd..8b644113715e 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -284,9 +284,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) return 0; out_free_newdev: - if (new_dev->reg_state == NETREG_UNINITIALIZED || - new_dev->reg_state == NETREG_UNREGISTERED) - free_netdev(new_dev); + free_netdev(new_dev); return err; } diff --git a/net/core/dev.c b/net/core/dev.c index 8fa739259041..adde93cbca9f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10631,6 +10631,17 @@ void free_netdev(struct net_device *dev) struct napi_struct *p, *n; might_sleep(); + + /* When called immediately after register_netdevice() failed the unwind + * handling may still be dismantling the device. Handle that case by + * deferring the free. + */ + if (dev->reg_state == NETREG_UNREGISTERING) { + ASSERT_RTNL(); + dev->needs_free_netdev = true; + return; + } + netif_free_tx_queues(dev); netif_free_rx_queues(dev); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 79f514afb17d..3d6ab194d0f5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3439,26 +3439,15 @@ replay: dev->ifindex = ifm->ifi_index; - if (ops->newlink) { + if (ops->newlink) err = ops->newlink(link_net ? : net, dev, tb, data, extack); - /* Drivers should set dev->needs_free_netdev - * and unregister it on failure after registration - * so that device could be finally freed in rtnl_unlock. - */ - if (err < 0) { - /* If device is not registered at all, free it now */ - if (dev->reg_state == NETREG_UNINITIALIZED || - dev->reg_state == NETREG_UNREGISTERED) - free_netdev(dev); - goto out; - } - } else { + else err = register_netdevice(dev); - if (err < 0) { - free_netdev(dev); - goto out; - } + if (err < 0) { + free_netdev(dev); + goto out; } + err = rtnl_configure_link(dev, ifm); if (err < 0) goto out_unregister; -- cgit 1.4.1 From 766b0515d5bec4b780750773ed3009b148df8c0a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 Jan 2021 10:40:07 -0800 Subject: net: make sure devices go through netdev_wait_all_refs If register_netdevice() fails at the very last stage - the notifier call - some subsystems may have already seen it and grabbed a reference. struct net_device can't be freed right away without calling netdev_wait_all_refs(). Now that we have a clean interface in form of dev->needs_free_netdev and lenient free_netdev() we can undo what commit 93ee31f14f6f ("[NET]: Fix free_netdev on register_netdev failure.") has done and complete the unregistration path by bringing the net_set_todo() call back. After registration fails user is still expected to explicitly free the net_device, so make sure ->needs_free_netdev is cleared, otherwise rolling back the registration will cause the old double free for callers who release rtnl_lock before the free. This also solves the problem of priv_destructor not being called on notifier error. net_set_todo() will be moved back into unregister_netdevice_queue() in a follow up. Reported-by: Hulk Robot Reported-by: Yang Yingliang Signed-off-by: Jakub Kicinski --- net/core/dev.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index adde93cbca9f..0071a11a6dc3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10077,17 +10077,11 @@ int register_netdevice(struct net_device *dev) ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); ret = notifier_to_errno(ret); if (ret) { + /* Expect explicit free_netdev() on failure */ + dev->needs_free_netdev = false; rollback_registered(dev); - rcu_barrier(); - - dev->reg_state = NETREG_UNREGISTERED; - /* We should put the kobject that hold in - * netdev_unregister_kobject(), otherwise - * the net device cannot be freed when - * driver calls free_netdev(), because the - * kobject is being hold. - */ - kobject_put(&dev->dev.kobj); + net_set_todo(dev); + goto out; } /* * Prevent userspace races by waiting until the network -- cgit 1.4.1 From e80927079fd97b4d5457e3af2400a0087b561564 Mon Sep 17 00:00:00 2001 From: Yi Li Date: Mon, 4 Jan 2021 15:41:18 +0800 Subject: bcache: set pdev_set_uuid before scond loop iteration There is no need to reassign pdev_set_uuid in the second loop iteration, so move it to the place before second loop. Signed-off-by: Yi Li Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index a4752ac410dc..6aa23a6fb394 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2644,8 +2644,8 @@ static ssize_t bch_pending_bdevs_cleanup(struct kobject *k, } list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) { + char *pdev_set_uuid = pdev->dc->sb.set_uuid; list_for_each_entry_safe(c, tc, &bch_cache_sets, list) { - char *pdev_set_uuid = pdev->dc->sb.set_uuid; char *set_uuid = c->set_uuid; if (!memcmp(pdev_set_uuid, set_uuid, 16)) { -- cgit 1.4.1 From f7b4943dea48a572ad751ce1f18a245d43debe7e Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 4 Jan 2021 15:41:19 +0800 Subject: bcache: fix typo from SUUP to SUPP in features.h This patch fixes the following typos, from BCH_FEATURE_COMPAT_SUUP to BCH_FEATURE_COMPAT_SUPP from BCH_FEATURE_INCOMPAT_SUUP to BCH_FEATURE_INCOMPAT_SUPP from BCH_FEATURE_INCOMPAT_SUUP to BCH_FEATURE_RO_COMPAT_SUPP Fixes: d721a43ff69c ("bcache: increase super block version for cache device and backing device") Fixes: ffa470327572 ("bcache: add bucket_size_hi into struct cache_sb_disk for large bucket") Signed-off-by: Coly Li Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Jens Axboe --- drivers/md/bcache/features.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/bcache/features.h b/drivers/md/bcache/features.h index a1653c478041..32c5bbda2f0d 100644 --- a/drivers/md/bcache/features.h +++ b/drivers/md/bcache/features.h @@ -15,9 +15,9 @@ /* Incompat feature set */ #define BCH_FEATURE_INCOMPAT_LARGE_BUCKET 0x0001 /* 32bit bucket size */ -#define BCH_FEATURE_COMPAT_SUUP 0 -#define BCH_FEATURE_RO_COMPAT_SUUP 0 -#define BCH_FEATURE_INCOMPAT_SUUP BCH_FEATURE_INCOMPAT_LARGE_BUCKET +#define BCH_FEATURE_COMPAT_SUPP 0 +#define BCH_FEATURE_RO_COMPAT_SUPP 0 +#define BCH_FEATURE_INCOMPAT_SUPP BCH_FEATURE_INCOMPAT_LARGE_BUCKET #define BCH_HAS_COMPAT_FEATURE(sb, mask) \ ((sb)->feature_compat & (mask)) -- cgit 1.4.1 From 1dfc0686c29a9bbd3a446a29f9ccde3dec3bc75a Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 4 Jan 2021 15:41:20 +0800 Subject: bcache: check unsupported feature sets for bcache register This patch adds the check for features which is incompatible for current supported feature sets. Now if the bcache device created by bcache-tools has features that current kernel doesn't support, read_super() will fail with error messoage. E.g. if an unsupported incompatible feature detected, bcache register will fail with dmesg "bcache: register_bcache() error : Unsupported incompatible feature found". Fixes: d721a43ff69c ("bcache: increase super block version for cache device and backing device") Fixes: ffa470327572 ("bcache: add bucket_size_hi into struct cache_sb_disk for large bucket") Signed-off-by: Coly Li Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Jens Axboe --- drivers/md/bcache/features.h | 15 +++++++++++++++ drivers/md/bcache/super.c | 14 ++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/drivers/md/bcache/features.h b/drivers/md/bcache/features.h index 32c5bbda2f0d..e73724c2b49b 100644 --- a/drivers/md/bcache/features.h +++ b/drivers/md/bcache/features.h @@ -79,6 +79,21 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \ BCH_FEATURE_INCOMPAT_FUNCS(large_bucket, LARGE_BUCKET); +static inline bool bch_has_unknown_compat_features(struct cache_sb *sb) +{ + return ((sb->feature_compat & ~BCH_FEATURE_COMPAT_SUPP) != 0); +} + +static inline bool bch_has_unknown_ro_compat_features(struct cache_sb *sb) +{ + return ((sb->feature_ro_compat & ~BCH_FEATURE_RO_COMPAT_SUPP) != 0); +} + +static inline bool bch_has_unknown_incompat_features(struct cache_sb *sb) +{ + return ((sb->feature_incompat & ~BCH_FEATURE_INCOMPAT_SUPP) != 0); +} + int bch_print_cache_set_feature_compat(struct cache_set *c, char *buf, int size); int bch_print_cache_set_feature_ro_compat(struct cache_set *c, char *buf, int size); int bch_print_cache_set_feature_incompat(struct cache_set *c, char *buf, int size); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 6aa23a6fb394..f4674a3298af 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -228,6 +228,20 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, sb->feature_compat = le64_to_cpu(s->feature_compat); sb->feature_incompat = le64_to_cpu(s->feature_incompat); sb->feature_ro_compat = le64_to_cpu(s->feature_ro_compat); + + /* Check incompatible features */ + err = "Unsupported compatible feature found"; + if (bch_has_unknown_compat_features(sb)) + goto err; + + err = "Unsupported read-only compatible feature found"; + if (bch_has_unknown_ro_compat_features(sb)) + goto err; + + err = "Unsupported incompatible feature found"; + if (bch_has_unknown_incompat_features(sb)) + goto err; + err = read_super_common(sb, bdev, s); if (err) goto err; -- cgit 1.4.1 From b16671e8f493e3df40b1fb0dff4078f391c5099a Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 4 Jan 2021 15:41:21 +0800 Subject: bcache: introduce BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE for large bucket When large bucket feature was added, BCH_FEATURE_INCOMPAT_LARGE_BUCKET was introduced into the incompat feature set. It used bucket_size_hi (which was added at the tail of struct cache_sb_disk) to extend current 16bit bucket size to 32bit with existing bucket_size in struct cache_sb_disk. This is not a good idea, there are two obvious problems, - Bucket size is always value power of 2, if store log2(bucket size) in existing bucket_size of struct cache_sb_disk, it is unnecessary to add bucket_size_hi. - Macro csum_set() assumes d[SB_JOURNAL_BUCKETS] is the last member in struct cache_sb_disk, bucket_size_hi was added after d[] which makes csum_set calculate an unexpected super block checksum. To fix the above problems, this patch introduces a new incompat feature bit BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE, when this bit is set, it means bucket_size in struct cache_sb_disk stores the order of power-of-2 bucket size value. When user specifies a bucket size larger than 32768 sectors, BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE will be set to incompat feature set, and bucket_size stores log2(bucket size) more than store the real bucket size value. The obsoleted BCH_FEATURE_INCOMPAT_LARGE_BUCKET won't be used anymore, it is renamed to BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET and still only recognized by kernel driver for legacy compatible purpose. The previous bucket_size_hi is renmaed to obso_bucket_size_hi in struct cache_sb_disk and not used in bcache-tools anymore. For cache device created with BCH_FEATURE_INCOMPAT_LARGE_BUCKET feature, bcache-tools and kernel driver still recognize the feature string and display it as "obso_large_bucket". With this change, the unnecessary extra space extend of bcache on-disk super block can be avoided, and csum_set() may generate expected check sum as well. Fixes: ffa470327572 ("bcache: add bucket_size_hi into struct cache_sb_disk for large bucket") Signed-off-by: Coly Li Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Jens Axboe --- drivers/md/bcache/features.c | 2 +- drivers/md/bcache/features.h | 11 ++++++++--- drivers/md/bcache/super.c | 22 +++++++++++++++++++--- include/uapi/linux/bcache.h | 2 +- 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/md/bcache/features.c b/drivers/md/bcache/features.c index 6469223f0b77..d636b7b2d070 100644 --- a/drivers/md/bcache/features.c +++ b/drivers/md/bcache/features.c @@ -17,7 +17,7 @@ struct feature { }; static struct feature feature_list[] = { - {BCH_FEATURE_INCOMPAT, BCH_FEATURE_INCOMPAT_LARGE_BUCKET, + {BCH_FEATURE_INCOMPAT, BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE, "large_bucket"}, {0, 0, 0 }, }; diff --git a/drivers/md/bcache/features.h b/drivers/md/bcache/features.h index e73724c2b49b..84fc2c0f0101 100644 --- a/drivers/md/bcache/features.h +++ b/drivers/md/bcache/features.h @@ -13,11 +13,15 @@ /* Feature set definition */ /* Incompat feature set */ -#define BCH_FEATURE_INCOMPAT_LARGE_BUCKET 0x0001 /* 32bit bucket size */ +/* 32bit bucket size, obsoleted */ +#define BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET 0x0001 +/* real bucket size is (1 << bucket_size) */ +#define BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE 0x0002 #define BCH_FEATURE_COMPAT_SUPP 0 #define BCH_FEATURE_RO_COMPAT_SUPP 0 -#define BCH_FEATURE_INCOMPAT_SUPP BCH_FEATURE_INCOMPAT_LARGE_BUCKET +#define BCH_FEATURE_INCOMPAT_SUPP (BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET| \ + BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE) #define BCH_HAS_COMPAT_FEATURE(sb, mask) \ ((sb)->feature_compat & (mask)) @@ -77,7 +81,8 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \ ~BCH##_FEATURE_INCOMPAT_##flagname; \ } -BCH_FEATURE_INCOMPAT_FUNCS(large_bucket, LARGE_BUCKET); +BCH_FEATURE_INCOMPAT_FUNCS(obso_large_bucket, OBSO_LARGE_BUCKET); +BCH_FEATURE_INCOMPAT_FUNCS(large_bucket, LOG_LARGE_BUCKET_SIZE); static inline bool bch_has_unknown_compat_features(struct cache_sb *sb) { diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index f4674a3298af..3999641f1775 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -64,9 +64,25 @@ static unsigned int get_bucket_size(struct cache_sb *sb, struct cache_sb_disk *s { unsigned int bucket_size = le16_to_cpu(s->bucket_size); - if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES && - bch_has_feature_large_bucket(sb)) - bucket_size |= le16_to_cpu(s->bucket_size_hi) << 16; + if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES) { + if (bch_has_feature_large_bucket(sb)) { + unsigned int max, order; + + max = sizeof(unsigned int) * BITS_PER_BYTE - 1; + order = le16_to_cpu(s->bucket_size); + /* + * bcache tool will make sure the overflow won't + * happen, an error message here is enough. + */ + if (order > max) + pr_err("Bucket size (1 << %u) overflows\n", + order); + bucket_size = 1 << order; + } else if (bch_has_feature_obso_large_bucket(sb)) { + bucket_size += + le16_to_cpu(s->obso_bucket_size_hi) << 16; + } + } return bucket_size; } diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h index 52e8bcb33981..cf7399f03b71 100644 --- a/include/uapi/linux/bcache.h +++ b/include/uapi/linux/bcache.h @@ -213,7 +213,7 @@ struct cache_sb_disk { __le16 keys; }; __le64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */ - __le16 bucket_size_hi; + __le16 obso_bucket_size_hi; /* obsoleted */ }; /* -- cgit 1.4.1 From 5342fd4255021ef0c4ce7be52eea1c4ebda11c63 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 4 Jan 2021 15:41:22 +0800 Subject: bcache: set bcache device into read-only mode for BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET If BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET is set in incompat feature set, it means the cache device is created with obsoleted layout with obso_bucket_site_hi. Now bcache does not support this feature bit, a new BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE incompat feature bit is added for a better layout to support large bucket size. For the legacy compatibility purpose, if a cache device created with obsoleted BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET feature bit, all bcache devices attached to this cache set should be set to read-only. Then the dirty data can be written back to backing device before re-create the cache device with BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE feature bit by the latest bcache-tools. This patch checks BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET feature bit when running a cache set and attach a bcache device to the cache set. If this bit is set, - When run a cache set, print an error kernel message to indicate all following attached bcache device will be read-only. - When attach a bcache device, print an error kernel message to indicate the attached bcache device will be read-only, and ask users to update to latest bcache-tools. Such change is only for cache device whose bucket size >= 32MB, this is for the zoned SSD and almost nobody uses such large bucket size at this moment. If you don't explicit set a large bucket size for a zoned SSD, such change is totally transparent to your bcache device. Fixes: ffa470327572 ("bcache: add bucket_size_hi into struct cache_sb_disk for large bucket") Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 3999641f1775..2047a9cccdb5 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1332,6 +1332,12 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, bcache_device_link(&dc->disk, c, "bdev"); atomic_inc(&c->attached_dev_nr); + if (bch_has_feature_obso_large_bucket(&(c->cache->sb))) { + pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n"); + pr_err("Please update to the latest bcache-tools to create the cache device\n"); + set_disk_ro(dc->disk.disk, 1); + } + /* Allow the writeback thread to proceed */ up_write(&dc->writeback_lock); @@ -1554,6 +1560,12 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) bcache_device_link(d, c, "volume"); + if (bch_has_feature_obso_large_bucket(&c->cache->sb)) { + pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n"); + pr_err("Please update to the latest bcache-tools to create the cache device\n"); + set_disk_ro(d->disk, 1); + } + return 0; err: kobject_put(&d->kobj); @@ -2113,6 +2125,9 @@ static int run_cache_set(struct cache_set *c) c->cache->sb.last_mount = (u32)ktime_get_real_seconds(); bcache_write_super(c); + if (bch_has_feature_obso_large_bucket(&c->cache->sb)) + pr_err("Detect obsoleted large bucket layout, all attached bcache device will be read-only\n"); + list_for_each_entry_safe(dc, t, &uncached_devices, list) bch_cached_dev_attach(dc, c, NULL); -- cgit 1.4.1 From 55e6ac1e1f31c7f678d9f3c8d54c6f102e5f1550 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 8 Jan 2021 20:57:22 +0000 Subject: io_uring: io_rw_reissue lockdep annotations We expect io_rw_reissue() to take place only during submission with uring_lock held. Add a lockdep annotation to check that invariant. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index cb57e0360fcb..55ba1922a349 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2692,6 +2692,8 @@ static bool io_rw_reissue(struct io_kiocb *req, long res) if ((res != -EAGAIN && res != -EOPNOTSUPP) || io_wq_current_is_worker()) return false; + lockdep_assert_held(&req->ctx->uring_lock); + ret = io_sq_thread_acquire_mm_files(req->ctx, req); if (io_resubmit_prep(req, ret)) { -- cgit 1.4.1 From 4f793dc40bc605b97624fd36baf085b3c35e8bfd Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 8 Jan 2021 20:57:23 +0000 Subject: io_uring: inline io_uring_attempt_task_drop() A simple preparation change inlining io_uring_attempt_task_drop() into io_uring_flush(). Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 55ba1922a349..1c931e7a3948 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8964,23 +8964,6 @@ static void io_uring_del_task_file(struct file *file) fput(file); } -/* - * Drop task note for this file if we're the only ones that hold it after - * pending fput() - */ -static void io_uring_attempt_task_drop(struct file *file) -{ - if (!current->io_uring) - return; - /* - * fput() is pending, will be 2 if the only other ref is our potential - * task file note. If the task is exiting, drop regardless of count. - */ - if (fatal_signal_pending(current) || (current->flags & PF_EXITING) || - atomic_long_read(&file->f_count) == 2) - io_uring_del_task_file(file); -} - static void io_uring_remove_task_files(struct io_uring_task *tctx) { struct file *file; @@ -9072,7 +9055,17 @@ void __io_uring_task_cancel(void) static int io_uring_flush(struct file *file, void *data) { - io_uring_attempt_task_drop(file); + if (!current->io_uring) + return 0; + + /* + * fput() is pending, will be 2 if the only other ref is our potential + * task file note. If the task is exiting, drop regardless of count. + */ + if (fatal_signal_pending(current) || (current->flags & PF_EXITING) || + atomic_long_read(&file->f_count) == 2) + io_uring_del_task_file(file); + return 0; } -- cgit 1.4.1 From 6b5733eb638b7068ab7cb34e663b55a1d1892d85 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 8 Jan 2021 20:57:24 +0000 Subject: io_uring: add warn_once for io_uring_flush() files_cancel() should cancel all relevant requests and drop file notes, so we should never have file notes after that, including on-exit fput and flush. Add a WARN_ONCE to be sure. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 1c931e7a3948..f39671a0d84f 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9055,17 +9055,23 @@ void __io_uring_task_cancel(void) static int io_uring_flush(struct file *file, void *data) { - if (!current->io_uring) + struct io_uring_task *tctx = current->io_uring; + + if (!tctx) return 0; + /* we should have cancelled and erased it before PF_EXITING */ + WARN_ON_ONCE((current->flags & PF_EXITING) && + xa_load(&tctx->xa, (unsigned long)file)); + /* * fput() is pending, will be 2 if the only other ref is our potential * task file note. If the task is exiting, drop regardless of count. */ - if (fatal_signal_pending(current) || (current->flags & PF_EXITING) || - atomic_long_read(&file->f_count) == 2) - io_uring_del_task_file(file); + if (atomic_long_read(&file->f_count) != 2) + return 0; + io_uring_del_task_file(file); return 0; } -- cgit 1.4.1 From d9d05217cb6990b9a56e13b56e7a1b71e2551f6c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 8 Jan 2021 20:57:25 +0000 Subject: io_uring: stop SQPOLL submit on creator's death When the creator of SQPOLL io_uring dies (i.e. sqo_task), we don't want its internals like ->files and ->mm to be poked by the SQPOLL task, it have never been nice and recently got racy. That can happen when the owner undergoes destruction and SQPOLL tasks tries to submit new requests in parallel, and so calls io_sq_thread_acquire*(). That patch halts SQPOLL submissions when sqo_task dies by introducing sqo_dead flag. Once set, the SQPOLL task must not do any submission, which is synchronised by uring_lock as well as the new flag. The tricky part is to make sure that disabling always happens, that means either the ring is discovered by creator's do_exit() -> cancel, or if the final close() happens before it's done by the creator. The last is guaranteed by the fact that for SQPOLL the creator task and only it holds exactly one file note, so either it pins up to do_exit() or removed by the creator on the final put in flush. (see comments in uring_flush() around file->f_count == 2). One more place that can trigger io_sq_thread_acquire_*() is __io_req_task_submit(). Shoot off requests on sqo_dead there, even though actually we don't need to. That's because cancellation of sqo_task should wait for the request before going any further. note 1: io_disable_sqo_submit() does io_ring_set_wakeup_flag() so the caller would enter the ring to get an error, but it still doesn't guarantee that the flag won't be cleared. note 2: if final __userspace__ close happens not from the creator task, the file note will pin the ring until the task dies. Fixed: b1b6b5a30dce8 ("kernel/io_uring: cancel io_uring before task works") Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 53 insertions(+), 9 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index f39671a0d84f..2f305c097bd5 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -262,6 +262,7 @@ struct io_ring_ctx { unsigned int drain_next: 1; unsigned int eventfd_async: 1; unsigned int restricted: 1; + unsigned int sqo_dead: 1; /* * Ring buffer of indices into array of io_uring_sqe, which is @@ -2160,12 +2161,11 @@ static void io_req_task_cancel(struct callback_head *cb) static void __io_req_task_submit(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; - bool fail; - fail = __io_sq_thread_acquire_mm(ctx) || - __io_sq_thread_acquire_files(ctx); mutex_lock(&ctx->uring_lock); - if (!fail) + if (!ctx->sqo_dead && + !__io_sq_thread_acquire_mm(ctx) && + !__io_sq_thread_acquire_files(ctx)) __io_queue_sqe(req, NULL); else __io_req_task_cancel(req, -EFAULT); @@ -6954,7 +6954,8 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) if (!list_empty(&ctx->iopoll_list)) io_do_iopoll(ctx, &nr_events, 0); - if (to_submit && likely(!percpu_ref_is_dying(&ctx->refs))) + if (to_submit && !ctx->sqo_dead && + likely(!percpu_ref_is_dying(&ctx->refs))) ret = io_submit_sqes(ctx, to_submit); mutex_unlock(&ctx->uring_lock); } @@ -8712,6 +8713,10 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) { mutex_lock(&ctx->uring_lock); percpu_ref_kill(&ctx->refs); + + if (WARN_ON_ONCE((ctx->flags & IORING_SETUP_SQPOLL) && !ctx->sqo_dead)) + ctx->sqo_dead = 1; + /* if force is set, the ring is going away. always drop after that */ ctx->cq_overflow_flushed = 1; if (ctx->rings) @@ -8874,6 +8879,18 @@ static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx, } } +static void io_disable_sqo_submit(struct io_ring_ctx *ctx) +{ + WARN_ON_ONCE(ctx->sqo_task != current); + + mutex_lock(&ctx->uring_lock); + ctx->sqo_dead = 1; + mutex_unlock(&ctx->uring_lock); + + /* make sure callers enter the ring to get error */ + io_ring_set_wakeup_flag(ctx); +} + /* * We need to iteratively cancel requests, in case a request has dependent * hard links. These persist even for failure of cancelations, hence keep @@ -8885,6 +8902,8 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, struct task_struct *task = current; if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { + /* for SQPOLL only sqo_task has task notes */ + io_disable_sqo_submit(ctx); task = ctx->sq_data->thread; atomic_inc(&task->io_uring->in_idle); io_sq_thread_park(ctx->sq_data); @@ -9056,6 +9075,7 @@ void __io_uring_task_cancel(void) static int io_uring_flush(struct file *file, void *data) { struct io_uring_task *tctx = current->io_uring; + struct io_ring_ctx *ctx = file->private_data; if (!tctx) return 0; @@ -9071,7 +9091,16 @@ static int io_uring_flush(struct file *file, void *data) if (atomic_long_read(&file->f_count) != 2) return 0; - io_uring_del_task_file(file); + if (ctx->flags & IORING_SETUP_SQPOLL) { + /* there is only one file note, which is owned by sqo_task */ + WARN_ON_ONCE((ctx->sqo_task == current) == + !xa_load(&tctx->xa, (unsigned long)file)); + + io_disable_sqo_submit(ctx); + } + + if (!(ctx->flags & IORING_SETUP_SQPOLL) || ctx->sqo_task == current) + io_uring_del_task_file(file); return 0; } @@ -9145,8 +9174,9 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file, #endif /* !CONFIG_MMU */ -static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx) +static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) { + int ret = 0; DEFINE_WAIT(wait); do { @@ -9155,6 +9185,11 @@ static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx) prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE); + if (unlikely(ctx->sqo_dead)) { + ret = -EOWNERDEAD; + goto out; + } + if (!io_sqring_full(ctx)) break; @@ -9162,6 +9197,8 @@ static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx) } while (!signal_pending(current)); finish_wait(&ctx->sqo_sq_wait, &wait); +out: + return ret; } static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz, @@ -9235,10 +9272,16 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, if (ctx->flags & IORING_SETUP_SQPOLL) { io_cqring_overflow_flush(ctx, false, NULL, NULL); + ret = -EOWNERDEAD; + if (unlikely(ctx->sqo_dead)) + goto out; if (flags & IORING_ENTER_SQ_WAKEUP) wake_up(&ctx->sq_data->wait); - if (flags & IORING_ENTER_SQ_WAIT) - io_sqpoll_wait_sq(ctx); + if (flags & IORING_ENTER_SQ_WAIT) { + ret = io_sqpoll_wait_sq(ctx); + if (ret) + goto out; + } submitted = to_submit; } else if (to_submit) { ret = io_uring_add_task_file(ctx, f.file); @@ -9665,6 +9708,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags); return ret; err: + io_disable_sqo_submit(ctx); io_ring_ctx_wait_and_kill(ctx); return ret; } -- cgit 1.4.1 From 6bae85bd70d063b63fbe262d943cc321eab31b17 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 8 Jan 2021 22:46:02 -0800 Subject: maintainers: update my email address Change my email contact ahead of a likely painful eleven-month migration to a certain cobalt enteprisey groupware cloud product that will totally break my workflow. Some day I may get used to having to email being sequestered behind both claret and cerulean oath2+sms 2fa layers, but for now I'll stick with keying in one password to receive an email vs. the required four. Signed-off-by: Darrick J. Wong Signed-off-by: Linus Torvalds --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3d1b8fe97261..292394098e39 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9273,7 +9273,7 @@ F: drivers/net/ethernet/sgi/ioc3-eth.c IOMAP FILESYSTEM LIBRARY M: Christoph Hellwig -M: Darrick J. Wong +M: Darrick J. Wong M: linux-xfs@vger.kernel.org M: linux-fsdevel@vger.kernel.org L: linux-xfs@vger.kernel.org @@ -19504,7 +19504,7 @@ F: arch/x86/xen/*swiotlb* F: drivers/xen/*swiotlb* XFS FILESYSTEM -M: Darrick J. Wong +M: Darrick J. Wong M: linux-xfs@vger.kernel.org L: linux-xfs@vger.kernel.org S: Supported -- cgit 1.4.1 From a2bc221b972db91e4be1970e776e98f16aa87904 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Thu, 7 Jan 2021 02:15:20 -0800 Subject: netxen_nic: fix MSI/MSI-x interrupts For all PCI functions on the netxen_nic adapter, interrupt mode (INTx or MSI) configuration is dependent on what has been configured by the PCI function zero in the shared interrupt register, as these adapters do not support mixed mode interrupts among the functions of a given adapter. Logic for setting MSI/MSI-x interrupt mode in the shared interrupt register based on PCI function id zero check is not appropriate for all family of netxen adapters, as for some of the netxen family adapters PCI function zero is not really meant to be probed/loaded in the host but rather just act as a management function on the device, which caused all the other PCI functions on the adapter to always use legacy interrupt (INTx) mode instead of choosing MSI/MSI-x interrupt mode. This patch replaces that check with port number so that for all type of adapters driver attempts for MSI/MSI-x interrupt modes. Fixes: b37eb210c076 ("netxen_nic: Avoid mixed mode interrupts") Signed-off-by: Manish Chopra Signed-off-by: Igor Russkikh Link: https://lore.kernel.org/r/20210107101520.6735-1-manishc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index f21847739ef1..d258e0ccf946 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -564,11 +564,6 @@ static const struct net_device_ops netxen_netdev_ops = { .ndo_set_features = netxen_set_features, }; -static inline bool netxen_function_zero(struct pci_dev *pdev) -{ - return (PCI_FUNC(pdev->devfn) == 0) ? true : false; -} - static inline void netxen_set_interrupt_mode(struct netxen_adapter *adapter, u32 mode) { @@ -664,7 +659,7 @@ static int netxen_setup_intr(struct netxen_adapter *adapter) netxen_initialize_interrupt_registers(adapter); netxen_set_msix_bit(pdev, 0); - if (netxen_function_zero(pdev)) { + if (adapter->portnum == 0) { if (!netxen_setup_msi_interrupts(adapter, num_msix)) netxen_set_interrupt_mode(adapter, NETXEN_MSI_MODE); else -- cgit 1.4.1 From b210de4f8c97d57de051e805686248ec4c6cfc52 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 7 Jan 2021 15:50:18 +0200 Subject: net: ipv6: Validate GSO SKB before finish IPv6 processing There are cases where GSO segment's length exceeds the egress MTU: - Forwarding of a TCP GRO skb, when DF flag is not set. - Forwarding of an skb that arrived on a virtualisation interface (virtio-net/vhost/tap) with TSO/GSO size set by other network stack. - Local GSO skb transmitted on an NETIF_F_TSO tunnel stacked over an interface with a smaller MTU. - Arriving GRO skb (or GSO skb in a virtualised environment) that is bridged to a NETIF_F_TSO tunnel stacked over an interface with an insufficient MTU. If so: - Consume the SKB and its segments. - Issue an ICMP packet with 'Packet Too Big' message containing the MTU, allowing the source host to reduce its Path MTU appropriately. Note: These cases are handled in the same manner in IPv4 output finish. This patch aligns the behavior of IPv6 and the one of IPv4. Fixes: 9e50849054a4 ("netfilter: ipv6: move POSTROUTING invocation before fragmentation") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Link: https://lore.kernel.org/r/1610027418-30438-1-git-send-email-ayal@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_output.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 749ad72386b2..077d43af8226 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -125,8 +125,43 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * return -EINVAL; } +static int +ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk, + struct sk_buff *skb, unsigned int mtu) +{ + struct sk_buff *segs, *nskb; + netdev_features_t features; + int ret = 0; + + /* Please see corresponding comment in ip_finish_output_gso + * describing the cases where GSO segment length exceeds the + * egress MTU. + */ + features = netif_skb_features(skb); + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + if (IS_ERR_OR_NULL(segs)) { + kfree_skb(skb); + return -ENOMEM; + } + + consume_skb(skb); + + skb_list_walk_safe(segs, segs, nskb) { + int err; + + skb_mark_not_on_list(segs); + err = ip6_fragment(net, sk, segs, ip6_finish_output2); + if (err && ret == 0) + ret = err; + } + + return ret; +} + static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { + unsigned int mtu; + #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ if (skb_dst(skb)->xfrm) { @@ -135,7 +170,11 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff } #endif - if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || + mtu = ip6_skb_dst_mtu(skb); + if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu)) + return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu); + + if ((skb->len > mtu && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)) || (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) return ip6_fragment(net, sk, skb, ip6_finish_output2); -- cgit 1.4.1 From 0ea02c73775277001c651ad4a0e83781a9acf406 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 11 Nov 2020 19:52:16 +0800 Subject: riscv: Drop a duplicated PAGE_KERNEL_EXEC commit b91540d52a08 ("RISC-V: Add EFI runtime services") add a duplicated PAGE_KERNEL_EXEC, kill it. Signed-off-by: Kefeng Wang Reviewed-by: Pekka Enberg Reviewed-by: Atish Patra Fixes: b91540d52a08 ("RISC-V: Add EFI runtime services") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgtable.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 41a72861987c..251e1db088fa 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -99,7 +99,6 @@ | _PAGE_DIRTY) #define PAGE_KERNEL __pgprot(_PAGE_KERNEL) -#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL | _PAGE_EXEC) #define PAGE_KERNEL_READ __pgprot(_PAGE_KERNEL & ~_PAGE_WRITE) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL | _PAGE_EXEC) #define PAGE_KERNEL_READ_EXEC __pgprot((_PAGE_KERNEL & ~_PAGE_WRITE) \ -- cgit 1.4.1 From 3502bd9b5762154ff11665f3f18f6d7dcc6f781c Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Sat, 9 Jan 2021 00:37:45 +0300 Subject: selftests/tls: fix selftests after adding ChaCha20-Poly1305 TLS selftests where broken because of wrong variable types used. Fix it by changing u16 -> uint16_t Fixes: 4f336e88a870 ("selftests/tls: add CHACHA20-POLY1305 to tls selftests") Reported-by: kernel test robot Signed-off-by: Vadim Fedorenko Link: https://lore.kernel.org/r/1610141865-7142-1-git-send-email-vfedorenko@novek.ru Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index cb0d1890a860..e0088c2d38a5 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -103,8 +103,8 @@ FIXTURE(tls) FIXTURE_VARIANT(tls) { - u16 tls_version; - u16 cipher_type; + uint16_t tls_version; + uint16_t cipher_type; }; FIXTURE_VARIANT_ADD(tls, 12_gcm) -- cgit 1.4.1 From b77413446408fdd256599daf00d5be72b5f3e7c6 Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Fri, 8 Jan 2021 14:13:37 +0700 Subject: tipc: fix NULL deref in tipc_link_xmit() The buffer list can have zero skb as following path: tipc_named_node_up()->tipc_node_xmit()->tipc_link_xmit(), so we need to check the list before casting an &sk_buff. Fault report: [] tipc: Bulk publication failure [] general protection fault, probably for non-canonical [#1] PREEMPT [...] [] KASAN: null-ptr-deref in range [0x00000000000000c8-0x00000000000000cf] [] CPU: 0 PID: 0 Comm: swapper/0 Kdump: loaded Not tainted 5.10.0-rc4+ #2 [] Hardware name: Bochs ..., BIOS Bochs 01/01/2011 [] RIP: 0010:tipc_link_xmit+0xc1/0x2180 [] Code: 24 b8 00 00 00 00 4d 39 ec 4c 0f 44 e8 e8 d7 0a 10 f9 48 [...] [] RSP: 0018:ffffc90000006ea0 EFLAGS: 00010202 [] RAX: dffffc0000000000 RBX: ffff8880224da000 RCX: 1ffff11003d3cc0d [] RDX: 0000000000000019 RSI: ffffffff886007b9 RDI: 00000000000000c8 [] RBP: ffffc90000007018 R08: 0000000000000001 R09: fffff52000000ded [] R10: 0000000000000003 R11: fffff52000000dec R12: ffffc90000007148 [] R13: 0000000000000000 R14: 0000000000000000 R15: ffffc90000007018 [] FS: 0000000000000000(0000) GS:ffff888037400000(0000) knlGS:000[...] [] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [] CR2: 00007fffd2db5000 CR3: 000000002b08f000 CR4: 00000000000006f0 Fixes: af9b028e270fd ("tipc: make media xmit call outside node spinlock context") Acked-by: Jon Maloy Signed-off-by: Hoang Le Link: https://lore.kernel.org/r/20210108071337.3598-1-hoang.h.le@dektech.com.au Signed-off-by: Jakub Kicinski --- net/tipc/link.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 6ae2140eb4f7..a6a694b78927 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1030,7 +1030,6 @@ void tipc_link_reset(struct tipc_link *l) int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, struct sk_buff_head *xmitq) { - struct tipc_msg *hdr = buf_msg(skb_peek(list)); struct sk_buff_head *backlogq = &l->backlogq; struct sk_buff_head *transmq = &l->transmq; struct sk_buff *skb, *_skb; @@ -1038,13 +1037,18 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, u16 ack = l->rcv_nxt - 1; u16 seqno = l->snd_nxt; int pkt_cnt = skb_queue_len(list); - int imp = msg_importance(hdr); unsigned int mss = tipc_link_mss(l); unsigned int cwin = l->window; unsigned int mtu = l->mtu; + struct tipc_msg *hdr; bool new_bundle; int rc = 0; + int imp; + + if (pkt_cnt <= 0) + return 0; + hdr = buf_msg(skb_peek(list)); if (unlikely(msg_size(hdr) > mtu)) { pr_warn("Too large msg, purging xmit list %d %d %d %d %d!\n", skb_queue_len(list), msg_user(hdr), @@ -1053,6 +1057,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, return -EMSGSIZE; } + imp = msg_importance(hdr); /* Allow oversubscription of one data msg per source at congestion */ if (unlikely(l->backlog[imp].len >= l->backlog[imp].limit)) { if (imp == TIPC_SYSTEM_IMPORTANCE) { -- cgit 1.4.1 From 57726ebe2733891c9f59105eff028735f73d05fb Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Fri, 8 Jan 2021 16:52:09 +0200 Subject: mlxsw: core: Add validation of transceiver temperature thresholds Validate thresholds to avoid a single failure due to some transceiver unreliability. Ignore the last readouts in case warning temperature is above alarm temperature, since it can cause unexpected thermal shutdown. Stay with the previous values and refresh threshold within the next iteration. This is a rare scenario, but it was observed at a customer site. Fixes: 6a79507cfe94 ("mlxsw: core: Extend thermal module with per QSFP module thermal zones") Signed-off-by: Vadim Pasternak Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 8fa286ccdd6b..250a85049697 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -176,6 +176,12 @@ mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, if (err) return err; + if (crit_temp > emerg_temp) { + dev_warn(dev, "%s : Critical threshold %d is above emergency threshold %d\n", + tz->tzdev->type, crit_temp, emerg_temp); + return 0; + } + /* According to the system thermal requirements, the thermal zones are * defined with four trip points. The critical and emergency * temperature thresholds, provided by QSFP module are set as "active" @@ -190,11 +196,8 @@ mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp; tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = crit_temp; tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = emerg_temp; - if (emerg_temp > crit_temp) - tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp + + tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp + MLXSW_THERMAL_MODULE_TEMP_SHIFT; - else - tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp; return 0; } -- cgit 1.4.1 From b06ca3d5a43ca2dd806f7688a17e8e7e0619a80a Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Fri, 8 Jan 2021 16:52:10 +0200 Subject: mlxsw: core: Increase critical threshold for ASIC thermal zone Increase critical threshold for ASIC thermal zone from 110C to 140C according to the system hardware requirements. All the supported ASICs (Spectrum-1, Spectrum-2, Spectrum-3) could be still operational with ASIC temperature below 140C. With the old critical threshold value system can perform unjustified shutdown. All the systems equipped with the above ASICs implement thermal protection mechanism at firmware level and firmware could decide to perform system thermal shutdown in case the temperature is below 140C. So with the new threshold system will not meltdown, while thermal operating range will be aligned with hardware abilities. Fixes: 41e760841d26 ("mlxsw: core: Replace thermal temperature trips with defines") Fixes: a50c1e35650b ("mlxsw: core: Implement thermal zone") Signed-off-by: Vadim Pasternak Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 250a85049697..bf85ce9835d7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -19,7 +19,7 @@ #define MLXSW_THERMAL_ASIC_TEMP_NORM 75000 /* 75C */ #define MLXSW_THERMAL_ASIC_TEMP_HIGH 85000 /* 85C */ #define MLXSW_THERMAL_ASIC_TEMP_HOT 105000 /* 105C */ -#define MLXSW_THERMAL_ASIC_TEMP_CRIT 110000 /* 110C */ +#define MLXSW_THERMAL_ASIC_TEMP_CRIT 140000 /* 140C */ #define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */ #define MLXSW_THERMAL_MODULE_TEMP_SHIFT (MLXSW_THERMAL_HYSTERESIS_TEMP * 2) #define MLXSW_THERMAL_ZONE_MAX_NAME 16 -- cgit 1.4.1 From f97844f9c518172f813b7ece18a9956b1f70c1bb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 5 Jan 2021 16:15:16 +0100 Subject: dt-bindings: net: renesas,etheravb: RZ/G2H needs tx-internal-delay-ps The merge resolution of the interaction of commits 307eea32b202864c ("dt-bindings: net: renesas,ravb: Add support for r8a774e1 SoC") and d7adf6331189cbe9 ("dt-bindings: net: renesas,etheravb: Convert to json-schema") missed that "tx-internal-delay-ps" should be a required property on RZ/G2H. Fixes: 8b0308fe319b8002 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20210105151516.1540653-1-geert+renesas@glider.be Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/renesas,etheravb.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml index 244befb6402a..de9dd574a2f9 100644 --- a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml +++ b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml @@ -163,6 +163,7 @@ allOf: enum: - renesas,etheravb-r8a774a1 - renesas,etheravb-r8a774b1 + - renesas,etheravb-r8a774e1 - renesas,etheravb-r8a7795 - renesas,etheravb-r8a7796 - renesas,etheravb-r8a77961 -- cgit 1.4.1 From fab336b42441e0b2eb1d81becedb45fbdf99606e Mon Sep 17 00:00:00 2001 From: Chen Yi Date: Tue, 5 Jan 2021 23:31:20 +0800 Subject: selftests: netfilter: Pass family parameter "-f" to conntrack tool Fix nft_conntrack_helper.sh false fail report: 1) Conntrack tool need "-f ipv6" parameter to show out ipv6 traffic items. 2) Sleep 1 second after background nc send packet, to make sure check is after this statement executed. False report: FAIL: ns1-lkjUemYw did not show attached helper ip set via ruleset PASS: ns1-lkjUemYw connection on port 2121 has ftp helper attached ... After fix: PASS: ns1-2hUniwU2 connection on port 2121 has ftp helper attached PASS: ns2-2hUniwU2 connection on port 2121 has ftp helper attached ... Fixes: 619ae8e0697a6 ("selftests: netfilter: add test case for conntrack helper assignment") Signed-off-by: Chen Yi Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/nft_conntrack_helper.sh | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh index edf0a48da6bf..bf6b9626c7dd 100755 --- a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh +++ b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh @@ -94,7 +94,13 @@ check_for_helper() local message=$2 local port=$3 - ip netns exec ${netns} conntrack -L -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp' + if echo $message |grep -q 'ipv6';then + local family="ipv6" + else + local family="ipv4" + fi + + ip netns exec ${netns} conntrack -L -f $family -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp' if [ $? -ne 0 ] ; then echo "FAIL: ${netns} did not show attached helper $message" 1>&2 ret=1 @@ -111,8 +117,8 @@ test_helper() sleep 3 | ip netns exec ${ns2} nc -w 2 -l -p $port > /dev/null & - sleep 1 sleep 1 | ip netns exec ${ns1} nc -w 2 10.0.1.2 $port > /dev/null & + sleep 1 check_for_helper "$ns1" "ip $msg" $port check_for_helper "$ns2" "ip $msg" $port @@ -128,8 +134,8 @@ test_helper() sleep 3 | ip netns exec ${ns2} nc -w 2 -6 -l -p $port > /dev/null & - sleep 1 sleep 1 | ip netns exec ${ns1} nc -w 2 -6 dead:1::2 $port > /dev/null & + sleep 1 check_for_helper "$ns1" "ipv6 $msg" $port check_for_helper "$ns2" "ipv6 $msg" $port -- cgit 1.4.1 From f6351c3f1c27c80535d76cac2299aec44c36291e Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 8 Jan 2021 12:44:33 +0100 Subject: netfilter: conntrack: fix reading nf_conntrack_buckets The old way of changing the conntrack hashsize runtime was through changing the module param via file /sys/module/nf_conntrack/parameters/hashsize. This was extended to sysctl change in commit 3183ab8997a4 ("netfilter: conntrack: allow increasing bucket size via sysctl too"). The commit introduced second "user" variable nf_conntrack_htable_size_user which shadow actual variable nf_conntrack_htable_size. When hashsize is changed via module param this "user" variable isn't updated. This results in sysctl net/netfilter/nf_conntrack_buckets shows the wrong value when users update via the old way. This patch fix the issue by always updating "user" variable when reading the proc file. This will take care of changes to the actual variable without sysctl need to be aware. Fixes: 3183ab8997a4 ("netfilter: conntrack: allow increasing bucket size via sysctl too") Reported-by: Yoel Caspersen Signed-off-by: Jesper Dangaard Brouer Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_standalone.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 46c5557c1fec..0ee702d374b0 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -523,6 +523,9 @@ nf_conntrack_hash_sysctl(struct ctl_table *table, int write, { int ret; + /* module_param hashsize could have changed value */ + nf_conntrack_htable_size_user = nf_conntrack_htable_size; + ret = proc_dointvec(table, write, buffer, lenp, ppos); if (ret < 0 || !write) return ret; -- cgit 1.4.1 From a0adc8eabb402cfb9f32d15edd9f65f65e35cdce Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 7 Jan 2021 20:26:16 +0000 Subject: dma-buf: cma_heap: Fix memory leak in CMA heap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bing Song noticed the CMA heap was leaking memory due to a flub I made in commit a5d2d29e24be ("dma-buf: heaps: Move heap-helper logic into the cma_heap implementation"), and provided this fix which ensures the pagelist is also freed on release. Cc: Bing Song Cc: Sumit Semwal Cc: Liam Mark Cc: Laura Abbott Cc: Brian Starkey Cc: Hridya Valsaraju Cc: Suren Baghdasaryan Cc: Sandeep Patil Cc: Daniel Mentz Cc: Chris Goldsworthy Cc: Ørjan Eide Cc: Robin Murphy Cc: Ezequiel Garcia Cc: Simon Ser Cc: James Jones Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Reported-by: Bing Song Fixes: a5d2d29e24be ("dma-buf: heaps: Move heap-helper logic into the cma_heap implementation") Signed-off-by: John Stultz Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20210107202616.75170-1-john.stultz@linaro.org --- drivers/dma-buf/heaps/cma_heap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c index 3c4e34301172..364fc2f3e499 100644 --- a/drivers/dma-buf/heaps/cma_heap.c +++ b/drivers/dma-buf/heaps/cma_heap.c @@ -251,6 +251,9 @@ static void cma_heap_dma_buf_release(struct dma_buf *dmabuf) buffer->vaddr = NULL; } + /* free page list */ + kfree(buffer->pages); + /* release memory */ cma_release(cma_heap->cma, buffer->cma_pages, buffer->pagecount); kfree(buffer); } -- cgit 1.4.1 From c98e9daa59a611ff4e163689815f40380c912415 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 5 Jan 2021 08:54:32 -0500 Subject: NFS: Adjust fs_context error logging Several existing dprink()/dfprintk() calls were converted to use the new mount API logging macros by commit ce8866f0913f ("NFS: Attach supplementary error information to fs_context"). If the fs_context was not created using fsopen() then it will not have had a log buffer allocated for it, and the new mount API logging macros will wind up calling printk(). This can result in syslog messages being logged where previously there were none... most notably "NFS4: Couldn't follow remote path", which can happen if the client is auto-negotiating a protocol version with an NFS server that doesn't support the higher v4.x versions. Convert the nfs_errorf(), nfs_invalf(), and nfs_warnf() macros to check for the existence of the fs_context's log buffer and call dprintk() if it doesn't exist. Add nfs_ferrorf(), nfs_finvalf(), and nfs_warnf(), which do the same thing but take an NFS debug flag as an argument and call dfprintk(). Finally, modify the "NFS4: Couldn't follow remote path" message to use nfs_ferrorf(). Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207385 Signed-off-by: Scott Mayhew Reviewed-by: Benjamin Coddington Fixes: ce8866f0913f ("NFS: Attach supplementary error information to fs_context.") Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 26 +++++++++++++++++++++++--- fs/nfs/nfs4super.c | 4 ++-- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b840d0a91c9d..6bdee7ab3a6c 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -136,9 +136,29 @@ struct nfs_fs_context { } clone_data; }; -#define nfs_errorf(fc, fmt, ...) errorf(fc, fmt, ## __VA_ARGS__) -#define nfs_invalf(fc, fmt, ...) invalf(fc, fmt, ## __VA_ARGS__) -#define nfs_warnf(fc, fmt, ...) warnf(fc, fmt, ## __VA_ARGS__) +#define nfs_errorf(fc, fmt, ...) ((fc)->log.log ? \ + errorf(fc, fmt, ## __VA_ARGS__) : \ + ({ dprintk(fmt "\n", ## __VA_ARGS__); })) + +#define nfs_ferrorf(fc, fac, fmt, ...) ((fc)->log.log ? \ + errorf(fc, fmt, ## __VA_ARGS__) : \ + ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); })) + +#define nfs_invalf(fc, fmt, ...) ((fc)->log.log ? \ + invalf(fc, fmt, ## __VA_ARGS__) : \ + ({ dprintk(fmt "\n", ## __VA_ARGS__); -EINVAL; })) + +#define nfs_finvalf(fc, fac, fmt, ...) ((fc)->log.log ? \ + invalf(fc, fmt, ## __VA_ARGS__) : \ + ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); -EINVAL; })) + +#define nfs_warnf(fc, fmt, ...) ((fc)->log.log ? \ + warnf(fc, fmt, ## __VA_ARGS__) : \ + ({ dprintk(fmt "\n", ## __VA_ARGS__); })) + +#define nfs_fwarnf(fc, fac, fmt, ...) ((fc)->log.log ? \ + warnf(fc, fmt, ## __VA_ARGS__) : \ + ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); })) static inline struct nfs_fs_context *nfs_fc2context(const struct fs_context *fc) { diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 984cc42ee54d..d09bcfd7db89 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -227,7 +227,7 @@ int nfs4_try_get_tree(struct fs_context *fc) fc, ctx->nfs_server.hostname, ctx->nfs_server.export_path); if (err) { - nfs_errorf(fc, "NFS4: Couldn't follow remote path"); + nfs_ferrorf(fc, MOUNT, "NFS4: Couldn't follow remote path"); dfprintk(MOUNT, "<-- nfs4_try_get_tree() = %d [error]\n", err); } else { dfprintk(MOUNT, "<-- nfs4_try_get_tree() = 0\n"); @@ -250,7 +250,7 @@ int nfs4_get_referral_tree(struct fs_context *fc) fc, ctx->nfs_server.hostname, ctx->nfs_server.export_path); if (err) { - nfs_errorf(fc, "NFS4: Couldn't follow remote path"); + nfs_ferrorf(fc, MOUNT, "NFS4: Couldn't follow remote path"); dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = %d [error]\n", err); } else { dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = 0\n"); -- cgit 1.4.1 From 86b53fbf08f48d353a86a06aef537e78e82ba721 Mon Sep 17 00:00:00 2001 From: "j.nixdorf@avm.de" Date: Tue, 5 Jan 2021 15:17:01 +0100 Subject: net: sunrpc: interpret the return value of kstrtou32 correctly A return value of 0 means success. This is documented in lib/kstrtox.c. This was found by trying to mount an NFS share from a link-local IPv6 address with the interface specified by its index: mount("[fe80::1%1]:/srv/nfs", "/mnt", "nfs", 0, "nolock,addr=fe80::1%1") Before this commit this failed with EINVAL and also caused the following message in dmesg: [...] NFS: bad IP address specified: addr=fe80::1%1 The syscall using the same address based on the interface name instead of its index succeeds. Credits for this patch go to my colleague Christian Speich, who traced the origin of this bug to this line of code. Signed-off-by: Johannes Nixdorf Fixes: 00cfaa943ec3 ("replace strict_strto calls") Signed-off-by: Trond Myklebust --- net/sunrpc/addr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index 010dcb876f9d..6e4dbd577a39 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -185,7 +185,7 @@ static int rpc_parse_scope_id(struct net *net, const char *buf, scope_id = dev->ifindex; dev_put(dev); } else { - if (kstrtou32(p, 10, &scope_id) == 0) { + if (kstrtou32(p, 10, &scope_id) != 0) { kfree(p); return 0; } -- cgit 1.4.1 From 67bbceedc9bb8ad48993a8bd6486054756d711f4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Jan 2021 13:35:46 -0500 Subject: pNFS: Mark layout for return if return-on-close was not sent If the layout return-on-close failed because the layoutreturn was never sent, then we should mark the layout for return again. Fixes: 9c47b18cf722 ("pNFS: Ensure we do clear the return-on-close layout stateid on fatal errors") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 07f59dc8cb2e..ccc89fab1802 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1560,12 +1560,18 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, int ret) { struct pnfs_layout_hdr *lo = args->layout; + struct inode *inode = args->inode; const nfs4_stateid *arg_stateid = NULL; const nfs4_stateid *res_stateid = NULL; struct nfs4_xdr_opaque_data *ld_private = args->ld_private; switch (ret) { case -NFS4ERR_NOMATCHING_LAYOUT: + spin_lock(&inode->i_lock); + if (pnfs_layout_is_valid(lo) && + nfs4_stateid_match_other(&args->stateid, &lo->plh_stateid)) + pnfs_set_plh_return_info(lo, args->range.iomode, 0); + spin_unlock(&inode->i_lock); break; case 0: if (res->lrs_present) -- cgit 1.4.1 From 078000d02d57f02dde61de4901f289672e98c8bc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Jan 2021 13:18:03 -0500 Subject: pNFS: We want return-on-close to complete when evicting the inode If the inode is being evicted, it should be safe to run return-on-close, so we should do it to ensure we don't inadvertently leak layout segments. Fixes: 1c5bd76d17cc ("pNFS: Enable layoutreturn operation for return-on-close") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 26 ++++++++++---------------- fs/nfs/pnfs.c | 8 +++----- fs/nfs/pnfs.h | 8 +++----- 3 files changed, 16 insertions(+), 26 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 14acd2f79107..2f4679a62712 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3536,10 +3536,8 @@ static void nfs4_close_done(struct rpc_task *task, void *data) trace_nfs4_close(state, &calldata->arg, &calldata->res, task->tk_status); /* Handle Layoutreturn errors */ - if (pnfs_roc_done(task, calldata->inode, - &calldata->arg.lr_args, - &calldata->res.lr_res, - &calldata->res.lr_ret) == -EAGAIN) + if (pnfs_roc_done(task, &calldata->arg.lr_args, &calldata->res.lr_res, + &calldata->res.lr_ret) == -EAGAIN) goto out_restart; /* hmm. we are done with the inode, and in the process of freeing @@ -6384,10 +6382,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status); /* Handle Layoutreturn errors */ - if (pnfs_roc_done(task, data->inode, - &data->args.lr_args, - &data->res.lr_res, - &data->res.lr_ret) == -EAGAIN) + if (pnfs_roc_done(task, &data->args.lr_args, &data->res.lr_res, + &data->res.lr_ret) == -EAGAIN) goto out_restart; switch (task->tk_status) { @@ -6441,10 +6437,10 @@ static void nfs4_delegreturn_release(void *calldata) struct nfs4_delegreturndata *data = calldata; struct inode *inode = data->inode; + if (data->lr.roc) + pnfs_roc_release(&data->lr.arg, &data->lr.res, + data->res.lr_ret); if (inode) { - if (data->lr.roc) - pnfs_roc_release(&data->lr.arg, &data->lr.res, - data->res.lr_ret); nfs_post_op_update_inode_force_wcc(inode, &data->fattr); nfs_iput_and_deactive(inode); } @@ -6520,16 +6516,14 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, nfs_fattr_init(data->res.fattr); data->timestamp = jiffies; data->rpc_status = 0; - data->lr.roc = pnfs_roc(inode, &data->lr.arg, &data->lr.res, cred); data->inode = nfs_igrab_and_active(inode); - if (data->inode) { + if (data->inode || issync) { + data->lr.roc = pnfs_roc(inode, &data->lr.arg, &data->lr.res, + cred); if (data->lr.roc) { data->args.lr_args = &data->lr.arg; data->res.lr_res = &data->lr.res; } - } else if (data->lr.roc) { - pnfs_roc_release(&data->lr.arg, &data->lr.res, 0); - data->lr.roc = false; } task_setup_data.callback_data = data; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ccc89fab1802..a18b1992b2fb 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1509,10 +1509,8 @@ out_noroc: return false; } -int pnfs_roc_done(struct rpc_task *task, struct inode *inode, - struct nfs4_layoutreturn_args **argpp, - struct nfs4_layoutreturn_res **respp, - int *ret) +int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, + struct nfs4_layoutreturn_res **respp, int *ret) { struct nfs4_layoutreturn_args *arg = *argpp; int retval = -EAGAIN; @@ -1545,7 +1543,7 @@ int pnfs_roc_done(struct rpc_task *task, struct inode *inode, return 0; case -NFS4ERR_OLD_STATEID: if (!nfs4_layout_refresh_old_stateid(&arg->stateid, - &arg->range, inode)) + &arg->range, arg->inode)) break; *ret = -NFS4ERR_NOMATCHING_LAYOUT; return -EAGAIN; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index bbd3de1025f2..d810ae674f4e 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -297,10 +297,8 @@ bool pnfs_roc(struct inode *ino, struct nfs4_layoutreturn_args *args, struct nfs4_layoutreturn_res *res, const struct cred *cred); -int pnfs_roc_done(struct rpc_task *task, struct inode *inode, - struct nfs4_layoutreturn_args **argpp, - struct nfs4_layoutreturn_res **respp, - int *ret); +int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, + struct nfs4_layoutreturn_res **respp, int *ret); void pnfs_roc_release(struct nfs4_layoutreturn_args *args, struct nfs4_layoutreturn_res *res, int ret); @@ -772,7 +770,7 @@ pnfs_roc(struct inode *ino, } static inline int -pnfs_roc_done(struct rpc_task *task, struct inode *inode, +pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, struct nfs4_layoutreturn_res **respp, int *ret) -- cgit 1.4.1 From c18d1e17ba2f6a1c9257b0b5d2882a6e3f772673 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Jan 2021 15:01:18 -0500 Subject: pNFS: Clean up pnfs_layoutreturn_free_lsegs() Remove the check for whether or not the stateid is NULL, and fix up the callers. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a18b1992b2fb..16a37214aba9 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1152,7 +1152,7 @@ void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, LIST_HEAD(freeme); spin_lock(&inode->i_lock); - if (!pnfs_layout_is_valid(lo) || !arg_stateid || + if (!pnfs_layout_is_valid(lo) || !nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) goto out_unlock; if (stateid) { @@ -1559,7 +1559,6 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, { struct pnfs_layout_hdr *lo = args->layout; struct inode *inode = args->inode; - const nfs4_stateid *arg_stateid = NULL; const nfs4_stateid *res_stateid = NULL; struct nfs4_xdr_opaque_data *ld_private = args->ld_private; @@ -1569,6 +1568,7 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, if (pnfs_layout_is_valid(lo) && nfs4_stateid_match_other(&args->stateid, &lo->plh_stateid)) pnfs_set_plh_return_info(lo, args->range.iomode, 0); + pnfs_clear_layoutreturn_waitbit(lo); spin_unlock(&inode->i_lock); break; case 0: @@ -1576,11 +1576,10 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, res_stateid = &res->stateid; fallthrough; default: - arg_stateid = &args->stateid; + pnfs_layoutreturn_free_lsegs(lo, &args->stateid, &args->range, + res_stateid); } trace_nfs4_layoutreturn_on_close(args->inode, &args->stateid, ret); - pnfs_layoutreturn_free_lsegs(lo, arg_stateid, &args->range, - res_stateid); if (ld_private && ld_private->ops && ld_private->ops->free) ld_private->ops->free(ld_private); pnfs_put_layout_hdr(lo); -- cgit 1.4.1 From 2c8d5fc37fe2384a9bdb6965443ab9224d46f704 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jan 2021 06:43:45 -0500 Subject: pNFS: Stricter ordering of layoutget and layoutreturn If a layout return is in progress, we should wait for it to complete, in case the layout segment we are picking up gets returned too. Fixes: 30cb3ee299cb ("pNFS: Handle NFS4ERR_OLD_STATEID on layoutreturn by bumping the state seqid") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 16a37214aba9..fc13a3c8bc48 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2018,6 +2018,27 @@ lookup_again: goto lookup_again; } + /* + * Because we free lsegs when sending LAYOUTRETURN, we need to wait + * for LAYOUTRETURN. + */ + if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { + spin_unlock(&ino->i_lock); + dprintk("%s wait for layoutreturn\n", __func__); + lseg = ERR_PTR(pnfs_prepare_to_retry_layoutget(lo)); + if (!IS_ERR(lseg)) { + pnfs_put_layout_hdr(lo); + dprintk("%s retrying\n", __func__); + trace_pnfs_update_layout(ino, pos, count, iomode, lo, + lseg, + PNFS_UPDATE_LAYOUT_RETRY); + goto lookup_again; + } + trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, + PNFS_UPDATE_LAYOUT_RETURN); + goto out_put_layout_hdr; + } + lseg = pnfs_find_lseg(lo, &arg, strict_iomode); if (lseg) { trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, @@ -2070,28 +2091,6 @@ lookup_again: nfs4_stateid_copy(&stateid, &lo->plh_stateid); } - /* - * Because we free lsegs before sending LAYOUTRETURN, we need to wait - * for LAYOUTRETURN even if first is true. - */ - if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { - spin_unlock(&ino->i_lock); - dprintk("%s wait for layoutreturn\n", __func__); - lseg = ERR_PTR(pnfs_prepare_to_retry_layoutget(lo)); - if (!IS_ERR(lseg)) { - if (first) - pnfs_clear_first_layoutget(lo); - pnfs_put_layout_hdr(lo); - dprintk("%s retrying\n", __func__); - trace_pnfs_update_layout(ino, pos, count, iomode, lo, - lseg, PNFS_UPDATE_LAYOUT_RETRY); - goto lookup_again; - } - trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, - PNFS_UPDATE_LAYOUT_RETURN); - goto out_put_layout_hdr; - } - if (pnfs_layoutgets_blocked(lo)) { trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_BLOCKED); -- cgit 1.4.1 From 1757655d780d9d29bc4b60e708342e94924f7ef3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Jan 2021 11:28:30 -0500 Subject: NFS/pNFS: Don't call pnfs_free_bucket_lseg() before removing the request In pnfs_generic_clear_request_commit(), we try calling pnfs_free_bucket_lseg() before we remove the request from the DS bucket. That will always fail, since the point is to test for whether or not that bucket is empty. Fixes: c84bea59449a ("NFS/pNFS: Simplify bucket layout segment reference counting") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs_nfs.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 2efcfdd348a1..df20bbe8d15e 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -78,22 +78,18 @@ void pnfs_generic_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo) { - struct pnfs_layout_segment *freeme = NULL; + struct pnfs_commit_bucket *bucket = NULL; if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) goto out; cinfo->ds->nwritten--; - if (list_is_singular(&req->wb_list)) { - struct pnfs_commit_bucket *bucket; - + if (list_is_singular(&req->wb_list)) bucket = list_first_entry(&req->wb_list, - struct pnfs_commit_bucket, - written); - freeme = pnfs_free_bucket_lseg(bucket); - } + struct pnfs_commit_bucket, written); out: nfs_request_remove_commit_list(req, cinfo); - pnfs_put_lseg(freeme); + if (bucket) + pnfs_put_lseg(pnfs_free_bucket_lseg(bucket)); } EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit); -- cgit 1.4.1 From 46c9ea1d4fee4cf1f8cc6001b9c14aae61b3d502 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Jan 2021 11:54:57 -0500 Subject: NFS/pNFS: Don't leak DS commits in pnfs_generic_retry_commit() We must ensure that we pass a layout segment to nfs_retry_commit() when we're cleaning up after pnfs_bucket_alloc_ds_commits(). Otherwise, requests that should be committed to the DS will get committed to the MDS. Do so by ensuring that pnfs_bucket_get_committing() always tries to return a layout segment when it returns a non-empty page list. Fixes: c84bea59449a ("NFS/pNFS: Simplify bucket layout segment reference counting") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs_nfs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index df20bbe8d15e..49d3389bd813 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -403,12 +403,16 @@ pnfs_bucket_get_committing(struct list_head *head, struct pnfs_commit_bucket *bucket, struct nfs_commit_info *cinfo) { + struct pnfs_layout_segment *lseg; struct list_head *pos; list_for_each(pos, &bucket->committing) cinfo->ds->ncommitting--; list_splice_init(&bucket->committing, head); - return pnfs_free_bucket_lseg(bucket); + lseg = pnfs_free_bucket_lseg(bucket); + if (!lseg) + lseg = pnfs_get_lseg(bucket->lseg); + return lseg; } static struct nfs_commit_data * @@ -420,8 +424,6 @@ pnfs_bucket_fetch_commitdata(struct pnfs_commit_bucket *bucket, if (!data) return NULL; data->lseg = pnfs_bucket_get_committing(&data->pages, bucket, cinfo); - if (!data->lseg) - data->lseg = pnfs_get_lseg(bucket->lseg); return data; } -- cgit 1.4.1 From cb2856c5971723910a86b7d1d0cf623d6919cbc4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Jan 2021 14:13:22 -0500 Subject: NFS/pNFS: Fix a leak of the layout 'plh_outstanding' counter If we exit _lgopen_prepare_attached() without setting a layout, we will currently leak the plh_outstanding counter. Fixes: 411ae722d10a ("pNFS: Wait for stale layoutget calls to complete in pnfs_update_layout()") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index fc13a3c8bc48..4f274f21c4ab 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2244,6 +2244,7 @@ static void _lgopen_prepare_attached(struct nfs4_opendata *data, &rng, GFP_KERNEL); if (!lgp) { pnfs_clear_first_layoutget(lo); + nfs_layoutget_end(lo); pnfs_put_layout_hdr(lo); return; } -- cgit 1.4.1 From 5625dcfbbcf892e40e8d60abbb5f56701a1d031c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 7 Jan 2021 17:12:08 +0530 Subject: Documentation: kbuild: Fix section reference Section 3.11 was incorrectly called 3.9, fix it. Signed-off-by: Viresh Kumar Signed-off-by: Masahiro Yamada --- Documentation/kbuild/makefiles.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index d36768cf1250..9f6a11881951 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -598,7 +598,7 @@ more details, with real examples. explicitly added to $(targets). Assignments to $(targets) are without $(obj)/ prefix. if_changed may be - used in conjunction with custom rules as defined in "3.9 Custom Rules". + used in conjunction with custom rules as defined in "3.11 Custom Rules". Note: It is a typical mistake to forget the FORCE prerequisite. Another common pitfall is that whitespace is sometimes significant; for -- cgit 1.4.1 From 113aac6d567bda783af36d08f73bfda47d8e9a40 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 10 Jan 2021 15:46:06 -0500 Subject: NFS: nfs_delegation_find_inode_server must first reference the superblock Before referencing the inode, we must ensure that the superblock can be referenced. Otherwise, we can end up with iput() calling superblock operations that are no longer valid or accessible. Fixes: e39d8a186ed0 ("NFSv4: Fix an Oops during delegation callbacks") Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 816e1427f17e..04bf8066980c 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -1011,22 +1011,24 @@ nfs_delegation_find_inode_server(struct nfs_server *server, const struct nfs_fh *fhandle) { struct nfs_delegation *delegation; - struct inode *freeme, *res = NULL; + struct super_block *freeme = NULL; + struct inode *res = NULL; list_for_each_entry_rcu(delegation, &server->delegations, super_list) { spin_lock(&delegation->lock); if (delegation->inode != NULL && !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) && nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { - freeme = igrab(delegation->inode); - if (freeme && nfs_sb_active(freeme->i_sb)) - res = freeme; + if (nfs_sb_active(server->super)) { + freeme = server->super; + res = igrab(delegation->inode); + } spin_unlock(&delegation->lock); if (res != NULL) return res; if (freeme) { rcu_read_unlock(); - iput(freeme); + nfs_sb_deactive(freeme); rcu_read_lock(); } return ERR_PTR(-EAGAIN); -- cgit 1.4.1 From 896567ee7f17a8a736cda8a28cc987228410a2ac Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 10 Jan 2021 15:58:08 -0500 Subject: NFS: nfs_igrab_and_active must first reference the superblock Before referencing the inode, we must ensure that the superblock can be referenced. Otherwise, we can end up with iput() calling superblock operations that are no longer valid or accessible. Fixes: ea7c38fef0b7 ("NFSv4: Ensure we reference the inode for return-on-close in delegreturn") Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 6bdee7ab3a6c..62d3189745cd 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -599,12 +599,14 @@ extern void nfs4_test_session_trunk(struct rpc_clnt *clnt, static inline struct inode *nfs_igrab_and_active(struct inode *inode) { - inode = igrab(inode); - if (inode != NULL && !nfs_sb_active(inode->i_sb)) { - iput(inode); - inode = NULL; + struct super_block *sb = inode->i_sb; + + if (sb && nfs_sb_active(sb)) { + if (igrab(inode)) + return inode; + nfs_sb_deactive(sb); } - return inode; + return NULL; } static inline void nfs_iput_and_deactive(struct inode *inode) -- cgit 1.4.1 From 7c53f6b671f4aba70ff15e1b05148b10d58c2837 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 10 Jan 2021 14:34:50 -0800 Subject: Linux 5.11-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8b2c3f88ee5e..9e73f82e0d86 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Kleptomaniac Octopus # *DOCUMENTATION* -- cgit 1.4.1 From 869f4fdaf4ca7bb6e0d05caf6fa1108dddc346a7 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sat, 9 Jan 2021 20:01:21 +0800 Subject: netfilter: nf_nat: Fix memleak in nf_nat_init When register_pernet_subsys() fails, nf_nat_bysource should be freed just like when nf_ct_extend_register() fails. Fixes: 1cd472bf036ca ("netfilter: nf_nat: add nat hook register functions to nf_nat") Signed-off-by: Dinghao Liu Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index ea923f8cf9c4..b7c3c902290f 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -1174,6 +1174,7 @@ static int __init nf_nat_init(void) ret = register_pernet_subsys(&nat_net_ops); if (ret < 0) { nf_ct_extend_unregister(&nat_extend); + kvfree(nf_nat_bysource); return ret; } -- cgit 1.4.1 From 00cb645fd7e29bdd20967cd20fa8f77bcdf422f9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 18 Nov 2020 13:40:58 +0100 Subject: drm/i915/dsi: Use unconditional msleep for the panel_on_delay when there is no reset-deassert MIPI-sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 25b4620ee822 ("drm/i915/dsi: Skip delays for v3 VBTs in vid-mode") added an intel_dsi_msleep() helper which skips sleeping if the MIPI-sequences have a version of 3 or newer and the panel is in vid-mode; and it moved a bunch of msleep-s over to this new helper. This was based on my reading of the big comment around line 730 which starts with "Panel enable/disable sequences from the VBT spec.", where the "v3 video mode seq" column does not have any wait t# entries. Given that this code has been used on a lot of different devices without issues until now, it seems that my interpretation of the spec here is mostly correct. But now I have encountered one device, an Acer Aspire Switch 10 E SW3-016, where the panel will not light up unless we do actually honor the panel_on_delay after exexuting the MIPI_SEQ_PANEL_ON sequence. What seems to set this model apart is that it is lacking a MIPI_SEQ_DEASSERT_RESET sequence, which is where the power-on delay usually happens. Fix the panel not lighting up on this model by using an unconditional msleep(panel_on_delay) instead of intel_dsi_msleep() when there is no MIPI_SEQ_DEASSERT_RESET sequence. Fixes: 25b4620ee822 ("drm/i915/dsi: Skip delays for v3 VBTs in vid-mode") Signed-off-by: Hans de Goede Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20201118124058.26021-1-hdegoede@redhat.com (cherry picked from commit 6fdb335f1c9c0845b50625de1624d8445c4c4a07) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/vlv_dsi.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index d52f9c177908..f94025ec603a 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -812,10 +812,20 @@ static void intel_dsi_pre_enable(struct intel_atomic_state *state, intel_dsi_prepare(encoder, pipe_config); intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_POWER_ON); - intel_dsi_msleep(intel_dsi, intel_dsi->panel_on_delay); - /* Deassert reset */ - intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); + /* + * Give the panel time to power-on and then deassert its reset. + * Depending on the VBT MIPI sequences version the deassert-seq + * may contain the necessary delay, intel_dsi_msleep() will skip + * the delay in that case. If there is no deassert-seq, then an + * unconditional msleep is used to give the panel time to power-on. + */ + if (dev_priv->vbt.dsi.sequence[MIPI_SEQ_DEASSERT_RESET]) { + intel_dsi_msleep(intel_dsi, intel_dsi->panel_on_delay); + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); + } else { + msleep(intel_dsi->panel_on_delay); + } if (IS_GEMINILAKE(dev_priv)) { glk_cold_boot = glk_dsi_enable_io(encoder); -- cgit 1.4.1 From 057fe3535eb35696ad5a849d01d61efa930d2182 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 4 Jan 2021 20:39:05 +0000 Subject: drm/i915: Disable RPM wakeref assertions during driver shutdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As with the regular suspend paths, also disable the wakeref assertions as we disable the driver during shutdown. Reported-by: Hans de Goede Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2899 Fixes: fe0f1e3bfdfe ("drm/i915: Shut down displays gracefully on reboot") Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Hans de Goede Tested-by: Hans de Goede Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210104203905.19248-1-chris@chris-wilson.co.uk (cherry picked from commit 19fe4ac6f0e7163daf9375a4d39947389ae465fa) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 88ad754962af..99eb0d7bbc44 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1047,6 +1047,8 @@ static void intel_shutdown_encoders(struct drm_i915_private *dev_priv) void i915_driver_shutdown(struct drm_i915_private *i915) { + disable_rpm_wakeref_asserts(&i915->runtime_pm); + i915_gem_suspend(i915); drm_kms_helper_poll_disable(&i915->drm); @@ -1060,6 +1062,8 @@ void i915_driver_shutdown(struct drm_i915_private *i915) intel_suspend_encoders(i915); intel_shutdown_encoders(i915); + + enable_rpm_wakeref_asserts(&i915->runtime_pm); } static bool suspend_to_idle(struct drm_i915_private *dev_priv) -- cgit 1.4.1 From bb83d5fb550bb7db75b29e6342417fda2bbb691c Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 8 Jan 2021 17:28:41 +0200 Subject: drm/i915/backlight: fix CPU mode backlight takeover on LPT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pch_get_backlight(), lpt_get_backlight(), and lpt_set_backlight() functions operate directly on the hardware registers. If inverting the value is needed, using intel_panel_compute_brightness(), it should only be done in the interface between hardware registers and panel->backlight.level. The CPU mode takeover code added in commit 5b1ec9ac7ab5 ("drm/i915/backlight: Fix backlight takeover on LPT, v3.") reads the hardware register and converts to panel->backlight.level correctly, however the value written back should remain in the hardware register "domain". This hasn't been an issue, because GM45 machines are the only known users of i915.invert_brightness and the brightness invert quirk, and without one of them no conversion is made. It's likely nobody's ever hit the problem. Fixes: 5b1ec9ac7ab5 ("drm/i915/backlight: Fix backlight takeover on LPT, v3.") Cc: Maarten Lankhorst Cc: Ville Syrjälä Cc: Lyude Paul Cc: # v5.1+ Reviewed-by: Lyude Paul Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210108152841.6944-1-jani.nikula@intel.com (cherry picked from commit 0d4ced1c5bfe649196877d90442d4fd618e19153) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_panel.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c index 9f23bac0d792..d64fce1a17cb 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.c +++ b/drivers/gpu/drm/i915/display/intel_panel.c @@ -1650,16 +1650,13 @@ static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unus val = pch_get_backlight(connector); else val = lpt_get_backlight(connector); - val = intel_panel_compute_brightness(connector, val); - panel->backlight.level = clamp(val, panel->backlight.min, - panel->backlight.max); if (cpu_mode) { drm_dbg_kms(&dev_priv->drm, "CPU backlight register was enabled, switching to PCH override\n"); /* Write converted CPU PWM value to PCH override register */ - lpt_set_backlight(connector->base.state, panel->backlight.level); + lpt_set_backlight(connector->base.state, val); intel_de_write(dev_priv, BLC_PWM_PCH_CTL1, pch_ctl1 | BLM_PCH_OVERRIDE_ENABLE); @@ -1667,6 +1664,10 @@ static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unus cpu_ctl2 & ~BLM_PWM_ENABLE); } + val = intel_panel_compute_brightness(connector, val); + panel->backlight.level = clamp(val, panel->backlight.min, + panel->backlight.max); + return 0; } -- cgit 1.4.1 From d434ab6db524ab1efd0afad4ffa1ee65ca6ac097 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 11 Jan 2021 04:00:30 +0000 Subject: io_uring: drop mm and files after task_work_run __io_req_task_submit() run by task_work can set mm and files, but io_sq_thread() in some cases, and because __io_sq_thread_acquire_mm() and __io_sq_thread_acquire_files() do a simple current->mm/files check it may end up submitting IO with mm/files of another task. We also need to drop it after in the end to drop potentially grabbed references to them. Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 2f305c097bd5..7af74c1ec909 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7056,6 +7056,7 @@ static int io_sq_thread(void *data) if (sqt_spin || !time_after(jiffies, timeout)) { io_run_task_work(); + io_sq_thread_drop_mm_files(); cond_resched(); if (sqt_spin) timeout = jiffies + sqd->sq_thread_idle; @@ -7093,6 +7094,7 @@ static int io_sq_thread(void *data) } io_run_task_work(); + io_sq_thread_drop_mm_files(); if (cur_css) io_sq_thread_unassociate_blkcg(); -- cgit 1.4.1 From 621fadc22365f3cf307bcd9048e3372e9ee9cdcc Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 11 Jan 2021 04:00:31 +0000 Subject: io_uring: don't take files/mm for a dead task In rare cases a task may be exiting while io_ring_exit_work() trying to cancel/wait its requests. It's ok for __io_sq_thread_acquire_mm() because of SQPOLL check, but is not for __io_sq_thread_acquire_files(). Play safe and fail for both of them. Cc: stable@vger.kernel.org # 5.5+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7af74c1ec909..b0e6d8e607a3 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1106,6 +1106,9 @@ static void io_sq_thread_drop_mm_files(void) static int __io_sq_thread_acquire_files(struct io_ring_ctx *ctx) { + if (current->flags & PF_EXITING) + return -EFAULT; + if (!current->files) { struct files_struct *files; struct nsproxy *nsproxy; @@ -1133,6 +1136,8 @@ static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx) { struct mm_struct *mm; + if (current->flags & PF_EXITING) + return -EFAULT; if (current->mm) return 0; -- cgit 1.4.1 From 2af5268180410b874fc06be91a1b2fbb22b1be0c Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 9 Dec 2020 17:39:52 +0200 Subject: drm/i915/icl: Fix initing the DSI DSC power refcount during HW readout For an enabled DSC during HW readout the corresponding power reference is taken along the CRTC power domain references in get_crtc_power_domains(). Remove the incorrect get ref from the DSI encoder hook. Fixes: 2b68392e638d ("drm/i915/dsi: add support for DSC") Cc: Vandita Kulkarni Cc: Jani Nikula Signed-off-by: Imre Deak Reviewed-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20201209153952.3397959-1-imre.deak@intel.com (cherry picked from commit 3a9ec563a4ff770ae647f6ee539810f1866866c9) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/icl_dsi.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index a9439b415603..b3533a32f8ba 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1616,10 +1616,6 @@ static void gen11_dsi_get_power_domains(struct intel_encoder *encoder, get_dsi_io_power_domains(i915, enc_to_intel_dsi(encoder)); - - if (crtc_state->dsc.compression_enable) - intel_display_power_get(i915, - intel_dsc_power_domain(crtc_state)); } static bool gen11_dsi_get_hw_state(struct intel_encoder *encoder, -- cgit 1.4.1 From a58015d638cd4e4555297b04bec9b49028369075 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 7 Jan 2021 23:23:48 -0800 Subject: ACPI: scan: Harden acpi_device_add() against device ID overflows Linux VM on Hyper-V crashes with the latest mainline: [ 4.069624] detected buffer overflow in strcpy [ 4.077733] kernel BUG at lib/string.c:1149! .. [ 4.085819] RIP: 0010:fortify_panic+0xf/0x11 ... [ 4.085819] Call Trace: [ 4.085819] acpi_device_add.cold.15+0xf2/0xfb [ 4.085819] acpi_add_single_object+0x2a6/0x690 [ 4.085819] acpi_bus_check_add+0xc6/0x280 [ 4.085819] acpi_ns_walk_namespace+0xda/0x1aa [ 4.085819] acpi_walk_namespace+0x9a/0xc2 [ 4.085819] acpi_bus_scan+0x78/0x90 [ 4.085819] acpi_scan_init+0xfa/0x248 [ 4.085819] acpi_init+0x2c1/0x321 [ 4.085819] do_one_initcall+0x44/0x1d0 [ 4.085819] kernel_init_freeable+0x1ab/0x1f4 This is because of the recent buffer overflow detection in the commit 6a39e62abbaf ("lib: string.h: detect intra-object overflow in fortified string functions") Here acpi_device_bus_id->bus_id can only hold 14 characters, while the the acpi_device_hid(device) returns a 22-char string "HYPER_V_GEN_COUNTER_V1". Per ACPI Spec v6.2, Section 6.1.5 _HID (Hardware ID), if the ID is a string, it must be of the form AAA#### or NNNN####, i.e. 7 chars or 8 chars. The field bus_id in struct acpi_device_bus_id was originally defined as char bus_id[9], and later was enlarged to char bus_id[15] in 2007 in the commit bb0958544f3c ("ACPI: use more understandable bus_id for ACPI devices") Fix the issue by changing the field bus_id to const char *, and use kstrdup_const() to initialize it. Signed-off-by: Dexuan Cui Tested-By: Jethro Beekman [ rjw: Subject change, whitespace adjustment ] Cc: All applicable Signed-off-by: Rafael J. Wysocki --- drivers/acpi/internal.h | 2 +- drivers/acpi/scan.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index cb229e24c563..e6a5d997241c 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -97,7 +97,7 @@ void acpi_scan_table_handler(u32 event, void *table, void *context); extern struct list_head acpi_bus_id_list; struct acpi_device_bus_id { - char bus_id[15]; + const char *bus_id; unsigned int instance_no; struct list_head node; }; diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 80b668c80073..58ff36340cd7 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -486,6 +486,7 @@ static void acpi_device_del(struct acpi_device *device) acpi_device_bus_id->instance_no--; else { list_del(&acpi_device_bus_id->node); + kfree_const(acpi_device_bus_id->bus_id); kfree(acpi_device_bus_id); } break; @@ -674,7 +675,14 @@ int acpi_device_add(struct acpi_device *device, } if (!found) { acpi_device_bus_id = new_bus_id; - strcpy(acpi_device_bus_id->bus_id, acpi_device_hid(device)); + acpi_device_bus_id->bus_id = + kstrdup_const(acpi_device_hid(device), GFP_KERNEL); + if (!acpi_device_bus_id->bus_id) { + pr_err(PREFIX "Memory allocation error for bus id\n"); + result = -ENOMEM; + goto err_free_new_bus_id; + } + acpi_device_bus_id->instance_no = 0; list_add_tail(&acpi_device_bus_id->node, &acpi_bus_id_list); } @@ -709,6 +717,11 @@ int acpi_device_add(struct acpi_device *device, if (device->parent) list_del(&device->node); list_del(&device->wakeup_list); + + err_free_new_bus_id: + if (!found) + kfree(new_bus_id); + mutex_unlock(&acpi_device_lock); err_detach: -- cgit 1.4.1 From 843010a815e87b45fc6b64848f02e42f6aee3f22 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Mon, 11 Jan 2021 11:40:33 -0500 Subject: drm/ttm: Fix address passed to dma_mapping_error() in ttm_pool_map() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit check_unmap() is producing a warning about a missing map error check. The return value from dma_map_page() should be checked for an error, not the caller-provided dma_addr. Fixes: d099fc8f540a ("drm/ttm: new TT backend allocation pool v3") Signed-off-by: Jeremy Cline Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/413432/ Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index a00b7ab9c14c..255c45734979 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -190,7 +190,7 @@ static int ttm_pool_map(struct ttm_pool *pool, unsigned int order, size_t size = (1ULL << order) * PAGE_SIZE; addr = dma_map_page(pool->dev, p, 0, size, DMA_BIDIRECTIONAL); - if (dma_mapping_error(pool->dev, **dma_addr)) + if (dma_mapping_error(pool->dev, addr)) return -EFAULT; } -- cgit 1.4.1 From 7bb83f6fc4ee84e95d0ac0d14452c2619fb3fe70 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 8 Jan 2021 13:19:38 +0900 Subject: tracing/kprobes: Do the notrace functions check without kprobes on ftrace Enable the notrace function check on the architecture which doesn't support kprobes on ftrace but support dynamic ftrace. This notrace function check is not only for the kprobes on ftrace but also sw-breakpoint based kprobes. Thus there is no reason to limit this check for the arch which supports kprobes on ftrace. This also changes the dependency of Kconfig. Because kprobe event uses the function tracer's address list for identifying notrace function, if the CONFIG_DYNAMIC_FTRACE=n, it can not check whether the target function is notrace or not. Link: https://lkml.kernel.org/r/20210105065730.2634785-1-naveen.n.rao@linux.vnet.ibm.com Link: https://lkml.kernel.org/r/161007957862.114704.4512260007555399463.stgit@devnote2 Cc: stable@vger.kernel.org Fixes: 45408c4f92506 ("tracing: kprobes: Prohibit probing on notrace function") Acked-by: Naveen N. Rao Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/Kconfig | 2 +- kernel/trace/trace_kprobe.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d5a19413d4f8..c1a62ae7e812 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -538,7 +538,7 @@ config KPROBE_EVENTS config KPROBE_EVENTS_ON_NOTRACE bool "Do NOT protect notrace function from kprobe events" depends on KPROBE_EVENTS - depends on KPROBES_ON_FTRACE + depends on DYNAMIC_FTRACE default n help This is only for the developers who want to debug ftrace itself diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 9c31f42245e9..e6fba1798771 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -434,7 +434,7 @@ static int disable_trace_kprobe(struct trace_event_call *call, return 0; } -#if defined(CONFIG_KPROBES_ON_FTRACE) && \ +#if defined(CONFIG_DYNAMIC_FTRACE) && \ !defined(CONFIG_KPROBE_EVENTS_ON_NOTRACE) static bool __within_notrace_func(unsigned long addr) { -- cgit 1.4.1 From a5e92ef3c3fd46320d4e293bdec0cdd4b80a6e0f Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Sun, 10 Jan 2021 03:11:42 +0100 Subject: drm: Check actual format for legacy pageflip. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With modifiers one can actually have different format_info structs for the same format, which now matters for AMDGPU since we convert implicit modifiers to explicit modifiers with multiple planes. I checked other drivers and it doesn't look like they end up triggering this case so I think this is safe to relax. Signed-off-by: Bas Nieuwenhuizen Reviewed-by: Daniel Vetter Reviewed-by: Zhan Liu Acked-by: Christian König Acked-by: Alex Deucher Fixes: 816853f9dc40 ("drm/amd/display: Set new format info for converted metadata.") Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20210110021142.28221-1-bas@basnieuwenhuizen.nl --- drivers/gpu/drm/drm_plane.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index e6231947f987..a0cb746bcb0a 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -1163,7 +1163,14 @@ retry: if (ret) goto out; - if (old_fb->format != fb->format) { + /* + * Only check the FOURCC format code, excluding modifiers. This is + * enough for all legacy drivers. Atomic drivers have their own + * checks in their ->atomic_check implementation, which will + * return -EINVAL if any hw or driver constraint is violated due + * to modifier changes. + */ + if (old_fb->format->format != fb->format->format) { DRM_DEBUG_KMS("Page flip is not allowed to change frame buffer format.\n"); ret = -EINVAL; goto out; -- cgit 1.4.1 From 2896c93811e39d63a4d9b63ccf12a8fbc226e5e4 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Tue, 3 Nov 2020 02:21:58 +0100 Subject: scsi: target: Fix XCOPY NAA identifier lookup When attempting to match EXTENDED COPY CSCD descriptors with corresponding se_devices, target_xcopy_locate_se_dev_e4() currently iterates over LIO's global devices list which includes all configured backstores. This change ensures that only initiator-accessible backstores are considered during CSCD descriptor lookup, according to the session's se_node_acl LUN list. To avoid LUN removal race conditions, device pinning is changed from being configfs based to instead using the se_node_acl lun_ref. Reference: CVE-2020-28374 Fixes: cbf031f425fd ("target: Add support for EXTENDED_COPY copy offload emulation") Reviewed-by: Lee Duncan Signed-off-by: David Disseldorp Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/target_core_xcopy.c | 119 ++++++++++++++++++++++--------------- drivers/target/target_core_xcopy.h | 1 + 2 files changed, 71 insertions(+), 49 deletions(-) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 44e15d7fb2f0..66d6f1d06f21 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -46,60 +46,83 @@ static int target_xcopy_gen_naa_ieee(struct se_device *dev, unsigned char *buf) return 0; } -struct xcopy_dev_search_info { - const unsigned char *dev_wwn; - struct se_device *found_dev; -}; - +/** + * target_xcopy_locate_se_dev_e4_iter - compare XCOPY NAA device identifiers + * + * @se_dev: device being considered for match + * @dev_wwn: XCOPY requested NAA dev_wwn + * @return: 1 on match, 0 on no-match + */ static int target_xcopy_locate_se_dev_e4_iter(struct se_device *se_dev, - void *data) + const unsigned char *dev_wwn) { - struct xcopy_dev_search_info *info = data; unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; int rc; - if (!se_dev->dev_attrib.emulate_3pc) + if (!se_dev->dev_attrib.emulate_3pc) { + pr_debug("XCOPY: emulate_3pc disabled on se_dev %p\n", se_dev); return 0; + } memset(&tmp_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN); target_xcopy_gen_naa_ieee(se_dev, &tmp_dev_wwn[0]); - rc = memcmp(&tmp_dev_wwn[0], info->dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN); - if (rc != 0) - return 0; - - info->found_dev = se_dev; - pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev); - - rc = target_depend_item(&se_dev->dev_group.cg_item); + rc = memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN); if (rc != 0) { - pr_err("configfs_depend_item attempt failed: %d for se_dev: %p\n", - rc, se_dev); - return rc; + pr_debug("XCOPY: skip non-matching: %*ph\n", + XCOPY_NAA_IEEE_REGEX_LEN, tmp_dev_wwn); + return 0; } + pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev); - pr_debug("Called configfs_depend_item for se_dev: %p se_dev->se_dev_group: %p\n", - se_dev, &se_dev->dev_group); return 1; } -static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn, - struct se_device **found_dev) +static int target_xcopy_locate_se_dev_e4(struct se_session *sess, + const unsigned char *dev_wwn, + struct se_device **_found_dev, + struct percpu_ref **_found_lun_ref) { - struct xcopy_dev_search_info info; - int ret; - - memset(&info, 0, sizeof(info)); - info.dev_wwn = dev_wwn; - - ret = target_for_each_device(target_xcopy_locate_se_dev_e4_iter, &info); - if (ret == 1) { - *found_dev = info.found_dev; - return 0; - } else { - pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n"); - return -EINVAL; + struct se_dev_entry *deve; + struct se_node_acl *nacl; + struct se_lun *this_lun = NULL; + struct se_device *found_dev = NULL; + + /* cmd with NULL sess indicates no associated $FABRIC_MOD */ + if (!sess) + goto err_out; + + pr_debug("XCOPY 0xe4: searching for: %*ph\n", + XCOPY_NAA_IEEE_REGEX_LEN, dev_wwn); + + nacl = sess->se_node_acl; + rcu_read_lock(); + hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) { + struct se_device *this_dev; + int rc; + + this_lun = rcu_dereference(deve->se_lun); + this_dev = rcu_dereference_raw(this_lun->lun_se_dev); + + rc = target_xcopy_locate_se_dev_e4_iter(this_dev, dev_wwn); + if (rc) { + if (percpu_ref_tryget_live(&this_lun->lun_ref)) + found_dev = this_dev; + break; + } } + rcu_read_unlock(); + if (found_dev == NULL) + goto err_out; + + pr_debug("lun_ref held for se_dev: %p se_dev->se_dev_group: %p\n", + found_dev, &found_dev->dev_group); + *_found_dev = found_dev; + *_found_lun_ref = &this_lun->lun_ref; + return 0; +err_out: + pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n"); + return -EINVAL; } static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op *xop, @@ -246,12 +269,16 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd, switch (xop->op_origin) { case XCOL_SOURCE_RECV_OP: - rc = target_xcopy_locate_se_dev_e4(xop->dst_tid_wwn, - &xop->dst_dev); + rc = target_xcopy_locate_se_dev_e4(se_cmd->se_sess, + xop->dst_tid_wwn, + &xop->dst_dev, + &xop->remote_lun_ref); break; case XCOL_DEST_RECV_OP: - rc = target_xcopy_locate_se_dev_e4(xop->src_tid_wwn, - &xop->src_dev); + rc = target_xcopy_locate_se_dev_e4(se_cmd->se_sess, + xop->src_tid_wwn, + &xop->src_dev, + &xop->remote_lun_ref); break; default: pr_err("XCOPY CSCD descriptor IDs not found in CSCD list - " @@ -391,18 +418,12 @@ static int xcopy_pt_get_cmd_state(struct se_cmd *se_cmd) static void xcopy_pt_undepend_remotedev(struct xcopy_op *xop) { - struct se_device *remote_dev; - if (xop->op_origin == XCOL_SOURCE_RECV_OP) - remote_dev = xop->dst_dev; + pr_debug("putting dst lun_ref for %p\n", xop->dst_dev); else - remote_dev = xop->src_dev; - - pr_debug("Calling configfs_undepend_item for" - " remote_dev: %p remote_dev->dev_group: %p\n", - remote_dev, &remote_dev->dev_group.cg_item); + pr_debug("putting src lun_ref for %p\n", xop->src_dev); - target_undepend_item(&remote_dev->dev_group.cg_item); + percpu_ref_put(xop->remote_lun_ref); } static void xcopy_pt_release_cmd(struct se_cmd *se_cmd) diff --git a/drivers/target/target_core_xcopy.h b/drivers/target/target_core_xcopy.h index c56a1bde9417..e5f20005179a 100644 --- a/drivers/target/target_core_xcopy.h +++ b/drivers/target/target_core_xcopy.h @@ -27,6 +27,7 @@ struct xcopy_op { struct se_device *dst_dev; unsigned char dst_tid_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; unsigned char local_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; + struct percpu_ref *remote_lun_ref; sector_t src_lba; sector_t dst_lba; -- cgit 1.4.1 From 938288349ca8a9d4b936bf5d2f6dd4526a598974 Mon Sep 17 00:00:00 2001 From: Seb Laveze Date: Mon, 11 Jan 2021 09:14:07 +0100 Subject: dt-bindings: net: dwmac: fix queue priority documentation The priority field is not the queue priority (queue priority is fixed) but a bitmask of priorities assigned to this queue. In receive, priorities relate to tagged frames priorities. In transmit, priorities relate to PFC frames. Signed-off-by: Seb Laveze Link: https://lore.kernel.org/r/20210111081406.1348622-1-sebastien.laveze@oss.nxp.com Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/snps,dwmac.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index b2f6083f556a..dfbf5fe4547a 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -161,7 +161,8 @@ properties: * snps,route-dcbcp, DCB Control Packets * snps,route-up, Untagged Packets * snps,route-multi-broad, Multicast & Broadcast Packets - * snps,priority, RX queue priority (Range 0x0 to 0xF) + * snps,priority, bitmask of the tagged frames priorities assigned to + the queue snps,mtl-tx-config: $ref: /schemas/types.yaml#/definitions/phandle @@ -188,7 +189,10 @@ properties: * snps,idle_slope, unlock on WoL * snps,high_credit, max write outstanding req. limit * snps,low_credit, max read outstanding req. limit - * snps,priority, TX queue priority (Range 0x0 to 0xF) + * snps,priority, bitmask of the priorities assigned to the queue. + When a PFC frame is received with priorities matching the bitmask, + the queue is blocked from transmitting for the pause time specified + in the PFC frame. snps,reset-gpio: deprecated: true -- cgit 1.4.1 From 6f83802a1a06e74eafbdbc9b52c05516d3083d02 Mon Sep 17 00:00:00 2001 From: Stefan Chulski Date: Sun, 10 Jan 2021 21:23:02 +0200 Subject: net: mvpp2: Remove Pause and Asym_Pause support Packet Processor hardware not connected to MAC flow control unit and cannot support TX flow control. This patch disable flow control support. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Stefan Chulski Acked-by: Marcin Wojtas Link: https://lore.kernel.org/r/1610306582-16641-1-git-send-email-stefanc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 4b1808acef58..358119d98358 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -5882,8 +5882,6 @@ static void mvpp2_phylink_validate(struct phylink_config *config, phylink_set(mask, Autoneg); phylink_set_port_modes(mask); - phylink_set(mask, Pause); - phylink_set(mask, Asym_Pause); switch (state->interface) { case PHY_INTERFACE_MODE_10GBASER: -- cgit 1.4.1 From e56b3d94d939f52d46209b9e1b6700c5bfff3123 Mon Sep 17 00:00:00 2001 From: Andrey Zhizhikin Date: Fri, 8 Jan 2021 09:58:39 +0000 Subject: rndis_host: set proper input size for OID_GEN_PHYSICAL_MEDIUM request MSFT ActiveSync implementation requires that the size of the response for incoming query is to be provided in the request input length. Failure to set the input size proper results in failed request transfer, where the ActiveSync counterpart reports the NDIS_STATUS_INVALID_LENGTH (0xC0010014L) error. Set the input size for OID_GEN_PHYSICAL_MEDIUM query to the expected size of the response in order for the ActiveSync to properly respond to the request. Fixes: 039ee17d1baa ("rndis_host: Add RNDIS physical medium checking into generic_rndis_bind()") Signed-off-by: Andrey Zhizhikin Link: https://lore.kernel.org/r/20210108095839.3335-1-andrey.zhizhikin@leica-geosystems.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/rndis_host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index 6609d21ef894..f813ca9dec53 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -387,7 +387,7 @@ generic_rndis_bind(struct usbnet *dev, struct usb_interface *intf, int flags) reply_len = sizeof *phym; retval = rndis_query(dev, intf, u.buf, RNDIS_OID_GEN_PHYSICAL_MEDIUM, - 0, (void **) &phym, &reply_len); + reply_len, (void **)&phym, &reply_len); if (retval != 0 || !phym) { /* OID is optional so don't fail here. */ phym_unspec = cpu_to_le32(RNDIS_PHYSICAL_MEDIUM_UNSPECIFIED); -- cgit 1.4.1 From 29766bcffad03da66892bef82674883e31f78fec Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 9 Jan 2021 17:18:32 -0500 Subject: net: support kmap_local forced debugging in skb_frag_foreach Skb frags may be backed by highmem and/or compound pages. Highmem pages need kmap_atomic mappings to access. But kmap_atomic maps a single page, not the entire compound page. skb_foreach_page iterates over an skb frag, in one step in the common case, page by page only if kmap_atomic must be called for each page. The decision logic is captured in skb_frag_must_loop. CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP extends kmap from highmem to all pages, to increase code coverage. Extend skb_frag_must_loop to this new condition. Link: https://lore.kernel.org/linux-mm/20210106180132.41dc249d@gandalf.local.home/ Fixes: 0e91a0c6984c ("mm/highmem: Provide CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP") Reported-by: Steven Rostedt (VMware) Signed-off-by: Linus Torvalds Signed-off-by: Willem de Bruijn Tested-by: Steven Rostedt (VMware) Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 333bcdc39635..c858adfb5a82 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -366,7 +366,7 @@ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) static inline bool skb_frag_must_loop(struct page *p) { #if defined(CONFIG_HIGHMEM) - if (PageHighMem(p)) + if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) || PageHighMem(p)) return true; #endif return false; -- cgit 1.4.1 From 97550f6fa59254435d864b92603de3ca4b5a99f8 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 9 Jan 2021 17:18:33 -0500 Subject: net: compound page support in skb_seq_read skb_seq_read iterates over an skb, returning pointer and length of the next data range with each call. It relies on kmap_atomic to access highmem pages when needed. An skb frag may be backed by a compound page, but kmap_atomic maps only a single page. There are not enough kmap slots to always map all pages concurrently. Instead, if kmap_atomic is needed, iterate over each page. As this increases the number of calls, avoid this unless needed. The necessary condition is captured in skb_frag_must_loop. I tried to make the change as obvious as possible. It should be easy to verify that nothing changes if skb_frag_must_loop returns false. Tested: On an x86 platform with CONFIG_HIGHMEM=y CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP=y CONFIG_NETFILTER_XT_MATCH_STRING=y Run ip link set dev lo mtu 1500 iptables -A OUTPUT -m string --string 'badstring' -algo bm -j ACCEPT dd if=/dev/urandom of=in bs=1M count=20 nc -l -p 8000 > /dev/null & nc -w 1 -q 0 localhost 8000 < in Signed-off-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 1 + net/core/skbuff.c | 28 +++++++++++++++++++++++----- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c858adfb5a82..5f60c9e907c9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1203,6 +1203,7 @@ struct skb_seq_state { struct sk_buff *root_skb; struct sk_buff *cur_skb; __u8 *frag_data; + __u32 frag_off; }; void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b6f2b520a9b7..0da035c1e53f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3442,6 +3442,7 @@ void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, st->root_skb = st->cur_skb = skb; st->frag_idx = st->stepped_offset = 0; st->frag_data = NULL; + st->frag_off = 0; } EXPORT_SYMBOL(skb_prepare_seq_read); @@ -3496,14 +3497,27 @@ next_skb: st->stepped_offset += skb_headlen(st->cur_skb); while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { + unsigned int pg_idx, pg_off, pg_sz; + frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; - block_limit = skb_frag_size(frag) + st->stepped_offset; + pg_idx = 0; + pg_off = skb_frag_off(frag); + pg_sz = skb_frag_size(frag); + + if (skb_frag_must_loop(skb_frag_page(frag))) { + pg_idx = (pg_off + st->frag_off) >> PAGE_SHIFT; + pg_off = offset_in_page(pg_off + st->frag_off); + pg_sz = min_t(unsigned int, pg_sz - st->frag_off, + PAGE_SIZE - pg_off); + } + + block_limit = pg_sz + st->stepped_offset; if (abs_offset < block_limit) { if (!st->frag_data) - st->frag_data = kmap_atomic(skb_frag_page(frag)); + st->frag_data = kmap_atomic(skb_frag_page(frag) + pg_idx); - *data = (u8 *) st->frag_data + skb_frag_off(frag) + + *data = (u8 *)st->frag_data + pg_off + (abs_offset - st->stepped_offset); return block_limit - abs_offset; @@ -3514,8 +3528,12 @@ next_skb: st->frag_data = NULL; } - st->frag_idx++; - st->stepped_offset += skb_frag_size(frag); + st->stepped_offset += pg_sz; + st->frag_off += pg_sz; + if (st->frag_off == skb_frag_size(frag)) { + st->frag_off = 0; + st->frag_idx++; + } } if (st->frag_data) { -- cgit 1.4.1 From 9bd6b629c39e3fa9e14243a6d8820492be1a5b2e Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 9 Jan 2021 17:18:34 -0500 Subject: esp: avoid unneeded kmap_atomic call esp(6)_output_head uses skb_page_frag_refill to allocate a buffer for the esp trailer. It accesses the page with kmap_atomic to handle highmem. But skb_page_frag_refill can return compound pages, of which kmap_atomic only maps the first underlying page. skb_page_frag_refill does not return highmem, because flag __GFP_HIGHMEM is not set. ESP uses it in the same manner as TCP. That also does not call kmap_atomic, but directly uses page_address, in skb_copy_to_page_nocache. Do the same for ESP. This issue has become easier to trigger with recent kmap local debugging feature CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP. Fixes: cac2661c53f3 ("esp4: Avoid skb_cow_data whenever possible") Fixes: 03e2a30f6a27 ("esp6: Avoid skb_cow_data whenever possible") Signed-off-by: Willem de Bruijn Acked-by: Steffen Klassert Signed-off-by: Jakub Kicinski --- net/ipv4/esp4.c | 7 +------ net/ipv6/esp6.c | 7 +------ 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 8b07f3a4f2db..a3271ec3e162 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -443,7 +443,6 @@ static int esp_output_encap(struct xfrm_state *x, struct sk_buff *skb, int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { u8 *tail; - u8 *vaddr; int nfrags; int esph_offset; struct page *page; @@ -485,14 +484,10 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * page = pfrag->page; get_page(page); - vaddr = kmap_atomic(page); - - tail = vaddr + pfrag->offset; + tail = page_address(page) + pfrag->offset; esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto); - kunmap_atomic(vaddr); - nfrags = skb_shinfo(skb)->nr_frags; __skb_fill_page_desc(skb, nfrags, page, pfrag->offset, diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 52c2f063529f..2b804fcebcc6 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -478,7 +478,6 @@ static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb, int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { u8 *tail; - u8 *vaddr; int nfrags; int esph_offset; struct page *page; @@ -519,14 +518,10 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info page = pfrag->page; get_page(page); - vaddr = kmap_atomic(page); - - tail = vaddr + pfrag->offset; + tail = page_address(page) + pfrag->offset; esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto); - kunmap_atomic(vaddr); - nfrags = skb_shinfo(skb)->nr_frags; __skb_fill_page_desc(skb, nfrags, page, pfrag->offset, -- cgit 1.4.1 From 2225a8dda263edc35a0e8b858fe2945cf6240fde Mon Sep 17 00:00:00 2001 From: Ariel Marcovitch Date: Sat, 2 Jan 2021 22:11:56 +0200 Subject: powerpc: Fix alignment bug within the init sections This is a bug that causes early crashes in builds with an .exit.text section smaller than a page and an .init.text section that ends in the beginning of a physical page (this is kinda random, which might explain why this wasn't really encountered before). The init sections are ordered like this: .init.text .exit.text .init.data Currently, these sections aren't page aligned. Because the init code might become read-only at runtime and because the .init.text section can potentially reside on the same physical page as .init.data, the beginning of .init.data might be mapped read-only along with .init.text. Then when the kernel tries to modify a variable in .init.data (like kthreadd_done, used in kernel_init()) the kernel panics. To avoid this, make _einittext page aligned and also align .exit.text to make sure .init.data is always seperated from the text segments. Fixes: 060ef9d89d18 ("powerpc32: PAGE_EXEC required for inittext") Signed-off-by: Ariel Marcovitch Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210102201156.10805-1-ariel.marcovitch@gmail.com --- arch/powerpc/kernel/vmlinux.lds.S | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 8e0b1298bf19..4ab426b8b0e0 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -187,6 +187,12 @@ SECTIONS .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { _sinittext = .; INIT_TEXT + + /* + *.init.text might be RO so we must ensure this section ends on + * a page boundary. + */ + . = ALIGN(PAGE_SIZE); _einittext = .; #ifdef CONFIG_PPC64 *(.tramp.ftrace.init); @@ -200,6 +206,8 @@ SECTIONS EXIT_TEXT } + . = ALIGN(PAGE_SIZE); + INIT_DATA_SECTION(16) . = ALIGN(8); -- cgit 1.4.1 From bb52cb0dec8d2fecdb22843a805131478a180728 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 11 Jan 2021 14:44:57 +0100 Subject: drm/ttm: make the pool shrinker lock a mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit set_pages_wb() might sleep and so we can't do this in an atomic context. Signed-off-by: Christian König Reported-by: Mikhail Gavrilov Tested-by: Mikhail Gavrilov Fixes: d099fc8f540a ("drm/ttm: new TT backend allocation pool v3") Reviewed-by: Huang Rui Link: https://patchwork.freedesktop.org/patch/413409/ --- drivers/gpu/drm/ttm/ttm_pool.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 255c45734979..8cd776adc592 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -66,7 +66,7 @@ static struct ttm_pool_type global_uncached[MAX_ORDER]; static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER]; static struct ttm_pool_type global_dma32_uncached[MAX_ORDER]; -static spinlock_t shrinker_lock; +static struct mutex shrinker_lock; static struct list_head shrinker_list; static struct shrinker mm_shrinker; @@ -249,9 +249,9 @@ static void ttm_pool_type_init(struct ttm_pool_type *pt, struct ttm_pool *pool, spin_lock_init(&pt->lock); INIT_LIST_HEAD(&pt->pages); - spin_lock(&shrinker_lock); + mutex_lock(&shrinker_lock); list_add_tail(&pt->shrinker_list, &shrinker_list); - spin_unlock(&shrinker_lock); + mutex_unlock(&shrinker_lock); } /* Remove a pool_type from the global shrinker list and free all pages */ @@ -259,9 +259,9 @@ static void ttm_pool_type_fini(struct ttm_pool_type *pt) { struct page *p, *tmp; - spin_lock(&shrinker_lock); + mutex_lock(&shrinker_lock); list_del(&pt->shrinker_list); - spin_unlock(&shrinker_lock); + mutex_unlock(&shrinker_lock); list_for_each_entry_safe(p, tmp, &pt->pages, lru) ttm_pool_free_page(pt->pool, pt->caching, pt->order, p); @@ -302,7 +302,7 @@ static unsigned int ttm_pool_shrink(void) unsigned int num_freed; struct page *p; - spin_lock(&shrinker_lock); + mutex_lock(&shrinker_lock); pt = list_first_entry(&shrinker_list, typeof(*pt), shrinker_list); p = ttm_pool_type_take(pt); @@ -314,7 +314,7 @@ static unsigned int ttm_pool_shrink(void) } list_move_tail(&pt->shrinker_list, &shrinker_list); - spin_unlock(&shrinker_lock); + mutex_unlock(&shrinker_lock); return num_freed; } @@ -564,7 +564,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m) { unsigned int i; - spin_lock(&shrinker_lock); + mutex_lock(&shrinker_lock); seq_puts(m, "\t "); for (i = 0; i < MAX_ORDER; ++i) @@ -600,7 +600,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m) seq_printf(m, "\ntotal\t: %8lu of %8lu\n", atomic_long_read(&allocated_pages), page_pool_size); - spin_unlock(&shrinker_lock); + mutex_unlock(&shrinker_lock); return 0; } @@ -644,7 +644,7 @@ int ttm_pool_mgr_init(unsigned long num_pages) if (!page_pool_size) page_pool_size = num_pages; - spin_lock_init(&shrinker_lock); + mutex_init(&shrinker_lock); INIT_LIST_HEAD(&shrinker_list); for (i = 0; i < MAX_ORDER; ++i) { -- cgit 1.4.1 From 2d6ffc63f12417b979955a5b22ad9a76d2af5de9 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 31 Dec 2020 08:53:20 +0800 Subject: iommu/vt-d: Fix unaligned addresses for intel_flush_svm_range_dev() The VT-d hardware will ignore those Addr bits which have been masked by the AM field in the PASID-based-IOTLB invalidation descriptor. As the result, if the starting address in the descriptor is not aligned with the address mask, some IOTLB caches might not invalidate. Hence people will see below errors. [ 1093.704661] dmar_fault: 29 callbacks suppressed [ 1093.704664] DMAR: DRHD: handling fault status reg 3 [ 1093.712738] DMAR: [DMA Read] Request device [7a:02.0] PASID 2 fault addr 7f81c968d000 [fault reason 113] SM: Present bit in first-level paging entry is clear Fix this by using aligned address for PASID-based-IOTLB invalidation. Fixes: 1c4f88b7f1f9 ("iommu/vt-d: Shared virtual address in scalable mode") Reported-and-tested-by: Guo Kaijie Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20201231005323.2178523-2-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/svm.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 790ef3497e7e..18a9f05df407 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -118,8 +118,10 @@ void intel_svm_check(struct intel_iommu *iommu) iommu->flags |= VTD_FLAG_SVM_CAPABLE; } -static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev, - unsigned long address, unsigned long pages, int ih) +static void __flush_svm_range_dev(struct intel_svm *svm, + struct intel_svm_dev *sdev, + unsigned long address, + unsigned long pages, int ih) { struct qi_desc desc; @@ -170,6 +172,22 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d } } +static void intel_flush_svm_range_dev(struct intel_svm *svm, + struct intel_svm_dev *sdev, + unsigned long address, + unsigned long pages, int ih) +{ + unsigned long shift = ilog2(__roundup_pow_of_two(pages)); + unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift)); + unsigned long start = ALIGN_DOWN(address, align); + unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align); + + while (start < end) { + __flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih); + start += align; + } +} + static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, unsigned long pages, int ih) { -- cgit 1.4.1 From b812834b5329fe78d643c9a61350d227db904361 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sat, 9 Jan 2021 17:56:21 +0100 Subject: iommu: arm-smmu-qcom: Add sdm630/msm8998 compatibles for qcom quirks SDM630 and MSM8998 are among the SoCs that use Qualcomm's implementation of SMMUv2 which has already proven to be problematic over the years. Add their compatibles to the lookup list to prevent the platforms from being shut down by the hypervisor at MMU probe. Signed-off-by: Konrad Dybcio Signed-off-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20210109165622.149777-1-konrad.dybcio@somainline.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 1b83d140742f..bcda17012aee 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -325,7 +325,9 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, } static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { + { .compatible = "qcom,msm8998-smmu-v2" }, { .compatible = "qcom,sc7180-smmu-500" }, + { .compatible = "qcom,sdm630-smmu-v2" }, { .compatible = "qcom,sdm845-smmu-500" }, { .compatible = "qcom,sm8150-smmu-500" }, { .compatible = "qcom,sm8250-smmu-500" }, -- cgit 1.4.1 From 694a1c0adebee9152a9ba0320468f7921aca647d Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Mon, 28 Dec 2020 09:26:14 +0800 Subject: iommu/vt-d: Fix duplicate included linux/dma-map-ops.h linux/dma-map-ops.h is included more than once, Remove the one that isn't necessary. Signed-off-by: Tian Tao Acked-by: Lu Baolu Link: https://lore.kernel.org/r/1609118774-10083-1-git-send-email-tiantao6@hisilicon.com Signed-off-by: Will Deacon --- drivers/iommu/intel/iommu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 65cf06d70bf4..f665322a0991 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include -- cgit 1.4.1 From ffaf97899c4a58b9fefb11534f730785443611a8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Jan 2021 22:52:18 +0000 Subject: drm/i915/gt: Limit VFE threads based on GT MEDIA_STATE_VFE only accepts the 'maximum number of threads' in the range [0, n-1] where n is #EU * (#threads/EU) with the number of threads based on plaform and the number of EU based on the number of slices and subslices. This is a fixed number per platform/gt, so appropriately limit the number of threads we spawn to match the device. v2: Oversaturate the system with tasks to force execution on every HW thread; if the thread idles it is returned to the pool and may be reused again before an unused thread. v3: Fix more state commands, which was causing Baytrail to barf. v4: STATE_CACHE_INVALIDATE requires a stall on Ivybridge Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2024 Fixes: 47f8253d2b89 ("drm/i915/gen7: Clear all EU/L3 residual contexts") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Prathap Kumar Valsan Cc: Akeem G Abodunrin Cc: Jon Bloomfield Cc: Rodrigo Vivi Cc: Randy Wright Cc: stable@vger.kernel.org # v5.7+ Reviewed-by: Akeem G Abodunrin Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210111225220.3483-1-chris@chris-wilson.co.uk (cherry picked from commit eebfb32e26851662d24ea86dd381fd0f83cd4b47) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/gen7_renderclear.c | 157 +++++++++++++++++------------ 1 file changed, 94 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c index d93d85cd3027..94465374ca2f 100644 --- a/drivers/gpu/drm/i915/gt/gen7_renderclear.c +++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c @@ -7,8 +7,6 @@ #include "i915_drv.h" #include "intel_gpu_commands.h" -#define MAX_URB_ENTRIES 64 -#define STATE_SIZE (4 * 1024) #define GT3_INLINE_DATA_DELAYS 0x1E00 #define batch_advance(Y, CS) GEM_BUG_ON((Y)->end != (CS)) @@ -34,38 +32,59 @@ struct batch_chunk { }; struct batch_vals { - u32 max_primitives; - u32 max_urb_entries; - u32 cmd_size; - u32 state_size; + u32 max_threads; u32 state_start; - u32 batch_size; + u32 surface_start; u32 surface_height; u32 surface_width; - u32 scratch_size; - u32 max_size; + u32 size; }; +static inline int num_primitives(const struct batch_vals *bv) +{ + /* + * We need to saturate the GPU with work in order to dispatch + * a shader on every HW thread, and clear the thread-local registers. + * In short, we have to dispatch work faster than the shaders can + * run in order to fill the EU and occupy each HW thread. + */ + return bv->max_threads; +} + static void batch_get_defaults(struct drm_i915_private *i915, struct batch_vals *bv) { if (IS_HASWELL(i915)) { - bv->max_primitives = 280; - bv->max_urb_entries = MAX_URB_ENTRIES; + switch (INTEL_INFO(i915)->gt) { + default: + case 1: + bv->max_threads = 70; + break; + case 2: + bv->max_threads = 140; + break; + case 3: + bv->max_threads = 280; + break; + } bv->surface_height = 16 * 16; bv->surface_width = 32 * 2 * 16; } else { - bv->max_primitives = 128; - bv->max_urb_entries = MAX_URB_ENTRIES / 2; + switch (INTEL_INFO(i915)->gt) { + default: + case 1: /* including vlv */ + bv->max_threads = 36; + break; + case 2: + bv->max_threads = 128; + break; + } bv->surface_height = 16 * 8; bv->surface_width = 32 * 16; } - bv->cmd_size = bv->max_primitives * 4096; - bv->state_size = STATE_SIZE; - bv->state_start = bv->cmd_size; - bv->batch_size = bv->cmd_size + bv->state_size; - bv->scratch_size = bv->surface_height * bv->surface_width; - bv->max_size = bv->batch_size + bv->scratch_size; + bv->state_start = round_up(SZ_1K + num_primitives(bv) * 64, SZ_4K); + bv->surface_start = bv->state_start + SZ_4K; + bv->size = bv->surface_start + bv->surface_height * bv->surface_width; } static void batch_init(struct batch_chunk *bc, @@ -155,7 +174,8 @@ static u32 gen7_fill_binding_table(struct batch_chunk *state, const struct batch_vals *bv) { - u32 surface_start = gen7_fill_surface_state(state, bv->batch_size, bv); + u32 surface_start = + gen7_fill_surface_state(state, bv->surface_start, bv); u32 *cs = batch_alloc_items(state, 32, 8); u32 offset = batch_offset(state, cs); @@ -214,9 +234,9 @@ static void gen7_emit_state_base_address(struct batch_chunk *batch, u32 surface_state_base) { - u32 *cs = batch_alloc_items(batch, 0, 12); + u32 *cs = batch_alloc_items(batch, 0, 10); - *cs++ = STATE_BASE_ADDRESS | (12 - 2); + *cs++ = STATE_BASE_ADDRESS | (10 - 2); /* general */ *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; /* surface */ @@ -233,8 +253,6 @@ gen7_emit_state_base_address(struct batch_chunk *batch, *cs++ = BASE_ADDRESS_MODIFY; *cs++ = 0; *cs++ = BASE_ADDRESS_MODIFY; - *cs++ = 0; - *cs++ = 0; batch_advance(batch, cs); } @@ -244,8 +262,7 @@ gen7_emit_vfe_state(struct batch_chunk *batch, u32 urb_size, u32 curbe_size, u32 mode) { - u32 urb_entries = bv->max_urb_entries; - u32 threads = bv->max_primitives - 1; + u32 threads = bv->max_threads - 1; u32 *cs = batch_alloc_items(batch, 32, 8); *cs++ = MEDIA_VFE_STATE | (8 - 2); @@ -254,7 +271,7 @@ gen7_emit_vfe_state(struct batch_chunk *batch, *cs++ = 0; /* number of threads & urb entries for GPGPU vs Media Mode */ - *cs++ = threads << 16 | urb_entries << 8 | mode << 2; + *cs++ = threads << 16 | 1 << 8 | mode << 2; *cs++ = 0; @@ -293,17 +310,12 @@ gen7_emit_media_object(struct batch_chunk *batch, { unsigned int x_offset = (media_object_index % 16) * 64; unsigned int y_offset = (media_object_index / 16) * 16; - unsigned int inline_data_size; - unsigned int media_batch_size; - unsigned int i; + unsigned int pkt = 6 + 3; u32 *cs; - inline_data_size = 112 * 8; - media_batch_size = inline_data_size + 6; - - cs = batch_alloc_items(batch, 8, media_batch_size); + cs = batch_alloc_items(batch, 8, pkt); - *cs++ = MEDIA_OBJECT | (media_batch_size - 2); + *cs++ = MEDIA_OBJECT | (pkt - 2); /* interface descriptor offset */ *cs++ = 0; @@ -317,25 +329,44 @@ gen7_emit_media_object(struct batch_chunk *batch, *cs++ = 0; /* inline */ - *cs++ = (y_offset << 16) | (x_offset); + *cs++ = y_offset << 16 | x_offset; *cs++ = 0; *cs++ = GT3_INLINE_DATA_DELAYS; - for (i = 3; i < inline_data_size; i++) - *cs++ = 0; batch_advance(batch, cs); } static void gen7_emit_pipeline_flush(struct batch_chunk *batch) { - u32 *cs = batch_alloc_items(batch, 0, 5); + u32 *cs = batch_alloc_items(batch, 0, 4); - *cs++ = GFX_OP_PIPE_CONTROL(5); - *cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE | - PIPE_CONTROL_GLOBAL_GTT_IVB; + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_CS_STALL; *cs++ = 0; *cs++ = 0; + + batch_advance(batch, cs); +} + +static void gen7_emit_pipeline_invalidate(struct batch_chunk *batch) +{ + u32 *cs = batch_alloc_items(batch, 0, 8); + + /* ivb: Stall before STATE_CACHE_INVALIDATE */ + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_STALL_AT_SCOREBOARD | + PIPE_CONTROL_CS_STALL; + *cs++ = 0; + *cs++ = 0; + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE; *cs++ = 0; + *cs++ = 0; + batch_advance(batch, cs); } @@ -344,34 +375,34 @@ static void emit_batch(struct i915_vma * const vma, const struct batch_vals *bv) { struct drm_i915_private *i915 = vma->vm->i915; - unsigned int desc_count = 64; - const u32 urb_size = 112; + const unsigned int desc_count = 1; + const unsigned int urb_size = 1; struct batch_chunk cmds, state; - u32 interface_descriptor; + u32 descriptors; unsigned int i; - batch_init(&cmds, vma, start, 0, bv->cmd_size); - batch_init(&state, vma, start, bv->state_start, bv->state_size); + batch_init(&cmds, vma, start, 0, bv->state_start); + batch_init(&state, vma, start, bv->state_start, SZ_4K); - interface_descriptor = - gen7_fill_interface_descriptor(&state, bv, - IS_HASWELL(i915) ? - &cb_kernel_hsw : - &cb_kernel_ivb, - desc_count); - gen7_emit_pipeline_flush(&cmds); + descriptors = gen7_fill_interface_descriptor(&state, bv, + IS_HASWELL(i915) ? + &cb_kernel_hsw : + &cb_kernel_ivb, + desc_count); + + gen7_emit_pipeline_invalidate(&cmds); batch_add(&cmds, PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); batch_add(&cmds, MI_NOOP); - gen7_emit_state_base_address(&cmds, interface_descriptor); + gen7_emit_pipeline_invalidate(&cmds); + gen7_emit_pipeline_flush(&cmds); + gen7_emit_state_base_address(&cmds, descriptors); + gen7_emit_pipeline_invalidate(&cmds); gen7_emit_vfe_state(&cmds, bv, urb_size - 1, 0, 0); + gen7_emit_interface_descriptor_load(&cmds, descriptors, desc_count); - gen7_emit_interface_descriptor_load(&cmds, - interface_descriptor, - desc_count); - - for (i = 0; i < bv->max_primitives; i++) + for (i = 0; i < num_primitives(bv); i++) gen7_emit_media_object(&cmds, i); batch_add(&cmds, MI_BATCH_BUFFER_END); @@ -385,15 +416,15 @@ int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine, batch_get_defaults(engine->i915, &bv); if (!vma) - return bv.max_size; + return bv.size; - GEM_BUG_ON(vma->obj->base.size < bv.max_size); + GEM_BUG_ON(vma->obj->base.size < bv.size); batch = i915_gem_object_pin_map(vma->obj, I915_MAP_WC); if (IS_ERR(batch)) return PTR_ERR(batch); - emit_batch(vma, memset(batch, 0, bv.max_size), &bv); + emit_batch(vma, memset(batch, 0, bv.size), &bv); i915_gem_object_flush_map(vma->obj); __i915_gem_object_release_map(vma->obj); -- cgit 1.4.1 From 09aa9e45863e9e25dfbf350bae89fc3c2964482c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Jan 2021 22:52:19 +0000 Subject: drm/i915/gt: Restore clear-residual mitigations for Ivybridge, Baytrail The mitigation is required for all gen7 platforms, now that it does not cause GPU hangs, restore it for Ivybridge and Baytrail. Fixes: 47f8253d2b89 ("drm/i915/gen7: Clear all EU/L3 residual contexts") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Prathap Kumar Valsan Cc: Akeem G Abodunrin Cc: Bloomfield Jon Reviewed-by: Akeem G Abodunrin Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210111225220.3483-2-chris@chris-wilson.co.uk (cherry picked from commit 008ead6ef8f588a8c832adfe9db201d9be5fd410) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index a41b43f445b8..d809789f9cfb 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -1290,7 +1290,7 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma); - if (IS_HASWELL(engine->i915) && engine->class == RENDER_CLASS) { + if (IS_GEN(engine->i915, 7) && engine->class == RENDER_CLASS) { err = gen7_ctx_switch_bb_init(engine); if (err) goto err_ring_unpin; -- cgit 1.4.1 From 984cadea032b103c5824a5f29d0a36b3e9df6333 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Jan 2021 22:52:20 +0000 Subject: drm/i915: Allow the sysadmin to override security mitigations The clear-residuals mitigation is a relatively heavy hammer and under some circumstances the user may wish to forgo the context isolation in order to meet some performance requirement. Introduce a generic module parameter to allow selectively enabling/disabling different mitigations. To disable just the clear-residuals mitigation (on Ivybridge, Baytrail, or Haswell) use the module parameter: i915.mitigations=auto,!residuals Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1858 Fixes: 47f8253d2b89 ("drm/i915/gen7: Clear all EU/L3 residual contexts") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Jon Bloomfield Cc: Rodrigo Vivi Cc: stable@vger.kernel.org # v5.7 Reviewed-by: Jon Bloomfield Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210111225220.3483-3-chris@chris-wilson.co.uk (cherry picked from commit f7452c7cbd5b5dfb9a6c84cb20bea04c89be50cd) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/gt/intel_ring_submission.c | 4 +- drivers/gpu/drm/i915/i915_mitigations.c | 146 ++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_mitigations.h | 13 +++ 4 files changed, 163 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/i915/i915_mitigations.c create mode 100644 drivers/gpu/drm/i915/i915_mitigations.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index e5574e506a5c..6d9e81ea67f4 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -38,6 +38,7 @@ i915-y += i915_drv.o \ i915_config.o \ i915_irq.o \ i915_getparam.o \ + i915_mitigations.o \ i915_params.o \ i915_pci.o \ i915_scatterlist.o \ diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index d809789f9cfb..ecf3a6118a6d 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -32,6 +32,7 @@ #include "gen6_ppgtt.h" #include "gen7_renderclear.h" #include "i915_drv.h" +#include "i915_mitigations.h" #include "intel_breadcrumbs.h" #include "intel_context.h" #include "intel_gt.h" @@ -886,7 +887,8 @@ static int switch_context(struct i915_request *rq) GEM_BUG_ON(HAS_EXECLISTS(engine->i915)); if (engine->wa_ctx.vma && ce != engine->kernel_context) { - if (engine->wa_ctx.vma->private != ce) { + if (engine->wa_ctx.vma->private != ce && + i915_mitigate_clear_residuals()) { ret = clear_residuals(rq); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_mitigations.c b/drivers/gpu/drm/i915/i915_mitigations.c new file mode 100644 index 000000000000..84f12598d145 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_mitigations.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include +#include +#include +#include + +#include "i915_drv.h" +#include "i915_mitigations.h" + +static unsigned long mitigations __read_mostly = ~0UL; + +enum { + CLEAR_RESIDUALS = 0, +}; + +static const char * const names[] = { + [CLEAR_RESIDUALS] = "residuals", +}; + +bool i915_mitigate_clear_residuals(void) +{ + return READ_ONCE(mitigations) & BIT(CLEAR_RESIDUALS); +} + +static int mitigations_set(const char *val, const struct kernel_param *kp) +{ + unsigned long new = ~0UL; + char *str, *sep, *tok; + bool first = true; + int err = 0; + + BUILD_BUG_ON(ARRAY_SIZE(names) >= BITS_PER_TYPE(mitigations)); + + str = kstrdup(val, GFP_KERNEL); + if (!str) + return -ENOMEM; + + for (sep = str; (tok = strsep(&sep, ","));) { + bool enable = true; + int i; + + /* Be tolerant of leading/trailing whitespace */ + tok = strim(tok); + + if (first) { + first = false; + + if (!strcmp(tok, "auto")) + continue; + + new = 0; + if (!strcmp(tok, "off")) + continue; + } + + if (*tok == '!') { + enable = !enable; + tok++; + } + + if (!strncmp(tok, "no", 2)) { + enable = !enable; + tok += 2; + } + + if (*tok == '\0') + continue; + + for (i = 0; i < ARRAY_SIZE(names); i++) { + if (!strcmp(tok, names[i])) { + if (enable) + new |= BIT(i); + else + new &= ~BIT(i); + break; + } + } + if (i == ARRAY_SIZE(names)) { + pr_err("Bad \"%s.mitigations=%s\", '%s' is unknown\n", + DRIVER_NAME, val, tok); + err = -EINVAL; + break; + } + } + kfree(str); + if (err) + return err; + + WRITE_ONCE(mitigations, new); + return 0; +} + +static int mitigations_get(char *buffer, const struct kernel_param *kp) +{ + unsigned long local = READ_ONCE(mitigations); + int count, i; + bool enable; + + if (!local) + return scnprintf(buffer, PAGE_SIZE, "%s\n", "off"); + + if (local & BIT(BITS_PER_LONG - 1)) { + count = scnprintf(buffer, PAGE_SIZE, "%s,", "auto"); + enable = false; + } else { + enable = true; + count = 0; + } + + for (i = 0; i < ARRAY_SIZE(names); i++) { + if ((local & BIT(i)) != enable) + continue; + + count += scnprintf(buffer + count, PAGE_SIZE - count, + "%s%s,", enable ? "" : "!", names[i]); + } + + buffer[count - 1] = '\n'; + return count; +} + +static const struct kernel_param_ops ops = { + .set = mitigations_set, + .get = mitigations_get, +}; + +module_param_cb_unsafe(mitigations, &ops, NULL, 0600); +MODULE_PARM_DESC(mitigations, +"Selectively enable security mitigations for all Intel® GPUs in the system.\n" +"\n" +" auto -- enables all mitigations required for the platform [default]\n" +" off -- disables all mitigations\n" +"\n" +"Individual mitigations can be enabled by passing a comma-separated string,\n" +"e.g. mitigations=residuals to enable only clearing residuals or\n" +"mitigations=auto,noresiduals to disable only the clear residual mitigation.\n" +"Either '!' or 'no' may be used to switch from enabling the mitigation to\n" +"disabling it.\n" +"\n" +"Active mitigations for Ivybridge, Baytrail, Haswell:\n" +" residuals -- clear all thread-local registers between contexts" +); diff --git a/drivers/gpu/drm/i915/i915_mitigations.h b/drivers/gpu/drm/i915/i915_mitigations.h new file mode 100644 index 000000000000..1359d8135287 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_mitigations.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __I915_MITIGATIONS_H__ +#define __I915_MITIGATIONS_H__ + +#include + +bool i915_mitigate_clear_residuals(void); + +#endif /* __I915_MITIGATIONS_H__ */ -- cgit 1.4.1 From d78050ee35440d7879ed94011c52994b8932e96e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 7 Jan 2021 14:40:08 +0000 Subject: arm64: Remove arm64_dma32_phys_limit and its uses With the introduction of a dynamic ZONE_DMA range based on DT or IORT information, there's no need for CMA allocations from the wider ZONE_DMA32 since on most platforms ZONE_DMA will cover the 32-bit addressable range. Remove the arm64_dma32_phys_limit and set arm64_dma_phys_limit to cover the smallest DMA range required on the platform. CMA allocation and crashkernel reservation now go in the dynamically sized ZONE_DMA, allowing correct functionality on RPi4. Signed-off-by: Catalin Marinas Cc: Chen Zhou Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne # On RPi4B --- arch/arm64/include/asm/processor.h | 3 +-- arch/arm64/mm/init.c | 33 ++++++++++++++++++--------------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 69ad25fbeae4..ca2cd75d3286 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -94,8 +94,7 @@ #endif /* CONFIG_ARM64_FORCE_52BIT */ extern phys_addr_t arm64_dma_phys_limit; -extern phys_addr_t arm64_dma32_phys_limit; -#define ARCH_LOW_ADDRESS_LIMIT ((arm64_dma_phys_limit ? : arm64_dma32_phys_limit) - 1) +#define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1) struct debug_info { #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 7deddf56f7c3..709d98fea90c 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -53,13 +53,13 @@ s64 memstart_addr __ro_after_init = -1; EXPORT_SYMBOL(memstart_addr); /* - * We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA covers the first 1G of - * memory as some devices, namely the Raspberry Pi 4, have peripherals with - * this limited view of the memory. ZONE_DMA32 will cover the rest of the 32 - * bit addressable memory area. + * If the corresponding config options are enabled, we create both ZONE_DMA + * and ZONE_DMA32. By default ZONE_DMA covers the 32-bit addressable memory + * unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4). + * In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory, + * otherwise it is empty. */ phys_addr_t arm64_dma_phys_limit __ro_after_init; -phys_addr_t arm64_dma32_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE /* @@ -84,7 +84,7 @@ static void __init reserve_crashkernel(void) if (crash_base == 0) { /* Current arm64 boot protocol requires 2MB alignment */ - crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit, + crash_base = memblock_find_in_range(0, arm64_dma_phys_limit, crash_size, SZ_2M); if (crash_base == 0) { pr_warn("cannot allocate crashkernel (size:0x%llx)\n", @@ -196,6 +196,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; unsigned int __maybe_unused acpi_zone_dma_bits; unsigned int __maybe_unused dt_zone_dma_bits; + phys_addr_t __maybe_unused dma32_phys_limit = max_zone_phys(32); #ifdef CONFIG_ZONE_DMA acpi_zone_dma_bits = fls64(acpi_iort_dma_get_max_cpu_address()); @@ -205,8 +206,12 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit); #endif #ifdef CONFIG_ZONE_DMA32 - max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma32_phys_limit); + max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit); + if (!arm64_dma_phys_limit) + arm64_dma_phys_limit = dma32_phys_limit; #endif + if (!arm64_dma_phys_limit) + arm64_dma_phys_limit = PHYS_MASK + 1; max_zone_pfns[ZONE_NORMAL] = max; free_area_init(max_zone_pfns); @@ -394,16 +399,9 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); - if (IS_ENABLED(CONFIG_ZONE_DMA32)) - arm64_dma32_phys_limit = max_zone_phys(32); - else - arm64_dma32_phys_limit = PHYS_MASK + 1; - reserve_elfcorehdr(); high_memory = __va(memblock_end_of_DRAM() - 1) + 1; - - dma_contiguous_reserve(arm64_dma32_phys_limit); } void __init bootmem_init(void) @@ -438,6 +436,11 @@ void __init bootmem_init(void) sparse_init(); zone_sizes_init(min, max); + /* + * Reserve the CMA area after arm64_dma_phys_limit was initialised. + */ + dma_contiguous_reserve(arm64_dma_phys_limit); + /* * request_standard_resources() depends on crashkernel's memory being * reserved, so do it here. @@ -455,7 +458,7 @@ void __init bootmem_init(void) void __init mem_init(void) { if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > PFN_DOWN(arm64_dma_phys_limit ? : arm64_dma32_phys_limit)) + max_pfn > PFN_DOWN(arm64_dma_phys_limit)) swiotlb_init(1); else swiotlb_force = SWIOTLB_NO_FORCE; -- cgit 1.4.1 From 8e14f610159d524cd7aac37982826d3ef75c09e8 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Sat, 9 Jan 2021 15:17:06 +0000 Subject: dm crypt: do not call bio_endio() from the dm-crypt tasklet Sometimes, when dm-crypt executes decryption in a tasklet, we may get "BUG: KASAN: use-after-free in tasklet_action_common.constprop..." with a kasan-enabled kernel. When the decryption fully completes in the tasklet, dm-crypt will call bio_endio(), which in turn will call clone_endio() from dm.c core code. That function frees the resources associated with the bio, including per bio private structures. For dm-crypt it will free the current struct dm_crypt_io, which contains our tasklet object, causing use-after-free, when the tasklet is being dequeued by the kernel. To avoid this, do not call bio_endio() from the current tasklet context, but delay its execution to the dm-crypt IO workqueue. Fixes: 39d42fa96ba1 ("dm crypt: add flags to optionally bypass kcryptd workqueues") Cc: # v5.9+ Signed-off-by: Ignat Korchagin Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index d2d6d3000f5b..b4d7418b5036 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1730,6 +1730,12 @@ static void crypt_inc_pending(struct dm_crypt_io *io) atomic_inc(&io->io_pending); } +static void kcryptd_io_bio_endio(struct work_struct *work) +{ + struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); + bio_endio(io->base_bio); +} + /* * One of the bios was finished. Check for completion of * the whole request and correctly clean up the buffer. @@ -1752,7 +1758,23 @@ static void crypt_dec_pending(struct dm_crypt_io *io) kfree(io->integrity_metadata); base_bio->bi_status = error; - bio_endio(base_bio); + + /* + * If we are running this function from our tasklet, + * we can't call bio_endio() here, because it will call + * clone_endio() from dm.c, which in turn will + * free the current struct dm_crypt_io structure with + * our tasklet. In this case we need to delay bio_endio() + * execution to after the tasklet is done and dequeued. + */ + if (tasklet_trylock(&io->tasklet)) { + tasklet_unlock(&io->tasklet); + bio_endio(base_bio); + return; + } + + INIT_WORK(&io->work, kcryptd_io_bio_endio); + queue_work(cc->io_queue, &io->work); } /* -- cgit 1.4.1 From 17ffc193cdc6dc7a613d00d8ad47fc1f801b9bf0 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 12 Jan 2021 14:54:47 -0500 Subject: dm integrity: fix the maximum number of arguments Advance the maximum number of arguments from 9 to 15 to account for all potential feature flags that may be supplied. Linux 4.19 added "meta_device" (356d9d52e1221ba0c9f10b8b38652f78a5298329) and "recalculate" (a3fcf7253139609bf9ff901fbf955fba047e75dd) flags. Commit 468dfca38b1a6fbdccd195d875599cb7c8875cd9 added "sectors_per_bit" and "bitmap_flush_interval". Commit 84597a44a9d86ac949900441cea7da0af0f2f473 added "allow_discards". And the commit d537858ac8aaf4311b51240893add2fc62003b97 added "fix_padding". Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 11c7c538f7a9..81df019ab284 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3792,7 +3792,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) unsigned extra_args; struct dm_arg_set as; static const struct dm_arg _args[] = { - {0, 9, "Invalid number of feature args"}, + {0, 15, "Invalid number of feature args"}, }; unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec; bool should_write_sb; -- cgit 1.4.1 From df85bc140a4d6cbaa78d8e9c35154e1a2f0622c7 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 11 Jan 2021 18:07:07 +0100 Subject: net: dcb: Accept RTM_GETDCB messages carrying set-like DCB commands In commit 826f328e2b7e ("net: dcb: Validate netlink message in DCB handler"), Linux started rejecting RTM_GETDCB netlink messages if they contained a set-like DCB_CMD_ command. The reason was that privileges were only verified for RTM_SETDCB messages, but the value that determined the action to be taken is the command, not the message type. And validation of message type against the DCB command was the obvious missing piece. Unfortunately it turns out that mlnx_qos, a somewhat widely deployed tool for configuration of DCB, accesses the DCB set-like APIs through RTM_GETDCB. Therefore do not bounce the discrepancy between message type and command. Instead, in addition to validating privileges based on the actual message type, validate them also based on the expected message type. This closes the loophole of allowing DCB configuration on non-admin accounts, while maintaining backward compatibility. Fixes: 2f90b8657ec9 ("ixgbe: this patch adds support for DCB to the kernel and ixgbe driver") Fixes: 826f328e2b7e ("net: dcb: Validate netlink message in DCB handler") Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/a3edcfda0825f2aa2591801c5232f2bbf2d8a554.1610384801.git.me@pmachata.org Signed-off-by: Jakub Kicinski --- net/dcb/dcbnl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 7d49b6fd6cef..653e3bc9c87b 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1765,7 +1765,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, fn = &reply_funcs[dcb->cmd]; if (!fn->cb) return -EOPNOTSUPP; - if (fn->type != nlh->nlmsg_type) + if (fn->type == RTM_SETDCB && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!tb[DCB_ATTR_IFNAME]) -- cgit 1.4.1 From 8ff60eb052eeba95cfb3efe16b08c9199f8121cf Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 12 Jan 2021 15:49:04 -0800 Subject: mm, slub: consider rest of partial list if acquire_slab() fails acquire_slab() fails if there is contention on the freelist of the page (probably because some other CPU is concurrently freeing an object from the page). In that case, it might make sense to look for a different page (since there might be more remote frees to the page from other CPUs, and we don't want contention on struct page). However, the current code accidentally stops looking at the partial list completely in that case. Especially on kernels without CONFIG_NUMA set, this means that get_partial() fails and new_slab_objects() falls back to new_slab(), allocating new pages. This could lead to an unnecessary increase in memory fragmentation. Link: https://lkml.kernel.org/r/20201228130853.1871516-1-jannh@google.com Fixes: 7ced37197196 ("slub: Acquire_slab() avoid loop") Signed-off-by: Jann Horn Acked-by: David Rientjes Acked-by: Joonsoo Kim Cc: Christoph Lameter Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index dc5b42e700b8..d9e4e10683cc 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1973,7 +1973,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, t = acquire_slab(s, n, page, object == NULL, &objects); if (!t) - break; + continue; /* cmpxchg raced */ available += objects; if (!object) { -- cgit 1.4.1 From ce8f86ee94fabcc98537ddccd7e82cfd360a4dc5 Mon Sep 17 00:00:00 2001 From: Hailong liu Date: Tue, 12 Jan 2021 15:49:08 -0800 Subject: mm/page_alloc: add a missing mm_page_alloc_zone_locked() tracepoint The trace point *trace_mm_page_alloc_zone_locked()* in __rmqueue() does not currently cover all branches. Add the missing tracepoint and check the page before do that. [akpm@linux-foundation.org: use IS_ENABLED() to suppress warning] Link: https://lkml.kernel.org/r/20201228132901.41523-1-carver4lio@163.com Signed-off-by: Hailong liu Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index bdbec4c98173..027f6481ba59 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2862,20 +2862,20 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype, { struct page *page; -#ifdef CONFIG_CMA - /* - * Balance movable allocations between regular and CMA areas by - * allocating from CMA when over half of the zone's free memory - * is in the CMA area. - */ - if (alloc_flags & ALLOC_CMA && - zone_page_state(zone, NR_FREE_CMA_PAGES) > - zone_page_state(zone, NR_FREE_PAGES) / 2) { - page = __rmqueue_cma_fallback(zone, order); - if (page) - return page; + if (IS_ENABLED(CONFIG_CMA)) { + /* + * Balance movable allocations between regular and CMA areas by + * allocating from CMA when over half of the zone's free memory + * is in the CMA area. + */ + if (alloc_flags & ALLOC_CMA && + zone_page_state(zone, NR_FREE_CMA_PAGES) > + zone_page_state(zone, NR_FREE_PAGES) / 2) { + page = __rmqueue_cma_fallback(zone, order); + if (page) + goto out; + } } -#endif retry: page = __rmqueue_smallest(zone, order, migratetype); if (unlikely(!page)) { @@ -2886,8 +2886,9 @@ retry: alloc_flags)) goto retry; } - - trace_mm_page_alloc_zone_locked(page, order, migratetype); +out: + if (page) + trace_mm_page_alloc_zone_locked(page, order, migratetype); return page; } -- cgit 1.4.1 From 7ea510b92c7c9b4eb5ff72e6b4bbad4b0407a914 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 12 Jan 2021 15:49:11 -0800 Subject: mm/memcontrol: fix warning in mem_cgroup_page_lruvec() Boot a CONFIG_MEMCG=y kernel with "cgroup_disabled=memory" and you are met by a series of warnings from the VM_WARN_ON_ONCE_PAGE(!memcg, page) recently added to the inline mem_cgroup_page_lruvec(). An earlier attempt to place that warning, in mem_cgroup_lruvec(), had been careful to do so after weeding out the mem_cgroup_disabled() case; but was itself invalid because of the mem_cgroup_lruvec(NULL, pgdat) in clear_pgdat_congested() and age_active_anon(). Warning in mem_cgroup_page_lruvec() was once useful in detecting a KSM charge bug, so may be worth keeping: but skip if mem_cgroup_disabled(). Link: https://lkml.kernel.org/r/alpine.LSU.2.11.2101032056260.1093@eggly.anvils Fixes: 9a1ac2288cf1 ("mm/memcontrol:rewrite mem_cgroup_page_lruvec()") Signed-off-by: Hugh Dickins Reviewed-by: Alex Shi Acked-by: Roman Gushchin Acked-by: Chris Down Reviewed-by: Baoquan He Acked-by: Vlastimil Babka Cc: Hui Su Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Johannes Weiner Cc: Shakeel Butt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d827bd7f3bfe..eeb0b52203e9 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -665,7 +665,7 @@ static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, { struct mem_cgroup *memcg = page_memcg(page); - VM_WARN_ON_ONCE_PAGE(!memcg, page); + VM_WARN_ON_ONCE_PAGE(!memcg && !mem_cgroup_disabled(), page); return mem_cgroup_lruvec(memcg, pgdat); } -- cgit 1.4.1 From 29970dc24faf0078beb4efab5455b4f504d2198d Mon Sep 17 00:00:00 2001 From: Hailong Liu Date: Tue, 12 Jan 2021 15:49:14 -0800 Subject: arm/kasan: fix the array size of kasan_early_shadow_pte[] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The size of kasan_early_shadow_pte[] now is PTRS_PER_PTE which defined to 512 for arm. This means that it only covers the prev Linux pte entries, but not the HWTABLE pte entries for arm. The reason it currently works is that the symbol kasan_early_shadow_page immediately following kasan_early_shadow_pte in memory is page aligned, which makes kasan_early_shadow_pte look like a 4KB size array. But we can't ensure the order is always right with different compiler/linker, or if more bss symbols are introduced. We had a test with QEMU + vexpress:put a 512KB-size symbol with attribute __section(".bss..page_aligned") after kasan_early_shadow_pte, and poisoned it after kasan_early_init(). Then enabled CONFIG_KASAN, it failed to boot up. Link: https://lkml.kernel.org/r/20210109044622.8312-1-hailongliiu@yeah.net Signed-off-by: Hailong Liu Signed-off-by: Ziliang Guo Reviewed-by: Linus Walleij Cc: Andrey Ryabinin Cc: Russell King Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Ard Biesheuvel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 6 +++++- mm/kasan/init.c | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 5e0655fb2a6f..fe1ae73ff8b5 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -35,8 +35,12 @@ struct kunit_kasan_expectation { #define KASAN_SHADOW_INIT 0 #endif +#ifndef PTE_HWTABLE_PTRS +#define PTE_HWTABLE_PTRS 0 +#endif + extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; -extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE]; +extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE + PTE_HWTABLE_PTRS]; extern pmd_t kasan_early_shadow_pmd[PTRS_PER_PMD]; extern pud_t kasan_early_shadow_pud[PTRS_PER_PUD]; extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D]; diff --git a/mm/kasan/init.c b/mm/kasan/init.c index bc0ad208b3a7..7ca0b92d5886 100644 --- a/mm/kasan/init.c +++ b/mm/kasan/init.c @@ -64,7 +64,8 @@ static inline bool kasan_pmd_table(pud_t pud) return false; } #endif -pte_t kasan_early_shadow_pte[PTRS_PER_PTE] __page_aligned_bss; +pte_t kasan_early_shadow_pte[PTRS_PER_PTE + PTE_HWTABLE_PTRS] + __page_aligned_bss; static inline bool kasan_pte_table(pmd_t pmd) { -- cgit 1.4.1 From c22ee5284cf58017fa8c6d21d8f8c68159b6faab Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 12 Jan 2021 15:49:18 -0800 Subject: mm/vmalloc.c: fix potential memory leak In VM_MAP_PUT_PAGES case, we should put pages and free array in vfree. But we missed to set area->nr_pages in vmap(). So we would fail to put pages in __vunmap() because area->nr_pages = 0. Link: https://lkml.kernel.org/r/20210107123541.39206-1-linmiaohe@huawei.com Fixes: b944afc9d64d ("mm: add a VM_MAP_PUT_PAGES flag for vmap") Signed-off-by: Shijie Luo Signed-off-by: Miaohe Lin Reviewed-by: Uladzislau Rezki (Sony) Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 4d88fe5a277a..e6f352bf0498 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2420,8 +2420,10 @@ void *vmap(struct page **pages, unsigned int count, return NULL; } - if (flags & VM_MAP_PUT_PAGES) + if (flags & VM_MAP_PUT_PAGES) { area->pages = pages; + area->nr_pages = count; + } return area->addr; } EXPORT_SYMBOL(vmap); -- cgit 1.4.1 From f555befd185dc097ede887eb7b308c2e1c1369d4 Mon Sep 17 00:00:00 2001 From: Jan Stancek Date: Tue, 12 Jan 2021 15:49:21 -0800 Subject: mm: migrate: initialize err in do_migrate_pages After commit 236c32eb1096 ("mm: migrate: clean up migrate_prep{_local}")', do_migrate_pages can return uninitialized variable 'err' (which is propagated to user-space as error) when 'from' and 'to' nodesets are identical. This can be reproduced with LTP migrate_pages01, which calls migrate_pages() with same set for both old/new_nodes. Add 'err' initialization back. Link: https://lkml.kernel.org/r/456a021c7ef3636d7668cec9dcb4a446a4244812.1609855564.git.jstancek@redhat.com Fixes: 236c32eb1096 ("mm: migrate: clean up migrate_prep{_local}") Signed-off-by: Jan Stancek Acked-by: Michal Hocko Acked-by: Yang Shi Cc: Zi Yan Cc: Jan Kara Cc: Matthew Wilcox Cc: Mel Gorman Cc: Song Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 8cf96bd21341..2c3a86502053 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1111,7 +1111,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to, int flags) { int busy = 0; - int err; + int err = 0; nodemask_t tmp; migrate_prep(); -- cgit 1.4.1 From 0eb98f1588c2cc7a79816d84ab18a55d254f481c Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 12 Jan 2021 15:49:24 -0800 Subject: mm/hugetlb: fix potential missing huge page size info The huge page size is encoded for VM_FAULT_HWPOISON errors only. So if we return VM_FAULT_HWPOISON, huge page size would just be ignored. Link: https://lkml.kernel.org/r/20210107123449.38481-1-linmiaohe@huawei.com Fixes: aa50d3a7aa81 ("Encode huge page size for VM_FAULT_HWPOISON errors") Signed-off-by: Miaohe Lin Reviewed-by: Mike Kravetz Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a2602969873d..18f6ee317900 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4371,7 +4371,7 @@ retry: * So we need to block hugepage fault by PG_hwpoison bit check. */ if (unlikely(PageHWPoison(page))) { - ret = VM_FAULT_HWPOISON | + ret = VM_FAULT_HWPOISON_LARGE | VM_FAULT_SET_HINDEX(hstate_index(h)); goto backout_unlocked; } -- cgit 1.4.1 From 7e5f1126b54a29c078c07a5fe245e269f3c05500 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 12 Jan 2021 15:49:27 -0800 Subject: MAINTAINERS: add Vlastimil as slab allocators maintainer I would like to help with slab allocators maintenance, from the perspective of being responsible for SLAB and more recently also SLUB in an enterprise distro kernel and supporting its users. Recently I've been focusing on improving SLUB's debugging features, and patch review in the area, including the kmemcg accounting rewrite last year. Link: https://lkml.kernel.org/r/20210108110353.19971-1-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Christoph Lameter Acked-by: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index cc1e6a5ee6e6..28cbe0ec6610 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16319,6 +16319,7 @@ M: Pekka Enberg M: David Rientjes M: Joonsoo Kim M: Andrew Morton +M: Vlastimil Babka L: linux-mm@kvack.org S: Maintained F: include/linux/sl?b*.h -- cgit 1.4.1 From 6696d2a6f38c0beedf03c381edfc392ecf7631b4 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Tue, 12 Jan 2021 15:49:30 -0800 Subject: mm,hwpoison: fix printing of page flags Format %pG expects a lower case 'p' in order to print the flags. Fix it. Link: https://lkml.kernel.org/r/20210108085202.4506-1-osalvador@suse.de Fixes: 8295d535e2aa ("mm,hwpoison: refactor get_any_page") Signed-off-by: Oscar Salvador Reported-by: Dan Carpenter Reviewed-by: Anshuman Khandual Acked-by: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 5a38e9eade94..04d9f154a130 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1940,7 +1940,7 @@ retry: goto retry; } } else if (ret == -EIO) { - pr_info("%s: %#lx: unknown page type: %lx (%pGP)\n", + pr_info("%s: %#lx: unknown page type: %lx (%pGp)\n", __func__, pfn, page->flags, &page->flags); } -- cgit 1.4.1 From eb351d75ce1e75b4f793d609efac08426ca50acd Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 12 Jan 2021 15:49:33 -0800 Subject: mm/process_vm_access.c: include compat.h Fix the build error: mm/process_vm_access.c:277:5: error: implicit declaration of function 'in_compat_syscall'; did you mean 'in_ia32_syscall'? [-Werror=implicit-function-declaration] Fixes: 38dc5079da7081e "Fix compat regression in process_vm_rw()" Reported-by: syzbot+5b0d0de84d6c65b8dd2b@syzkaller.appspotmail.com Cc: Kyle Huey Cc: Jens Axboe Cc: Al Viro Cc: Christoph Hellwig Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/process_vm_access.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index 4bcc11958089..f5fee9cf90f8 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include -- cgit 1.4.1 From a18caa97b1bda0a3d126a7be165ddcfc56c2dde6 Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Mon, 11 Jan 2021 09:59:32 +0100 Subject: net: phy: smsc: fix clk error handling Commit bedd8d78aba3 ("net: phy: smsc: LAN8710/20: add phy refclk in support") added the phy clk support. The commit already checks if clk_get_optional() throw an error but instead of returning the error it ignores it. Fixes: bedd8d78aba3 ("net: phy: smsc: LAN8710/20: add phy refclk in support") Suggested-by: Jakub Kicinski Signed-off-by: Marco Felsch Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20210111085932.28680-1-m.felsch@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/phy/smsc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index 33372756a451..ddb78fb4d6dc 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -317,7 +317,8 @@ static int smsc_phy_probe(struct phy_device *phydev) /* Make clk optional to keep DTB backward compatibility. */ priv->refclk = clk_get_optional(dev, NULL); if (IS_ERR(priv->refclk)) - dev_err_probe(dev, PTR_ERR(priv->refclk), "Failed to request clock\n"); + return dev_err_probe(dev, PTR_ERR(priv->refclk), + "Failed to request clock\n"); ret = clk_prepare_enable(priv->refclk); if (ret) -- cgit 1.4.1 From 07b90056cb15ff9877dca0d8f1b6583d1051f724 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Jan 2021 01:09:43 +0200 Subject: net: dsa: unbind all switches from tree when DSA master unbinds Currently the following happens when a DSA master driver unbinds while there are DSA switches attached to it: $ echo 0000:00:00.5 > /sys/bus/pci/drivers/mscc_felix/unbind ------------[ cut here ]------------ WARNING: CPU: 0 PID: 392 at net/core/dev.c:9507 Call trace: rollback_registered_many+0x5fc/0x688 unregister_netdevice_queue+0x98/0x120 dsa_slave_destroy+0x4c/0x88 dsa_port_teardown.part.16+0x78/0xb0 dsa_tree_teardown_switches+0x58/0xc0 dsa_unregister_switch+0x104/0x1b8 felix_pci_remove+0x24/0x48 pci_device_remove+0x48/0xf0 device_release_driver_internal+0x118/0x1e8 device_driver_detach+0x28/0x38 unbind_store+0xd0/0x100 Located at the above location is this WARN_ON: /* Notifier chain MUST detach us all upper devices. */ WARN_ON(netdev_has_any_upper_dev(dev)); Other stacked interfaces, like VLAN, do indeed listen for NETDEV_UNREGISTER on the real_dev and also unregister themselves at that time, which is clearly the behavior that rollback_registered_many expects. But DSA interfaces are not VLAN. They have backing hardware (platform devices, PCI devices, MDIO, SPI etc) which have a life cycle of their own and we can't just trigger an unregister from the DSA framework when we receive a netdev notifier that the master unregisters. Luckily, there is something we can do, and that is to inform the driver core that we have a runtime dependency to the DSA master interface's device, and create a device link where that is the supplier and we are the consumer. Having this device link will make the DSA switch unbind before the DSA master unbinds, which is enough to avoid the WARN_ON from rollback_registered_many. Note that even before the blamed commit, DSA did nothing intelligent when the master interface got unregistered either. See the discussion here: https://lore.kernel.org/netdev/20200505210253.20311-1-f.fainelli@gmail.com/ But this time, at least the WARN_ON is loud enough that the upper_dev_link commit can be blamed. The advantage with this approach vs dev_hold(master) in the attached link is that the latter is not meant for long term reference counting. With dev_hold, the only thing that will happen is that when the user attempts an unbind of the DSA master, netdev_wait_allrefs will keep waiting and waiting, due to DSA keeping the refcount forever. DSA would not access freed memory corresponding to the master interface, but the unbind would still result in a freeze. Whereas with device links, graceful teardown is ensured. It even works with cascaded DSA trees. $ echo 0000:00:00.2 > /sys/bus/pci/drivers/fsl_enetc/unbind [ 1818.797546] device swp0 left promiscuous mode [ 1819.301112] sja1105 spi2.0: Link is Down [ 1819.307981] DSA: tree 1 torn down [ 1819.312408] device eno2 left promiscuous mode [ 1819.656803] mscc_felix 0000:00:00.5: Link is Down [ 1819.667194] DSA: tree 0 torn down [ 1819.711557] fsl_enetc 0000:00:00.2 eno2: Link is Down This approach allows us to keep the DSA framework absolutely unchanged, and the driver core will just know to unbind us first when the master goes away - as opposed to the large (and probably impossible) rework required if attempting to listen for NETDEV_UNREGISTER. As per the documentation at Documentation/driver-api/device_link.rst, specifying the DL_FLAG_AUTOREMOVE_CONSUMER flag causes the device link to be automatically purged when the consumer fails to probe or later unbinds. So we don't need to keep the consumer_link variable in struct dsa_switch. Fixes: 2f1e8ea726e9 ("net: dsa: link interfaces with the DSA master to get rid of lockdep warnings") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20210111230943.3701806-1-olteanv@gmail.com Signed-off-by: Jakub Kicinski --- net/dsa/master.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/dsa/master.c b/net/dsa/master.c index 5a0f6fec4271..cb3a5cf99b25 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -309,8 +309,18 @@ static struct lock_class_key dsa_master_addr_list_lock_key; int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) { int mtu = ETH_DATA_LEN + cpu_dp->tag_ops->overhead; + struct dsa_switch *ds = cpu_dp->ds; + struct device_link *consumer_link; int ret; + /* The DSA master must use SET_NETDEV_DEV for this to work. */ + consumer_link = device_link_add(ds->dev, dev->dev.parent, + DL_FLAG_AUTOREMOVE_CONSUMER); + if (!consumer_link) + netdev_err(dev, + "Failed to create a device link to DSA switch %s\n", + dev_name(ds->dev)); + rtnl_lock(); ret = dev_set_mtu(dev, mtu); rtnl_unlock(); -- cgit 1.4.1 From 91158e1680b164c8d101144ca916a3dca10c3e17 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Jan 2021 02:48:31 +0200 Subject: net: dsa: clear devlink port type before unregistering slave netdevs Florian reported a use-after-free bug in devlink_nl_port_fill found with KASAN: (devlink_nl_port_fill) (devlink_port_notify) (devlink_port_unregister) (dsa_switch_teardown.part.3) (dsa_tree_teardown_switches) (dsa_unregister_switch) (bcm_sf2_sw_remove) (platform_remove) (device_release_driver_internal) (device_links_unbind_consumers) (device_release_driver_internal) (device_driver_detach) (unbind_store) Allocated by task 31: alloc_netdev_mqs+0x5c/0x50c dsa_slave_create+0x110/0x9c8 dsa_register_switch+0xdb0/0x13a4 b53_switch_register+0x47c/0x6dc bcm_sf2_sw_probe+0xaa4/0xc98 platform_probe+0x90/0xf4 really_probe+0x184/0x728 driver_probe_device+0xa4/0x278 __device_attach_driver+0xe8/0x148 bus_for_each_drv+0x108/0x158 Freed by task 249: free_netdev+0x170/0x194 dsa_slave_destroy+0xac/0xb0 dsa_port_teardown.part.2+0xa0/0xb4 dsa_tree_teardown_switches+0x50/0xc4 dsa_unregister_switch+0x124/0x250 bcm_sf2_sw_remove+0x98/0x13c platform_remove+0x44/0x5c device_release_driver_internal+0x150/0x254 device_links_unbind_consumers+0xf8/0x12c device_release_driver_internal+0x84/0x254 device_driver_detach+0x30/0x34 unbind_store+0x90/0x134 What happens is that devlink_port_unregister emits a netlink DEVLINK_CMD_PORT_DEL message which associates the devlink port that is getting unregistered with the ifindex of its corresponding net_device. Only trouble is, the net_device has already been unregistered. It looks like we can stub out the search for a corresponding net_device if we clear the devlink_port's type. This looks like a bit of a hack, but also seems to be the reason why the devlink_port_type_clear function exists in the first place. Fixes: 3122433eb533 ("net: dsa: Register devlink ports before calling DSA driver setup()") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Tested-by: Florian fainelli Reported-by: Florian Fainelli Link: https://lore.kernel.org/r/20210112004831.3778323-1-olteanv@gmail.com Signed-off-by: Jakub Kicinski --- net/dsa/dsa2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 183003e45762..a47e0f9b20d0 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -353,9 +353,13 @@ static int dsa_port_devlink_setup(struct dsa_port *dp) static void dsa_port_teardown(struct dsa_port *dp) { + struct devlink_port *dlp = &dp->devlink_port; + if (!dp->setup) return; + devlink_port_type_clear(dlp); + switch (dp->type) { case DSA_PORT_TYPE_UNUSED: break; -- cgit 1.4.1 From cb82a54904a99df9e8f9e9d282046055dae5a730 Mon Sep 17 00:00:00 2001 From: Leon Schuermann Date: Mon, 11 Jan 2021 20:03:13 +0100 Subject: r8152: Add Lenovo Powered USB-C Travel Hub This USB-C Hub (17ef:721e) based on the Realtek RTL8153B chip used to use the cdc_ether driver. However, using this driver, with the system suspended the device constantly sends pause-frames as soon as the receive buffer fills up. This causes issues with other devices, where some Ethernet switches stop forwarding packets altogether. Using the Realtek driver (r8152) fixes this issue. Pause frames are no longer sent while the host system is suspended. Signed-off-by: Leon Schuermann Tested-by: Leon Schuermann Link: https://lore.kernel.org/r/20210111190312.12589-2-leon@is.currently.online Signed-off-by: Jakub Kicinski --- drivers/net/usb/cdc_ether.c | 7 +++++++ drivers/net/usb/r8152.c | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 8c1d61c2cbac..6aaa0675c28a 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -793,6 +793,13 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* Lenovo Powered USB-C Travel Hub (4X90S92381, based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x721e, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* ThinkPad USB-C Dock Gen 2 (based on Realtek RTL8153) */ { USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0xa387, USB_CLASS_COMM, diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index c448d6089821..67cd6986634f 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -6877,6 +6877,7 @@ static const struct usb_device_id rtl8152_table[] = { {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7205)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x720c)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7214)}, + {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x721e)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0xa387)}, {REALTEK_USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041)}, {REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff)}, -- cgit 1.4.1 From 2284bbd0cf3981462dc6d729c89851c66b05a66a Mon Sep 17 00:00:00 2001 From: Leon Schuermann Date: Mon, 11 Jan 2021 20:03:15 +0100 Subject: r8153_ecm: Add Lenovo Powered USB-C Hub as a fallback of r8152 This commit enables the use of the r8153_ecm driver, introduced with commit c1aedf015ebdd0 ("net/usb/r8153_ecm: support ECM mode for RTL8153") for the Lenovo Powered USB-C Hub (17ef:721e) based on the Realtek RTL8153B chip. This results in the following driver preference: - if r8152 is available, use the r8152 driver - if r8152 is not available, use the r8153_ecm driver This is done to prevent the NIC from constantly sending pause frames when the host system enters standby (fixed by using the r8152 driver in "r8152: Add Lenovo Powered USB-C Travel Hub"), while still allowing the device to work with the r8153_ecm driver as a fallback. Signed-off-by: Leon Schuermann Tested-by: Leon Schuermann Link: https://lore.kernel.org/r/20210111190312.12589-3-leon@is.currently.online Signed-off-by: Jakub Kicinski --- drivers/net/usb/r8153_ecm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/usb/r8153_ecm.c b/drivers/net/usb/r8153_ecm.c index 2c3fabd38b16..20b2df8d74ae 100644 --- a/drivers/net/usb/r8153_ecm.c +++ b/drivers/net/usb/r8153_ecm.c @@ -122,12 +122,20 @@ static const struct driver_info r8153_info = { }; static const struct usb_device_id products[] = { +/* Realtek RTL8153 Based USB 3.0 Ethernet Adapters */ { USB_DEVICE_AND_INTERFACE_INFO(VENDOR_ID_REALTEK, 0x8153, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&r8153_info, }, +/* Lenovo Powered USB-C Travel Hub (4X90S92381, based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(VENDOR_ID_LENOVO, 0x721e, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&r8153_info, +}, + { }, /* END */ }; MODULE_DEVICE_TABLE(usb, products); -- cgit 1.4.1 From 869c4d5eb1e6fbda66aa790c48bdb946d71494a0 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 11 Jan 2021 04:26:39 -0500 Subject: bnxt_en: Improve stats context resource accounting with RDMA driver loaded. The function bnxt_get_ulp_stat_ctxs() does not count the stats contexts used by the RDMA driver correctly when the RDMA driver is freeing the MSIX vectors. It assumes that if the RDMA driver is registered, the additional stats contexts will be needed. This is not true when the RDMA driver is about to unregister and frees the MSIX vectors. This slight error leads to over accouting of the stats contexts needed after the RDMA driver has unloaded. This will cause some firmware warning and error messages in dmesg during subsequent config. changes or ifdown/ifup. Fix it by properly accouting for extra stats contexts only if the RDMA driver is registered and MSIX vectors have been successfully requested. Fixes: c027c6b4e91f ("bnxt_en: get rid of num_stat_ctxs variable") Reviewed-by: Yongping Zhang Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index 8c8368c2f335..64dbbb04b043 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -222,8 +222,12 @@ int bnxt_get_ulp_msix_base(struct bnxt *bp) int bnxt_get_ulp_stat_ctxs(struct bnxt *bp) { - if (bnxt_ulp_registered(bp->edev, BNXT_ROCE_ULP)) - return BNXT_MIN_ROCE_STAT_CTXS; + if (bnxt_ulp_registered(bp->edev, BNXT_ROCE_ULP)) { + struct bnxt_en_dev *edev = bp->edev; + + if (edev->ulp_tbl[BNXT_ROCE_ULP].msix_requested) + return BNXT_MIN_ROCE_STAT_CTXS; + } return 0; } -- cgit 1.4.1 From 687487751814a493fba953efb9b1542b2f90614c Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Mon, 11 Jan 2021 04:26:40 -0500 Subject: bnxt_en: Clear DEFRAG flag in firmware message when retry flashing. When the FW tells the driver to retry the INSTALL_UPDATE command after it has cleared the NVM area, the driver is not clearing the previously used ALLOWED_TO_DEFRAG flag. As a result the FW tries to defrag the NVM area a second time in a loop and can fail the request. Fixes: 1432c3f6a6ca ("bnxt_en: Retry installing FW package under NO_SPACE error condition.") Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 9ff79d5d14c4..2f8b193a772d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2532,7 +2532,7 @@ int bnxt_flash_package_from_fw_obj(struct net_device *dev, const struct firmware if (rc && ((struct hwrm_err_output *)&resp)->cmd_err == NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) { - install.flags |= + install.flags = cpu_to_le16(NVM_INSTALL_UPDATE_REQ_FLAGS_ALLOWED_TO_DEFRAG); rc = _hwrm_send_message_silent(bp, &install, @@ -2546,6 +2546,7 @@ int bnxt_flash_package_from_fw_obj(struct net_device *dev, const struct firmware * UPDATE directory and try the flash again */ defrag_attempted = true; + install.flags = 0; rc = __bnxt_flash_nvram(bp->dev, BNX_DIR_TYPE_UPDATE, BNX_DIR_ORDINAL_FIRST, -- cgit 1.4.1 From 20bc80b6f582ad1151c52ca09ab66b472768c9c8 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 12 Jan 2021 18:25:23 +0100 Subject: mptcp: more strict state checking for acks Syzkaller found a way to trigger division by zero in mptcp_subflow_cleanup_rbuf(). The current checks implemented into tcp_can_send_ack() are too week, let's be more accurate. Reported-by: Christoph Paasch Fixes: ea4ca586b16f ("mptcp: refine MPTCP-level ack scheduling") Fixes: fd8976790a6c ("mptcp: be careful on MPTCP-level ack.") Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 6628d8d74203..2ff8c7caf74f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -427,7 +427,7 @@ static bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) static bool tcp_can_send_ack(const struct sock *ssk) { return !((1 << inet_sk_state_load(ssk)) & - (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_TIME_WAIT | TCPF_CLOSE)); + (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_TIME_WAIT | TCPF_CLOSE | TCPF_LISTEN)); } static void mptcp_send_ack(struct mptcp_sock *msk) -- cgit 1.4.1 From 76e2a55d16259b51116767b28b19d759bff43f72 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 12 Jan 2021 18:25:24 +0100 Subject: mptcp: better msk-level shutdown. Instead of re-implementing most of inet_shutdown, re-use such helper, and implement the MPTCP-specific bits at the 'proto' level. The msk-level disconnect() can now be invoked, lets provide a suitable implementation. As a side effect, this fixes bad state management for listener sockets. The latter could lead to division by 0 oops since commit ea4ca586b16f ("mptcp: refine MPTCP-level ack scheduling"). Fixes: 43b54c6ee382 ("mptcp: Use full MPTCP-level disconnect state machine") Fixes: ea4ca586b16f ("mptcp: refine MPTCP-level ack scheduling") Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 62 ++++++++++++++-------------------------------------- 1 file changed, 17 insertions(+), 45 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2ff8c7caf74f..81faeff8f3bb 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2642,11 +2642,12 @@ static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk) static int mptcp_disconnect(struct sock *sk, int flags) { - /* Should never be called. - * inet_stream_connect() calls ->disconnect, but that - * refers to the subflow socket, not the mptcp one. - */ - WARN_ON_ONCE(1); + struct mptcp_subflow_context *subflow; + struct mptcp_sock *msk = mptcp_sk(sk); + + __mptcp_flush_join_list(msk); + mptcp_for_each_subflow(msk, subflow) + tcp_disconnect(mptcp_subflow_tcp_sock(subflow), flags); return 0; } @@ -3089,6 +3090,14 @@ bool mptcp_finish_join(struct sock *ssk) return true; } +static void mptcp_shutdown(struct sock *sk, int how) +{ + pr_debug("sk=%p, how=%d", sk, how); + + if ((how & SEND_SHUTDOWN) && mptcp_close_state(sk)) + __mptcp_wr_shutdown(sk); +} + static struct proto mptcp_prot = { .name = "MPTCP", .owner = THIS_MODULE, @@ -3098,7 +3107,7 @@ static struct proto mptcp_prot = { .accept = mptcp_accept, .setsockopt = mptcp_setsockopt, .getsockopt = mptcp_getsockopt, - .shutdown = tcp_shutdown, + .shutdown = mptcp_shutdown, .destroy = mptcp_destroy, .sendmsg = mptcp_sendmsg, .recvmsg = mptcp_recvmsg, @@ -3344,43 +3353,6 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, return mask; } -static int mptcp_shutdown(struct socket *sock, int how) -{ - struct mptcp_sock *msk = mptcp_sk(sock->sk); - struct sock *sk = sock->sk; - int ret = 0; - - pr_debug("sk=%p, how=%d", msk, how); - - lock_sock(sk); - - how++; - if ((how & ~SHUTDOWN_MASK) || !how) { - ret = -EINVAL; - goto out_unlock; - } - - if (sock->state == SS_CONNECTING) { - if ((1 << sk->sk_state) & - (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE)) - sock->state = SS_DISCONNECTING; - else - sock->state = SS_CONNECTED; - } - - sk->sk_shutdown |= how; - if ((how & SEND_SHUTDOWN) && mptcp_close_state(sk)) - __mptcp_wr_shutdown(sk); - - /* Wake up anyone sleeping in poll. */ - sk->sk_state_change(sk); - -out_unlock: - release_sock(sk); - - return ret; -} - static const struct proto_ops mptcp_stream_ops = { .family = PF_INET, .owner = THIS_MODULE, @@ -3394,7 +3366,7 @@ static const struct proto_ops mptcp_stream_ops = { .ioctl = inet_ioctl, .gettstamp = sock_gettstamp, .listen = mptcp_listen, - .shutdown = mptcp_shutdown, + .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, @@ -3444,7 +3416,7 @@ static const struct proto_ops mptcp_v6_stream_ops = { .ioctl = inet6_ioctl, .gettstamp = sock_gettstamp, .listen = mptcp_listen, - .shutdown = mptcp_shutdown, + .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet6_sendmsg, -- cgit 1.4.1 From 7cd1af107a92eb63b93a96dc07406dcbc5269436 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 18 Dec 2020 16:20:51 -0800 Subject: riscv: Trace irq on only interrupt is enabled We should call irq trace only if interrupt is going to be enabled during excecption handling. Otherwise, it results in following warning during boot with lock debugging enabled. [ 0.000000] ------------[ cut here ]------------ [ 0.000000] DEBUG_LOCKS_WARN_ON(early_boot_irqs_disabled) [ 0.000000] WARNING: CPU: 0 PID: 0 at kernel/locking/lockdep.c:4085 lockdep_hardirqs_on_prepare+0x22a/0x22e [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.10.0-00022-ge20097fb37e2-dirty #548 [ 0.000000] epc: c005d5d4 ra : c005d5d4 sp : c1c01e80 [ 0.000000] gp : c1d456e0 tp : c1c0a980 t0 : 00000000 [ 0.000000] t1 : ffffffff t2 : 00000000 s0 : c1c01ea0 [ 0.000000] s1 : c100f360 a0 : 0000002d a1 : c00666ee [ 0.000000] a2 : 00000000 a3 : 00000000 a4 : 00000000 [ 0.000000] a5 : 00000000 a6 : c1c6b390 a7 : 3ffff00e [ 0.000000] s2 : c2384fe8 s3 : 00000000 s4 : 00000001 [ 0.000000] s5 : c1c0a980 s6 : c1d48000 s7 : c1613b4c [ 0.000000] s8 : 00000fff s9 : 80000200 s10: c1613b40 [ 0.000000] s11: 00000000 t3 : 00000000 t4 : 00000000 [ 0.000000] t5 : 00000001 t6 : 00000000 Fixes: 3c4697982982 ("riscv:Enable LOCKDEP_SUPPORT & fixup TRACE_IRQFLAGS_SUPPORT") Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/entry.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 48de316c68c1..744f3209c48d 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -124,15 +124,15 @@ skip_context_tracking: REG_L a1, (a1) jr a1 1: -#ifdef CONFIG_TRACE_IRQFLAGS - call trace_hardirqs_on -#endif /* * Exceptions run with interrupts enabled or disabled depending on the * state of SR_PIE in m/sstatus. */ andi t0, s1, SR_PIE beqz t0, 1f +#ifdef CONFIG_TRACE_IRQFLAGS + call trace_hardirqs_on +#endif csrs CSR_STATUS, SR_IE 1: -- cgit 1.4.1 From 25fe2c9c4cd2e97c5f5b69f3aefe69aad3057936 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 12 Jan 2021 17:21:21 +0100 Subject: smc: fix out of bound access in smc_nl_get_sys_info() smc_clc_get_hostname() sets the host pointer to a buffer which is not NULL-terminated (see smc_clc_init()). Reported-by: syzbot+f4708c391121cfc58396@syzkaller.appspotmail.com Fixes: 099b990bd11a ("net/smc: Add support for obtaining system information") Signed-off-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/smc_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 59342b519e34..8d866b4ed8f6 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -246,7 +246,8 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) goto errattr; smc_clc_get_hostname(&host); if (host) { - snprintf(hostname, sizeof(hostname), "%s", host); + memcpy(hostname, host, SMC_MAX_HOSTNAME_LEN); + hostname[SMC_MAX_HOSTNAME_LEN] = 0; if (nla_put_string(skb, SMC_NLA_SYS_LOCAL_HOST, hostname)) goto errattr; } -- cgit 1.4.1 From 8a4465368964b4fbaf084760c94c7aabf61059fb Mon Sep 17 00:00:00 2001 From: Guvenc Gulce Date: Tue, 12 Jan 2021 17:21:22 +0100 Subject: net/smc: use memcpy instead of snprintf to avoid out of bounds read Using snprintf() to convert not null-terminated strings to null terminated strings may cause out of bounds read in the source string. Therefore use memcpy() and terminate the target string with a null afterwards. Fixes: a3db10efcc4c ("net/smc: Add support for obtaining SMCR device list") Signed-off-by: Guvenc Gulce Signed-off-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/smc_core.c | 17 +++++++++++------ net/smc/smc_ib.c | 6 +++--- net/smc/smc_ism.c | 3 ++- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 8d866b4ed8f6..0df85a12651e 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -258,7 +258,8 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) smc_ism_get_system_eid(smcd_dev, &seid); mutex_unlock(&smcd_dev_list.mutex); if (seid && smc_ism_is_v2_capable()) { - snprintf(smc_seid, sizeof(smc_seid), "%s", seid); + memcpy(smc_seid, seid, SMC_MAX_EID_LEN); + smc_seid[SMC_MAX_EID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_SYS_SEID, smc_seid)) goto errattr; } @@ -296,7 +297,8 @@ static int smc_nl_fill_lgr(struct smc_link_group *lgr, goto errattr; if (nla_put_u8(skb, SMC_NLA_LGR_R_VLAN_ID, lgr->vlan_id)) goto errattr; - snprintf(smc_target, sizeof(smc_target), "%s", lgr->pnet_id); + memcpy(smc_target, lgr->pnet_id, SMC_MAX_PNETID_LEN); + smc_target[SMC_MAX_PNETID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target)) goto errattr; @@ -313,7 +315,7 @@ static int smc_nl_fill_lgr_link(struct smc_link_group *lgr, struct sk_buff *skb, struct netlink_callback *cb) { - char smc_ibname[IB_DEVICE_NAME_MAX + 1]; + char smc_ibname[IB_DEVICE_NAME_MAX]; u8 smc_gid_target[41]; struct nlattr *attrs; u32 link_uid = 0; @@ -462,7 +464,8 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, goto errattr; if (nla_put_u32(skb, SMC_NLA_LGR_D_CHID, smc_ism_get_chid(lgr->smcd))) goto errattr; - snprintf(smc_pnet, sizeof(smc_pnet), "%s", lgr->smcd->pnetid); + memcpy(smc_pnet, lgr->smcd->pnetid, SMC_MAX_PNETID_LEN); + smc_pnet[SMC_MAX_PNETID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet)) goto errattr; @@ -475,10 +478,12 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, goto errv2attr; if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os)) goto errv2attr; - snprintf(smc_host, sizeof(smc_host), "%s", lgr->peer_hostname); + memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN); + smc_host[SMC_MAX_HOSTNAME_LEN] = 0; if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host)) goto errv2attr; - snprintf(smc_eid, sizeof(smc_eid), "%s", lgr->negotiated_eid); + memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN); + smc_eid[SMC_MAX_EID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid)) goto errv2attr; diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index ddd7fac98b1d..7d7ba0320d5a 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -371,8 +371,8 @@ static int smc_nl_handle_dev_port(struct sk_buff *skb, if (nla_put_u8(skb, SMC_NLA_DEV_PORT_PNET_USR, smcibdev->pnetid_by_user[port])) goto errattr; - snprintf(smc_pnet, sizeof(smc_pnet), "%s", - (char *)&smcibdev->pnetid[port]); + memcpy(smc_pnet, &smcibdev->pnetid[port], SMC_MAX_PNETID_LEN); + smc_pnet[SMC_MAX_PNETID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_DEV_PORT_PNETID, smc_pnet)) goto errattr; if (nla_put_u32(skb, SMC_NLA_DEV_PORT_NETDEV, @@ -414,7 +414,7 @@ static int smc_nl_handle_smcr_dev(struct smc_ib_device *smcibdev, struct sk_buff *skb, struct netlink_callback *cb) { - char smc_ibname[IB_DEVICE_NAME_MAX + 1]; + char smc_ibname[IB_DEVICE_NAME_MAX]; struct smc_pci_dev smc_pci_dev; struct pci_dev *pci_dev; unsigned char is_crit; diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 524ef64a191a..9c6e95882553 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -250,7 +250,8 @@ static int smc_nl_handle_smcd_dev(struct smcd_dev *smcd, goto errattr; if (nla_put_u8(skb, SMC_NLA_DEV_PORT_PNET_USR, smcd->pnetid_by_user)) goto errportattr; - snprintf(smc_pnet, sizeof(smc_pnet), "%s", smcd->pnetid); + memcpy(smc_pnet, smcd->pnetid, SMC_MAX_PNETID_LEN); + smc_pnet[SMC_MAX_PNETID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_DEV_PORT_PNETID, smc_pnet)) goto errportattr; -- cgit 1.4.1 From 80709af7325d179b433817f421c85449f2454046 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 23 Dec 2020 00:01:52 +0800 Subject: riscv: cacheinfo: Fix using smp_processor_id() in preemptible Use raw_smp_processor_id instead of smp_processor_id() to fix warning, BUG: using smp_processor_id() in preemptible [00000000] code: init/1 caller is debug_smp_processor_id+0x1c/0x26 CPU: 0 PID: 1 Comm: init Not tainted 5.10.0-rc4 #211 Call Trace: walk_stackframe+0x0/0xaa show_stack+0x32/0x3e dump_stack+0x76/0x90 check_preemption_disabled+0xaa/0xac debug_smp_processor_id+0x1c/0x26 get_cache_size+0x18/0x68 load_elf_binary+0x868/0xece bprm_execve+0x224/0x498 kernel_execve+0xdc/0x142 run_init_process+0x90/0x9e try_to_run_init_process+0x12/0x3c kernel_init+0xb4/0xf8 ret_from_exception+0x0/0xc The issue is found when CONFIG_DEBUG_PREEMPT enabled. Reviewed-by: Atish Patra Tested-by: Atish Patra Signed-off-by: Kefeng Wang [Palmer: Added a comment.] Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cacheinfo.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index de59dd457b41..d86781357044 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -26,7 +26,16 @@ cache_get_priv_group(struct cacheinfo *this_leaf) static struct cacheinfo *get_cacheinfo(u32 level, enum cache_type type) { - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(smp_processor_id()); + /* + * Using raw_smp_processor_id() elides a preemptability check, but this + * is really indicative of a larger problem: the cacheinfo UABI assumes + * that cores have a homonogenous view of the cache hierarchy. That + * happens to be the case for the current set of RISC-V systems, but + * likely won't be true in general. Since there's no way to provide + * correct information for these systems via the current UABI we're + * just eliding the check for now. + */ + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(raw_smp_processor_id()); struct cacheinfo *this_leaf; int index; -- cgit 1.4.1 From 0aa2ec8a475fb505fd98d93bbcf4e03beeeebcb6 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Sat, 2 Jan 2021 13:24:34 +0000 Subject: riscv: Fixup CONFIG_GENERIC_TIME_VSYSCALL The patch fix commit: ad5d112 ("riscv: use vDSO common flow to reduce the latency of the time-related functions"). The GENERIC_TIME_VSYSCALL should be CONFIG_GENERIC_TIME_VSYSCALL or vgettimeofday won't work. Signed-off-by: Guo Ren Reviewed-by: Pekka Enberg Fixes: ad5d1122b82f ("riscv: use vDSO common flow to reduce the latency of the time-related functions") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/vdso.h | 2 +- arch/riscv/kernel/vdso.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index 8454f746bbfd..1453a2f563bc 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -10,7 +10,7 @@ #include -#ifndef GENERIC_TIME_VSYSCALL +#ifndef CONFIG_GENERIC_TIME_VSYSCALL struct vdso_data { }; #endif diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index 678204231700..3f1d35e7c98a 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -12,7 +12,7 @@ #include #include #include -#ifdef GENERIC_TIME_VSYSCALL +#ifdef CONFIG_GENERIC_TIME_VSYSCALL #include #else #include -- cgit 1.4.1 From 69e976831cd53f9ba304fd20305b2025ecc78eab Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Sun, 10 Jan 2021 14:21:05 +0000 Subject: MIPS: relocatable: fix possible boot hangup with KASLR enabled LLVM-built Linux triggered a boot hangup with KASLR enabled. arch/mips/kernel/relocate.c:get_random_boot() uses linux_banner, which is a string constant, as a random seed, but accesses it as an array of unsigned long (in rotate_xor()). When the address of linux_banner is not aligned to sizeof(long), such access emits unaligned access exception and hangs the kernel. Use PTR_ALIGN() to align input address to sizeof(long) and also align down the input length to prevent possible access-beyond-end. Fixes: 405bc8fd12f5 ("MIPS: Kernel: Implement KASLR using CONFIG_RELOCATABLE") Cc: stable@vger.kernel.org # 4.7+ Signed-off-by: Alexander Lobakin Tested-by: Nathan Chancellor Reviewed-by: Kees Cook Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/relocate.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/relocate.c b/arch/mips/kernel/relocate.c index 47aeb3350a76..0e365b7c742d 100644 --- a/arch/mips/kernel/relocate.c +++ b/arch/mips/kernel/relocate.c @@ -187,8 +187,14 @@ static int __init relocate_exception_table(long offset) static inline __init unsigned long rotate_xor(unsigned long hash, const void *area, size_t size) { - size_t i; - unsigned long *ptr = (unsigned long *)area; + const typeof(hash) *ptr = PTR_ALIGN(area, sizeof(hash)); + size_t diff, i; + + diff = (void *)ptr - area; + if (unlikely(size < diff + sizeof(hash))) + return hash; + + size = ALIGN_DOWN(size - diff, sizeof(hash)); for (i = 0; i < size / sizeof(hash); i++) { /* Rotate by odd number of bits and XOR. */ -- cgit 1.4.1 From 7b490a8ab0f2d3ab8d838a4ff22ae86edafd34a1 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Mon, 11 Jan 2021 05:27:25 -0800 Subject: MIPS: OCTEON: fix unreachable code in octeon_irq_init_ciu The type of 'r' in octeon_irq_init_ciu is 'unsigned int', so 'r < 0' can't be true. Fix this by change the type of 'r' and 'i' from 'unsigned int' to 'int'. As 'i' won't be negative, this change works. Fixes: 99fbc70f8547 ("MIPS: Octeon: irq: Alloc desc before configuring IRQ") Signed-off-by: Menglong Dong Reviewed-by: Alexander Sverdlin Signed-off-by: Thomas Bogendoerfer --- arch/mips/cavium-octeon/octeon-irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index bd47e15d02c7..be5d4afcd30f 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -1444,7 +1444,7 @@ static void octeon_irq_setup_secondary_ciu2(void) static int __init octeon_irq_init_ciu( struct device_node *ciu_node, struct device_node *parent) { - unsigned int i, r; + int i, r; struct irq_chip *chip; struct irq_chip *chip_edge; struct irq_chip *chip_mbox; -- cgit 1.4.1 From ef3a575baf53571dc405ee4028e26f50856898e7 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Tue, 12 Jan 2021 12:53:58 +0100 Subject: xen/privcmd: allow fetching resource sizes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow issuing an IOCTL_PRIVCMD_MMAP_RESOURCE ioctl with num = 0 and addr = 0 in order to fetch the size of a specific resource. Add a shortcut to the default map resource path, since fetching the size requires no address to be passed in, and thus no VMA to setup. This is missing from the initial implementation, and causes issues when mapping resources that don't have fixed or known sizes. Signed-off-by: Roger Pau Monné Reviewed-by: Juergen Gross Tested-by: Andrew Cooper Cc: stable@vger.kernel.org # >= 4.18 Link: https://lore.kernel.org/r/20210112115358.23346-1-roger.pau@citrix.com Signed-off-by: Juergen Gross --- drivers/xen/privcmd.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index b0c73c58f987..720a7b7abd46 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -717,14 +717,15 @@ static long privcmd_ioctl_restrict(struct file *file, void __user *udata) return 0; } -static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata) +static long privcmd_ioctl_mmap_resource(struct file *file, + struct privcmd_mmap_resource __user *udata) { struct privcmd_data *data = file->private_data; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; struct privcmd_mmap_resource kdata; xen_pfn_t *pfns = NULL; - struct xen_mem_acquire_resource xdata; + struct xen_mem_acquire_resource xdata = { }; int rc; if (copy_from_user(&kdata, udata, sizeof(kdata))) @@ -734,6 +735,22 @@ static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata) if (data->domid != DOMID_INVALID && data->domid != kdata.dom) return -EPERM; + /* Both fields must be set or unset */ + if (!!kdata.addr != !!kdata.num) + return -EINVAL; + + xdata.domid = kdata.dom; + xdata.type = kdata.type; + xdata.id = kdata.id; + + if (!kdata.addr && !kdata.num) { + /* Query the size of the resource. */ + rc = HYPERVISOR_memory_op(XENMEM_acquire_resource, &xdata); + if (rc) + return rc; + return __put_user(xdata.nr_frames, &udata->num); + } + mmap_write_lock(mm); vma = find_vma(mm, kdata.addr); @@ -768,10 +785,6 @@ static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata) } else vma->vm_private_data = PRIV_VMA_LOCKED; - memset(&xdata, 0, sizeof(xdata)); - xdata.domid = kdata.dom; - xdata.type = kdata.type; - xdata.id = kdata.id; xdata.frame = kdata.idx; xdata.nr_frames = kdata.num; set_xen_guest_handle(xdata.frame_list, pfns); -- cgit 1.4.1 From df06824767cc9a32fbdb0e3d3b7e169292a5b5fe Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 7 Jan 2021 14:53:10 +0000 Subject: arm64: entry: remove redundant IRQ flag tracing All EL0 returns go via ret_to_user(), which masks IRQs and notifies lockdep and tracing before calling into do_notify_resume(). Therefore, there's no need for do_notify_resume() to call trace_hardirqs_off(), and the comment is stale. The call is simply redundant. In ret_to_user() we call exit_to_user_mode(), which notifies lockdep and tracing the IRQs will be enabled in userspace, so there's no need for el0_svc_common() to call trace_hardirqs_on() before returning. Further, at the start of ret_to_user() we call trace_hardirqs_off(), so not only is this redundant, but it is immediately undone. In addition to being redundant, the trace_hardirqs_on() in el0_svc_common() leaves lockdep inconsistent with the hardware state, and is liable to cause issues for any C code or instrumentation between this and the call to trace_hardirqs_off() which undoes it in ret_to_user(). This patch removes the redundant tracing calls and associated stale comments. Fixes: 23529049c684 ("arm64: entry: fix non-NMI user<->kernel transitions") Signed-off-by: Mark Rutland Acked-by: Will Deacon Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20210107145310.44616-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/signal.c | 7 ------- arch/arm64/kernel/syscall.c | 9 +-------- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index f71d6ce4673f..6237486ff6bb 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -914,13 +914,6 @@ static void do_signal(struct pt_regs *regs) asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) { - /* - * The assembly code enters us with IRQs off, but it hasn't - * informed the tracing code of that for efficiency reasons. - * Update the trace code with the current status. - */ - trace_hardirqs_off(); - do { if (thread_flags & _TIF_NEED_RESCHED) { /* Unmask Debug and SError for the next task */ diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index f61e9d8cc55a..0bfac95fe464 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -165,15 +165,8 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) { local_daif_mask(); flags = current_thread_info()->flags; - if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP)) { - /* - * We're off to userspace, where interrupts are - * always enabled after we restore the flags from - * the SPSR. - */ - trace_hardirqs_on(); + if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP)) return; - } local_daif_restore(DAIF_PROCCTX); } -- cgit 1.4.1 From b90d72a6bfdb5e5c62cd223a8cdf4045bfbcb94d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 12 Jan 2021 22:18:55 +0000 Subject: Revert "arm64: Enable perf events based hard lockup detector" This reverts commit 367c820ef08082e68df8a3bc12e62393af21e4b5. lockup_detector_init() makes heavy use of per-cpu variables and must be called with preemption disabled. Usually, it's handled early during boot in kernel_init_freeable(), before SMP has been initialised. Since we do not know whether or not our PMU interrupt can be signalled as an NMI until considerably later in the boot process, the Arm PMU driver attempts to re-initialise the lockup detector off the back of a device_initcall(). Unfortunately, this is called from preemptible context and results in the following splat: | BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 | caller is debug_smp_processor_id+0x20/0x2c | CPU: 2 PID: 1 Comm: swapper/0 Not tainted 5.10.0+ #276 | Hardware name: linux,dummy-virt (DT) | Call trace: | dump_backtrace+0x0/0x3c0 | show_stack+0x20/0x6c | dump_stack+0x2f0/0x42c | check_preemption_disabled+0x1cc/0x1dc | debug_smp_processor_id+0x20/0x2c | hardlockup_detector_event_create+0x34/0x18c | hardlockup_detector_perf_init+0x2c/0x134 | watchdog_nmi_probe+0x18/0x24 | lockup_detector_init+0x44/0xa8 | armv8_pmu_driver_init+0x54/0x78 | do_one_initcall+0x184/0x43c | kernel_init_freeable+0x368/0x380 | kernel_init+0x1c/0x1cc | ret_from_fork+0x10/0x30 Rather than bodge this with raw_smp_processor_id() or randomly disabling preemption, simply revert the culprit for now until we figure out how to do this properly. Reported-by: Lecopzer Chen Signed-off-by: Will Deacon Acked-by: Mark Rutland Cc: Sumit Garg Cc: Alexandru Elisei Link: https://lore.kernel.org/r/20201221162249.3119-1-lecopzer.chen@mediatek.com Link: https://lore.kernel.org/r/20210112221855.10666-1-will@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 2 -- arch/arm64/kernel/perf_event.c | 41 ++--------------------------------------- drivers/perf/arm_pmu.c | 5 ----- include/linux/perf/arm_pmu.h | 2 -- 4 files changed, 2 insertions(+), 48 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 05e17351e4f3..f39568b28ec1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -174,8 +174,6 @@ config ARM64 select HAVE_NMI select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS - select HAVE_PERF_EVENTS_NMI if ARM64_PSEUDO_NMI && HW_PERF_EVENTS - select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 38bb07eff872..3605f77ad4df 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -23,8 +23,6 @@ #include #include #include -#include -#include /* ARMv8 Cortex-A53 specific event types. */ #define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2 @@ -1250,21 +1248,10 @@ static struct platform_driver armv8_pmu_driver = { static int __init armv8_pmu_driver_init(void) { - int ret; - if (acpi_disabled) - ret = platform_driver_register(&armv8_pmu_driver); + return platform_driver_register(&armv8_pmu_driver); else - ret = arm_pmu_acpi_probe(armv8_pmuv3_init); - - /* - * Try to re-initialize lockup detector after PMU init in - * case PMU events are triggered via NMIs. - */ - if (ret == 0 && arm_pmu_irq_is_nmi()) - lockup_detector_init(); - - return ret; + return arm_pmu_acpi_probe(armv8_pmuv3_init); } device_initcall(armv8_pmu_driver_init) @@ -1322,27 +1309,3 @@ void arch_perf_update_userpage(struct perf_event *event, userpg->cap_user_time_zero = 1; userpg->cap_user_time_short = 1; } - -#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF -/* - * Safe maximum CPU frequency in case a particular platform doesn't implement - * cpufreq driver. Although, architecture doesn't put any restrictions on - * maximum frequency but 5 GHz seems to be safe maximum given the available - * Arm CPUs in the market which are clocked much less than 5 GHz. On the other - * hand, we can't make it much higher as it would lead to a large hard-lockup - * detection timeout on parts which are running slower (eg. 1GHz on - * Developerbox) and doesn't possess a cpufreq driver. - */ -#define SAFE_MAX_CPU_FREQ 5000000000UL // 5 GHz -u64 hw_nmi_get_sample_period(int watchdog_thresh) -{ - unsigned int cpu = smp_processor_id(); - unsigned long max_cpu_freq; - - max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL; - if (!max_cpu_freq) - max_cpu_freq = SAFE_MAX_CPU_FREQ; - - return (u64)max_cpu_freq * watchdog_thresh; -} -#endif diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 794a37d50853..cb2f55f450e4 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -726,11 +726,6 @@ static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu) return per_cpu(hw_events->irq, cpu); } -bool arm_pmu_irq_is_nmi(void) -{ - return has_nmi; -} - /* * PMU hardware loses all context when a CPU goes offline. * When a CPU is hotplugged back in, since some hardware registers are diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index bf7966776c55..505480217cf1 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -163,8 +163,6 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn); static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; } #endif -bool arm_pmu_irq_is_nmi(void); - /* Internal functions only for core arm_pmu code */ struct arm_pmu *armpmu_alloc(void); struct arm_pmu *armpmu_alloc_atomic(void); -- cgit 1.4.1 From 71e70184f1d1314ad56e834d1befc07daa2af8e6 Mon Sep 17 00:00:00 2001 From: Jianlin Lv Date: Tue, 12 Jan 2021 09:58:13 +0800 Subject: arm64: rename S_FRAME_SIZE to PT_REGS_SIZE S_FRAME_SIZE is the size of the pt_regs structure, no longer the size of the kernel stack frame, the name is misleading. In keeping with arm32, rename S_FRAME_SIZE to PT_REGS_SIZE. Signed-off-by: Jianlin Lv Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20210112015813.2340969-1-Jianlin.Lv@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/asm-offsets.c | 2 +- arch/arm64/kernel/entry-ftrace.S | 12 ++++++------ arch/arm64/kernel/entry.S | 14 +++++++------- arch/arm64/kernel/probes/kprobes_trampoline.S | 6 +++--- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index f42fd9e33981..301784463587 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -75,7 +75,7 @@ int main(void) DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1)); DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save)); DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); - DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); + DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs)); BLANK(); #ifdef CONFIG_COMPAT DEFINE(COMPAT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_sigframe, uc.uc_mcontext.arm_r0)); diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index a338f40e64d3..b3e4f9a088b1 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -35,7 +35,7 @@ */ .macro ftrace_regs_entry, allregs=0 /* Make room for pt_regs, plus a callee frame */ - sub sp, sp, #(S_FRAME_SIZE + 16) + sub sp, sp, #(PT_REGS_SIZE + 16) /* Save function arguments (and x9 for simplicity) */ stp x0, x1, [sp, #S_X0] @@ -61,15 +61,15 @@ .endif /* Save the callsite's SP and LR */ - add x10, sp, #(S_FRAME_SIZE + 16) + add x10, sp, #(PT_REGS_SIZE + 16) stp x9, x10, [sp, #S_LR] /* Save the PC after the ftrace callsite */ str x30, [sp, #S_PC] /* Create a frame record for the callsite above pt_regs */ - stp x29, x9, [sp, #S_FRAME_SIZE] - add x29, sp, #S_FRAME_SIZE + stp x29, x9, [sp, #PT_REGS_SIZE] + add x29, sp, #PT_REGS_SIZE /* Create our frame record within pt_regs. */ stp x29, x30, [sp, #S_STACKFRAME] @@ -120,7 +120,7 @@ ftrace_common_return: ldr x9, [sp, #S_PC] /* Restore the callsite's SP */ - add sp, sp, #S_FRAME_SIZE + 16 + add sp, sp, #PT_REGS_SIZE + 16 ret x9 SYM_CODE_END(ftrace_common) @@ -130,7 +130,7 @@ SYM_CODE_START(ftrace_graph_caller) ldr x0, [sp, #S_PC] sub x0, x0, #AARCH64_INSN_SIZE // ip (callsite's BL insn) add x1, sp, #S_LR // parent_ip (callsite's LR) - ldr x2, [sp, #S_FRAME_SIZE] // parent fp (callsite's FP) + ldr x2, [sp, #PT_REGS_SIZE] // parent fp (callsite's FP) bl prepare_ftrace_return b ftrace_common_return SYM_CODE_END(ftrace_graph_caller) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a8c3e7aaca74..c9bae73f2621 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -75,7 +75,7 @@ alternative_else_nop_endif .endif #endif - sub sp, sp, #S_FRAME_SIZE + sub sp, sp, #PT_REGS_SIZE #ifdef CONFIG_VMAP_STACK /* * Test whether the SP has overflowed, without corrupting a GPR. @@ -96,7 +96,7 @@ alternative_else_nop_endif * userspace, and can clobber EL0 registers to free up GPRs. */ - /* Stash the original SP (minus S_FRAME_SIZE) in tpidr_el0. */ + /* Stash the original SP (minus PT_REGS_SIZE) in tpidr_el0. */ msr tpidr_el0, x0 /* Recover the original x0 value and stash it in tpidrro_el0 */ @@ -253,7 +253,7 @@ alternative_else_nop_endif scs_load tsk, x20 .else - add x21, sp, #S_FRAME_SIZE + add x21, sp, #PT_REGS_SIZE get_current_task tsk .endif /* \el == 0 */ mrs x22, elr_el1 @@ -377,7 +377,7 @@ alternative_else_nop_endif ldp x26, x27, [sp, #16 * 13] ldp x28, x29, [sp, #16 * 14] ldr lr, [sp, #S_LR] - add sp, sp, #S_FRAME_SIZE // restore sp + add sp, sp, #PT_REGS_SIZE // restore sp .if \el == 0 alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 @@ -580,12 +580,12 @@ __bad_stack: /* * Store the original GPRs to the new stack. The orginal SP (minus - * S_FRAME_SIZE) was stashed in tpidr_el0 by kernel_ventry. + * PT_REGS_SIZE) was stashed in tpidr_el0 by kernel_ventry. */ - sub sp, sp, #S_FRAME_SIZE + sub sp, sp, #PT_REGS_SIZE kernel_entry 1 mrs x0, tpidr_el0 - add x0, x0, #S_FRAME_SIZE + add x0, x0, #PT_REGS_SIZE str x0, [sp, #S_SP] /* Stash the regs for handle_bad_stack */ diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S index 890ca72c5a51..288a84e253cc 100644 --- a/arch/arm64/kernel/probes/kprobes_trampoline.S +++ b/arch/arm64/kernel/probes/kprobes_trampoline.S @@ -25,7 +25,7 @@ stp x24, x25, [sp, #S_X24] stp x26, x27, [sp, #S_X26] stp x28, x29, [sp, #S_X28] - add x0, sp, #S_FRAME_SIZE + add x0, sp, #PT_REGS_SIZE stp lr, x0, [sp, #S_LR] /* * Construct a useful saved PSTATE @@ -62,7 +62,7 @@ .endm SYM_CODE_START(kretprobe_trampoline) - sub sp, sp, #S_FRAME_SIZE + sub sp, sp, #PT_REGS_SIZE save_all_base_regs @@ -76,7 +76,7 @@ SYM_CODE_START(kretprobe_trampoline) restore_all_base_regs - add sp, sp, #S_FRAME_SIZE + add sp, sp, #PT_REGS_SIZE ret SYM_CODE_END(kretprobe_trampoline) -- cgit 1.4.1 From c35a824c31834d947fb99b0c608c1b9f922b4ba0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 8 Jan 2021 10:19:56 +0100 Subject: arm64: make atomic helpers __always_inline With UBSAN enabled and building with clang, there are occasionally warnings like WARNING: modpost: vmlinux.o(.text+0xc533ec): Section mismatch in reference from the function arch_atomic64_or() to the variable .init.data:numa_nodes_parsed The function arch_atomic64_or() references the variable __initdata numa_nodes_parsed. This is often because arch_atomic64_or lacks a __initdata annotation or the annotation of numa_nodes_parsed is wrong. for functions that end up not being inlined as intended but operating on __initdata variables. Mark these as __always_inline, along with the corresponding asm-generic wrappers. Signed-off-by: Arnd Bergmann Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210108092024.4034860-1-arnd@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/atomic.h | 10 +++++----- include/asm-generic/bitops/atomic.h | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 015ddffaf6ca..b56a4b2bc248 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -17,7 +17,7 @@ #include #define ATOMIC_OP(op) \ -static inline void arch_##op(int i, atomic_t *v) \ +static __always_inline void arch_##op(int i, atomic_t *v) \ { \ __lse_ll_sc_body(op, i, v); \ } @@ -32,7 +32,7 @@ ATOMIC_OP(atomic_sub) #undef ATOMIC_OP #define ATOMIC_FETCH_OP(name, op) \ -static inline int arch_##op##name(int i, atomic_t *v) \ +static __always_inline int arch_##op##name(int i, atomic_t *v) \ { \ return __lse_ll_sc_body(op##name, i, v); \ } @@ -56,7 +56,7 @@ ATOMIC_FETCH_OPS(atomic_sub_return) #undef ATOMIC_FETCH_OPS #define ATOMIC64_OP(op) \ -static inline void arch_##op(long i, atomic64_t *v) \ +static __always_inline void arch_##op(long i, atomic64_t *v) \ { \ __lse_ll_sc_body(op, i, v); \ } @@ -71,7 +71,7 @@ ATOMIC64_OP(atomic64_sub) #undef ATOMIC64_OP #define ATOMIC64_FETCH_OP(name, op) \ -static inline long arch_##op##name(long i, atomic64_t *v) \ +static __always_inline long arch_##op##name(long i, atomic64_t *v) \ { \ return __lse_ll_sc_body(op##name, i, v); \ } @@ -94,7 +94,7 @@ ATOMIC64_FETCH_OPS(atomic64_sub_return) #undef ATOMIC64_FETCH_OP #undef ATOMIC64_FETCH_OPS -static inline long arch_atomic64_dec_if_positive(atomic64_t *v) +static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v) { return __lse_ll_sc_body(atomic64_dec_if_positive, v); } diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h index dd90c9792909..0e7316a86240 100644 --- a/include/asm-generic/bitops/atomic.h +++ b/include/asm-generic/bitops/atomic.h @@ -11,19 +11,19 @@ * See Documentation/atomic_bitops.txt for details. */ -static inline void set_bit(unsigned int nr, volatile unsigned long *p) +static __always_inline void set_bit(unsigned int nr, volatile unsigned long *p) { p += BIT_WORD(nr); atomic_long_or(BIT_MASK(nr), (atomic_long_t *)p); } -static inline void clear_bit(unsigned int nr, volatile unsigned long *p) +static __always_inline void clear_bit(unsigned int nr, volatile unsigned long *p) { p += BIT_WORD(nr); atomic_long_andnot(BIT_MASK(nr), (atomic_long_t *)p); } -static inline void change_bit(unsigned int nr, volatile unsigned long *p) +static __always_inline void change_bit(unsigned int nr, volatile unsigned long *p) { p += BIT_WORD(nr); atomic_long_xor(BIT_MASK(nr), (atomic_long_t *)p); -- cgit 1.4.1 From 3499ba8198cad47b731792e5e56b9ec2a78a83a2 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 13 Jan 2021 13:26:02 +0000 Subject: xen: Fix event channel callback via INTX/GSI For a while, event channel notification via the PCI platform device has been broken, because we attempt to communicate with xenstore before we even have notifications working, with the xs_reset_watches() call in xs_init(). We tend to get away with this on Xen versions below 4.0 because we avoid calling xs_reset_watches() anyway, because xenstore might not cope with reading a non-existent key. And newer Xen *does* have the vector callback support, so we rarely fall back to INTX/GSI delivery. To fix it, clean up a bit of the mess of xs_init() and xenbus_probe() startup. Call xs_init() directly from xenbus_init() only in the !XS_HVM case, deferring it to be called from xenbus_probe() in the XS_HVM case instead. Then fix up the invocation of xenbus_probe() to happen either from its device_initcall if the callback is available early enough, or when the callback is finally set up. This means that the hack of calling xenbus_probe() from a workqueue after the first interrupt, or directly from the PCI platform device setup, is no longer needed. Signed-off-by: David Woodhouse Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210113132606.422794-2-dwmw2@infradead.org Signed-off-by: Juergen Gross --- arch/arm/xen/enlighten.c | 2 +- drivers/xen/events/events_base.c | 10 ----- drivers/xen/platform-pci.c | 1 - drivers/xen/xenbus/xenbus.h | 1 + drivers/xen/xenbus/xenbus_comms.c | 8 ---- drivers/xen/xenbus/xenbus_probe.c | 81 ++++++++++++++++++++++++++++++++------- include/xen/xenbus.h | 2 +- 7 files changed, 70 insertions(+), 35 deletions(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 60e901cd0de6..5a957a9a0984 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -371,7 +371,7 @@ static int __init xen_guest_init(void) } gnttab_init(); if (!xen_initial_domain()) - xenbus_probe(NULL); + xenbus_probe(); /* * Making sure board specific code will not set up ops for diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 6038c4c35db5..bbebe248b726 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -2010,16 +2010,6 @@ static struct irq_chip xen_percpu_chip __read_mostly = { .irq_ack = ack_dynirq, }; -int xen_set_callback_via(uint64_t via) -{ - struct xen_hvm_param a; - a.domid = DOMID_SELF; - a.index = HVM_PARAM_CALLBACK_IRQ; - a.value = via; - return HYPERVISOR_hvm_op(HVMOP_set_param, &a); -} -EXPORT_SYMBOL_GPL(xen_set_callback_via); - #ifdef CONFIG_XEN_PVHVM /* Vector callbacks are better than PCI interrupts to receive event * channel notifications because we can receive vector callbacks on any diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index dd911e1ff782..9db557b76511 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c @@ -149,7 +149,6 @@ static int platform_pci_probe(struct pci_dev *pdev, ret = gnttab_init(); if (ret) goto grant_out; - xenbus_probe(NULL); return 0; grant_out: gnttab_free_auto_xlat_frames(); diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h index 2a93b7c9c159..dc1537335414 100644 --- a/drivers/xen/xenbus/xenbus.h +++ b/drivers/xen/xenbus/xenbus.h @@ -115,6 +115,7 @@ int xenbus_probe_node(struct xen_bus_type *bus, const char *type, const char *nodename); int xenbus_probe_devices(struct xen_bus_type *bus); +void xenbus_probe(void); void xenbus_dev_changed(const char *node, struct xen_bus_type *bus); diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index eb5151fc8efa..e5fda0256feb 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -57,16 +57,8 @@ DEFINE_MUTEX(xs_response_mutex); static int xenbus_irq; static struct task_struct *xenbus_task; -static DECLARE_WORK(probe_work, xenbus_probe); - - static irqreturn_t wake_waiting(int irq, void *unused) { - if (unlikely(xenstored_ready == 0)) { - xenstored_ready = 1; - schedule_work(&probe_work); - } - wake_up(&xb_waitq); return IRQ_HANDLED; } diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 44634d970a5c..c8f0282bb649 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -683,29 +683,76 @@ void unregister_xenstore_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(unregister_xenstore_notifier); -void xenbus_probe(struct work_struct *unused) +void xenbus_probe(void) { xenstored_ready = 1; + /* + * In the HVM case, xenbus_init() deferred its call to + * xs_init() in case callbacks were not operational yet. + * So do it now. + */ + if (xen_store_domain_type == XS_HVM) + xs_init(); + /* Notify others that xenstore is up */ blocking_notifier_call_chain(&xenstore_chain, 0, NULL); } -EXPORT_SYMBOL_GPL(xenbus_probe); -static int __init xenbus_probe_initcall(void) +/* + * Returns true when XenStore init must be deferred in order to + * allow the PCI platform device to be initialised, before we + * can actually have event channel interrupts working. + */ +static bool xs_hvm_defer_init_for_callback(void) { - if (!xen_domain()) - return -ENODEV; +#ifdef CONFIG_XEN_PVHVM + return xen_store_domain_type == XS_HVM && + !xen_have_vector_callback; +#else + return false; +#endif +} - if (xen_initial_domain() || xen_hvm_domain()) - return 0; +static int __init xenbus_probe_initcall(void) +{ + /* + * Probe XenBus here in the XS_PV case, and also XS_HVM unless we + * need to wait for the platform PCI device to come up. + */ + if (xen_store_domain_type == XS_PV || + (xen_store_domain_type == XS_HVM && + !xs_hvm_defer_init_for_callback())) + xenbus_probe(); - xenbus_probe(NULL); return 0; } - device_initcall(xenbus_probe_initcall); +int xen_set_callback_via(uint64_t via) +{ + struct xen_hvm_param a; + int ret; + + a.domid = DOMID_SELF; + a.index = HVM_PARAM_CALLBACK_IRQ; + a.value = via; + + ret = HYPERVISOR_hvm_op(HVMOP_set_param, &a); + if (ret) + return ret; + + /* + * If xenbus_probe_initcall() deferred the xenbus_probe() + * due to the callback not functioning yet, we can do it now. + */ + if (!xenstored_ready && xs_hvm_defer_init_for_callback()) + xenbus_probe(); + + return ret; +} +EXPORT_SYMBOL_GPL(xen_set_callback_via); + /* Set up event channel for xenstored which is run as a local process * (this is normally used only in dom0) */ @@ -818,11 +865,17 @@ static int __init xenbus_init(void) break; } - /* Initialize the interface to xenstore. */ - err = xs_init(); - if (err) { - pr_warn("Error initializing xenstore comms: %i\n", err); - goto out_error; + /* + * HVM domains may not have a functional callback yet. In that + * case let xs_init() be called from xenbus_probe(), which will + * get invoked at an appropriate time. + */ + if (xen_store_domain_type != XS_HVM) { + err = xs_init(); + if (err) { + pr_warn("Error initializing xenstore comms: %i\n", err); + goto out_error; + } } if ((xen_store_domain_type != XS_LOCAL) && diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 00c7235ae93e..2c43b0ef1e4d 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -192,7 +192,7 @@ void xs_suspend_cancel(void); struct work_struct; -void xenbus_probe(struct work_struct *); +void xenbus_probe(void); #define XENBUS_IS_ERR_READ(str) ({ \ if (!IS_ERR(str) && strlen(str) == 0) { \ -- cgit 1.4.1 From 8f4fd86aa5d6aa122619623910065d236592e37c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 6 Jan 2021 15:39:55 +0000 Subject: xen: Set platform PCI device INTX affinity to CPU0 With INTX or GSI delivery, Xen uses the event channel structures of CPU0. If the interrupt gets handled by Linux on a different CPU, then no events are seen as pending. Rather than introducing locking to allow other CPUs to process CPU0's events, just ensure that the PCI interrupts happens only on CPU0. Signed-off-by: David Woodhouse Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210106153958.584169-3-dwmw2@infradead.org Signed-off-by: Juergen Gross --- drivers/xen/platform-pci.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index 9db557b76511..18f0ed8b1f93 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c @@ -132,6 +132,13 @@ static int platform_pci_probe(struct pci_dev *pdev, dev_warn(&pdev->dev, "request_irq failed err=%d\n", ret); goto out; } + /* + * It doesn't strictly *have* to run on CPU0 but it sure + * as hell better process the event channel ports delivered + * to CPU0. + */ + irq_set_affinity(pdev->irq, cpumask_of(0)); + callback_via = get_callback_via(pdev); ret = xen_set_callback_via(callback_via); if (ret) { -- cgit 1.4.1 From b36b0fe96af13460278bf9b173beced1bd15f85d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 6 Jan 2021 15:39:56 +0000 Subject: x86/xen: Add xen_no_vector_callback option to test PCI INTX delivery It's useful to be able to test non-vector event channel delivery, to make sure Linux will work properly on older Xen which doesn't have it. It's also useful for those working on Xen and Xen-compatible hypervisors, because there are guest kernels still in active use which use PCI INTX even when vector delivery is available. Signed-off-by: David Woodhouse Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210106153958.584169-4-dwmw2@infradead.org Signed-off-by: Juergen Gross --- Documentation/admin-guide/kernel-parameters.txt | 4 ++++ arch/x86/xen/enlighten_hvm.c | 11 ++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 44fde25bb221..035d27b6272c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5964,6 +5964,10 @@ This option is obsoleted by the "nopv" option, which has equivalent effect for XEN platform. + xen_no_vector_callback + [KNL,X86,XEN] Disable the vector callback for Xen + event channel interrupts. + xen_scrub_pages= [XEN] Boolean option to control scrubbing pages before giving them back to Xen, for use by other domains. Can be also changed at runtime diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 9e87ab010c82..ec50b7423a4c 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -188,6 +188,8 @@ static int xen_cpu_dead_hvm(unsigned int cpu) return 0; } +static bool no_vector_callback __initdata; + static void __init xen_hvm_guest_init(void) { if (xen_pv_domain()) @@ -207,7 +209,7 @@ static void __init xen_hvm_guest_init(void) xen_panic_handler_init(); - if (xen_feature(XENFEAT_hvm_callback_vector)) + if (!no_vector_callback && xen_feature(XENFEAT_hvm_callback_vector)) xen_have_vector_callback = 1; xen_hvm_smp_init(); @@ -233,6 +235,13 @@ static __init int xen_parse_nopv(char *arg) } early_param("xen_nopv", xen_parse_nopv); +static __init int xen_parse_no_vector_callback(char *arg) +{ + no_vector_callback = true; + return 0; +} +early_param("xen_no_vector_callback", xen_parse_no_vector_callback); + bool __init xen_hvm_need_lapic(void) { if (xen_pv_domain()) -- cgit 1.4.1 From 4621dc6a5bf1235249e92231db30c96dfd1a18b9 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 6 Jan 2021 15:39:57 +0000 Subject: x86/xen: Don't register Xen IPIs when they aren't going to be used In the case where xen_have_vector_callback is false, we still register the IPI vectors in xen_smp_intr_init() for the secondary CPUs even though they aren't going to be used. Stop doing that. Signed-off-by: David Woodhouse Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210106153958.584169-5-dwmw2@infradead.org Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_hvm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index ec50b7423a4c..e68ea5f4ad1c 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -164,10 +164,10 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu) else per_cpu(xen_vcpu_id, cpu) = cpu; rc = xen_vcpu_setup(cpu); - if (rc) + if (rc || !xen_have_vector_callback) return rc; - if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock)) + if (xen_feature(XENFEAT_hvm_safe_pvclock)) xen_setup_timer(cpu); rc = xen_smp_intr_init(cpu); -- cgit 1.4.1 From 3d7746bea92530e8695258a3cf3ddec7a135edd6 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 6 Jan 2021 15:39:58 +0000 Subject: x86/xen: Fix xen_hvm_smp_init() when vector callback not available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only the IPI-related functions in the smp_ops should be conditional on the vector callback being available. The rest should still happen: • xen_hvm_smp_prepare_boot_cpu() This function does two things, both of which should still happen if there is no vector callback support. The call to xen_vcpu_setup() for vCPU0 should still happen as it just sets up the vcpu_info for CPU0. That does happen for the secondary vCPUs too, from xen_cpu_up_prepare_hvm(). The second thing it does is call xen_init_spinlocks(), which perhaps counter-intuitively should *also* still be happening in the case without vector callbacks, so that it can clear its local xen_pvspin flag and disable the virt_spin_lock_key accordingly. Checking xen_have_vector_callback in xen_init_spinlocks() itself would affect PV guests, so set the global nopvspin flag in xen_hvm_smp_init() instead, when vector callbacks aren't available. • xen_hvm_smp_prepare_cpus() This does some IPI-related setup by calling xen_smp_intr_init() and xen_init_lock_cpu(), which can be made conditional. And it sets the xen_vcpu_id to XEN_VCPU_ID_INVALID for all possible CPUS, which does need to happen. • xen_smp_cpus_done() This offlines any vCPUs which doesn't fit in the global shared_info page, if separate vcpu_info placement isn't available. That part also needs to happen regardless of vector callback support. • xen_hvm_cpu_die() This doesn't actually do anything other than commin_cpu_die() right right now in the !vector_callback case; all three teardown functions it calls should be no-ops. But to guard against future regressions it's useful to call it anyway, and for it to explicitly check for xen_have_vector_callback before calling those additional functions. Signed-off-by: David Woodhouse Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210106153958.584169-6-dwmw2@infradead.org Signed-off-by: Juergen Gross --- arch/x86/xen/smp_hvm.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c index f5e7db4f82ab..056430a1080b 100644 --- a/arch/x86/xen/smp_hvm.c +++ b/arch/x86/xen/smp_hvm.c @@ -33,9 +33,11 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) int cpu; native_smp_prepare_cpus(max_cpus); - WARN_ON(xen_smp_intr_init(0)); - xen_init_lock_cpu(0); + if (xen_have_vector_callback) { + WARN_ON(xen_smp_intr_init(0)); + xen_init_lock_cpu(0); + } for_each_possible_cpu(cpu) { if (cpu == 0) @@ -50,9 +52,11 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) static void xen_hvm_cpu_die(unsigned int cpu) { if (common_cpu_die(cpu) == 0) { - xen_smp_intr_free(cpu); - xen_uninit_lock_cpu(cpu); - xen_teardown_timer(cpu); + if (xen_have_vector_callback) { + xen_smp_intr_free(cpu); + xen_uninit_lock_cpu(cpu); + xen_teardown_timer(cpu); + } } } #else @@ -64,14 +68,17 @@ static void xen_hvm_cpu_die(unsigned int cpu) void __init xen_hvm_smp_init(void) { - if (!xen_have_vector_callback) + smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu; + smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; + smp_ops.smp_cpus_done = xen_smp_cpus_done; + smp_ops.cpu_die = xen_hvm_cpu_die; + + if (!xen_have_vector_callback) { + nopvspin = true; return; + } - smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; smp_ops.smp_send_reschedule = xen_smp_send_reschedule; - smp_ops.cpu_die = xen_hvm_cpu_die; smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi; smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi; - smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu; - smp_ops.smp_cpus_done = xen_smp_cpus_done; } -- cgit 1.4.1 From b4411616c26f26c4017b8fa4d3538b1a02028733 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 13 Jan 2021 12:42:24 +0000 Subject: io_uring: fix null-deref in io_disable_sqo_submit general protection fault, probably for non-canonical address 0xdffffc0000000022: 0000 [#1] KASAN: null-ptr-deref in range [0x0000000000000110-0x0000000000000117] RIP: 0010:io_ring_set_wakeup_flag fs/io_uring.c:6929 [inline] RIP: 0010:io_disable_sqo_submit+0xdb/0x130 fs/io_uring.c:8891 Call Trace: io_uring_create fs/io_uring.c:9711 [inline] io_uring_setup+0x12b1/0x38e0 fs/io_uring.c:9739 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 io_disable_sqo_submit() might be called before user rings were allocated, don't do io_ring_set_wakeup_flag() in those cases. Reported-by: syzbot+ab412638aeb652ded540@syzkaller.appspotmail.com Fixes: d9d05217cb69 ("io_uring: stop SQPOLL submit on creator's death") Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index b0e6d8e607a3..66db2c46ab82 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8895,7 +8895,8 @@ static void io_disable_sqo_submit(struct io_ring_ctx *ctx) mutex_unlock(&ctx->uring_lock); /* make sure callers enter the ring to get error */ - io_ring_set_wakeup_flag(ctx); + if (ctx->rings) + io_ring_set_wakeup_flag(ctx); } /* -- cgit 1.4.1 From 06585c497b55045ec21aa8128e340f6a6587351c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 13 Jan 2021 12:42:25 +0000 Subject: io_uring: do sqo disable on install_fd error WARNING: CPU: 0 PID: 8494 at fs/io_uring.c:8717 io_ring_ctx_wait_and_kill+0x4f2/0x600 fs/io_uring.c:8717 Call Trace: io_uring_release+0x3e/0x50 fs/io_uring.c:8759 __fput+0x283/0x920 fs/file_table.c:280 task_work_run+0xdd/0x190 kernel/task_work.c:140 tracehook_notify_resume include/linux/tracehook.h:189 [inline] exit_to_user_mode_loop kernel/entry/common.c:174 [inline] exit_to_user_mode_prepare+0x249/0x250 kernel/entry/common.c:201 __syscall_exit_to_user_mode_work kernel/entry/common.c:291 [inline] syscall_exit_to_user_mode+0x19/0x50 kernel/entry/common.c:302 entry_SYSCALL_64_after_hwframe+0x44/0xa9 failed io_uring_install_fd() is a special case, we don't do io_ring_ctx_wait_and_kill() directly but defer it to fput, though still need to io_disable_sqo_submit() before. note: it doesn't fix any real problem, just a warning. That's because sqring won't be available to the userspace in this case and so SQPOLL won't submit anything. Reported-by: syzbot+9c9c35374c0ecac06516@syzkaller.appspotmail.com Fixes: d9d05217cb69 ("io_uring: stop SQPOLL submit on creator's death") Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 66db2c46ab82..372be9caf340 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9708,6 +9708,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, */ ret = io_uring_install_fd(ctx, file); if (ret < 0) { + io_disable_sqo_submit(ctx); /* fput will clean it up */ fput(file); return ret; -- cgit 1.4.1 From d52e419ac8b50c8bef41b398ed13528e75d7ad48 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 Jan 2021 15:23:51 +0000 Subject: rxrpc: Fix handling of an unsupported token type in rxrpc_read() Clang static analysis reports the following: net/rxrpc/key.c:657:11: warning: Assigned value is garbage or undefined toksize = toksizes[tok++]; ^ ~~~~~~~~~~~~~~~ rxrpc_read() contains two consecutive loops. The first loop calculates the token sizes and stores the results in toksizes[] and the second one uses the array. When there is an error in identifying the token in the first loop, the token is skipped, no change is made to the toksizes[] array. When the same error happens in the second loop, the token is not skipped. This will cause the toksizes[] array to be out of step and will overrun past the calculated sizes. Fix this by making both loops log a message and return an error in this case. This should only happen if a new token type is incompletely implemented, so it should normally be impossible to trigger this. Fixes: 9a059cd5ca7d ("rxrpc: Downgrade the BUG() for unsupported token type in rxrpc_read()") Reported-by: Tom Rix Signed-off-by: David Howells Reviewed-by: Tom Rix Link: https://lore.kernel.org/r/161046503122.2445787.16714129930607546635.stgit@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- net/rxrpc/key.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index 9631aa8543b5..8d2073e0e3da 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -598,7 +598,7 @@ static long rxrpc_read(const struct key *key, default: /* we have a ticket we can't encode */ pr_err("Unsupported key token type (%u)\n", token->security_index); - continue; + return -ENOPKG; } _debug("token[%u]: toksize=%u", ntoks, toksize); @@ -674,7 +674,9 @@ static long rxrpc_read(const struct key *key, break; default: - break; + pr_err("Unsupported key token type (%u)\n", + token->security_index); + return -ENOPKG; } ASSERTCMP((unsigned long)xdr - (unsigned long)oldxdr, ==, -- cgit 1.4.1 From a95d25dd7b94a5ba18246da09b4218f132fed60e Mon Sep 17 00:00:00 2001 From: Baptiste Lepers Date: Tue, 12 Jan 2021 15:59:15 +0000 Subject: rxrpc: Call state should be read with READ_ONCE() under some circumstances The call state may be changed at any time by the data-ready routine in response to received packets, so if the call state is to be read and acted upon several times in a function, READ_ONCE() must be used unless the call state lock is held. As it happens, we used READ_ONCE() to read the state a few lines above the unmarked read in rxrpc_input_data(), so use that value rather than re-reading it. Fixes: a158bdd3247b ("rxrpc: Fix call timeouts") Signed-off-by: Baptiste Lepers Signed-off-by: David Howells Link: https://lore.kernel.org/r/161046715522.2450566.488819910256264150.stgit@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- net/rxrpc/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 667c44aa5a63..dc201363f2c4 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -430,7 +430,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) return; } - if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST) { + if (state == RXRPC_CALL_SERVER_RECV_REQUEST) { unsigned long timo = READ_ONCE(call->next_req_timo); unsigned long now, expect_req_by; -- cgit 1.4.1 From 77b6ec01c29aade01701aa30bf1469acc7f2be76 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 5 Jan 2021 12:21:26 -0800 Subject: cifs: check pointer before freeing clang static analysis reports this problem dfs_cache.c:591:2: warning: Argument to kfree() is a constant address (18446744073709551614), which is not memory allocated by malloc() kfree(vi); ^~~~~~~~~ In dfs_cache_del_vol() the volume info pointer 'vi' being freed is the return of a call to find_vol(). The large constant address is find_vol() returning an error. Add an error check to dfs_cache_del_vol() similar to the one done in dfs_cache_update_vol(). Fixes: 54be1f6c1c37 ("cifs: Add DFS cache routines") Signed-off-by: Tom Rix Reviewed-by: Nathan Chancellor CC: # v5.0+ Signed-off-by: Steve French --- fs/cifs/dfs_cache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index 6ad6ba5f6ebe..0fdb0de7ff86 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -1260,7 +1260,8 @@ void dfs_cache_del_vol(const char *fullpath) vi = find_vol(fullpath); spin_unlock(&vol_list_lock); - kref_put(&vi->refcnt, vol_release); + if (!IS_ERR(vi)) + kref_put(&vi->refcnt, vol_release); } /** -- cgit 1.4.1 From 2659d3bff3e1b000f49907d0839178b101a89887 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 13 Jan 2021 14:16:16 -0300 Subject: cifs: fix interrupted close commands Retry close command if it gets interrupted to not leak open handles on the server. Signed-off-by: Paulo Alcantara (SUSE) Reported-by: Duncan Findlay Suggested-by: Pavel Shilovsky Fixes: 6988a619f5b7 ("cifs: allow syscalls to be restarted in __smb_send_rqst()") Cc: stable@vger.kernel.org Reviewd-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 067eb44c7baa..794fc3b68b4f 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3248,7 +3248,7 @@ close_exit: free_rsp_buf(resp_buftype, rsp); /* retry close in a worker thread if this one is interrupted */ - if (rc == -EINTR) { + if (is_interrupt_error(rc)) { int tmp_rc; tmp_rc = smb2_handle_cancelled_close(tcon, persistent_fid, -- cgit 1.4.1 From c13e7af042270724b42a466edc48a70a43f571f2 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Tue, 12 Jan 2021 01:13:40 -0800 Subject: fs: cifs: remove unneeded variable in smb3_fs_context_dup 'rc' in smb3_fs_context_dup is not used and can be removed. Signed-off-by: Menglong Dong Reviewed-by: Aurelien Aptel Signed-off-by: Steve French --- fs/cifs/fs_context.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 0afccbbed2e6..076bcadc756a 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -303,8 +303,6 @@ do { \ int smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx) { - int rc = 0; - memcpy(new_ctx, ctx, sizeof(*ctx)); new_ctx->prepath = NULL; new_ctx->mount_options = NULL; @@ -327,7 +325,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx DUP_CTX_STR(nodename); DUP_CTX_STR(iocharset); - return rc; + return 0; } static int -- cgit 1.4.1 From ed6b1920f84bc5c3d666dc383ff3bbc60f0f62a5 Mon Sep 17 00:00:00 2001 From: YANG LI Date: Mon, 11 Jan 2021 17:15:28 +0800 Subject: cifs: connect: style: Simplify bool comparison Fix the following coccicheck warning: ./fs/cifs/connect.c:3740:6-21: WARNING: Comparison of 0/1 to bool variable Signed-off-by: YANG LI Reported-by: Abaci Robot Signed-off-by: Steve French --- fs/cifs/connect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index b9df85506938..5d39129406ea 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3740,7 +3740,7 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, if (!ses->binding) { ses->capabilities = server->capabilities; - if (linuxExtEnabled == 0) + if (!linuxExtEnabled) ses->capabilities &= (~server->vals->cap_unix); if (ses->auth_key.response) { -- cgit 1.4.1 From e54fd0716c3db20c0cba73fee2c3a4274b08c24e Mon Sep 17 00:00:00 2001 From: YANG LI Date: Wed, 30 Dec 2020 14:35:45 +0800 Subject: cifs: style: replace one-element array with flexible-array There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use "flexible array members"[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.9/process/ deprecated.html#zero-length-and-one-element-arrays Signed-off-by: YANG LI Reported-by: Abaci Signed-off-by: Steve French --- fs/cifs/smb2pdu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 204a622b89ed..d85edf5d1429 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -424,7 +424,7 @@ struct smb2_rdma_transform_capabilities_context { __le16 TransformCount; __u16 Reserved1; __u32 Reserved2; - __le16 RDMATransformIds[1]; + __le16 RDMATransformIds[]; } __packed; /* Signing algorithms */ -- cgit 1.4.1 From b42b3a2744b3e8f427de79896720c72823af91ad Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 12 Jan 2021 10:16:43 +0100 Subject: can: isotp: isotp_getname(): fix kernel information leak Initialize the sockaddr_can structure to prevent a data leak to user space. Suggested-by: Cong Wang Reported-by: syzbot+057884e2f453e8afebc8@syzkaller.appspotmail.com Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol") Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/r/20210112091643.11789-1-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- net/can/isotp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/can/isotp.c b/net/can/isotp.c index 7839c3b9e5be..3ef7f78e553b 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1155,6 +1155,7 @@ static int isotp_getname(struct socket *sock, struct sockaddr *uaddr, int peer) if (peer) return -EOPNOTSUPP; + memset(addr, 0, sizeof(*addr)); addr->can_family = AF_CAN; addr->can_ifindex = so->ifindex; addr->can_addr.tp.rx_id = so->rxid; -- cgit 1.4.1 From ca4c6ebeeb50112f5178f14bfb6d9e8ddf148545 Mon Sep 17 00:00:00 2001 From: Qinglang Miao Date: Wed, 13 Jan 2021 15:31:00 +0800 Subject: can: mcp251xfd: mcp251xfd_handle_rxif_one(): fix wrong NULL pointer check If alloc_canfd_skb() returns NULL, 'cfg' is an uninitialized variable, so we should check 'skb' rather than 'cfd' after calling alloc_canfd_skb(priv->ndev, &cfd). Fixes: 55e5b97f003e ("can: mcp25xxfd: add driver for Microchip MCP25xxFD SPI CAN") Signed-off-by: Qinglang Miao Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20210113073100.79552-1-miaoqinglang@huawei.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 36235afb0bc6..f07e8b737d31 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1491,7 +1491,7 @@ mcp251xfd_handle_rxif_one(struct mcp251xfd_priv *priv, else skb = alloc_can_skb(priv->ndev, (struct can_frame **)&cfd); - if (!cfd) { + if (!skb) { stats->rx_dropped++; return 0; } -- cgit 1.4.1 From 1f02efd1bb35bee95feed6aab46d1217f29d555b Mon Sep 17 00:00:00 2001 From: Seb Laveze Date: Tue, 12 Jan 2021 15:01:22 +0100 Subject: net: stmmac: use __napi_schedule() for PREEMPT_RT Use of __napi_schedule_irqoff() is not safe with PREEMPT_RT in which hard interrupts are not disabled while running the threaded interrupt. Using __napi_schedule() works for both PREEMPT_RT and mainline Linux, just at the cost of an additional check if interrupts are disabled for mainline (since they are already disabled). Similar to the fix done for enetc commit 215602a8d212 ("enetc: use napi_schedule to be compatible with PREEMPT_RT") Signed-off-by: Seb Laveze Link: https://lore.kernel.org/r/20210112140121.1487619-1-sebastien.laveze@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5b1c12ff98c0..2d90d6856ec5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2184,7 +2184,7 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan) spin_lock_irqsave(&ch->lock, flags); stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 1, 0); spin_unlock_irqrestore(&ch->lock, flags); - __napi_schedule_irqoff(&ch->rx_napi); + __napi_schedule(&ch->rx_napi); } } @@ -2193,7 +2193,7 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan) spin_lock_irqsave(&ch->lock, flags); stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 0, 1); spin_unlock_irqrestore(&ch->lock, flags); - __napi_schedule_irqoff(&ch->tx_napi); + __napi_schedule(&ch->tx_napi); } } -- cgit 1.4.1 From 7128c834d30e6b2cf649f14d8fc274941786d0e1 Mon Sep 17 00:00:00 2001 From: Cristian Dumitrescu Date: Mon, 11 Jan 2021 18:11:38 +0000 Subject: i40e: fix potential NULL pointer dereferencing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the function i40e_construct_skb_zc only frees the input xdp buffer when the output skb is successfully built. On error, the function i40e_clean_rx_irq_zc does not commit anything for the current packet descriptor and simply exits the packet descriptor processing loop, with the plan to restart the processing of this descriptor on the next invocation. Therefore, on error the ring next-to-clean pointer should not advance, the xdp i.e. *bi buffer should not be freed and the current buffer info should not be invalidated by setting *bi to NULL. Therefore, the *bi should only be set to NULL when the function i40e_construct_skb_zc is successful, otherwise a NULL *bi will be dereferenced when the work for the current descriptor is eventually restarted. Fixes: 3b4f0b66c2b3 ("i40e, xsk: Migrate to new MEM_TYPE_XSK_BUFF_POOL") Signed-off-by: Cristian Dumitrescu Acked-by: Björn Töpel Link: https://lore.kernel.org/r/20210111181138.49757-1-cristian.dumitrescu@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 47eb9c584a12..492ce213208d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -348,12 +348,12 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) * SBP is *not* set in PRT_SBPVSI (default not set). */ skb = i40e_construct_skb_zc(rx_ring, *bi); - *bi = NULL; if (!skb) { rx_ring->rx_stats.alloc_buff_failed++; break; } + *bi = NULL; cleaned_count++; i40e_inc_ntc(rx_ring); -- cgit 1.4.1 From 8ad2a970d2010add3963e7219eb50367ab3fa4eb Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Tue, 12 Jan 2021 11:06:00 +0530 Subject: cxgb4/chtls: Fix tid stuck due to wrong update of qid TID stuck is seen when there is a race in CPL_PASS_ACCEPT_RPL/CPL_ABORT_REQ and abort is arriving before the accept reply, which sets the queue number. In this case HW ends up sending CPL_ABORT_RPL_RSS to an incorrect ingress queue. V1->V2: - Removed the unused variable len in chtls_set_quiesce_ctrl(). V2->V3: - As kfree_skb() has a check for null skb, so removed this check before calling kfree_skb() in func chtls_send_reset(). Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Rohit Maheshwari Signed-off-by: Ayush Sawal Link: https://lore.kernel.org/r/20210112053600.24590-1-ayush.sawal@chelsio.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h | 7 ++++ .../ethernet/chelsio/inline_crypto/chtls/chtls.h | 4 +++ .../chelsio/inline_crypto/chtls/chtls_cm.c | 32 +++++++++++++++-- .../chelsio/inline_crypto/chtls/chtls_hw.c | 41 ++++++++++++++++++++++ 4 files changed, 82 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h index 92473dda55d9..22a0220123ad 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h @@ -40,6 +40,13 @@ #define TCB_L2T_IX_M 0xfffULL #define TCB_L2T_IX_V(x) ((x) << TCB_L2T_IX_S) +#define TCB_T_FLAGS_W 1 +#define TCB_T_FLAGS_S 0 +#define TCB_T_FLAGS_M 0xffffffffffffffffULL +#define TCB_T_FLAGS_V(x) ((__u64)(x) << TCB_T_FLAGS_S) + +#define TCB_FIELD_COOKIE_TFLAG 1 + #define TCB_SMAC_SEL_W 0 #define TCB_SMAC_SEL_S 24 #define TCB_SMAC_SEL_M 0xffULL diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h index 72bb123d53db..9e2378013642 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h @@ -575,7 +575,11 @@ int send_tx_flowc_wr(struct sock *sk, int compl, void chtls_tcp_push(struct sock *sk, int flags); int chtls_push_frames(struct chtls_sock *csk, int comp); int chtls_set_tcb_tflag(struct sock *sk, unsigned int bit_pos, int val); +void chtls_set_tcb_field_rpl_skb(struct sock *sk, u16 word, + u64 mask, u64 val, u8 cookie, + int through_l2t); int chtls_setkey(struct chtls_sock *csk, u32 keylen, u32 mode, int cipher_type); +void chtls_set_quiesce_ctrl(struct sock *sk, int val); void skb_entail(struct sock *sk, struct sk_buff *skb, int flags); unsigned int keyid_to_addr(int start_addr, int keyid); void free_tls_keyid(struct sock *sk); diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 51dd030b3b36..e5cfbe196ba6 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -32,6 +32,7 @@ #include "chtls.h" #include "chtls_cm.h" #include "clip_tbl.h" +#include "t4_tcb.h" /* * State transitions and actions for close. Note that if we are in SYN_SENT @@ -267,7 +268,9 @@ static void chtls_send_reset(struct sock *sk, int mode, struct sk_buff *skb) if (sk->sk_state != TCP_SYN_RECV) chtls_send_abort(sk, mode, skb); else - goto out; + chtls_set_tcb_field_rpl_skb(sk, TCB_T_FLAGS_W, + TCB_T_FLAGS_V(TCB_T_FLAGS_M), 0, + TCB_FIELD_COOKIE_TFLAG, 1); return; out: @@ -1949,6 +1952,8 @@ static void chtls_close_con_rpl(struct sock *sk, struct sk_buff *skb) else if (tcp_sk(sk)->linger2 < 0 && !csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN)) chtls_abort_conn(sk, skb); + else if (csk_flag_nochk(csk, CSK_TX_DATA_SENT)) + chtls_set_quiesce_ctrl(sk, 0); break; default: pr_info("close_con_rpl in bad state %d\n", sk->sk_state); @@ -2292,6 +2297,28 @@ static int chtls_wr_ack(struct chtls_dev *cdev, struct sk_buff *skb) return 0; } +static int chtls_set_tcb_rpl(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_set_tcb_rpl *rpl = cplhdr(skb) + RSS_HDR; + unsigned int hwtid = GET_TID(rpl); + struct sock *sk; + + sk = lookup_tid(cdev->tids, hwtid); + + /* return EINVAL if socket doesn't exist */ + if (!sk) + return -EINVAL; + + /* Reusing the skb as size of cpl_set_tcb_field structure + * is greater than cpl_abort_req + */ + if (TCB_COOKIE_G(rpl->cookie) == TCB_FIELD_COOKIE_TFLAG) + chtls_send_abort(sk, CPL_ABORT_SEND_RST, NULL); + + kfree_skb(skb); + return 0; +} + chtls_handler_func chtls_handlers[NUM_CPL_CMDS] = { [CPL_PASS_OPEN_RPL] = chtls_pass_open_rpl, [CPL_CLOSE_LISTSRV_RPL] = chtls_close_listsrv_rpl, @@ -2304,5 +2331,6 @@ chtls_handler_func chtls_handlers[NUM_CPL_CMDS] = { [CPL_CLOSE_CON_RPL] = chtls_conn_cpl, [CPL_ABORT_REQ_RSS] = chtls_conn_cpl, [CPL_ABORT_RPL_RSS] = chtls_conn_cpl, - [CPL_FW4_ACK] = chtls_wr_ack, + [CPL_FW4_ACK] = chtls_wr_ack, + [CPL_SET_TCB_RPL] = chtls_set_tcb_rpl, }; diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c index a4fb463af22a..1e67140b0f80 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c @@ -88,6 +88,24 @@ static int chtls_set_tcb_field(struct sock *sk, u16 word, u64 mask, u64 val) return ret < 0 ? ret : 0; } +void chtls_set_tcb_field_rpl_skb(struct sock *sk, u16 word, + u64 mask, u64 val, u8 cookie, + int through_l2t) +{ + struct sk_buff *skb; + unsigned int wrlen; + + wrlen = sizeof(struct cpl_set_tcb_field) + sizeof(struct ulptx_idata); + wrlen = roundup(wrlen, 16); + + skb = alloc_skb(wrlen, GFP_KERNEL | __GFP_NOFAIL); + if (!skb) + return; + + __set_tcb_field(sk, skb, word, mask, val, cookie, 0); + send_or_defer(sk, tcp_sk(sk), skb, through_l2t); +} + /* * Set one of the t_flags bits in the TCB. */ @@ -113,6 +131,29 @@ static int chtls_set_tcb_quiesce(struct sock *sk, int val) TF_RX_QUIESCE_V(val)); } +void chtls_set_quiesce_ctrl(struct sock *sk, int val) +{ + struct chtls_sock *csk; + struct sk_buff *skb; + unsigned int wrlen; + int ret; + + wrlen = sizeof(struct cpl_set_tcb_field) + sizeof(struct ulptx_idata); + wrlen = roundup(wrlen, 16); + + skb = alloc_skb(wrlen, GFP_ATOMIC); + if (!skb) + return; + + csk = rcu_dereference_sk_user_data(sk); + + __set_tcb_field(sk, skb, 1, TF_RX_QUIESCE_V(1), 0, 0, 1); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, csk->port_id); + ret = cxgb4_ofld_send(csk->egress_dev, skb); + if (ret < 0) + kfree_skb(skb); +} + /* TLS Key bitmap processing */ int chtls_init_kmap(struct chtls_dev *cdev, struct cxgb4_lld_info *lldi) { -- cgit 1.4.1 From 5b55299eed78538cc4746e50ee97103a1643249c Mon Sep 17 00:00:00 2001 From: David Wu Date: Wed, 13 Jan 2021 11:41:09 +0800 Subject: net: stmmac: Fixed mtu channged by cache aligned Since the original mtu is not used when the mtu is updated, the mtu is aligned with cache, this will get an incorrect. For example, if you want to configure the mtu to be 1500, but mtu 1536 is configured in fact. Fixed: eaf4fac478077 ("net: stmmac: Do not accept invalid MTU values") Signed-off-by: David Wu Link: https://lore.kernel.org/r/20210113034109.27865-1-david.wu@rock-chips.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 2d90d6856ec5..26b971cd4da5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4026,6 +4026,7 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu) { struct stmmac_priv *priv = netdev_priv(dev); int txfifosz = priv->plat->tx_fifo_size; + const int mtu = new_mtu; if (txfifosz == 0) txfifosz = priv->dma_cap.tx_fifo_size; @@ -4043,7 +4044,7 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu) if ((txfifosz < new_mtu) || (new_mtu > BUF_SIZE_16KiB)) return -EINVAL; - dev->mtu = new_mtu; + dev->mtu = mtu; netdev_update_features(dev); -- cgit 1.4.1 From c25a053e15778f6b4d6553708673736e27a6c2cf Mon Sep 17 00:00:00 2001 From: Nick Hu Date: Wed, 13 Jan 2021 10:24:10 +0800 Subject: riscv: Fix KASAN memory mapping. Use virtual address instead of physical address when translating the address to shadow memory by kasan_mem_to_shadow(). Signed-off-by: Nick Hu Signed-off-by: Nylon Chen Fixes: b10d6bca8720 ("arch, drivers: replace for_each_membock() with for_each_mem_range()") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/kasan_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 12ddd1f6bf70..a8a2ffd9114a 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -93,8 +93,8 @@ void __init kasan_init(void) VMALLOC_END)); for_each_mem_range(i, &_start, &_end) { - void *start = (void *)_start; - void *end = (void *)_end; + void *start = (void *)__va(_start); + void *end = (void *)__va(_end); if (start >= end) break; -- cgit 1.4.1 From 41131a5e54ae7ba5a2bb8d7b30d1818b3f5b13d2 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Tue, 12 Jan 2021 11:55:15 +0000 Subject: powerpc/vdso: Fix clock_gettime_fallback for vdso32 The second argument of __kernel_clock_gettime64 points to a struct __kernel_timespec, with 64-bit time_t, so use the clock_gettime64 syscall in the fallback function for the 32-bit VDSO. Similarly, clock_getres_fallback should use the clock_getres_time64 syscall, though it isn't yet called from the 32-bit VDSO. Fixes: d0e3fc69d00d ("powerpc/vdso: Provide __kernel_clock_gettime64() on vdso32") Signed-off-by: Andreas Schwab [chleroy: Moved into a single #ifdef __powerpc64__ block] Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0c0ab0eb3cc80687c326f76ff0dd5762b8812ecc.1610452505.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/vdso/gettimeofday.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index 81671aa365b3..77c635c2c90d 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -103,6 +103,8 @@ int gettimeofday_fallback(struct __kernel_old_timeval *_tv, struct timezone *_tz return do_syscall_2(__NR_gettimeofday, (unsigned long)_tv, (unsigned long)_tz); } +#ifdef __powerpc64__ + static __always_inline int clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) { @@ -115,10 +117,22 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) return do_syscall_2(__NR_clock_getres, _clkid, (unsigned long)_ts); } -#ifdef CONFIG_VDSO32 +#else #define BUILD_VDSO32 1 +static __always_inline +int clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + return do_syscall_2(__NR_clock_gettime64, _clkid, (unsigned long)_ts); +} + +static __always_inline +int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + return do_syscall_2(__NR_clock_getres_time64, _clkid, (unsigned long)_ts); +} + static __always_inline int clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) { -- cgit 1.4.1 From be969b7cfbcfa8a835a528f1dc467f0975c6d883 Mon Sep 17 00:00:00 2001 From: Sagar Shrikant Kadam Date: Tue, 10 Nov 2020 07:22:10 -0800 Subject: dts: phy: fix missing mdio device and probe failure of vsc8541-01 device HiFive unleashed A00 board has VSC8541-01 ethernet phy, this device is identified as a Revision B device as described in device identification registers. In order to use this phy in the unmanaged mode, it requires a specific reset sequence of logical 0-1-0-1 transition on the NRESET pin as documented here [1]. Currently, the bootloader (fsbl or u-boot-spl) takes care of the phy reset. If due to some reason the phy device hasn't received the reset by the prior stages before the linux macb driver comes into the picture, the MACB mii bus gets probed but the mdio scan fails and is not even able to read the phy ID registers. It gives an error message: "libphy: MACB_mii_bus: probed mdio_bus 10090000.ethernet-ffffffff: MDIO device at address 0 is missing." Thus adding the device OUI (Organizationally Unique Identifier) to the phy device node helps to probe the phy device. [1]: VSC8541-01 datasheet: https://www.mouser.com/ds/2/523/Microsemi_VSC8541-01_Datasheet_10496_V40-1148034.pdf Signed-off-by: Sagar Shrikant Kadam Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 4a2729f5ca3f..60846e88ae4b 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -88,6 +88,7 @@ phy-mode = "gmii"; phy-handle = <&phy0>; phy0: ethernet-phy@0 { + compatible = "ethernet-phy-id0007.0771"; reg = <0>; }; }; -- cgit 1.4.1 From a0fa9d727043da2238432471e85de0bdb8a8df65 Mon Sep 17 00:00:00 2001 From: Sagar Shrikant Kadam Date: Tue, 10 Nov 2020 07:22:11 -0800 Subject: dts: phy: add GPIO number and active state used for phy reset The GEMGXL_RST line on HiFive Unleashed is pulled low and is using GPIO number 12. Add these reset-gpio details to dt-node using which the linux phylib can reset the phy. Signed-off-by: Sagar Shrikant Kadam Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 60846e88ae4b..24d75a146e02 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -90,6 +90,7 @@ phy0: ethernet-phy@0 { compatible = "ethernet-phy-id0007.0771"; reg = <0>; + reset-gpios = <&gpio 12 GPIO_ACTIVE_LOW>; }; }; -- cgit 1.4.1 From 0983834a83931606a647c275e5d4165ce4e7b49f Mon Sep 17 00:00:00 2001 From: Sagar Shrikant Kadam Date: Tue, 10 Nov 2020 07:22:12 -0800 Subject: riscv: defconfig: enable gpio support for HiFive Unleashed Ethernet phy VSC8541-01 on HiFive Unleashed has its reset line connected to a gpio, so enable GPIO driver's required to reset the phy. Signed-off-by: Sagar Shrikant Kadam Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index d222d353d86d..8c3d1e451703 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -64,6 +64,8 @@ CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y CONFIG_SPI=y CONFIG_SPI_SIFIVE=y +CONFIG_GPIOLIB=y +CONFIG_GPIO_SIFIVE=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_POWER_RESET=y CONFIG_DRM=y -- cgit 1.4.1 From 101c2fae5108d78915517d0279323ee215e70df2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 5 Jan 2021 15:14:29 -0500 Subject: MAINTAINERS: update radeon/amdgpu/amdkfd git trees MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FDO is out of space, so move to gitlab. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index cc1e6a5ee6e6..82a47081f62a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -907,7 +907,7 @@ AMD KFD M: Felix Kuehling L: amd-gfx@lists.freedesktop.org S: Supported -T: git git://people.freedesktop.org/~agd5f/linux +T: git https://gitlab.freedesktop.org/agd5f/linux.git F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd*.[ch] F: drivers/gpu/drm/amd/amdkfd/ F: drivers/gpu/drm/amd/include/cik_structs.h @@ -14818,7 +14818,7 @@ M: Alex Deucher M: Christian König L: amd-gfx@lists.freedesktop.org S: Supported -T: git git://people.freedesktop.org/~agd5f/linux +T: git https://gitlab.freedesktop.org/agd5f/linux.git F: drivers/gpu/drm/amd/ F: drivers/gpu/drm/radeon/ F: include/uapi/drm/amdgpu_drm.h -- cgit 1.4.1 From ff9346dbabbb6595c5c20d90d88ae4a2247487a9 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Thu, 7 Jan 2021 18:53:03 -0500 Subject: drm/amdgpu: fix DRM_INFO flood if display core is not supported (bug 210921) This fix bug 210921 where DRM_INFO floods log when hitting an unsupported ASIC in amdgpu_device_asic_has_dc_support(). This info should be only called once. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=210921 Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b69c34074d8d..087afab67e22 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3034,7 +3034,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) #endif default: if (amdgpu_dc > 0) - DRM_INFO("Display Core has been requested via kernel parameter " + DRM_INFO_ONCE("Display Core has been requested via kernel parameter " "but isn't supported by ASIC, ignoring\n"); return false; } -- cgit 1.4.1 From f14a5c34d143f6627f0be70c0de1d962f3a6ff1c Mon Sep 17 00:00:00 2001 From: Victor Zhao Date: Tue, 5 Jan 2021 15:04:01 +0800 Subject: drm/amdgpu/psp: fix psp gfx ctrl cmds psp GFX_CTRL_CMD_ID_CONSUME_CMD different for windows and linux, according to psp, linux cmds are not correct. v2: only correct GFX_CTRL_CMD_ID_CONSUME_CMD. Signed-off-by: Victor Zhao Reviewed-by: Emily.Deng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index d65a5339d354..3ba7bdfde65d 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -47,7 +47,7 @@ enum psp_gfx_crtl_cmd_id GFX_CTRL_CMD_ID_DISABLE_INT = 0x00060000, /* disable PSP-to-Gfx interrupt */ GFX_CTRL_CMD_ID_MODE1_RST = 0x00070000, /* trigger the Mode 1 reset */ GFX_CTRL_CMD_ID_GBR_IH_SET = 0x00080000, /* set Gbr IH_RB_CNTL registers */ - GFX_CTRL_CMD_ID_CONSUME_CMD = 0x000A0000, /* send interrupt to psp for updating write pointer of vf */ + GFX_CTRL_CMD_ID_CONSUME_CMD = 0x00090000, /* send interrupt to psp for updating write pointer of vf */ GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING = 0x000C0000, /* destroy GPCOM ring */ GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */ -- cgit 1.4.1 From 73644143b31cb95866c19e0d94be9e3127ec3a6b Mon Sep 17 00:00:00 2001 From: Qingqing Zhuo Date: Wed, 9 Dec 2020 18:04:04 -0500 Subject: drm/amd/display: NULL pointer hang [Why] In dc_link_dp_set_test_pattern, we assume all pipes have a stream, which can cause null pointer dereference. [How] Add a null pointer check before accessing stream. Tested-by: Daniel Wheeler Signed-off-by: Qingqing Zhuo Reviewed-by: Nicholas Kazlauskas Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 2fc12239b22c..1bd1a0935290 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -3992,7 +3992,7 @@ bool dc_link_dp_set_test_pattern( unsigned int cust_pattern_size) { struct pipe_ctx *pipes = link->dc->current_state->res_ctx.pipe_ctx; - struct pipe_ctx *pipe_ctx = &pipes[0]; + struct pipe_ctx *pipe_ctx = NULL; unsigned int lane; unsigned int i; unsigned char link_qual_pattern[LANE_COUNT_DP_MAX] = {0}; @@ -4002,12 +4002,18 @@ bool dc_link_dp_set_test_pattern( memset(&training_pattern, 0, sizeof(training_pattern)); for (i = 0; i < MAX_PIPES; i++) { + if (pipes[i].stream == NULL) + continue; + if (pipes[i].stream->link == link && !pipes[i].top_pipe && !pipes[i].prev_odm_pipe) { pipe_ctx = &pipes[i]; break; } } + if (pipe_ctx == NULL) + return false; + /* Reset CRTC Test Pattern if it is currently running and request is VideoMode */ if (link->test_pattern_enabled && test_pattern == DP_TEST_PATTERN_VIDEO_MODE) { -- cgit 1.4.1 From 4336be4b07ed3b03a18ac35564c3127eeea05ab6 Mon Sep 17 00:00:00 2001 From: Wesley Chalmers Date: Mon, 14 Dec 2020 15:35:02 -0500 Subject: drm/amd/display: Initialize stack variable [WHY] The stack variable "val" is potentially unpopulate it, so initialize it with the value 0xf (indicating an invalid mux) Tested-by: Daniel Wheeler Signed-off-by: Wesley Chalmers Reviewed-by: Martin Leung Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c index 100ce0e28fd5..b096011acb49 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c @@ -470,7 +470,7 @@ void mpc1_cursor_lock(struct mpc *mpc, int opp_id, bool lock) unsigned int mpc1_get_mpc_out_mux(struct mpc *mpc, int opp_id) { struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc); - uint32_t val = 0; + uint32_t val = 0xf; if (opp_id < MAX_OPP && REG(MUX[opp_id])) REG_GET(MUX[opp_id], MPC_OUT_MUX, &val); -- cgit 1.4.1 From 0eb31a82e378cab17beec1d213e1414e9fea1767 Mon Sep 17 00:00:00 2001 From: Nikola Cornij Date: Tue, 29 Dec 2020 23:33:32 -0500 Subject: drm/amd/display: Add a missing DCN3.01 API mapping [why] Required for DSC MST Tested-by: Daniel Wheeler Signed-off-by: Nikola Cornij Reviewed-by: Zhan Liu Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index 4825c5c1c6ed..35f5bf08ae96 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -1731,6 +1731,7 @@ static struct resource_funcs dcn301_res_pool_funcs = { .populate_dml_pipes = dcn30_populate_dml_pipes_from_context, .acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer, .add_stream_to_ctx = dcn30_add_stream_to_ctx, + .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context, .set_mcif_arb_params = dcn30_set_mcif_arb_params, -- cgit 1.4.1 From 9d03bb102028b4a3f4a64d6069b219e2e1c1f306 Mon Sep 17 00:00:00 2001 From: "Li, Roman" Date: Wed, 30 Dec 2020 18:03:02 +0000 Subject: drm/amd/display: disable dcn10 pipe split by default [Why] The initial purpose of dcn10 pipe split is to support some high bandwidth mode which requires dispclk greater than max dispclk. By initial bring up power measurement data, it showed power consumption is less with pipe split for dcn block. This could be reason for enable pipe split by default. By battery life measurement of some Chromebooks, result shows battery life is longer with pipe split disabled. [How] Disable pipe split by default. Pipe split could be still enabled when required dispclk is greater than max dispclk. Tested-by: Daniel Wheeler Signed-off-by: Hersen Wu Signed-off-by: Roman Li Reviewed-by: Roman Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 36745193c391..90e912fef2b3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -608,8 +608,8 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_pplib_clock_request = false, .disable_pplib_wm_range = false, .pplib_wm_report_mode = WM_REPORT_DEFAULT, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, - .force_single_disp_pipe_split = true, + .pipe_split_policy = MPC_SPLIT_AVOID, + .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .voltage_align_fclk = true, .disable_stereo_support = true, -- cgit 1.4.1 From 4eec66c014e9a406d8d453de958f6791d05427e4 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 11 Jan 2021 11:31:51 -0500 Subject: Revert "drm/amd/display: Fixed Intermittent blue screen on OLED panel" commit a861736dae64 ("drm/amd/display: Fixed Intermittent blue screen on OLED panel") causes power regression for many users. It seems that this change causes the MCLK to get forced high; this creates a regression for many users since their devices were not able to drop to a low state after this change. For this reason, this reverts commit a861736dae644a0d7abbca0c638ae6aad28feeb8. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1407 Cc: Aurabindo Pillai Cc: Alex Deucher Cc: Harry Wentland Cc: Naveed Ashfaq Cc: Hersen Wu Cc: Roman Li Acked-by: Alex Deucher Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c index 860e72a51534..80170f9721ce 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c @@ -2635,14 +2635,15 @@ static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndP } if (mode_lib->vba.DRAMClockChangeSupportsVActive && - mode_lib->vba.MinActiveDRAMClockChangeMargin > 60 && - mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) { + mode_lib->vba.MinActiveDRAMClockChangeMargin > 60) { mode_lib->vba.DRAMClockChangeWatermark += 25; for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if (mode_lib->vba.DRAMClockChangeWatermark > - dml_max(mode_lib->vba.StutterEnterPlusExitWatermark, mode_lib->vba.UrgentWatermark)) - mode_lib->vba.MinTTUVBlank[k] += 25; + if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) { + if (mode_lib->vba.DRAMClockChangeWatermark > + dml_max(mode_lib->vba.StutterEnterPlusExitWatermark, mode_lib->vba.UrgentWatermark)) + mode_lib->vba.MinTTUVBlank[k] += 25; + } } mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive; -- cgit 1.4.1 From 8b335bff643f3b39935c7377dbcd361c5b605d98 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Mon, 11 Jan 2021 16:05:28 -0500 Subject: drm/amdkfd: Fix out-of-bounds read in kdf_create_vcrat_image_cpu() KASAN reported a slab-out-of-bounds read of size 1 in kdf_create_vcrat_image_cpu(). This occurs when, for example, when on an x86_64 with a single NUMA node because kfd_fill_iolink_info_for_cpu() is a no-op, but afterwards the sub_type_hdr->length, which is out-of-bounds, is read and multiplied by entries. Fortunately, entries is 0 in this case so the overall crat_table->length is still correct. Check if there were any entries before de-referencing sub_type_hdr which may be pointing to out-of-bounds memory. Fixes: b7b6c38529c9 ("drm/amdkfd: Calculate CPU VCRAT size dynamically (v2)") Suggested-by: Felix Kuehling Signed-off-by: Jeremy Cline Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 8cac497c2c45..a5640a6138cf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1040,11 +1040,14 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) (struct crat_subtype_iolink *)sub_type_hdr); if (ret < 0) return ret; - crat_table->length += (sub_type_hdr->length * entries); - crat_table->total_entries += entries; - sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + - sub_type_hdr->length * entries); + if (entries) { + crat_table->length += (sub_type_hdr->length * entries); + crat_table->total_entries += entries; + + sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + + sub_type_hdr->length * entries); + } #else pr_info("IO link not available for non x86 platforms\n"); #endif -- cgit 1.4.1 From 04eb6e773e9f3167a5921d74e8ad99cdcc4166c3 Mon Sep 17 00:00:00 2001 From: chen gong Date: Fri, 8 Jan 2021 12:46:44 +0800 Subject: drm/amdgpu/gfx10: add updated GOLDEN_TSC_COUNT_UPPER/LOWER register offsets for VGH The address of the GOLDEN_TSC_COUNT_UPPER/GOLDEN_TSC_COUNT_LOWER for Vnagogh are different from the others. The offset of the GOLDEN_TSC_COUNT_UPPER for Vangogh is 0x0025 by calculation. The offset of the GOLDEN_TSC_COUNT_LOWER for Vangogh is 0x0026 by calculation. Signed-off-by: chen gong Acked-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index ba1086784525..34141b785aa4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -99,6 +99,10 @@ #define mmGCR_GENERAL_CNTL_Sienna_Cichlid 0x1580 #define mmGCR_GENERAL_CNTL_Sienna_Cichlid_BASE_IDX 0 +#define mmGOLDEN_TSC_COUNT_UPPER_Vangogh 0x0025 +#define mmGOLDEN_TSC_COUNT_UPPER_Vangogh_BASE_IDX 1 +#define mmGOLDEN_TSC_COUNT_LOWER_Vangogh 0x0026 +#define mmGOLDEN_TSC_COUNT_LOWER_Vangogh_BASE_IDX 1 #define mmSPI_CONFIG_CNTL_1_Vangogh 0x2441 #define mmSPI_CONFIG_CNTL_1_Vangogh_BASE_IDX 1 #define mmVGT_TF_MEMORY_BASE_HI_Vangogh 0x2261 @@ -7377,8 +7381,16 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) amdgpu_gfx_off_ctrl(adev, false); mutex_lock(&adev->gfx.gpu_clock_mutex); - clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER) | - ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER) << 32ULL); + switch (adev->asic_type) { + case CHIP_VANGOGH: + clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh) | + ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh) << 32ULL); + break; + default: + clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER) | + ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER) << 32ULL); + break; + } mutex_unlock(&adev->gfx.gpu_clock_mutex); amdgpu_gfx_off_ctrl(adev, true); return clock; -- cgit 1.4.1 From 12f2df72205fe348481d941c3e593e8068d2d23d Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Wed, 13 Jan 2021 20:13:17 +0800 Subject: drm/amdgpu: fix vram type and bandwidth error for DDR5 and DDR4 This patch is to update atomfirmware parser for the memory type and bandwidth of DDR5 and DDR4. Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 53 ++++++++++++++++-------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 306077884a67..6107ac91db25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -112,6 +112,7 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev) union igp_info { struct atom_integrated_system_info_v1_11 v11; struct atom_integrated_system_info_v1_12 v12; + struct atom_integrated_system_info_v2_1 v21; }; union umc_info { @@ -209,24 +210,42 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, if (adev->flags & AMD_IS_APU) { igp_info = (union igp_info *) (mode_info->atom_context->bios + data_offset); - switch (crev) { - case 11: - mem_channel_number = igp_info->v11.umachannelnumber; - /* channel width is 64 */ - if (vram_width) - *vram_width = mem_channel_number * 64; - mem_type = igp_info->v11.memorytype; - if (vram_type) - *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + switch (frev) { + case 1: + switch (crev) { + case 11: + case 12: + mem_channel_number = igp_info->v11.umachannelnumber; + if (!mem_channel_number) + mem_channel_number = 1; + /* channel width is 64 */ + if (vram_width) + *vram_width = mem_channel_number * 64; + mem_type = igp_info->v11.memorytype; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + break; + default: + return -EINVAL; + } break; - case 12: - mem_channel_number = igp_info->v12.umachannelnumber; - /* channel width is 64 */ - if (vram_width) - *vram_width = mem_channel_number * 64; - mem_type = igp_info->v12.memorytype; - if (vram_type) - *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + case 2: + switch (crev) { + case 1: + case 2: + mem_channel_number = igp_info->v21.umachannelnumber; + if (!mem_channel_number) + mem_channel_number = 1; + /* channel width is 64 */ + if (vram_width) + *vram_width = mem_channel_number * 64; + mem_type = igp_info->v21.memorytype; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + break; + default: + return -EINVAL; + } break; default: return -EINVAL; -- cgit 1.4.1 From 21702c8cae51535e09b91341a069503c6ef3d2a3 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Fri, 2 Oct 2020 10:58:55 -0400 Subject: drm/amdgpu: add green_sardine device id (v2) Add green_sardine PCI id support and map it to renoir asic type. v2: add apu flag Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Reviewed-by: Huang Rui Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.10.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 72efd579ec5e..e191383f34f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1085,6 +1085,7 @@ static const struct pci_device_id pciidlist[] = { /* Renoir */ {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, + {0x1002, 0x1638, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, /* Navi12 */ {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12}, -- cgit 1.4.1 From 53f1e7f6a1720f8299b5283857eedc8f07d29533 Mon Sep 17 00:00:00 2001 From: mengwang Date: Wed, 12 Aug 2020 11:49:29 +0800 Subject: drm/amdgpu: add new device id for Renior add DID 0x164C into pciidlist under CHIP_RENOIR family. Signed-off-by: mengwang Reviewed-by: Huang Rui Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.10.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + drivers/gpu/drm/amd/amdgpu/soc15.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index e191383f34f7..7169fb5e3d9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1086,6 +1086,7 @@ static const struct pci_device_id pciidlist[] = { /* Renoir */ {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, {0x1002, 0x1638, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, + {0x1002, 0x164C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, /* Navi12 */ {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12}, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 8a23636ecc27..0b3516c4eefb 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1239,7 +1239,8 @@ static int soc15_common_early_init(void *handle) break; case CHIP_RENOIR: adev->asic_funcs = &soc15_asic_funcs; - if (adev->pdev->device == 0x1636) + if ((adev->pdev->device == 0x1636) || + (adev->pdev->device == 0x164c)) adev->apu_flags |= AMD_APU_IS_RENOIR; else adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE; -- cgit 1.4.1 From 3c516e038f0cc3915825bdac619d448c2b1811f2 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 14 Jan 2021 15:19:23 +0800 Subject: Documentation: ACPI: EINJ: Fix error type values for PCIe errors Fix the error type value for PCI Express uncorrectable non-fatal error to 0x00000080 and fix the error type value for PCI Express uncorrectable fatal error to 0x00000100. See Advanced Configuration and Power Interface Specification, version 6.2, table "18-409 Error Type Definition". Signed-off-by: Qiuxu Zhuo Reported-by: Lijian Zhao [ rjw: Subject edits ] Signed-off-by: Rafael J. Wysocki --- Documentation/firmware-guide/acpi/apei/einj.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/firmware-guide/acpi/apei/einj.rst b/Documentation/firmware-guide/acpi/apei/einj.rst index e588bccf5158..c042176e1707 100644 --- a/Documentation/firmware-guide/acpi/apei/einj.rst +++ b/Documentation/firmware-guide/acpi/apei/einj.rst @@ -50,8 +50,8 @@ The following files belong to it: 0x00000010 Memory Uncorrectable non-fatal 0x00000020 Memory Uncorrectable fatal 0x00000040 PCI Express Correctable - 0x00000080 PCI Express Uncorrectable fatal - 0x00000100 PCI Express Uncorrectable non-fatal + 0x00000080 PCI Express Uncorrectable non-fatal + 0x00000100 PCI Express Uncorrectable fatal 0x00000200 Platform Correctable 0x00000400 Platform Uncorrectable non-fatal 0x00000800 Platform Uncorrectable fatal -- cgit 1.4.1 From 7de843dbaaa68aa514090e6226ed7c6374fd7e49 Mon Sep 17 00:00:00 2001 From: Nicholas Miell Date: Sun, 10 Jan 2021 22:09:25 -0800 Subject: HID: logitech-hidpp: Add product ID for MX Ergo in Bluetooth mode The Logitech MX Ergo trackball supports HID++ 4.5 over Bluetooth. Add its product ID to the table so we can get battery monitoring support. (The hid-logitech-hidpp driver already recognizes it when connected via a Unifying Receiver.) [jkosina@suse.cz: fix whitespace damage] Signed-off-by: Nicholas Miell Reviewed-by: Hans de Goede Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-hidpp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index f85781464807..7eb9a6ddb46a 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -4053,6 +4053,8 @@ static const struct hid_device_id hidpp_devices[] = { { /* MX Master mouse over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb012), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 }, + { /* MX Ergo trackball over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01d) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01e), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 }, { /* MX Master 3 mouse over Bluetooth */ -- cgit 1.4.1 From c87a95dc28b1431c7e77e2c0c983cf37698089d2 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Wed, 13 Jan 2021 19:17:17 +0000 Subject: dm crypt: defer decryption to a tasklet if interrupts disabled On some specific hardware on early boot we occasionally get: [ 1193.920255][ T0] BUG: sleeping function called from invalid context at mm/mempool.c:381 [ 1193.936616][ T0] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 0, name: swapper/69 [ 1193.953233][ T0] no locks held by swapper/69/0. [ 1193.965871][ T0] irq event stamp: 575062 [ 1193.977724][ T0] hardirqs last enabled at (575061): [] tick_nohz_idle_exit+0xe2/0x3e0 [ 1194.002762][ T0] hardirqs last disabled at (575062): [] flush_smp_call_function_from_idle+0x4f/0x80 [ 1194.029035][ T0] softirqs last enabled at (575050): [] asm_call_irq_on_stack+0x12/0x20 [ 1194.054227][ T0] softirqs last disabled at (575043): [] asm_call_irq_on_stack+0x12/0x20 [ 1194.079389][ T0] CPU: 69 PID: 0 Comm: swapper/69 Not tainted 5.10.6-cloudflare-kasan-2021.1.4-dev #1 [ 1194.104103][ T0] Hardware name: NULL R162-Z12-CD/MZ12-HD4-CD, BIOS R10 06/04/2020 [ 1194.119591][ T0] Call Trace: [ 1194.130233][ T0] dump_stack+0x9a/0xcc [ 1194.141617][ T0] ___might_sleep.cold+0x180/0x1b0 [ 1194.153825][ T0] mempool_alloc+0x16b/0x300 [ 1194.165313][ T0] ? remove_element+0x160/0x160 [ 1194.176961][ T0] ? blk_mq_end_request+0x4b/0x490 [ 1194.188778][ T0] crypt_convert+0x27f6/0x45f0 [dm_crypt] [ 1194.201024][ T0] ? rcu_read_lock_sched_held+0x3f/0x70 [ 1194.212906][ T0] ? module_assert_mutex_or_preempt+0x3e/0x70 [ 1194.225318][ T0] ? __module_address.part.0+0x1b/0x3a0 [ 1194.237212][ T0] ? is_kernel_percpu_address+0x5b/0x190 [ 1194.249238][ T0] ? crypt_iv_tcw_ctr+0x4a0/0x4a0 [dm_crypt] [ 1194.261593][ T0] ? is_module_address+0x25/0x40 [ 1194.272905][ T0] ? static_obj+0x8a/0xc0 [ 1194.283582][ T0] ? lockdep_init_map_waits+0x26a/0x700 [ 1194.295570][ T0] ? __raw_spin_lock_init+0x39/0x110 [ 1194.307330][ T0] kcryptd_crypt_read_convert+0x31c/0x560 [dm_crypt] [ 1194.320496][ T0] ? kcryptd_queue_crypt+0x1be/0x380 [dm_crypt] [ 1194.333203][ T0] blk_update_request+0x6d7/0x1500 [ 1194.344841][ T0] ? blk_mq_trigger_softirq+0x190/0x190 [ 1194.356831][ T0] blk_mq_end_request+0x4b/0x490 [ 1194.367994][ T0] ? blk_mq_trigger_softirq+0x190/0x190 [ 1194.379693][ T0] flush_smp_call_function_queue+0x24b/0x560 [ 1194.391847][ T0] flush_smp_call_function_from_idle+0x59/0x80 [ 1194.403969][ T0] do_idle+0x287/0x450 [ 1194.413891][ T0] ? arch_cpu_idle_exit+0x40/0x40 [ 1194.424716][ T0] ? lockdep_hardirqs_on_prepare+0x286/0x3f0 [ 1194.436399][ T0] ? _raw_spin_unlock_irqrestore+0x39/0x40 [ 1194.447759][ T0] cpu_startup_entry+0x19/0x20 [ 1194.458038][ T0] secondary_startup_64_no_verify+0xb0/0xbb IO completion can be queued to a different CPU by the block subsystem as a "call single function/data". The CPU may run these routines from the idle task, but it does so with interrupts disabled. It is not a good idea to do decryption with irqs disabled even in an idle task context, so just defer it to a tasklet (as is done with requests from hard irqs). Fixes: 39d42fa96ba1 ("dm crypt: add flags to optionally bypass kcryptd workqueues") Cc: stable@vger.kernel.org # v5.9+ Signed-off-by: Ignat Korchagin Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index b4d7418b5036..8c874710f0bc 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2221,8 +2221,12 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io) if ((bio_data_dir(io->base_bio) == READ && test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)) || (bio_data_dir(io->base_bio) == WRITE && test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags))) { - if (in_irq()) { - /* Crypto API's "skcipher_walk_first() refuses to work in hard IRQ context */ + /* + * in_irq(): Crypto API's skcipher_walk_first() refuses to work in hard IRQ context. + * irqs_disabled(): the kernel may run some IO completion from the idle thread, but + * it is being executed with irqs disabled. + */ + if (in_irq() || irqs_disabled()) { tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work); tasklet_schedule(&io->tasklet); return; -- cgit 1.4.1 From 55ed4560774d81d7343223b8fd2784c530a9c6c1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 9 Dec 2020 14:27:44 +0900 Subject: tools/bootconfig: Add tracing_on support to helper scripts Add ftrace.instance.INSTANCE.tracing_on support to ftrace2bconf.sh and bconf2ftrace.sh. commit 8490db06f914 ("tracing/boot: Add per-instance tracing_on option support") added the per-instance tracing_on option, but forgot to update the helper scripts. Link: https://lkml.kernel.org/r/160749166410.3497930.14204335886811029800.stgit@devnote2 Cc: stable@vger.kernel.org Fixes: 8490db06f914 ("tracing/boot: Add per-instance tracing_on option support") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/scripts/bconf2ftrace.sh | 1 + tools/bootconfig/scripts/ftrace2bconf.sh | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/tools/bootconfig/scripts/bconf2ftrace.sh b/tools/bootconfig/scripts/bconf2ftrace.sh index 595e164dc352..feb30c2c7881 100755 --- a/tools/bootconfig/scripts/bconf2ftrace.sh +++ b/tools/bootconfig/scripts/bconf2ftrace.sh @@ -152,6 +152,7 @@ setup_instance() { # [instance] set_array_of ${instance}.options ${instancedir}/trace_options set_value_of ${instance}.trace_clock ${instancedir}/trace_clock set_value_of ${instance}.cpumask ${instancedir}/tracing_cpumask + set_value_of ${instance}.tracing_on ${instancedir}/tracing_on set_value_of ${instance}.tracer ${instancedir}/current_tracer set_array_of ${instance}.ftrace.filters \ ${instancedir}/set_ftrace_filter diff --git a/tools/bootconfig/scripts/ftrace2bconf.sh b/tools/bootconfig/scripts/ftrace2bconf.sh index 6c0d4b61e0c2..a0c3bcc6da4f 100755 --- a/tools/bootconfig/scripts/ftrace2bconf.sh +++ b/tools/bootconfig/scripts/ftrace2bconf.sh @@ -221,6 +221,10 @@ instance_options() { # [instance-name] if [ `echo $val | sed -e s/f//g`x != x ]; then emit_kv $PREFIX.cpumask = $val fi + val=`cat $INSTANCE/tracing_on` + if [ `echo $val | sed -e s/f//g`x != x ]; then + emit_kv $PREFIX.tracing_on = $val + fi val= for i in `cat $INSTANCE/set_event`; do -- cgit 1.4.1 From b79f2dc5ffe17b03ec8c55f0d63f65e87bcac676 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Wed, 13 Jan 2021 14:16:59 +0200 Subject: RDMA/umem: Avoid undefined behavior of rounddown_pow_of_two() rounddown_pow_of_two() is undefined when the input is 0. Therefore we need to avoid it in ib_umem_find_best_pgsz and return 0. Otherwise, it could result in not rejecting an invalid page size which eventually causes a kernel oops due to the logical inconsistency. Fixes: 3361c29e9279 ("RDMA/umem: Use simpler logic for ib_umem_find_best_pgsz()") Link: https://lore.kernel.org/r/20210113121703.559778-2-leon@kernel.org Signed-off-by: Aharon Landau Reviewed-by: Jason Gunthorpe Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 7ca4112e3e8f..917338db7ac1 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -135,7 +135,7 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, */ if (mask) pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0); - return rounddown_pow_of_two(pgsz_bitmap); + return pgsz_bitmap ? rounddown_pow_of_two(pgsz_bitmap) : 0; } EXPORT_SYMBOL(ib_umem_find_best_pgsz); -- cgit 1.4.1 From 2cb091f6293df898b47f4e0f2e54324e2bbaf816 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 13 Jan 2021 14:17:00 +0200 Subject: IB/mlx5: Fix error unwinding when set_has_smi_cap fails When set_has_smi_cap() fails, multiport master cleanup is missed. Fix it by doing the correct error unwinding goto. Fixes: a989ea01cb10 ("RDMA/mlx5: Move SMI caps logic") Link: https://lore.kernel.org/r/20210113121703.559778-3-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 3bae9ba0ead8..fbe3b75f866b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3956,7 +3956,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) err = set_has_smi_cap(dev); if (err) - return err; + goto err_mp; if (!mlx5_core_mp_enabled(mdev)) { for (i = 1; i <= dev->num_ports; i++) { -- cgit 1.4.1 From 1c3aa6bd0b823105c2030af85d92d158e815d669 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 13 Jan 2021 14:17:03 +0200 Subject: RDMA/mlx5: Fix wrong free of blue flame register on error If the allocation of the fast path blue flame register fails, the driver should free the regular blue flame register allocated a statement above, not the one that it just failed to allocate. Fixes: 16c1975f1032 ("IB/mlx5: Create profile infrastructure to add and remove stages") Link: https://lore.kernel.org/r/20210113121703.559778-6-leon@kernel.org Reported-by: Hans Petter Selasky Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index fbe3b75f866b..d26f3f3e0462 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4319,7 +4319,7 @@ static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev) err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true); if (err) - mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); + mlx5_free_bfreg(dev->mdev, &dev->bfreg); return err; } -- cgit 1.4.1 From 7c7b3e5d9aeed31d35c5dab0bf9c0fd4c8923206 Mon Sep 17 00:00:00 2001 From: Neta Ostrovsky Date: Wed, 13 Jan 2021 15:02:14 +0200 Subject: RDMA/cma: Fix error flow in default_roce_mode_store In default_roce_mode_store(), we took a reference to cma_dev, but didn't return it with cma_dev_put in the error flow. Fixes: 1c15b4f2a42f ("RDMA/core: Modify enum ib_gid_type and enum rdma_network_type") Link: https://lore.kernel.org/r/20210113130214.562108-1-leon@kernel.org Signed-off-by: Neta Ostrovsky Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma_configfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 7f70e5a7de10..97a77ea8d3c9 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -131,8 +131,10 @@ static ssize_t default_roce_mode_store(struct config_item *item, return ret; gid_type = ib_cache_gid_parse_type_str(buf); - if (gid_type < 0) + if (gid_type < 0) { + cma_configfs_params_put(cma_dev); return -EINVAL; + } ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type); -- cgit 1.4.1 From 47e4bb147a96f1c9b4e7691e7e994e53838bfff8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:29:47 -0800 Subject: net: sit: unregister_netdevice on newlink's error path We need to unregister the netdevice if config failed. .ndo_uninit takes care of most of the heavy lifting. This was uncovered by recent commit c269a24ce057 ("net: make free_netdev() more lenient with unregistering devices"). Previously the partially-initialized device would be left in the system. Reported-and-tested-by: syzbot+2393580080a2da190f04@syzkaller.appspotmail.com Fixes: e2f1f072db8d ("sit: allow to configure 6rd tunnels via netlink") Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20210114012947.2515313-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv6/sit.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 2da0ee703779..93636867aee2 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1645,8 +1645,11 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev, } #ifdef CONFIG_IPV6_SIT_6RD - if (ipip6_netlink_6rd_parms(data, &ip6rd)) + if (ipip6_netlink_6rd_parms(data, &ip6rd)) { err = ipip6_tunnel_update_6rd(nt, &ip6rd); + if (err < 0) + unregister_netdevice_queue(dev, NULL); + } #endif return err; -- cgit 1.4.1 From 25764779298f23a659f3daf39f9e2b5975a7a89d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 14 Jan 2021 09:04:48 +0100 Subject: net: tip: fix a couple kernel-doc markups A function has a different name between their prototype and its kernel-doc markup: ../net/tipc/link.c:2551: warning: expecting prototype for link_reset_stats(). Prototype was for tipc_link_reset_stats() instead ../net/tipc/node.c:1678: warning: expecting prototype for is the general link level function for message sending(). Prototype was for tipc_node_xmit() instead Signed-off-by: Mauro Carvalho Chehab Acked-by: Jon Maloy Signed-off-by: Jakub Kicinski --- net/tipc/link.c | 2 +- net/tipc/node.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index a6a694b78927..115109259430 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2544,7 +2544,7 @@ void tipc_link_set_queue_limits(struct tipc_link *l, u32 min_win, u32 max_win) } /** - * link_reset_stats - reset link statistics + * tipc_link_reset_stats - reset link statistics * @l: pointer to link */ void tipc_link_reset_stats(struct tipc_link *l) diff --git a/net/tipc/node.c b/net/tipc/node.c index 83d9eb830592..008670d1f43e 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1665,7 +1665,7 @@ static void tipc_lxc_xmit(struct net *peer_net, struct sk_buff_head *list) } /** - * tipc_node_xmit() is the general link level function for message sending + * tipc_node_xmit() - general link level function for message sending * @net: the applicable net namespace * @list: chain of buffers containing message * @dnode: address of destination node -- cgit 1.4.1 From b76889ff51bfee318bea15891420e5aefd2833a0 Mon Sep 17 00:00:00 2001 From: Yannick Vignon Date: Wed, 13 Jan 2021 14:15:56 +0100 Subject: net: stmmac: fix taprio schedule configuration When configuring a 802.1Qbv schedule through the tc taprio qdisc on an NXP i.MX8MPlus device, the effective cycle time differed from the requested one by N*96ns, with N number of entries in the Qbv Gate Control List. This is because the driver was adding a 96ns margin to each interval of the GCL, apparently to account for the IPG. The problem was observed on NXP i.MX8MPlus devices but likely affected all devices relying on the same configuration callback (dwmac 4.00, 4.10, 5.10 variants). Fix the issue by removing the margins, and simply setup the MAC with the provided cycle time value. This is the behavior expected by the user-space API, as altering the Qbv schedule timings would break standards conformance. This is also the behavior of several other Ethernet MAC implementations supporting taprio, including the dwxgmac variant of stmmac. Fixes: 504723af0d85 ("net: stmmac: Add basic EST support for GMAC5+") Signed-off-by: Yannick Vignon Link: https://lore.kernel.org/r/20210113131557.24651-1-yannick.vignon@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac5.c | 52 +++------------------------- 1 file changed, 4 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c index 03e79a677c8b..8f7ac24545ef 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c @@ -568,68 +568,24 @@ static int dwmac5_est_write(void __iomem *ioaddr, u32 reg, u32 val, bool gcl) int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg, unsigned int ptp_rate) { - u32 speed, total_offset, offset, ctrl, ctr_low; - u32 extcfg = readl(ioaddr + GMAC_EXT_CONFIG); - u32 mac_cfg = readl(ioaddr + GMAC_CONFIG); int i, ret = 0x0; - u64 total_ctr; - - if (extcfg & GMAC_CONFIG_EIPG_EN) { - offset = (extcfg & GMAC_CONFIG_EIPG) >> GMAC_CONFIG_EIPG_SHIFT; - offset = 104 + (offset * 8); - } else { - offset = (mac_cfg & GMAC_CONFIG_IPG) >> GMAC_CONFIG_IPG_SHIFT; - offset = 96 - (offset * 8); - } - - speed = mac_cfg & (GMAC_CONFIG_PS | GMAC_CONFIG_FES); - speed = speed >> GMAC_CONFIG_FES_SHIFT; - - switch (speed) { - case 0x0: - offset = offset * 1000; /* 1G */ - break; - case 0x1: - offset = offset * 400; /* 2.5G */ - break; - case 0x2: - offset = offset * 100000; /* 10M */ - break; - case 0x3: - offset = offset * 10000; /* 100M */ - break; - default: - return -EINVAL; - } - - offset = offset / 1000; + u32 ctrl; ret |= dwmac5_est_write(ioaddr, BTR_LOW, cfg->btr[0], false); ret |= dwmac5_est_write(ioaddr, BTR_HIGH, cfg->btr[1], false); ret |= dwmac5_est_write(ioaddr, TER, cfg->ter, false); ret |= dwmac5_est_write(ioaddr, LLR, cfg->gcl_size, false); + ret |= dwmac5_est_write(ioaddr, CTR_LOW, cfg->ctr[0], false); + ret |= dwmac5_est_write(ioaddr, CTR_HIGH, cfg->ctr[1], false); if (ret) return ret; - total_offset = 0; for (i = 0; i < cfg->gcl_size; i++) { - ret = dwmac5_est_write(ioaddr, i, cfg->gcl[i] + offset, true); + ret = dwmac5_est_write(ioaddr, i, cfg->gcl[i], true); if (ret) return ret; - - total_offset += offset; } - total_ctr = cfg->ctr[0] + cfg->ctr[1] * 1000000000ULL; - total_ctr += total_offset; - - ctr_low = do_div(total_ctr, 1000000000); - - ret |= dwmac5_est_write(ioaddr, CTR_LOW, ctr_low, false); - ret |= dwmac5_est_write(ioaddr, CTR_HIGH, total_ctr, false); - if (ret) - return ret; - ctrl = readl(ioaddr + MTL_EST_CONTROL); ctrl &= ~PTOV; ctrl |= ((1000000000 / ptp_rate) * 6) << PTOV_SHIFT; -- cgit 1.4.1 From fe28c53ed71d463e187748b6b10e1130dd72ceeb Mon Sep 17 00:00:00 2001 From: Yannick Vignon Date: Wed, 13 Jan 2021 14:15:57 +0100 Subject: net: stmmac: fix taprio configuration when base_time is in the past The Synopsys TSN MAC supports Qbv base times in the past, but only up to a certain limit. As a result, a taprio qdisc configuration with a small base time (for example when treating the base time as a simple phase offset) is not applied by the hardware and silently ignored. This was observed on an NXP i.MX8MPlus device, but likely affects all TSN-variants of the MAC. Fix the issue by making sure the base time is in the future, pushing it by an integer amount of cycle times if needed. (a similar check is already done in several other taprio implementations, see for example drivers/net/ethernet/intel/igc/igc_tsn.c#L116 or drivers/net/dsa/sja1105/sja1105_ptp.h#L39). Fixes: b60189e0392f ("net: stmmac: Integrate EST with TAPRIO scheduler API") Signed-off-by: Yannick Vignon Link: https://lore.kernel.org/r/20210113131557.24651-2-yannick.vignon@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index f5bed4d26e80..8ed3b2c834a0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -599,7 +599,8 @@ static int tc_setup_taprio(struct stmmac_priv *priv, { u32 size, wid = priv->dma_cap.estwid, dep = priv->dma_cap.estdep; struct plat_stmmacenet_data *plat = priv->plat; - struct timespec64 time; + struct timespec64 time, current_time; + ktime_t current_time_ns; bool fpe = false; int i, ret = 0; u64 ctr; @@ -694,7 +695,22 @@ static int tc_setup_taprio(struct stmmac_priv *priv, } /* Adjust for real system time */ - time = ktime_to_timespec64(qopt->base_time); + priv->ptp_clock_ops.gettime64(&priv->ptp_clock_ops, ¤t_time); + current_time_ns = timespec64_to_ktime(current_time); + if (ktime_after(qopt->base_time, current_time_ns)) { + time = ktime_to_timespec64(qopt->base_time); + } else { + ktime_t base_time; + s64 n; + + n = div64_s64(ktime_sub_ns(current_time_ns, qopt->base_time), + qopt->cycle_time); + base_time = ktime_add_ns(qopt->base_time, + (n + 1) * qopt->cycle_time); + + time = ktime_to_timespec64(base_time); + } + priv->plat->est->btr[0] = (u32)time.tv_nsec; priv->plat->est->btr[1] = (u32)time.tv_sec; -- cgit 1.4.1 From 7da17624e7948d5d9660b910f8079d26d26ce453 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 13 Jan 2021 15:43:09 +0100 Subject: nt: usb: USB_RTL8153_ECM should not default to y In general, device drivers should not be enabled by default. Fixes: 657bc1d10bfc23ac ("r8153_ecm: avoid to be prior to r8152 driver") Signed-off-by: Geert Uytterhoeven Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20210113144309.1384615-1-geert+renesas@glider.be Signed-off-by: Jakub Kicinski --- drivers/net/usb/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index 1e3719028780..fbbe78643631 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -631,7 +631,6 @@ config USB_NET_AQC111 config USB_RTL8153_ECM tristate "RTL8153 ECM support" depends on USB_NET_CDCETHER && (USB_RTL8152 || USB_RTL8152=n) - default y help This option supports ECM mode for RTL8153 ethernet adapter, when CONFIG_USB_RTL8152 is not set, or the RTL8153 device is not -- cgit 1.4.1 From 3226b158e67cfaa677fd180152bfb28989cb2fac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 13 Jan 2021 08:18:19 -0800 Subject: net: avoid 32 x truesize under-estimation for tiny skbs Both virtio net and napi_get_frags() allocate skbs with a very small skb->head While using page fragments instead of a kmalloc backed skb->head might give a small performance improvement in some cases, there is a huge risk of under estimating memory usage. For both GOOD_COPY_LEN and GRO_MAX_HEAD, we can fit at least 32 allocations per page (order-3 page in x86), or even 64 on PowerPC We have been tracking OOM issues on GKE hosts hitting tcp_mem limits but consuming far more memory for TCP buffers than instructed in tcp_mem[2] Even if we force napi_alloc_skb() to only use order-0 pages, the issue would still be there on arches with PAGE_SIZE >= 32768 This patch makes sure that small skb head are kmalloc backed, so that other objects in the slab page can be reused instead of being held as long as skbs are sitting in socket queues. Note that we might in the future use the sk_buff napi cache, instead of going through a more expensive __alloc_skb() Another idea would be to use separate page sizes depending on the allocated length (to never have more than 4 frags per page) I would like to thank Greg Thelen for his precious help on this matter, analysing crash dumps is always a time consuming task. Fixes: fd11a83dd363 ("net: Pull out core bits of __netdev_alloc_skb and add __napi_alloc_skb") Signed-off-by: Eric Dumazet Cc: Paolo Abeni Cc: Greg Thelen Reviewed-by: Alexander Duyck Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20210113161819.1155526-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0da035c1e53f..c1a6f262636a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -501,13 +501,17 @@ EXPORT_SYMBOL(__netdev_alloc_skb); struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, gfp_t gfp_mask) { - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); + struct napi_alloc_cache *nc; struct sk_buff *skb; void *data; len += NET_SKB_PAD + NET_IP_ALIGN; - if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) || + /* If requested length is either too small or too big, + * we use kmalloc() for skb->head allocation. + */ + if (len <= SKB_WITH_OVERHEAD(1024) || + len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); if (!skb) @@ -515,6 +519,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, goto skb_success; } + nc = this_cpu_ptr(&napi_alloc_cache); len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); len = SKB_DATA_ALIGN(len); -- cgit 1.4.1 From 93089de91e85743942a5f804850d4f0846e5402b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:49:06 -0800 Subject: MAINTAINERS: altx: move Jay Cliburn to CREDITS Jay was not active in recent years and does not have plans to return to work on ATLX drivers. Subsystem ATLX ETHERNET DRIVERS Changes 20 / 116 (17%) Last activity: 2020-02-24 Jay Cliburn : Chris Snook : Tags ea973742140b 2020-02-24 00:00:00 1 Top reviewers: [4]: andrew@lunn.ch [2]: kuba@kernel.org [2]: o.rempel@pengutronix.de INACTIVE MAINTAINER Jay Cliburn Acked-by: Chris Snook Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index 090ed4b004a5..59a704a45170 100644 --- a/CREDITS +++ b/CREDITS @@ -710,6 +710,10 @@ S: Las Cuevas 2385 - Bo Guemes S: Las Heras, Mendoza CP 5539 S: Argentina +N: Jay Cliburn +E: jcliburn@gmail.com +D: ATLX Ethernet drivers + N: Steven P. Cole E: scole@lanl.gov E: elenstev@mesatop.com diff --git a/MAINTAINERS b/MAINTAINERS index b15514a770e3..57e17762d411 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2942,7 +2942,6 @@ S: Maintained F: drivers/hwmon/asus_atk0110.c ATLX ETHERNET DRIVERS -M: Jay Cliburn M: Chris Snook L: netdev@vger.kernel.org S: Maintained -- cgit 1.4.1 From 09cd3f4683a901d572ad17f0564cc9e3e989f0f4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:49:07 -0800 Subject: MAINTAINERS: net: move Alexey Kuznetsov to CREDITS Move Alexey to CREDITS. I am probably not giving him enough justice with the description line.. Subsystem NETWORKING [IPv4/IPv6] Changes 1535 / 5111 (30%) Last activity: 2020-12-10 "David S. Miller" : Author b7e4ba9a91df 2020-12-09 00:00:00 407 Committer e0fecb289ad3 2020-12-10 00:00:00 3992 Tags e0fecb289ad3 2020-12-10 00:00:00 3978 Alexey Kuznetsov : Hideaki YOSHIFUJI : Tags d5d8760b78d0 2016-06-16 00:00:00 8 Top reviewers: [225]: edumazet@google.com [222]: dsahern@gmail.com [176]: ncardwell@google.com INACTIVE MAINTAINER Alexey Kuznetsov Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index 59a704a45170..3dceea737694 100644 --- a/CREDITS +++ b/CREDITS @@ -2142,6 +2142,10 @@ E: seasons@falcon.sch.bme.hu E: seasons@makosteszta.sote.hu D: Original author of software suspend +N: Alexey Kuznetsov +E: kuznet@ms2.inr.ac.ru +D: Author and maintainer of large parts of the networking stack + N: Jaroslav Kysela E: perex@perex.cz W: https://www.perex.cz diff --git a/MAINTAINERS b/MAINTAINERS index 57e17762d411..c6e7f6bf7f6d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12417,7 +12417,6 @@ F: tools/testing/selftests/net/ipsec.c NETWORKING [IPv4/IPv6] M: "David S. Miller" -M: Alexey Kuznetsov M: Hideaki YOSHIFUJI L: netdev@vger.kernel.org S: Maintained -- cgit 1.4.1 From 5e62d124f75aae0e96fd8a588ad31659a2468710 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:49:08 -0800 Subject: MAINTAINERS: vrf: move Shrijeet to CREDITS Shrijeet has moved on from VRF-related work. Subsystem VRF Changes 30 / 120 (25%) Last activity: 2020-12-09 David Ahern : Author 1b6687e31a2d 2020-07-23 00:00:00 1 Tags 9125abe7b9cb 2020-12-09 00:00:00 4 Shrijeet Mukherjee : Top reviewers: [13]: dsahern@gmail.com [4]: dsa@cumulusnetworks.com INACTIVE MAINTAINER Shrijeet Mukherjee Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index 3dceea737694..98e7485ec106 100644 --- a/CREDITS +++ b/CREDITS @@ -2704,6 +2704,10 @@ N: Wolfgang Muees E: wolfgang@iksw-muees.de D: Auerswald USB driver +N: Shrijeet Mukherjee +E: shrijeet@gmail.com +D: Network routing domains (VRF). + N: Paul Mundt E: paul.mundt@gmail.com D: SuperH maintainer diff --git a/MAINTAINERS b/MAINTAINERS index c6e7f6bf7f6d..a06faf9e2018 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19056,7 +19056,6 @@ K: regulator_get_optional VRF M: David Ahern -M: Shrijeet Mukherjee L: netdev@vger.kernel.org S: Maintained F: Documentation/networking/vrf.rst -- cgit 1.4.1 From c41efbf2ad56280762d19a531eb7edbf2e6a9f84 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:49:09 -0800 Subject: MAINTAINERS: ena: remove Zorik Machulsky from reviewers While ENA has 3 reviewers and 2 maintainers, we mostly see review tags and comments from the maintainers. While we very much appreciate Zorik's invovment in the community let's trim the reviewer list down to folks we've seen tags from. Subsystem AMAZON ETHERNET DRIVERS Changes 13 / 269 (4%) Last activity: 2020-11-24 Netanel Belgazal : Author 24dee0c7478d 2019-12-10 00:00:00 43 Tags 0e3a3f6dacf0 2020-07-21 00:00:00 47 Arthur Kiyanovski : Author 0e3a3f6dacf0 2020-07-21 00:00:00 79 Tags 09323b3bca95 2020-11-24 00:00:00 104 Guy Tzalik : Tags 713865da3c62 2020-09-10 00:00:00 3 Saeed Bishara : Tags 470793a78ce3 2020-02-11 00:00:00 2 Zorik Machulsky : Top reviewers: [4]: sameehj@amazon.com [3]: snelson@pensando.io [3]: shayagr@amazon.com INACTIVE MAINTAINER Zorik Machulsky Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a06faf9e2018..64dd19dfc9c3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -820,7 +820,6 @@ M: Netanel Belgazal M: Arthur Kiyanovski R: Guy Tzalik R: Saeed Bishara -R: Zorik Machulsky L: netdev@vger.kernel.org S: Supported F: Documentation/networking/device_drivers/ethernet/amazon/ena.rst -- cgit 1.4.1 From 0e4ed0b62b5a1f60b72ab7aaa29efd735d4cb6a6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:49:10 -0800 Subject: MAINTAINERS: tls: move Aviad to CREDITS Aviad wrote parts of the initial TLS implementation but hasn't been contributing to TLS since. Subsystem NETWORKING [TLS] Changes 123 / 308 (39%) Last activity: 2020-12-01 Boris Pismenny : Tags 138559b9f99d 2020-11-17 00:00:00 1 Aviad Yehezkel : John Fastabend : Author e91de6afa81c 2020-06-01 00:00:00 22 Tags e91de6afa81c 2020-06-01 00:00:00 29 Daniel Borkmann : Author c16ee04c9b30 2018-10-20 00:00:00 7 Committer b8e202d1d1d0 2020-02-21 00:00:00 19 Tags b8e202d1d1d0 2020-02-21 00:00:00 28 Jakub Kicinski : Author 5c39f26e67c9 2020-11-27 00:00:00 89 Committer d31c08007523 2020-12-01 00:00:00 15 Tags d31c08007523 2020-12-01 00:00:00 117 Top reviewers: [50]: dirk.vandermerwe@netronome.com [26]: simon.horman@netronome.com [14]: john.hurley@netronome.com INACTIVE MAINTAINER Aviad Yehezkel Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index 98e7485ec106..90384691876c 100644 --- a/CREDITS +++ b/CREDITS @@ -4122,6 +4122,10 @@ S: B-1206 Jingmao Guojigongyu S: 16 Baliqiao Nanjie, Beijing 101100 S: People's Repulic of China +N: Aviad Yehezkel +E: aviadye@nvidia.com +D: Kernel TLS implementation and offload support. + N: Victor Yodaiken E: yodaiken@fsmlabs.com D: RTLinux (RealTime Linux) diff --git a/MAINTAINERS b/MAINTAINERS index 64dd19dfc9c3..92fdc134ca14 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12472,7 +12472,6 @@ F: net/ipv6/tcp*.c NETWORKING [TLS] M: Boris Pismenny -M: Aviad Yehezkel M: John Fastabend M: Daniel Borkmann M: Jakub Kicinski -- cgit 1.4.1 From 4f3786e011940d83d7a9c365730936db96a0b233 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:49:11 -0800 Subject: MAINTAINERS: ipvs: move Wensong Zhang to CREDITS Move Wensong Zhang to credits, we haven't heard from him in years. Subsystem IPVS Changes 83 / 226 (36%) Last activity: 2020-11-27 Wensong Zhang : Simon Horman : Committer c24b75e0f923 2019-10-24 00:00:00 33 Tags 7980d2eabde8 2020-10-12 00:00:00 76 Julian Anastasov : Author 7980d2eabde8 2020-10-12 00:00:00 26 Tags 4bc3c8dc9f5f 2020-11-27 00:00:00 78 Top reviewers: [6]: horms+renesas@verge.net.au INACTIVE MAINTAINER Wensong Zhang Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index 90384691876c..ce8eae8c5aa4 100644 --- a/CREDITS +++ b/CREDITS @@ -4183,6 +4183,10 @@ S: 1507 145th Place SE #B5 S: Bellevue, Washington 98007 S: USA +N: Wensong Zhang +E: wensong@linux-vs.org +D: IP virtual server (IPVS). + N: Haojian Zhuang E: haojian.zhuang@gmail.com D: MMP support diff --git a/MAINTAINERS b/MAINTAINERS index 92fdc134ca14..18e75e29c672 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9325,7 +9325,6 @@ W: http://www.adaptec.com/ F: drivers/scsi/ips* IPVS -M: Wensong Zhang M: Simon Horman M: Julian Anastasov L: netdev@vger.kernel.org -- cgit 1.4.1 From 054c4610bd05e7bf677efefa880da2da340599fc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:49:12 -0800 Subject: MAINTAINERS: dccp: move Gerrit Renker to CREDITS As far as I can tell we haven't heard from Gerrit for roughly 5 years now. DCCP patch would really benefit from some review. Gerrit was the last maintainer so mark this entry as orphaned. Subsystem DCCP PROTOCOL Changes 38 / 166 (22%) (No activity) Top reviewers: [6]: kstewart@linuxfoundation.org [6]: allison@lohutok.net [5]: edumazet@google.com INACTIVE MAINTAINER Gerrit Renker Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CREDITS b/CREDITS index ce8eae8c5aa4..9add7e6a4fa0 100644 --- a/CREDITS +++ b/CREDITS @@ -1288,6 +1288,10 @@ D: Major kbuild rework during the 2.5 cycle D: ISDN Maintainer S: USA +N: Gerrit Renker +E: gerrit@erg.abdn.ac.uk +D: DCCP protocol support. + N: Philip Gladstone E: philip@gladstonefamily.net D: Kernel / timekeeping stuff diff --git a/MAINTAINERS b/MAINTAINERS index 18e75e29c672..2a6dc5bfa08c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4920,9 +4920,8 @@ F: Documentation/scsi/dc395x.rst F: drivers/scsi/dc395x.* DCCP PROTOCOL -M: Gerrit Renker L: dccp@vger.kernel.org -S: Maintained +S: Orphan W: http://www.linuxfoundation.org/collaborate/workgroups/networking/dccp F: include/linux/dccp.h F: include/linux/tfrc.h -- cgit 1.4.1 From 25537d71e2d007faf42a244a75e5a2bb7c356234 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 14 Jan 2021 17:12:15 +0200 Subject: net: Allow NETIF_F_HW_TLS_TX if IP_CSUM && IPV6_CSUM Cited patch below blocked the TLS TX device offload unless HW_CSUM is set. This broke devices that use IP_CSUM && IP6_CSUM. Here we fix it. Note that the single HW_TLS_TX feature flag indicates support for both IPv4/6, hence it should still be disabled in case only one of (IP_CSUM | IPV6_CSUM) is set. Fixes: ae0b04b238e2 ("net: Disable NETIF_F_HW_TLS_TX when HW_CSUM is disabled") Signed-off-by: Tariq Toukan Reported-by: Rohit Maheshwari Reviewed-by: Maxim Mikityanskiy Link: https://lore.kernel.org/r/20210114151215.7061-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/networking/tls-offload.rst | 2 +- net/core/dev.c | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst index 0f55c6d540f9..9af3334d9ad0 100644 --- a/Documentation/networking/tls-offload.rst +++ b/Documentation/networking/tls-offload.rst @@ -530,7 +530,7 @@ TLS device feature flags only control adding of new TLS connection offloads, old connections will remain active after flags are cleared. TLS encryption cannot be offloaded to devices without checksum calculation -offload. Hence, TLS TX device feature flag requires NETIF_F_HW_CSUM being set. +offload. Hence, TLS TX device feature flag requires TX csum offload being set. Disabling the latter implies clearing the former. Disabling TX checksum offload should not affect old connections, and drivers should make sure checksum calculation does not break for them. diff --git a/net/core/dev.c b/net/core/dev.c index 0071a11a6dc3..c360bb5367e2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9661,9 +9661,15 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, } } - if ((features & NETIF_F_HW_TLS_TX) && !(features & NETIF_F_HW_CSUM)) { - netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n"); - features &= ~NETIF_F_HW_TLS_TX; + if (features & NETIF_F_HW_TLS_TX) { + bool ip_csum = (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) == + (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); + bool hw_csum = features & NETIF_F_HW_CSUM; + + if (!ip_csum && !hw_csum) { + netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n"); + features &= ~NETIF_F_HW_TLS_TX; + } } return features; -- cgit 1.4.1 From 4369376ba91c97a1b2dd74abeec18c0c0ddf4ac9 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Fri, 8 Jan 2021 16:34:31 +0800 Subject: drm/amdgpu: set power brake sequence Add function to set power brake sequence. Signed-off-by: Likun Gao Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 34141b785aa4..619d34c041ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -164,6 +164,9 @@ #define mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid 0x15db #define mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid_BASE_IDX 0 +#define mmGC_THROTTLE_CTRL_Sienna_Cichlid 0x2030 +#define mmGC_THROTTLE_CTRL_Sienna_Cichlid_BASE_IDX 0 + MODULE_FIRMWARE("amdgpu/navi10_ce.bin"); MODULE_FIRMWARE("amdgpu/navi10_pfp.bin"); MODULE_FIRMWARE("amdgpu/navi10_me.bin"); @@ -3328,6 +3331,7 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure); static u32 gfx_v10_3_get_disabled_sa(struct amdgpu_device *adev); static void gfx_v10_3_program_pbb_mode(struct amdgpu_device *adev); +static void gfx_v10_3_set_power_brake_sequence(struct amdgpu_device *adev); static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { @@ -7196,6 +7200,9 @@ static int gfx_v10_0_hw_init(void *handle) if (adev->asic_type == CHIP_SIENNA_CICHLID) gfx_v10_3_program_pbb_mode(adev); + if (adev->asic_type >= CHIP_SIENNA_CICHLID) + gfx_v10_3_set_power_brake_sequence(adev); + return r; } @@ -9181,6 +9188,31 @@ static void gfx_v10_3_program_pbb_mode(struct amdgpu_device *adev) } } +static void gfx_v10_3_set_power_brake_sequence(struct amdgpu_device *adev) +{ + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, + (0x1 << GRBM_GFX_INDEX__SA_BROADCAST_WRITES__SHIFT) | + (0x1 << GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES__SHIFT) | + (0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT)); + + WREG32_SOC15(GC, 0, mmGC_CAC_IND_INDEX, ixPWRBRK_STALL_PATTERN_CTRL); + WREG32_SOC15(GC, 0, mmGC_CAC_IND_DATA, + (0x1 << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_STEP_INTERVAL__SHIFT) | + (0x12 << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_BEGIN_STEP__SHIFT) | + (0x13 << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_END_STEP__SHIFT) | + (0xf << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_THROTTLE_PATTERN_BIT_NUMS__SHIFT)); + + WREG32_SOC15(GC, 0, mmGC_THROTTLE_CTRL_Sienna_Cichlid, + (0x1 << GC_THROTTLE_CTRL__PWRBRK_STALL_EN__SHIFT) | + (0x1 << GC_THROTTLE_CTRL__PATTERN_MODE__SHIFT) | + (0x5 << GC_THROTTLE_CTRL__RELEASE_STEP_INTERVAL__SHIFT)); + + WREG32_SOC15(GC, 0, mmDIDT_IND_INDEX, ixDIDT_SQ_THROTTLE_CTRL); + + WREG32_SOC15(GC, 0, mmDIDT_IND_DATA, + (0x1 << DIDT_SQ_THROTTLE_CTRL__PWRBRK_STALL_EN__SHIFT)); +} + const struct amdgpu_ip_block_version gfx_v10_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_GFX, -- cgit 1.4.1 From 55df908bd663ead7d85bd64dd49562d5ac3889ef Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 7 Jan 2021 14:50:51 -0500 Subject: Revert "drm/amd/display: Fix unused variable warning" This reverts commit f01afd1ee48816457fb22e201f1d0cfb14589904. Cc: Wayne Lin Cc: Alexander Deucher Cc: Harry Wentland Cc: Roman Li Cc: Bindu R Cc: Daniel Vetter Acked-by: Daniel Vetter Acked-by: Alex Deucher Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 +++- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 146486071d01..8fac6116591b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8378,7 +8378,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) acrtc->dm_irq_params.stream = dm_new_crtc_state->stream; manage_dm_interrupts(adev, acrtc, true); } - if (IS_ENABLED(CONFIG_DEBUG_FS) && new_crtc_state->active && +#ifdef CONFIG_DEBUG_FS + if (new_crtc_state->active && amdgpu_dm_is_valid_crc_source(dm_new_crtc_state->crc_src)) { /** * Frontend may have changed so reapply the CRC capture @@ -8399,6 +8400,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) amdgpu_dm_crtc_configure_crc_source( crtc, dm_new_crtc_state, dm_new_crtc_state->crc_src); } +#endif } for_each_new_crtc_in_state(state, crtc, new_crtc_state, j) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h index eba2f1d35d07..0235bfb246e5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h @@ -46,13 +46,13 @@ static inline bool amdgpu_dm_is_valid_crc_source(enum amdgpu_dm_pipe_crc_source } /* amdgpu_dm_crc.c */ +#ifdef CONFIG_DEBUG_FS bool amdgpu_dm_crc_window_is_default(struct dm_crtc_state *dm_crtc_state); bool amdgpu_dm_crc_window_changed(struct dm_crtc_state *dm_new_crtc_state, struct dm_crtc_state *dm_old_crtc_state); int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, struct dm_crtc_state *dm_crtc_state, enum amdgpu_dm_pipe_crc_source source); -#ifdef CONFIG_DEBUG_FS int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name); int amdgpu_dm_crtc_verify_crc_source(struct drm_crtc *crtc, const char *src_name, -- cgit 1.4.1 From 3c517ca5212faab4604e1725b4d31e290945ff87 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 7 Jan 2021 14:51:49 -0500 Subject: Revert "drm/amdgpu/disply: fix documentation warnings in display manager" This reverts commit 6ae09fa49147e557eb6aebbb5b2059b63706d454. Cc: Wayne Lin Cc: Alexander Deucher Cc: Harry Wentland Cc: Roman Li Cc: Bindu R Cc: Daniel Vetter Acked-by: Daniel Vetter Acked-by: Alex Deucher Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 2ee6edb3df93..0b31779a0485 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -337,29 +337,10 @@ struct amdgpu_display_manager { const struct gpu_info_soc_bounding_box_v1_0 *soc_bounding_box; #ifdef CONFIG_DEBUG_FS - /** - * @crc_win_x_start_property: - * - * X start of the crc calculation window - */ + /* set the crc calculation window*/ struct drm_property *crc_win_x_start_property; - /** - * @crc_win_y_start_property: - * - * Y start of the crc calculation window - */ struct drm_property *crc_win_y_start_property; - /** - * @crc_win_x_end_property: - * - * X end of the crc calculation window - */ struct drm_property *crc_win_x_end_property; - /** - * @crc_win_y_end_property: - * - * Y end of the crc calculation window - */ struct drm_property *crc_win_y_end_property; #endif /** -- cgit 1.4.1 From a7ddd22151fc2910c7b2faad64680cc2bb699b03 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 7 Jan 2021 15:09:30 -0500 Subject: Revert "drm/amd/display: Expose new CRC window property" This reverts commit c920888c604d72799d057bbcd9e28a6c003ccfbe. Cc: Wayne Lin Cc: Alexander Deucher Cc: Harry Wentland Cc: Roman Li Cc: Bindu R Cc: Daniel Vetter Acked-by: Daniel Vetter Acked-by: Alex Deucher Reviewed-by: Wayne Lin Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 142 ++------------------- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 19 --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c | 56 +------- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h | 3 - 4 files changed, 10 insertions(+), 210 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 8fac6116591b..c6da89df055d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -939,41 +939,6 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_ } #endif -#ifdef CONFIG_DEBUG_FS -static int create_crtc_crc_properties(struct amdgpu_display_manager *dm) -{ - dm->crc_win_x_start_property = - drm_property_create_range(adev_to_drm(dm->adev), - DRM_MODE_PROP_ATOMIC, - "AMD_CRC_WIN_X_START", 0, U16_MAX); - if (!dm->crc_win_x_start_property) - return -ENOMEM; - - dm->crc_win_y_start_property = - drm_property_create_range(adev_to_drm(dm->adev), - DRM_MODE_PROP_ATOMIC, - "AMD_CRC_WIN_Y_START", 0, U16_MAX); - if (!dm->crc_win_y_start_property) - return -ENOMEM; - - dm->crc_win_x_end_property = - drm_property_create_range(adev_to_drm(dm->adev), - DRM_MODE_PROP_ATOMIC, - "AMD_CRC_WIN_X_END", 0, U16_MAX); - if (!dm->crc_win_x_end_property) - return -ENOMEM; - - dm->crc_win_y_end_property = - drm_property_create_range(adev_to_drm(dm->adev), - DRM_MODE_PROP_ATOMIC, - "AMD_CRC_WIN_Y_END", 0, U16_MAX); - if (!dm->crc_win_y_end_property) - return -ENOMEM; - - return 0; -} -#endif - static int amdgpu_dm_init(struct amdgpu_device *adev) { struct dc_init_data init_data; @@ -1120,10 +1085,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) dc_init_callbacks(adev->dm.dc, &init_params); } -#endif -#ifdef CONFIG_DEBUG_FS - if (create_crtc_crc_properties(&adev->dm)) - DRM_ERROR("amdgpu: failed to create crc property.\n"); #endif if (amdgpu_dm_initialize_drm_device(adev)) { DRM_ERROR( @@ -5333,64 +5294,12 @@ dm_crtc_duplicate_state(struct drm_crtc *crtc) state->crc_src = cur->crc_src; state->cm_has_degamma = cur->cm_has_degamma; state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb; -#ifdef CONFIG_DEBUG_FS - state->crc_window = cur->crc_window; -#endif + /* TODO Duplicate dc_stream after objects are stream object is flattened */ return &state->base; } -#ifdef CONFIG_DEBUG_FS -static int amdgpu_dm_crtc_atomic_set_property(struct drm_crtc *crtc, - struct drm_crtc_state *crtc_state, - struct drm_property *property, - uint64_t val) -{ - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct dm_crtc_state *dm_new_state = - to_dm_crtc_state(crtc_state); - - if (property == adev->dm.crc_win_x_start_property) - dm_new_state->crc_window.x_start = val; - else if (property == adev->dm.crc_win_y_start_property) - dm_new_state->crc_window.y_start = val; - else if (property == adev->dm.crc_win_x_end_property) - dm_new_state->crc_window.x_end = val; - else if (property == adev->dm.crc_win_y_end_property) - dm_new_state->crc_window.y_end = val; - else - return -EINVAL; - - return 0; -} - -static int amdgpu_dm_crtc_atomic_get_property(struct drm_crtc *crtc, - const struct drm_crtc_state *state, - struct drm_property *property, - uint64_t *val) -{ - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct dm_crtc_state *dm_state = - to_dm_crtc_state(state); - - if (property == adev->dm.crc_win_x_start_property) - *val = dm_state->crc_window.x_start; - else if (property == adev->dm.crc_win_y_start_property) - *val = dm_state->crc_window.y_start; - else if (property == adev->dm.crc_win_x_end_property) - *val = dm_state->crc_window.x_end; - else if (property == adev->dm.crc_win_y_end_property) - *val = dm_state->crc_window.y_end; - else - return -EINVAL; - - return 0; -} -#endif - static inline int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable) { enum dc_irq_source irq_source; @@ -5457,10 +5366,6 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { .enable_vblank = dm_enable_vblank, .disable_vblank = dm_disable_vblank, .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp, -#ifdef CONFIG_DEBUG_FS - .atomic_set_property = amdgpu_dm_crtc_atomic_set_property, - .atomic_get_property = amdgpu_dm_crtc_atomic_get_property, -#endif }; static enum drm_connector_status @@ -6662,25 +6567,6 @@ static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, return 0; } -#ifdef CONFIG_DEBUG_FS -static void attach_crtc_crc_properties(struct amdgpu_display_manager *dm, - struct amdgpu_crtc *acrtc) -{ - drm_object_attach_property(&acrtc->base.base, - dm->crc_win_x_start_property, - 0); - drm_object_attach_property(&acrtc->base.base, - dm->crc_win_y_start_property, - 0); - drm_object_attach_property(&acrtc->base.base, - dm->crc_win_x_end_property, - 0); - drm_object_attach_property(&acrtc->base.base, - dm->crc_win_y_end_property, - 0); -} -#endif - static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, struct drm_plane *plane, uint32_t crtc_index) @@ -6728,9 +6614,7 @@ static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, drm_crtc_enable_color_mgmt(&acrtc->base, MAX_COLOR_LUT_ENTRIES, true, MAX_COLOR_LUT_ENTRIES); drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); -#ifdef CONFIG_DEBUG_FS - attach_crtc_crc_properties(dm, acrtc); -#endif + return 0; fail: @@ -8367,7 +8251,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) */ for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); - bool configure_crc = false; dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); @@ -8377,30 +8260,21 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) dc_stream_retain(dm_new_crtc_state->stream); acrtc->dm_irq_params.stream = dm_new_crtc_state->stream; manage_dm_interrupts(adev, acrtc, true); - } + #ifdef CONFIG_DEBUG_FS - if (new_crtc_state->active && - amdgpu_dm_is_valid_crc_source(dm_new_crtc_state->crc_src)) { /** * Frontend may have changed so reapply the CRC capture * settings for the stream. */ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); - dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); - - if (amdgpu_dm_crc_window_is_default(dm_new_crtc_state)) { - if (!old_crtc_state->active || drm_atomic_crtc_needs_modeset(new_crtc_state)) - configure_crc = true; - } else { - if (amdgpu_dm_crc_window_changed(dm_new_crtc_state, dm_old_crtc_state)) - configure_crc = true; - } - if (configure_crc) + if (amdgpu_dm_is_valid_crc_source(dm_new_crtc_state->crc_src)) { amdgpu_dm_crtc_configure_crc_source( - crtc, dm_new_crtc_state, dm_new_crtc_state->crc_src); - } + crtc, dm_new_crtc_state, + dm_new_crtc_state->crc_src); + } #endif + } } for_each_new_crtc_in_state(state, crtc, new_crtc_state, j) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 0b31779a0485..1182dafcef02 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -336,13 +336,6 @@ struct amdgpu_display_manager { */ const struct gpu_info_soc_bounding_box_v1_0 *soc_bounding_box; -#ifdef CONFIG_DEBUG_FS - /* set the crc calculation window*/ - struct drm_property *crc_win_x_start_property; - struct drm_property *crc_win_y_start_property; - struct drm_property *crc_win_x_end_property; - struct drm_property *crc_win_y_end_property; -#endif /** * @mst_encoders: * @@ -429,15 +422,6 @@ struct dm_plane_state { struct dc_plane_state *dc_state; }; -#ifdef CONFIG_DEBUG_FS -struct crc_rec { - uint16_t x_start; - uint16_t y_start; - uint16_t x_end; - uint16_t y_end; - }; -#endif - struct dm_crtc_state { struct drm_crtc_state base; struct dc_stream_state *stream; @@ -460,9 +444,6 @@ struct dm_crtc_state { struct dc_info_packet vrr_infopacket; int abm_level; -#ifdef CONFIG_DEBUG_FS - struct crc_rec crc_window; -#endif }; #define to_dm_crtc_state(x) container_of(x, struct dm_crtc_state, base) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index 7b886a779a8c..c29dc11619f7 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -81,41 +81,6 @@ const char *const *amdgpu_dm_crtc_get_crc_sources(struct drm_crtc *crtc, return pipe_crc_sources; } -static void amdgpu_dm_set_crc_window_default(struct dm_crtc_state *dm_crtc_state) -{ - dm_crtc_state->crc_window.x_start = 0; - dm_crtc_state->crc_window.y_start = 0; - dm_crtc_state->crc_window.x_end = 0; - dm_crtc_state->crc_window.y_end = 0; -} - -bool amdgpu_dm_crc_window_is_default(struct dm_crtc_state *dm_crtc_state) -{ - bool ret = true; - - if ((dm_crtc_state->crc_window.x_start != 0) || - (dm_crtc_state->crc_window.y_start != 0) || - (dm_crtc_state->crc_window.x_end != 0) || - (dm_crtc_state->crc_window.y_end != 0)) - ret = false; - - return ret; -} - -bool amdgpu_dm_crc_window_changed(struct dm_crtc_state *dm_new_crtc_state, - struct dm_crtc_state *dm_old_crtc_state) -{ - bool ret = false; - - if ((dm_new_crtc_state->crc_window.x_start != dm_old_crtc_state->crc_window.x_start) || - (dm_new_crtc_state->crc_window.y_start != dm_old_crtc_state->crc_window.y_start) || - (dm_new_crtc_state->crc_window.x_end != dm_old_crtc_state->crc_window.x_end) || - (dm_new_crtc_state->crc_window.y_end != dm_old_crtc_state->crc_window.y_end)) - ret = true; - - return ret; -} - int amdgpu_dm_crtc_verify_crc_source(struct drm_crtc *crtc, const char *src_name, size_t *values_cnt) @@ -140,7 +105,6 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, struct dc_stream_state *stream_state = dm_crtc_state->stream; bool enable = amdgpu_dm_is_valid_crc_source(source); int ret = 0; - struct crc_params *crc_window = NULL, tmp_window; /* Configuration will be deferred to stream enable. */ if (!stream_state) @@ -149,25 +113,9 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, mutex_lock(&adev->dm.dc_lock); /* Enable CRTC CRC generation if necessary. */ - if (dm_is_crc_source_crtc(source) || source == AMDGPU_DM_PIPE_CRC_SOURCE_NONE) { - if (!enable) - amdgpu_dm_set_crc_window_default(dm_crtc_state); - - if (!amdgpu_dm_crc_window_is_default(dm_crtc_state)) { - crc_window = &tmp_window; - - tmp_window.windowa_x_start = dm_crtc_state->crc_window.x_start; - tmp_window.windowa_y_start = dm_crtc_state->crc_window.y_start; - tmp_window.windowa_x_end = dm_crtc_state->crc_window.x_end; - tmp_window.windowa_y_end = dm_crtc_state->crc_window.y_end; - tmp_window.windowb_x_start = dm_crtc_state->crc_window.x_start; - tmp_window.windowb_y_start = dm_crtc_state->crc_window.y_start; - tmp_window.windowb_x_end = dm_crtc_state->crc_window.x_end; - tmp_window.windowb_y_end = dm_crtc_state->crc_window.y_end; - } - + if (dm_is_crc_source_crtc(source)) { if (!dc_stream_configure_crc(stream_state->ctx->dc, - stream_state, crc_window, enable, enable)) { + stream_state, NULL, enable, enable)) { ret = -EINVAL; goto unlock; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h index 0235bfb246e5..f7d731797d3f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h @@ -47,9 +47,6 @@ static inline bool amdgpu_dm_is_valid_crc_source(enum amdgpu_dm_pipe_crc_source /* amdgpu_dm_crc.c */ #ifdef CONFIG_DEBUG_FS -bool amdgpu_dm_crc_window_is_default(struct dm_crtc_state *dm_crtc_state); -bool amdgpu_dm_crc_window_changed(struct dm_crtc_state *dm_new_crtc_state, - struct dm_crtc_state *dm_old_crtc_state); int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, struct dm_crtc_state *dm_crtc_state, enum amdgpu_dm_pipe_crc_source source); -- cgit 1.4.1 From 2f0fa789f7b9fb022440f8f846cae175233987aa Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Tue, 24 Nov 2020 19:57:03 +0800 Subject: drm/amd/display: Fix to be able to stop crc calculation [Why] Find out when we try to disable CRC calculation, crc generation is still enabled. Main reason is that dc_stream_configure_crc() will never get called when the source is AMDGPU_DM_PIPE_CRC_SOURCE_NONE. [How] Add checking condition that when source is AMDGPU_DM_PIPE_CRC_SOURCE_NONE, we should also call dc_stream_configure_crc() to disable crc calculation. Acked-by: Alex Deucher Reviewed-by: Rodrigo Siqueira Signed-off-by: Wayne Lin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index c29dc11619f7..66cb8730586b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -113,7 +113,7 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, mutex_lock(&adev->dm.dc_lock); /* Enable CRTC CRC generation if necessary. */ - if (dm_is_crc_source_crtc(source)) { + if (dm_is_crc_source_crtc(source) || source == AMDGPU_DM_PIPE_CRC_SOURCE_NONE) { if (!dc_stream_configure_crc(stream_state->ctx->dc, stream_state, NULL, enable, enable)) { ret = -EINVAL; -- cgit 1.4.1 From 13a9499e833387fcc7a53915bbe5cddf3c336b59 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 14 Jan 2021 16:37:37 +0100 Subject: mptcp: fix locking in mptcp_disconnect() tcp_disconnect() expects the caller acquires the sock lock, but mptcp_disconnect() is not doing that. Add the missing required lock. Reported-by: Eric Dumazet Fixes: 76e2a55d1625 ("mptcp: better msk-level shutdown.") Signed-off-by: Paolo Abeni Link: https://lore.kernel.org/r/f818e82b58a556feeb71dcccc8bf1c87aafc6175.1610638176.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 81faeff8f3bb..f998a077c7dd 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2646,8 +2646,13 @@ static int mptcp_disconnect(struct sock *sk, int flags) struct mptcp_sock *msk = mptcp_sk(sk); __mptcp_flush_join_list(msk); - mptcp_for_each_subflow(msk, subflow) - tcp_disconnect(mptcp_subflow_tcp_sock(subflow), flags); + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + lock_sock(ssk); + tcp_disconnect(ssk, flags); + release_sock(ssk); + } return 0; } -- cgit 1.4.1 From 7a84665619bb5da8c8b6517157875a1fd7632014 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Sun, 10 Jan 2021 14:09:05 +0200 Subject: nvmet-rdma: Fix NULL deref when setting pi_enable and traddr INADDR_ANY When setting port traddr to INADDR_ANY, the listening cm_id->device is NULL. The associate IB device is known only when a connect request event arrives, so checking T10-PI device capability should be done at this stage. Fixes: b09160c3996c ("nvmet-rdma: add metadata/T10-PI support") Signed-off-by: Israel Rukshin Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index bdfc22eb2a10..06b6b742bb21 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1220,6 +1220,14 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) } ndev->inline_data_size = nport->inline_data_size; ndev->inline_page_count = inline_page_count; + + if (nport->pi_enable && !(cm_id->device->attrs.device_cap_flags & + IB_DEVICE_INTEGRITY_HANDOVER)) { + pr_warn("T10-PI is not supported by device %s. Disabling it\n", + cm_id->device->name); + nport->pi_enable = false; + } + ndev->device = cm_id->device; kref_init(&ndev->ref); @@ -1855,14 +1863,6 @@ static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port) goto out_destroy_id; } - if (port->nport->pi_enable && - !(cm_id->device->attrs.device_cap_flags & - IB_DEVICE_INTEGRITY_HANDOVER)) { - pr_err("T10-PI is not supported for %pISpcs\n", addr); - ret = -EINVAL; - goto out_destroy_id; - } - port->cm_id = cm_id; return 0; -- cgit 1.4.1 From ada831772188192243f9ea437c46e37e97a5975d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 13 Jan 2021 14:03:04 -0800 Subject: nvme-tcp: Fix warning with CONFIG_DEBUG_PREEMPT We shouldn't call smp_processor_id() in a preemptible context, but this is advisory at best, so instead call __smp_processor_id(). Fixes: db5ad6b7f8cd ("nvme-tcp: try to send request in queue_rq context") Reported-by: Or Gerlitz Reported-by: Yi Zhang Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 979ee31b8dd1..b2e0865785ef 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -286,7 +286,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, * directly, otherwise queue io_work. Also, only do that if we * are on the same cpu, so we don't introduce contention. */ - if (queue->io_cpu == smp_processor_id() && + if (queue->io_cpu == __smp_processor_id() && sync && empty && mutex_trylock(&queue->send_mutex)) { queue->more_requests = !last; nvme_tcp_send_all(queue); -- cgit 1.4.1 From ca1ff67d0fb14f39cf0cc5102b1fbcc3b14f6fb9 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 13 Jan 2021 13:56:57 -0800 Subject: nvme-tcp: fix possible data corruption with bio merges When a bio merges, we can get a request that spans multiple bios, and the overall request payload size is the sum of all bios. When we calculate how much we need to send from the existing bio (and bvec), we did not take into account the iov_iter byte count cap. Since multipage bvecs support, bvecs can split in the middle which means that when we account for the last bvec send we should also take the iov_iter byte count cap as it might be lower than the last bvec size. Reported-by: Hao Wang Fixes: 3f2304f8c6d6 ("nvme-tcp: add NVMe over TCP host driver") Tested-by: Hao Wang Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index b2e0865785ef..216619926563 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -201,7 +201,7 @@ static inline size_t nvme_tcp_req_cur_offset(struct nvme_tcp_request *req) static inline size_t nvme_tcp_req_cur_length(struct nvme_tcp_request *req) { - return min_t(size_t, req->iter.bvec->bv_len - req->iter.iov_offset, + return min_t(size_t, iov_iter_single_seg_count(&req->iter), req->pdu_len - req->pdu_sent); } -- cgit 1.4.1 From 5ab25a32cd90ce561ac28b9302766e565d61304c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 13 Jan 2021 16:00:22 -0800 Subject: nvme: don't intialize hwmon for discovery controllers Discovery controllers usually don't support smart log page command. So when we connect to the discovery controller we see this warning: nvme nvme0: Failed to read smart log (error 24577) nvme nvme0: new ctrl: NQN "nqn.2014-08.org.nvmexpress.discovery", addr 192.168.123.1:8009 nvme nvme0: Removing ctrl: NQN "nqn.2014-08.org.nvmexpress.discovery" Introduce a new helper to understand if the controller is a discovery controller and use this helper to skip nvme_init_hwmon (also use it in other places that we check if the controller is a discovery controller). Fixes: 400b6a7b13a3 ("nvme: Add hardware monitoring support") Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f320273fc672..200bdd672c28 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2856,6 +2856,11 @@ static const struct attribute_group *nvme_subsys_attrs_groups[] = { NULL, }; +static inline bool nvme_discovery_ctrl(struct nvme_ctrl *ctrl) +{ + return ctrl->opts && ctrl->opts->discovery_nqn; +} + static bool nvme_validate_cntlid(struct nvme_subsystem *subsys, struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) { @@ -2875,7 +2880,7 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys, } if ((id->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || - (ctrl->opts && ctrl->opts->discovery_nqn)) + nvme_discovery_ctrl(ctrl)) continue; dev_err(ctrl->device, @@ -3144,7 +3149,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) goto out_free; } - if (!ctrl->opts->discovery_nqn && !ctrl->kas) { + if (!nvme_discovery_ctrl(ctrl) && !ctrl->kas) { dev_err(ctrl->device, "keep-alive support is mandatory for fabrics\n"); ret = -EINVAL; @@ -3184,7 +3189,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) if (ret < 0) return ret; - if (!ctrl->identified) { + if (!ctrl->identified && !nvme_discovery_ctrl(ctrl)) { ret = nvme_hwmon_init(ctrl); if (ret < 0) return ret; -- cgit 1.4.1 From 402a89660e9dc880710b12773076a336c9dab3d7 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: drm/nouveau/bios: fix issue shadowing expansion ROMs This issue has generally been covered up by the presence of additional expansion ROMs after the ones we're interested in, with header fetches of subsequent images loading enough of the ROM to hide the issue. Noticed on GA102, which lacks a type 0x70 image compared to TU102,. [ 906.364197] nouveau 0000:09:00.0: bios: 00000000: type 00, 65024 bytes [ 906.381205] nouveau 0000:09:00.0: bios: 0000fe00: type 03, 91648 bytes [ 906.405213] nouveau 0000:09:00.0: bios: 00026400: type e0, 22016 bytes [ 906.410984] nouveau 0000:09:00.0: bios: 0002ba00: type e0, 366080 bytes vs [ 22.961901] nouveau 0000:09:00.0: bios: 00000000: type 00, 60416 bytes [ 22.984174] nouveau 0000:09:00.0: bios: 0000ec00: type 03, 71168 bytes [ 23.010446] nouveau 0000:09:00.0: bios: 00020200: type e0, 48128 bytes [ 23.028220] nouveau 0000:09:00.0: bios: 0002be00: type e0, 140800 bytes [ 23.080196] nouveau 0000:09:00.0: bios: 0004e400: type 70, 7168 bytes Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c index 7deb81b6dbac..4b571cc6bc70 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c @@ -75,7 +75,7 @@ shadow_image(struct nvkm_bios *bios, int idx, u32 offset, struct shadow *mthd) nvkm_debug(subdev, "%08x: type %02x, %d bytes\n", image.base, image.type, image.size); - if (!shadow_fetch(bios, mthd, image.size)) { + if (!shadow_fetch(bios, mthd, image.base + image.size)) { nvkm_debug(subdev, "%08x: fetch failed\n", image.base); return 0; } -- cgit 1.4.1 From e05e06cd34f5311f677294a08b609acfbc315236 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: drm/nouveau/privring: ack interrupts the same way as RM Whatever it is that we were doing before doesn't work on Ampere. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c | 10 +++++++--- drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c | 10 +++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c index 2340040942c9..1115376bc85f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c @@ -22,6 +22,7 @@ * Authors: Ben Skeggs */ #include "priv.h" +#include static void gf100_ibus_intr_hub(struct nvkm_subdev *ibus, int i) @@ -31,7 +32,6 @@ gf100_ibus_intr_hub(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x122124 + (i * 0x0400)); u32 stat = nvkm_rd32(device, 0x122128 + (i * 0x0400)); nvkm_debug(ibus, "HUB%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x122128 + (i * 0x0400), 0x00000200, 0x00000000); } static void @@ -42,7 +42,6 @@ gf100_ibus_intr_rop(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x124124 + (i * 0x0400)); u32 stat = nvkm_rd32(device, 0x124128 + (i * 0x0400)); nvkm_debug(ibus, "ROP%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x124128 + (i * 0x0400), 0x00000200, 0x00000000); } static void @@ -53,7 +52,6 @@ gf100_ibus_intr_gpc(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x128124 + (i * 0x0400)); u32 stat = nvkm_rd32(device, 0x128128 + (i * 0x0400)); nvkm_debug(ibus, "GPC%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x128128 + (i * 0x0400), 0x00000200, 0x00000000); } void @@ -90,6 +88,12 @@ gf100_ibus_intr(struct nvkm_subdev *ibus) intr1 &= ~stat; } } + + nvkm_mask(device, 0x121c4c, 0x0000003f, 0x00000002); + nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x121c4c) & 0x0000003f)) + break; + ); } static int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c index f3915f85838e..22e487b493ad 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c @@ -22,6 +22,7 @@ * Authors: Ben Skeggs */ #include "priv.h" +#include static void gk104_ibus_intr_hub(struct nvkm_subdev *ibus, int i) @@ -31,7 +32,6 @@ gk104_ibus_intr_hub(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x122124 + (i * 0x0800)); u32 stat = nvkm_rd32(device, 0x122128 + (i * 0x0800)); nvkm_debug(ibus, "HUB%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x122128 + (i * 0x0800), 0x00000200, 0x00000000); } static void @@ -42,7 +42,6 @@ gk104_ibus_intr_rop(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x124124 + (i * 0x0800)); u32 stat = nvkm_rd32(device, 0x124128 + (i * 0x0800)); nvkm_debug(ibus, "ROP%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x124128 + (i * 0x0800), 0x00000200, 0x00000000); } static void @@ -53,7 +52,6 @@ gk104_ibus_intr_gpc(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x128124 + (i * 0x0800)); u32 stat = nvkm_rd32(device, 0x128128 + (i * 0x0800)); nvkm_debug(ibus, "GPC%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x128128 + (i * 0x0800), 0x00000200, 0x00000000); } void @@ -90,6 +88,12 @@ gk104_ibus_intr(struct nvkm_subdev *ibus) intr1 &= ~stat; } } + + nvkm_mask(device, 0x12004c, 0x0000003f, 0x00000002); + nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x12004c) & 0x0000003f)) + break; + ); } static int -- cgit 1.4.1 From b5510d1e21d80e2fa2286468ca8c2922f5895ef8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: drm/nouveau/i2c/gk110: split out from i2c/gk104 No functional changes here yet. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h | 1 + drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 12 +++---- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c | 38 +++++++++++++++++++++++ 4 files changed, 46 insertions(+), 6 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h index 81b977319640..640f649ce497 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h @@ -92,6 +92,7 @@ int g94_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **); int gf117_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **); int gf119_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **); int gk104_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **); +int gk110_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **); int gm200_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **); static inline int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 7851bec5f0e5..ffada13c416c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -1815,7 +1815,7 @@ nvf0_chipset = { .fb = gk110_fb_new, .fuse = gf100_fuse_new, .gpio = gk104_gpio_new, - .i2c = gk104_i2c_new, + .i2c = gk110_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, @@ -1853,7 +1853,7 @@ nvf1_chipset = { .fb = gk110_fb_new, .fuse = gf100_fuse_new, .gpio = gk104_gpio_new, - .i2c = gk104_i2c_new, + .i2c = gk110_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, @@ -1891,7 +1891,7 @@ nv106_chipset = { .fb = gk110_fb_new, .fuse = gf100_fuse_new, .gpio = gk104_gpio_new, - .i2c = gk104_i2c_new, + .i2c = gk110_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, @@ -1929,7 +1929,7 @@ nv108_chipset = { .fb = gk110_fb_new, .fuse = gf100_fuse_new, .gpio = gk104_gpio_new, - .i2c = gk104_i2c_new, + .i2c = gk110_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, @@ -1967,7 +1967,7 @@ nv117_chipset = { .fb = gm107_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, - .i2c = gk104_i2c_new, + .i2c = gk110_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, @@ -2003,7 +2003,7 @@ nv118_chipset = { .fb = gm107_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, - .i2c = gk104_i2c_new, + .i2c = gk110_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild index 723d0284caef..819703913a00 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild @@ -7,6 +7,7 @@ nvkm-y += nvkm/subdev/i2c/g94.o nvkm-y += nvkm/subdev/i2c/gf117.o nvkm-y += nvkm/subdev/i2c/gf119.o nvkm-y += nvkm/subdev/i2c/gk104.o +nvkm-y += nvkm/subdev/i2c/gk110.o nvkm-y += nvkm/subdev/i2c/gm200.o nvkm-y += nvkm/subdev/i2c/pad.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c new file mode 100644 index 000000000000..10eaf1d70f62 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c @@ -0,0 +1,38 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "priv.h" +#include "pad.h" + +static const struct nvkm_i2c_func +gk110_i2c = { + .pad_x_new = gf119_i2c_pad_x_new, + .pad_s_new = gf119_i2c_pad_s_new, + .aux = 4, + .aux_stat = gk104_aux_stat, + .aux_mask = gk104_aux_mask, +}; + +int +gk110_i2c_new(struct nvkm_device *device, int index, struct nvkm_i2c **pi2c) +{ + return nvkm_i2c_new_(&gk110_i2c, device, index, pi2c); +} -- cgit 1.4.1 From 8ad95edc39100c22c29ab1d2588332b99f387c8e Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: drm/nouveau/i2c/gk110-: disable hw-initiated dpcd reads RM does this around transactions, and it seemed to help while debugging AUXCH issues on GA102. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h | 7 +++++++ drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c | 10 +++++++--- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c | 9 +++++++-- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c | 7 +++++++ drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c | 7 +++++++ drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h | 4 ++++ 7 files changed, 40 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h index 30b48896965e..f920eabf8628 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h @@ -3,6 +3,13 @@ #define __NVKM_I2C_AUX_H__ #include "pad.h" +static inline void +nvkm_i2c_aux_autodpcd(struct nvkm_i2c *i2c, int aux, bool enable) +{ + if (i2c->func->aux_autodpcd) + i2c->func->aux_autodpcd(i2c, aux, false); +} + struct nvkm_i2c_aux_func { bool address_only; int (*xfer)(struct nvkm_i2c_aux *, bool retry, u8 type, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c index db7769cb33eb..47068f6f9c55 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c @@ -77,7 +77,8 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, u8 type, u32 addr, u8 *data, u8 *size) { struct g94_i2c_aux *aux = g94_i2c_aux(obj); - struct nvkm_device *device = aux->base.pad->i2c->subdev.device; + struct nvkm_i2c *i2c = aux->base.pad->i2c; + struct nvkm_device *device = i2c->subdev.device; const u32 base = aux->ch * 0x50; u32 ctrl, stat, timeout, retries = 0; u32 xbuf[4] = {}; @@ -96,6 +97,8 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, goto out; } + nvkm_i2c_aux_autodpcd(i2c, aux->ch, false); + if (!(type & 1)) { memcpy(xbuf, data, *size); for (i = 0; i < 16; i += 4) { @@ -128,7 +131,7 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, if (!timeout--) { AUX_ERR(&aux->base, "timeout %08x", ctrl); ret = -EIO; - goto out; + goto out_err; } } while (ctrl & 0x00010000); ret = 0; @@ -154,7 +157,8 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, memcpy(data, xbuf, *size); *size = stat & 0x0000001f; } - +out_err: + nvkm_i2c_aux_autodpcd(i2c, aux->ch, true); out: g94_i2c_aux_fini(aux); return ret < 0 ? ret : (stat & 0x000f0000) >> 16; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c index edb6148cbca0..ab67b67fd2a5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c @@ -77,7 +77,8 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, u8 type, u32 addr, u8 *data, u8 *size) { struct gm200_i2c_aux *aux = gm200_i2c_aux(obj); - struct nvkm_device *device = aux->base.pad->i2c->subdev.device; + struct nvkm_i2c *i2c = aux->base.pad->i2c; + struct nvkm_device *device = i2c->subdev.device; const u32 base = aux->ch * 0x50; u32 ctrl, stat, timeout, retries = 0; u32 xbuf[4] = {}; @@ -96,6 +97,8 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, goto out; } + nvkm_i2c_aux_autodpcd(i2c, aux->ch, false); + if (!(type & 1)) { memcpy(xbuf, data, *size); for (i = 0; i < 16; i += 4) { @@ -128,7 +131,7 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, if (!timeout--) { AUX_ERR(&aux->base, "timeout %08x", ctrl); ret = -EIO; - goto out; + goto out_err; } } while (ctrl & 0x00010000); ret = 0; @@ -155,6 +158,8 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, *size = stat & 0x0000001f; } +out_err: + nvkm_i2c_aux_autodpcd(i2c, aux->ch, true); out: gm200_i2c_aux_fini(aux); return ret < 0 ? ret : (stat & 0x000f0000) >> 16; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c index 10eaf1d70f62..8e3bfa1af52a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c @@ -22,6 +22,12 @@ #include "priv.h" #include "pad.h" +static void +gk110_aux_autodpcd(struct nvkm_i2c *i2c, int aux, bool enable) +{ + nvkm_mask(i2c->subdev.device, 0x00e4f8 + (aux * 0x50), 0x00010000, enable << 16); +} + static const struct nvkm_i2c_func gk110_i2c = { .pad_x_new = gf119_i2c_pad_x_new, @@ -29,6 +35,7 @@ gk110_i2c = { .aux = 4, .aux_stat = gk104_aux_stat, .aux_mask = gk104_aux_mask, + .aux_autodpcd = gk110_aux_autodpcd, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c index a23c5f315221..7b2375bff8a9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c @@ -24,6 +24,12 @@ #include "priv.h" #include "pad.h" +static void +gm200_aux_autodpcd(struct nvkm_i2c *i2c, int aux, bool enable) +{ + nvkm_mask(i2c->subdev.device, 0x00d968 + (aux * 0x50), 0x00010000, enable << 16); +} + static const struct nvkm_i2c_func gm200_i2c = { .pad_x_new = gf119_i2c_pad_x_new, @@ -31,6 +37,7 @@ gm200_i2c = { .aux = 8, .aux_stat = gk104_aux_stat, .aux_mask = gk104_aux_mask, + .aux_autodpcd = gm200_aux_autodpcd, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h index 461016814f4f..44b7bb7d4777 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: MIT */ #ifndef __NVKM_I2C_PAD_H__ #define __NVKM_I2C_PAD_H__ -#include +#include "priv.h" struct nvkm_i2c_pad { const struct nvkm_i2c_pad_func *func; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h index bd86bc298ebe..e35f6036fcfc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h @@ -23,6 +23,10 @@ struct nvkm_i2c_func { /* mask on/off interrupt types for a given set of auxch */ void (*aux_mask)(struct nvkm_i2c *, u32, u32, u32); + + /* enable/disable HW-initiated DPCD reads + */ + void (*aux_autodpcd)(struct nvkm_i2c *, int aux, bool enable); }; void g94_aux_stat(struct nvkm_i2c *, u32 *, u32 *, u32 *, u32 *); -- cgit 1.4.1 From ba6e9ab0fcf3d76e3952deb12b5f993991621d9c Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: drm/nouveau/i2c/gm200: increase width of aux semaphore owner fields Noticed while debugging GA102. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c index ab67b67fd2a5..8bd1d442e465 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c @@ -33,7 +33,7 @@ static void gm200_i2c_aux_fini(struct gm200_i2c_aux *aux) { struct nvkm_device *device = aux->base.pad->i2c->subdev.device; - nvkm_mask(device, 0x00d954 + (aux->ch * 0x50), 0x00310000, 0x00000000); + nvkm_mask(device, 0x00d954 + (aux->ch * 0x50), 0x00710000, 0x00000000); } static int @@ -54,10 +54,10 @@ gm200_i2c_aux_init(struct gm200_i2c_aux *aux) AUX_ERR(&aux->base, "begin idle timeout %08x", ctrl); return -EBUSY; } - } while (ctrl & 0x03010000); + } while (ctrl & 0x07010000); /* set some magic, and wait up to 1ms for it to appear */ - nvkm_mask(device, 0x00d954 + (aux->ch * 0x50), 0x00300000, ureq); + nvkm_mask(device, 0x00d954 + (aux->ch * 0x50), 0x00700000, ureq); timeout = 1000; do { ctrl = nvkm_rd32(device, 0x00d954 + (aux->ch * 0x50)); @@ -67,7 +67,7 @@ gm200_i2c_aux_init(struct gm200_i2c_aux *aux) gm200_i2c_aux_fini(aux); return -EBUSY; } - } while ((ctrl & 0x03000000) != urep); + } while ((ctrl & 0x07000000) != urep); return 0; } -- cgit 1.4.1 From add42781ad76c5ae65127bf13852a4c6b2f08849 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: drm/nouveau/mmu: fix vram heap sizing Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c index de91e9a26172..6d5212ae2fd5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c @@ -316,9 +316,9 @@ nvkm_mmu_vram(struct nvkm_mmu *mmu) { struct nvkm_device *device = mmu->subdev.device; struct nvkm_mm *mm = &device->fb->ram->vram; - const u32 sizeN = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NORMAL); - const u32 sizeU = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NOMAP); - const u32 sizeM = nvkm_mm_heap_size(mm, NVKM_RAM_MM_MIXED); + const u64 sizeN = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NORMAL); + const u64 sizeU = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NOMAP); + const u64 sizeM = nvkm_mm_heap_size(mm, NVKM_RAM_MM_MIXED); u8 type = NVKM_MEM_KIND * !!mmu->func->kind; u8 heap = NVKM_MEM_VRAM; int heapM, heapN, heapU; -- cgit 1.4.1 From 3b050680c84153d8e6f5ae3785922cd417f4b071 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: drm/nouveau/core: recognise GA10[024] GA100 hidden behind a module option, as it's not been as well verified since initial bring-up and may need additional changes. There's no display anyway, so this can wait for a bit. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvif/cl0080.h | 1 + drivers/gpu/drm/nouveau/include/nvkm/core/device.h | 1 + drivers/gpu/drm/nouveau/nouveau_backlight.c | 1 + drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 35 ++++++++++++++++++++-- drivers/gpu/drm/nouveau/nvkm/engine/device/user.c | 1 + 5 files changed, 36 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h index cd9a2e687bb6..57d4f457a7d4 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h @@ -33,6 +33,7 @@ struct nv_device_info_v0 { #define NV_DEVICE_INFO_V0_PASCAL 0x0a #define NV_DEVICE_INFO_V0_VOLTA 0x0b #define NV_DEVICE_INFO_V0_TURING 0x0c +#define NV_DEVICE_INFO_V0_AMPERE 0x0d __u8 family; __u8 pad06[2]; __u64 ram_size; diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h index 5c007ce62fc3..c920939a1467 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h @@ -120,6 +120,7 @@ struct nvkm_device { GP100 = 0x130, GV100 = 0x140, TU100 = 0x160, + GA100 = 0x170, } card_type; u32 chipset; u8 chiprev; diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c index c7a94c94dbf3..72f35a2babcb 100644 --- a/drivers/gpu/drm/nouveau/nouveau_backlight.c +++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c @@ -256,6 +256,7 @@ nouveau_backlight_init(struct drm_connector *connector) case NV_DEVICE_INFO_V0_PASCAL: case NV_DEVICE_INFO_V0_VOLTA: case NV_DEVICE_INFO_V0_TURING: + case NV_DEVICE_INFO_V0_AMPERE: //XXX: not confirmed ret = nv50_backlight_init(nv_encoder, &props, &ops); break; default: diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index ffada13c416c..b6b094bbd562 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2652,6 +2652,21 @@ nv168_chipset = { .sec2 = tu102_sec2_new, }; +static const struct nvkm_device_chip +nv170_chipset = { + .name = "GA100", +}; + +static const struct nvkm_device_chip +nv172_chipset = { + .name = "GA102", +}; + +static const struct nvkm_device_chip +nv174_chipset = { + .name = "GA104", +}; + static int nvkm_device_event_ctor(struct nvkm_object *object, void *data, u32 size, struct nvkm_notify *notify) @@ -3063,6 +3078,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func, case 0x130: device->card_type = GP100; break; case 0x140: device->card_type = GV100; break; case 0x160: device->card_type = TU100; break; + case 0x170: device->card_type = GA100; break; default: break; } @@ -3160,10 +3176,23 @@ nvkm_device_ctor(const struct nvkm_device_func *func, case 0x166: device->chip = &nv166_chipset; break; case 0x167: device->chip = &nv167_chipset; break; case 0x168: device->chip = &nv168_chipset; break; + case 0x172: device->chip = &nv172_chipset; break; + case 0x174: device->chip = &nv174_chipset; break; default: - nvdev_error(device, "unknown chipset (%08x)\n", boot0); - ret = -ENODEV; - goto done; + if (nvkm_boolopt(device->cfgopt, "NvEnableUnsupportedChipsets", false)) { + switch (device->chipset) { + case 0x170: device->chip = &nv170_chipset; break; + default: + break; + } + } + + if (!device->chip) { + nvdev_error(device, "unknown chipset (%08x)\n", boot0); + ret = -ENODEV; + goto done; + } + break; } nvdev_info(device, "NVIDIA %s (%08x)\n", diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c index 03c6d9aef075..147894798786 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c @@ -176,6 +176,7 @@ nvkm_udevice_info(struct nvkm_udevice *udev, void *data, u32 size) case GP100: args->v0.family = NV_DEVICE_INFO_V0_PASCAL; break; case GV100: args->v0.family = NV_DEVICE_INFO_V0_VOLTA; break; case TU100: args->v0.family = NV_DEVICE_INFO_V0_TURING; break; + case GA100: args->v0.family = NV_DEVICE_INFO_V0_AMPERE; break; default: args->v0.family = 0; break; -- cgit 1.4.1 From caeb6ab899c3d36a74cda6e299c6e1c9c4e2a22e Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: drm/nouveau/kms/nv50-: fix case where notifier buffer is at offset 0 VRAM offset 0 is a valid address, triggered on GA102. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/disp.c | 4 ++-- drivers/gpu/drm/nouveau/dispnv50/disp.h | 2 +- drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 33fff388dd83..c6367035970e 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -222,7 +222,7 @@ nv50_dmac_wait(struct nvif_push *push, u32 size) int nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp, - const s32 *oclass, u8 head, void *data, u32 size, u64 syncbuf, + const s32 *oclass, u8 head, void *data, u32 size, s64 syncbuf, struct nv50_dmac *dmac) { struct nouveau_cli *cli = (void *)device->object.client; @@ -271,7 +271,7 @@ nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp, if (ret) return ret; - if (!syncbuf) + if (syncbuf < 0) return 0; ret = nvif_object_ctor(&dmac->base.user, "kmsSyncCtxDma", NV50_DISP_HANDLE_SYNCBUF, diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.h b/drivers/gpu/drm/nouveau/dispnv50/disp.h index 92bddc083617..38dec11e7dda 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.h +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.h @@ -95,7 +95,7 @@ struct nv50_outp_atom { int nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp, const s32 *oclass, u8 head, void *data, u32 size, - u64 syncbuf, struct nv50_dmac *dmac); + s64 syncbuf, struct nv50_dmac *dmac); void nv50_dmac_destroy(struct nv50_dmac *); /* diff --git a/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c b/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c index 685b70871324..b390029c69ec 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c @@ -76,7 +76,7 @@ wimmc37b_init_(const struct nv50_wimm_func *func, struct nouveau_drm *drm, int ret; ret = nv50_dmac_create(&drm->client.device, &disp->disp->object, - &oclass, 0, &args, sizeof(args), 0, + &oclass, 0, &args, sizeof(args), -1, &wndw->wimm); if (ret) { NV_ERROR(drm, "wimm%04x allocation failed: %d\n", oclass, ret); -- cgit 1.4.1 From 70afbe4bdc0a7ccdb462a38216f5abc3db7e5c1b Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: drm/nouveau/pci/ga10[024]: initial support Appears to be compatible with GP100 code. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index b6b094bbd562..95c470d13cb9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2655,16 +2655,19 @@ nv168_chipset = { static const struct nvkm_device_chip nv170_chipset = { .name = "GA100", + .pci = gp100_pci_new, }; static const struct nvkm_device_chip nv172_chipset = { .name = "GA102", + .pci = gp100_pci_new, }; static const struct nvkm_device_chip nv174_chipset = { .name = "GA104", + .pci = gp100_pci_new, }; static int -- cgit 1.4.1 From a34632482f1ea768429a9d4c79a10d12f5093405 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: drm/nouveau/bios/ga10[024]: initial support Forcing PRAMIN-shadowing off for GA100, as it requires display, and we don't know if/where the fuse register for detecting its presence is. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 95c470d13cb9..7c35c32cdf73 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2655,18 +2655,21 @@ nv168_chipset = { static const struct nvkm_device_chip nv170_chipset = { .name = "GA100", + .bios = nvkm_bios_new, .pci = gp100_pci_new, }; static const struct nvkm_device_chip nv172_chipset = { .name = "GA102", + .bios = nvkm_bios_new, .pci = gp100_pci_new, }; static const struct nvkm_device_chip nv174_chipset = { .name = "GA104", + .bios = nvkm_bios_new, .pci = gp100_pci_new, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c index 3634cd0630b8..023ddc7c5399 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c @@ -64,6 +64,9 @@ pramin_init(struct nvkm_bios *bios, const char *name) return NULL; /* we can't get the bios image pointer without PDISP */ + if (device->card_type >= GA100) + addr = device->chipset == 0x170; /*XXX: find the fuse reg for this */ + else if (device->card_type >= GM100) addr = nvkm_rd32(device, 0x021c04); else -- cgit 1.4.1 From 7ddf5e9597faa6f939370e294e0f6d9516d2a431 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: drm/nouveau/devinit/ga10[024]: initial support VPLL regs changed a bit. There's more stuff to do around these, but it's less invasive to stick those changes into disp for now. None of that belongs here anymore anyhow - fix that someday. Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/include/nvkm/subdev/devinit.h | 1 + drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 + drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild | 1 + .../gpu/drm/nouveau/nvkm/subdev/devinit/ga100.c | 76 ++++++++++++++++++++++ drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h | 1 + .../gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c | 2 +- 6 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/devinit/ga100.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h index 1a39e52e09e3..50cc7c05eac4 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h @@ -32,4 +32,5 @@ int gm107_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); int gm200_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); int gv100_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); int tu102_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); +int ga100_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 7c35c32cdf73..76f1b2504f3a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2656,6 +2656,7 @@ static const struct nvkm_device_chip nv170_chipset = { .name = "GA100", .bios = nvkm_bios_new, + .devinit = ga100_devinit_new, .pci = gp100_pci_new, }; @@ -2663,6 +2664,7 @@ static const struct nvkm_device_chip nv172_chipset = { .name = "GA102", .bios = nvkm_bios_new, + .devinit = ga100_devinit_new, .pci = gp100_pci_new, }; @@ -2670,6 +2672,7 @@ static const struct nvkm_device_chip nv174_chipset = { .name = "GA104", .bios = nvkm_bios_new, + .devinit = ga100_devinit_new, .pci = gp100_pci_new, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild index b3429371ed82..d1abb64841da 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild @@ -15,3 +15,4 @@ nvkm-y += nvkm/subdev/devinit/gm107.o nvkm-y += nvkm/subdev/devinit/gm200.o nvkm-y += nvkm/subdev/devinit/gv100.o nvkm-y += nvkm/subdev/devinit/tu102.o +nvkm-y += nvkm/subdev/devinit/ga100.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/ga100.c new file mode 100644 index 000000000000..636a92128f6c --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/ga100.c @@ -0,0 +1,76 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "nv50.h" + +#include +#include +#include + +static int +ga100_devinit_pll_set(struct nvkm_devinit *init, u32 type, u32 freq) +{ + struct nvkm_subdev *subdev = &init->subdev; + struct nvkm_device *device = subdev->device; + struct nvbios_pll info; + int head = type - PLL_VPLL0; + int N, fN, M, P; + int ret; + + ret = nvbios_pll_parse(device->bios, type, &info); + if (ret) + return ret; + + ret = gt215_pll_calc(subdev, &info, freq, &N, &fN, &M, &P); + if (ret < 0) + return ret; + + switch (info.type) { + case PLL_VPLL0: + case PLL_VPLL1: + case PLL_VPLL2: + case PLL_VPLL3: + nvkm_wr32(device, 0x00ef00 + (head * 0x40), 0x02080004); + nvkm_wr32(device, 0x00ef18 + (head * 0x40), (N << 16) | fN); + nvkm_wr32(device, 0x00ef04 + (head * 0x40), (P << 16) | M); + nvkm_wr32(device, 0x00e9c0 + (head * 0x04), 0x00000001); + break; + default: + nvkm_warn(subdev, "%08x/%dKhz unimplemented\n", type, freq); + ret = -EINVAL; + break; + } + + return ret; +} + +static const struct nvkm_devinit_func +ga100_devinit = { + .init = nv50_devinit_init, + .post = tu102_devinit_post, + .pll_set = ga100_devinit_pll_set, +}; + +int +ga100_devinit_new(struct nvkm_device *device, int index, struct nvkm_devinit **pinit) +{ + return nv50_devinit_new_(&ga100_devinit, device, index, pinit); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h index 94723352137a..05961e624264 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h @@ -19,4 +19,5 @@ void nvkm_devinit_ctor(const struct nvkm_devinit_func *, struct nvkm_device *, int index, struct nvkm_devinit *); int nv04_devinit_post(struct nvkm_devinit *, bool); +int tu102_devinit_post(struct nvkm_devinit *, bool); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c index 397670e72fff..9a469bf482f2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c @@ -65,7 +65,7 @@ tu102_devinit_pll_set(struct nvkm_devinit *init, u32 type, u32 freq) return ret; } -static int +int tu102_devinit_post(struct nvkm_devinit *base, bool post) { struct nv50_devinit *init = nv50_devinit(base); -- cgit 1.4.1 From 5961c62d20753009408df4752e22991097386aa9 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: drm/nouveau/mc/ga10[024]: initial support Fortunately, all the interrupts we need to bring up basic display support are contained in a single leaf register, allowing this basic (but hackish) implementation. There's a bunch more invasive patches to come implementing all this in a better/more complete way, but trying to get a minimal series out first. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h | 1 + drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 + drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c | 74 +++++++++++++++++++++++ 4 files changed, 79 insertions(+) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h index 6641fe4c252c..e45ca4583967 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h @@ -32,4 +32,5 @@ int gk20a_mc_new(struct nvkm_device *, int, struct nvkm_mc **); int gp100_mc_new(struct nvkm_device *, int, struct nvkm_mc **); int gp10b_mc_new(struct nvkm_device *, int, struct nvkm_mc **); int tu102_mc_new(struct nvkm_device *, int, struct nvkm_mc **); +int ga100_mc_new(struct nvkm_device *, int, struct nvkm_mc **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 76f1b2504f3a..fb551edc6a35 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2657,6 +2657,7 @@ nv170_chipset = { .name = "GA100", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .mc = ga100_mc_new, .pci = gp100_pci_new, }; @@ -2665,6 +2666,7 @@ nv172_chipset = { .name = "GA102", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .mc = ga100_mc_new, .pci = gp100_pci_new, }; @@ -2673,6 +2675,7 @@ nv174_chipset = { .name = "GA104", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .mc = ga100_mc_new, .pci = gp100_pci_new, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild index 2585ef07532a..ac2b34e9ac6a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild @@ -14,3 +14,4 @@ nvkm-y += nvkm/subdev/mc/gk20a.o nvkm-y += nvkm/subdev/mc/gp100.o nvkm-y += nvkm/subdev/mc/gp10b.o nvkm-y += nvkm/subdev/mc/tu102.o +nvkm-y += nvkm/subdev/mc/ga100.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c new file mode 100644 index 000000000000..967eb3af11eb --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c @@ -0,0 +1,74 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "priv.h" + +static void +ga100_mc_intr_unarm(struct nvkm_mc *mc) +{ + nvkm_wr32(mc->subdev.device, 0xb81610, 0x00000004); +} + +static void +ga100_mc_intr_rearm(struct nvkm_mc *mc) +{ + nvkm_wr32(mc->subdev.device, 0xb81608, 0x00000004); +} + +static void +ga100_mc_intr_mask(struct nvkm_mc *mc, u32 mask, u32 intr) +{ + nvkm_wr32(mc->subdev.device, 0xb81210, mask & intr ); + nvkm_wr32(mc->subdev.device, 0xb81410, mask & ~(mask & intr)); +} + +static u32 +ga100_mc_intr_stat(struct nvkm_mc *mc) +{ + u32 intr_top = nvkm_rd32(mc->subdev.device, 0xb81600), intr = 0x00000000; + if (intr_top & 0x00000004) + intr = nvkm_mask(mc->subdev.device, 0xb81010, 0x00000000, 0x00000000); + return intr; +} + +static void +ga100_mc_init(struct nvkm_mc *mc) +{ + nv50_mc_init(mc); + nvkm_wr32(mc->subdev.device, 0xb81210, 0xffffffff); +} + +static const struct nvkm_mc_func +ga100_mc = { + .init = ga100_mc_init, + .intr = gp100_mc_intr, + .intr_unarm = ga100_mc_intr_unarm, + .intr_rearm = ga100_mc_intr_rearm, + .intr_mask = ga100_mc_intr_mask, + .intr_stat = ga100_mc_intr_stat, + .reset = gk104_mc_reset, +}; + +int +ga100_mc_new(struct nvkm_device *device, int index, struct nvkm_mc **pmc) +{ + return nvkm_mc_new_(&ga100_mc, device, index, pmc); +} -- cgit 1.4.1 From e0df4bbfc3365d7699e32bebb24647dc7a09b00c Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: drm/nouveau/privring/ga10[024]: initial support Appears to be compatible with GM200 code. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index fb551edc6a35..2f6e1b1ce0bb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2657,6 +2657,7 @@ nv170_chipset = { .name = "GA100", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .ibus = gm200_ibus_new, .mc = ga100_mc_new, .pci = gp100_pci_new, }; @@ -2666,6 +2667,7 @@ nv172_chipset = { .name = "GA102", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .ibus = gm200_ibus_new, .mc = ga100_mc_new, .pci = gp100_pci_new, }; @@ -2675,6 +2677,7 @@ nv174_chipset = { .name = "GA104", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .ibus = gm200_ibus_new, .mc = ga100_mc_new, .pci = gp100_pci_new, }; -- cgit 1.4.1 From de4781d0f22b54fdbe7ac459eb67b585ca3ee430 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: drm/nouveau/imem/ga10[024]: initial support Appears to be compatible with NV50 code. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 2f6e1b1ce0bb..d2be48fc5db1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2658,6 +2658,7 @@ nv170_chipset = { .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .ibus = gm200_ibus_new, + .imem = nv50_instmem_new, .mc = ga100_mc_new, .pci = gp100_pci_new, }; @@ -2668,6 +2669,7 @@ nv172_chipset = { .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .ibus = gm200_ibus_new, + .imem = nv50_instmem_new, .mc = ga100_mc_new, .pci = gp100_pci_new, }; @@ -2678,6 +2680,7 @@ nv174_chipset = { .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .ibus = gm200_ibus_new, + .imem = nv50_instmem_new, .mc = ga100_mc_new, .pci = gp100_pci_new, }; -- cgit 1.4.1 From 41ba806f40a9a4c4f4c04a474bf368160f1baa2c Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: drm/nouveau/fb/ga10[024]: initial support No VPR scrub. GA102 and GA104 have a new VRAM size detection method. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h | 2 ++ drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 ++ drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild | 3 ++ drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c | 40 +++++++++++++++++++++++ drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c | 40 +++++++++++++++++++++++ drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h | 2 ++ drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramga102.c | 40 +++++++++++++++++++++++ 9 files changed, 132 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramga102.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h index 34b56b10218a..2ecd52aec1d1 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h @@ -86,6 +86,8 @@ int gp100_fb_new(struct nvkm_device *, int, struct nvkm_fb **); int gp102_fb_new(struct nvkm_device *, int, struct nvkm_fb **); int gp10b_fb_new(struct nvkm_device *, int, struct nvkm_fb **); int gv100_fb_new(struct nvkm_device *, int, struct nvkm_fb **); +int ga100_fb_new(struct nvkm_device *, int, struct nvkm_fb **); +int ga102_fb_new(struct nvkm_device *, int, struct nvkm_fb **); #include #include diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index d2be48fc5db1..4a5d43fe3f3a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2657,6 +2657,7 @@ nv170_chipset = { .name = "GA100", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .fb = ga100_fb_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, @@ -2668,6 +2669,7 @@ nv172_chipset = { .name = "GA102", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .fb = ga102_fb_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, @@ -2679,6 +2681,7 @@ nv174_chipset = { .name = "GA104", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .fb = ga102_fb_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild index 43a42159a3d0..5d0bab8ecb43 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild @@ -32,6 +32,8 @@ nvkm-y += nvkm/subdev/fb/gp100.o nvkm-y += nvkm/subdev/fb/gp102.o nvkm-y += nvkm/subdev/fb/gp10b.o nvkm-y += nvkm/subdev/fb/gv100.o +nvkm-y += nvkm/subdev/fb/ga100.o +nvkm-y += nvkm/subdev/fb/ga102.o nvkm-y += nvkm/subdev/fb/ram.o nvkm-y += nvkm/subdev/fb/ramnv04.o @@ -52,6 +54,7 @@ nvkm-y += nvkm/subdev/fb/ramgk104.o nvkm-y += nvkm/subdev/fb/ramgm107.o nvkm-y += nvkm/subdev/fb/ramgm200.o nvkm-y += nvkm/subdev/fb/ramgp100.o +nvkm-y += nvkm/subdev/fb/ramga102.o nvkm-y += nvkm/subdev/fb/sddr2.o nvkm-y += nvkm/subdev/fb/sddr3.o nvkm-y += nvkm/subdev/fb/gddr3.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c new file mode 100644 index 000000000000..bf82686851cd --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c @@ -0,0 +1,40 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "gf100.h" +#include "ram.h" + +static const struct nvkm_fb_func +ga100_fb = { + .dtor = gf100_fb_dtor, + .oneinit = gf100_fb_oneinit, + .init = gp100_fb_init, + .init_page = gv100_fb_init_page, + .init_unkn = gp100_fb_init_unkn, + .ram_new = gp100_ram_new, + .default_bigpage = 16, +}; + +int +ga100_fb_new(struct nvkm_device *device, int index, struct nvkm_fb **pfb) +{ + return gp102_fb_new_(&ga100_fb, device, index, pfb); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c new file mode 100644 index 000000000000..bcecf84a6e67 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c @@ -0,0 +1,40 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "gf100.h" +#include "ram.h" + +static const struct nvkm_fb_func +ga102_fb = { + .dtor = gf100_fb_dtor, + .oneinit = gf100_fb_oneinit, + .init = gp100_fb_init, + .init_page = gv100_fb_init_page, + .init_unkn = gp100_fb_init_unkn, + .ram_new = ga102_ram_new, + .default_bigpage = 16, +}; + +int +ga102_fb_new(struct nvkm_device *device, int index, struct nvkm_fb **pfb) +{ + return gp102_fb_new_(&ga102_fb, device, index, pfb); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c index 10ff5d053f7e..feda86a5fba8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c @@ -22,7 +22,7 @@ #include "gf100.h" #include "ram.h" -static int +int gv100_fb_init_page(struct nvkm_fb *fb) { return (fb->page == 16) ? 0 : -EINVAL; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h index 5be9c563350d..66932ac10d15 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h @@ -82,4 +82,6 @@ int gp102_fb_new_(const struct nvkm_fb_func *, struct nvkm_device *, int, struct nvkm_fb **); bool gp102_fb_vpr_scrub_required(struct nvkm_fb *); int gp102_fb_vpr_scrub(struct nvkm_fb *); + +int gv100_fb_init_page(struct nvkm_fb *); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h index d723a9b4e3c4..ea7d66f3dd82 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h @@ -70,4 +70,5 @@ int gk104_ram_new(struct nvkm_fb *, struct nvkm_ram **); int gm107_ram_new(struct nvkm_fb *, struct nvkm_ram **); int gm200_ram_new(struct nvkm_fb *, struct nvkm_ram **); int gp100_ram_new(struct nvkm_fb *, struct nvkm_ram **); +int ga102_ram_new(struct nvkm_fb *, struct nvkm_ram **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramga102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramga102.c new file mode 100644 index 000000000000..298c136cefe0 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramga102.c @@ -0,0 +1,40 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ram.h" + +#include +#include +#include + +static const struct nvkm_ram_func +ga102_ram = { +}; + +int +ga102_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) +{ + struct nvkm_device *device = fb->subdev.device; + enum nvkm_ram_type type = nvkm_fb_bios_memtype(device->bios); + u32 size = nvkm_rd32(device, 0x1183a4); + + return nvkm_ram_new_(&ga102_ram, fb, type, (u64)size << 20, pram); +} -- cgit 1.4.1 From 6f300e0a0ba8873f1225959089f8bb2897d93ec6 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: drm/nouveau/timer/ga10[024]: initial support Appears to be compatible with GK20A code. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 4a5d43fe3f3a..6a1920965c01 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2662,6 +2662,7 @@ nv170_chipset = { .imem = nv50_instmem_new, .mc = ga100_mc_new, .pci = gp100_pci_new, + .timer = gk20a_timer_new, }; static const struct nvkm_device_chip @@ -2674,6 +2675,7 @@ nv172_chipset = { .imem = nv50_instmem_new, .mc = ga100_mc_new, .pci = gp100_pci_new, + .timer = gk20a_timer_new, }; static const struct nvkm_device_chip @@ -2686,6 +2688,7 @@ nv174_chipset = { .imem = nv50_instmem_new, .mc = ga100_mc_new, .pci = gp100_pci_new, + .timer = gk20a_timer_new, }; static int -- cgit 1.4.1 From a3abc23ac40111c76708119013d63451169e7838 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: drm/nouveau/mmu/ga10[024]: initial support Appears to be compatible with TU102 code. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 6a1920965c01..ad4cece038b4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2661,6 +2661,7 @@ nv170_chipset = { .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, + .mmu = tu102_mmu_new, .pci = gp100_pci_new, .timer = gk20a_timer_new, }; @@ -2674,6 +2675,7 @@ nv172_chipset = { .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, + .mmu = tu102_mmu_new, .pci = gp100_pci_new, .timer = gk20a_timer_new, }; @@ -2687,6 +2689,7 @@ nv174_chipset = { .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, + .mmu = tu102_mmu_new, .pci = gp100_pci_new, .timer = gk20a_timer_new, }; -- cgit 1.4.1 From f5cbe7c8bd1ac6f8c91179de381e10ee5f0f8809 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: drm/nouveau/bar/ga10[024]: initial support Appears to be compatible with TU102 code. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index ad4cece038b4..c9cc08ba1444 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2655,6 +2655,7 @@ nv168_chipset = { static const struct nvkm_device_chip nv170_chipset = { .name = "GA100", + .bar = tu102_bar_new, .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .fb = ga100_fb_new, @@ -2669,6 +2670,7 @@ nv170_chipset = { static const struct nvkm_device_chip nv172_chipset = { .name = "GA102", + .bar = tu102_bar_new, .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .fb = ga102_fb_new, @@ -2683,6 +2685,7 @@ nv172_chipset = { static const struct nvkm_device_chip nv174_chipset = { .name = "GA104", + .bar = tu102_bar_new, .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .fb = ga102_fb_new, -- cgit 1.4.1 From c28efb15f9e51a96c6bce2b92c0f3a4da87db877 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: drm/nouveau/gpio/ga10[024]: initial support GA100 appears to be compatible with GK104 code, the others have some register moves. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h | 1 + drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 + drivers/gpu/drm/nouveau/nvkm/subdev/gpio/Kbuild | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/gpio/ga102.c | 118 +++++++++++++++++++++ 4 files changed, 123 insertions(+) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/gpio/ga102.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h index eaacf8d80527..cdcce5ece6ff 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h @@ -37,4 +37,5 @@ int nv50_gpio_new(struct nvkm_device *, int, struct nvkm_gpio **); int g94_gpio_new(struct nvkm_device *, int, struct nvkm_gpio **); int gf119_gpio_new(struct nvkm_device *, int, struct nvkm_gpio **); int gk104_gpio_new(struct nvkm_device *, int, struct nvkm_gpio **); +int ga102_gpio_new(struct nvkm_device *, int, struct nvkm_gpio **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index c9cc08ba1444..20d947808ef7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2659,6 +2659,7 @@ nv170_chipset = { .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .fb = ga100_fb_new, + .gpio = gk104_gpio_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, @@ -2674,6 +2675,7 @@ nv172_chipset = { .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .fb = ga102_fb_new, + .gpio = ga102_gpio_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, @@ -2689,6 +2691,7 @@ nv174_chipset = { .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .fb = ga102_fb_new, + .gpio = ga102_gpio_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/Kbuild index b2ad5922a1c2..efbbaa080de5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/Kbuild @@ -5,3 +5,4 @@ nvkm-y += nvkm/subdev/gpio/nv50.o nvkm-y += nvkm/subdev/gpio/g94.o nvkm-y += nvkm/subdev/gpio/gf119.o nvkm-y += nvkm/subdev/gpio/gk104.o +nvkm-y += nvkm/subdev/gpio/ga102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/ga102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/ga102.c new file mode 100644 index 000000000000..62c791baf400 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/ga102.c @@ -0,0 +1,118 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "priv.h" + +static void +ga102_gpio_reset(struct nvkm_gpio *gpio, u8 match) +{ + struct nvkm_device *device = gpio->subdev.device; + struct nvkm_bios *bios = device->bios; + u8 ver, len; + u16 entry; + int ent = -1; + + while ((entry = dcb_gpio_entry(bios, 0, ++ent, &ver, &len))) { + u32 data = nvbios_rd32(bios, entry); + u8 line = (data & 0x0000003f); + u8 defs = !!(data & 0x00000080); + u8 func = (data & 0x0000ff00) >> 8; + u8 unk0 = (data & 0x00ff0000) >> 16; + u8 unk1 = (data & 0x1f000000) >> 24; + + if ( func == DCB_GPIO_UNUSED || + (match != DCB_GPIO_UNUSED && match != func)) + continue; + + nvkm_gpio_set(gpio, 0, func, line, defs); + + nvkm_mask(device, 0x021200 + (line * 4), 0xff, unk0); + if (unk1--) + nvkm_mask(device, 0x00d740 + (unk1 * 4), 0xff, line); + } +} + +static int +ga102_gpio_drive(struct nvkm_gpio *gpio, int line, int dir, int out) +{ + struct nvkm_device *device = gpio->subdev.device; + u32 data = ((dir ^ 1) << 13) | (out << 12); + nvkm_mask(device, 0x021200 + (line * 4), 0x00003000, data); + nvkm_mask(device, 0x00d604, 0x00000001, 0x00000001); /* update? */ + return 0; +} + +static int +ga102_gpio_sense(struct nvkm_gpio *gpio, int line) +{ + struct nvkm_device *device = gpio->subdev.device; + return !!(nvkm_rd32(device, 0x021200 + (line * 4)) & 0x00004000); +} + +static void +ga102_gpio_intr_stat(struct nvkm_gpio *gpio, u32 *hi, u32 *lo) +{ + struct nvkm_device *device = gpio->subdev.device; + u32 intr0 = nvkm_rd32(device, 0x021640); + u32 intr1 = nvkm_rd32(device, 0x02164c); + u32 stat0 = nvkm_rd32(device, 0x021648) & intr0; + u32 stat1 = nvkm_rd32(device, 0x021654) & intr1; + *lo = (stat1 & 0xffff0000) | (stat0 >> 16); + *hi = (stat1 << 16) | (stat0 & 0x0000ffff); + nvkm_wr32(device, 0x021640, intr0); + nvkm_wr32(device, 0x02164c, intr1); +} + +static void +ga102_gpio_intr_mask(struct nvkm_gpio *gpio, u32 type, u32 mask, u32 data) +{ + struct nvkm_device *device = gpio->subdev.device; + u32 inte0 = nvkm_rd32(device, 0x021648); + u32 inte1 = nvkm_rd32(device, 0x021654); + if (type & NVKM_GPIO_LO) + inte0 = (inte0 & ~(mask << 16)) | (data << 16); + if (type & NVKM_GPIO_HI) + inte0 = (inte0 & ~(mask & 0xffff)) | (data & 0xffff); + mask >>= 16; + data >>= 16; + if (type & NVKM_GPIO_LO) + inte1 = (inte1 & ~(mask << 16)) | (data << 16); + if (type & NVKM_GPIO_HI) + inte1 = (inte1 & ~mask) | data; + nvkm_wr32(device, 0x021648, inte0); + nvkm_wr32(device, 0x021654, inte1); +} + +static const struct nvkm_gpio_func +ga102_gpio = { + .lines = 32, + .intr_stat = ga102_gpio_intr_stat, + .intr_mask = ga102_gpio_intr_mask, + .drive = ga102_gpio_drive, + .sense = ga102_gpio_sense, + .reset = ga102_gpio_reset, +}; + +int +ga102_gpio_new(struct nvkm_device *device, int index, struct nvkm_gpio **pgpio) +{ + return nvkm_gpio_new_(&ga102_gpio, device, index, pgpio); +} -- cgit 1.4.1 From 8a0412265f06490d93724bf8badf220180790ad1 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: drm/nouveau/i2c/ga10[024]: initial support Appears to be compatible with GM200 code. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 20d947808ef7..946b0001ca54 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2660,6 +2660,7 @@ nv170_chipset = { .devinit = ga100_devinit_new, .fb = ga100_fb_new, .gpio = gk104_gpio_new, + .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, @@ -2676,6 +2677,7 @@ nv172_chipset = { .devinit = ga100_devinit_new, .fb = ga102_fb_new, .gpio = ga102_gpio_new, + .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, @@ -2692,6 +2694,7 @@ nv174_chipset = { .devinit = ga100_devinit_new, .fb = ga102_fb_new, .gpio = ga102_gpio_new, + .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, -- cgit 1.4.1 From a6cf0320aad0c69a6b558dd41d3cb6891a6c9872 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: drm/nouveau/dmaobj/ga10[24]: initial support Appears to be compatible with GV100 code, and not required on GA100, as it shouldn't have display. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 946b0001ca54..bea3daf08570 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2684,6 +2684,7 @@ nv172_chipset = { .mmu = tu102_mmu_new, .pci = gp100_pci_new, .timer = gk20a_timer_new, + .dma = gv100_dma_new, }; static const struct nvkm_device_chip @@ -2701,6 +2702,7 @@ nv174_chipset = { .mmu = tu102_mmu_new, .pci = gp100_pci_new, .timer = gk20a_timer_new, + .dma = gv100_dma_new, }; static int -- cgit 1.4.1 From 8ef23b6f6a79e6fa2a169081d2d76011fffa0482 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: drm/nouveau/disp/ga10[24]: initial support UEFI/RM no longer use IED scripts from the VBIOS, though they appear to have been updated for use by the x86 VBIOS code, so we should be able to continue using them for the moment. Unfortunately, we require some hacks to do so, as the BeforeLinkTraining IED script became a pointer to an array of scripts instead, without a revbump of the relevant tables. There's also some changes to SOR clock divider fiddling, which are hopefully correct enough that things work as they should. AFAIK, GA100 shouldn't have display, so it hasn't been added. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/Kbuild | 1 + drivers/gpu/drm/nouveau/dispnv50/core.c | 1 + drivers/gpu/drm/nouveau/dispnv50/curs.c | 1 + drivers/gpu/drm/nouveau/dispnv50/wimm.c | 1 + drivers/gpu/drm/nouveau/dispnv50/wndw.c | 1 + drivers/gpu/drm/nouveau/dispnv50/wndw.h | 8 ++ drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c | 10 +- drivers/gpu/drm/nouveau/dispnv50/wndwc67e.c | 106 ++++++++++++++++ drivers/gpu/drm/nouveau/include/nvif/class.h | 5 + drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h | 1 + drivers/gpu/drm/nouveau/nvif/disp.c | 1 + drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 2 + drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild | 3 + drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c | 33 ++++- drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c | 46 +++++++ drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h | 4 + drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h | 2 + .../gpu/drm/nouveau/nvkm/engine/disp/rootga102.c | 52 ++++++++ .../gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h | 1 + .../gpu/drm/nouveau/nvkm/engine/disp/sorga102.c | 140 +++++++++++++++++++++ .../gpu/drm/nouveau/nvkm/engine/disp/sortu102.c | 2 +- drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c | 2 +- 22 files changed, 410 insertions(+), 13 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/dispnv50/wndwc67e.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/disp/rootga102.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/disp/sorga102.c diff --git a/drivers/gpu/drm/nouveau/dispnv50/Kbuild b/drivers/gpu/drm/nouveau/dispnv50/Kbuild index 6fdddb266fb1..4488e1c061b3 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/Kbuild +++ b/drivers/gpu/drm/nouveau/dispnv50/Kbuild @@ -37,6 +37,7 @@ nouveau-y += dispnv50/wimmc37b.o nouveau-y += dispnv50/wndw.o nouveau-y += dispnv50/wndwc37e.o nouveau-y += dispnv50/wndwc57e.o +nouveau-y += dispnv50/wndwc67e.o nouveau-y += dispnv50/base.o nouveau-y += dispnv50/base507c.o diff --git a/drivers/gpu/drm/nouveau/dispnv50/core.c b/drivers/gpu/drm/nouveau/dispnv50/core.c index 27ea3f34706d..abefc2343443 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/core.c +++ b/drivers/gpu/drm/nouveau/dispnv50/core.c @@ -42,6 +42,7 @@ nv50_core_new(struct nouveau_drm *drm, struct nv50_core **pcore) int version; int (*new)(struct nouveau_drm *, s32, struct nv50_core **); } cores[] = { + { GA102_DISP_CORE_CHANNEL_DMA, 0, corec57d_new }, { TU102_DISP_CORE_CHANNEL_DMA, 0, corec57d_new }, { GV100_DISP_CORE_CHANNEL_DMA, 0, corec37d_new }, { GP102_DISP_CORE_CHANNEL_DMA, 0, core917d_new }, diff --git a/drivers/gpu/drm/nouveau/dispnv50/curs.c b/drivers/gpu/drm/nouveau/dispnv50/curs.c index 121c24a18f11..31d8b2e4791d 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/curs.c +++ b/drivers/gpu/drm/nouveau/dispnv50/curs.c @@ -31,6 +31,7 @@ nv50_curs_new(struct nouveau_drm *drm, int head, struct nv50_wndw **pwndw) int version; int (*new)(struct nouveau_drm *, int, s32, struct nv50_wndw **); } curses[] = { + { GA102_DISP_CURSOR, 0, cursc37a_new }, { TU102_DISP_CURSOR, 0, cursc37a_new }, { GV100_DISP_CURSOR, 0, cursc37a_new }, { GK104_DISP_CURSOR, 0, curs907a_new }, diff --git a/drivers/gpu/drm/nouveau/dispnv50/wimm.c b/drivers/gpu/drm/nouveau/dispnv50/wimm.c index a1ac153d5e98..566fbddfc8d7 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wimm.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wimm.c @@ -31,6 +31,7 @@ nv50_wimm_init(struct nouveau_drm *drm, struct nv50_wndw *wndw) int version; int (*init)(struct nouveau_drm *, s32, struct nv50_wndw *); } wimms[] = { + { GA102_DISP_WINDOW_IMM_CHANNEL_DMA, 0, wimmc37b_init }, { TU102_DISP_WINDOW_IMM_CHANNEL_DMA, 0, wimmc37b_init }, { GV100_DISP_WINDOW_IMM_CHANNEL_DMA, 0, wimmc37b_init }, {} diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index 0356474ad6f6..ce451242f79e 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -784,6 +784,7 @@ nv50_wndw_new(struct nouveau_drm *drm, enum drm_plane_type type, int index, int (*new)(struct nouveau_drm *, enum drm_plane_type, int, s32, struct nv50_wndw **); } wndws[] = { + { GA102_DISP_WINDOW_CHANNEL_DMA, 0, wndwc67e_new }, { TU102_DISP_WINDOW_CHANNEL_DMA, 0, wndwc57e_new }, { GV100_DISP_WINDOW_CHANNEL_DMA, 0, wndwc37e_new }, {} diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.h b/drivers/gpu/drm/nouveau/dispnv50/wndw.h index 3278e2880034..f4e0c5080034 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.h +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.h @@ -129,6 +129,14 @@ int wndwc37e_update(struct nv50_wndw *, u32 *); int wndwc57e_new(struct nouveau_drm *, enum drm_plane_type, int, s32, struct nv50_wndw **); +bool wndwc57e_ilut(struct nv50_wndw *, struct nv50_wndw_atom *, int); +int wndwc57e_ilut_set(struct nv50_wndw *, struct nv50_wndw_atom *); +int wndwc57e_ilut_clr(struct nv50_wndw *); +int wndwc57e_csc_set(struct nv50_wndw *, struct nv50_wndw_atom *); +int wndwc57e_csc_clr(struct nv50_wndw *); + +int wndwc67e_new(struct nouveau_drm *, enum drm_plane_type, int, s32, + struct nv50_wndw **); int nv50_wndw_new(struct nouveau_drm *, enum drm_plane_type, int index, struct nv50_wndw **); diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c b/drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c index 429be0bb0222..abdd3bb658b3 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c @@ -80,7 +80,7 @@ wndwc57e_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) return 0; } -static int +int wndwc57e_csc_clr(struct nv50_wndw *wndw) { struct nvif_push *push = wndw->wndw.push; @@ -98,7 +98,7 @@ wndwc57e_csc_clr(struct nv50_wndw *wndw) return 0; } -static int +int wndwc57e_csc_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) { struct nvif_push *push = wndw->wndw.push; @@ -111,7 +111,7 @@ wndwc57e_csc_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) return 0; } -static int +int wndwc57e_ilut_clr(struct nv50_wndw *wndw) { struct nvif_push *push = wndw->wndw.push; @@ -124,7 +124,7 @@ wndwc57e_ilut_clr(struct nv50_wndw *wndw) return 0; } -static int +int wndwc57e_ilut_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) { struct nvif_push *push = wndw->wndw.push; @@ -179,7 +179,7 @@ wndwc57e_ilut_load(struct drm_color_lut *in, int size, void __iomem *mem) writew(readw(mem - 4), mem + 4); } -static bool +bool wndwc57e_ilut(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw, int size) { if (size = size ? size : 1024, size != 256 && size != 1024) diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndwc67e.c b/drivers/gpu/drm/nouveau/dispnv50/wndwc67e.c new file mode 100644 index 000000000000..7a370fa1df20 --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/wndwc67e.c @@ -0,0 +1,106 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "wndw.h" +#include "atom.h" + +#include + +#include + +static int +wndwc67e_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + struct nvif_push *push = wndw->wndw.push; + int ret; + + if ((ret = PUSH_WAIT(push, 17))) + return ret; + + PUSH_MTHD(push, NVC57E, SET_PRESENT_CONTROL, + NVVAL(NVC57E, SET_PRESENT_CONTROL, MIN_PRESENT_INTERVAL, asyw->image.interval) | + NVVAL(NVC57E, SET_PRESENT_CONTROL, BEGIN_MODE, asyw->image.mode) | + NVDEF(NVC57E, SET_PRESENT_CONTROL, TIMESTAMP_MODE, DISABLE)); + + PUSH_MTHD(push, NVC57E, SET_SIZE, + NVVAL(NVC57E, SET_SIZE, WIDTH, asyw->image.w) | + NVVAL(NVC57E, SET_SIZE, HEIGHT, asyw->image.h), + + SET_STORAGE, + NVVAL(NVC57E, SET_STORAGE, BLOCK_HEIGHT, asyw->image.blockh), + + SET_PARAMS, + NVVAL(NVC57E, SET_PARAMS, FORMAT, asyw->image.format) | + NVDEF(NVC57E, SET_PARAMS, CLAMP_BEFORE_BLEND, DISABLE) | + NVDEF(NVC57E, SET_PARAMS, SWAP_UV, DISABLE) | + NVDEF(NVC57E, SET_PARAMS, FMT_ROUNDING_MODE, ROUND_TO_NEAREST), + + SET_PLANAR_STORAGE(0), + NVVAL(NVC57E, SET_PLANAR_STORAGE, PITCH, asyw->image.blocks[0]) | + NVVAL(NVC57E, SET_PLANAR_STORAGE, PITCH, asyw->image.pitch[0] >> 6)); + + PUSH_MTHD(push, NVC57E, SET_CONTEXT_DMA_ISO(0), asyw->image.handle, 1); + PUSH_MTHD(push, NVC57E, SET_OFFSET(0), asyw->image.offset[0] >> 8); + + PUSH_MTHD(push, NVC57E, SET_POINT_IN(0), + NVVAL(NVC57E, SET_POINT_IN, X, asyw->state.src_x >> 16) | + NVVAL(NVC57E, SET_POINT_IN, Y, asyw->state.src_y >> 16)); + + PUSH_MTHD(push, NVC57E, SET_SIZE_IN, + NVVAL(NVC57E, SET_SIZE_IN, WIDTH, asyw->state.src_w >> 16) | + NVVAL(NVC57E, SET_SIZE_IN, HEIGHT, asyw->state.src_h >> 16)); + + PUSH_MTHD(push, NVC57E, SET_SIZE_OUT, + NVVAL(NVC57E, SET_SIZE_OUT, WIDTH, asyw->state.crtc_w) | + NVVAL(NVC57E, SET_SIZE_OUT, HEIGHT, asyw->state.crtc_h)); + return 0; +} + +static const struct nv50_wndw_func +wndwc67e = { + .acquire = wndwc37e_acquire, + .release = wndwc37e_release, + .sema_set = wndwc37e_sema_set, + .sema_clr = wndwc37e_sema_clr, + .ntfy_set = wndwc37e_ntfy_set, + .ntfy_clr = wndwc37e_ntfy_clr, + .ntfy_reset = corec37d_ntfy_init, + .ntfy_wait_begun = base507c_ntfy_wait_begun, + .ilut = wndwc57e_ilut, + .ilut_identity = true, + .ilut_size = 1024, + .xlut_set = wndwc57e_ilut_set, + .xlut_clr = wndwc57e_ilut_clr, + .csc = base907c_csc, + .csc_set = wndwc57e_csc_set, + .csc_clr = wndwc57e_csc_clr, + .image_set = wndwc67e_image_set, + .image_clr = wndwc37e_image_clr, + .blend_set = wndwc37e_blend_set, + .update = wndwc37e_update, +}; + +int +wndwc67e_new(struct nouveau_drm *drm, enum drm_plane_type type, int index, + s32 oclass, struct nv50_wndw **pwndw) +{ + return wndwc37e_new_(&wndwc67e, drm, type, index, oclass, BIT(index >> 1), pwndw); +} diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h index 2c79beb41126..ba2c28ea43d2 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/class.h +++ b/drivers/gpu/drm/nouveau/include/nvif/class.h @@ -88,6 +88,7 @@ #define GP102_DISP /* cl5070.h */ 0x00009870 #define GV100_DISP /* cl5070.h */ 0x0000c370 #define TU102_DISP /* cl5070.h */ 0x0000c570 +#define GA102_DISP /* cl5070.h */ 0x0000c670 #define GV100_DISP_CAPS 0x0000c373 @@ -103,6 +104,7 @@ #define GK104_DISP_CURSOR /* cl507a.h */ 0x0000917a #define GV100_DISP_CURSOR /* cl507a.h */ 0x0000c37a #define TU102_DISP_CURSOR /* cl507a.h */ 0x0000c57a +#define GA102_DISP_CURSOR /* cl507a.h */ 0x0000c67a #define NV50_DISP_OVERLAY /* cl507b.h */ 0x0000507b #define G82_DISP_OVERLAY /* cl507b.h */ 0x0000827b @@ -112,6 +114,7 @@ #define GV100_DISP_WINDOW_IMM_CHANNEL_DMA /* clc37b.h */ 0x0000c37b #define TU102_DISP_WINDOW_IMM_CHANNEL_DMA /* clc37b.h */ 0x0000c57b +#define GA102_DISP_WINDOW_IMM_CHANNEL_DMA /* clc37b.h */ 0x0000c67b #define NV50_DISP_BASE_CHANNEL_DMA /* cl507c.h */ 0x0000507c #define G82_DISP_BASE_CHANNEL_DMA /* cl507c.h */ 0x0000827c @@ -135,6 +138,7 @@ #define GP102_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000987d #define GV100_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000c37d #define TU102_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000c57d +#define GA102_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000c67d #define NV50_DISP_OVERLAY_CHANNEL_DMA /* cl507e.h */ 0x0000507e #define G82_DISP_OVERLAY_CHANNEL_DMA /* cl507e.h */ 0x0000827e @@ -145,6 +149,7 @@ #define GV100_DISP_WINDOW_CHANNEL_DMA /* clc37e.h */ 0x0000c37e #define TU102_DISP_WINDOW_CHANNEL_DMA /* clc37e.h */ 0x0000c57e +#define GA102_DISP_WINDOW_CHANNEL_DMA /* clc37e.h */ 0x0000c67e #define NV50_TESLA 0x00005097 #define G82_TESLA 0x00008297 diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h index 5a96c942d912..0f6fa6631a19 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h @@ -37,4 +37,5 @@ int gp100_disp_new(struct nvkm_device *, int, struct nvkm_disp **); int gp102_disp_new(struct nvkm_device *, int, struct nvkm_disp **); int gv100_disp_new(struct nvkm_device *, int, struct nvkm_disp **); int tu102_disp_new(struct nvkm_device *, int, struct nvkm_disp **); +int ga102_disp_new(struct nvkm_device *, int, struct nvkm_disp **); #endif diff --git a/drivers/gpu/drm/nouveau/nvif/disp.c b/drivers/gpu/drm/nouveau/nvif/disp.c index 8d0d30e08f57..529cb60d5efb 100644 --- a/drivers/gpu/drm/nouveau/nvif/disp.c +++ b/drivers/gpu/drm/nouveau/nvif/disp.c @@ -35,6 +35,7 @@ nvif_disp_ctor(struct nvif_device *device, const char *name, s32 oclass, struct nvif_disp *disp) { static const struct nvif_mclass disps[] = { + { GA102_DISP, -1 }, { TU102_DISP, -1 }, { GV100_DISP, -1 }, { GP102_DISP, -1 }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index bea3daf08570..cdcc851e06f9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2684,6 +2684,7 @@ nv172_chipset = { .mmu = tu102_mmu_new, .pci = gp100_pci_new, .timer = gk20a_timer_new, + .disp = ga102_disp_new, .dma = gv100_dma_new, }; @@ -2702,6 +2703,7 @@ nv174_chipset = { .mmu = tu102_mmu_new, .pci = gp100_pci_new, .timer = gk20a_timer_new, + .disp = ga102_disp_new, .dma = gv100_dma_new, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild index cf075311cdd2..b03f043efe26 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild @@ -17,6 +17,7 @@ nvkm-y += nvkm/engine/disp/gp100.o nvkm-y += nvkm/engine/disp/gp102.o nvkm-y += nvkm/engine/disp/gv100.o nvkm-y += nvkm/engine/disp/tu102.o +nvkm-y += nvkm/engine/disp/ga102.o nvkm-y += nvkm/engine/disp/vga.o nvkm-y += nvkm/engine/disp/head.o @@ -42,6 +43,7 @@ nvkm-y += nvkm/engine/disp/sorgm200.o nvkm-y += nvkm/engine/disp/sorgp100.o nvkm-y += nvkm/engine/disp/sorgv100.o nvkm-y += nvkm/engine/disp/sortu102.o +nvkm-y += nvkm/engine/disp/sorga102.o nvkm-y += nvkm/engine/disp/outp.o nvkm-y += nvkm/engine/disp/dp.o @@ -75,6 +77,7 @@ nvkm-y += nvkm/engine/disp/rootgp100.o nvkm-y += nvkm/engine/disp/rootgp102.o nvkm-y += nvkm/engine/disp/rootgv100.o nvkm-y += nvkm/engine/disp/roottu102.o +nvkm-y += nvkm/engine/disp/rootga102.o nvkm-y += nvkm/engine/disp/capsgv100.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c index 3800aeb507d0..55fbfe28c6dc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c @@ -33,6 +33,12 @@ #include +/* IED scripts are no longer used by UEFI/RM from Ampere, but have been updated for + * the x86 option ROM. However, the relevant VBIOS table versions weren't modified, + * so we're unable to detect this in a nice way. + */ +#define AMPERE_IED_HACK(disp) ((disp)->engine.subdev.device->card_type >= GA100) + struct lt_state { struct nvkm_dp *dp; u8 stat[6]; @@ -238,6 +244,19 @@ nvkm_dp_train_links(struct nvkm_dp *dp) dp->dpcd[DPCD_RC02] &= ~DPCD_RC02_TPS3_SUPPORTED; lt.pc2 = dp->dpcd[DPCD_RC02] & DPCD_RC02_TPS3_SUPPORTED; + if (AMPERE_IED_HACK(disp) && (lnkcmp = lt.dp->info.script[0])) { + /* Execute BeforeLinkTraining script from DP Info table. */ + while (ior->dp.bw < nvbios_rd08(bios, lnkcmp)) + lnkcmp += 3; + lnkcmp = nvbios_rd16(bios, lnkcmp + 1); + + nvbios_init(&dp->outp.disp->engine.subdev, lnkcmp, + init.outp = &dp->outp.info; + init.or = ior->id; + init.link = ior->asy.link; + ); + } + /* Set desired link configuration on the source. */ if ((lnkcmp = lt.dp->info.lnkcmp)) { if (dp->version < 0x30) { @@ -316,12 +335,14 @@ nvkm_dp_train_init(struct nvkm_dp *dp) ); } - /* Execute BeforeLinkTraining script from DP Info table. */ - nvbios_init(&dp->outp.disp->engine.subdev, dp->info.script[0], - init.outp = &dp->outp.info; - init.or = dp->outp.ior->id; - init.link = dp->outp.ior->asy.link; - ); + if (!AMPERE_IED_HACK(dp->outp.disp)) { + /* Execute BeforeLinkTraining script from DP Info table. */ + nvbios_init(&dp->outp.disp->engine.subdev, dp->info.script[0], + init.outp = &dp->outp.info; + init.or = dp->outp.ior->id; + init.link = dp->outp.ior->asy.link; + ); + } } static const struct dp_rates { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c new file mode 100644 index 000000000000..aa2e5645fe36 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c @@ -0,0 +1,46 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "nv50.h" +#include "head.h" +#include "ior.h" +#include "channv50.h" +#include "rootnv50.h" + +static const struct nv50_disp_func +ga102_disp = { + .init = tu102_disp_init, + .fini = gv100_disp_fini, + .intr = gv100_disp_intr, + .uevent = &gv100_disp_chan_uevent, + .super = gv100_disp_super, + .root = &ga102_disp_root_oclass, + .wndw = { .cnt = gv100_disp_wndw_cnt }, + .head = { .cnt = gv100_head_cnt, .new = gv100_head_new }, + .sor = { .cnt = gv100_sor_cnt, .new = ga102_sor_new }, + .ramht_size = 0x2000, +}; + +int +ga102_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp) +{ + return nv50_disp_new_(&ga102_disp, device, index, pdisp); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h index 09f3038eff26..9f0bb7c6b010 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h @@ -150,6 +150,8 @@ void gv100_sor_dp_audio(struct nvkm_ior *, int, bool); void gv100_sor_dp_audio_sym(struct nvkm_ior *, int, u16, u32); void gv100_sor_dp_watermark(struct nvkm_ior *, int, u8); +void tu102_sor_dp_vcpi(struct nvkm_ior *, int, u8, u8, u16, u16); + void g84_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8); void gt215_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8); void gf119_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8); @@ -207,4 +209,6 @@ int gv100_sor_cnt(struct nvkm_disp *, unsigned long *); int gv100_sor_new(struct nvkm_disp *, int); int tu102_sor_new(struct nvkm_disp *, int); + +int ga102_sor_new(struct nvkm_disp *, int); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h index a677161c7f3a..db31b37752a2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h @@ -86,6 +86,8 @@ void gv100_disp_intr(struct nv50_disp *); void gv100_disp_super(struct work_struct *); int gv100_disp_wndw_cnt(struct nvkm_disp *, unsigned long *); +int tu102_disp_init(struct nv50_disp *); + void nv50_disp_dptmds_war_2(struct nv50_disp *, struct dcb_output *); void nv50_disp_dptmds_war_3(struct nv50_disp *, struct dcb_output *); void nv50_disp_update_sppll1(struct nv50_disp *); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootga102.c new file mode 100644 index 000000000000..9af07c3cf9fc --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootga102.c @@ -0,0 +1,52 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "rootnv50.h" +#include "channv50.h" + +#include + +static const struct nv50_disp_root_func +ga102_disp_root = { + .user = { + {{-1,-1,GV100_DISP_CAPS }, gv100_disp_caps_new }, + {{0,0,GA102_DISP_CURSOR }, gv100_disp_curs_new }, + {{0,0,GA102_DISP_WINDOW_IMM_CHANNEL_DMA}, gv100_disp_wimm_new }, + {{0,0,GA102_DISP_CORE_CHANNEL_DMA }, gv100_disp_core_new }, + {{0,0,GA102_DISP_WINDOW_CHANNEL_DMA }, gv100_disp_wndw_new }, + {} + }, +}; + +static int +ga102_disp_root_new(struct nvkm_disp *disp, const struct nvkm_oclass *oclass, + void *data, u32 size, struct nvkm_object **pobject) +{ + return nv50_disp_root_new_(&ga102_disp_root, disp, oclass, data, size, pobject); +} + +const struct nvkm_disp_oclass +ga102_disp_root_oclass = { + .base.oclass = GA102_DISP, + .base.minver = -1, + .base.maxver = -1, + .ctor = ga102_disp_root_new, +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h index 7070f5408d92..27bb170d0293 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h @@ -41,4 +41,5 @@ extern const struct nvkm_disp_oclass gp100_disp_root_oclass; extern const struct nvkm_disp_oclass gp102_disp_root_oclass; extern const struct nvkm_disp_oclass gv100_disp_root_oclass; extern const struct nvkm_disp_oclass tu102_disp_root_oclass; +extern const struct nvkm_disp_oclass ga102_disp_root_oclass; #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorga102.c new file mode 100644 index 000000000000..033827de9116 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorga102.c @@ -0,0 +1,140 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ior.h" + +#include + +static int +ga102_sor_dp_links(struct nvkm_ior *sor, struct nvkm_i2c_aux *aux) +{ + struct nvkm_device *device = sor->disp->engine.subdev.device; + const u32 soff = nv50_ior_base(sor); + const u32 loff = nv50_sor_link(sor); + u32 dpctrl = 0x00000000; + u32 clksor = 0x00000000; + + switch (sor->dp.bw) { + case 0x06: clksor |= 0x00000000; break; + case 0x0a: clksor |= 0x00040000; break; + case 0x14: clksor |= 0x00080000; break; + case 0x1e: clksor |= 0x000c0000; break; + default: + WARN_ON(1); + return -EINVAL; + } + + dpctrl |= ((1 << sor->dp.nr) - 1) << 16; + if (sor->dp.mst) + dpctrl |= 0x40000000; + if (sor->dp.ef) + dpctrl |= 0x00004000; + + nvkm_mask(device, 0x612300 + soff, 0x007c0000, clksor); + + /*XXX*/ + nvkm_msec(device, 40, NVKM_DELAY); + nvkm_mask(device, 0x612300 + soff, 0x00030000, 0x00010000); + nvkm_mask(device, 0x61c10c + loff, 0x00000003, 0x00000001); + + nvkm_mask(device, 0x61c10c + loff, 0x401f4000, dpctrl); + return 0; +} + +static void +ga102_sor_clock(struct nvkm_ior *sor) +{ + struct nvkm_device *device = sor->disp->engine.subdev.device; + u32 div2 = 0; + if (sor->asy.proto == TMDS) { + if (sor->tmds.high_speed) + div2 = 1; + } + nvkm_wr32(device, 0x00ec08 + (sor->id * 0x10), 0x00000000); + nvkm_wr32(device, 0x00ec04 + (sor->id * 0x10), div2); +} + +static const struct nvkm_ior_func +ga102_sor_hda = { + .route = { + .get = gm200_sor_route_get, + .set = gm200_sor_route_set, + }, + .state = gv100_sor_state, + .power = nv50_sor_power, + .clock = ga102_sor_clock, + .hdmi = { + .ctrl = gv100_hdmi_ctrl, + .scdc = gm200_hdmi_scdc, + }, + .dp = { + .lanes = { 0, 1, 2, 3 }, + .links = ga102_sor_dp_links, + .power = g94_sor_dp_power, + .pattern = gm107_sor_dp_pattern, + .drive = gm200_sor_dp_drive, + .vcpi = tu102_sor_dp_vcpi, + .audio = gv100_sor_dp_audio, + .audio_sym = gv100_sor_dp_audio_sym, + .watermark = gv100_sor_dp_watermark, + }, + .hda = { + .hpd = gf119_hda_hpd, + .eld = gf119_hda_eld, + .device_entry = gv100_hda_device_entry, + }, +}; + +static const struct nvkm_ior_func +ga102_sor = { + .route = { + .get = gm200_sor_route_get, + .set = gm200_sor_route_set, + }, + .state = gv100_sor_state, + .power = nv50_sor_power, + .clock = ga102_sor_clock, + .hdmi = { + .ctrl = gv100_hdmi_ctrl, + .scdc = gm200_hdmi_scdc, + }, + .dp = { + .lanes = { 0, 1, 2, 3 }, + .links = ga102_sor_dp_links, + .power = g94_sor_dp_power, + .pattern = gm107_sor_dp_pattern, + .drive = gm200_sor_dp_drive, + .vcpi = tu102_sor_dp_vcpi, + .audio = gv100_sor_dp_audio, + .audio_sym = gv100_sor_dp_audio_sym, + .watermark = gv100_sor_dp_watermark, + }, +}; + +int +ga102_sor_new(struct nvkm_disp *disp, int id) +{ + struct nvkm_device *device = disp->engine.subdev.device; + u32 hda = nvkm_rd32(device, 0x08a15c); + if (hda & BIT(id)) + return nvkm_ior_new_(&ga102_sor_hda, disp, SOR, id); + return nvkm_ior_new_(&ga102_sor, disp, SOR, id); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c index 59865a934c4b..0cf9e8752d25 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c @@ -23,7 +23,7 @@ #include -static void +void tu102_sor_dp_vcpi(struct nvkm_ior *sor, int head, u8 slot, u8 slot_nr, u16 pbn, u16 aligned) { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c index 883ae4151ff8..4c85d1d4fbd4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c @@ -28,7 +28,7 @@ #include #include -static int +int tu102_disp_init(struct nv50_disp *disp) { struct nvkm_device *device = disp->base.engine.subdev.device; -- cgit 1.4.1 From dec822771b0174a01e72d7641d08e44461b6a82f Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 14 Jan 2021 10:46:57 +0800 Subject: riscv: stacktrace: Move register keyword to beginning of declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using global sp_in_global directly to fix the following warning, arch/riscv/kernel/stacktrace.c:31:3: warning: ‘register’ is not at beginning of declaration [-Wold-style-declaration] 31 | const register unsigned long current_sp = sp_in_global; | ^~~~~ Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/stacktrace.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 48b870a685b3..df5d2da7c40b 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -14,7 +14,7 @@ #include -register unsigned long sp_in_global __asm__("sp"); +register const unsigned long sp_in_global __asm__("sp"); #ifdef CONFIG_FRAME_POINTER @@ -28,9 +28,8 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, sp = user_stack_pointer(regs); pc = instruction_pointer(regs); } else if (task == NULL || task == current) { - const register unsigned long current_sp = sp_in_global; fp = (unsigned long)__builtin_frame_address(0); - sp = current_sp; + sp = sp_in_global; pc = (unsigned long)walk_stackframe; } else { /* task blocked in __switch_to */ -- cgit 1.4.1 From dca5244d2f5b94f1809f0c02a549edf41ccd5493 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 12 Jan 2021 22:48:32 +0000 Subject: compiler.h: Raise minimum version of GCC to 5.1 for arm64 GCC versions >= 4.9 and < 5.1 have been shown to emit memory references beyond the stack pointer, resulting in memory corruption if an interrupt is taken after the stack pointer has been adjusted but before the reference has been executed. This leads to subtle, infrequent data corruption such as the EXT4 problems reported by Russell King at the link below. Life is too short for buggy compilers, so raise the minimum GCC version required by arm64 to 5.1. Reported-by: Russell King Suggested-by: Arnd Bergmann Signed-off-by: Will Deacon Tested-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Acked-by: Linus Torvalds Cc: Cc: Theodore Ts'o Cc: Florian Weimer Cc: Peter Zijlstra Cc: Nick Desaulniers Link: https://lore.kernel.org/r/20210105154726.GD1551@shell.armlinux.org.uk Link: https://lore.kernel.org/r/20210112224832.10980-1-will@kernel.org Signed-off-by: Catalin Marinas --- include/linux/compiler-gcc.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 74c6c0486eed..555ab0fddbef 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -13,6 +13,12 @@ /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 */ #if GCC_VERSION < 40900 # error Sorry, your version of GCC is too old - please use 4.9 or newer. +#elif defined(CONFIG_ARM64) && GCC_VERSION < 50100 +/* + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63293 + * https://lore.kernel.org/r/20210107111841.GN1551@shell.armlinux.org.uk + */ +# error Sorry, your version of GCC is too old - please use 5.1 or newer. #endif /* -- cgit 1.4.1 From b6d8878d24e39f213df0f3ea7abebd15edc7be21 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 14 Jan 2021 12:48:12 +0000 Subject: arm64: syscall: include prototype for EL0 SVC functions The kbuild test robot reports that when building with W=1, GCC will warn for a couple of missing prototypes in syscall.c: | arch/arm64/kernel/syscall.c:157:6: warning: no previous prototype for 'do_el0_svc' [-Wmissing-prototypes] | 157 | void do_el0_svc(struct pt_regs *regs) | | ^~~~~~~~~~ | arch/arm64/kernel/syscall.c:164:6: warning: no previous prototype for 'do_el0_svc_compat' [-Wmissing-prototypes] | 164 | void do_el0_svc_compat(struct pt_regs *regs) | | ^~~~~~~~~~~~~~~~~ While this isn't a functional problem, as a general policy we should include the prototype for functions wherever possible to catch any accidental divergence between the prototype and implementation. Here we can easily include , so let's do so. While there are a number of warnings elsewhere and some warnings enabled under W=1 are of questionable benefit, this change helps to make the code more robust as it evolved and reduces the noise somewhat, so it seems worthwhile. Signed-off-by: Mark Rutland Reported-by: kernel test robot Cc: Will Deacon Link: https://lore.kernel.org/r/202101141046.n8iPO3mw-lkp@intel.com Link: https://lore.kernel.org/r/20210114124812.17754-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/syscall.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 0bfac95fe464..c2877c332f2d 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include -- cgit 1.4.1 From 3a57a643a851dbb1c4a1819394ca009e3bfa4813 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 8 Jan 2021 18:31:44 +0000 Subject: arm64: selftests: Fix spelling of 'Mismatch' The SVE and FPSIMD stress tests have a spelling mistake in the output, fix it. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210108183144.673-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/fpsimd-test.S | 2 +- tools/testing/selftests/arm64/fp/sve-test.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S index 1c5556bdd11d..0dbd594c2747 100644 --- a/tools/testing/selftests/arm64/fp/fpsimd-test.S +++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S @@ -457,7 +457,7 @@ function barf mov x11, x1 // actual data mov x12, x2 // data size - puts "Mistatch: PID=" + puts "Mismatch: PID=" mov x0, x20 bl putdec puts ", iteration=" diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index f95074c9b48b..9210691aa998 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -625,7 +625,7 @@ function barf mov x11, x1 // actual data mov x12, x2 // data size - puts "Mistatch: PID=" + puts "Mismatch: PID=" mov x0, x20 bl putdec puts ", iteration=" -- cgit 1.4.1 From f010505b78a4fa8d5b6480752566e7313fb5ca6e Mon Sep 17 00:00:00 2001 From: Marcelo Diop-Gonzalez Date: Fri, 15 Jan 2021 11:54:40 -0500 Subject: io_uring: flush timeouts that should already have expired Right now io_flush_timeouts() checks if the current number of events is equal to ->timeout.target_seq, but this will miss some timeouts if there have been more than 1 event added since the last time they were flushed (possible in io_submit_flush_completions(), for example). Fix it by recording the last sequence at which timeouts were flushed so that the number of events seen can be compared to the number of events needed without overflow. Signed-off-by: Marcelo Diop-Gonzalez Reviewed-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 372be9caf340..06cc79d39586 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -354,6 +354,7 @@ struct io_ring_ctx { unsigned cq_entries; unsigned cq_mask; atomic_t cq_timeouts; + unsigned cq_last_tm_flush; unsigned long cq_check_overflow; struct wait_queue_head cq_wait; struct fasync_struct *cq_fasync; @@ -1639,19 +1640,38 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx) static void io_flush_timeouts(struct io_ring_ctx *ctx) { - while (!list_empty(&ctx->timeout_list)) { + u32 seq; + + if (list_empty(&ctx->timeout_list)) + return; + + seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); + + do { + u32 events_needed, events_got; struct io_kiocb *req = list_first_entry(&ctx->timeout_list, struct io_kiocb, timeout.list); if (io_is_timeout_noseq(req)) break; - if (req->timeout.target_seq != ctx->cached_cq_tail - - atomic_read(&ctx->cq_timeouts)) + + /* + * Since seq can easily wrap around over time, subtract + * the last seq at which timeouts were flushed before comparing. + * Assuming not more than 2^31-1 events have happened since, + * these subtractions won't have wrapped, so we can check if + * target is in [last_seq, current_seq] by comparing the two. + */ + events_needed = req->timeout.target_seq - ctx->cq_last_tm_flush; + events_got = seq - ctx->cq_last_tm_flush; + if (events_got < events_needed) break; list_del_init(&req->timeout.list); io_kill_timeout(req); - } + } while (!list_empty(&ctx->timeout_list)); + + ctx->cq_last_tm_flush = seq; } static void io_commit_cqring(struct io_ring_ctx *ctx) @@ -5837,6 +5857,12 @@ static int io_timeout(struct io_kiocb *req) tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); req->timeout.target_seq = tail + off; + /* Update the last seq here in case io_flush_timeouts() hasn't. + * This is safe because ->completion_lock is held, and submissions + * and completions are never mixed in the same ->completion_lock section. + */ + ctx->cq_last_tm_flush = tail; + /* * Insertion sort, ensuring the first entry in the list is always * the one we need first. -- cgit 1.4.1 From 301f0203e04293c13372c032198665bd75adf81b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 29 Dec 2020 15:41:19 -0300 Subject: perf bpf examples: Fix bpf.h header include directive in 5sec.c example It was looking at bpf/bpf.h, which caused this problem: # perf trace -e tools/perf/examples/bpf/5sec.c /home/acme/git/perf/tools/perf/examples/bpf/5sec.c:42:10: fatal error: 'bpf/bpf.h' file not found #include ^~~~~~~~~~~ 1 error generated. ERROR: unable to compile tools/perf/examples/bpf/5sec.c Hint: Check error message shown above. Hint: You can also pre-compile it into .o using: clang -target bpf -O2 -c tools/perf/examples/bpf/5sec.c with proper -I and -D options. event syntax error: 'tools/perf/examples/bpf/5sec.c' \___ Failed to load tools/perf/examples/bpf/5sec.c from source: Error when compiling BPF scriptlet # Change that to plain bpf.h, to make it work again: # perf trace -e tools/perf/examples/bpf/5sec.c sleep 5s 0.000 perf_bpf_probe:hrtimer_nanosleep(__probe_ip: -1776891872, rqtp: 5000000000) # perf trace -e tools/perf/examples/bpf/5sec.c/max-stack=16/ sleep 5s 0.000 perf_bpf_probe:hrtimer_nanosleep(__probe_ip: -1776891872, rqtp: 5000000000) hrtimer_nanosleep ([kernel.kallsyms]) common_nsleep ([kernel.kallsyms]) __x64_sys_clock_nanosleep ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe ([kernel.kallsyms]) __clock_nanosleep_2 (/usr/lib64/libc-2.32.so) # perf trace -e tools/perf/examples/bpf/5sec.c sleep 4s # Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/5sec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c index 65c4ff6892d9..e6b6181c6dc6 100644 --- a/tools/perf/examples/bpf/5sec.c +++ b/tools/perf/examples/bpf/5sec.c @@ -39,7 +39,7 @@ Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo */ -#include +#include #define NSEC_PER_SEC 1000000000L -- cgit 1.4.1 From 38c53947a7dcb6d295769830c9085b0409921ec9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Jan 2021 12:26:08 -0300 Subject: tools headers UAPI: Sync kvm.h headers with the kernel sources To pick the changes in: 647daca25d24fb6e ("KVM: SVM: Add support for booting APs in an SEV-ES guest") That don't cause any tooling change, just silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Tom Lendacky Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 886802b8ffba..374c67875cdb 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -251,6 +251,7 @@ struct kvm_hyperv_exit { #define KVM_EXIT_X86_RDMSR 29 #define KVM_EXIT_X86_WRMSR 30 #define KVM_EXIT_DIRTY_RING_FULL 31 +#define KVM_EXIT_AP_RESET_HOLD 32 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -573,6 +574,7 @@ struct kvm_vapic_addr { #define KVM_MP_STATE_CHECK_STOP 6 #define KVM_MP_STATE_OPERATING 7 #define KVM_MP_STATE_LOAD 8 +#define KVM_MP_STATE_AP_RESET_HOLD 9 struct kvm_mp_state { __u32 mp_state; -- cgit 1.4.1 From addbdff24293ef772a1b8e5d127b570e70f08cdc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Jan 2021 12:35:27 -0300 Subject: tools headers: Syncronize linux/build_bug.h with the kernel sources To pick up the changes in: 3a176b94609a18f5 ("Revert "kbuild: avoid static_assert for genksyms"") And silence this perf build warning: Warning: Kernel ABI header at 'tools/include/linux/build_bug.h' differs from latest version at 'include/linux/build_bug.h' diff -u tools/include/linux/build_bug.h include/linux/build_bug.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Masahiro Yamada Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/build_bug.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h index ce365d212768..cc7070c7439b 100644 --- a/tools/include/linux/build_bug.h +++ b/tools/include/linux/build_bug.h @@ -79,9 +79,4 @@ #define __static_assert(expr, msg, ...) _Static_assert(expr, msg) #endif // static_assert -#ifdef __GENKSYMS__ -/* genksyms gets confused by _Static_assert */ -#define _Static_assert(expr, ...) -#endif - #endif /* _LINUX_BUILD_BUG_H */ -- cgit 1.4.1 From a042a82ddbb3434f523c0671f5301d1fe796b4eb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 14 Jan 2021 14:06:09 +0900 Subject: perf test: Fix shadow stat test for non-bash shells It was using some bash-specific features and failed to parse when running with a different shell like below: root@kbl-ppc:~/kbl-ws/perf-dev/lck-9077/acme.tmp/tools/perf# ./perf test 83 -vv 83: perf stat metrics (shadow stat) test : --- start --- test child forked, pid 3922 ./tests/shell/stat+shadow_stat.sh: 19: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 24: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 30: ./tests/shell/stat+shadow_stat.sh: [[: not found (standard_in) 2: syntax error ./tests/shell/stat+shadow_stat.sh: 36: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 19: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 24: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 30: ./tests/shell/stat+shadow_stat.sh: [[: not found (standard_in) 2: syntax error ./tests/shell/stat+shadow_stat.sh: 36: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 45: ./tests/shell/stat+shadow_stat.sh: declare: not found test child finished with -1 ---- end ---- perf stat metrics (shadow stat) test: FAILED! Reported-by: Jin Yao Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Laight Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210114050609.1258820-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/stat+shadow_stat.sh | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/tools/perf/tests/shell/stat+shadow_stat.sh b/tools/perf/tests/shell/stat+shadow_stat.sh index 249dfe48cf6a..ebebd3596cf9 100755 --- a/tools/perf/tests/shell/stat+shadow_stat.sh +++ b/tools/perf/tests/shell/stat+shadow_stat.sh @@ -9,31 +9,29 @@ perf stat -a true > /dev/null 2>&1 || exit 2 test_global_aggr() { - local cyc - perf stat -a --no-big-num -e cycles,instructions sleep 1 2>&1 | \ grep -e cycles -e instructions | \ while read num evt hash ipc rest do # skip not counted events - if [[ $num == "&1 | \ grep ^CPU | \ while read cpu num evt hash ipc rest do # skip not counted events - if [[ $num == " Date: Wed, 30 Dec 2020 11:38:27 +0800 Subject: ext4: use IS_ERR instead of IS_ERR_OR_NULL and set inode null when IS_ERR 1: ext4_iget/ext4_find_extent never returns NULL, use IS_ERR instead of IS_ERR_OR_NULL to fix this. 2: ext4_fc_replay_inode should set the inode to NULL when IS_ERR. and go to call iput properly. Fixes: 8016e29f4362 ("ext4: fast commit recovery path") Signed-off-by: Yi Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20201230033827.3996064-1-yili@winhong.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/fast_commit.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 4fcc21c25e79..6b5489273c85 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1318,14 +1318,14 @@ static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl) entry.len = darg.dname_len; inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode %d not found", darg.ino); return 0; } old_parent = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(old_parent)) { + if (IS_ERR(old_parent)) { jbd_debug(1, "Dir with inode %d not found", darg.parent_ino); iput(inode); return 0; @@ -1410,7 +1410,7 @@ static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl) darg.parent_ino, darg.dname_len); inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode not found."); return 0; } @@ -1466,10 +1466,11 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl) trace_ext4_fc_replay(sb, tag, ino, 0, 0); inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); - if (!IS_ERR_OR_NULL(inode)) { + if (!IS_ERR(inode)) { ext4_ext_clear_bb(inode); iput(inode); } + inode = NULL; ext4_fc_record_modified_inode(sb, ino); @@ -1512,7 +1513,7 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl) /* Given that we just wrote the inode on disk, this SHOULD succeed. */ inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode not found."); return -EFSCORRUPTED; } @@ -1564,7 +1565,7 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl) goto out; inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "inode %d not found.", darg.ino); inode = NULL; ret = -EINVAL; @@ -1577,7 +1578,7 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl) * dot and dot dot dirents are setup properly. */ dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(dir)) { + if (IS_ERR(dir)) { jbd_debug(1, "Dir %d not found.", darg.ino); goto out; } @@ -1653,7 +1654,7 @@ static int ext4_fc_replay_add_range(struct super_block *sb, inode = ext4_iget(sb, le32_to_cpu(fc_add_ex->fc_ino), EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode not found."); return 0; } @@ -1777,7 +1778,7 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl) le32_to_cpu(lrange->fc_ino), cur, remaining); inode = ext4_iget(sb, le32_to_cpu(lrange->fc_ino), EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange->fc_ino)); return 0; } @@ -1832,7 +1833,7 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) for (i = 0; i < state->fc_modified_inodes_used; i++) { inode = ext4_iget(sb, state->fc_modified_inodes[i], EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode %d not found.", state->fc_modified_inodes[i]); continue; @@ -1849,7 +1850,7 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) if (ret > 0) { path = ext4_find_extent(inode, map.m_lblk, NULL, 0); - if (!IS_ERR_OR_NULL(path)) { + if (!IS_ERR(path)) { for (j = 0; j < path->p_depth; j++) ext4_mb_mark_bb(inode->i_sb, path[j].p_block, 1, 1); -- cgit 1.4.1 From 31e203e09f036f48e7c567c2d32df0196bbd303f Mon Sep 17 00:00:00 2001 From: Daejun Park Date: Wed, 30 Dec 2020 18:48:51 +0900 Subject: ext4: fix wrong list_splice in ext4_fc_cleanup After full/fast commit, entries in staging queue are promoted to main queue. In ext4_fs_cleanup function, it splice to staging queue to staging queue. Fixes: aa75f4d3daaeb ("ext4: main fast-commit commit path") Signed-off-by: Daejun Park Reviewed-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201230094851epcms2p6eeead8cc984379b37b2efd21af90fd1a@epcms2p6 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/fast_commit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 6b5489273c85..ec5316fbcd1d 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1268,7 +1268,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full) list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING], &sbi->s_fc_dentry_q[FC_Q_MAIN]); list_splice_init(&sbi->s_fc_q[FC_Q_STAGING], - &sbi->s_fc_q[FC_Q_STAGING]); + &sbi->s_fc_q[FC_Q_MAIN]); ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING); ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); -- cgit 1.4.1 From 6b4b8e6b4ad8553660421d6360678b3811d5deb9 Mon Sep 17 00:00:00 2001 From: yangerkun Date: Tue, 5 Jan 2021 14:28:57 +0800 Subject: ext4: fix bug for rename with RENAME_WHITEOUT We got a "deleted inode referenced" warning cross our fsstress test. The bug can be reproduced easily with following steps: cd /dev/shm mkdir test/ fallocate -l 128M img mkfs.ext4 -b 1024 img mount img test/ dd if=/dev/zero of=test/foo bs=1M count=128 mkdir test/dir/ && cd test/dir/ for ((i=0;i<1000;i++)); do touch file$i; done # consume all block cd ~ && renameat2(AT_FDCWD, /dev/shm/test/dir/file1, AT_FDCWD, /dev/shm/test/dir/dst_file, RENAME_WHITEOUT) # ext4_add_entry in ext4_rename will return ENOSPC!! cd /dev/shm/ && umount test/ && mount img test/ && ls -li test/dir/file1 We will get the output: "ls: cannot access 'test/dir/file1': Structure needs cleaning" and the dmesg show: "EXT4-fs error (device loop0): ext4_lookup:1626: inode #2049: comm ls: deleted inode referenced: 139" ext4_rename will create a special inode for whiteout and use this 'ino' to replace the source file's dir entry 'ino'. Once error happens latter(the error above was the ENOSPC return from ext4_add_entry in ext4_rename since all space has been consumed), the cleanup do drop the nlink for whiteout, but forget to restore 'ino' with source file. This will trigger the bug describle as above. Signed-off-by: yangerkun Reviewed-by: Jan Kara Cc: stable@vger.kernel.org Fixes: cd808deced43 ("ext4: support RENAME_WHITEOUT") Link: https://lore.kernel.org/r/20210105062857.3566-1-yangerkun@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index a3b28ef2455a..fa625a247e9a 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3601,9 +3601,6 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent, return retval2; } } - brelse(ent->bh); - ent->bh = NULL; - return retval; } @@ -3802,6 +3799,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, } } + old_file_type = old.de->file_type; if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) ext4_handle_sync(handle); @@ -3829,7 +3827,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, force_reread = (new.dir->i_ino == old.dir->i_ino && ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA)); - old_file_type = old.de->file_type; if (whiteout) { /* * Do this before adding a new entry, so the old entry is sure @@ -3927,15 +3924,19 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, retval = 0; end_rename: - brelse(old.dir_bh); - brelse(old.bh); - brelse(new.bh); if (whiteout) { - if (retval) + if (retval) { + ext4_setent(handle, &old, + old.inode->i_ino, old_file_type); drop_nlink(whiteout); + } unlock_new_inode(whiteout); iput(whiteout); + } + brelse(old.dir_bh); + brelse(old.bh); + brelse(new.bh); if (handle) ext4_journal_stop(handle); return retval; -- cgit 1.4.1 From e9f53353e166a67dfe4f8295100f8ac39d6cf10b Mon Sep 17 00:00:00 2001 From: Daejun Park Date: Wed, 6 Jan 2021 10:32:42 +0900 Subject: ext4: remove expensive flush on fast commit In the fast commit, it adds REQ_FUA and REQ_PREFLUSH on each fast commit block when barrier is enabled. However, in recovery phase, ext4 compares CRC value in the tail. So it is sufficient to add REQ_FUA and REQ_PREFLUSH on the block that has tail. Signed-off-by: Daejun Park Reviewed-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20210106013242epcms2p5b6b4ed8ca86f29456fdf56aa580e74b4@epcms2p5 Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index ec5316fbcd1d..0a14a7c87bf8 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -604,13 +604,13 @@ void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t star trace_ext4_fc_track_range(inode, start, end, ret); } -static void ext4_fc_submit_bh(struct super_block *sb) +static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail) { int write_flags = REQ_SYNC; struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh; - /* TODO: REQ_FUA | REQ_PREFLUSH is unnecessarily expensive. */ - if (test_opt(sb, BARRIER)) + /* Add REQ_FUA | REQ_PREFLUSH only its tail */ + if (test_opt(sb, BARRIER) && is_tail) write_flags |= REQ_FUA | REQ_PREFLUSH; lock_buffer(bh); set_buffer_dirty(bh); @@ -684,7 +684,7 @@ static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc) *crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl)); if (pad_len > 0) ext4_fc_memzero(sb, tl + 1, pad_len, crc); - ext4_fc_submit_bh(sb); + ext4_fc_submit_bh(sb, false); ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); if (ret) @@ -741,7 +741,7 @@ static int ext4_fc_write_tail(struct super_block *sb, u32 crc) tail.fc_crc = cpu_to_le32(crc); ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL); - ext4_fc_submit_bh(sb); + ext4_fc_submit_bh(sb, true); return 0; } -- cgit 1.4.1 From be82fddca81eefd1edbd9b290dfcb2177e24785b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 14 Jan 2021 13:23:04 -0800 Subject: libperf tests: Avoid uninitialized variable warning The variable 'bf' is read (for a write call) without being initialized triggering a memory sanitizer warning. Use 'bf' in the read and switch the write to reading from a string. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210114212304.4018119-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/tests/test-evlist.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index 6d8ebe0c2504..1b225fe34a72 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -208,7 +208,6 @@ static int test_mmap_thread(void) char path[PATH_MAX]; int id, err, pid, go_pipe[2]; union perf_event *event; - char bf; int count = 0; snprintf(path, PATH_MAX, "%s/kernel/debug/tracing/events/syscalls/sys_enter_prctl/id", @@ -229,6 +228,7 @@ static int test_mmap_thread(void) pid = fork(); if (!pid) { int i; + char bf; read(go_pipe[0], &bf, 1); @@ -266,7 +266,7 @@ static int test_mmap_thread(void) perf_evlist__enable(evlist); /* kick the child and wait for it to finish */ - write(go_pipe[1], &bf, 1); + write(go_pipe[1], "A", 1); waitpid(pid, NULL, 0); /* -- cgit 1.4.1 From bba2ea17ef553aea0df80cb64399fe2f70f225dd Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 14 Jan 2021 10:02:49 -0800 Subject: libperf tests: If a test fails return non-zero If a test fails return -1 rather than 0. This is consistent with the return value in test-cpumap.c Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210114180250.3853825-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/tests/test-cpumap.c | 2 +- tools/lib/perf/tests/test-evlist.c | 2 +- tools/lib/perf/tests/test-evsel.c | 2 +- tools/lib/perf/tests/test-threadmap.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c index c8d45091e7c2..c70e9e03af3e 100644 --- a/tools/lib/perf/tests/test-cpumap.c +++ b/tools/lib/perf/tests/test-cpumap.c @@ -27,5 +27,5 @@ int main(int argc, char **argv) perf_cpu_map__put(cpus); __T_END; - return 0; + return tests_failed == 0 ? 0 : -1; } diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index 1b225fe34a72..220a95be47c3 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -409,5 +409,5 @@ int main(int argc, char **argv) test_mmap_cpus(); __T_END; - return 0; + return tests_failed == 0 ? 0 : -1; } diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c index 135722ac965b..0ad82d7a2a51 100644 --- a/tools/lib/perf/tests/test-evsel.c +++ b/tools/lib/perf/tests/test-evsel.c @@ -131,5 +131,5 @@ int main(int argc, char **argv) test_stat_thread_enable(); __T_END; - return 0; + return tests_failed == 0 ? 0 : -1; } diff --git a/tools/lib/perf/tests/test-threadmap.c b/tools/lib/perf/tests/test-threadmap.c index 7dc4d6fbedde..384471441b48 100644 --- a/tools/lib/perf/tests/test-threadmap.c +++ b/tools/lib/perf/tests/test-threadmap.c @@ -27,5 +27,5 @@ int main(int argc, char **argv) perf_thread_map__put(threads); __T_END; - return 0; + return tests_failed == 0 ? 0 : -1; } -- cgit 1.4.1 From 66dd86b2a2bee129c70f7ff054d3a6a2e5f8eb20 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 14 Jan 2021 10:02:50 -0800 Subject: libperf tests: Fail when failing to get a tracepoint id Permissions are necessary to get a tracepoint id. Fail the test when the read fails. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210114180250.3853825-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/tests/test-evlist.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index 220a95be47c3..e2ac0b7f432e 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -214,6 +214,7 @@ static int test_mmap_thread(void) sysfs__mountpoint()); if (filename__read_int(path, &id)) { + tests_failed++; fprintf(stderr, "error: failed to get tracepoint id: %s\n", path); return -1; } -- cgit 1.4.1 From 3ff1e7180abc7f6db413933c110df69157216715 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 15 Jan 2021 16:11:38 +0900 Subject: perf stat: Introduce struct runtime_stat_data To pass more info to the saved_value in the runtime_stat, add a new struct runtime_stat_data. Currently it only has 'ctx' field but later patch will add more. Note that we intentionally pass 0 as ctx to clock-related events for compatibility. It was already there in a few places. So move the code into the saved_value_lookup() explicitly and add a comment. Suggested-by: Andi Kleen Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jin Yao Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210115071139.257042-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 346 +++++++++++++++++++++--------------------- 1 file changed, 173 insertions(+), 173 deletions(-) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 901265127e36..a1565b6e38f2 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -114,6 +114,10 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel, rblist = &st->value_list; + /* don't use context info for clock events */ + if (type == STAT_NSECS) + dm.ctx = 0; + nd = rblist__find(rblist, &dm); if (nd) return container_of(nd, struct saved_value, rb_node); @@ -191,12 +195,17 @@ void perf_stat__reset_shadow_per_stat(struct runtime_stat *st) reset_stat(st); } +struct runtime_stat_data { + int ctx; +}; + static void update_runtime_stat(struct runtime_stat *st, enum stat_type type, - int ctx, int cpu, u64 count) + int cpu, u64 count, + struct runtime_stat_data *rsd) { - struct saved_value *v = saved_value_lookup(NULL, cpu, true, - type, ctx, st); + struct saved_value *v = saved_value_lookup(NULL, cpu, true, type, + rsd->ctx, st); if (v) update_stats(&v->stats, count); @@ -210,73 +219,75 @@ static void update_runtime_stat(struct runtime_stat *st, void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, int cpu, struct runtime_stat *st) { - int ctx = evsel_context(counter); u64 count_ns = count; struct saved_value *v; + struct runtime_stat_data rsd = { + .ctx = evsel_context(counter), + }; count *= counter->scale; if (evsel__is_clock(counter)) - update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns); + update_runtime_stat(st, STAT_NSECS, cpu, count_ns, &rsd); else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) - update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count); + update_runtime_stat(st, STAT_CYCLES, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) - update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count); + update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TRANSACTION_START)) - update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count); + update_runtime_stat(st, STAT_TRANSACTION, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, ELISION_START)) - update_runtime_stat(st, STAT_ELISION, ctx, cpu, count); + update_runtime_stat(st, STAT_ELISION, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING)) update_runtime_stat(st, STAT_TOPDOWN_RETIRING, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC)) update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND)) update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND)) update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND, - ctx, cpu, count); + cpu, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, - ctx, cpu, count); + cpu, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, - ctx, cpu, count); + cpu, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) - update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count); + update_runtime_stat(st, STAT_BRANCHES, cpu, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) - update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count); + update_runtime_stat(st, STAT_CACHEREFS, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) - update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_L1_DCACHE, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) - update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_L1_ICACHE, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL)) - update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_LL_CACHE, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) - update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_DTLB_CACHE, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) - update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_ITLB_CACHE, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, SMI_NUM)) - update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count); + update_runtime_stat(st, STAT_SMI_NUM, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, APERF)) - update_runtime_stat(st, STAT_APERF, ctx, cpu, count); + update_runtime_stat(st, STAT_APERF, cpu, count, &rsd); if (counter->collect_stat) { v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st); @@ -422,11 +433,12 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) } static double runtime_stat_avg(struct runtime_stat *st, - enum stat_type type, int ctx, int cpu) + enum stat_type type, int cpu, + struct runtime_stat_data *rsd) { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, ctx, st); + v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st); if (!v) return 0.0; @@ -434,11 +446,12 @@ static double runtime_stat_avg(struct runtime_stat *st, } static double runtime_stat_n(struct runtime_stat *st, - enum stat_type type, int ctx, int cpu) + enum stat_type type, int cpu, + struct runtime_stat_data *rsd) { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, ctx, st); + v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st); if (!v) return 0.0; @@ -446,16 +459,15 @@ static double runtime_stat_n(struct runtime_stat *st, } static void print_stalled_cycles_frontend(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -470,16 +482,15 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config, } static void print_stalled_cycles_backend(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -490,17 +501,15 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config, } static void print_branch_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu); + total = runtime_stat_avg(st, STAT_BRANCHES, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -511,18 +520,15 @@ static void print_branch_misses(struct perf_stat_config *config, } static void print_l1_dcache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) - + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -533,18 +539,15 @@ static void print_l1_dcache_misses(struct perf_stat_config *config, } static void print_l1_icache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) - + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -554,17 +557,15 @@ static void print_l1_icache_misses(struct perf_stat_config *config, } static void print_dtlb_cache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -574,17 +575,15 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config, } static void print_itlb_cache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -594,17 +593,15 @@ static void print_itlb_cache_misses(struct perf_stat_config *config, } static void print_ll_cache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_LL_CACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -662,56 +659,61 @@ static double sanitize_val(double x) return x; } -static double td_total_slots(int ctx, int cpu, struct runtime_stat *st) +static double td_total_slots(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { - return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu); + return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu, rsd); } -static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st) +static double td_bad_spec(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { double bad_spec = 0; double total_slots; double total; - total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) - - runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) + - runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu, rsd) - + runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu, rsd) + + runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu, rsd); - total_slots = td_total_slots(ctx, cpu, st); + total_slots = td_total_slots(cpu, st, rsd); if (total_slots) bad_spec = total / total_slots; return sanitize_val(bad_spec); } -static double td_retiring(int ctx, int cpu, struct runtime_stat *st) +static double td_retiring(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { double retiring = 0; - double total_slots = td_total_slots(ctx, cpu, st); + double total_slots = td_total_slots(cpu, st, rsd); double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, - ctx, cpu); + cpu, rsd); if (total_slots) retiring = ret_slots / total_slots; return retiring; } -static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st) +static double td_fe_bound(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { double fe_bound = 0; - double total_slots = td_total_slots(ctx, cpu, st); + double total_slots = td_total_slots(cpu, st, rsd); double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES, - ctx, cpu); + cpu, rsd); if (total_slots) fe_bound = fetch_bub / total_slots; return fe_bound; } -static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) +static double td_be_bound(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { - double sum = (td_fe_bound(ctx, cpu, st) + - td_bad_spec(ctx, cpu, st) + - td_retiring(ctx, cpu, st)); + double sum = (td_fe_bound(cpu, st, rsd) + + td_bad_spec(cpu, st, rsd) + + td_retiring(cpu, st, rsd)); if (sum == 0) return 0; return sanitize_val(1.0 - sum); @@ -722,15 +724,15 @@ static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) * the ratios we need to recreate the sum. */ -static double td_metric_ratio(int ctx, int cpu, - enum stat_type type, - struct runtime_stat *stat) +static double td_metric_ratio(int cpu, enum stat_type type, + struct runtime_stat *stat, + struct runtime_stat_data *rsd) { - double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) + - runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) + - runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) + - runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu); - double d = runtime_stat_avg(stat, type, ctx, cpu); + double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd); + double d = runtime_stat_avg(stat, type, cpu, rsd); if (sum) return d / sum; @@ -742,34 +744,33 @@ static double td_metric_ratio(int ctx, int cpu, * We allow two missing. */ -static bool full_td(int ctx, int cpu, - struct runtime_stat *stat) +static bool full_td(int cpu, struct runtime_stat *stat, + struct runtime_stat_data *rsd) { int c = 0; - if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd) > 0) c++; return c >= 2; } -static void print_smi_cost(struct perf_stat_config *config, - int cpu, struct evsel *evsel, +static void print_smi_cost(struct perf_stat_config *config, int cpu, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double smi_num, aperf, cycles, cost = 0.0; - int ctx = evsel_context(evsel); const char *color = NULL; - smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu); - aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu); - cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu, rsd); + aperf = runtime_stat_avg(st, STAT_APERF, cpu, rsd); + cycles = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); if ((cycles == 0) || (aperf == 0)) return; @@ -930,12 +931,14 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric_t print_metric = out->print_metric; double total, ratio = 0.0, total2; const char *color = NULL; - int ctx = evsel_context(evsel); + struct runtime_stat_data rsd = { + .ctx = evsel_context(evsel), + }; struct metric_event *me; int num = 1; if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); if (total) { ratio = avg / total; @@ -945,12 +948,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0); } - total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, - ctx, cpu); + total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu, &rsd); total = max(total, runtime_stat_avg(st, STAT_STALLED_CYCLES_BACK, - ctx, cpu)); + cpu, &rsd)); if (total && avg) { out->new_line(config, ctxp); @@ -960,8 +962,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ratio); } } else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { - if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) - print_branch_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_BRANCHES, cpu, &rsd) != 0) + print_branch_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all branches", 0); } else if ( @@ -970,8 +972,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0) - print_l1_dcache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_L1_DCACHE, cpu, &rsd) != 0) + print_l1_dcache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0); } else if ( @@ -980,8 +982,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0) - print_l1_icache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_L1_ICACHE, cpu, &rsd) != 0) + print_l1_icache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0); } else if ( @@ -990,8 +992,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0) - print_dtlb_cache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu, &rsd) != 0) + print_dtlb_cache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0); } else if ( @@ -1000,8 +1002,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0) - print_itlb_cache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu, &rsd) != 0) + print_itlb_cache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0); } else if ( @@ -1010,27 +1012,27 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0) - print_ll_cache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_LL_CACHE, cpu, &rsd) != 0) + print_ll_cache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0); } else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { - total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu); + total = runtime_stat_avg(st, STAT_CACHEREFS, cpu, &rsd); if (total) ratio = avg * 100 / total; - if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0) + if (runtime_stat_n(st, STAT_CACHEREFS, cpu, &rsd) != 0) print_metric(config, ctxp, NULL, "%8.3f %%", "of all cache refs", ratio); else print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0); } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st); + print_stalled_cycles_frontend(config, cpu, avg, out, st, &rsd); } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(config, cpu, evsel, avg, out, st); + print_stalled_cycles_backend(config, cpu, avg, out, st, &rsd); } else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { - total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); + total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd); if (total) { ratio = avg / total; @@ -1039,7 +1041,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "Ghz", 0); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); if (total) print_metric(config, ctxp, NULL, @@ -1049,8 +1051,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "transactional cycles", 0); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); - total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); + total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); if (total2 < avg) total2 = avg; @@ -1060,21 +1062,19 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, else print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0); } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { - total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, - ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); if (avg) ratio = total / avg; - if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0) + if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu, &rsd) != 0) print_metric(config, ctxp, NULL, "%8.0f", "cycles / transaction", ratio); else print_metric(config, ctxp, NULL, NULL, "cycles / transaction", 0); } else if (perf_stat_evsel__is(evsel, ELISION_START)) { - total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, - ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); if (avg) ratio = total / avg; @@ -1087,28 +1087,28 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, else print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { - double fe_bound = td_fe_bound(ctx, cpu, st); + double fe_bound = td_fe_bound(cpu, st, &rsd); if (fe_bound > 0.2) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { - double retiring = td_retiring(ctx, cpu, st); + double retiring = td_retiring(cpu, st, &rsd); if (retiring > 0.7) color = PERF_COLOR_GREEN; print_metric(config, ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { - double bad_spec = td_bad_spec(ctx, cpu, st); + double bad_spec = td_bad_spec(cpu, st, &rsd); if (bad_spec > 0.1) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { - double be_bound = td_be_bound(ctx, cpu, st); + double be_bound = td_be_bound(cpu, st, &rsd); const char *name = "backend bound"; static int have_recovery_bubbles = -1; @@ -1121,43 +1121,43 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (be_bound > 0.2) color = PERF_COLOR_RED; - if (td_total_slots(ctx, cpu, st) > 0) + if (td_total_slots(cpu, st, &rsd) > 0) print_metric(config, ctxp, color, "%8.1f%%", name, be_bound * 100.); else print_metric(config, ctxp, NULL, NULL, name, 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) && - full_td(ctx, cpu, st)) { - double retiring = td_metric_ratio(ctx, cpu, - STAT_TOPDOWN_RETIRING, st); - + full_td(cpu, st, &rsd)) { + double retiring = td_metric_ratio(cpu, + STAT_TOPDOWN_RETIRING, st, + &rsd); if (retiring > 0.7) color = PERF_COLOR_GREEN; print_metric(config, ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) && - full_td(ctx, cpu, st)) { - double fe_bound = td_metric_ratio(ctx, cpu, - STAT_TOPDOWN_FE_BOUND, st); - + full_td(cpu, st, &rsd)) { + double fe_bound = td_metric_ratio(cpu, + STAT_TOPDOWN_FE_BOUND, st, + &rsd); if (fe_bound > 0.2) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) && - full_td(ctx, cpu, st)) { - double be_bound = td_metric_ratio(ctx, cpu, - STAT_TOPDOWN_BE_BOUND, st); - + full_td(cpu, st, &rsd)) { + double be_bound = td_metric_ratio(cpu, + STAT_TOPDOWN_BE_BOUND, st, + &rsd); if (be_bound > 0.2) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "backend bound", be_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) && - full_td(ctx, cpu, st)) { - double bad_spec = td_metric_ratio(ctx, cpu, - STAT_TOPDOWN_BAD_SPEC, st); - + full_td(cpu, st, &rsd)) { + double bad_spec = td_metric_ratio(cpu, + STAT_TOPDOWN_BAD_SPEC, st, + &rsd); if (bad_spec > 0.1) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", @@ -1165,11 +1165,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, } else if (evsel->metric_expr) { generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL, evsel->name, evsel->metric_name, NULL, 1, cpu, out, st); - } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { + } else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) { char unit = 'M'; char unit_buf[10]; - total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); + total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd); if (total) ratio = 1000.0 * avg / total; @@ -1180,7 +1180,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { - print_smi_cost(config, cpu, evsel, out, st); + print_smi_cost(config, cpu, out, st, &rsd); } else { num = 0; } -- cgit 1.4.1 From a1bf23052bdfe30ec3c693cf32feb2d79114ac16 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 15 Jan 2021 16:11:39 +0900 Subject: perf stat: Take cgroups into account for shadow stats As of now it doesn't consider cgroups when collecting shadow stats and metrics so counter values from different cgroups will be saved in a same slot. This resulted in incorrect numbers when those cgroups have different workloads. For example, let's look at the scenario below: cgroups A and C runs same workload which burns a cpu while cgroup B runs a light workload. $ perf stat -a -e cycles,instructions --for-each-cgroup A,B,C sleep 1 Performance counter stats for 'system wide': 3,958,116,522 cycles A 6,722,650,929 instructions A # 2.53 insn per cycle 1,132,741 cycles B 571,743 instructions B # 0.00 insn per cycle 4,007,799,935 cycles C 6,793,181,523 instructions C # 2.56 insn per cycle 1.001050869 seconds time elapsed When I run 'perf stat' with single workload, it usually shows IPC around 1.7. We can verify it (6,722,650,929.0 / 3,958,116,522 = 1.698) for cgroup A. But in this case, since cgroups are ignored, cycles are averaged so it used the lower value for IPC calculation and resulted in around 2.5. avg cycle: (3958116522 + 1132741 + 4007799935) / 3 = 2655683066 IPC (A) : 6722650929 / 2655683066 = 2.531 IPC (B) : 571743 / 2655683066 = 0.0002 IPC (C) : 6793181523 / 2655683066 = 2.557 We can simply compare cgroup pointers in the evsel and it'll be NULL when cgroups are not specified. With this patch, I can see correct numbers like below: $ perf stat -a -e cycles,instructions --for-each-cgroup A,B,C sleep 1 Performance counter stats for 'system wide': 4,171,051,687 cycles A 7,219,793,922 instructions A # 1.73 insn per cycle 1,051,189 cycles B 583,102 instructions B # 0.55 insn per cycle 4,171,124,710 cycles C 7,192,944,580 instructions C # 1.72 insn per cycle 1.007909814 seconds time elapsed Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jin Yao Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210115071139.257042-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index a1565b6e38f2..12eafd12a693 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -8,6 +8,7 @@ #include "evlist.h" #include "expr.h" #include "metricgroup.h" +#include "cgroup.h" #include /* @@ -28,6 +29,7 @@ struct saved_value { enum stat_type type; int ctx; int cpu; + struct cgroup *cgrp; struct runtime_stat *stat; struct stats stats; u64 metric_total; @@ -57,6 +59,9 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry) if (a->ctx != b->ctx) return a->ctx - b->ctx; + if (a->cgrp != b->cgrp) + return (char *)a->cgrp < (char *)b->cgrp ? -1 : +1; + if (a->evsel == NULL && b->evsel == NULL) { if (a->stat == b->stat) return 0; @@ -100,7 +105,8 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel, bool create, enum stat_type type, int ctx, - struct runtime_stat *st) + struct runtime_stat *st, + struct cgroup *cgrp) { struct rblist *rblist; struct rb_node *nd; @@ -110,6 +116,7 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel, .type = type, .ctx = ctx, .stat = st, + .cgrp = cgrp, }; rblist = &st->value_list; @@ -197,6 +204,7 @@ void perf_stat__reset_shadow_per_stat(struct runtime_stat *st) struct runtime_stat_data { int ctx; + struct cgroup *cgrp; }; static void update_runtime_stat(struct runtime_stat *st, @@ -205,7 +213,7 @@ static void update_runtime_stat(struct runtime_stat *st, struct runtime_stat_data *rsd) { struct saved_value *v = saved_value_lookup(NULL, cpu, true, type, - rsd->ctx, st); + rsd->ctx, st, rsd->cgrp); if (v) update_stats(&v->stats, count); @@ -223,6 +231,7 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, struct saved_value *v; struct runtime_stat_data rsd = { .ctx = evsel_context(counter), + .cgrp = counter->cgrp, }; count *= counter->scale; @@ -290,13 +299,14 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, update_runtime_stat(st, STAT_APERF, cpu, count, &rsd); if (counter->collect_stat) { - v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st); + v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st, + rsd.cgrp); update_stats(&v->stats, count); if (counter->metric_leader) v->metric_total += count; } else if (counter->metric_leader) { v = saved_value_lookup(counter->metric_leader, - cpu, true, STAT_NONE, 0, st); + cpu, true, STAT_NONE, 0, st, rsd.cgrp); v->metric_total += count; v->metric_other++; } @@ -438,7 +448,7 @@ static double runtime_stat_avg(struct runtime_stat *st, { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st); + v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp); if (!v) return 0.0; @@ -451,7 +461,7 @@ static double runtime_stat_n(struct runtime_stat *st, { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st); + v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp); if (!v) return 0.0; @@ -805,7 +815,8 @@ static int prepare_metric(struct evsel **metric_events, scale = 1e-9; } else { v = saved_value_lookup(metric_events[i], cpu, false, - STAT_NONE, 0, st); + STAT_NONE, 0, st, + metric_events[i]->cgrp); if (!v) break; stats = &v->stats; @@ -933,6 +944,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, const char *color = NULL; struct runtime_stat_data rsd = { .ctx = evsel_context(evsel), + .cgrp = evsel->cgrp, }; struct metric_event *me; int num = 1; -- cgit 1.4.1 From 5501e9229a80d95a1ea68609f44c447a75d23ed5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 7 Jan 2021 19:41:59 +0200 Subject: perf intel-pt: Fix 'CPU too large' error In some cases, the number of cpus (nr_cpus_online) is confused with the maximum cpu number (nr_cpus_avail), which results in the error in the example below: Example on system with 8 cpus: Before: # echo 0 > /sys/devices/system/cpu/cpu2/online # ./perf record --kcore -e intel_pt// taskset --cpu-list 7 uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.147 MB perf.data ] # ./perf script --itrace=e Requested CPU 7 too large. Consider raising MAX_NR_CPUS 0x25908 [0x8]: failed to process type: 68 [Invalid argument] After: # ./perf script --itrace=e # Fixes: 8c7274691f0d ("perf machine: Replace MAX_NR_CPUS with perf_env::nr_cpus_online") Fixes: 7df4e36a4785 ("perf session: Replace MAX_NR_CPUS with perf_env::nr_cpus_online") Signed-off-by: Adrian Hunter Tested-by: Kan Liang Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20210107174159.24897-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 4 ++-- tools/perf/util/session.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index f841f3503cae..1e9d3f982b47 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2980,7 +2980,7 @@ int machines__for_each_thread(struct machines *machines, pid_t machine__get_current_tid(struct machine *machine, int cpu) { - int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS); + int nr_cpus = min(machine->env->nr_cpus_avail, MAX_NR_CPUS); if (cpu < 0 || cpu >= nr_cpus || !machine->current_tid) return -1; @@ -2992,7 +2992,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, pid_t tid) { struct thread *thread; - int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS); + int nr_cpus = min(machine->env->nr_cpus_avail, MAX_NR_CPUS); if (cpu < 0) return -EINVAL; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 50ff9795a4f1..25adbcce0281 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2404,7 +2404,7 @@ int perf_session__cpu_bitmap(struct perf_session *session, { int i, err = -1; struct perf_cpu_map *map; - int nr_cpus = min(session->header.env.nr_cpus_online, MAX_NR_CPUS); + int nr_cpus = min(session->header.env.nr_cpus_avail, MAX_NR_CPUS); for (i = 0; i < PERF_TYPE_MAX; ++i) { struct evsel *evsel; -- cgit 1.4.1 From 648b054a4647cd62e13ba79f398b8b97a7c82b19 Mon Sep 17 00:00:00 2001 From: Al Grant Date: Tue, 24 Nov 2020 19:58:17 +0000 Subject: perf inject: Correct event attribute sizes When 'perf inject' reads a perf.data file from an older version of perf, it writes event attributes into the output with the original size field, but lays them out as if they had the size currently used. Readers see a corrupt file. Update the size field to match the layout. Signed-off-by: Al Grant Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20201124195818.30603-1-al.grant@arm.com Signed-off-by: Denis Nikitin Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 062383e225a3..c4ed3dc2c8f4 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3323,6 +3323,14 @@ int perf_session__write_header(struct perf_session *session, attr_offset = lseek(ff.fd, 0, SEEK_CUR); evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.size < sizeof(evsel->core.attr)) { + /* + * We are likely in "perf inject" and have read + * from an older file. Update attr size so that + * reader gets the right offset to the ids. + */ + evsel->core.attr.size = sizeof(evsel->core.attr); + } f_attr = (struct perf_file_attr){ .attr = evsel->core.attr, .ids = { -- cgit 1.4.1 From a8d13dbccb137c46fead2ec1a4f1fbc8cfc9ea91 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Jan 2021 16:04:23 -0700 Subject: io_uring: ensure finish_wait() is always called in __io_uring_task_cancel() If we enter with requests pending and performm cancelations, we'll have a different inflight count before and after calling prepare_to_wait(). This causes the loop to restart. If we actually ended up canceling everything, or everything completed in-between, then we'll break out of the loop without calling finish_wait() on the waitqueue. This can trigger a warning on exit_signals(), as we leave the task state in TASK_UNINTERRUPTIBLE. Put a finish_wait() after the loop to catch that case. Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 06cc79d39586..985a9e3f976d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9101,6 +9101,7 @@ void __io_uring_task_cancel(void) finish_wait(&tctx->wait, &wait); } while (1); + finish_wait(&tctx->wait, &wait); atomic_dec(&tctx->in_idle); io_uring_remove_task_files(tctx); -- cgit 1.4.1 From a959a9782fa87669feeed095ced5d78181a7c02d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 Jan 2021 18:19:26 +0100 Subject: iov_iter: fix the uaccess area in copy_compat_iovec_from_user sizeof needs to be called on the compat pointer, not the native one. Fixes: 89cd35c58bc2 ("iov_iter: transparently handle compat iovecs in import_iovec") Reported-by: David Laight Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- lib/iov_iter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 1635111c5bd2..a21e6a5792c5 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1658,7 +1658,7 @@ static int copy_compat_iovec_from_user(struct iovec *iov, (const struct compat_iovec __user *)uvec; int ret = -EFAULT, i; - if (!user_access_begin(uvec, nr_segs * sizeof(*uvec))) + if (!user_access_begin(uiov, nr_segs * sizeof(*uiov))) return -EFAULT; for (i = 0; i < nr_segs; i++) { -- cgit 1.4.1 From 797f0375dd2ef5cdc68ac23450cbae9a5c67a74e Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Mon, 11 Jan 2021 15:45:01 -0800 Subject: RISC-V: Do not allocate memblock while iterating reserved memblocks Currently, resource tree allocates memory blocks while iterating on the list. It leads to following kernel warning because memblock allocation also invokes memory block reservation API. [ 0.000000] ------------[ cut here ]------------ [ 0.000000] WARNING: CPU: 0 PID: 0 at kernel/resource.c:795 __insert_resource+0x8e/0xd0 [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.10.0-00022-ge20097fb37e2-dirty #549 [ 0.000000] epc: c00125c2 ra : c001262c sp : c1c01f50 [ 0.000000] gp : c1d456e0 tp : c1c0a980 t0 : ffffcf20 [ 0.000000] t1 : 00000000 t2 : 00000000 s0 : c1c01f60 [ 0.000000] s1 : ffffcf00 a0 : ffffff00 a1 : c1c0c0c4 [ 0.000000] a2 : 80c12b15 a3 : 80402000 a4 : 80402000 [ 0.000000] a5 : c1c0c0c4 a6 : 80c12b15 a7 : f5faf600 [ 0.000000] s2 : c1c0c0c4 s3 : c1c0e000 s4 : c1009a80 [ 0.000000] s5 : c1c0c000 s6 : c1d48000 s7 : c1613b4c [ 0.000000] s8 : 00000fff s9 : 80000200 s10: c1613b40 [ 0.000000] s11: 00000000 t3 : c1d4a000 t4 : ffffffff This is also unnecessary as we can pre-compute the total memblocks required for each memory region and allocate it before the loop. It save precious boot time not going through memblock allocation code every time. Fixes: 00ab027a3b82 ("RISC-V: Add kernel image sections to the resource tree") Reviewed-by: Anup Patel Tested-by: Geert Uytterhoeven Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/setup.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 1d85e9bf783c..3fa3f26dde85 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -127,7 +127,9 @@ static void __init init_resources(void) { struct memblock_region *region = NULL; struct resource *res = NULL; - int ret = 0; + struct resource *mem_res = NULL; + size_t mem_res_sz = 0; + int ret = 0, i = 0; code_res.start = __pa_symbol(_text); code_res.end = __pa_symbol(_etext) - 1; @@ -145,16 +147,17 @@ static void __init init_resources(void) bss_res.end = __pa_symbol(__bss_stop) - 1; bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + mem_res_sz = (memblock.memory.cnt + memblock.reserved.cnt) * sizeof(*mem_res); + mem_res = memblock_alloc(mem_res_sz, SMP_CACHE_BYTES); + if (!mem_res) + panic("%s: Failed to allocate %zu bytes\n", __func__, mem_res_sz); /* * Start by adding the reserved regions, if they overlap * with /memory regions, insert_resource later on will take * care of it. */ for_each_reserved_mem_region(region) { - res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); - if (!res) - panic("%s: Failed to allocate %zu bytes\n", __func__, - sizeof(struct resource)); + res = &mem_res[i++]; res->name = "Reserved"; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; @@ -171,8 +174,10 @@ static void __init init_resources(void) * Ignore any other reserved regions within * system memory. */ - if (memblock_is_memory(res->start)) + if (memblock_is_memory(res->start)) { + memblock_free((phys_addr_t) res, sizeof(struct resource)); continue; + } ret = add_resource(&iomem_resource, res); if (ret < 0) @@ -181,10 +186,7 @@ static void __init init_resources(void) /* Add /memory regions to the resource tree */ for_each_mem_region(region) { - res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); - if (!res) - panic("%s: Failed to allocate %zu bytes\n", __func__, - sizeof(struct resource)); + res = &mem_res[i++]; if (unlikely(memblock_is_nomap(region))) { res->name = "Reserved"; @@ -205,9 +207,9 @@ static void __init init_resources(void) return; error: - memblock_free((phys_addr_t) res, sizeof(struct resource)); /* Better an empty resource tree than an inconsistent one */ release_child_resources(&iomem_resource); + memblock_free((phys_addr_t) mem_res, mem_res_sz); } -- cgit 1.4.1 From abb8e86b269604e906a6a4af7a09f04b72dbb862 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Mon, 11 Jan 2021 15:45:02 -0800 Subject: RISC-V: Set current memblock limit Currently, linux kernel can not use last 4k bytes of addressable space because IS_ERR_VALUE macro treats those as an error. This will be an issue for RV32 as any memblock allocator potentially allocate chunk of memory from the end of DRAM (2GB) leading bad address error even though the address was technically valid. Fix this issue by limiting the memblock if available memory spans the entire address space. Reviewed-by: Anup Patel Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index bf5379135e39..7cd4993f4ff2 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -157,9 +157,10 @@ disable: void __init setup_bootmem(void) { phys_addr_t mem_start = 0; - phys_addr_t start, end = 0; + phys_addr_t start, dram_end, end = 0; phys_addr_t vmlinux_end = __pa_symbol(&_end); phys_addr_t vmlinux_start = __pa_symbol(&_start); + phys_addr_t max_mapped_addr = __pa(~(ulong)0); u64 i; /* Find the memory region containing the kernel */ @@ -181,7 +182,18 @@ void __init setup_bootmem(void) /* Reserve from the start of the kernel to the end of the kernel */ memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); - max_pfn = PFN_DOWN(memblock_end_of_DRAM()); + dram_end = memblock_end_of_DRAM(); + + /* + * memblock allocator is not aware of the fact that last 4K bytes of + * the addressable memory can not be mapped because of IS_ERR_VALUE + * macro. Make sure that last 4k bytes are not usable by memblock + * if end of dram is equal to maximum addressable memory. + */ + if (max_mapped_addr == (dram_end - 1)) + memblock_set_current_limit(max_mapped_addr - 4096); + + max_pfn = PFN_DOWN(dram_end); max_low_pfn = max_pfn; dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn)); set_max_mapnr(max_low_pfn); -- cgit 1.4.1 From e557793799c5a8406afb08aa170509619f7eac36 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Mon, 11 Jan 2021 15:45:04 -0800 Subject: RISC-V: Fix maximum allowed phsyical memory for RV32 Linux kernel can only map 1GB of address space for RV32 as the page offset is set to 0xC0000000. The current description in the Kconfig is confusing as it indicates that RV32 can support 2GB of physical memory. That is simply not true for current kernel. In future, a 2GB split support can be added to allow 2GB physical address space. Reviewed-by: Anup Patel Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 81b76d44725d..e9e2c1f0a690 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -137,7 +137,7 @@ config PA_BITS config PAGE_OFFSET hex - default 0xC0000000 if 32BIT && MAXPHYSMEM_2GB + default 0xC0000000 if 32BIT && MAXPHYSMEM_1GB default 0x80000000 if 64BIT && !MMU default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB @@ -247,10 +247,12 @@ config MODULE_SECTIONS choice prompt "Maximum Physical Memory" - default MAXPHYSMEM_2GB if 32BIT + default MAXPHYSMEM_1GB if 32BIT default MAXPHYSMEM_2GB if 64BIT && CMODEL_MEDLOW default MAXPHYSMEM_128GB if 64BIT && CMODEL_MEDANY + config MAXPHYSMEM_1GB + bool "1GiB" config MAXPHYSMEM_2GB bool "2GiB" config MAXPHYSMEM_128GB -- cgit 1.4.1 From 29a951dfb3c3263c3a0f3bd9f7f2c2cfde4baedb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 8 Jan 2021 13:13:41 -0800 Subject: mm: fix clear_refs_write locking Turning page table entries read-only requires the mmap_sem held for writing. So stop doing the odd games with turning things from read locks to write locks and back. Just get the write lock. Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ee5a235b3056..ab7d700b2caa 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1215,41 +1215,26 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, .type = type, }; + if (mmap_write_lock_killable(mm)) { + count = -EINTR; + goto out_mm; + } if (type == CLEAR_REFS_MM_HIWATER_RSS) { - if (mmap_write_lock_killable(mm)) { - count = -EINTR; - goto out_mm; - } - /* * Writing 5 to /proc/pid/clear_refs resets the peak * resident set size to this mm's current rss value. */ reset_mm_hiwater_rss(mm); - mmap_write_unlock(mm); - goto out_mm; + goto out_unlock; } - if (mmap_read_lock_killable(mm)) { - count = -EINTR; - goto out_mm; - } tlb_gather_mmu(&tlb, mm, 0, -1); if (type == CLEAR_REFS_SOFT_DIRTY) { for (vma = mm->mmap; vma; vma = vma->vm_next) { if (!(vma->vm_flags & VM_SOFTDIRTY)) continue; - mmap_read_unlock(mm); - if (mmap_write_lock_killable(mm)) { - count = -EINTR; - goto out_mm; - } - for (vma = mm->mmap; vma; vma = vma->vm_next) { - vma->vm_flags &= ~VM_SOFTDIRTY; - vma_set_page_prot(vma); - } - mmap_write_downgrade(mm); - break; + vma->vm_flags &= ~VM_SOFTDIRTY; + vma_set_page_prot(vma); } mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY, @@ -1261,7 +1246,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, if (type == CLEAR_REFS_SOFT_DIRTY) mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb, 0, -1); - mmap_read_unlock(mm); +out_unlock: + mmap_write_unlock(mm); out_mm: mmput(mm); } -- cgit 1.4.1 From 9348b73c2e1bfea74ccd4a44fb4ccc7276ab9623 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 9 Jan 2021 17:09:10 -0800 Subject: mm: don't play games with pinned pages in clear_page_refs Turning a pinned page read-only breaks the pinning after COW. Don't do it. The whole "track page soft dirty" state doesn't work with pinned pages anyway, since the page might be dirtied by the pinning entity without ever being noticed in the page tables. Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ab7d700b2caa..602e3a52884d 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1035,6 +1035,25 @@ struct clear_refs_private { }; #ifdef CONFIG_MEM_SOFT_DIRTY + +#define is_cow_mapping(flags) (((flags) & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) + +static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte) +{ + struct page *page; + + if (!pte_write(pte)) + return false; + if (!is_cow_mapping(vma->vm_flags)) + return false; + if (likely(!atomic_read(&vma->vm_mm->has_pinned))) + return false; + page = vm_normal_page(vma, addr, pte); + if (!page) + return false; + return page_maybe_dma_pinned(page); +} + static inline void clear_soft_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *pte) { @@ -1049,6 +1068,8 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, if (pte_present(ptent)) { pte_t old_pte; + if (pte_is_pinned(vma, addr, ptent)) + return; old_pte = ptep_modify_prot_start(vma, addr, pte); ptent = pte_wrprotect(old_pte); ptent = pte_clear_soft_dirty(ptent); -- cgit 1.4.1 From d36a1dd9f77ae1e72da48f4123ed35627848507d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 5 Jan 2021 14:43:46 -0500 Subject: dump_common_audit_data(): fix racy accesses to ->d_name We are not guaranteed the locking environment that would prevent dentry getting renamed right under us. And it's possible for old long name to be freed after rename, leading to UAF here. Cc: stable@kernel.org # v2.6.2+ Signed-off-by: Al Viro --- security/lsm_audit.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 7d8026f3f377..a0cd28cd31a8 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -275,7 +275,9 @@ static void dump_common_audit_data(struct audit_buffer *ab, struct inode *inode; audit_log_format(ab, " name="); + spin_lock(&a->u.dentry->d_lock); audit_log_untrustedstring(ab, a->u.dentry->d_name.name); + spin_unlock(&a->u.dentry->d_lock); inode = d_backing_inode(a->u.dentry); if (inode) { @@ -293,8 +295,9 @@ static void dump_common_audit_data(struct audit_buffer *ab, dentry = d_find_alias(inode); if (dentry) { audit_log_format(ab, " name="); - audit_log_untrustedstring(ab, - dentry->d_name.name); + spin_lock(&dentry->d_lock); + audit_log_untrustedstring(ab, dentry->d_name.name); + spin_unlock(&dentry->d_lock); dput(dentry); } audit_log_format(ab, " dev="); -- cgit 1.4.1 From feb889fb40fafc6933339cf1cca8f770126819fb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 16 Jan 2021 15:34:57 -0800 Subject: mm: don't put pinned pages into the swap cache So technically there is nothing wrong with adding a pinned page to the swap cache, but the pinning obviously means that the page can't actually be free'd right now anyway, so it's a bit pointless. However, the real problem is not with it being a bit pointless: the real issue is that after we've added it to the swap cache, we'll try to unmap the page. That will succeed, because the code in mm/rmap.c doesn't know or care about pinned pages. Even the unmapping isn't fatal per se, since the page will stay around in memory due to the pinning, and we do hold the connection to it using the swap cache. But when we then touch it next and take a page fault, the logic in do_swap_page() will map it back into the process as a possibly read-only page, and we'll then break the page association on the next COW fault. Honestly, this issue could have been fixed in any of those other places: (a) we could refuse to unmap a pinned page (which makes conceptual sense), or (b) we could make sure to re-map a pinned page writably in do_swap_page(), or (c) we could just make do_wp_page() not COW the pinned page (which was what we historically did before that "mm: do_wp_page() simplification" commit). But while all of them are equally valid models for breaking this chain, not putting pinned pages into the swap cache in the first place is the simplest one by far. It's also the safest one: the reason why do_wp_page() was changed in the first place was that getting the "can I re-use this page" wrong is so fraught with errors. If you do it wrong, you end up with an incorrectly shared page. As a result, using "page_maybe_dma_pinned()" in either do_wp_page() or do_swap_page() would be a serious bug since it is only a (very good) heuristic. Re-using the page requires a hard black-and-white rule with no room for ambiguity. In contrast, saying "this page is very likely dma pinned, so let's not add it to the swap cache and try to unmap it" is an obviously safe thing to do, and if the heuristic might very rarely be a false positive, no harm is done. Fixes: 09854ba94c6a ("mm: do_wp_page() simplification") Reported-and-tested-by: Martin Raiber Cc: Pavel Begunkov Cc: Jens Axboe Cc: Peter Xu Signed-off-by: Linus Torvalds --- mm/vmscan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/vmscan.c b/mm/vmscan.c index 257cba79a96d..b1b574ad199d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1238,6 +1238,8 @@ static unsigned int shrink_page_list(struct list_head *page_list, if (!PageSwapCache(page)) { if (!(sc->gfp_mask & __GFP_IO)) goto keep_locked; + if (page_maybe_dma_pinned(page)) + goto keep_locked; if (PageTransHuge(page)) { /* cannot split THP, skip it */ if (!can_split_huge_page(page, NULL)) -- cgit 1.4.1 From 19c329f6808995b142b3966301f217c831e7cf31 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 17 Jan 2021 16:37:05 -0800 Subject: Linux 5.11-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9e73f82e0d86..b0e4767735dc 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Kleptomaniac Octopus # *DOCUMENTATION* -- cgit 1.4.1 From d2006bb79411c56b7bb6173f753a0ba6172d1fe6 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 22 Jan 2021 10:24:47 +0100 Subject: watchdog: pcwd: drop always-false if from remove callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If pcwd_isa_probe() succeeded pcwd_private.io_addr cannot be NULL. (And if pcwd_isa_probe() failed, pcwd_isa_remove() isn't called.) Acked-by: Guenter Roeck Acked-by: William Breathitt Gray Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20210122092449.426097-2-uwe@kleine-koenig.org Signed-off-by: Takashi Iwai --- drivers/watchdog/pcwd.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/watchdog/pcwd.c b/drivers/watchdog/pcwd.c index e86fa7f8351d..b95cd38f3ceb 100644 --- a/drivers/watchdog/pcwd.c +++ b/drivers/watchdog/pcwd.c @@ -956,9 +956,6 @@ static int pcwd_isa_remove(struct device *dev, unsigned int id) if (debug >= DEBUG) pr_debug("pcwd_isa_remove id=%d\n", id); - if (!pcwd_private.io_addr) - return 1; - /* Disable the board */ if (!nowayout) pcwd_stop(); -- cgit 1.4.1 From 7797b4e00faf3229ef67ac37015dd2ee8e4c7901 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 22 Jan 2021 10:24:48 +0100 Subject: media/radio: Make radio_isa_common_remove() return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of an unconditional return 0, return no value. One of the two callers ignored the return value already before. This simplifies the next patch. Signed-off-by: Uwe Kleine-König Reviewed-by: Hans Verkuil Link: https://lore.kernel.org/r/20210122092449.426097-3-uwe@kleine-koenig.org Signed-off-by: Takashi Iwai --- drivers/media/radio/radio-isa.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/media/radio/radio-isa.c b/drivers/media/radio/radio-isa.c index ad2ac16ff12d..527f4c3b0ca4 100644 --- a/drivers/media/radio/radio-isa.c +++ b/drivers/media/radio/radio-isa.c @@ -273,8 +273,8 @@ err_dev_reg: return res; } -static int radio_isa_common_remove(struct radio_isa_card *isa, - unsigned region_size) +static void radio_isa_common_remove(struct radio_isa_card *isa, + unsigned region_size) { const struct radio_isa_ops *ops = isa->drv->ops; @@ -285,7 +285,6 @@ static int radio_isa_common_remove(struct radio_isa_card *isa, release_region(isa->io, region_size); v4l2_info(&isa->v4l2_dev, "Removed radio card %s\n", isa->drv->card); kfree(isa); - return 0; } int radio_isa_probe(struct device *pdev, unsigned int dev) @@ -342,7 +341,9 @@ int radio_isa_remove(struct device *pdev, unsigned int dev) { struct radio_isa_card *isa = dev_get_drvdata(pdev); - return radio_isa_common_remove(isa, isa->drv->region_size); + radio_isa_common_remove(isa, isa->drv->region_size); + + return 0; } EXPORT_SYMBOL_GPL(radio_isa_remove); -- cgit 1.4.1 From 30e88d017fcbeb50c4b07577fe059558361067e7 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 22 Jan 2021 10:24:49 +0100 Subject: isa: Make the remove callback for isa drivers return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver core ignores the return value of the remove callback, so don't give isa drivers the chance to provide a value. Adapt all isa_drivers with a remove callbacks accordingly; they all return 0 unconditionally anyhow. Acked-by: Marc Kleine-Budde # for drivers/net/can/sja1000/tscan1.c Acked-by: William Breathitt Gray Acked-by: Wolfram Sang # for drivers/i2c/ Reviewed-by: Takashi Iway # for sound/ Reviewed-by: Hans Verkuil # for drivers/media/ Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20210122092449.426097-4-uwe@kleine-koenig.org Signed-off-by: Takashi Iwai --- drivers/base/isa.c | 2 +- drivers/i2c/busses/i2c-elektor.c | 4 +--- drivers/i2c/busses/i2c-pca-isa.c | 4 +--- drivers/input/touchscreen/htcpen.c | 4 +--- drivers/media/radio/radio-isa.c | 4 +--- drivers/media/radio/radio-isa.h | 2 +- drivers/media/radio/radio-sf16fmr2.c | 4 +--- drivers/net/can/sja1000/tscan1.c | 4 +--- drivers/net/ethernet/3com/3c509.c | 3 +-- drivers/scsi/advansys.c | 3 +-- drivers/scsi/aha1542.c | 3 +-- drivers/scsi/fdomain_isa.c | 3 +-- drivers/scsi/g_NCR5380.c | 5 ++--- drivers/watchdog/pcwd.c | 4 +--- include/linux/isa.h | 2 +- sound/isa/ad1848/ad1848.c | 3 +-- sound/isa/adlib.c | 3 +-- sound/isa/cmi8328.c | 3 +-- sound/isa/cmi8330.c | 3 +-- sound/isa/cs423x/cs4231.c | 3 +-- sound/isa/cs423x/cs4236.c | 3 +-- sound/isa/es1688/es1688.c | 3 +-- sound/isa/es18xx.c | 5 ++--- sound/isa/galaxy/galaxy.c | 3 +-- sound/isa/gus/gusclassic.c | 3 +-- sound/isa/gus/gusextreme.c | 3 +-- sound/isa/gus/gusmax.c | 3 +-- sound/isa/gus/interwave.c | 3 +-- sound/isa/msnd/msnd_pinnacle.c | 3 +-- sound/isa/opl3sa2.c | 3 +-- sound/isa/opti9xx/miro.c | 3 +-- sound/isa/opti9xx/opti92x-ad1848.c | 5 ++--- sound/isa/sb/jazz16.c | 3 +-- sound/isa/sb/sb16.c | 3 +-- sound/isa/sb/sb8.c | 3 +-- sound/isa/sc6000.c | 3 +-- sound/isa/sscape.c | 3 +-- sound/isa/wavefront/wavefront.c | 3 +-- 38 files changed, 41 insertions(+), 83 deletions(-) diff --git a/drivers/base/isa.c b/drivers/base/isa.c index 2772f5d1948a..aa4737667026 100644 --- a/drivers/base/isa.c +++ b/drivers/base/isa.c @@ -51,7 +51,7 @@ static int isa_bus_remove(struct device *dev) struct isa_driver *isa_driver = dev->platform_data; if (isa_driver && isa_driver->remove) - return isa_driver->remove(dev, to_isa_dev(dev)->id); + isa_driver->remove(dev, to_isa_dev(dev)->id); return 0; } diff --git a/drivers/i2c/busses/i2c-elektor.c b/drivers/i2c/busses/i2c-elektor.c index 140426db28df..b72a3c3ef2ab 100644 --- a/drivers/i2c/busses/i2c-elektor.c +++ b/drivers/i2c/busses/i2c-elektor.c @@ -282,7 +282,7 @@ static int elektor_probe(struct device *dev, unsigned int id) return -ENODEV; } -static int elektor_remove(struct device *dev, unsigned int id) +static void elektor_remove(struct device *dev, unsigned int id) { i2c_del_adapter(&pcf_isa_ops); @@ -298,8 +298,6 @@ static int elektor_remove(struct device *dev, unsigned int id) iounmap(base_iomem); release_mem_region(base, 2); } - - return 0; } static struct isa_driver i2c_elektor_driver = { diff --git a/drivers/i2c/busses/i2c-pca-isa.c b/drivers/i2c/busses/i2c-pca-isa.c index f27bc1e55385..85e8cf58e8bf 100644 --- a/drivers/i2c/busses/i2c-pca-isa.c +++ b/drivers/i2c/busses/i2c-pca-isa.c @@ -161,7 +161,7 @@ static int pca_isa_probe(struct device *dev, unsigned int id) return -ENODEV; } -static int pca_isa_remove(struct device *dev, unsigned int id) +static void pca_isa_remove(struct device *dev, unsigned int id) { i2c_del_adapter(&pca_isa_ops); @@ -170,8 +170,6 @@ static int pca_isa_remove(struct device *dev, unsigned int id) free_irq(irq, &pca_isa_ops); } release_region(base, IO_SIZE); - - return 0; } static struct isa_driver pca_isa_driver = { diff --git a/drivers/input/touchscreen/htcpen.c b/drivers/input/touchscreen/htcpen.c index 2f261a34f9c2..056ba76087e8 100644 --- a/drivers/input/touchscreen/htcpen.c +++ b/drivers/input/touchscreen/htcpen.c @@ -171,7 +171,7 @@ static int htcpen_isa_probe(struct device *dev, unsigned int id) return err; } -static int htcpen_isa_remove(struct device *dev, unsigned int id) +static void htcpen_isa_remove(struct device *dev, unsigned int id) { struct input_dev *htcpen_dev = dev_get_drvdata(dev); @@ -182,8 +182,6 @@ static int htcpen_isa_remove(struct device *dev, unsigned int id) release_region(HTCPEN_PORT_INDEX, 2); release_region(HTCPEN_PORT_INIT, 1); release_region(HTCPEN_PORT_IRQ_CLEAR, 1); - - return 0; } #ifdef CONFIG_PM diff --git a/drivers/media/radio/radio-isa.c b/drivers/media/radio/radio-isa.c index 527f4c3b0ca4..c591c0851fa2 100644 --- a/drivers/media/radio/radio-isa.c +++ b/drivers/media/radio/radio-isa.c @@ -337,13 +337,11 @@ int radio_isa_probe(struct device *pdev, unsigned int dev) } EXPORT_SYMBOL_GPL(radio_isa_probe); -int radio_isa_remove(struct device *pdev, unsigned int dev) +void radio_isa_remove(struct device *pdev, unsigned int dev) { struct radio_isa_card *isa = dev_get_drvdata(pdev); radio_isa_common_remove(isa, isa->drv->region_size); - - return 0; } EXPORT_SYMBOL_GPL(radio_isa_remove); diff --git a/drivers/media/radio/radio-isa.h b/drivers/media/radio/radio-isa.h index 2f0736edfda8..c9159958203e 100644 --- a/drivers/media/radio/radio-isa.h +++ b/drivers/media/radio/radio-isa.h @@ -91,7 +91,7 @@ struct radio_isa_driver { int radio_isa_match(struct device *pdev, unsigned int dev); int radio_isa_probe(struct device *pdev, unsigned int dev); -int radio_isa_remove(struct device *pdev, unsigned int dev); +void radio_isa_remove(struct device *pdev, unsigned int dev); #ifdef CONFIG_PNP int radio_isa_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id); diff --git a/drivers/media/radio/radio-sf16fmr2.c b/drivers/media/radio/radio-sf16fmr2.c index 0388894cfe41..d0dde55b7930 100644 --- a/drivers/media/radio/radio-sf16fmr2.c +++ b/drivers/media/radio/radio-sf16fmr2.c @@ -293,11 +293,9 @@ static void fmr2_remove(struct fmr2 *fmr2) kfree(fmr2); } -static int fmr2_isa_remove(struct device *pdev, unsigned int ndev) +static void fmr2_isa_remove(struct device *pdev, unsigned int ndev) { fmr2_remove(dev_get_drvdata(pdev)); - - return 0; } static void fmr2_pnp_remove(struct pnp_dev *pdev) diff --git a/drivers/net/can/sja1000/tscan1.c b/drivers/net/can/sja1000/tscan1.c index 6ea802c66124..3dbba8d61afb 100644 --- a/drivers/net/can/sja1000/tscan1.c +++ b/drivers/net/can/sja1000/tscan1.c @@ -159,7 +159,7 @@ static int tscan1_probe(struct device *dev, unsigned id) return -ENXIO; } -static int tscan1_remove(struct device *dev, unsigned id /*unused*/) +static void tscan1_remove(struct device *dev, unsigned id /*unused*/) { struct net_device *netdev; struct sja1000_priv *priv; @@ -179,8 +179,6 @@ static int tscan1_remove(struct device *dev, unsigned id /*unused*/) release_region(pld_base, TSCAN1_PLD_SIZE); free_sja1000dev(netdev); - - return 0; } static struct isa_driver tscan1_isa_driver = { diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c index 667f38c9e4c6..53e1f7e07959 100644 --- a/drivers/net/ethernet/3com/3c509.c +++ b/drivers/net/ethernet/3com/3c509.c @@ -335,12 +335,11 @@ static int el3_isa_match(struct device *pdev, unsigned int ndev) return 1; } -static int el3_isa_remove(struct device *pdev, +static void el3_isa_remove(struct device *pdev, unsigned int ndev) { el3_device_remove(pdev); dev_set_drvdata(pdev, NULL); - return 0; } #ifdef CONFIG_PM diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c index 79830e77afa9..b1e97f75b0ba 100644 --- a/drivers/scsi/advansys.c +++ b/drivers/scsi/advansys.c @@ -11459,12 +11459,11 @@ static int advansys_isa_probe(struct device *dev, unsigned int id) return err; } -static int advansys_isa_remove(struct device *dev, unsigned int id) +static void advansys_isa_remove(struct device *dev, unsigned int id) { int ioport = _asc_def_iop_base[id]; advansys_release(dev_get_drvdata(dev)); release_region(ioport, ASC_IOADR_GAP); - return 0; } static struct isa_driver advansys_isa_driver = { diff --git a/drivers/scsi/aha1542.c b/drivers/scsi/aha1542.c index dc5667afeb27..e0d8cca1c70b 100644 --- a/drivers/scsi/aha1542.c +++ b/drivers/scsi/aha1542.c @@ -1025,12 +1025,11 @@ static int aha1542_isa_match(struct device *pdev, unsigned int ndev) return 1; } -static int aha1542_isa_remove(struct device *pdev, +static void aha1542_isa_remove(struct device *pdev, unsigned int ndev) { aha1542_release(dev_get_drvdata(pdev)); dev_set_drvdata(pdev, NULL); - return 0; } static struct isa_driver aha1542_isa_driver = { diff --git a/drivers/scsi/fdomain_isa.c b/drivers/scsi/fdomain_isa.c index e0cdcd2003d0..2b4280a43a53 100644 --- a/drivers/scsi/fdomain_isa.c +++ b/drivers/scsi/fdomain_isa.c @@ -175,7 +175,7 @@ static int fdomain_isa_param_match(struct device *dev, unsigned int ndev) return 1; } -static int fdomain_isa_remove(struct device *dev, unsigned int ndev) +static void fdomain_isa_remove(struct device *dev, unsigned int ndev) { struct Scsi_Host *sh = dev_get_drvdata(dev); int base = sh->io_port; @@ -183,7 +183,6 @@ static int fdomain_isa_remove(struct device *dev, unsigned int ndev) fdomain_destroy(sh); release_region(base, FDOMAIN_REGION_SIZE); dev_set_drvdata(dev, NULL); - return 0; } static struct isa_driver fdomain_isa_driver = { diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c index 2df2f38a9b12..7ba3c9312731 100644 --- a/drivers/scsi/g_NCR5380.c +++ b/drivers/scsi/g_NCR5380.c @@ -720,12 +720,11 @@ static int generic_NCR5380_isa_match(struct device *pdev, unsigned int ndev) return 1; } -static int generic_NCR5380_isa_remove(struct device *pdev, - unsigned int ndev) +static void generic_NCR5380_isa_remove(struct device *pdev, + unsigned int ndev) { generic_NCR5380_release_resources(dev_get_drvdata(pdev)); dev_set_drvdata(pdev, NULL); - return 0; } static struct isa_driver generic_NCR5380_isa_driver = { diff --git a/drivers/watchdog/pcwd.c b/drivers/watchdog/pcwd.c index b95cd38f3ceb..a793b03a785d 100644 --- a/drivers/watchdog/pcwd.c +++ b/drivers/watchdog/pcwd.c @@ -951,7 +951,7 @@ error_request_region: return ret; } -static int pcwd_isa_remove(struct device *dev, unsigned int id) +static void pcwd_isa_remove(struct device *dev, unsigned int id) { if (debug >= DEBUG) pr_debug("pcwd_isa_remove id=%d\n", id); @@ -968,8 +968,6 @@ static int pcwd_isa_remove(struct device *dev, unsigned int id) (pcwd_private.revision == PCWD_REVISION_A) ? 2 : 4); pcwd_private.io_addr = 0x0000; cards_found--; - - return 0; } static void pcwd_isa_shutdown(struct device *dev, unsigned int id) diff --git a/include/linux/isa.h b/include/linux/isa.h index 41336da0f4e7..e30963190968 100644 --- a/include/linux/isa.h +++ b/include/linux/isa.h @@ -13,7 +13,7 @@ struct isa_driver { int (*match)(struct device *, unsigned int); int (*probe)(struct device *, unsigned int); - int (*remove)(struct device *, unsigned int); + void (*remove)(struct device *, unsigned int); void (*shutdown)(struct device *, unsigned int); int (*suspend)(struct device *, unsigned int, pm_message_t); int (*resume)(struct device *, unsigned int); diff --git a/sound/isa/ad1848/ad1848.c b/sound/isa/ad1848/ad1848.c index 593c6e959afe..48f7cc57c3da 100644 --- a/sound/isa/ad1848/ad1848.c +++ b/sound/isa/ad1848/ad1848.c @@ -118,10 +118,9 @@ out: snd_card_free(card); return error; } -static int snd_ad1848_remove(struct device *dev, unsigned int n) +static void snd_ad1848_remove(struct device *dev, unsigned int n) { snd_card_free(dev_get_drvdata(dev)); - return 0; } #ifdef CONFIG_PM diff --git a/sound/isa/adlib.c b/sound/isa/adlib.c index 5105524b6f38..e6cd7c4da38e 100644 --- a/sound/isa/adlib.c +++ b/sound/isa/adlib.c @@ -97,10 +97,9 @@ out: snd_card_free(card); return error; } -static int snd_adlib_remove(struct device *dev, unsigned int n) +static void snd_adlib_remove(struct device *dev, unsigned int n) { snd_card_free(dev_get_drvdata(dev)); - return 0; } static struct isa_driver snd_adlib_driver = { diff --git a/sound/isa/cmi8328.c b/sound/isa/cmi8328.c index faca5dd95bfe..3b9fbb02864b 100644 --- a/sound/isa/cmi8328.c +++ b/sound/isa/cmi8328.c @@ -403,7 +403,7 @@ error: return err; } -static int snd_cmi8328_remove(struct device *pdev, unsigned int dev) +static void snd_cmi8328_remove(struct device *pdev, unsigned int dev) { struct snd_card *card = dev_get_drvdata(pdev); struct snd_cmi8328 *cmi = card->private_data; @@ -420,7 +420,6 @@ static int snd_cmi8328_remove(struct device *pdev, unsigned int dev) snd_cmi8328_cfg_write(cmi->port, CFG2, 0); snd_cmi8328_cfg_write(cmi->port, CFG3, 0); snd_card_free(card); - return 0; } #ifdef CONFIG_PM diff --git a/sound/isa/cmi8330.c b/sound/isa/cmi8330.c index 4669eb0cc8ce..19e258527d69 100644 --- a/sound/isa/cmi8330.c +++ b/sound/isa/cmi8330.c @@ -631,11 +631,10 @@ static int snd_cmi8330_isa_probe(struct device *pdev, return 0; } -static int snd_cmi8330_isa_remove(struct device *devptr, +static void snd_cmi8330_isa_remove(struct device *devptr, unsigned int dev) { snd_card_free(dev_get_drvdata(devptr)); - return 0; } #ifdef CONFIG_PM diff --git a/sound/isa/cs423x/cs4231.c b/sound/isa/cs423x/cs4231.c index 2135963eba78..383ee621cea1 100644 --- a/sound/isa/cs423x/cs4231.c +++ b/sound/isa/cs423x/cs4231.c @@ -135,10 +135,9 @@ out: snd_card_free(card); return error; } -static int snd_cs4231_remove(struct device *dev, unsigned int n) +static void snd_cs4231_remove(struct device *dev, unsigned int n) { snd_card_free(dev_get_drvdata(dev)); - return 0; } #ifdef CONFIG_PM diff --git a/sound/isa/cs423x/cs4236.c b/sound/isa/cs423x/cs4236.c index fa3c39cff5f8..24688271e73f 100644 --- a/sound/isa/cs423x/cs4236.c +++ b/sound/isa/cs423x/cs4236.c @@ -487,11 +487,10 @@ static int snd_cs423x_isa_probe(struct device *pdev, return 0; } -static int snd_cs423x_isa_remove(struct device *pdev, +static void snd_cs423x_isa_remove(struct device *pdev, unsigned int dev) { snd_card_free(dev_get_drvdata(pdev)); - return 0; } #ifdef CONFIG_PM diff --git a/sound/isa/es1688/es1688.c b/sound/isa/es1688/es1688.c index 64610571a5e1..d99bb3f8f0c1 100644 --- a/sound/isa/es1688/es1688.c +++ b/sound/isa/es1688/es1688.c @@ -192,10 +192,9 @@ out: return error; } -static int snd_es1688_isa_remove(struct device *dev, unsigned int n) +static void snd_es1688_isa_remove(struct device *dev, unsigned int n) { snd_card_free(dev_get_drvdata(dev)); - return 0; } static struct isa_driver snd_es1688_driver = { diff --git a/sound/isa/es18xx.c b/sound/isa/es18xx.c index 5f8d7e8a5477..9beef8079177 100644 --- a/sound/isa/es18xx.c +++ b/sound/isa/es18xx.c @@ -2210,11 +2210,10 @@ static int snd_es18xx_isa_probe(struct device *pdev, unsigned int dev) } } -static int snd_es18xx_isa_remove(struct device *devptr, - unsigned int dev) +static void snd_es18xx_isa_remove(struct device *devptr, + unsigned int dev) { snd_card_free(dev_get_drvdata(devptr)); - return 0; } #ifdef CONFIG_PM diff --git a/sound/isa/galaxy/galaxy.c b/sound/isa/galaxy/galaxy.c index 65f9f46c9f58..d33d69f29924 100644 --- a/sound/isa/galaxy/galaxy.c +++ b/sound/isa/galaxy/galaxy.c @@ -608,10 +608,9 @@ error: return err; } -static int snd_galaxy_remove(struct device *dev, unsigned int n) +static void snd_galaxy_remove(struct device *dev, unsigned int n) { snd_card_free(dev_get_drvdata(dev)); - return 0; } static struct isa_driver snd_galaxy_driver = { diff --git a/sound/isa/gus/gusclassic.c b/sound/isa/gus/gusclassic.c index 7419b1939754..015f88a11352 100644 --- a/sound/isa/gus/gusclassic.c +++ b/sound/isa/gus/gusclassic.c @@ -195,10 +195,9 @@ out: snd_card_free(card); return error; } -static int snd_gusclassic_remove(struct device *dev, unsigned int n) +static void snd_gusclassic_remove(struct device *dev, unsigned int n) { snd_card_free(dev_get_drvdata(dev)); - return 0; } static struct isa_driver snd_gusclassic_driver = { diff --git a/sound/isa/gus/gusextreme.c b/sound/isa/gus/gusextreme.c index ed2f9d64efae..c9f31b4fb887 100644 --- a/sound/isa/gus/gusextreme.c +++ b/sound/isa/gus/gusextreme.c @@ -324,10 +324,9 @@ out: snd_card_free(card); return error; } -static int snd_gusextreme_remove(struct device *dev, unsigned int n) +static void snd_gusextreme_remove(struct device *dev, unsigned int n) { snd_card_free(dev_get_drvdata(dev)); - return 0; } static struct isa_driver snd_gusextreme_driver = { diff --git a/sound/isa/gus/gusmax.c b/sound/isa/gus/gusmax.c index 05cd9be4dd8a..dc09fbd6f88d 100644 --- a/sound/isa/gus/gusmax.c +++ b/sound/isa/gus/gusmax.c @@ -338,10 +338,9 @@ static int snd_gusmax_probe(struct device *pdev, unsigned int dev) return err; } -static int snd_gusmax_remove(struct device *devptr, unsigned int dev) +static void snd_gusmax_remove(struct device *devptr, unsigned int dev) { snd_card_free(dev_get_drvdata(devptr)); - return 0; } #define DEV_NAME "gusmax" diff --git a/sound/isa/gus/interwave.c b/sound/isa/gus/interwave.c index 3e9ad930deae..e4d412e72b75 100644 --- a/sound/isa/gus/interwave.c +++ b/sound/isa/gus/interwave.c @@ -825,10 +825,9 @@ static int snd_interwave_isa_probe(struct device *pdev, } } -static int snd_interwave_isa_remove(struct device *devptr, unsigned int dev) +static void snd_interwave_isa_remove(struct device *devptr, unsigned int dev) { snd_card_free(dev_get_drvdata(devptr)); - return 0; } static struct isa_driver snd_interwave_driver = { diff --git a/sound/isa/msnd/msnd_pinnacle.c b/sound/isa/msnd/msnd_pinnacle.c index 24b34ecf5e5b..69647b41300d 100644 --- a/sound/isa/msnd/msnd_pinnacle.c +++ b/sound/isa/msnd/msnd_pinnacle.c @@ -1049,10 +1049,9 @@ cfg_error: #endif } -static int snd_msnd_isa_remove(struct device *pdev, unsigned int dev) +static void snd_msnd_isa_remove(struct device *pdev, unsigned int dev) { snd_msnd_unload(dev_get_drvdata(pdev)); - return 0; } static struct isa_driver snd_msnd_driver = { diff --git a/sound/isa/opl3sa2.c b/sound/isa/opl3sa2.c index 85a181acd388..7649a8a4128d 100644 --- a/sound/isa/opl3sa2.c +++ b/sound/isa/opl3sa2.c @@ -878,11 +878,10 @@ static int snd_opl3sa2_isa_probe(struct device *pdev, return 0; } -static int snd_opl3sa2_isa_remove(struct device *devptr, +static void snd_opl3sa2_isa_remove(struct device *devptr, unsigned int dev) { snd_card_free(dev_get_drvdata(devptr)); - return 0; } #ifdef CONFIG_PM diff --git a/sound/isa/opti9xx/miro.c b/sound/isa/opti9xx/miro.c index 44ed1b65f6ce..20933342f5eb 100644 --- a/sound/isa/opti9xx/miro.c +++ b/sound/isa/opti9xx/miro.c @@ -1480,11 +1480,10 @@ static int snd_miro_isa_probe(struct device *devptr, unsigned int n) return 0; } -static int snd_miro_isa_remove(struct device *devptr, +static void snd_miro_isa_remove(struct device *devptr, unsigned int dev) { snd_card_free(dev_get_drvdata(devptr)); - return 0; } #define DEV_NAME "miro" diff --git a/sound/isa/opti9xx/opti92x-ad1848.c b/sound/isa/opti9xx/opti92x-ad1848.c index 881d3b5711d2..758f5b579138 100644 --- a/sound/isa/opti9xx/opti92x-ad1848.c +++ b/sound/isa/opti9xx/opti92x-ad1848.c @@ -1024,11 +1024,10 @@ static int snd_opti9xx_isa_probe(struct device *devptr, return 0; } -static int snd_opti9xx_isa_remove(struct device *devptr, - unsigned int dev) +static void snd_opti9xx_isa_remove(struct device *devptr, + unsigned int dev) { snd_card_free(dev_get_drvdata(devptr)); - return 0; } #ifdef CONFIG_PM diff --git a/sound/isa/sb/jazz16.c b/sound/isa/sb/jazz16.c index ee379bbf70a4..0e2e0ab3b9e4 100644 --- a/sound/isa/sb/jazz16.c +++ b/sound/isa/sb/jazz16.c @@ -339,12 +339,11 @@ err_free: return err; } -static int snd_jazz16_remove(struct device *devptr, unsigned int dev) +static void snd_jazz16_remove(struct device *devptr, unsigned int dev) { struct snd_card *card = dev_get_drvdata(devptr); snd_card_free(card); - return 0; } #ifdef CONFIG_PM diff --git a/sound/isa/sb/sb16.c b/sound/isa/sb/sb16.c index 479197c13803..db284b7b88a7 100644 --- a/sound/isa/sb/sb16.c +++ b/sound/isa/sb/sb16.c @@ -547,10 +547,9 @@ static int snd_sb16_isa_probe(struct device *pdev, unsigned int dev) } } -static int snd_sb16_isa_remove(struct device *pdev, unsigned int dev) +static void snd_sb16_isa_remove(struct device *pdev, unsigned int dev) { snd_card_free(dev_get_drvdata(pdev)); - return 0; } #ifdef CONFIG_PM diff --git a/sound/isa/sb/sb8.c b/sound/isa/sb/sb8.c index 438109f167d6..8e3e67b9a341 100644 --- a/sound/isa/sb/sb8.c +++ b/sound/isa/sb/sb8.c @@ -192,10 +192,9 @@ static int snd_sb8_probe(struct device *pdev, unsigned int dev) return err; } -static int snd_sb8_remove(struct device *pdev, unsigned int dev) +static void snd_sb8_remove(struct device *pdev, unsigned int dev) { snd_card_free(dev_get_drvdata(pdev)); - return 0; } #ifdef CONFIG_PM diff --git a/sound/isa/sc6000.c b/sound/isa/sc6000.c index 3d0bea44f454..def137579717 100644 --- a/sound/isa/sc6000.c +++ b/sound/isa/sc6000.c @@ -672,7 +672,7 @@ err_exit: return err; } -static int snd_sc6000_remove(struct device *devptr, unsigned int dev) +static void snd_sc6000_remove(struct device *devptr, unsigned int dev) { struct snd_card *card = dev_get_drvdata(devptr); char __iomem **vport = card->private_data; @@ -684,7 +684,6 @@ static int snd_sc6000_remove(struct device *devptr, unsigned int dev) release_region(mss_port[dev], 4); snd_card_free(card); - return 0; } static struct isa_driver snd_sc6000_driver = { diff --git a/sound/isa/sscape.c b/sound/isa/sscape.c index 2e5a5c5279e8..e70ef9aee545 100644 --- a/sound/isa/sscape.c +++ b/sound/isa/sscape.c @@ -1183,10 +1183,9 @@ _release_card: return ret; } -static int snd_sscape_remove(struct device *devptr, unsigned int dev) +static void snd_sscape_remove(struct device *devptr, unsigned int dev) { snd_card_free(dev_get_drvdata(devptr)); - return 0; } #define DEV_NAME "sscape" diff --git a/sound/isa/wavefront/wavefront.c b/sound/isa/wavefront/wavefront.c index 9e0f6b226775..b750a4fd40de 100644 --- a/sound/isa/wavefront/wavefront.c +++ b/sound/isa/wavefront/wavefront.c @@ -565,11 +565,10 @@ static int snd_wavefront_isa_probe(struct device *pdev, return 0; } -static int snd_wavefront_isa_remove(struct device *devptr, +static void snd_wavefront_isa_remove(struct device *devptr, unsigned int dev) { snd_card_free(dev_get_drvdata(devptr)); - return 0; } #define DEV_NAME "wavefront" -- cgit 1.4.1