From d1877e639bc6bf1c3131eda3f9ede73f8da96c22 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 8 Jun 2022 12:55:13 -0600 Subject: vfio: de-extern-ify function prototypes The use of 'extern' in function prototypes has been disrecommended in the kernel coding style for several years now, remove them from all vfio related files so contributors no longer need to decide between style and consistency. Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Reviewed-by: Eric Farman Reviewed-by: Eric Auger Reviewed-by: Cornelia Huck Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/165471414407.203056.474032786990662279.stgit@omen Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_cp.h | 12 ++++++------ drivers/s390/cio/vfio_ccw_private.h | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/cio/vfio_ccw_cp.h b/drivers/s390/cio/vfio_ccw_cp.h index e4c436199b4c..3194d887e08e 100644 --- a/drivers/s390/cio/vfio_ccw_cp.h +++ b/drivers/s390/cio/vfio_ccw_cp.h @@ -41,11 +41,11 @@ struct channel_program { struct ccw1 *guest_cp; }; -extern int cp_init(struct channel_program *cp, union orb *orb); -extern void cp_free(struct channel_program *cp); -extern int cp_prefetch(struct channel_program *cp); -extern union orb *cp_get_orb(struct channel_program *cp, u32 intparm, u8 lpm); -extern void cp_update_scsw(struct channel_program *cp, union scsw *scsw); -extern bool cp_iova_pinned(struct channel_program *cp, u64 iova); +int cp_init(struct channel_program *cp, union orb *orb); +void cp_free(struct channel_program *cp); +int cp_prefetch(struct channel_program *cp); +union orb *cp_get_orb(struct channel_program *cp, u32 intparm, u8 lpm); +void cp_update_scsw(struct channel_program *cp, union scsw *scsw); +bool cp_iova_pinned(struct channel_program *cp, u64 iova); #endif diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h index 7272eb788612..b7163bac8cc7 100644 --- 
a/drivers/s390/cio/vfio_ccw_private.h +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -119,10 +119,10 @@ struct vfio_ccw_private { struct work_struct crw_work; } __aligned(8); -extern int vfio_ccw_mdev_reg(struct subchannel *sch); -extern void vfio_ccw_mdev_unreg(struct subchannel *sch); +int vfio_ccw_mdev_reg(struct subchannel *sch); +void vfio_ccw_mdev_unreg(struct subchannel *sch); -extern int vfio_ccw_sch_quiesce(struct subchannel *sch); +int vfio_ccw_sch_quiesce(struct subchannel *sch); extern struct mdev_driver vfio_ccw_mdev_driver; -- cgit 1.4.1 From 3566ee1d776c1393393564b2514f9cd52a49c16e Mon Sep 17 00:00:00 2001 From: Michael Kawano Date: Thu, 7 Jul 2022 15:57:27 +0200 Subject: vfio/ccw: Remove UUID from s390 debug log As vfio-ccw devices are created/destroyed, the uuid of the associated mdevs that are recorded in $S390DBF/vfio_ccw_msg/sprintf get lost. This is because a pointer to the UUID is stored instead of the UUID itself, and that memory may have been repurposed if/when the logs are examined. The result is usually garbage UUID data in the logs, though there is an outside chance of an oops happening here. Simply remove the UUID from the traces, as the subchannel number will provide useful configuration information for problem determination, and is stored directly into the log instead of a pointer. As we were the only consumer of mdev_uuid(), remove that too. 
Cc: Kirti Wankhede Signed-off-by: Michael Kawano Fixes: 60e05d1cf0875 ("vfio-ccw: add some logging") Fixes: b7701dfbf9832 ("vfio-ccw: Register a chp_event callback for vfio-ccw") [farman: reworded commit message, added Fixes: tags] Signed-off-by: Eric Farman Reviewed-by: Jason Gunthorpe Reviewed-by: Matthew Rosato Reviewed-by: Kirti Wankhede Link: https://lore.kernel.org/r/20220707135737.720765-2-farman@linux.ibm.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_drv.c | 5 ++--- drivers/s390/cio/vfio_ccw_fsm.c | 26 ++++++++++++-------------- drivers/s390/cio/vfio_ccw_ops.c | 8 ++++---- include/linux/mdev.h | 5 ----- 4 files changed, 18 insertions(+), 26 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index ee182cfb467d..35055eb94115 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include @@ -358,8 +357,8 @@ static int vfio_ccw_chp_event(struct subchannel *sch, return 0; trace_vfio_ccw_chp_event(private->sch->schid, mask, event); - VFIO_CCW_MSG_EVENT(2, "%pUl (%x.%x.%04x): mask=0x%x event=%d\n", - mdev_uuid(private->mdev), sch->schid.cssid, + VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: mask=0x%x event=%d\n", + sch->schid.cssid, sch->schid.ssid, sch->schid.sch_no, mask, event); diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c index 8483a266051c..bbcc5b486749 100644 --- a/drivers/s390/cio/vfio_ccw_fsm.c +++ b/drivers/s390/cio/vfio_ccw_fsm.c @@ -10,7 +10,6 @@ */ #include -#include #include "ioasm.h" #include "vfio_ccw_private.h" @@ -242,7 +241,6 @@ static void fsm_io_request(struct vfio_ccw_private *private, union orb *orb; union scsw *scsw = &private->scsw; struct ccw_io_region *io_region = private->io_region; - struct mdev_device *mdev = private->mdev; char *errstr = "request"; struct subchannel_id schid = get_schid(private); @@ -256,8 +254,8 @@ static void 
fsm_io_request(struct vfio_ccw_private *private, if (orb->tm.b) { io_region->ret_code = -EOPNOTSUPP; VFIO_CCW_MSG_EVENT(2, - "%pUl (%x.%x.%04x): transport mode\n", - mdev_uuid(mdev), schid.cssid, + "sch %x.%x.%04x: transport mode\n", + schid.cssid, schid.ssid, schid.sch_no); errstr = "transport mode"; goto err_out; @@ -265,8 +263,8 @@ static void fsm_io_request(struct vfio_ccw_private *private, io_region->ret_code = cp_init(&private->cp, orb); if (io_region->ret_code) { VFIO_CCW_MSG_EVENT(2, - "%pUl (%x.%x.%04x): cp_init=%d\n", - mdev_uuid(mdev), schid.cssid, + "sch %x.%x.%04x: cp_init=%d\n", + schid.cssid, schid.ssid, schid.sch_no, io_region->ret_code); errstr = "cp init"; @@ -276,8 +274,8 @@ static void fsm_io_request(struct vfio_ccw_private *private, io_region->ret_code = cp_prefetch(&private->cp); if (io_region->ret_code) { VFIO_CCW_MSG_EVENT(2, - "%pUl (%x.%x.%04x): cp_prefetch=%d\n", - mdev_uuid(mdev), schid.cssid, + "sch %x.%x.%04x: cp_prefetch=%d\n", + schid.cssid, schid.ssid, schid.sch_no, io_region->ret_code); errstr = "cp prefetch"; @@ -289,8 +287,8 @@ static void fsm_io_request(struct vfio_ccw_private *private, io_region->ret_code = fsm_io_helper(private); if (io_region->ret_code) { VFIO_CCW_MSG_EVENT(2, - "%pUl (%x.%x.%04x): fsm_io_helper=%d\n", - mdev_uuid(mdev), schid.cssid, + "sch %x.%x.%04x: fsm_io_helper=%d\n", + schid.cssid, schid.ssid, schid.sch_no, io_region->ret_code); errstr = "cp fsm_io_helper"; @@ -300,16 +298,16 @@ static void fsm_io_request(struct vfio_ccw_private *private, return; } else if (scsw->cmd.fctl & SCSW_FCTL_HALT_FUNC) { VFIO_CCW_MSG_EVENT(2, - "%pUl (%x.%x.%04x): halt on io_region\n", - mdev_uuid(mdev), schid.cssid, + "sch %x.%x.%04x: halt on io_region\n", + schid.cssid, schid.ssid, schid.sch_no); /* halt is handled via the async cmd region */ io_region->ret_code = -EOPNOTSUPP; goto err_out; } else if (scsw->cmd.fctl & SCSW_FCTL_CLEAR_FUNC) { VFIO_CCW_MSG_EVENT(2, - "%pUl (%x.%x.%04x): clear on io_region\n", - mdev_uuid(mdev), 
schid.cssid, + "sch %x.%x.%04x: clear on io_region\n", + schid.cssid, schid.ssid, schid.sch_no); /* clear is handled via the async cmd region */ io_region->ret_code = -EOPNOTSUPP; diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index b49e2e9db2dc..0e05bff78b8e 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -131,8 +131,8 @@ static int vfio_ccw_mdev_probe(struct mdev_device *mdev) private->mdev = mdev; private->state = VFIO_CCW_STATE_IDLE; - VFIO_CCW_MSG_EVENT(2, "mdev %pUl, sch %x.%x.%04x: create\n", - mdev_uuid(mdev), private->sch->schid.cssid, + VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: create\n", + private->sch->schid.cssid, private->sch->schid.ssid, private->sch->schid.sch_no); @@ -154,8 +154,8 @@ static void vfio_ccw_mdev_remove(struct mdev_device *mdev) { struct vfio_ccw_private *private = dev_get_drvdata(mdev->dev.parent); - VFIO_CCW_MSG_EVENT(2, "mdev %pUl, sch %x.%x.%04x: remove\n", - mdev_uuid(mdev), private->sch->schid.cssid, + VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: remove\n", + private->sch->schid.cssid, private->sch->schid.ssid, private->sch->schid.sch_no); diff --git a/include/linux/mdev.h b/include/linux/mdev.h index bb539794f54a..47ad3b104d9e 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -65,11 +65,6 @@ struct mdev_driver { struct device_driver driver; }; -static inline const guid_t *mdev_uuid(struct mdev_device *mdev) -{ - return &mdev->uuid; -} - extern struct bus_type mdev_bus_type; int mdev_register_device(struct device *dev, struct mdev_driver *mdev_driver); -- cgit 1.4.1 From f6c876d67e956de8d69349b0ee43bc7277c09e5c Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Thu, 7 Jul 2022 15:57:28 +0200 Subject: vfio/ccw: Fix FSM state if mdev probe fails The FSM is in STANDBY state when arriving in vfio_ccw_mdev_probe(), and this routine converts it to IDLE as part of its processing. The error exit sets it to IDLE (again) but clears the private->mdev pointer. 
The FSM should of course be managing the state itself, but the correct thing for vfio_ccw_mdev_probe() to do would be to put the state back the way it found it. The corresponding check of private->mdev in vfio_ccw_sch_io_todo() can be removed, since the distinction is unnecessary at this point. Fixes: 3bf1311f351ef ("vfio/ccw: Convert to use vfio_register_emulated_iommu_dev()") Signed-off-by: Eric Farman Reviewed-by: Jason Gunthorpe Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20220707135737.720765-3-farman@linux.ibm.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_drv.c | 5 +++-- drivers/s390/cio/vfio_ccw_ops.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 35055eb94115..179eb614fa5b 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -106,9 +106,10 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work) /* * Reset to IDLE only if processing of a channel program * has finished. Do not overwrite a possible processing - * state if the final interrupt was for HSCH or CSCH. + * state if the interrupt was unsolicited, or if the final + * interrupt was for HSCH or CSCH. 
*/ - if (private->mdev && cp_is_finished) + if (cp_is_finished) private->state = VFIO_CCW_STATE_IDLE; if (private->io_trigger) diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 0e05bff78b8e..9a05dadcbb75 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -146,7 +146,7 @@ err_atomic: vfio_uninit_group_dev(&private->vdev); atomic_inc(&private->avail); private->mdev = NULL; - private->state = VFIO_CCW_STATE_IDLE; + private->state = VFIO_CCW_STATE_STANDBY; return ret; } -- cgit 1.4.1 From cffcc109fd682075dee79bade3d60a07152a8fd1 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Thu, 7 Jul 2022 15:57:29 +0200 Subject: vfio/ccw: Do not change FSM state in subchannel event The routine vfio_ccw_sch_event() is tasked with handling subchannel events, specifically machine checks, on behalf of vfio-ccw. It correctly calls cio_update_schib(), and if that fails (meaning the subchannel is gone) it makes an FSM event call to mark the subchannel Not Operational. If that worked, however, then it decides that if the FSM state was already Not Operational (implying the subchannel just came back), then it should simply change the FSM to partially- or fully-open. Remove this trickery, since a subchannel returning will require more probing than simply "oh all is well again" to ensure it works correctly. 
Fixes: bbe37e4cb8970 ("vfio: ccw: introduce a finite state machine") Signed-off-by: Eric Farman Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20220707135737.720765-4-farman@linux.ibm.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_drv.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 179eb614fa5b..279ad2161f17 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -301,19 +301,11 @@ static int vfio_ccw_sch_event(struct subchannel *sch, int process) if (work_pending(&sch->todo_work)) goto out_unlock; - if (cio_update_schib(sch)) { - vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER); - rc = 0; - goto out_unlock; - } - - private = dev_get_drvdata(&sch->dev); - if (private->state == VFIO_CCW_STATE_NOT_OPER) { - private->state = private->mdev ? VFIO_CCW_STATE_IDLE : - VFIO_CCW_STATE_STANDBY; - } rc = 0; + if (cio_update_schib(sch)) + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER); + out_unlock: spin_unlock_irqrestore(sch->lock, flags); -- cgit 1.4.1 From e46a724886914c4de154f2103a019b422f3c9bb2 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Thu, 7 Jul 2022 15:57:30 +0200 Subject: vfio/ccw: Remove private->mdev There are no remaining users of private->mdev. Remove it. 
Suggested-by: Jason Gunthorpe Signed-off-by: Eric Farman Reviewed-by: Matthew Rosato Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220707135737.720765-5-farman@linux.ibm.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_async.c | 1 - drivers/s390/cio/vfio_ccw_ops.c | 3 --- drivers/s390/cio/vfio_ccw_private.h | 2 -- 3 files changed, 6 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/cio/vfio_ccw_async.c b/drivers/s390/cio/vfio_ccw_async.c index 7a838e3d7c0f..420d89ba7f83 100644 --- a/drivers/s390/cio/vfio_ccw_async.c +++ b/drivers/s390/cio/vfio_ccw_async.c @@ -8,7 +8,6 @@ */ #include -#include #include "vfio_ccw_private.h" diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 9a05dadcbb75..81377270d4a7 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -128,7 +128,6 @@ static int vfio_ccw_mdev_probe(struct mdev_device *mdev) vfio_init_group_dev(&private->vdev, &mdev->dev, &vfio_ccw_dev_ops); - private->mdev = mdev; private->state = VFIO_CCW_STATE_IDLE; VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: create\n", @@ -145,7 +144,6 @@ static int vfio_ccw_mdev_probe(struct mdev_device *mdev) err_atomic: vfio_uninit_group_dev(&private->vdev); atomic_inc(&private->avail); - private->mdev = NULL; private->state = VFIO_CCW_STATE_STANDBY; return ret; } @@ -170,7 +168,6 @@ static void vfio_ccw_mdev_remove(struct mdev_device *mdev) vfio_uninit_group_dev(&private->vdev); cp_free(&private->cp); - private->mdev = NULL; atomic_inc(&private->avail); } diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h index b7163bac8cc7..4d11ef48333e 100644 --- a/drivers/s390/cio/vfio_ccw_private.h +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -73,7 +73,6 @@ struct vfio_ccw_crw { * @state: internal state of the device * @completion: synchronization helper of the I/O completion * @avail: available for creating a mediated device - * @mdev: pointer to the mediated 
device * @nb: notifier for vfio events * @io_region: MMIO region to input/output I/O arguments/results * @io_mutex: protect against concurrent update of I/O regions @@ -97,7 +96,6 @@ struct vfio_ccw_private { int state; struct completion *completion; atomic_t avail; - struct mdev_device *mdev; struct notifier_block nb; struct ccw_io_region *io_region; struct mutex io_mutex; -- cgit 1.4.1 From 8557d73bddbb28d10745f3545f6686a92db4c781 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Thu, 7 Jul 2022 15:57:31 +0200 Subject: vfio/ccw: Pass enum to FSM event jumptable The FSM has an enumerated list of events defined. Use that as the argument passed to the jump table, instead of a regular int. Suggested-by: Jason Gunthorpe Signed-off-by: Eric Farman Reviewed-by: Jason Gunthorpe Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20220707135737.720765-6-farman@linux.ibm.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/s390') diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h index 4d11ef48333e..5891bea8ce41 100644 --- a/drivers/s390/cio/vfio_ccw_private.h +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -156,7 +156,7 @@ typedef void (fsm_func_t)(struct vfio_ccw_private *, enum vfio_ccw_event); extern fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS]; static inline void vfio_ccw_fsm_event(struct vfio_ccw_private *private, - int event) + enum vfio_ccw_event event) { trace_vfio_ccw_fsm_event(private->sch->schid, private->state, event); vfio_ccw_jumptable[private->state][event](private, event); -- cgit 1.4.1 From 09205a7659714c14591c274fdbdfa97a20f4cef2 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Thu, 7 Jul 2022 15:57:32 +0200 Subject: vfio/ccw: Flatten MDEV device (un)register The vfio_ccw_mdev_(un)reg routines are merely vfio-ccw routines that pass control to mdev_(un)register_device. 
Since there's only one caller of each, let's just call the mdev routines directly. Suggested-by: Jason Gunthorpe Signed-off-by: Eric Farman Reviewed-by: Jason Gunthorpe Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20220707135737.720765-7-farman@linux.ibm.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_drv.c | 4 ++-- drivers/s390/cio/vfio_ccw_ops.c | 10 ---------- drivers/s390/cio/vfio_ccw_private.h | 3 --- 3 files changed, 2 insertions(+), 15 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 279ad2161f17..fe87a2652a22 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -240,7 +240,7 @@ static int vfio_ccw_sch_probe(struct subchannel *sch) private->state = VFIO_CCW_STATE_STANDBY; - ret = vfio_ccw_mdev_reg(sch); + ret = mdev_register_device(&sch->dev, &vfio_ccw_mdev_driver); if (ret) goto out_disable; @@ -262,7 +262,7 @@ static void vfio_ccw_sch_remove(struct subchannel *sch) struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); vfio_ccw_sch_quiesce(sch); - vfio_ccw_mdev_unreg(sch); + mdev_unregister_device(&sch->dev); dev_set_drvdata(&sch->dev, NULL); diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 81377270d4a7..a7ea9358e461 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -654,13 +654,3 @@ struct mdev_driver vfio_ccw_mdev_driver = { .remove = vfio_ccw_mdev_remove, .supported_type_groups = mdev_type_groups, }; - -int vfio_ccw_mdev_reg(struct subchannel *sch) -{ - return mdev_register_device(&sch->dev, &vfio_ccw_mdev_driver); -} - -void vfio_ccw_mdev_unreg(struct subchannel *sch) -{ - mdev_unregister_device(&sch->dev); -} diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h index 5891bea8ce41..a2584c130e79 100644 --- a/drivers/s390/cio/vfio_ccw_private.h +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -117,9 +117,6 
@@ struct vfio_ccw_private { struct work_struct crw_work; } __aligned(8); -int vfio_ccw_mdev_reg(struct subchannel *sch); -void vfio_ccw_mdev_unreg(struct subchannel *sch); - int vfio_ccw_sch_quiesce(struct subchannel *sch); extern struct mdev_driver vfio_ccw_mdev_driver; -- cgit 1.4.1 From 4cc2c051c35639787de1b5797de2cd003bbc98ab Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Thu, 7 Jul 2022 15:57:33 +0200 Subject: vfio/ccw: Update trace data for not operational event We currently cut a very basic trace whenever the FSM directs control to the not operational routine. Convert this to a message, so it's alongside the other configuration related traces (create, remove, etc.), and record both the event that brought us here and the current state of the device. This will provide some better footprints if things go bad. Suggested-by: Matthew Rosato Signed-off-by: Eric Farman Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20220707135737.720765-8-farman@linux.ibm.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_fsm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c index bbcc5b486749..88e529a2e184 100644 --- a/drivers/s390/cio/vfio_ccw_fsm.c +++ b/drivers/s390/cio/vfio_ccw_fsm.c @@ -160,8 +160,12 @@ static void fsm_notoper(struct vfio_ccw_private *private, { struct subchannel *sch = private->sch; - VFIO_CCW_TRACE_EVENT(2, "notoper"); - VFIO_CCW_TRACE_EVENT(2, dev_name(&sch->dev)); + VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: notoper event %x state %x\n", + sch->schid.cssid, + sch->schid.ssid, + sch->schid.sch_no, + event, + private->state); /* * TODO: -- cgit 1.4.1 From 62ec0d49e683c25e35927a942c64433878de143c Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Thu, 7 Jul 2022 15:57:34 +0200 Subject: vfio/ccw: Create an OPEN FSM Event Move the process of enabling a subchannel for use by vfio-ccw into the FSM, such that it can manage 
the sequence of lifecycle events for the device. That is, if the FSM state is NOT_OPER(erational), then do the work that would enable the subchannel and move the FSM to STANDBY state. An attempt to perform this event again from any of the other operating states (IDLE, CP_PROCESSING, CP_PENDING) will convert the device back to NOT_OPER so the configuration process can be started again. Signed-off-by: Eric Farman Reviewed-by: Jason Gunthorpe Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20220707135737.720765-9-farman@linux.ibm.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_drv.c | 9 ++------- drivers/s390/cio/vfio_ccw_fsm.c | 21 +++++++++++++++++++++ drivers/s390/cio/vfio_ccw_private.h | 1 + 3 files changed, 24 insertions(+), 7 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index fe87a2652a22..7d9189640da3 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -231,15 +231,10 @@ static int vfio_ccw_sch_probe(struct subchannel *sch) dev_set_drvdata(&sch->dev, private); - spin_lock_irq(sch->lock); - sch->isc = VFIO_CCW_ISC; - ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch); - spin_unlock_irq(sch->lock); - if (ret) + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_OPEN); + if (private->state == VFIO_CCW_STATE_NOT_OPER) goto out_free; - private->state = VFIO_CCW_STATE_STANDBY; - ret = mdev_register_device(&sch->dev, &vfio_ccw_mdev_driver); if (ret) goto out_disable; diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c index 88e529a2e184..2811b2040490 100644 --- a/drivers/s390/cio/vfio_ccw_fsm.c +++ b/drivers/s390/cio/vfio_ccw_fsm.c @@ -11,6 +11,8 @@ #include +#include + #include "ioasm.h" #include "vfio_ccw_private.h" @@ -368,6 +370,20 @@ static void fsm_irq(struct vfio_ccw_private *private, complete(private->completion); } +static void fsm_open(struct vfio_ccw_private *private, + enum vfio_ccw_event event) 
+{ + struct subchannel *sch = private->sch; + int ret; + + spin_lock_irq(sch->lock); + sch->isc = VFIO_CCW_ISC; + ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch); + if (!ret) + private->state = VFIO_CCW_STATE_STANDBY; + spin_unlock_irq(sch->lock); +} + /* * Device statemachine */ @@ -377,29 +393,34 @@ fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = { [VFIO_CCW_EVENT_IO_REQ] = fsm_io_error, [VFIO_CCW_EVENT_ASYNC_REQ] = fsm_async_error, [VFIO_CCW_EVENT_INTERRUPT] = fsm_disabled_irq, + [VFIO_CCW_EVENT_OPEN] = fsm_open, }, [VFIO_CCW_STATE_STANDBY] = { [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper, [VFIO_CCW_EVENT_IO_REQ] = fsm_io_error, [VFIO_CCW_EVENT_ASYNC_REQ] = fsm_async_error, [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq, + [VFIO_CCW_EVENT_OPEN] = fsm_notoper, }, [VFIO_CCW_STATE_IDLE] = { [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper, [VFIO_CCW_EVENT_IO_REQ] = fsm_io_request, [VFIO_CCW_EVENT_ASYNC_REQ] = fsm_async_request, [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq, + [VFIO_CCW_EVENT_OPEN] = fsm_notoper, }, [VFIO_CCW_STATE_CP_PROCESSING] = { [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper, [VFIO_CCW_EVENT_IO_REQ] = fsm_io_retry, [VFIO_CCW_EVENT_ASYNC_REQ] = fsm_async_retry, [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq, + [VFIO_CCW_EVENT_OPEN] = fsm_notoper, }, [VFIO_CCW_STATE_CP_PENDING] = { [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper, [VFIO_CCW_EVENT_IO_REQ] = fsm_io_busy, [VFIO_CCW_EVENT_ASYNC_REQ] = fsm_async_request, [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq, + [VFIO_CCW_EVENT_OPEN] = fsm_notoper, }, }; diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h index a2584c130e79..93e136ba369b 100644 --- a/drivers/s390/cio/vfio_ccw_private.h +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -142,6 +142,7 @@ enum vfio_ccw_event { VFIO_CCW_EVENT_IO_REQ, VFIO_CCW_EVENT_INTERRUPT, VFIO_CCW_EVENT_ASYNC_REQ, + VFIO_CCW_EVENT_OPEN, /* last element! 
*/ NR_VFIO_CCW_EVENTS }; -- cgit 1.4.1 From f4b4ed44770221819daa19800f232a2708588c2f Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Thu, 7 Jul 2022 15:57:35 +0200 Subject: vfio/ccw: Create a CLOSE FSM event Refactor the vfio_ccw_sch_quiesce() routine to extract the bit that disables the subchannel and affects the FSM state. Use this to form the basis of a CLOSE event that will mirror the OPEN event, and move the subchannel back to NOT_OPER state. A key difference with that mirroring is that while OPEN handles the transition from NOT_OPER => STANDBY, the later probing of the mdev handles the transition from STANDBY => IDLE. On the other hand, the CLOSE event will move from one of the operating states {IDLE, CP_PROCESSING, CP_PENDING} => NOT_OPER. That is, there is no stop in a STANDBY state on the deconfigure path. Add a call to cp_free() in this event, such that it is captured for the various permutations of this event. In the unlikely event that cio_disable_subchannel() returns -EBUSY, the remaining logic of vfio_ccw_sch_quiesce() can still be used. 
Signed-off-by: Eric Farman Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20220707135737.720765-10-farman@linux.ibm.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_drv.c | 17 +++++------------ drivers/s390/cio/vfio_ccw_fsm.c | 26 ++++++++++++++++++++++++++ drivers/s390/cio/vfio_ccw_ops.c | 14 ++------------ drivers/s390/cio/vfio_ccw_private.h | 1 + 4 files changed, 34 insertions(+), 24 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 7d9189640da3..f98c9915e73d 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -41,13 +41,6 @@ int vfio_ccw_sch_quiesce(struct subchannel *sch) DECLARE_COMPLETION_ONSTACK(completion); int iretry, ret = 0; - spin_lock_irq(sch->lock); - if (!sch->schib.pmcw.ena) - goto out_unlock; - ret = cio_disable_subchannel(sch); - if (ret != -EBUSY) - goto out_unlock; - iretry = 255; do { @@ -74,9 +67,7 @@ int vfio_ccw_sch_quiesce(struct subchannel *sch) spin_lock_irq(sch->lock); ret = cio_disable_subchannel(sch); } while (ret == -EBUSY); -out_unlock: - private->state = VFIO_CCW_STATE_NOT_OPER; - spin_unlock_irq(sch->lock); + return ret; } @@ -256,7 +247,7 @@ static void vfio_ccw_sch_remove(struct subchannel *sch) { struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); - vfio_ccw_sch_quiesce(sch); + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLOSE); mdev_unregister_device(&sch->dev); dev_set_drvdata(&sch->dev, NULL); @@ -270,7 +261,9 @@ static void vfio_ccw_sch_remove(struct subchannel *sch) static void vfio_ccw_sch_shutdown(struct subchannel *sch) { - vfio_ccw_sch_quiesce(sch); + struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); + + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLOSE); } /** diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c index 2811b2040490..89eb3feffa41 100644 --- a/drivers/s390/cio/vfio_ccw_fsm.c +++ b/drivers/s390/cio/vfio_ccw_fsm.c @@ 
-384,6 +384,27 @@ static void fsm_open(struct vfio_ccw_private *private, spin_unlock_irq(sch->lock); } +static void fsm_close(struct vfio_ccw_private *private, + enum vfio_ccw_event event) +{ + struct subchannel *sch = private->sch; + int ret; + + spin_lock_irq(sch->lock); + + if (!sch->schib.pmcw.ena) + goto out_unlock; + + ret = cio_disable_subchannel(sch); + if (ret == -EBUSY) + vfio_ccw_sch_quiesce(sch); + +out_unlock: + private->state = VFIO_CCW_STATE_NOT_OPER; + spin_unlock_irq(sch->lock); + cp_free(&private->cp); +} + /* * Device statemachine */ @@ -394,6 +415,7 @@ fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = { [VFIO_CCW_EVENT_ASYNC_REQ] = fsm_async_error, [VFIO_CCW_EVENT_INTERRUPT] = fsm_disabled_irq, [VFIO_CCW_EVENT_OPEN] = fsm_open, + [VFIO_CCW_EVENT_CLOSE] = fsm_nop, }, [VFIO_CCW_STATE_STANDBY] = { [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper, @@ -401,6 +423,7 @@ fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = { [VFIO_CCW_EVENT_ASYNC_REQ] = fsm_async_error, [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq, [VFIO_CCW_EVENT_OPEN] = fsm_notoper, + [VFIO_CCW_EVENT_CLOSE] = fsm_close, }, [VFIO_CCW_STATE_IDLE] = { [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper, @@ -408,6 +431,7 @@ fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = { [VFIO_CCW_EVENT_ASYNC_REQ] = fsm_async_request, [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq, [VFIO_CCW_EVENT_OPEN] = fsm_notoper, + [VFIO_CCW_EVENT_CLOSE] = fsm_close, }, [VFIO_CCW_STATE_CP_PROCESSING] = { [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper, @@ -415,6 +439,7 @@ fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = { [VFIO_CCW_EVENT_ASYNC_REQ] = fsm_async_retry, [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq, [VFIO_CCW_EVENT_OPEN] = fsm_notoper, + [VFIO_CCW_EVENT_CLOSE] = fsm_close, }, [VFIO_CCW_STATE_CP_PENDING] = { [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper, @@ -422,5 +447,6 @@ fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = { 
[VFIO_CCW_EVENT_ASYNC_REQ] = fsm_async_request, [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq, [VFIO_CCW_EVENT_OPEN] = fsm_notoper, + [VFIO_CCW_EVENT_CLOSE] = fsm_close, }, }; diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index a7ea9358e461..fc5b83187bd9 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -33,9 +33,7 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private) * There are still a lot more instructions need to be handled. We * should come back here later. */ - ret = vfio_ccw_sch_quiesce(sch); - if (ret) - return ret; + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLOSE); ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch); if (!ret) @@ -64,7 +62,6 @@ static int vfio_ccw_mdev_notifier(struct notifier_block *nb, if (vfio_ccw_mdev_reset(private)) return NOTIFY_BAD; - cp_free(&private->cp); return NOTIFY_OK; } @@ -159,15 +156,9 @@ static void vfio_ccw_mdev_remove(struct mdev_device *mdev) vfio_unregister_group_dev(&private->vdev); - if ((private->state != VFIO_CCW_STATE_NOT_OPER) && - (private->state != VFIO_CCW_STATE_STANDBY)) { - if (!vfio_ccw_sch_quiesce(private->sch)) - private->state = VFIO_CCW_STATE_STANDBY; - /* The state will be NOT_OPER on error. */ - } + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLOSE); vfio_uninit_group_dev(&private->vdev); - cp_free(&private->cp); atomic_inc(&private->avail); } @@ -217,7 +208,6 @@ static void vfio_ccw_mdev_close_device(struct vfio_device *vdev) /* The state will be NOT_OPER on error. 
*/ } - cp_free(&private->cp); vfio_ccw_unregister_dev_regions(private); vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb); } diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h index 93e136ba369b..abac532bf03e 100644 --- a/drivers/s390/cio/vfio_ccw_private.h +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -143,6 +143,7 @@ enum vfio_ccw_event { VFIO_CCW_EVENT_INTERRUPT, VFIO_CCW_EVENT_ASYNC_REQ, VFIO_CCW_EVENT_OPEN, + VFIO_CCW_EVENT_CLOSE, /* last element! */ NR_VFIO_CCW_EVENTS }; -- cgit 1.4.1 From bfec266c8159450720705f16208736e03812e5dc Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Thu, 7 Jul 2022 15:57:36 +0200 Subject: vfio/ccw: Refactor vfio_ccw_mdev_reset Use both the FSM Close and Open events when resetting an mdev, rather than making a separate call to cio_enable_subchannel(). Signed-off-by: Eric Farman Reviewed-by: Jason Gunthorpe Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20220707135737.720765-11-farman@linux.ibm.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_ops.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index fc5b83187bd9..4673b7ddfe20 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -21,25 +21,21 @@ static const struct vfio_device_ops vfio_ccw_dev_ops; static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private) { - struct subchannel *sch; - int ret; - - sch = private->sch; /* - * TODO: - * In the cureent stage, some things like "no I/O running" and "no - * interrupt pending" are clear, but we are not sure what other state - * we need to care about. - * There are still a lot more instructions need to be handled. We - * should come back here later. + * If the FSM state is seen as Not Operational after closing + * and re-opening the mdev, return an error. 
+ * + * Otherwise, change the FSM from STANDBY to IDLE which is + * normally done by vfio_ccw_mdev_probe() in current lifecycle. */ vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLOSE); + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_OPEN); + if (private->state == VFIO_CCW_STATE_NOT_OPER) + return -EINVAL; - ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch); - if (!ret) - private->state = VFIO_CCW_STATE_IDLE; + private->state = VFIO_CCW_STATE_IDLE; - return ret; + return 0; } static int vfio_ccw_mdev_notifier(struct notifier_block *nb, -- cgit 1.4.1 From 204b394a23ad5e30944f23518e21e844614da2ff Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Thu, 7 Jul 2022 15:57:37 +0200 Subject: vfio/ccw: Move FSM open/close to MDEV open/close Part of the confusion that has existed is the FSM lifecycle of subchannels between the common CSS driver and the vfio-ccw driver. During configuration, the FSM state goes from NOT_OPER to STANDBY to IDLE, but then back to NOT_OPER. For example: vfio_ccw_sch_probe: VFIO_CCW_STATE_NOT_OPER vfio_ccw_sch_probe: VFIO_CCW_STATE_STANDBY vfio_ccw_mdev_probe: VFIO_CCW_STATE_IDLE vfio_ccw_mdev_remove: VFIO_CCW_STATE_NOT_OPER vfio_ccw_sch_remove: VFIO_CCW_STATE_NOT_OPER vfio_ccw_sch_shutdown: VFIO_CCW_STATE_NOT_OPER Rearrange the open/close events to align with the mdev open/close, to better manage the memory and state of the devices as time progresses. Specifically, make mdev_open() perform the FSM open, and mdev_close() perform the FSM close instead of reset (which is both close and open). This makes the NOT_OPER state a dead-end path, indicating the device is probably not recoverable without fully probing and re-configuring the device. This has the nice side-effect of removing a number of special-cases where the FSM state is managed outside of the FSM itself (such as the aforementioned mdev_close() routine). 
Suggested-by: Jason Gunthorpe Signed-off-by: Eric Farman Reviewed-by: Jason Gunthorpe Reviewed-by: Matthew Rosato Link: https://lore.kernel.org/r/20220707135737.720765-12-farman@linux.ibm.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_drv.c | 11 +++-------- drivers/s390/cio/vfio_ccw_fsm.c | 34 +++++++++++++++++++++++++--------- drivers/s390/cio/vfio_ccw_ops.c | 26 +++++++++++--------------- 3 files changed, 39 insertions(+), 32 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index f98c9915e73d..4804101ccb0f 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -138,7 +138,7 @@ static struct vfio_ccw_private *vfio_ccw_alloc_private(struct subchannel *sch) private->sch = sch; mutex_init(&private->io_mutex); - private->state = VFIO_CCW_STATE_NOT_OPER; + private->state = VFIO_CCW_STATE_STANDBY; INIT_LIST_HEAD(&private->crw); INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo); INIT_WORK(&private->crw_work, vfio_ccw_crw_todo); @@ -222,21 +222,15 @@ static int vfio_ccw_sch_probe(struct subchannel *sch) dev_set_drvdata(&sch->dev, private); - vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_OPEN); - if (private->state == VFIO_CCW_STATE_NOT_OPER) - goto out_free; - ret = mdev_register_device(&sch->dev, &vfio_ccw_mdev_driver); if (ret) - goto out_disable; + goto out_free; VFIO_CCW_MSG_EVENT(4, "bound to subchannel %x.%x.%04x\n", sch->schid.cssid, sch->schid.ssid, sch->schid.sch_no); return 0; -out_disable: - cio_disable_subchannel(sch); out_free: dev_set_drvdata(&sch->dev, NULL); vfio_ccw_free_private(private); @@ -264,6 +258,7 @@ static void vfio_ccw_sch_shutdown(struct subchannel *sch) struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLOSE); + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER); } /** diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c index 
89eb3feffa41..4b8b623df24f 100644 --- a/drivers/s390/cio/vfio_ccw_fsm.c +++ b/drivers/s390/cio/vfio_ccw_fsm.c @@ -175,6 +175,9 @@ static void fsm_notoper(struct vfio_ccw_private *private, */ css_sched_sch_todo(sch, SCH_TODO_UNREG); private->state = VFIO_CCW_STATE_NOT_OPER; + + /* This is usually handled during CLOSE event */ + cp_free(&private->cp); } /* @@ -379,9 +382,16 @@ static void fsm_open(struct vfio_ccw_private *private, spin_lock_irq(sch->lock); sch->isc = VFIO_CCW_ISC; ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch); - if (!ret) - private->state = VFIO_CCW_STATE_STANDBY; + if (ret) + goto err_unlock; + + private->state = VFIO_CCW_STATE_IDLE; spin_unlock_irq(sch->lock); + return; + +err_unlock: + spin_unlock_irq(sch->lock); + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER); } static void fsm_close(struct vfio_ccw_private *private, @@ -393,16 +403,22 @@ static void fsm_close(struct vfio_ccw_private *private, spin_lock_irq(sch->lock); if (!sch->schib.pmcw.ena) - goto out_unlock; + goto err_unlock; ret = cio_disable_subchannel(sch); if (ret == -EBUSY) vfio_ccw_sch_quiesce(sch); + if (ret) + goto err_unlock; -out_unlock: - private->state = VFIO_CCW_STATE_NOT_OPER; + private->state = VFIO_CCW_STATE_STANDBY; spin_unlock_irq(sch->lock); cp_free(&private->cp); + return; + +err_unlock: + spin_unlock_irq(sch->lock); + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER); } /* @@ -414,16 +430,16 @@ fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = { [VFIO_CCW_EVENT_IO_REQ] = fsm_io_error, [VFIO_CCW_EVENT_ASYNC_REQ] = fsm_async_error, [VFIO_CCW_EVENT_INTERRUPT] = fsm_disabled_irq, - [VFIO_CCW_EVENT_OPEN] = fsm_open, + [VFIO_CCW_EVENT_OPEN] = fsm_nop, [VFIO_CCW_EVENT_CLOSE] = fsm_nop, }, [VFIO_CCW_STATE_STANDBY] = { [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper, [VFIO_CCW_EVENT_IO_REQ] = fsm_io_error, [VFIO_CCW_EVENT_ASYNC_REQ] = fsm_async_error, - [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq, - [VFIO_CCW_EVENT_OPEN] = fsm_notoper, - 
[VFIO_CCW_EVENT_CLOSE] = fsm_close, + [VFIO_CCW_EVENT_INTERRUPT] = fsm_disabled_irq, + [VFIO_CCW_EVENT_OPEN] = fsm_open, + [VFIO_CCW_EVENT_CLOSE] = fsm_notoper, }, [VFIO_CCW_STATE_IDLE] = { [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper, diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 4673b7ddfe20..bc2176421dc5 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -24,17 +24,12 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private) /* * If the FSM state is seen as Not Operational after closing * and re-opening the mdev, return an error. - * - * Otherwise, change the FSM from STANDBY to IDLE which is - * normally done by vfio_ccw_mdev_probe() in current lifecycle. */ vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLOSE); vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_OPEN); if (private->state == VFIO_CCW_STATE_NOT_OPER) return -EINVAL; - private->state = VFIO_CCW_STATE_IDLE; - return 0; } @@ -121,8 +116,6 @@ static int vfio_ccw_mdev_probe(struct mdev_device *mdev) vfio_init_group_dev(&private->vdev, &mdev->dev, &vfio_ccw_dev_ops); - private->state = VFIO_CCW_STATE_IDLE; - VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: create\n", private->sch->schid.cssid, private->sch->schid.ssid, @@ -137,7 +130,6 @@ static int vfio_ccw_mdev_probe(struct mdev_device *mdev) err_atomic: vfio_uninit_group_dev(&private->vdev); atomic_inc(&private->avail); - private->state = VFIO_CCW_STATE_STANDBY; return ret; } @@ -165,6 +157,10 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev) unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; int ret; + /* Device cannot simply be opened again from this state */ + if (private->state == VFIO_CCW_STATE_NOT_OPER) + return -EINVAL; + private->nb.notifier_call = vfio_ccw_mdev_notifier; ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, @@ -184,6 +180,12 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev) if (ret) goto out_unregister; + vfio_ccw_fsm_event(private, 
VFIO_CCW_EVENT_OPEN); + if (private->state == VFIO_CCW_STATE_NOT_OPER) { + ret = -EINVAL; + goto out_unregister; + } + return ret; out_unregister: @@ -197,13 +199,7 @@ static void vfio_ccw_mdev_close_device(struct vfio_device *vdev) struct vfio_ccw_private *private = container_of(vdev, struct vfio_ccw_private, vdev); - if ((private->state != VFIO_CCW_STATE_NOT_OPER) && - (private->state != VFIO_CCW_STATE_STANDBY)) { - if (!vfio_ccw_mdev_reset(private)) - private->state = VFIO_CCW_STATE_STANDBY; - /* The state will be NOT_OPER on error. */ - } - + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLOSE); vfio_ccw_unregister_dev_regions(private); vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb); } -- cgit 1.4.1 From ce4b4657ff18925c315855aa290e93c5fa652d96 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 19 Jul 2022 21:02:48 -0300 Subject: vfio: Replace the DMA unmapping notifier with a callback Instead of having drivers register the notifier with explicit code just have them provide a dma_unmap callback op in their driver ops and rely on the core code to wire it up. 
Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Kevin Tian Reviewed-by: Tony Krowiak Reviewed-by: Eric Farman Reviewed-by: Zhenyu Wang Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v4-681e038e30fd+78-vfio_unmap_notif_jgg@nvidia.com Signed-off-by: Alex Williamson --- drivers/gpu/drm/i915/gvt/gvt.h | 1 - drivers/gpu/drm/i915/gvt/kvmgt.c | 75 +++++--------------- drivers/s390/cio/vfio_ccw_ops.c | 39 +++------- drivers/s390/cio/vfio_ccw_private.h | 2 - drivers/s390/crypto/vfio_ap_ops.c | 53 +++----------- drivers/s390/crypto/vfio_ap_private.h | 3 - drivers/vfio/vfio.c | 129 ++++++++++++---------------------- drivers/vfio/vfio.h | 3 + include/linux/vfio.h | 21 ++---- 9 files changed, 86 insertions(+), 240 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index aee1a45da74b..705689e64011 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -226,7 +226,6 @@ struct intel_vgpu { unsigned long nr_cache_entries; struct mutex cache_lock; - struct notifier_block iommu_notifier; atomic_t released; struct kvm_page_track_notifier_node track_node; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index e2f6c56ab342..ecd5bb37b63a 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -729,34 +729,25 @@ int intel_gvt_set_edid(struct intel_vgpu *vgpu, int port_num) return ret; } -static int intel_vgpu_iommu_notifier(struct notifier_block *nb, - unsigned long action, void *data) +static void intel_vgpu_dma_unmap(struct vfio_device *vfio_dev, u64 iova, + u64 length) { - struct intel_vgpu *vgpu = - container_of(nb, struct intel_vgpu, iommu_notifier); - - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { - struct vfio_iommu_type1_dma_unmap *unmap = data; - struct gvt_dma *entry; - unsigned long iov_pfn, end_iov_pfn; + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); + struct gvt_dma 
*entry; + u64 iov_pfn = iova >> PAGE_SHIFT; + u64 end_iov_pfn = iov_pfn + length / PAGE_SIZE; - iov_pfn = unmap->iova >> PAGE_SHIFT; - end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE; + mutex_lock(&vgpu->cache_lock); + for (; iov_pfn < end_iov_pfn; iov_pfn++) { + entry = __gvt_cache_find_gfn(vgpu, iov_pfn); + if (!entry) + continue; - mutex_lock(&vgpu->cache_lock); - for (; iov_pfn < end_iov_pfn; iov_pfn++) { - entry = __gvt_cache_find_gfn(vgpu, iov_pfn); - if (!entry) - continue; - - gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr, - entry->size); - __gvt_cache_remove_entry(vgpu, entry); - } - mutex_unlock(&vgpu->cache_lock); + gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr, + entry->size); + __gvt_cache_remove_entry(vgpu, entry); } - - return NOTIFY_OK; + mutex_unlock(&vgpu->cache_lock); } static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu) @@ -783,36 +774,20 @@ out: static int intel_vgpu_open_device(struct vfio_device *vfio_dev) { struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); - unsigned long events; - int ret; - - vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier; - events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; - ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events, - &vgpu->iommu_notifier); - if (ret != 0) { - gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n", - ret); - goto out; - } - - ret = -EEXIST; if (vgpu->attached) - goto undo_iommu; + return -EEXIST; - ret = -ESRCH; if (!vgpu->vfio_device.kvm || vgpu->vfio_device.kvm->mm != current->mm) { gvt_vgpu_err("KVM is required to use Intel vGPU\n"); - goto undo_iommu; + return -ESRCH; } kvm_get_kvm(vgpu->vfio_device.kvm); - ret = -EEXIST; if (__kvmgt_vgpu_exist(vgpu)) - goto undo_iommu; + return -EEXIST; vgpu->attached = true; @@ -831,12 +806,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) atomic_set(&vgpu->released, 0); return 0; - -undo_iommu: - vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, - &vgpu->iommu_notifier); -out: - return 
ret; } static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu) @@ -853,8 +822,6 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu) static void intel_vgpu_close_device(struct vfio_device *vfio_dev) { struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); - struct drm_i915_private *i915 = vgpu->gvt->gt->i915; - int ret; if (!vgpu->attached) return; @@ -864,11 +831,6 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev) intel_gvt_release_vgpu(vgpu); - ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_IOMMU_NOTIFY, - &vgpu->iommu_notifier); - drm_WARN(&i915->drm, ret, - "vfio_unregister_notifier for iommu failed: %d\n", ret); - debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs)); kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm, @@ -1610,6 +1572,7 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = { .write = intel_vgpu_write, .mmap = intel_vgpu_mmap, .ioctl = intel_vgpu_ioctl, + .dma_unmap = intel_vgpu_dma_unmap, }; static int intel_vgpu_probe(struct mdev_device *mdev) diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index bc2176421dc5..0047fd88f938 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -33,30 +33,16 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private) return 0; } -static int vfio_ccw_mdev_notifier(struct notifier_block *nb, - unsigned long action, - void *data) +static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length) { struct vfio_ccw_private *private = - container_of(nb, struct vfio_ccw_private, nb); - - /* - * Vendor drivers MUST unpin pages in response to an - * invalidation. 
- */ - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { - struct vfio_iommu_type1_dma_unmap *unmap = data; - - if (!cp_iova_pinned(&private->cp, unmap->iova)) - return NOTIFY_OK; - - if (vfio_ccw_mdev_reset(private)) - return NOTIFY_BAD; + container_of(vdev, struct vfio_ccw_private, vdev); - return NOTIFY_OK; - } + /* Drivers MUST unpin pages in response to an invalidation. */ + if (!cp_iova_pinned(&private->cp, iova)) + return; - return NOTIFY_DONE; + vfio_ccw_mdev_reset(private); } static ssize_t name_show(struct mdev_type *mtype, @@ -154,23 +140,15 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev) { struct vfio_ccw_private *private = container_of(vdev, struct vfio_ccw_private, vdev); - unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; int ret; /* Device cannot simply be opened again from this state */ if (private->state == VFIO_CCW_STATE_NOT_OPER) return -EINVAL; - private->nb.notifier_call = vfio_ccw_mdev_notifier; - - ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, - &events, &private->nb); - if (ret) - return ret; - ret = vfio_ccw_register_async_dev_regions(private); if (ret) - goto out_unregister; + return ret; ret = vfio_ccw_register_schib_dev_regions(private); if (ret) @@ -190,7 +168,6 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev) out_unregister: vfio_ccw_unregister_dev_regions(private); - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb); return ret; } @@ -201,7 +178,6 @@ static void vfio_ccw_mdev_close_device(struct vfio_device *vdev) vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLOSE); vfio_ccw_unregister_dev_regions(private); - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb); } static ssize_t vfio_ccw_mdev_read_io_region(struct vfio_ccw_private *private, @@ -624,6 +600,7 @@ static const struct vfio_device_ops vfio_ccw_dev_ops = { .write = vfio_ccw_mdev_write, .ioctl = vfio_ccw_mdev_ioctl, .request = vfio_ccw_mdev_request, + .dma_unmap = vfio_ccw_dma_unmap, }; struct mdev_driver 
vfio_ccw_mdev_driver = { diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h index abac532bf03e..cd24b7fada91 100644 --- a/drivers/s390/cio/vfio_ccw_private.h +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -73,7 +73,6 @@ struct vfio_ccw_crw { * @state: internal state of the device * @completion: synchronization helper of the I/O completion * @avail: available for creating a mediated device - * @nb: notifier for vfio events * @io_region: MMIO region to input/output I/O arguments/results * @io_mutex: protect against concurrent update of I/O regions * @region: additional regions for other subchannel operations @@ -96,7 +95,6 @@ struct vfio_ccw_private { int state; struct completion *completion; atomic_t avail; - struct notifier_block nb; struct ccw_io_region *io_region; struct mutex io_mutex; struct vfio_ccw_region *region; diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index a7d2a95796d3..bb1a1677c5c2 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -1226,34 +1226,14 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev, return 0; } -/** - * vfio_ap_mdev_iommu_notifier - IOMMU notifier callback - * - * @nb: The notifier block - * @action: Action to be taken - * @data: data associated with the request - * - * For an UNMAP request, unpin the guest IOVA (the NIB guest address we - * pinned before). Other requests are ignored. - * - * Return: for an UNMAP request, NOFITY_OK; otherwise NOTIFY_DONE. 
- */ -static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb, - unsigned long action, void *data) +static void vfio_ap_mdev_dma_unmap(struct vfio_device *vdev, u64 iova, + u64 length) { - struct ap_matrix_mdev *matrix_mdev; - - matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier); - - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { - struct vfio_iommu_type1_dma_unmap *unmap = data; - unsigned long g_pfn = unmap->iova >> PAGE_SHIFT; - - vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1); - return NOTIFY_OK; - } + struct ap_matrix_mdev *matrix_mdev = + container_of(vdev, struct ap_matrix_mdev, vdev); + unsigned long g_pfn = iova >> PAGE_SHIFT; - return NOTIFY_DONE; + vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1); } /** @@ -1380,27 +1360,11 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev) { struct ap_matrix_mdev *matrix_mdev = container_of(vdev, struct ap_matrix_mdev, vdev); - unsigned long events; - int ret; if (!vdev->kvm) return -EINVAL; - ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm); - if (ret) - return ret; - - matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier; - events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; - ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events, - &matrix_mdev->iommu_notifier); - if (ret) - goto err_kvm; - return 0; - -err_kvm: - vfio_ap_mdev_unset_kvm(matrix_mdev); - return ret; + return vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm); } static void vfio_ap_mdev_close_device(struct vfio_device *vdev) @@ -1408,8 +1372,6 @@ static void vfio_ap_mdev_close_device(struct vfio_device *vdev) struct ap_matrix_mdev *matrix_mdev = container_of(vdev, struct ap_matrix_mdev, vdev); - vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, - &matrix_mdev->iommu_notifier); vfio_ap_mdev_unset_kvm(matrix_mdev); } @@ -1461,6 +1423,7 @@ static const struct vfio_device_ops vfio_ap_matrix_dev_ops = { .open_device = vfio_ap_mdev_open_device, .close_device = vfio_ap_mdev_close_device, .ioctl = 
vfio_ap_mdev_ioctl, + .dma_unmap = vfio_ap_mdev_dma_unmap, }; static struct mdev_driver vfio_ap_matrix_driver = { diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h index a26efd804d0d..abb59d59f81b 100644 --- a/drivers/s390/crypto/vfio_ap_private.h +++ b/drivers/s390/crypto/vfio_ap_private.h @@ -81,8 +81,6 @@ struct ap_matrix { * @node: allows the ap_matrix_mdev struct to be added to a list * @matrix: the adapters, usage domains and control domains assigned to the * mediated matrix device. - * @iommu_notifier: notifier block used for specifying callback function for - * handling the VFIO_IOMMU_NOTIFY_DMA_UNMAP even * @kvm: the struct holding guest's state * @pqap_hook: the function pointer to the interception handler for the * PQAP(AQIC) instruction. @@ -92,7 +90,6 @@ struct ap_matrix_mdev { struct vfio_device vdev; struct list_head node; struct ap_matrix matrix; - struct notifier_block iommu_notifier; struct kvm *kvm; crypto_hook pqap_hook; struct mdev_device *mdev; diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index bd84ca7c5e35..83c375fa2421 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -231,6 +231,9 @@ int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops) { struct vfio_iommu_driver *driver, *tmp; + if (WARN_ON(!ops->register_notifier != !ops->unregister_notifier)) + return -EINVAL; + driver = kzalloc(sizeof(*driver), GFP_KERNEL); if (!driver) return -ENOMEM; @@ -1079,8 +1082,20 @@ static void vfio_device_unassign_container(struct vfio_device *device) up_write(&device->group->group_rwsem); } +static int vfio_iommu_notifier(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct vfio_device *vfio_device = + container_of(nb, struct vfio_device, iommu_nb); + struct vfio_iommu_type1_dma_unmap *unmap = data; + + vfio_device->ops->dma_unmap(vfio_device, unmap->iova, unmap->size); + return NOTIFY_OK; +} + static struct file *vfio_device_open(struct vfio_device *device) 
{ + struct vfio_iommu_driver *iommu_driver; struct file *filep; int ret; @@ -1111,6 +1126,18 @@ static struct file *vfio_device_open(struct vfio_device *device) if (ret) goto err_undo_count; } + + iommu_driver = device->group->container->iommu_driver; + if (device->ops->dma_unmap && iommu_driver && + iommu_driver->ops->register_notifier) { + unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; + + device->iommu_nb.notifier_call = vfio_iommu_notifier; + iommu_driver->ops->register_notifier( + device->group->container->iommu_data, &events, + &device->iommu_nb); + } + up_read(&device->group->group_rwsem); } mutex_unlock(&device->dev_set->lock); @@ -1145,8 +1172,16 @@ static struct file *vfio_device_open(struct vfio_device *device) err_close_device: mutex_lock(&device->dev_set->lock); down_read(&device->group->group_rwsem); - if (device->open_count == 1 && device->ops->close_device) + if (device->open_count == 1 && device->ops->close_device) { device->ops->close_device(device); + + iommu_driver = device->group->container->iommu_driver; + if (device->ops->dma_unmap && iommu_driver && + iommu_driver->ops->unregister_notifier) + iommu_driver->ops->unregister_notifier( + device->group->container->iommu_data, + &device->iommu_nb); + } err_undo_count: up_read(&device->group->group_rwsem); device->open_count--; @@ -1341,12 +1376,20 @@ static const struct file_operations vfio_group_fops = { static int vfio_device_fops_release(struct inode *inode, struct file *filep) { struct vfio_device *device = filep->private_data; + struct vfio_iommu_driver *iommu_driver; mutex_lock(&device->dev_set->lock); vfio_assert_device_open(device); down_read(&device->group->group_rwsem); if (device->open_count == 1 && device->ops->close_device) device->ops->close_device(device); + + iommu_driver = device->group->container->iommu_driver; + if (device->ops->dma_unmap && iommu_driver && + iommu_driver->ops->unregister_notifier) + iommu_driver->ops->unregister_notifier( + 
device->group->container->iommu_data, + &device->iommu_nb); up_read(&device->group->group_rwsem); device->open_count--; if (device->open_count == 0) @@ -2029,90 +2072,6 @@ int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data, } EXPORT_SYMBOL(vfio_dma_rw); -static int vfio_register_iommu_notifier(struct vfio_group *group, - unsigned long *events, - struct notifier_block *nb) -{ - struct vfio_container *container; - struct vfio_iommu_driver *driver; - int ret; - - lockdep_assert_held_read(&group->group_rwsem); - - container = group->container; - driver = container->iommu_driver; - if (likely(driver && driver->ops->register_notifier)) - ret = driver->ops->register_notifier(container->iommu_data, - events, nb); - else - ret = -ENOTTY; - - return ret; -} - -static int vfio_unregister_iommu_notifier(struct vfio_group *group, - struct notifier_block *nb) -{ - struct vfio_container *container; - struct vfio_iommu_driver *driver; - int ret; - - lockdep_assert_held_read(&group->group_rwsem); - - container = group->container; - driver = container->iommu_driver; - if (likely(driver && driver->ops->unregister_notifier)) - ret = driver->ops->unregister_notifier(container->iommu_data, - nb); - else - ret = -ENOTTY; - - return ret; -} - -int vfio_register_notifier(struct vfio_device *device, - enum vfio_notify_type type, unsigned long *events, - struct notifier_block *nb) -{ - struct vfio_group *group = device->group; - int ret; - - if (!nb || !events || (*events == 0) || - !vfio_assert_device_open(device)) - return -EINVAL; - - switch (type) { - case VFIO_IOMMU_NOTIFY: - ret = vfio_register_iommu_notifier(group, events, nb); - break; - default: - ret = -EINVAL; - } - return ret; -} -EXPORT_SYMBOL(vfio_register_notifier); - -int vfio_unregister_notifier(struct vfio_device *device, - enum vfio_notify_type type, - struct notifier_block *nb) -{ - struct vfio_group *group = device->group; - int ret; - - if (!nb || !vfio_assert_device_open(device)) - return 
-EINVAL; - - switch (type) { - case VFIO_IOMMU_NOTIFY: - ret = vfio_unregister_iommu_notifier(group, nb); - break; - default: - ret = -EINVAL; - } - return ret; -} -EXPORT_SYMBOL(vfio_unregister_notifier); - /* * Module/class support */ diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index a67130221151..25da02ca1568 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -33,6 +33,9 @@ enum vfio_iommu_notify_type { VFIO_IOMMU_CONTAINER_CLOSE = 0, }; +/* events for register_notifier() */ +#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0) + /** * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks */ diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 4d26e149db81..1f9fc7a9be9e 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -49,6 +49,7 @@ struct vfio_device { unsigned int open_count; struct completion comp; struct list_head group_next; + struct notifier_block iommu_nb; }; /** @@ -65,6 +66,8 @@ struct vfio_device { * @match: Optional device name match callback (return: 0 for no-match, >0 for * match, -errno for abort (ex. match with insufficient or incorrect * additional args) + * @dma_unmap: Called when userspace unmaps IOVA from the container + * this device is attached to. 
* @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl */ struct vfio_device_ops { @@ -80,6 +83,7 @@ struct vfio_device_ops { int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma); void (*request)(struct vfio_device *vdev, unsigned int count); int (*match)(struct vfio_device *vdev, char *buf); + void (*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length); int (*device_feature)(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz); }; @@ -164,23 +168,6 @@ int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data, size_t len, bool write); -/* each type has independent events */ -enum vfio_notify_type { - VFIO_IOMMU_NOTIFY = 0, -}; - -/* events for VFIO_IOMMU_NOTIFY */ -#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0) - -int vfio_register_notifier(struct vfio_device *device, - enum vfio_notify_type type, - unsigned long *required_events, - struct notifier_block *nb); -int vfio_unregister_notifier(struct vfio_device *device, - enum vfio_notify_type type, - struct notifier_block *nb); - - /* * Sub-module helpers */ -- cgit 1.4.1 From 10e19d492a326afe7f016a8735ccdfd7c65fc979 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 22 Jul 2022 19:02:49 -0700 Subject: vfio/ap: Pass in physical address of ind to ap_aqic() The ap_aqic() is called by vfio_ap_irq_enable() where it passes in a virt value that's casted from a physical address "h_nib". Inside the ap_aqic(), it does virt_to_phys() again. Since ap_aqic() needs a physical address, let's just pass in a pa of ind directly. So change the "ind" to "pa_ind". 
Reviewed-by: Harald Freudenberger Reviewed-by: Jason Gunthorpe Tested-by: Eric Farman Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20220723020256.30081-4-nicolinc@nvidia.com Signed-off-by: Alex Williamson --- arch/s390/include/asm/ap.h | 6 +++--- drivers/s390/crypto/ap_queue.c | 2 +- drivers/s390/crypto/vfio_ap_ops.c | 7 ++++--- 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'drivers/s390') diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index b515cfa62bd9..f508f5025e38 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -227,13 +227,13 @@ struct ap_qirq_ctrl { * ap_aqic(): Control interruption for a specific AP. * @qid: The AP queue number * @qirqctrl: struct ap_qirq_ctrl (64 bit value) - * @ind: The notification indicator byte + * @pa_ind: Physical address of the notification indicator byte * * Returns AP queue status. */ static inline struct ap_queue_status ap_aqic(ap_qid_t qid, struct ap_qirq_ctrl qirqctrl, - void *ind) + phys_addr_t pa_ind) { unsigned long reg0 = qid | (3UL << 24); /* fc 3UL is AQIC */ union { @@ -241,7 +241,7 @@ static inline struct ap_queue_status ap_aqic(ap_qid_t qid, struct ap_qirq_ctrl qirqctrl; struct ap_queue_status status; } reg1; - unsigned long reg2 = virt_to_phys(ind); + unsigned long reg2 = pa_ind; reg1.qirqctrl = qirqctrl; diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index c48b0db824e3..a32457b4cbb8 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -34,7 +34,7 @@ static int ap_queue_enable_irq(struct ap_queue *aq, void *ind) qirqctrl.ir = 1; qirqctrl.isc = AP_ISC; - status = ap_aqic(aq->qid, qirqctrl, ind); + status = ap_aqic(aq->qid, qirqctrl, virt_to_phys(ind)); switch (status.response_code) { case AP_RESPONSE_NORMAL: case AP_RESPONSE_OTHERWISE_CHANGED: diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index bb1a1677c5c2..5781059d3ed2 100644 --- 
a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -154,7 +154,7 @@ static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q) int retries = 5; do { - status = ap_aqic(q->apqn, aqic_gisa, NULL); + status = ap_aqic(q->apqn, aqic_gisa, 0); switch (status.response_code) { case AP_RESPONSE_OTHERWISE_CHANGED: case AP_RESPONSE_NORMAL: @@ -245,7 +245,8 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, struct kvm_s390_gisa *gisa; int nisc; struct kvm *kvm; - unsigned long h_nib, g_pfn, h_pfn; + unsigned long g_pfn, h_pfn; + phys_addr_t h_nib; int ret; /* Verify that the notification indicator byte address is valid */ @@ -290,7 +291,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, aqic_gisa.ir = 1; aqic_gisa.gisa = (uint64_t)gisa >> 4; - status = ap_aqic(q->apqn, aqic_gisa, (void *)h_nib); + status = ap_aqic(q->apqn, aqic_gisa, h_nib); switch (status.response_code) { case AP_RESPONSE_NORMAL: /* See if we did clear older IRQ configuration */ -- cgit 1.4.1 From cfedb3d5e602dbf12e254cf88aceac348342f9b2 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 22 Jul 2022 19:02:50 -0700 Subject: vfio/ccw: Only pass in contiguous pages This driver is the only caller of vfio_pin/unpin_pages that might pass in a non-contiguous PFN list, but in many cases it has a contiguous PFN list to process. So letting VFIO API handle a non-contiguous PFN list is actually counterproductive. Add a pair of simple loops to pass in contiguous PFNs only, to have an efficient implementation in VFIO. 
Reviewed-by: Jason Gunthorpe Reviewed-by: Eric Farman Tested-by: Eric Farman Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20220723020256.30081-5-nicolinc@nvidia.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_cp.c | 70 +++++++++++++++++++++++++++++++++--------- 1 file changed, 56 insertions(+), 14 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index 0c2be9421ab7..3b94863ad24e 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -90,6 +90,38 @@ static int pfn_array_alloc(struct pfn_array *pa, u64 iova, unsigned int len) return 0; } +/* + * pfn_array_unpin() - Unpin user pages in memory + * @pa: pfn_array on which to perform the operation + * @vdev: the vfio device to perform the operation + * @pa_nr: number of user pages to unpin + * + * Only unpin if any pages were pinned to begin with, i.e. pa_nr > 0, + * otherwise only clear pa->pa_nr + */ +static void pfn_array_unpin(struct pfn_array *pa, + struct vfio_device *vdev, int pa_nr) +{ + int unpinned = 0, npage = 1; + + while (unpinned < pa_nr) { + unsigned long *first = &pa->pa_iova_pfn[unpinned]; + unsigned long *last = &first[npage]; + + if (unpinned + npage < pa_nr && + *first + npage == *last) { + npage++; + continue; + } + + vfio_unpin_pages(vdev, first, npage); + unpinned += npage; + npage = 1; + } + + pa->pa_nr = 0; +} + /* * pfn_array_pin() - Pin user pages in memory * @pa: pfn_array on which to perform the operation @@ -101,34 +133,44 @@ static int pfn_array_alloc(struct pfn_array *pa, u64 iova, unsigned int len) */ static int pfn_array_pin(struct pfn_array *pa, struct vfio_device *vdev) { + int pinned = 0, npage = 1; int ret = 0; - ret = vfio_pin_pages(vdev, pa->pa_iova_pfn, pa->pa_nr, - IOMMU_READ | IOMMU_WRITE, pa->pa_pfn); + while (pinned < pa->pa_nr) { + unsigned long *first = &pa->pa_iova_pfn[pinned]; + unsigned long *last = &first[npage]; - if (ret < 0) { - goto 
err_out; - } else if (ret > 0 && ret != pa->pa_nr) { - vfio_unpin_pages(vdev, pa->pa_iova_pfn, ret); - ret = -EINVAL; - goto err_out; + if (pinned + npage < pa->pa_nr && + *first + npage == *last) { + npage++; + continue; + } + + ret = vfio_pin_pages(vdev, first, npage, + IOMMU_READ | IOMMU_WRITE, + &pa->pa_pfn[pinned]); + if (ret < 0) { + goto err_out; + } else if (ret > 0 && ret != npage) { + pinned += ret; + ret = -EINVAL; + goto err_out; + } + pinned += npage; + npage = 1; } return ret; err_out: - pa->pa_nr = 0; - + pfn_array_unpin(pa, vdev, pinned); return ret; } /* Unpin the pages before releasing the memory. */ static void pfn_array_unpin_free(struct pfn_array *pa, struct vfio_device *vdev) { - /* Only unpin if any pages were pinned to begin with */ - if (pa->pa_nr) - vfio_unpin_pages(vdev, pa->pa_iova_pfn, pa->pa_nr); - pa->pa_nr = 0; + pfn_array_unpin(pa, vdev, pa->pa_nr); kfree(pa->pa_iova_pfn); } -- cgit 1.4.1 From 44abdd1646e1fbfb781972c0bffc90b4eb3e87b3 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 22 Jul 2022 19:02:51 -0700 Subject: vfio: Pass in starting IOVA to vfio_pin/unpin_pages API The vfio_pin/unpin_pages() so far accepted arrays of PFNs of user IOVA. Among all three callers, there was only one caller possibly passing in a non-contiguous PFN list, which is now ensured to have contiguous PFN inputs too. Pass in the starting address with "iova" alone to simplify things, so callers no longer need to maintain a PFN list or to pin/unpin one page at a time. This also allows VFIO to use more efficient implementations of pin/unpin_pages. For now, also update vfio_iommu_type1 to fit this new parameter too, while keeping its input intact (being user_iova) since we don't want to spend too much effort swapping its parameters and local variables at that level. 
Reviewed-by: Christoph Hellwig Reviewed-by: Kirti Wankhede Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Tony Krowiak Acked-by: Eric Farman Tested-by: Terrence Xu Tested-by: Eric Farman Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20220723020256.30081-6-nicolinc@nvidia.com Signed-off-by: Alex Williamson --- Documentation/driver-api/vfio-mediated-device.rst | 4 ++-- drivers/gpu/drm/i915/gvt/kvmgt.c | 18 +++++---------- drivers/s390/cio/vfio_ccw_cp.c | 4 ++-- drivers/s390/crypto/vfio_ap_ops.c | 9 ++++---- drivers/vfio/vfio.c | 27 ++++++++++------------- drivers/vfio/vfio.h | 4 ++-- drivers/vfio/vfio_iommu_type1.c | 15 ++++++------- include/linux/vfio.h | 5 ++--- 8 files changed, 37 insertions(+), 49 deletions(-) (limited to 'drivers/s390') diff --git a/Documentation/driver-api/vfio-mediated-device.rst b/Documentation/driver-api/vfio-mediated-device.rst index b0fdf76b339a..ea32a0f13ddb 100644 --- a/Documentation/driver-api/vfio-mediated-device.rst +++ b/Documentation/driver-api/vfio-mediated-device.rst @@ -262,10 +262,10 @@ Translation APIs for Mediated Devices The following APIs are provided for translating user pfn to host pfn in a VFIO driver:: - int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn, + int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, int npage, int prot, unsigned long *phys_pfn); - void vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, + void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage); These functions call back into the back-end IOMMU module by using the pin_pages diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 2fee5695515a..8be75c282611 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -231,14 +231,8 @@ static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt) static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, unsigned 
long size) { - int total_pages = DIV_ROUND_UP(size, PAGE_SIZE); - int npage; - - for (npage = 0; npage < total_pages; npage++) { - unsigned long cur_gfn = gfn + npage; - - vfio_unpin_pages(&vgpu->vfio_device, &cur_gfn, 1); - } + vfio_unpin_pages(&vgpu->vfio_device, gfn << PAGE_SHIFT, + DIV_ROUND_UP(size, PAGE_SIZE)); } /* Pin a normal or compound guest page for dma. */ @@ -255,14 +249,14 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, * on stack to hold pfns. */ for (npage = 0; npage < total_pages; npage++) { - unsigned long cur_gfn = gfn + npage; + dma_addr_t cur_iova = (gfn + npage) << PAGE_SHIFT; unsigned long pfn; - ret = vfio_pin_pages(&vgpu->vfio_device, &cur_gfn, 1, + ret = vfio_pin_pages(&vgpu->vfio_device, cur_iova, 1, IOMMU_READ | IOMMU_WRITE, &pfn); if (ret != 1) { - gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n", - cur_gfn, ret); + gvt_vgpu_err("vfio_pin_pages failed for iova %pad, ret %d\n", + &cur_iova, ret); goto err; } diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index 3b94863ad24e..a739262f988d 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -114,7 +114,7 @@ static void pfn_array_unpin(struct pfn_array *pa, continue; } - vfio_unpin_pages(vdev, first, npage); + vfio_unpin_pages(vdev, *first << PAGE_SHIFT, npage); unpinned += npage; npage = 1; } @@ -146,7 +146,7 @@ static int pfn_array_pin(struct pfn_array *pa, struct vfio_device *vdev) continue; } - ret = vfio_pin_pages(vdev, first, npage, + ret = vfio_pin_pages(vdev, *first << PAGE_SHIFT, npage, IOMMU_READ | IOMMU_WRITE, &pa->pa_pfn[pinned]); if (ret < 0) { diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 5781059d3ed2..70f484668ca0 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -124,7 +124,7 @@ static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q) q->saved_isc = VFIO_AP_ISC_INVALID; } if 
(q->saved_pfn && !WARN_ON(!q->matrix_mdev)) { - vfio_unpin_pages(&q->matrix_mdev->vdev, &q->saved_pfn, 1); + vfio_unpin_pages(&q->matrix_mdev->vdev, q->saved_pfn << PAGE_SHIFT, 1); q->saved_pfn = 0; } } @@ -258,7 +258,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, return status; } - ret = vfio_pin_pages(&q->matrix_mdev->vdev, &g_pfn, 1, + ret = vfio_pin_pages(&q->matrix_mdev->vdev, g_pfn << PAGE_SHIFT, 1, IOMMU_READ | IOMMU_WRITE, &h_pfn); switch (ret) { case 1: @@ -301,7 +301,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, break; case AP_RESPONSE_OTHERWISE_CHANGED: /* We could not modify IRQ setings: clear new configuration */ - vfio_unpin_pages(&q->matrix_mdev->vdev, &g_pfn, 1); + vfio_unpin_pages(&q->matrix_mdev->vdev, g_pfn << PAGE_SHIFT, 1); kvm_s390_gisc_unregister(kvm, isc); break; default: @@ -1232,9 +1232,8 @@ static void vfio_ap_mdev_dma_unmap(struct vfio_device *vdev, u64 iova, { struct ap_matrix_mdev *matrix_mdev = container_of(vdev, struct ap_matrix_mdev, vdev); - unsigned long g_pfn = iova >> PAGE_SHIFT; - vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1); + vfio_unpin_pages(&matrix_mdev->vdev, iova, 1); } /** diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 92b10aafae28..ffd1a492eea9 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1934,17 +1934,17 @@ int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs, EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); /* - * Pin a set of guest PFNs and return their associated host PFNs for local + * Pin contiguous user pages and return their associated host pages for local * domain only. * @device [in] : device - * @user_pfn [in]: array of user/guest PFNs to be pinned. - * @npage [in] : count of elements in user_pfn array. This count should not - * be greater VFIO_PIN_PAGES_MAX_ENTRIES. + * @iova [in] : starting IOVA of user pages to be pinned. + * @npage [in] : count of pages to be pinned. 
This count should not + * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. * @prot [in] : protection flags * @phys_pfn[out]: array of host PFNs * Return error or number of pages pinned. */ -int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn, +int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, int npage, int prot, unsigned long *phys_pfn) { struct vfio_container *container; @@ -1952,8 +1952,7 @@ int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn, struct vfio_iommu_driver *driver; int ret; - if (!user_pfn || !phys_pfn || !npage || - !vfio_assert_device_open(device)) + if (!phys_pfn || !npage || !vfio_assert_device_open(device)) return -EINVAL; if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) @@ -1967,7 +1966,7 @@ int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn, driver = container->iommu_driver; if (likely(driver && driver->ops->pin_pages)) ret = driver->ops->pin_pages(container->iommu_data, - group->iommu_group, user_pfn, + group->iommu_group, iova, npage, prot, phys_pfn); else ret = -ENOTTY; @@ -1977,15 +1976,13 @@ int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn, EXPORT_SYMBOL(vfio_pin_pages); /* - * Unpin set of host PFNs for local domain only. + * Unpin contiguous host pages for local domain only. * @device [in] : device - * @user_pfn [in]: array of user/guest PFNs to be unpinned. Number of user/guest - * PFNs should not be greater than VFIO_PIN_PAGES_MAX_ENTRIES. - * @npage [in] : count of elements in user_pfn array. This count should not + * @iova [in] : starting address of user pages to be unpinned. + * @npage [in] : count of pages to be unpinned. This count should not * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. 
*/ -void vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, - int npage) +void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage) { struct vfio_container *container; struct vfio_iommu_driver *driver; @@ -2000,7 +1997,7 @@ void vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, container = device->group->container; driver = container->iommu_driver; - driver->ops->unpin_pages(container->iommu_data, user_pfn, npage); + driver->ops->unpin_pages(container->iommu_data, iova, npage); } EXPORT_SYMBOL(vfio_unpin_pages); diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index 6a8424b407c7..e9767e13f00f 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -50,11 +50,11 @@ struct vfio_iommu_driver_ops { struct iommu_group *group); int (*pin_pages)(void *iommu_data, struct iommu_group *group, - unsigned long *user_pfn, + dma_addr_t user_iova, int npage, int prot, unsigned long *phys_pfn); void (*unpin_pages)(void *iommu_data, - unsigned long *user_pfn, int npage); + dma_addr_t user_iova, int npage); void (*register_device)(void *iommu_data, struct vfio_device *vdev); void (*unregister_device)(void *iommu_data, diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index e49fbe9968ef..e629e059118c 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -829,7 +829,7 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova, static int vfio_iommu_type1_pin_pages(void *iommu_data, struct iommu_group *iommu_group, - unsigned long *user_pfn, + dma_addr_t user_iova, int npage, int prot, unsigned long *phys_pfn) { @@ -841,7 +841,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, bool do_accounting; dma_addr_t iova; - if (!iommu || !user_pfn || !phys_pfn) + if (!iommu || !phys_pfn) return -EINVAL; /* Supported for v2 version only */ @@ -857,7 +857,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, again: if 
(iommu->vaddr_invalid_count) { for (i = 0; i < npage; i++) { - iova = user_pfn[i] << PAGE_SHIFT; + iova = user_iova + PAGE_SIZE * i; ret = vfio_find_dma_valid(iommu, iova, PAGE_SIZE, &dma); if (ret < 0) goto pin_done; @@ -882,7 +882,7 @@ again: for (i = 0; i < npage; i++) { struct vfio_pfn *vpfn; - iova = user_pfn[i] << PAGE_SHIFT; + iova = user_iova + PAGE_SIZE * i; dma = vfio_find_dma(iommu, iova, PAGE_SIZE); if (!dma) { ret = -EINVAL; @@ -939,7 +939,7 @@ pin_unwind: for (j = 0; j < i; j++) { dma_addr_t iova; - iova = user_pfn[j] << PAGE_SHIFT; + iova = user_iova + PAGE_SIZE * j; dma = vfio_find_dma(iommu, iova, PAGE_SIZE); vfio_unpin_page_external(dma, iova, do_accounting); phys_pfn[j] = 0; @@ -950,7 +950,7 @@ pin_done: } static void vfio_iommu_type1_unpin_pages(void *iommu_data, - unsigned long *user_pfn, int npage) + dma_addr_t user_iova, int npage) { struct vfio_iommu *iommu = iommu_data; bool do_accounting; @@ -964,10 +964,9 @@ static void vfio_iommu_type1_unpin_pages(void *iommu_data, do_accounting = list_empty(&iommu->domain_list); for (i = 0; i < npage; i++) { + dma_addr_t iova = user_iova + PAGE_SIZE * i; struct vfio_dma *dma; - dma_addr_t iova; - iova = user_pfn[i] << PAGE_SHIFT; dma = vfio_find_dma(iommu, iova, PAGE_SIZE); if (!dma) break; diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 9f7d74c24925..9e3b6abcf890 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -161,10 +161,9 @@ bool vfio_file_has_dev(struct file *file, struct vfio_device *device); #define VFIO_PIN_PAGES_MAX_ENTRIES (PAGE_SIZE/sizeof(unsigned long)) -int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn, +int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, int npage, int prot, unsigned long *phys_pfn); -void vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn, - int npage); +void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage); int vfio_dma_rw(struct vfio_device *device, dma_addr_t 
user_iova, void *data, size_t len, bool write); -- cgit 1.4.1 From 3fad3a26139d5a41bc6b20df47ff067f5db2fe75 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 22 Jul 2022 19:02:52 -0700 Subject: vfio/ap: Change saved_pfn to saved_iova The vfio_ap_ops code maintains both nib address and its PFN, which is redundant, merely because vfio_pin/unpin_pages API wanted pfn. Since vfio_pin/unpin_pages() now accept "iova", change "saved_pfn" to "saved_iova" and remove pfn in the vfio_ap_validate_nib(). Reviewed-by: Jason Gunthorpe Reviewed-by: Tony Krowiak Tested-by: Eric Farman Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20220723020256.30081-7-nicolinc@nvidia.com Signed-off-by: Alex Williamson --- drivers/s390/crypto/vfio_ap_ops.c | 42 ++++++++++++++--------------------- drivers/s390/crypto/vfio_ap_private.h | 4 ++-- 2 files changed, 19 insertions(+), 27 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 70f484668ca0..d7c38c82f694 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -112,7 +112,7 @@ static void vfio_ap_wait_for_irqclear(int apqn) * * Unregisters the ISC in the GIB when the saved ISC not invalid. * Unpins the guest's page holding the NIB when it exists. - * Resets the saved_pfn and saved_isc to invalid values. + * Resets the saved_iova and saved_isc to invalid values. 
*/ static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q) { @@ -123,9 +123,9 @@ static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q) kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->saved_isc); q->saved_isc = VFIO_AP_ISC_INVALID; } - if (q->saved_pfn && !WARN_ON(!q->matrix_mdev)) { - vfio_unpin_pages(&q->matrix_mdev->vdev, q->saved_pfn << PAGE_SHIFT, 1); - q->saved_pfn = 0; + if (q->saved_iova && !WARN_ON(!q->matrix_mdev)) { + vfio_unpin_pages(&q->matrix_mdev->vdev, q->saved_iova, 1); + q->saved_iova = 0; } } @@ -189,27 +189,19 @@ end_free: * * @vcpu: the object representing the vcpu executing the PQAP(AQIC) instruction. * @nib: the location for storing the nib address. - * @g_pfn: the location for storing the page frame number of the page containing - * the nib. * * When the PQAP(AQIC) instruction is executed, general register 2 contains the * address of the notification indicator byte (nib) used for IRQ notification. - * This function parses the nib from gr2 and calculates the page frame - * number for the guest of the page containing the nib. The values are - * stored in @nib and @g_pfn respectively. - * - * The g_pfn of the nib is then validated to ensure the nib address is valid. + * This function parses and validates the nib from gr2. * * Return: returns zero if the nib address is a valid; otherwise, returns * -EINVAL. 
*/ -static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, unsigned long *nib, - unsigned long *g_pfn) +static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, dma_addr_t *nib) { *nib = vcpu->run->s.regs.gprs[2]; - *g_pfn = *nib >> PAGE_SHIFT; - if (kvm_is_error_hva(gfn_to_hva(vcpu->kvm, *g_pfn))) + if (kvm_is_error_hva(gfn_to_hva(vcpu->kvm, *nib >> PAGE_SHIFT))) return -EINVAL; return 0; @@ -239,34 +231,34 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, int isc, struct kvm_vcpu *vcpu) { - unsigned long nib; struct ap_qirq_ctrl aqic_gisa = {}; struct ap_queue_status status = {}; struct kvm_s390_gisa *gisa; int nisc; struct kvm *kvm; - unsigned long g_pfn, h_pfn; + unsigned long h_pfn; phys_addr_t h_nib; + dma_addr_t nib; int ret; /* Verify that the notification indicator byte address is valid */ - if (vfio_ap_validate_nib(vcpu, &nib, &g_pfn)) { - VFIO_AP_DBF_WARN("%s: invalid NIB address: nib=%#lx, g_pfn=%#lx, apqn=%#04x\n", - __func__, nib, g_pfn, q->apqn); + if (vfio_ap_validate_nib(vcpu, &nib)) { + VFIO_AP_DBF_WARN("%s: invalid NIB address: nib=%pad, apqn=%#04x\n", + __func__, &nib, q->apqn); status.response_code = AP_RESPONSE_INVALID_ADDRESS; return status; } - ret = vfio_pin_pages(&q->matrix_mdev->vdev, g_pfn << PAGE_SHIFT, 1, + ret = vfio_pin_pages(&q->matrix_mdev->vdev, nib, 1, IOMMU_READ | IOMMU_WRITE, &h_pfn); switch (ret) { case 1: break; default: VFIO_AP_DBF_WARN("%s: vfio_pin_pages failed: rc=%d," - "nib=%#lx, g_pfn=%#lx, apqn=%#04x\n", - __func__, ret, nib, g_pfn, q->apqn); + "nib=%pad, apqn=%#04x\n", + __func__, ret, &nib, q->apqn); status.response_code = AP_RESPONSE_INVALID_ADDRESS; return status; @@ -296,12 +288,12 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, case AP_RESPONSE_NORMAL: /* See if we did clear older IRQ configuration */ vfio_ap_free_aqic_resources(q); - q->saved_pfn = g_pfn; + q->saved_iova = nib; q->saved_isc = isc; break; case AP_RESPONSE_OTHERWISE_CHANGED: /* We could not 
modify IRQ setings: clear new configuration */ - vfio_unpin_pages(&q->matrix_mdev->vdev, g_pfn << PAGE_SHIFT, 1); + vfio_unpin_pages(&q->matrix_mdev->vdev, nib, 1); kvm_s390_gisc_unregister(kvm, isc); break; default: diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h index abb59d59f81b..d912487175d3 100644 --- a/drivers/s390/crypto/vfio_ap_private.h +++ b/drivers/s390/crypto/vfio_ap_private.h @@ -99,13 +99,13 @@ struct ap_matrix_mdev { * struct vfio_ap_queue - contains the data associated with a queue bound to the * vfio_ap device driver * @matrix_mdev: the matrix mediated device - * @saved_pfn: the guest PFN pinned for the guest + * @saved_iova: the notification indicator byte (nib) address * @apqn: the APQN of the AP queue device * @saved_isc: the guest ISC registered with the GIB interface */ struct vfio_ap_queue { struct ap_matrix_mdev *matrix_mdev; - unsigned long saved_pfn; + dma_addr_t saved_iova; int apqn; #define VFIO_AP_ISC_INVALID 0xff unsigned char saved_isc; -- cgit 1.4.1 From 1331460514ff7d378373223109eefae4cd55ec77 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 22 Jul 2022 19:02:53 -0700 Subject: vfio/ccw: Change pa_pfn list to pa_iova list The vfio_ccw_cp code maintains both iova and its PFN list because the vfio_pin/unpin_pages API wanted pfn list. Since vfio_pin/unpin_pages() now accept "iova", change to maintain only pa_iova list and rename all "pfn_array" strings to "page_array", so as to simplify the code. 
Reviewed-by: Jason Gunthorpe Reviewed-by: Eric Farman Tested-by: Eric Farman Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20220723020256.30081-8-nicolinc@nvidia.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_cp.c | 135 +++++++++++++++++++---------------------- 1 file changed, 64 insertions(+), 71 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index a739262f988d..3854c3d573f5 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -18,11 +18,9 @@ #include "vfio_ccw_cp.h" #include "vfio_ccw_private.h" -struct pfn_array { - /* Starting guest physical I/O address. */ - unsigned long pa_iova; - /* Array that stores PFNs of the pages need to pin. */ - unsigned long *pa_iova_pfn; +struct page_array { + /* Array that stores pages need to pin. */ + dma_addr_t *pa_iova; /* Array that receives PFNs of the pages pinned. */ unsigned long *pa_pfn; /* Number of pages pinned from @pa_iova. */ @@ -37,53 +35,50 @@ struct ccwchain { /* Count of the valid ccws in chain. */ int ch_len; /* Pinned PAGEs for the original data. */ - struct pfn_array *ch_pa; + struct page_array *ch_pa; }; /* - * pfn_array_alloc() - alloc memory for PFNs - * @pa: pfn_array on which to perform the operation + * page_array_alloc() - alloc memory for page array + * @pa: page_array on which to perform the operation * @iova: target guest physical address * @len: number of bytes that should be pinned from @iova * - * Attempt to allocate memory for PFNs. + * Attempt to allocate memory for page array. * - * Usage of pfn_array: - * We expect (pa_nr == 0) and (pa_iova_pfn == NULL), any field in + * Usage of page_array: + * We expect (pa_nr == 0) and (pa_iova == NULL), any field in * this structure will be filled in by this function. 
* * Returns: - * 0 if PFNs are allocated - * -EINVAL if pa->pa_nr is not initially zero, or pa->pa_iova_pfn is not NULL + * 0 if page array is allocated + * -EINVAL if pa->pa_nr is not initially zero, or pa->pa_iova is not NULL * -ENOMEM if alloc failed */ -static int pfn_array_alloc(struct pfn_array *pa, u64 iova, unsigned int len) +static int page_array_alloc(struct page_array *pa, u64 iova, unsigned int len) { int i; - if (pa->pa_nr || pa->pa_iova_pfn) + if (pa->pa_nr || pa->pa_iova) return -EINVAL; - pa->pa_iova = iova; - pa->pa_nr = ((iova & ~PAGE_MASK) + len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; if (!pa->pa_nr) return -EINVAL; - pa->pa_iova_pfn = kcalloc(pa->pa_nr, - sizeof(*pa->pa_iova_pfn) + - sizeof(*pa->pa_pfn), - GFP_KERNEL); - if (unlikely(!pa->pa_iova_pfn)) { + pa->pa_iova = kcalloc(pa->pa_nr, + sizeof(*pa->pa_iova) + sizeof(*pa->pa_pfn), + GFP_KERNEL); + if (unlikely(!pa->pa_iova)) { pa->pa_nr = 0; return -ENOMEM; } - pa->pa_pfn = pa->pa_iova_pfn + pa->pa_nr; + pa->pa_pfn = (unsigned long *)&pa->pa_iova[pa->pa_nr]; - pa->pa_iova_pfn[0] = pa->pa_iova >> PAGE_SHIFT; + pa->pa_iova[0] = iova; pa->pa_pfn[0] = -1ULL; for (i = 1; i < pa->pa_nr; i++) { - pa->pa_iova_pfn[i] = pa->pa_iova_pfn[i - 1] + 1; + pa->pa_iova[i] = pa->pa_iova[i - 1] + PAGE_SIZE; pa->pa_pfn[i] = -1ULL; } @@ -91,30 +86,30 @@ static int pfn_array_alloc(struct pfn_array *pa, u64 iova, unsigned int len) } /* - * pfn_array_unpin() - Unpin user pages in memory - * @pa: pfn_array on which to perform the operation + * page_array_unpin() - Unpin user pages in memory + * @pa: page_array on which to perform the operation * @vdev: the vfio device to perform the operation * @pa_nr: number of user pages to unpin * * Only unpin if any pages were pinned to begin with, i.e. 
pa_nr > 0, * otherwise only clear pa->pa_nr */ -static void pfn_array_unpin(struct pfn_array *pa, - struct vfio_device *vdev, int pa_nr) +static void page_array_unpin(struct page_array *pa, + struct vfio_device *vdev, int pa_nr) { int unpinned = 0, npage = 1; while (unpinned < pa_nr) { - unsigned long *first = &pa->pa_iova_pfn[unpinned]; - unsigned long *last = &first[npage]; + dma_addr_t *first = &pa->pa_iova[unpinned]; + dma_addr_t *last = &first[npage]; if (unpinned + npage < pa_nr && - *first + npage == *last) { + *first + npage * PAGE_SIZE == *last) { npage++; continue; } - vfio_unpin_pages(vdev, *first << PAGE_SHIFT, npage); + vfio_unpin_pages(vdev, *first, npage); unpinned += npage; npage = 1; } @@ -123,30 +118,30 @@ static void pfn_array_unpin(struct pfn_array *pa, } /* - * pfn_array_pin() - Pin user pages in memory - * @pa: pfn_array on which to perform the operation + * page_array_pin() - Pin user pages in memory + * @pa: page_array on which to perform the operation * @mdev: the mediated device to perform pin operations * * Returns number of pages pinned upon success. * If the pin request partially succeeds, or fails completely, * all pages are left unpinned and a negative error value is returned. 
*/ -static int pfn_array_pin(struct pfn_array *pa, struct vfio_device *vdev) +static int page_array_pin(struct page_array *pa, struct vfio_device *vdev) { int pinned = 0, npage = 1; int ret = 0; while (pinned < pa->pa_nr) { - unsigned long *first = &pa->pa_iova_pfn[pinned]; - unsigned long *last = &first[npage]; + dma_addr_t *first = &pa->pa_iova[pinned]; + dma_addr_t *last = &first[npage]; if (pinned + npage < pa->pa_nr && - *first + npage == *last) { + *first + npage * PAGE_SIZE == *last) { npage++; continue; } - ret = vfio_pin_pages(vdev, *first << PAGE_SHIFT, npage, + ret = vfio_pin_pages(vdev, *first, npage, IOMMU_READ | IOMMU_WRITE, &pa->pa_pfn[pinned]); if (ret < 0) { @@ -163,32 +158,30 @@ static int pfn_array_pin(struct pfn_array *pa, struct vfio_device *vdev) return ret; err_out: - pfn_array_unpin(pa, vdev, pinned); + page_array_unpin(pa, vdev, pinned); return ret; } /* Unpin the pages before releasing the memory. */ -static void pfn_array_unpin_free(struct pfn_array *pa, struct vfio_device *vdev) +static void page_array_unpin_free(struct page_array *pa, struct vfio_device *vdev) { - pfn_array_unpin(pa, vdev, pa->pa_nr); - kfree(pa->pa_iova_pfn); + page_array_unpin(pa, vdev, pa->pa_nr); + kfree(pa->pa_iova); } -static bool pfn_array_iova_pinned(struct pfn_array *pa, unsigned long iova) +static bool page_array_iova_pinned(struct page_array *pa, unsigned long iova) { - unsigned long iova_pfn = iova >> PAGE_SHIFT; int i; for (i = 0; i < pa->pa_nr; i++) - if (pa->pa_iova_pfn[i] == iova_pfn) + if (pa->pa_iova[i] == iova) return true; return false; } -/* Create the list of IDAL words for a pfn_array. */ -static inline void pfn_array_idal_create_words( - struct pfn_array *pa, - unsigned long *idaws) +/* Create the list of IDAL words for a page_array. 
*/ +static inline void page_array_idal_create_words(struct page_array *pa, + unsigned long *idaws) { int i; @@ -204,7 +197,7 @@ static inline void pfn_array_idal_create_words( idaws[i] = pa->pa_pfn[i] << PAGE_SHIFT; /* Adjust the first IDAW, since it may not start on a page boundary */ - idaws[0] += pa->pa_iova & (PAGE_SIZE - 1); + idaws[0] += pa->pa_iova[0] & (PAGE_SIZE - 1); } static void convert_ccw0_to_ccw1(struct ccw1 *source, unsigned long len) @@ -236,18 +229,18 @@ static void convert_ccw0_to_ccw1(struct ccw1 *source, unsigned long len) static long copy_from_iova(struct vfio_device *vdev, void *to, u64 iova, unsigned long n) { - struct pfn_array pa = {0}; + struct page_array pa = {0}; u64 from; int i, ret; unsigned long l, m; - ret = pfn_array_alloc(&pa, iova, n); + ret = page_array_alloc(&pa, iova, n); if (ret < 0) return ret; - ret = pfn_array_pin(&pa, vdev); + ret = page_array_pin(&pa, vdev); if (ret < 0) { - pfn_array_unpin_free(&pa, vdev); + page_array_unpin_free(&pa, vdev); return ret; } @@ -268,7 +261,7 @@ static long copy_from_iova(struct vfio_device *vdev, void *to, u64 iova, break; } - pfn_array_unpin_free(&pa, vdev); + page_array_unpin_free(&pa, vdev); return l; } @@ -371,7 +364,7 @@ static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len) chain->ch_ccw = (struct ccw1 *)data; data = (u8 *)(chain->ch_ccw) + sizeof(*chain->ch_ccw) * len; - chain->ch_pa = (struct pfn_array *)data; + chain->ch_pa = (struct page_array *)data; chain->ch_len = len; @@ -555,7 +548,7 @@ static int ccwchain_fetch_direct(struct ccwchain *chain, struct vfio_device *vdev = &container_of(cp, struct vfio_ccw_private, cp)->vdev; struct ccw1 *ccw; - struct pfn_array *pa; + struct page_array *pa; u64 iova; unsigned long *idaws; int ret; @@ -589,13 +582,13 @@ static int ccwchain_fetch_direct(struct ccwchain *chain, } /* - * Allocate an array of pfn's for pages to pin/translate. + * Allocate an array of pages to pin/translate. 
* The number of pages is actually the count of the idaws * required for the data transfer, since we only only support * 4K IDAWs today. */ pa = chain->ch_pa + idx; - ret = pfn_array_alloc(pa, iova, bytes); + ret = page_array_alloc(pa, iova, bytes); if (ret < 0) goto out_free_idaws; @@ -606,21 +599,21 @@ static int ccwchain_fetch_direct(struct ccwchain *chain, goto out_unpin; /* - * Copy guest IDAWs into pfn_array, in case the memory they + * Copy guest IDAWs into page_array, in case the memory they * occupy is not contiguous. */ for (i = 0; i < idaw_nr; i++) - pa->pa_iova_pfn[i] = idaws[i] >> PAGE_SHIFT; + pa->pa_iova[i] = idaws[i]; } else { /* - * No action is required here; the iova addresses in pfn_array - * were initialized sequentially in pfn_array_alloc() beginning + * No action is required here; the iova addresses in page_array + * were initialized sequentially in page_array_alloc() beginning * with the contents of ccw->cda. */ } if (ccw_does_data_transfer(ccw)) { - ret = pfn_array_pin(pa, vdev); + ret = page_array_pin(pa, vdev); if (ret < 0) goto out_unpin; } else { @@ -630,13 +623,13 @@ static int ccwchain_fetch_direct(struct ccwchain *chain, ccw->cda = (__u32) virt_to_phys(idaws); ccw->flags |= CCW_FLAG_IDA; - /* Populate the IDAL with pinned/translated addresses from pfn */ - pfn_array_idal_create_words(pa, idaws); + /* Populate the IDAL with pinned/translated addresses from page */ + page_array_idal_create_words(pa, idaws); return 0; out_unpin: - pfn_array_unpin_free(pa, vdev); + page_array_unpin_free(pa, vdev); out_free_idaws: kfree(idaws); out_init: @@ -742,7 +735,7 @@ void cp_free(struct channel_program *cp) cp->initialized = false; list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) { for (i = 0; i < chain->ch_len; i++) { - pfn_array_unpin_free(chain->ch_pa + i, vdev); + page_array_unpin_free(chain->ch_pa + i, vdev); ccwchain_cda_free(chain, i); } ccwchain_free(chain); @@ -918,7 +911,7 @@ bool cp_iova_pinned(struct channel_program *cp, 
u64 iova) list_for_each_entry(chain, &cp->ccwchain_list, next) { for (i = 0; i < chain->ch_len; i++) - if (pfn_array_iova_pinned(chain->ch_pa + i, iova)) + if (page_array_iova_pinned(chain->ch_pa + i, iova)) return true; } -- cgit 1.4.1 From c2863febd88bb2d0028eebcf0ee94b49859a06d3 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 22 Jul 2022 19:02:55 -0700 Subject: vfio/ccw: Add kmap_local_page() for memcpy A PFN is not secure enough to promise that the memory is not IO. And direct access via memcpy() that only handles CPU memory will crash on S390 if the PFN is an IO PFN, as we have to use the memcpy_to/fromio() that uses the special S390 IO access instructions. On the other hand, a "struct page *" is always a CPU coherent thing that fits memcpy(). Also, casting a PFN to "void *" for memcpy() is not a proper practice, kmap_local_page() is the correct API to call here, though S390 doesn't use highmem, which means kmap_local_page() is a NOP. There's a following patch changing the vfio_pin_pages() API to return a list of "struct page *" instead of PFNs. It will block any IO memory from ever getting into this call path, for such a security purpose. In this patch, add kmap_local_page() to prepare for that. 
Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Acked-by: Eric Farman Tested-by: Eric Farman Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20220723020256.30081-10-nicolinc@nvidia.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_cp.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index 3854c3d573f5..cd4ec4f6d6ff 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -230,7 +231,6 @@ static long copy_from_iova(struct vfio_device *vdev, void *to, u64 iova, unsigned long n) { struct page_array pa = {0}; - u64 from; int i, ret; unsigned long l, m; @@ -246,7 +246,9 @@ static long copy_from_iova(struct vfio_device *vdev, void *to, u64 iova, l = n; for (i = 0; i < pa.pa_nr; i++) { - from = pa.pa_pfn[i] << PAGE_SHIFT; + struct page *page = pfn_to_page(pa.pa_pfn[i]); + void *from = kmap_local_page(page); + m = PAGE_SIZE; if (i == 0) { from += iova & (PAGE_SIZE - 1); @@ -254,7 +256,8 @@ static long copy_from_iova(struct vfio_device *vdev, void *to, u64 iova, } m = min(l, m); - memcpy(to + (n - l), (void *)from, m); + memcpy(to + (n - l), from, m); + kunmap_local(from); l -= m; if (l == 0) -- cgit 1.4.1 From 34a255e67615995f729254307a0581c143e03752 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 22 Jul 2022 19:02:56 -0700 Subject: vfio: Replace phys_pfn with pages for vfio_pin_pages() Most of the callers of vfio_pin_pages() want "struct page *" and the low-level mm code to pin pages returns a list of "struct page *" too. So there's no gain in converting "struct page *" to PFN in between. Replace the output parameter "phys_pfn" list with a "pages" list, to simplify callers. This also allows us to replace the vfio_iommu_type1 implementation with a more efficient one. 
And drop the pfn_valid check in the gvt code, as there is no need to do such a check at a page-backed struct page pointer. For now, also update vfio_iommu_type1 to fit this new parameter too. Reviewed-by: Christoph Hellwig Reviewed-by: Kirti Wankhede Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Acked-by: Eric Farman Tested-by: Terrence Xu Tested-by: Eric Farman Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20220723020256.30081-11-nicolinc@nvidia.com Signed-off-by: Alex Williamson --- Documentation/driver-api/vfio-mediated-device.rst | 2 +- drivers/gpu/drm/i915/gvt/kvmgt.c | 19 ++++++------------- drivers/s390/cio/vfio_ccw_cp.c | 19 +++++++++---------- drivers/s390/crypto/vfio_ap_ops.c | 6 +++--- drivers/vfio/vfio.c | 8 ++++---- drivers/vfio/vfio.h | 2 +- drivers/vfio/vfio_iommu_type1.c | 19 +++++++++++-------- include/linux/vfio.h | 2 +- 8 files changed, 36 insertions(+), 41 deletions(-) (limited to 'drivers/s390') diff --git a/Documentation/driver-api/vfio-mediated-device.rst b/Documentation/driver-api/vfio-mediated-device.rst index ea32a0f13ddb..ba5fefcdae1a 100644 --- a/Documentation/driver-api/vfio-mediated-device.rst +++ b/Documentation/driver-api/vfio-mediated-device.rst @@ -263,7 +263,7 @@ The following APIs are provided for translating user pfn to host pfn in a VFIO driver:: int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, - int npage, int prot, unsigned long *phys_pfn); + int npage, int prot, struct page **pages); void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage); diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 8be75c282611..e3cd58946477 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -240,7 +240,7 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size, struct page **page) { int total_pages = DIV_ROUND_UP(size, PAGE_SIZE); - unsigned long base_pfn = 0; + struct page 
*base_page = NULL; int npage; int ret; @@ -250,26 +250,19 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, */ for (npage = 0; npage < total_pages; npage++) { dma_addr_t cur_iova = (gfn + npage) << PAGE_SHIFT; - unsigned long pfn; + struct page *cur_page; ret = vfio_pin_pages(&vgpu->vfio_device, cur_iova, 1, - IOMMU_READ | IOMMU_WRITE, &pfn); + IOMMU_READ | IOMMU_WRITE, &cur_page); if (ret != 1) { gvt_vgpu_err("vfio_pin_pages failed for iova %pad, ret %d\n", &cur_iova, ret); goto err; } - if (!pfn_valid(pfn)) { - gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn); - npage++; - ret = -EFAULT; - goto err; - } - if (npage == 0) - base_pfn = pfn; - else if (base_pfn + npage != pfn) { + base_page = cur_page; + else if (base_page + npage != cur_page) { gvt_vgpu_err("The pages are not continuous\n"); ret = -EINVAL; npage++; @@ -277,7 +270,7 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, } } - *page = pfn_to_page(base_pfn); + *page = base_page; return 0; err: gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE); diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index cd4ec4f6d6ff..8963f452f963 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -22,8 +22,8 @@ struct page_array { /* Array that stores pages need to pin. */ dma_addr_t *pa_iova; - /* Array that receives PFNs of the pages pinned. */ - unsigned long *pa_pfn; + /* Array that receives the pinned pages. */ + struct page **pa_page; /* Number of pages pinned from @pa_iova. 
*/ int pa_nr; }; @@ -68,19 +68,19 @@ static int page_array_alloc(struct page_array *pa, u64 iova, unsigned int len) return -EINVAL; pa->pa_iova = kcalloc(pa->pa_nr, - sizeof(*pa->pa_iova) + sizeof(*pa->pa_pfn), + sizeof(*pa->pa_iova) + sizeof(*pa->pa_page), GFP_KERNEL); if (unlikely(!pa->pa_iova)) { pa->pa_nr = 0; return -ENOMEM; } - pa->pa_pfn = (unsigned long *)&pa->pa_iova[pa->pa_nr]; + pa->pa_page = (struct page **)&pa->pa_iova[pa->pa_nr]; pa->pa_iova[0] = iova; - pa->pa_pfn[0] = -1ULL; + pa->pa_page[0] = NULL; for (i = 1; i < pa->pa_nr; i++) { pa->pa_iova[i] = pa->pa_iova[i - 1] + PAGE_SIZE; - pa->pa_pfn[i] = -1ULL; + pa->pa_page[i] = NULL; } return 0; @@ -144,7 +144,7 @@ static int page_array_pin(struct page_array *pa, struct vfio_device *vdev) ret = vfio_pin_pages(vdev, *first, npage, IOMMU_READ | IOMMU_WRITE, - &pa->pa_pfn[pinned]); + &pa->pa_page[pinned]); if (ret < 0) { goto err_out; } else if (ret > 0 && ret != npage) { @@ -195,7 +195,7 @@ static inline void page_array_idal_create_words(struct page_array *pa, */ for (i = 0; i < pa->pa_nr; i++) - idaws[i] = pa->pa_pfn[i] << PAGE_SHIFT; + idaws[i] = page_to_phys(pa->pa_page[i]); /* Adjust the first IDAW, since it may not start on a page boundary */ idaws[0] += pa->pa_iova[0] & (PAGE_SIZE - 1); @@ -246,8 +246,7 @@ static long copy_from_iova(struct vfio_device *vdev, void *to, u64 iova, l = n; for (i = 0; i < pa.pa_nr; i++) { - struct page *page = pfn_to_page(pa.pa_pfn[i]); - void *from = kmap_local_page(page); + void *from = kmap_local_page(pa.pa_page[i]); m = PAGE_SIZE; if (i == 0) { diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index d7c38c82f694..75cd92c291e3 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -234,9 +234,9 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, struct ap_qirq_ctrl aqic_gisa = {}; struct ap_queue_status status = {}; struct kvm_s390_gisa *gisa; + struct page *h_page; int nisc; 
struct kvm *kvm; - unsigned long h_pfn; phys_addr_t h_nib; dma_addr_t nib; int ret; @@ -251,7 +251,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, } ret = vfio_pin_pages(&q->matrix_mdev->vdev, nib, 1, - IOMMU_READ | IOMMU_WRITE, &h_pfn); + IOMMU_READ | IOMMU_WRITE, &h_page); switch (ret) { case 1: break; @@ -267,7 +267,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, kvm = q->matrix_mdev->kvm; gisa = kvm->arch.gisa_int.origin; - h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK); + h_nib = page_to_phys(h_page) | (nib & ~PAGE_MASK); aqic_gisa.gisc = isc; nisc = kvm_s390_gisc_register(kvm, isc); diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 606a20b605ba..8e23ca59ceed 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1941,18 +1941,18 @@ EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); * @npage [in] : count of pages to be pinned. This count should not * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. * @prot [in] : protection flags - * @phys_pfn[out]: array of host PFNs + * @pages[out] : array of host pages * Return error or number of pages pinned. 
*/ int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, - int npage, int prot, unsigned long *phys_pfn) + int npage, int prot, struct page **pages) { struct vfio_container *container; struct vfio_group *group = device->group; struct vfio_iommu_driver *driver; int ret; - if (!phys_pfn || !npage || !vfio_assert_device_open(device)) + if (!pages || !npage || !vfio_assert_device_open(device)) return -EINVAL; if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) @@ -1967,7 +1967,7 @@ int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, if (likely(driver && driver->ops->pin_pages)) ret = driver->ops->pin_pages(container->iommu_data, group->iommu_group, iova, - npage, prot, phys_pfn); + npage, prot, pages); else ret = -ENOTTY; diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index e9767e13f00f..503bea6c843d 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -52,7 +52,7 @@ struct vfio_iommu_driver_ops { struct iommu_group *group, dma_addr_t user_iova, int npage, int prot, - unsigned long *phys_pfn); + struct page **pages); void (*unpin_pages)(void *iommu_data, dma_addr_t user_iova, int npage); void (*register_device)(void *iommu_data, diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index e629e059118c..db516c90a977 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -831,7 +831,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, struct iommu_group *iommu_group, dma_addr_t user_iova, int npage, int prot, - unsigned long *phys_pfn) + struct page **pages) { struct vfio_iommu *iommu = iommu_data; struct vfio_iommu_group *group; @@ -841,7 +841,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, bool do_accounting; dma_addr_t iova; - if (!iommu || !phys_pfn) + if (!iommu || !pages) return -EINVAL; /* Supported for v2 version only */ @@ -880,6 +880,7 @@ again: do_accounting = list_empty(&iommu->domain_list); for (i = 0; i < npage; i++) { + unsigned long phys_pfn; struct 
vfio_pfn *vpfn; iova = user_iova + PAGE_SIZE * i; @@ -896,23 +897,25 @@ again: vpfn = vfio_iova_get_vfio_pfn(dma, iova); if (vpfn) { - phys_pfn[i] = vpfn->pfn; + pages[i] = pfn_to_page(vpfn->pfn); continue; } remote_vaddr = dma->vaddr + (iova - dma->iova); - ret = vfio_pin_page_external(dma, remote_vaddr, &phys_pfn[i], + ret = vfio_pin_page_external(dma, remote_vaddr, &phys_pfn, do_accounting); if (ret) goto pin_unwind; - ret = vfio_add_to_pfn_list(dma, iova, phys_pfn[i]); + ret = vfio_add_to_pfn_list(dma, iova, phys_pfn); if (ret) { - if (put_pfn(phys_pfn[i], dma->prot) && do_accounting) + if (put_pfn(phys_pfn, dma->prot) && do_accounting) vfio_lock_acct(dma, -1, true); goto pin_unwind; } + pages[i] = pfn_to_page(phys_pfn); + if (iommu->dirty_page_tracking) { unsigned long pgshift = __ffs(iommu->pgsize_bitmap); @@ -935,14 +938,14 @@ again: goto pin_done; pin_unwind: - phys_pfn[i] = 0; + pages[i] = NULL; for (j = 0; j < i; j++) { dma_addr_t iova; iova = user_iova + PAGE_SIZE * j; dma = vfio_find_dma(iommu, iova, PAGE_SIZE); vfio_unpin_page_external(dma, iova, do_accounting); - phys_pfn[j] = 0; + pages[j] = NULL; } pin_done: mutex_unlock(&iommu->lock); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index acefd663e63b..e05ddc6fe6a5 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -162,7 +162,7 @@ bool vfio_file_has_dev(struct file *file, struct vfio_device *device); #define VFIO_PIN_PAGES_MAX_ENTRIES (PAGE_SIZE/sizeof(unsigned long)) int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, - int npage, int prot, unsigned long *phys_pfn); + int npage, int prot, struct page **pages); void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage); int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data, size_t len, bool write); -- cgit 1.4.1 From 5a4fe7c41b586399d502f9970b077178e5dfbacf Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Thu, 28 Jul 2022 22:49:12 +0200 Subject: vfio/ccw: Add length to DMA_UNMAP 
checks As pointed out with the simplification of the VFIO_IOMMU_NOTIFY_DMA_UNMAP notifier [1], the length parameter was never used to check against the pinned pages. Let's correct that, and see if a page is within the affected range instead of simply the first page of the range. [1] https://lore.kernel.org/kvm/20220720170457.39cda0d0.alex.williamson@redhat.com/ Signed-off-by: Eric Farman Reviewed-by: Matthew Rosato Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220728204914.2420989-2-farman@linux.ibm.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_cp.c | 16 +++++++++++----- drivers/s390/cio/vfio_ccw_cp.h | 2 +- drivers/s390/cio/vfio_ccw_ops.c | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index 8963f452f963..7b02e97f4b29 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -170,13 +170,18 @@ static void page_array_unpin_free(struct page_array *pa, struct vfio_device *vde kfree(pa->pa_iova); } -static bool page_array_iova_pinned(struct page_array *pa, unsigned long iova) +static bool page_array_iova_pinned(struct page_array *pa, u64 iova, u64 length) { + u64 iova_pfn_start = iova >> PAGE_SHIFT; + u64 iova_pfn_end = (iova + length - 1) >> PAGE_SHIFT; + u64 pfn; int i; - for (i = 0; i < pa->pa_nr; i++) - if (pa->pa_iova[i] == iova) + for (i = 0; i < pa->pa_nr; i++) { + pfn = pa->pa_iova[i] >> PAGE_SHIFT; + if (pfn >= iova_pfn_start && pfn <= iova_pfn_end) return true; + } return false; } @@ -899,11 +904,12 @@ void cp_update_scsw(struct channel_program *cp, union scsw *scsw) * cp_iova_pinned() - check if an iova is pinned for a ccw chain. * @cp: channel_program on which to perform the operation * @iova: the iova to check + * @length: the length to check from @iova * * If the @iova is currently pinned for the ccw chain, return true; * else return false. 
*/ -bool cp_iova_pinned(struct channel_program *cp, u64 iova) +bool cp_iova_pinned(struct channel_program *cp, u64 iova, u64 length) { struct ccwchain *chain; int i; @@ -913,7 +919,7 @@ bool cp_iova_pinned(struct channel_program *cp, u64 iova) list_for_each_entry(chain, &cp->ccwchain_list, next) { for (i = 0; i < chain->ch_len; i++) - if (page_array_iova_pinned(chain->ch_pa + i, iova)) + if (page_array_iova_pinned(chain->ch_pa + i, iova, length)) return true; } diff --git a/drivers/s390/cio/vfio_ccw_cp.h b/drivers/s390/cio/vfio_ccw_cp.h index 3194d887e08e..54d26e242533 100644 --- a/drivers/s390/cio/vfio_ccw_cp.h +++ b/drivers/s390/cio/vfio_ccw_cp.h @@ -46,6 +46,6 @@ void cp_free(struct channel_program *cp); int cp_prefetch(struct channel_program *cp); union orb *cp_get_orb(struct channel_program *cp, u32 intparm, u8 lpm); void cp_update_scsw(struct channel_program *cp, union scsw *scsw); -bool cp_iova_pinned(struct channel_program *cp, u64 iova); +bool cp_iova_pinned(struct channel_program *cp, u64 iova, u64 length); #endif diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 0047fd88f938..3f67fa103c7f 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -39,7 +39,7 @@ static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length) container_of(vdev, struct vfio_ccw_private, vdev); /* Drivers MUST unpin pages in response to an invalidation. */ - if (!cp_iova_pinned(&private->cp, iova)) + if (!cp_iova_pinned(&private->cp, iova, length)) return; vfio_ccw_mdev_reset(private); -- cgit 1.4.1 From 96a4c9ecaeac2863d53c086d4b5ec9985ba7154f Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Thu, 28 Jul 2022 22:49:13 +0200 Subject: vfio/ccw: Remove FSM Close from remove handlers Now that neither vfio_ccw_sch_probe() nor vfio_ccw_mdev_probe() affect the FSM state, it doesn't make sense for their _remove() counterparts to try to revert things in this way.
Since the FSM open and close are handled alongside MDEV open/close, these are unnecessary. Signed-off-by: Eric Farman Reviewed-by: Matthew Rosato Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220728204914.2420989-3-farman@linux.ibm.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_drv.c | 1 - drivers/s390/cio/vfio_ccw_ops.c | 2 -- 2 files changed, 3 deletions(-) (limited to 'drivers/s390') diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 4804101ccb0f..86d9e428357b 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -241,7 +241,6 @@ static void vfio_ccw_sch_remove(struct subchannel *sch) { struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); - vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLOSE); mdev_unregister_device(&sch->dev); dev_set_drvdata(&sch->dev, NULL); diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 3f67fa103c7f..4a806a2273b5 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -130,8 +130,6 @@ static void vfio_ccw_mdev_remove(struct mdev_device *mdev) vfio_unregister_group_dev(&private->vdev); - vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLOSE); - vfio_uninit_group_dev(&private->vdev); atomic_inc(&private->avail); } -- cgit 1.4.1 From 4eb919663d97e056dff8963fde22df3b0ad4d02b Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Thu, 28 Jul 2022 22:49:14 +0200 Subject: vfio/ccw: Check return code from subchannel quiesce If a subchannel is busy when a close is performed, the subchannel needs to be quiesced and left nice and tidy, so nothing unexpected (like a solicited interrupt) shows up while in the closed state. Unfortunately, the return code from this call isn't checked, so any busy subchannel is treated as a failing one. Fix that, so that the close on a busy subchannel happens normally. 
Signed-off-by: Eric Farman Reviewed-by: Matthew Rosato Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220728204914.2420989-4-farman@linux.ibm.com Signed-off-by: Alex Williamson --- drivers/s390/cio/vfio_ccw_fsm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/s390') diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c index 4b8b623df24f..a59c758869f8 100644 --- a/drivers/s390/cio/vfio_ccw_fsm.c +++ b/drivers/s390/cio/vfio_ccw_fsm.c @@ -407,7 +407,7 @@ static void fsm_close(struct vfio_ccw_private *private, ret = cio_disable_subchannel(sch); if (ret == -EBUSY) - vfio_ccw_sch_quiesce(sch); + ret = vfio_ccw_sch_quiesce(sch); if (ret) goto err_unlock; -- cgit 1.4.1