summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c30
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c9
-rw-r--r--include/linux/mlx5/driver.h4
3 files changed, 38 insertions, 5 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 2cb4094c9c49..2d00022c92d8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -64,6 +64,10 @@ enum {
 	MLX5_NIC_IFC_NO_DRAM_NIC	= 2
 };
 
+enum {
+	MLX5_DROP_NEW_HEALTH_WORK,
+};
+
 static u8 get_nic_interface(struct mlx5_core_dev *dev)
 {
 	return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3;
@@ -272,7 +276,13 @@ static void poll_health(unsigned long data)
 	if (in_fatal(dev) && !health->sick) {
 		health->sick = true;
 		print_health_info(dev);
-		schedule_work(&health->work);
+		spin_lock(&health->wq_lock);
+		if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
+			queue_work(health->wq, &health->work);
+		else
+			dev_err(&dev->pdev->dev,
+				"new health works are not permitted at this stage\n");
+		spin_unlock(&health->wq_lock);
 	}
 }
 
@@ -282,6 +292,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
 
 	init_timer(&health->timer);
 	health->sick = 0;
+	clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
 	health->health = &dev->iseg->health;
 	health->health_counter = &dev->iseg->health_counter;
 
@@ -298,11 +309,21 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev)
 	del_timer_sync(&health->timer);
 }
 
+void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_health *health = &dev->priv.health;
+
+	spin_lock(&health->wq_lock);
+	set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+	spin_unlock(&health->wq_lock);
+	cancel_work_sync(&health->work);
+}
+
 void mlx5_health_cleanup(struct mlx5_core_dev *dev)
 {
 	struct mlx5_core_health *health = &dev->priv.health;
 
-	flush_work(&health->work);
+	destroy_workqueue(health->wq);
 }
 
 int mlx5_health_init(struct mlx5_core_dev *dev)
@@ -317,8 +338,11 @@ int mlx5_health_init(struct mlx5_core_dev *dev)
 
 	strcpy(name, "mlx5_health");
 	strcat(name, dev_name(&dev->pdev->dev));
+	health->wq = create_singlethread_workqueue(name);
 	kfree(name);
-
+	if (!health->wq)
+		return -ENOMEM;
+	spin_lock_init(&health->wq_lock);
 	INIT_WORK(&health->work, health_care);
 
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 8a63910bfccf..9f90226bc120 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1315,8 +1315,13 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
 	dev_info(&pdev->dev, "%s was called\n", __func__);
 	mlx5_enter_error_state(dev);
 	mlx5_unload_one(dev, priv, false);
-	pci_save_state(pdev);
-	mlx5_pci_disable_device(dev);
+	/* In case of kernel call save the pci state and drain health wq */
+	if (state) {
+		pci_save_state(pdev);
+		mlx5_drain_health_wq(dev);
+		mlx5_pci_disable_device(dev);
+	}
+
 	return state == pci_channel_io_perm_failure ?
 		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
 }
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 5dbda60a09f4..7d9a5d08eb59 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -418,7 +418,10 @@ struct mlx5_core_health {
 	u32				prev;
 	int				miss_counter;
 	bool				sick;
+	/* wq spinlock to synchronize draining */
+	spinlock_t			wq_lock;
 	struct workqueue_struct	       *wq;
+	unsigned long			flags;
 	struct work_struct		work;
 };
 
@@ -778,6 +781,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev);
 int mlx5_health_init(struct mlx5_core_dev *dev);
 void mlx5_start_health_poll(struct mlx5_core_dev *dev);
 void mlx5_stop_health_poll(struct mlx5_core_dev *dev);
+void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
 int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
 			struct mlx5_buf *buf, int node);
 int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf);