summary refs log tree commit diff
diff options
context:
space:
mode:
authorCristian Ciocaltea <cristian.ciocaltea@collabora.com>2023-09-15 19:15:48 +0300
committerCristian Ciocaltea <cristian.ciocaltea@collabora.com>2023-09-15 19:15:48 +0300
commit0eb67c19c36cf2f517acea59dbb1bc38ccddf5eb (patch)
treea362533dbc4e728cb182bec9a2fb9793cda20553
parentae4f9e9d596068d3b2137e55aaf17d2efc568c1c (diff)
parentb65228799e5ac96211b0e4c342d0a4478a1b6410 (diff)
downloadlinux-0eb67c19c36cf2f517acea59dbb1bc38ccddf5eb.tar.gz
Merge branch 6.1/features/gpu-reset
Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c27
-rw-r--r--drivers/gpu/drm/drm_sysfs.c31
-rw-r--r--include/drm/drm_sysfs.h10
4 files changed, 71 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c8dbc281072b..afe39421d10c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -59,6 +59,7 @@
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_gem.h>
 #include <drm/drm_ioctl.h>
+#include <drm/drm_sysfs.h>
 
 #include <kgd_kfd_interface.h>
 #include "dm_pp_interface.h"
@@ -1038,6 +1039,7 @@ struct amdgpu_device {
 
 	int asic_reset_res;
 	struct work_struct		xgmi_reset_work;
+	struct work_struct		gpu_reset_event_work;
 	struct list_head		reset_list;
 
 	long				gfx_timeout;
@@ -1070,6 +1072,7 @@ struct amdgpu_device {
 	pci_channel_state_t		pci_channel_state;
 
 	struct amdgpu_reset_control     *reset_cntl;
+	struct drm_reset_event		reset_event_info;
 	uint32_t                        ip_versions[MAX_HWIP][HWIP_MAX_INSTANCE];
 
 	bool				ram_is_direct_mapped;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index eb1fe518e78c..1ecc6b095a87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -79,6 +79,7 @@
 #include <linux/pm_runtime.h>
 
 #include <drm/drm_drv.h>
+#include <drm/drm_sysfs.h>
 
 #if IS_ENABLED(CONFIG_X86)
 #include <asm/intel-family.h>
@@ -3580,6 +3581,17 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
 }
 
+static void amdgpu_device_reset_event_func(struct work_struct *__work)
+{
+	struct amdgpu_device *adev = container_of(__work, struct amdgpu_device,
+						  gpu_reset_event_work);
+	/*
+	 * A GPU reset has happened, inform the userspace and pass the
+	 * reset related information.
+	 */
+	drm_sysfs_reset_event(&adev->ddev, &adev->reset_event_info);
+}
+
 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
 {
 	struct amdgpu_device *adev =
@@ -3835,6 +3847,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 			  amdgpu_device_delay_enable_gfx_off);
 
 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
+	INIT_WORK(&adev->gpu_reset_event_work, amdgpu_device_reset_event_func);
 
 	adev->gfx.gfx_off_req_count = 1;
 	adev->gfx.gfx_off_residency = 0;
@@ -5179,6 +5192,20 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
 						reset_context->job->vm->task_info;
 				amdgpu_reset_capture_coredumpm(tmp_adev);
 #endif
+				if (reset_context->job && reset_context->job->vm) {
+					tmp_adev->reset_event_info.pid =
+						reset_context->job->vm->task_info.pid;
+					memset(tmp_adev->reset_event_info.pname, 0, TASK_COMM_LEN);
+					strcpy(tmp_adev->reset_event_info.pname,
+						reset_context->job->vm->task_info.process_name);
+				} else {
+					tmp_adev->reset_event_info.pid = 0;
+					memset(tmp_adev->reset_event_info.pname, 0, TASK_COMM_LEN);
+				}
+
+				tmp_adev->reset_event_info.flags = vram_lost;
+				schedule_work(&tmp_adev->gpu_reset_event_work);
+
 				if (vram_lost) {
 					DRM_INFO("VRAM is lost due to GPU reset!\n");
 					amdgpu_inc_vram_lost(tmp_adev);
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index b8da978d85bb..62c3c007e4a9 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -436,6 +436,37 @@ void drm_sysfs_connector_hotplug_event(struct drm_connector *connector)
 EXPORT_SYMBOL(drm_sysfs_connector_hotplug_event);
 
 /**
+ * drm_sysfs_reset_event - generate a DRM uevent to indicate GPU reset
+ * @dev: DRM device
+ * @reset_info: The contextual information about the reset (like PID, flags)
+ *
+ * Send a uevent for the DRM device specified by @dev. This informs
+ * user that a GPU reset has occurred, so that an interested client
+ * can take any recovery or profiling measure.
+ */
+void drm_sysfs_reset_event(struct drm_device *dev, struct drm_reset_event *reset_info)
+{
+	unsigned char pid_str[13];
+	unsigned char flags_str[15];
+	unsigned char pname_str[TASK_COMM_LEN + 6];
+	unsigned char reset_str[] = "RESET=1";
+	char *envp[] = { reset_str, pid_str, pname_str, flags_str, NULL };
+
+	if (!reset_info) {
+		DRM_WARN("No reset info, not sending the event\n");
+		return;
+	}
+
+	DRM_DEBUG("generating reset event\n");
+
+	snprintf(pid_str, ARRAY_SIZE(pid_str), "PID=%u", reset_info->pid);
+	snprintf(pname_str, ARRAY_SIZE(pname_str), "NAME=%s", reset_info->pname);
+	snprintf(flags_str, ARRAY_SIZE(flags_str), "FLAGS=%u", reset_info->flags);
+	kobject_uevent_env(&dev->primary->kdev->kobj, KOBJ_CHANGE, envp);
+}
+EXPORT_SYMBOL(drm_sysfs_reset_event);
+
+/**
  * drm_sysfs_connector_status_event - generate a DRM uevent for connector
  * property status change
  * @connector: connector on which property status changed
diff --git a/include/drm/drm_sysfs.h b/include/drm/drm_sysfs.h
index 6273cac44e47..8c37d6a52932 100644
--- a/include/drm/drm_sysfs.h
+++ b/include/drm/drm_sysfs.h
@@ -1,17 +1,27 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _DRM_SYSFS_H_
 #define _DRM_SYSFS_H_
+#include <linux/sched.h>
+
+#define DRM_GPU_RESET_FLAG_VRAM_INVALID (1 << 0)
 
 struct drm_device;
 struct device;
 struct drm_connector;
 struct drm_property;
 
+struct drm_reset_event {
+	uint32_t pid;
+	uint32_t flags;
+	char pname[TASK_COMM_LEN];
+};
+
 int drm_class_device_register(struct device *dev);
 void drm_class_device_unregister(struct device *dev);
 
 void drm_sysfs_hotplug_event(struct drm_device *dev);
 void drm_sysfs_connector_hotplug_event(struct drm_connector *connector);
+void drm_sysfs_reset_event(struct drm_device *dev, struct drm_reset_event *reset_info);
 void drm_sysfs_connector_status_event(struct drm_connector *connector,
 				      struct drm_property *property);
 #endif