diff options
author | Cristian Ciocaltea <cristian.ciocaltea@collabora.com> | 2023-09-15 19:15:48 +0300 |
---|---|---|
committer | Cristian Ciocaltea <cristian.ciocaltea@collabora.com> | 2023-09-15 19:15:48 +0300 |
commit | 0eb67c19c36cf2f517acea59dbb1bc38ccddf5eb (patch) | |
tree | a362533dbc4e728cb182bec9a2fb9793cda20553 /drivers/gpu | |
parent | ae4f9e9d596068d3b2137e55aaf17d2efc568c1c (diff) | |
parent | b65228799e5ac96211b0e4c342d0a4478a1b6410 (diff) | |
download | linux-0eb67c19c36cf2f517acea59dbb1bc38ccddf5eb.tar.gz |
Merge branch 6.1/features/gpu-reset
Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 27 | ||||
-rw-r--r-- | drivers/gpu/drm/drm_sysfs.c | 31 |
3 files changed, 61 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c8dbc281072b..afe39421d10c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -59,6 +59,7 @@
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_gem.h>
 #include <drm/drm_ioctl.h>
+#include <drm/drm_sysfs.h>
 
 #include <kgd_kfd_interface.h>
 #include "dm_pp_interface.h"
@@ -1038,6 +1039,7 @@ struct amdgpu_device {
 
 	int asic_reset_res;
 	struct work_struct xgmi_reset_work;
+	struct work_struct gpu_reset_event_work;
 	struct list_head reset_list;
 
 	long gfx_timeout;
@@ -1070,6 +1072,7 @@ struct amdgpu_device {
 	pci_channel_state_t pci_channel_state;
 
 	struct amdgpu_reset_control *reset_cntl;
+	struct drm_reset_event reset_event_info;
 	uint32_t ip_versions[MAX_HWIP][HWIP_MAX_INSTANCE];
 
 	bool ram_is_direct_mapped;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index eb1fe518e78c..1ecc6b095a87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -79,6 +79,7 @@
 #include <linux/pm_runtime.h>
 
 #include <drm/drm_drv.h>
+#include <drm/drm_sysfs.h>
 
 #if IS_ENABLED(CONFIG_X86)
 #include <asm/intel-family.h>
@@ -3580,6 +3581,17 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
 }
 
+static void amdgpu_device_reset_event_func(struct work_struct *__work)
+{
+	struct amdgpu_device *adev = container_of(__work, struct amdgpu_device,
+						  gpu_reset_event_work);
+	/*
+	 * A GPU reset has happened, inform the userspace and pass the
+	 * reset related information.
+	 */
+	drm_sysfs_reset_event(&adev->ddev, &adev->reset_event_info);
+}
+
 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
 {
 	struct amdgpu_device *adev =
@@ -3835,6 +3847,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 			  amdgpu_device_delay_enable_gfx_off);
 
 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
+	INIT_WORK(&adev->gpu_reset_event_work, amdgpu_device_reset_event_func);
 
 	adev->gfx.gfx_off_req_count = 1;
 	adev->gfx.gfx_off_residency = 0;
@@ -5179,6 +5192,20 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
 						reset_context->job->vm->task_info;
 				amdgpu_reset_capture_coredumpm(tmp_adev);
 #endif
+				if (reset_context->job && reset_context->job->vm) {
+					tmp_adev->reset_event_info.pid =
+						reset_context->job->vm->task_info.pid;
+					memset(tmp_adev->reset_event_info.pname, 0, TASK_COMM_LEN);
+					strcpy(tmp_adev->reset_event_info.pname,
+						reset_context->job->vm->task_info.process_name);
+				} else {
+					tmp_adev->reset_event_info.pid = 0;
+					memset(tmp_adev->reset_event_info.pname, 0, TASK_COMM_LEN);
+				}
+
+				tmp_adev->reset_event_info.flags = vram_lost;
+				schedule_work(&tmp_adev->gpu_reset_event_work);
+
 				if (vram_lost) {
 					DRM_INFO("VRAM is lost due to GPU reset!\n");
 					amdgpu_inc_vram_lost(tmp_adev);
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index b8da978d85bb..62c3c007e4a9 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -436,6 +436,37 @@ void drm_sysfs_connector_hotplug_event(struct drm_connector *connector)
 EXPORT_SYMBOL(drm_sysfs_connector_hotplug_event);
 
 /**
+ * drm_sysfs_reset_event - generate a DRM uevent to indicate GPU reset
+ * @dev: DRM device
+ * @reset_info: The contextual information about the reset (like PID, flags)
+ *
+ * Send a uevent for the DRM device specified by @dev. This informs
+ * user that a GPU reset has occurred, so that an interested client
+ * can take any recovery or profiling measure.
+ */
+void drm_sysfs_reset_event(struct drm_device *dev, struct drm_reset_event *reset_info)
+{
+	unsigned char pid_str[13];
+	unsigned char flags_str[15];
+	unsigned char pname_str[TASK_COMM_LEN + 6];
+	unsigned char reset_str[] = "RESET=1";
+	char *envp[] = { reset_str, pid_str, pname_str, flags_str, NULL };
+
+	if (!reset_info) {
+		DRM_WARN("No reset info, not sending the event\n");
+		return;
+	}
+
+	DRM_DEBUG("generating reset event\n");
+
+	snprintf(pid_str, ARRAY_SIZE(pid_str), "PID=%u", reset_info->pid);
+	snprintf(pname_str, ARRAY_SIZE(pname_str), "NAME=%s", reset_info->pname);
+	snprintf(flags_str, ARRAY_SIZE(flags_str), "FLAGS=%u", reset_info->flags);
+	kobject_uevent_env(&dev->primary->kdev->kobj, KOBJ_CHANGE, envp);
+}
+EXPORT_SYMBOL(drm_sysfs_reset_event);
+
+/**
  * drm_sysfs_connector_status_event - generate a DRM uevent for connector
  * property status change
  * @connector: connector on which property status changed |