diff options
author | Cristian Ciocaltea <cristian.ciocaltea@collabora.com> | 2023-09-15 19:15:48 +0300 |
---|---|---|
committer | Cristian Ciocaltea <cristian.ciocaltea@collabora.com> | 2023-09-15 19:15:48 +0300 |
commit | 0eb67c19c36cf2f517acea59dbb1bc38ccddf5eb (patch) | |
tree | a362533dbc4e728cb182bec9a2fb9793cda20553 /drivers/gpu/drm/amd/amdgpu | |
parent | ae4f9e9d596068d3b2137e55aaf17d2efc568c1c (diff) | |
parent | b65228799e5ac96211b0e4c342d0a4478a1b6410 (diff) | |
download | linux-0eb67c19c36cf2f517acea59dbb1bc38ccddf5eb.tar.gz |
Merge branch 6.1/features/gpu-reset
Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 27 |
2 files changed, 30 insertions, 0 deletions
```diff
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c8dbc281072b..afe39421d10c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -59,6 +59,7 @@
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_gem.h>
 #include <drm/drm_ioctl.h>
+#include <drm/drm_sysfs.h>

 #include <kgd_kfd_interface.h>
 #include "dm_pp_interface.h"
@@ -1038,6 +1039,7 @@ struct amdgpu_device {

 	int asic_reset_res;
 	struct work_struct xgmi_reset_work;
+	struct work_struct gpu_reset_event_work;
 	struct list_head reset_list;

 	long gfx_timeout;
@@ -1070,6 +1072,7 @@ struct amdgpu_device {
 	pci_channel_state_t pci_channel_state;

 	struct amdgpu_reset_control *reset_cntl;
+	struct drm_reset_event reset_event_info;
 	uint32_t ip_versions[MAX_HWIP][HWIP_MAX_INSTANCE];

 	bool ram_is_direct_mapped;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index eb1fe518e78c..1ecc6b095a87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -79,6 +79,7 @@
 #include <linux/pm_runtime.h>

 #include <drm/drm_drv.h>
+#include <drm/drm_sysfs.h>

 #if IS_ENABLED(CONFIG_X86)
 #include <asm/intel-family.h>
@@ -3580,6 +3581,17 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
 }

+static void amdgpu_device_reset_event_func(struct work_struct *__work)
+{
+	struct amdgpu_device *adev = container_of(__work, struct amdgpu_device,
+						  gpu_reset_event_work);
+	/*
+	 * A GPU reset has happened, inform the userspace and pass the
+	 * reset related information.
+	 */
+	drm_sysfs_reset_event(&adev->ddev, &adev->reset_event_info);
+}
+
 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
 {
 	struct amdgpu_device *adev =
@@ -3835,6 +3847,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 			  amdgpu_device_delay_enable_gfx_off);

 	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
+	INIT_WORK(&adev->gpu_reset_event_work, amdgpu_device_reset_event_func);

 	adev->gfx.gfx_off_req_count = 1;
 	adev->gfx.gfx_off_residency = 0;
@@ -5179,6 +5192,20 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
 						reset_context->job->vm->task_info;
 				amdgpu_reset_capture_coredumpm(tmp_adev);
 #endif
+				if (reset_context->job && reset_context->job->vm) {
+					tmp_adev->reset_event_info.pid =
+						reset_context->job->vm->task_info.pid;
+					memset(tmp_adev->reset_event_info.pname, 0, TASK_COMM_LEN);
+					strcpy(tmp_adev->reset_event_info.pname,
+					       reset_context->job->vm->task_info.process_name);
+				} else {
+					tmp_adev->reset_event_info.pid = 0;
+					memset(tmp_adev->reset_event_info.pname, 0, TASK_COMM_LEN);
+				}
+
+				tmp_adev->reset_event_info.flags = vram_lost;
+				schedule_work(&tmp_adev->gpu_reset_event_work);
+
 				if (vram_lost) {
 					DRM_INFO("VRAM is lost due to GPU reset!\n");
 					amdgpu_inc_vram_lost(tmp_adev);
```