summary refs log tree commit diff
path: root/drivers/gpu/host1x/job.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-09-01 11:26:46 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-09-01 11:26:46 -0700
commit477f70cd2a67904e04c2c2b9bd0fa2e95222f2f6 (patch)
tree1897dd1de49e1ea24897163533e2d8ead5dad0ad /drivers/gpu/host1x/job.c
parent835d31d319d9c8c4eb6cac074643360ba0ecab10 (diff)
parent8f0284f190e6a0aa09015090568c03f18288231a (diff)
downloadlinux-477f70cd2a67904e04c2c2b9bd0fa2e95222f2f6.tar.gz
Merge tag 'drm-next-2021-08-31-1' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie:
 "Highlights:

   - i915 has seen a lot of refactoring and uAPI cleanups due to a
     change in the upstream direction going forward

     This has all been audited with known userspace, but there may be
     some pitfalls that were missed.

   - i915 now uses common TTM to enable discrete memory on DG1/2 GPUs

   - i915 enables Jasper and Elkhart Lake by default and has preliminary
     XeHP/DG2 support

   - amdgpu adds support for Cyan Skillfish

   - lots of implicit fencing rules documented and fixed up in drivers

   - msm now uses the core scheduler

   - the irq midlayer has been removed for non-legacy drivers

   - the sysfb code now works on more than x86.

  Otherwise the usual smattering of stuff everywhere, panels, bridges,
  refactorings.

  Detailed summary:

  core:
   - extract i915 eDP backlight into core
   - DP aux bus support
   - drm_device.irq_enabled removed
   - port drivers to native irq interfaces
   - export gem shadow plane handling for vgem
   - print proper driver name in framebuffer registration
   - driver fixes for implicit fencing rules
   - ARM fixed rate compression modifier added
   - updated fb damage handling
   - rmfb ioctl logging/docs
   - drop drm_gem_object_put_locked
   - define DRM_FORMAT_MAX_PLANES
   - add gem fb vmap/vunmap helpers
   - add lockdep_assert(once) helpers
   - mark drm irq midlayer as legacy
   - use offset adjusted bo mapping conversion

  vgaarb:
   - cleanups

  fbdev:
   - extend efifb handling to all arches
   - div by 0 fixes for multiple drivers

  udmabuf:
   - add hugepage mapping support

  dma-buf:
   - non-dynamic exporter fixups
   - document implicit fencing rules

  amdgpu:
   - Initial Cyan Skillfish support
   - switch virtual DCE over to vkms based atomic
   - VCN/JPEG power down fixes
   - NAVI PCIE link handling fixes
   - AMD HDMI freesync fixes
   - Yellow Carp + Beige Goby fixes
   - Clockgating/S0ix/SMU/EEPROM fixes
   - embed hw fence in job
   - rework dma-resv handling
   - ensure eviction to system ram

  amdkfd:
   - uapi: SVM address range query added
   - sysfs leak fix
   - GPUVM TLB optimizations
   - vmfault/migration counters

  i915:
   - Enable JSL and EHL by default
   - preliminary XeHP/DG2 support
   - remove all CNL support (never shipped)
   - move to TTM for discrete memory support
   - allow mixed object mmap handling
   - GEM uAPI spring cleaning
       - add I915_MMAP_OBJECT_FIXED
       - reinstate ADL-P mmap ioctls
       - drop a bunch of unused by userspace features
       - disable and remove GPU relocations
   - revert some i915 misfeatures
   - major refactoring of GuC for Gen11+
   - execbuffer object locking separate step
   - reject caching/set-domain on discrete
   - Enable pipe DMC loading on XE-LPD and ADL-P
   - add PSF GV point support
   - Refactor and fix DDI buffer translations
   - Clean up FBC CFB allocation code
   - Finish INTEL_GEN() and friends macro conversions

  nouveau:
   - add eDP backlight support
   - implicit fence fix

  msm:
   - a680/7c3 support
   - drm/scheduler conversion

  panfrost:
   - rework GPU reset

  virtio:
   - fix fencing for planes

  ast:
   - add detect support

  bochs:
   - move to tiny GPU driver

  vc4:
   - use hotplug irqs
   - HDMI codec support

  vmwgfx:
   - use internal vmware device headers

  ingenic:
   - demidlayering irq

  rcar-du:
   - shutdown fixes
   - convert to bridge connector helpers

  zynqmp-dsub:
   - misc fixes

  mgag200:
   - convert PLL handling to atomic

  mediatek:
   - MT8133 AAL support
   - gem mmap object support
   - MT8167 support

  etnaviv:
   - NXP Layerscape LS1028A SoC support
   - GEM mmap cleanups

  tegra:
   - new user API

  exynos:
   - missing unlock fix
   - build warning fix
   - use refcount_t"

* tag 'drm-next-2021-08-31-1' of git://anongit.freedesktop.org/drm/drm: (1318 commits)
  drm/amd/display: Move AllowDRAMSelfRefreshOrDRAMClockChangeInVblank to bounding box
  drm/amd/display: Remove duplicate dml init
  drm/amd/display: Update bounding box states (v2)
  drm/amd/display: Update number of DCN3 clock states
  drm/amdgpu: disable GFX CGCG in aldebaran
  drm/amdgpu: Clear RAS interrupt status on aldebaran
  drm/amdgpu: Add support for RAS XGMI err query
  drm/amdkfd: Account for SH/SE count when setting up cu masks.
  drm/amdgpu: rename amdgpu_bo_get_preferred_pin_domain
  drm/amdgpu: drop redundant cancel_delayed_work_sync call
  drm/amdgpu: add missing cleanups for more ASICs on UVD/VCE suspend
  drm/amdgpu: add missing cleanups for Polaris12 UVD/VCE on suspend
  drm/amdkfd: map SVM range with correct access permission
  drm/amdkfd: check access permisson to restore retry fault
  drm/amdgpu: Update RAS XGMI Error Query
  drm/amdgpu: Add driver infrastructure for MCA RAS
  drm/amd/display: Add Logging for HDMI color depth information
  drm/amd/amdgpu: consolidate PSP TA init shared buf functions
  drm/amd/amdgpu: add name field back to ras_common_if
  drm/amdgpu: Fix build with missing pm_suspend_target_state module export
  ...
Diffstat (limited to 'drivers/gpu/host1x/job.c')
-rw-r--r--drivers/gpu/host1x/job.c98
1 files changed, 71 insertions, 27 deletions
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index adbdc225de8d..0eef6df7c89e 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -24,21 +24,25 @@
 #define HOST1X_WAIT_SYNCPT_OFFSET 0x8
 
 struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
-				    u32 num_cmdbufs, u32 num_relocs)
+				    u32 num_cmdbufs, u32 num_relocs,
+				    bool skip_firewall)
 {
 	struct host1x_job *job = NULL;
 	unsigned int num_unpins = num_relocs;
+	bool enable_firewall;
 	u64 total;
 	void *mem;
 
-	if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+	enable_firewall = IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !skip_firewall;
+
+	if (!enable_firewall)
 		num_unpins += num_cmdbufs;
 
 	/* Check that we're not going to overflow */
 	total = sizeof(struct host1x_job) +
 		(u64)num_relocs * sizeof(struct host1x_reloc) +
 		(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
-		(u64)num_cmdbufs * sizeof(struct host1x_job_gather) +
+		(u64)num_cmdbufs * sizeof(struct host1x_job_cmd) +
 		(u64)num_unpins * sizeof(dma_addr_t) +
 		(u64)num_unpins * sizeof(u32 *);
 	if (total > ULONG_MAX)
@@ -48,6 +52,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
 	if (!job)
 		return NULL;
 
+	job->enable_firewall = enable_firewall;
+
 	kref_init(&job->ref);
 	job->channel = ch;
 
@@ -57,8 +63,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
 	mem += num_relocs * sizeof(struct host1x_reloc);
 	job->unpins = num_unpins ? mem : NULL;
 	mem += num_unpins * sizeof(struct host1x_job_unpin_data);
-	job->gathers = num_cmdbufs ? mem : NULL;
-	mem += num_cmdbufs * sizeof(struct host1x_job_gather);
+	job->cmds = num_cmdbufs ? mem : NULL;
+	mem += num_cmdbufs * sizeof(struct host1x_job_cmd);
 	job->addr_phys = num_unpins ? mem : NULL;
 
 	job->reloc_addr_phys = job->addr_phys;
@@ -79,6 +85,13 @@ static void job_free(struct kref *ref)
 {
 	struct host1x_job *job = container_of(ref, struct host1x_job, ref);
 
+	if (job->release)
+		job->release(job);
+
+	if (job->waiter)
+		host1x_intr_put_ref(job->syncpt->host, job->syncpt->id,
+				    job->waiter, false);
+
 	if (job->syncpt)
 		host1x_syncpt_put(job->syncpt);
 
@@ -94,22 +107,38 @@ EXPORT_SYMBOL(host1x_job_put);
 void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
 			   unsigned int words, unsigned int offset)
 {
-	struct host1x_job_gather *gather = &job->gathers[job->num_gathers];
+	struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather;
 
 	gather->words = words;
 	gather->bo = bo;
 	gather->offset = offset;
 
-	job->num_gathers++;
+	job->num_cmds++;
 }
 EXPORT_SYMBOL(host1x_job_add_gather);
 
+void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh,
+			 bool relative, u32 next_class)
+{
+	struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds];
+
+	cmd->is_wait = true;
+	cmd->wait.id = id;
+	cmd->wait.threshold = thresh;
+	cmd->wait.next_class = next_class;
+	cmd->wait.relative = relative;
+
+	job->num_cmds++;
+}
+EXPORT_SYMBOL(host1x_job_add_wait);
+
 static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 {
 	struct host1x_client *client = job->client;
 	struct device *dev = client->dev;
 	struct host1x_job_gather *g;
 	struct iommu_domain *domain;
+	struct sg_table *sgt;
 	unsigned int i;
 	int err;
 
@@ -119,7 +148,6 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 	for (i = 0; i < job->num_relocs; i++) {
 		struct host1x_reloc *reloc = &job->relocs[i];
 		dma_addr_t phys_addr, *phys;
-		struct sg_table *sgt;
 
 		reloc->target.bo = host1x_bo_get(reloc->target.bo);
 		if (!reloc->target.bo) {
@@ -192,20 +220,23 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 	 * We will copy gathers BO content later, so there is no need to
 	 * hold and pin them.
 	 */
-	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+	if (job->enable_firewall)
 		return 0;
 
-	for (i = 0; i < job->num_gathers; i++) {
+	for (i = 0; i < job->num_cmds; i++) {
 		size_t gather_size = 0;
 		struct scatterlist *sg;
-		struct sg_table *sgt;
 		dma_addr_t phys_addr;
 		unsigned long shift;
 		struct iova *alloc;
 		dma_addr_t *phys;
 		unsigned int j;
 
-		g = &job->gathers[i];
+		if (job->cmds[i].is_wait)
+			continue;
+
+		g = &job->cmds[i].gather;
+
 		g->bo = host1x_bo_get(g->bo);
 		if (!g->bo) {
 			err = -EINVAL;
@@ -296,7 +327,7 @@ static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
 		if (cmdbuf != reloc->cmdbuf.bo)
 			continue;
 
-		if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
+		if (job->enable_firewall) {
 			target = (u32 *)job->gather_copy_mapped +
 					reloc->cmdbuf.offset / sizeof(u32) +
 						g->offset / sizeof(u32);
@@ -538,8 +569,13 @@ static inline int copy_gathers(struct device *host, struct host1x_job *job,
 	fw.num_relocs = job->num_relocs;
 	fw.class = job->class;
 
-	for (i = 0; i < job->num_gathers; i++) {
-		struct host1x_job_gather *g = &job->gathers[i];
+	for (i = 0; i < job->num_cmds; i++) {
+		struct host1x_job_gather *g;
+
+		if (job->cmds[i].is_wait)
+			continue;
+
+		g = &job->cmds[i].gather;
 
 		size += g->words * sizeof(u32);
 	}
@@ -561,10 +597,14 @@ static inline int copy_gathers(struct device *host, struct host1x_job *job,
 
 	job->gather_copy_size = size;
 
-	for (i = 0; i < job->num_gathers; i++) {
-		struct host1x_job_gather *g = &job->gathers[i];
+	for (i = 0; i < job->num_cmds; i++) {
+		struct host1x_job_gather *g;
 		void *gather;
 
+		if (job->cmds[i].is_wait)
+			continue;
+		g = &job->cmds[i].gather;
+
 		/* Copy the gather */
 		gather = host1x_bo_mmap(g->bo);
 		memcpy(job->gather_copy_mapped + offset, gather + g->offset,
@@ -600,28 +640,33 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
 	if (err)
 		goto out;
 
-	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
+	if (job->enable_firewall) {
 		err = copy_gathers(host->dev, job, dev);
 		if (err)
 			goto out;
 	}
 
 	/* patch gathers */
-	for (i = 0; i < job->num_gathers; i++) {
-		struct host1x_job_gather *g = &job->gathers[i];
+	for (i = 0; i < job->num_cmds; i++) {
+		struct host1x_job_gather *g;
+
+		if (job->cmds[i].is_wait)
+			continue;
+		g = &job->cmds[i].gather;
 
 		/* process each gather mem only once */
 		if (g->handled)
 			continue;
 
 		/* copy_gathers() sets gathers base if firewall is enabled */
-		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+		if (!job->enable_firewall)
 			g->base = job->gather_addr_phys[i];
 
-		for (j = i + 1; j < job->num_gathers; j++) {
-			if (job->gathers[j].bo == g->bo) {
-				job->gathers[j].handled = true;
-				job->gathers[j].base = g->base;
+		for (j = i + 1; j < job->num_cmds; j++) {
+			if (!job->cmds[j].is_wait &&
+			    job->cmds[j].gather.bo == g->bo) {
+				job->cmds[j].gather.handled = true;
+				job->cmds[j].gather.base = g->base;
 			}
 		}
 
@@ -649,8 +694,7 @@ void host1x_job_unpin(struct host1x_job *job)
 		struct device *dev = unpin->dev ?: host->dev;
 		struct sg_table *sgt = unpin->sgt;
 
-		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) &&
-		    unpin->size && host->domain) {
+		if (!job->enable_firewall && unpin->size && host->domain) {
 			iommu_unmap(host->domain, job->addr_phys[i],
 				    unpin->size);
 			free_iova(&host->iova,