From b1a3dc0b85bde4d8d549ea3aa31106b599694f37 Mon Sep 17 00:00:00 2001
From: Stefan Schake <stschake@gmail.com>
Date: Wed, 18 Apr 2018 03:40:19 -0700
Subject: [PATCH 01/36] drm/tegra: hub: Use state directly

Using drm_atomic_get_private_obj_state() after state has been swapped
will return old state.

Fixes: 0281c4149021 ("drm/tegra: hub: Use private object for global state")
Signed-off-by: Stefan Schake <stschake@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/hub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c
index 9a3f23d4780f..8f4fcbb515fb 100644
--- a/drivers/gpu/drm/tegra/hub.c
+++ b/drivers/gpu/drm/tegra/hub.c
@@ -687,7 +687,7 @@ void tegra_display_hub_atomic_commit(struct drm_device *drm,
 	struct device *dev = hub->client.dev;
 	int err;
 
-	hub_state = tegra_display_hub_get_state(hub, state);
+	hub_state = to_tegra_display_hub_state(hub->base.state);
 
 	if (hub_state->clk) {
 		err = clk_set_rate(hub_state->clk, hub_state->rate);

From 6f75b16b2683eb7c86ce2c8d150bf3fa759103b9 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Fri, 4 May 2018 02:47:19 +0300
Subject: [PATCH 02/36] drm/tegra: dc: Balance IOMMU group refcounting

Remove unneeded iommu_group_get() and add missing iommu_group_put(),
correcting IOMMU group refcount. This is a minor correction / cleanup that
doesn't really fix anything because Tegra's IOMMU driver are built-in and
hence groups refcounting can't hold IOMMU driver from unloading.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dc.c | 31 +++++++++++++++----------------
 drivers/gpu/drm/tegra/dc.h |  2 +-
 2 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 9f83a65b5ea9..f20648f58e49 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -1826,7 +1826,6 @@ static irqreturn_t tegra_dc_irq(int irq, void *data)
 static int tegra_dc_init(struct host1x_client *client)
 {
 	struct drm_device *drm = dev_get_drvdata(client->parent);
-	struct iommu_group *group = iommu_group_get(client->dev);
 	unsigned long flags = HOST1X_SYNCPT_CLIENT_MANAGED;
 	struct tegra_dc *dc = host1x_client_to_dc(client);
 	struct tegra_drm *tegra = drm->dev_private;
@@ -1838,20 +1837,21 @@ static int tegra_dc_init(struct host1x_client *client)
 	if (!dc->syncpt)
 		dev_warn(dc->dev, "failed to allocate syncpoint\n");
 
-	if (group && tegra->domain) {
-		if (group != tegra->group) {
-			err = iommu_attach_group(tegra->domain, group);
+	if (tegra->domain) {
+		dc->group = iommu_group_get(client->dev);
+
+		if (dc->group && dc->group != tegra->group) {
+			err = iommu_attach_group(tegra->domain, dc->group);
 			if (err < 0) {
 				dev_err(dc->dev,
 					"failed to attach to domain: %d\n",
 					err);
+				iommu_group_put(dc->group);
 				return err;
 			}
 
-			tegra->group = group;
+			tegra->group = dc->group;
 		}
-
-		dc->domain = tegra->domain;
 	}
 
 	if (dc->soc->wgrps)
@@ -1916,13 +1916,13 @@ static int tegra_dc_init(struct host1x_client *client)
 	if (!IS_ERR(primary))
 		drm_plane_cleanup(primary);
 
-	if (group && dc->domain) {
-		if (group == tegra->group) {
-			iommu_detach_group(dc->domain, group);
+	if (dc->group) {
+		if (dc->group == tegra->group) {
+			iommu_detach_group(tegra->domain, dc->group);
 			tegra->group = NULL;
 		}
 
-		dc->domain = NULL;
+		iommu_group_put(dc->group);
 	}
 
 	return err;
@@ -1931,7 +1931,6 @@ static int tegra_dc_init(struct host1x_client *client)
 static int tegra_dc_exit(struct host1x_client *client)
 {
 	struct drm_device *drm = dev_get_drvdata(client->parent);
-	struct iommu_group *group = iommu_group_get(client->dev);
 	struct tegra_dc *dc = host1x_client_to_dc(client);
 	struct tegra_drm *tegra = drm->dev_private;
 	int err;
@@ -1944,13 +1943,13 @@ static int tegra_dc_exit(struct host1x_client *client)
 		return err;
 	}
 
-	if (group && dc->domain) {
-		if (group == tegra->group) {
-			iommu_detach_group(dc->domain, group);
+	if (dc->group) {
+		if (dc->group == tegra->group) {
+			iommu_detach_group(tegra->domain, dc->group);
 			tegra->group = NULL;
 		}
 
-		dc->domain = NULL;
+		iommu_group_put(dc->group);
 	}
 
 	host1x_syncpt_free(dc->syncpt);
diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h
index d2b50d32de4d..7be786febb17 100644
--- a/drivers/gpu/drm/tegra/dc.h
+++ b/drivers/gpu/drm/tegra/dc.h
@@ -92,7 +92,7 @@ struct tegra_dc {
 
 	const struct tegra_dc_soc_info *soc;
 
-	struct iommu_domain *domain;
+	struct iommu_group *group;
 };
 
 static inline struct tegra_dc *

From 5fda01b50d769d600c34cb00ab15ce1b6a66c028 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Fri, 4 May 2018 02:47:20 +0300
Subject: [PATCH 03/36] drm/tegra: gr2d: Add IOMMU support

Attach GR2D to the display IOMMU group in order to provide GR2D access
to BO's IOVA.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/gr2d.c | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
index 9a8ea93016a9..8eb530a85dd0 100644
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c
@@ -7,12 +7,14 @@
  */
 
 #include <linux/clk.h>
+#include <linux/iommu.h>
 
 #include "drm.h"
 #include "gem.h"
 #include "gr2d.h"
 
 struct gr2d {
+	struct iommu_group *group;
 	struct tegra_drm_client client;
 	struct host1x_channel *channel;
 	struct clk *clk;
@@ -30,7 +32,9 @@ static int gr2d_init(struct host1x_client *client)
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
 	struct drm_device *dev = dev_get_drvdata(client->parent);
 	unsigned long flags = HOST1X_SYNCPT_HAS_BASE;
+	struct tegra_drm *tegra = dev->dev_private;
 	struct gr2d *gr2d = to_gr2d(drm);
+	int err;
 
 	gr2d->channel = host1x_channel_request(client->dev);
 	if (!gr2d->channel)
@@ -42,23 +46,46 @@ static int gr2d_init(struct host1x_client *client)
 		return -ENOMEM;
 	}
 
-	return tegra_drm_register_client(dev->dev_private, drm);
+	if (tegra->domain) {
+		gr2d->group = iommu_group_get(client->dev);
+
+		if (gr2d->group) {
+			err = iommu_attach_group(tegra->domain, gr2d->group);
+			if (err < 0) {
+				dev_err(client->dev,
+					"failed to attach to domain: %d\n",
+					err);
+				host1x_syncpt_free(client->syncpts[0]);
+				host1x_channel_put(gr2d->channel);
+				iommu_group_put(gr2d->group);
+				return err;
+			}
+		}
+	}
+
+	return tegra_drm_register_client(tegra, drm);
 }
 
 static int gr2d_exit(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
 	struct drm_device *dev = dev_get_drvdata(client->parent);
+	struct tegra_drm *tegra = dev->dev_private;
 	struct gr2d *gr2d = to_gr2d(drm);
 	int err;
 
-	err = tegra_drm_unregister_client(dev->dev_private, drm);
+	err = tegra_drm_unregister_client(tegra, drm);
 	if (err < 0)
 		return err;
 
 	host1x_syncpt_free(client->syncpts[0]);
 	host1x_channel_put(gr2d->channel);
 
+	if (gr2d->group) {
+		iommu_detach_group(tegra->domain, gr2d->group);
+		iommu_group_put(gr2d->group);
+	}
+
 	return 0;
 }
 

From c9ac52175b38e7f22fe37b9f943973d9095e53b7 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Fri, 4 May 2018 02:47:21 +0300
Subject: [PATCH 04/36] drm/tegra: gr3d: Add IOMMU support

Attach GR3D to the displays IOMMU group in order to provide GR3D access
to BO's IOVA.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/gr3d.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c
index 28c4ef63065b..ce5120683091 100644
--- a/drivers/gpu/drm/tegra/gr3d.c
+++ b/drivers/gpu/drm/tegra/gr3d.c
@@ -9,6 +9,7 @@
 
 #include <linux/clk.h>
 #include <linux/host1x.h>
+#include <linux/iommu.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
@@ -20,6 +21,7 @@
 #include "gr3d.h"
 
 struct gr3d {
+	struct iommu_group *group;
 	struct tegra_drm_client client;
 	struct host1x_channel *channel;
 	struct clk *clk_secondary;
@@ -40,7 +42,9 @@ static int gr3d_init(struct host1x_client *client)
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
 	struct drm_device *dev = dev_get_drvdata(client->parent);
 	unsigned long flags = HOST1X_SYNCPT_HAS_BASE;
+	struct tegra_drm *tegra = dev->dev_private;
 	struct gr3d *gr3d = to_gr3d(drm);
+	int err;
 
 	gr3d->channel = host1x_channel_request(client->dev);
 	if (!gr3d->channel)
@@ -52,6 +56,23 @@ static int gr3d_init(struct host1x_client *client)
 		return -ENOMEM;
 	}
 
+	if (tegra->domain) {
+		gr3d->group = iommu_group_get(client->dev);
+
+		if (gr3d->group) {
+			err = iommu_attach_group(tegra->domain, gr3d->group);
+			if (err < 0) {
+				dev_err(client->dev,
+					"failed to attach to domain: %d\n",
+					err);
+				host1x_syncpt_free(client->syncpts[0]);
+				host1x_channel_put(gr3d->channel);
+				iommu_group_put(gr3d->group);
+				return err;
+			}
+		}
+	}
+
 	return tegra_drm_register_client(dev->dev_private, drm);
 }
 
@@ -59,6 +80,7 @@ static int gr3d_exit(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
 	struct drm_device *dev = dev_get_drvdata(client->parent);
+	struct tegra_drm *tegra = dev->dev_private;
 	struct gr3d *gr3d = to_gr3d(drm);
 	int err;
 
@@ -69,6 +91,11 @@ static int gr3d_exit(struct host1x_client *client)
 	host1x_syncpt_free(client->syncpts[0]);
 	host1x_channel_put(gr3d->channel);
 
+	if (gr3d->group) {
+		iommu_detach_group(tegra->domain, gr3d->group);
+		iommu_group_put(gr3d->group);
+	}
+
 	return 0;
 }
 

From fd5ec0dc34dafa6c5bb46770ca283ae90a4db3c7 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 4 May 2018 15:00:54 +0200
Subject: [PATCH 05/36] drm/tegra: dc: Free syncpoint on errors

If an error happens during display controller initialization, the host1x
syncpoint previously requested would be leaked. Properly clean up the
syncpoint along with the other resources.

Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index f20648f58e49..c843f11043db 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -1925,6 +1925,8 @@ static int tegra_dc_init(struct host1x_client *client)
 		iommu_group_put(dc->group);
 	}
 
+	host1x_syncpt_free(dc->syncpt);
+
 	return err;
 }
 

From dd99b4b48833a55817d78a48034b606664b1fff8 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 4 May 2018 14:58:26 +0200
Subject: [PATCH 06/36] drm/tegra: gr2d: Properly clean up resources

Failure to register the Tegra DRM client would leak the resources. Move
cleanup code to error unwinding gotos to fix that and share the cleanup
code with the other error paths.

Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/gr2d.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
index 8eb530a85dd0..0b42e99da8ad 100644
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c
@@ -42,8 +42,9 @@ static int gr2d_init(struct host1x_client *client)
 
 	client->syncpts[0] = host1x_syncpt_request(client, flags);
 	if (!client->syncpts[0]) {
-		host1x_channel_put(gr2d->channel);
-		return -ENOMEM;
+		err = -ENOMEM;
+		dev_err(client->dev, "failed to request syncpoint: %d\n", err);
+		goto put;
 	}
 
 	if (tegra->domain) {
@@ -55,15 +56,30 @@ static int gr2d_init(struct host1x_client *client)
 				dev_err(client->dev,
 					"failed to attach to domain: %d\n",
 					err);
-				host1x_syncpt_free(client->syncpts[0]);
-				host1x_channel_put(gr2d->channel);
 				iommu_group_put(gr2d->group);
-				return err;
+				goto free;
 			}
 		}
 	}
 
-	return tegra_drm_register_client(tegra, drm);
+	err = tegra_drm_register_client(tegra, drm);
+	if (err < 0) {
+		dev_err(client->dev, "failed to register client: %d\n", err);
+		goto detach;
+	}
+
+	return 0;
+
+detach:
+	if (gr2d->group) {
+		iommu_detach_group(tegra->domain, gr2d->group);
+		iommu_group_put(gr2d->group);
+	}
+free:
+	host1x_syncpt_free(client->syncpts[0]);
+put:
+	host1x_channel_put(gr2d->channel);
+	return err;
 }
 
 static int gr2d_exit(struct host1x_client *client)

From 230630bd3834af0ea6ec75354ec21819de148ee1 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 4 May 2018 15:08:49 +0200
Subject: [PATCH 07/36] drm/tegra: gr3d: Properly clean up resources

Failure to register the Tegra DRM client would leak the resources. Move
cleanup code to error unwinding gotos to fix that and share the cleanup
code with the other error paths.

Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/gr3d.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c
index ce5120683091..9303278efc1d 100644
--- a/drivers/gpu/drm/tegra/gr3d.c
+++ b/drivers/gpu/drm/tegra/gr3d.c
@@ -52,8 +52,9 @@ static int gr3d_init(struct host1x_client *client)
 
 	client->syncpts[0] = host1x_syncpt_request(client, flags);
 	if (!client->syncpts[0]) {
-		host1x_channel_put(gr3d->channel);
-		return -ENOMEM;
+		err = -ENOMEM;
+		dev_err(client->dev, "failed to request syncpoint: %d\n", err);
+		goto put;
 	}
 
 	if (tegra->domain) {
@@ -65,15 +66,30 @@ static int gr3d_init(struct host1x_client *client)
 				dev_err(client->dev,
 					"failed to attach to domain: %d\n",
 					err);
-				host1x_syncpt_free(client->syncpts[0]);
-				host1x_channel_put(gr3d->channel);
 				iommu_group_put(gr3d->group);
-				return err;
+				goto free;
 			}
 		}
 	}
 
-	return tegra_drm_register_client(dev->dev_private, drm);
+	err = tegra_drm_register_client(dev->dev_private, drm);
+	if (err < 0) {
+		dev_err(client->dev, "failed to register client: %d\n", err);
+		goto detach;
+	}
+
+	return 0;
+
+detach:
+	if (gr3d->group) {
+		iommu_detach_group(tegra->domain, gr3d->group);
+		iommu_group_put(gr3d->group);
+	}
+free:
+	host1x_syncpt_free(client->syncpts[0]);
+put:
+	host1x_channel_put(gr3d->channel);
+	return err;
 }
 
 static int gr3d_exit(struct host1x_client *client)

From 0c407de5ed1a329468122cbf4f3e727e0c1e3f36 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 4 May 2018 15:02:24 +0200
Subject: [PATCH 08/36] drm/tegra: Refactor IOMMU attach/detach

Attaching to and detaching from an IOMMU uses the same code sequence in
every driver, so factor it out into separate helpers.

Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dc.c   | 42 ++++++--------------------------
 drivers/gpu/drm/tegra/drm.c  | 46 ++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/tegra/drm.h  |  4 ++++
 drivers/gpu/drm/tegra/gr2d.c | 32 +++++++------------------
 drivers/gpu/drm/tegra/gr3d.c | 31 ++++++------------------
 5 files changed, 72 insertions(+), 83 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index c843f11043db..3e7ec3937346 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -1837,21 +1837,11 @@ static int tegra_dc_init(struct host1x_client *client)
 	if (!dc->syncpt)
 		dev_warn(dc->dev, "failed to allocate syncpoint\n");
 
-	if (tegra->domain) {
-		dc->group = iommu_group_get(client->dev);
-
-		if (dc->group && dc->group != tegra->group) {
-			err = iommu_attach_group(tegra->domain, dc->group);
-			if (err < 0) {
-				dev_err(dc->dev,
-					"failed to attach to domain: %d\n",
-					err);
-				iommu_group_put(dc->group);
-				return err;
-			}
-
-			tegra->group = dc->group;
-		}
+	dc->group = host1x_client_iommu_attach(client, true);
+	if (IS_ERR(dc->group)) {
+		err = PTR_ERR(dc->group);
+		dev_err(client->dev, "failed to attach to domain: %d\n", err);
+		return err;
 	}
 
 	if (dc->soc->wgrps)
@@ -1916,15 +1906,7 @@ static int tegra_dc_init(struct host1x_client *client)
 	if (!IS_ERR(primary))
 		drm_plane_cleanup(primary);
 
-	if (dc->group) {
-		if (dc->group == tegra->group) {
-			iommu_detach_group(tegra->domain, dc->group);
-			tegra->group = NULL;
-		}
-
-		iommu_group_put(dc->group);
-	}
-
+	host1x_client_iommu_detach(client, dc->group);
 	host1x_syncpt_free(dc->syncpt);
 
 	return err;
@@ -1932,9 +1914,7 @@ static int tegra_dc_init(struct host1x_client *client)
 
 static int tegra_dc_exit(struct host1x_client *client)
 {
-	struct drm_device *drm = dev_get_drvdata(client->parent);
 	struct tegra_dc *dc = host1x_client_to_dc(client);
-	struct tegra_drm *tegra = drm->dev_private;
 	int err;
 
 	devm_free_irq(dc->dev, dc->irq, dc);
@@ -1945,15 +1925,7 @@ static int tegra_dc_exit(struct host1x_client *client)
 		return err;
 	}
 
-	if (dc->group) {
-		if (dc->group == tegra->group) {
-			iommu_detach_group(tegra->domain, dc->group);
-			tegra->group = NULL;
-		}
-
-		iommu_group_put(dc->group);
-	}
-
+	host1x_client_iommu_detach(client, dc->group);
 	host1x_syncpt_free(dc->syncpt);
 
 	return 0;
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 7afe2f635f74..181e82c58a4f 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -1114,6 +1114,52 @@ int tegra_drm_unregister_client(struct tegra_drm *tegra,
 	return 0;
 }
 
+struct iommu_group *host1x_client_iommu_attach(struct host1x_client *client,
+					       bool shared)
+{
+	struct drm_device *drm = dev_get_drvdata(client->parent);
+	struct tegra_drm *tegra = drm->dev_private;
+	struct iommu_group *group = NULL;
+	int err;
+
+	if (tegra->domain) {
+		group = iommu_group_get(client->dev);
+		if (!group) {
+			dev_err(client->dev, "failed to get IOMMU group\n");
+			return ERR_PTR(-ENODEV);
+		}
+
+		if (!shared || (shared && (group != tegra->group))) {
+			err = iommu_attach_group(tegra->domain, group);
+			if (err < 0) {
+				iommu_group_put(group);
+				return ERR_PTR(err);
+			}
+
+			if (shared && !tegra->group)
+				tegra->group = group;
+		}
+	}
+
+	return group;
+}
+
+void host1x_client_iommu_detach(struct host1x_client *client,
+				struct iommu_group *group)
+{
+	struct drm_device *drm = dev_get_drvdata(client->parent);
+	struct tegra_drm *tegra = drm->dev_private;
+
+	if (group) {
+		if (group == tegra->group) {
+			iommu_detach_group(tegra->domain, group);
+			tegra->group = NULL;
+		}
+
+		iommu_group_put(group);
+	}
+}
+
 void *tegra_drm_alloc(struct tegra_drm *tegra, size_t size, dma_addr_t *dma)
 {
 	struct iova *alloc;
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 4f41aaec8530..fe263cf58f34 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -110,6 +110,10 @@ int tegra_drm_register_client(struct tegra_drm *tegra,
 			      struct tegra_drm_client *client);
 int tegra_drm_unregister_client(struct tegra_drm *tegra,
 				struct tegra_drm_client *client);
+struct iommu_group *host1x_client_iommu_attach(struct host1x_client *client,
+					       bool shared);
+void host1x_client_iommu_detach(struct host1x_client *client,
+				struct iommu_group *group);
 
 int tegra_drm_init(struct tegra_drm *tegra, struct drm_device *drm);
 int tegra_drm_exit(struct tegra_drm *tegra);
diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
index 0b42e99da8ad..2cd0f66c8aa9 100644
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c
@@ -32,7 +32,6 @@ static int gr2d_init(struct host1x_client *client)
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
 	struct drm_device *dev = dev_get_drvdata(client->parent);
 	unsigned long flags = HOST1X_SYNCPT_HAS_BASE;
-	struct tegra_drm *tegra = dev->dev_private;
 	struct gr2d *gr2d = to_gr2d(drm);
 	int err;
 
@@ -47,22 +46,14 @@ static int gr2d_init(struct host1x_client *client)
 		goto put;
 	}
 
-	if (tegra->domain) {
-		gr2d->group = iommu_group_get(client->dev);
-
-		if (gr2d->group) {
-			err = iommu_attach_group(tegra->domain, gr2d->group);
-			if (err < 0) {
-				dev_err(client->dev,
-					"failed to attach to domain: %d\n",
-					err);
-				iommu_group_put(gr2d->group);
-				goto free;
-			}
-		}
+	gr2d->group = host1x_client_iommu_attach(client, false);
+	if (IS_ERR(gr2d->group)) {
+		err = PTR_ERR(gr2d->group);
+		dev_err(client->dev, "failed to attach to domain: %d\n", err);
+		goto free;
 	}
 
-	err = tegra_drm_register_client(tegra, drm);
+	err = tegra_drm_register_client(dev->dev_private, drm);
 	if (err < 0) {
 		dev_err(client->dev, "failed to register client: %d\n", err);
 		goto detach;
@@ -71,10 +62,7 @@ static int gr2d_init(struct host1x_client *client)
 	return 0;
 
 detach:
-	if (gr2d->group) {
-		iommu_detach_group(tegra->domain, gr2d->group);
-		iommu_group_put(gr2d->group);
-	}
+	host1x_client_iommu_detach(client, gr2d->group);
 free:
 	host1x_syncpt_free(client->syncpts[0]);
 put:
@@ -94,14 +82,10 @@ static int gr2d_exit(struct host1x_client *client)
 	if (err < 0)
 		return err;
 
+	host1x_client_iommu_detach(client, gr2d->group);
 	host1x_syncpt_free(client->syncpts[0]);
 	host1x_channel_put(gr2d->channel);
 
-	if (gr2d->group) {
-		iommu_detach_group(tegra->domain, gr2d->group);
-		iommu_group_put(gr2d->group);
-	}
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c
index 9303278efc1d..b00002f1c590 100644
--- a/drivers/gpu/drm/tegra/gr3d.c
+++ b/drivers/gpu/drm/tegra/gr3d.c
@@ -42,7 +42,6 @@ static int gr3d_init(struct host1x_client *client)
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
 	struct drm_device *dev = dev_get_drvdata(client->parent);
 	unsigned long flags = HOST1X_SYNCPT_HAS_BASE;
-	struct tegra_drm *tegra = dev->dev_private;
 	struct gr3d *gr3d = to_gr3d(drm);
 	int err;
 
@@ -57,19 +56,11 @@ static int gr3d_init(struct host1x_client *client)
 		goto put;
 	}
 
-	if (tegra->domain) {
-		gr3d->group = iommu_group_get(client->dev);
-
-		if (gr3d->group) {
-			err = iommu_attach_group(tegra->domain, gr3d->group);
-			if (err < 0) {
-				dev_err(client->dev,
-					"failed to attach to domain: %d\n",
-					err);
-				iommu_group_put(gr3d->group);
-				goto free;
-			}
-		}
+	gr3d->group = host1x_client_iommu_attach(client, false);
+	if (IS_ERR(gr3d->group)) {
+		err = PTR_ERR(gr3d->group);
+		dev_err(client->dev, "failed to attach to domain: %d\n", err);
+		goto free;
 	}
 
 	err = tegra_drm_register_client(dev->dev_private, drm);
@@ -81,10 +72,7 @@ static int gr3d_init(struct host1x_client *client)
 	return 0;
 
 detach:
-	if (gr3d->group) {
-		iommu_detach_group(tegra->domain, gr3d->group);
-		iommu_group_put(gr3d->group);
-	}
+	host1x_client_iommu_detach(client, gr3d->group);
 free:
 	host1x_syncpt_free(client->syncpts[0]);
 put:
@@ -96,7 +84,6 @@ static int gr3d_exit(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
 	struct drm_device *dev = dev_get_drvdata(client->parent);
-	struct tegra_drm *tegra = dev->dev_private;
 	struct gr3d *gr3d = to_gr3d(drm);
 	int err;
 
@@ -104,14 +91,10 @@ static int gr3d_exit(struct host1x_client *client)
 	if (err < 0)
 		return err;
 
+	host1x_client_iommu_detach(client, gr3d->group);
 	host1x_syncpt_free(client->syncpts[0]);
 	host1x_channel_put(gr3d->channel);
 
-	if (gr3d->group) {
-		iommu_detach_group(tegra->domain, gr3d->group);
-		iommu_group_put(gr3d->group);
-	}
-
 	return 0;
 }
 

From acc6a3a9afdd4e0537342012656cdb5c4a3127c5 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Fri, 4 May 2018 17:39:58 +0300
Subject: [PATCH 09/36] drm/tegra: dc: Enable plane scaling filters

Currently resized plane produces a "pixelated" image which doesn't look
nice, especially in a case of a video overlay. Enable scaling filters that
significantly improve image quality of a scaled overlay.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dc.c | 81 ++++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/tegra/dc.h |  7 ++++
 2 files changed, 88 insertions(+)

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 3e7ec3937346..a4dd866fc8be 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -224,6 +224,39 @@ static void tegra_plane_setup_blending(struct tegra_plane *plane,
 	tegra_plane_writel(plane, value, DC_WIN_BLEND_LAYER_CONTROL);
 }
 
+static bool
+tegra_plane_use_horizontal_filtering(struct tegra_plane *plane,
+				     const struct tegra_dc_window *window)
+{
+	struct tegra_dc *dc = plane->dc;
+
+	if (window->src.w == window->dst.w)
+		return false;
+
+	if (plane->index == 0 && dc->soc->has_win_a_without_filters)
+		return false;
+
+	return true;
+}
+
+static bool
+tegra_plane_use_vertical_filtering(struct tegra_plane *plane,
+				   const struct tegra_dc_window *window)
+{
+	struct tegra_dc *dc = plane->dc;
+
+	if (window->src.h == window->dst.h)
+		return false;
+
+	if (plane->index == 0 && dc->soc->has_win_a_without_filters)
+		return false;
+
+	if (plane->index == 2 && dc->soc->has_win_c_without_vert_filter)
+		return false;
+
+	return true;
+}
+
 static void tegra_dc_setup_window(struct tegra_plane *plane,
 				  const struct tegra_dc_window *window)
 {
@@ -361,6 +394,44 @@ static void tegra_dc_setup_window(struct tegra_plane *plane,
 	if (window->bottom_up)
 		value |= V_DIRECTION;
 
+	if (tegra_plane_use_horizontal_filtering(plane, window)) {
+		/*
+		 * Enable horizontal 6-tap filter and set filtering
+		 * coefficients to the default values defined in TRM.
+		 */
+		tegra_plane_writel(plane, 0x00008000, DC_WIN_H_FILTER_P(0));
+		tegra_plane_writel(plane, 0x3e087ce1, DC_WIN_H_FILTER_P(1));
+		tegra_plane_writel(plane, 0x3b117ac1, DC_WIN_H_FILTER_P(2));
+		tegra_plane_writel(plane, 0x591b73aa, DC_WIN_H_FILTER_P(3));
+		tegra_plane_writel(plane, 0x57256d9a, DC_WIN_H_FILTER_P(4));
+		tegra_plane_writel(plane, 0x552f668b, DC_WIN_H_FILTER_P(5));
+		tegra_plane_writel(plane, 0x73385e8b, DC_WIN_H_FILTER_P(6));
+		tegra_plane_writel(plane, 0x72435583, DC_WIN_H_FILTER_P(7));
+		tegra_plane_writel(plane, 0x714c4c8b, DC_WIN_H_FILTER_P(8));
+		tegra_plane_writel(plane, 0x70554393, DC_WIN_H_FILTER_P(9));
+		tegra_plane_writel(plane, 0x715e389b, DC_WIN_H_FILTER_P(10));
+		tegra_plane_writel(plane, 0x71662faa, DC_WIN_H_FILTER_P(11));
+		tegra_plane_writel(plane, 0x536d25ba, DC_WIN_H_FILTER_P(12));
+		tegra_plane_writel(plane, 0x55731bca, DC_WIN_H_FILTER_P(13));
+		tegra_plane_writel(plane, 0x387a11d9, DC_WIN_H_FILTER_P(14));
+		tegra_plane_writel(plane, 0x3c7c08f1, DC_WIN_H_FILTER_P(15));
+
+		value |= H_FILTER;
+	}
+
+	if (tegra_plane_use_vertical_filtering(plane, window)) {
+		unsigned int i, k;
+
+		/*
+		 * Enable vertical 2-tap filter and set filtering
+		 * coefficients to the default values defined in TRM.
+		 */
+		for (i = 0, k = 128; i < 16; i++, k -= 8)
+			tegra_plane_writel(plane, k, DC_WIN_V_FILTER_P(i));
+
+		value |= V_FILTER;
+	}
+
 	tegra_plane_writel(plane, value, DC_WIN_WIN_OPTIONS);
 
 	if (dc->soc->supports_blending)
@@ -1951,6 +2022,8 @@ static const struct tegra_dc_soc_info tegra20_dc_soc_info = {
 	.num_overlay_formats = ARRAY_SIZE(tegra20_overlay_formats),
 	.overlay_formats = tegra20_overlay_formats,
 	.modifiers = tegra20_modifiers,
+	.has_win_a_without_filters = true,
+	.has_win_c_without_vert_filter = true,
 };
 
 static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
@@ -1968,6 +2041,8 @@ static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
 	.num_overlay_formats = ARRAY_SIZE(tegra20_overlay_formats),
 	.overlay_formats = tegra20_overlay_formats,
 	.modifiers = tegra20_modifiers,
+	.has_win_a_without_filters = false,
+	.has_win_c_without_vert_filter = false,
 };
 
 static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
@@ -1985,6 +2060,8 @@ static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
 	.num_overlay_formats = ARRAY_SIZE(tegra114_overlay_formats),
 	.overlay_formats = tegra114_overlay_formats,
 	.modifiers = tegra20_modifiers,
+	.has_win_a_without_filters = false,
+	.has_win_c_without_vert_filter = false,
 };
 
 static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
@@ -2002,6 +2079,8 @@ static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
 	.num_overlay_formats = ARRAY_SIZE(tegra124_overlay_formats),
 	.overlay_formats = tegra124_overlay_formats,
 	.modifiers = tegra124_modifiers,
+	.has_win_a_without_filters = false,
+	.has_win_c_without_vert_filter = false,
 };
 
 static const struct tegra_dc_soc_info tegra210_dc_soc_info = {
@@ -2019,6 +2098,8 @@ static const struct tegra_dc_soc_info tegra210_dc_soc_info = {
 	.num_overlay_formats = ARRAY_SIZE(tegra114_overlay_formats),
 	.overlay_formats = tegra114_overlay_formats,
 	.modifiers = tegra124_modifiers,
+	.has_win_a_without_filters = false,
+	.has_win_c_without_vert_filter = false,
 };
 
 static const struct tegra_windowgroup_soc tegra186_dc_wgrps[] = {
diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h
index 7be786febb17..556321fde9d2 100644
--- a/drivers/gpu/drm/tegra/dc.h
+++ b/drivers/gpu/drm/tegra/dc.h
@@ -67,6 +67,8 @@ struct tegra_dc_soc_info {
 	const u32 *overlay_formats;
 	unsigned int num_overlay_formats;
 	const u64 *modifiers;
+	bool has_win_a_without_filters;
+	bool has_win_c_without_vert_filter;
 };
 
 struct tegra_dc {
@@ -553,6 +555,9 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc);
 #define  THREAD_NUM(x) (((x) & 0x1f) << 1)
 #define  THREAD_GROUP_ENABLE (1 << 0)
 
+#define DC_WIN_H_FILTER_P(p)			(0x601 + (p))
+#define DC_WIN_V_FILTER_P(p)			(0x619 + (p))
+
 #define DC_WIN_CSC_YOF				0x611
 #define DC_WIN_CSC_KYRGB			0x612
 #define DC_WIN_CSC_KUR				0x613
@@ -566,6 +571,8 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc);
 #define H_DIRECTION  (1 <<  0)
 #define V_DIRECTION  (1 <<  2)
 #define COLOR_EXPAND (1 <<  6)
+#define H_FILTER     (1 <<  8)
+#define V_FILTER     (1 << 10)
 #define CSC_ENABLE   (1 << 18)
 #define WIN_ENABLE   (1 << 30)
 

From 3dae08bc076b93487ed2df50bcfa892113e89d9d Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Fri, 4 May 2018 17:39:59 +0300
Subject: [PATCH 10/36] drm/tegra: plane: Implement zpos plane property for
 older Tegras

Older Tegra's do not support plane's Z position handling in hardware,
but the hardware provides knobs to implement it in software.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dc.c    | 140 ++++++++++++++++-------
 drivers/gpu/drm/tegra/plane.c | 207 ++++++++++++++++++++++++----------
 drivers/gpu/drm/tegra/plane.h |  13 ++-
 3 files changed, 254 insertions(+), 106 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index a4dd866fc8be..51581d9da509 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -163,28 +163,89 @@ static void tegra_plane_setup_blending_legacy(struct tegra_plane *plane)
 			 BLEND_COLOR_KEY_NONE;
 	u32 blendnokey = BLEND_WEIGHT1(255) | BLEND_WEIGHT0(255);
 	struct tegra_plane_state *state;
+	u32 blending[2];
 	unsigned int i;
 
-	state = to_tegra_plane_state(plane->base.state);
-
-	/* alpha contribution is 1 minus sum of overlapping windows */
-	for (i = 0; i < 3; i++) {
-		if (state->dependent[i])
-			background[i] |= BLEND_CONTROL_DEPENDENT;
-	}
-
-	/* enable alpha blending if pixel format has an alpha component */
-	if (!state->opaque)
-		foreground |= BLEND_CONTROL_ALPHA;
-
-	/*
-	 * Disable blending and assume Window A is the bottom-most window,
-	 * Window C is the top-most window and Window B is in the middle.
-	 */
+	/* disable blending for non-overlapping case */
 	tegra_plane_writel(plane, blendnokey, DC_WIN_BLEND_NOKEY);
 	tegra_plane_writel(plane, foreground, DC_WIN_BLEND_1WIN);
 
-	switch (plane->index) {
+	state = to_tegra_plane_state(plane->base.state);
+
+	if (state->opaque) {
+		/*
+		 * Since custom fix-weight blending isn't utilized and weight
+		 * of top window is set to max, we can enforce dependent
+		 * blending which in this case results in transparent bottom
+		 * window if top window is opaque and if top window enables
+		 * alpha blending, then bottom window is getting alpha value
+		 * of 1 minus the sum of alpha components of the overlapping
+		 * plane.
+		 */
+		background[0] |= BLEND_CONTROL_DEPENDENT;
+		background[1] |= BLEND_CONTROL_DEPENDENT;
+
+		/*
+		 * The region where three windows overlap is the intersection
+		 * of the two regions where two windows overlap. It contributes
+		 * to the area if all of the windows on top of it have an alpha
+		 * component.
+		 */
+		switch (state->base.normalized_zpos) {
+		case 0:
+			if (state->blending[0].alpha &&
+			    state->blending[1].alpha)
+				background[2] |= BLEND_CONTROL_DEPENDENT;
+			break;
+
+		case 1:
+			background[2] |= BLEND_CONTROL_DEPENDENT;
+			break;
+		}
+	} else {
+		/*
+		 * Enable alpha blending if pixel format has an alpha
+		 * component.
+		 */
+		foreground |= BLEND_CONTROL_ALPHA;
+
+		/*
+		 * If any of the windows on top of this window is opaque, it
+		 * will completely conceal this window within that area. If
+		 * top window has an alpha component, it is blended over the
+		 * bottom window.
+		 */
+		for (i = 0; i < 2; i++) {
+			if (state->blending[i].alpha &&
+			    state->blending[i].top)
+				background[i] |= BLEND_CONTROL_DEPENDENT;
+		}
+
+		switch (state->base.normalized_zpos) {
+		case 0:
+			if (state->blending[0].alpha &&
+			    state->blending[1].alpha)
+				background[2] |= BLEND_CONTROL_DEPENDENT;
+			break;
+
+		case 1:
+			/*
+			 * When both middle and topmost windows have an alpha,
+			 * these windows a mixed together and then the result
+			 * is blended over the bottom window.
+			 */
+			if (state->blending[0].alpha &&
+			    state->blending[0].top)
+				background[2] |= BLEND_CONTROL_ALPHA;
+
+			if (state->blending[1].alpha &&
+			    state->blending[1].top)
+				background[2] |= BLEND_CONTROL_ALPHA;
+			break;
+		}
+	}
+
+	switch (state->base.normalized_zpos) {
 	case 0:
 		tegra_plane_writel(plane, background[0], DC_WIN_BLEND_2WIN_X);
 		tegra_plane_writel(plane, background[1], DC_WIN_BLEND_2WIN_Y);
@@ -192,8 +253,21 @@ static void tegra_plane_setup_blending_legacy(struct tegra_plane *plane)
 		break;
 
 	case 1:
-		tegra_plane_writel(plane, foreground, DC_WIN_BLEND_2WIN_X);
-		tegra_plane_writel(plane, background[1], DC_WIN_BLEND_2WIN_Y);
+		/*
+		 * If window B / C is topmost, then X / Y registers are
+		 * matching the order of blending[...] state indices,
+		 * otherwise a swap is required.
+		 */
+		if (!state->blending[0].top && state->blending[1].top) {
+			blending[0] = foreground;
+			blending[1] = background[1];
+		} else {
+			blending[0] = background[0];
+			blending[1] = foreground;
+		}
+
+		tegra_plane_writel(plane, blending[0], DC_WIN_BLEND_2WIN_X);
+		tegra_plane_writel(plane, blending[1], DC_WIN_BLEND_2WIN_Y);
 		tegra_plane_writel(plane, background[2], DC_WIN_BLEND_3WIN_XY);
 		break;
 
@@ -525,14 +599,14 @@ static int tegra_plane_atomic_check(struct drm_plane *plane,
 	struct tegra_bo_tiling *tiling = &plane_state->tiling;
 	struct tegra_plane *tegra = to_tegra_plane(plane);
 	struct tegra_dc *dc = to_tegra_dc(state->crtc);
-	unsigned int format;
 	int err;
 
 	/* no need for further checks if the plane is being disabled */
 	if (!state->crtc)
 		return 0;
 
-	err = tegra_plane_format(state->fb->format->format, &format,
+	err = tegra_plane_format(state->fb->format->format,
+				 &plane_state->format,
 				 &plane_state->swap);
 	if (err < 0)
 		return err;
@@ -544,21 +618,11 @@ static int tegra_plane_atomic_check(struct drm_plane *plane,
 	 * be emulated by disabling alpha blending for the plane.
 	 */
 	if (!dc->soc->supports_blending) {
-		if (!tegra_plane_format_has_alpha(format)) {
-			err = tegra_plane_format_get_alpha(format, &format);
-			if (err < 0)
-				return err;
-
-			plane_state->opaque = true;
-		} else {
-			plane_state->opaque = false;
-		}
-
-		tegra_plane_check_dependent(tegra, plane_state);
+		err = tegra_plane_setup_legacy_state(tegra, plane_state);
+		if (err < 0)
+			return err;
 	}
 
-	plane_state->format = format;
-
 	err = tegra_fb_get_tiling(state->fb, tiling);
 	if (err < 0)
 		return err;
@@ -710,9 +774,7 @@ static struct drm_plane *tegra_primary_plane_create(struct drm_device *drm,
 	}
 
 	drm_plane_helper_add(&plane->base, &tegra_plane_helper_funcs);
-
-	if (dc->soc->supports_blending)
-		drm_plane_create_zpos_property(&plane->base, 0, 0, 255);
+	drm_plane_create_zpos_property(&plane->base, plane->index, 0, 255);
 
 	return &plane->base;
 }
@@ -989,9 +1051,7 @@ static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm,
 	}
 
 	drm_plane_helper_add(&plane->base, &tegra_plane_helper_funcs);
-
-	if (dc->soc->supports_blending)
-		drm_plane_create_zpos_property(&plane->base, 0, 0, 255);
+	drm_plane_create_zpos_property(&plane->base, plane->index, 0, 255);
 
 	return &plane->base;
 }
diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c
index 176ef46c615c..0406c2ef432c 100644
--- a/drivers/gpu/drm/tegra/plane.c
+++ b/drivers/gpu/drm/tegra/plane.c
@@ -23,6 +23,7 @@ static void tegra_plane_destroy(struct drm_plane *plane)
 
 static void tegra_plane_reset(struct drm_plane *plane)
 {
+	struct tegra_plane *p = to_tegra_plane(plane);
 	struct tegra_plane_state *state;
 
 	if (plane->state)
@@ -35,6 +36,8 @@ static void tegra_plane_reset(struct drm_plane *plane)
 	if (state) {
 		plane->state = &state->base;
 		plane->state->plane = plane;
+		plane->state->zpos = p->index;
+		plane->state->normalized_zpos = p->index;
 	}
 }
 
@@ -55,8 +58,8 @@ tegra_plane_atomic_duplicate_state(struct drm_plane *plane)
 	copy->swap = state->swap;
 	copy->opaque = state->opaque;
 
-	for (i = 0; i < 3; i++)
-		copy->dependent[i] = state->dependent[i];
+	for (i = 0; i < 2; i++)
+		copy->blending[i] = state->blending[i];
 
 	return &copy->base;
 }
@@ -267,24 +270,8 @@ static bool __drm_format_has_alpha(u32 format)
 	return false;
 }
 
-/*
- * This is applicable to Tegra20 and Tegra30 only where the opaque formats can
- * be emulated using the alpha formats and alpha blending disabled.
- */
-bool tegra_plane_format_has_alpha(unsigned int format)
-{
-	switch (format) {
-	case WIN_COLOR_DEPTH_B5G5R5A1:
-	case WIN_COLOR_DEPTH_A1B5G5R5:
-	case WIN_COLOR_DEPTH_R8G8B8A8:
-	case WIN_COLOR_DEPTH_B8G8R8A8:
-		return true;
-	}
-
-	return false;
-}
-
-int tegra_plane_format_get_alpha(unsigned int opaque, unsigned int *alpha)
+static int tegra_plane_format_get_alpha(unsigned int opaque,
+					unsigned int *alpha)
 {
 	if (tegra_plane_format_is_yuv(opaque, NULL)) {
 		*alpha = opaque;
@@ -316,6 +303,67 @@ int tegra_plane_format_get_alpha(unsigned int opaque, unsigned int *alpha)
 	return -EINVAL;
 }
 
+/*
+ * This is applicable to Tegra20 and Tegra30 only where the opaque formats can
+ * be emulated using the alpha formats and alpha blending disabled.
+ */
+static int tegra_plane_setup_opacity(struct tegra_plane *tegra,
+				     struct tegra_plane_state *state)
+{
+	unsigned int format;
+	int err;
+
+	switch (state->format) {
+	case WIN_COLOR_DEPTH_B5G5R5A1:
+	case WIN_COLOR_DEPTH_A1B5G5R5:
+	case WIN_COLOR_DEPTH_R8G8B8A8:
+	case WIN_COLOR_DEPTH_B8G8R8A8:
+		state->opaque = false;
+		break;
+
+	default:
+		err = tegra_plane_format_get_alpha(state->format, &format);
+		if (err < 0)
+			return err;
+
+		state->format = format;
+		state->opaque = true;
+		break;
+	}
+
+	return 0;
+}
+
+static int tegra_plane_check_transparency(struct tegra_plane *tegra,
+					  struct tegra_plane_state *state)
+{
+	struct drm_plane_state *old, *plane_state;
+	struct drm_plane *plane;
+
+	old = drm_atomic_get_old_plane_state(state->base.state, &tegra->base);
+
+	/* check if zpos / transparency changed */
+	if (old->normalized_zpos == state->base.normalized_zpos &&
+	    to_tegra_plane_state(old)->opaque == state->opaque)
+		return 0;
+
+	/* include all sibling planes into this commit */
+	drm_for_each_plane(plane, tegra->base.dev) {
+		struct tegra_plane *p = to_tegra_plane(plane);
+
+		/* skip this plane and planes on different CRTCs */
+		if (p == tegra || p->dc != tegra->dc)
+			continue;
+
+		plane_state = drm_atomic_get_plane_state(state->base.state,
+							 plane);
+		if (IS_ERR(plane_state))
+			return PTR_ERR(plane_state);
+	}
+
+	return 1;
+}
+
 static unsigned int tegra_plane_get_overlap_index(struct tegra_plane *plane,
 						  struct tegra_plane *other)
 {
@@ -336,61 +384,98 @@ static unsigned int tegra_plane_get_overlap_index(struct tegra_plane *plane,
 	return index;
 }
 
-void tegra_plane_check_dependent(struct tegra_plane *tegra,
-				 struct tegra_plane_state *state)
+static void tegra_plane_update_transparency(struct tegra_plane *tegra,
+					    struct tegra_plane_state *state)
 {
-	struct drm_plane_state *old, *new;
+	struct drm_plane_state *new;
 	struct drm_plane *plane;
-	unsigned int zpos[2];
 	unsigned int i;
 
-	for (i = 0; i < 2; i++)
-		zpos[i] = 0;
-
-	for_each_oldnew_plane_in_state(state->base.state, plane, old, new, i) {
+	for_each_new_plane_in_state(state->base.state, plane, new, i) {
 		struct tegra_plane *p = to_tegra_plane(plane);
 		unsigned index;
 
 		/* skip this plane and planes on different CRTCs */
-		if (p == tegra || new->crtc != state->base.crtc)
+		if (p == tegra || p->dc != tegra->dc)
 			continue;
 
 		index = tegra_plane_get_overlap_index(tegra, p);
 
-		state->dependent[index] = false;
+		if (new->fb && __drm_format_has_alpha(new->fb->format->format))
+			state->blending[index].alpha = true;
+		else
+			state->blending[index].alpha = false;
+
+		if (new->normalized_zpos > state->base.normalized_zpos)
+			state->blending[index].top = true;
+		else
+			state->blending[index].top = false;
 
 		/*
-		 * If any of the other planes is on top of this plane and uses
-		 * a format with an alpha component, mark this plane as being
-		 * dependent, meaning it's alpha value will be 1 minus the sum
-		 * of alpha components of the overlapping planes.
+		 * Missing framebuffer means that plane is disabled, in this
+		 * case mark B / C window as top to be able to differentiate
+		 * windows indices order in regards to zPos for the middle
+		 * window X / Y registers programming.
 		 */
-		if (p->index > tegra->index) {
-			if (__drm_format_has_alpha(new->fb->format->format))
-				state->dependent[index] = true;
-
-			/* keep track of the Z position */
-			zpos[index] = p->index;
-		}
-	}
-
-	/*
-	 * The region where three windows overlap is the intersection of the
-	 * two regions where two windows overlap. It contributes to the area
-	 * if any of the windows on top of it have an alpha component.
-	 */
-	for (i = 0; i < 2; i++)
-		state->dependent[2] = state->dependent[2] ||
-				      state->dependent[i];
-
-	/*
-	 * However, if any of the windows on top of this window is opaque, it
-	 * will completely conceal this window within that area, so avoid the
-	 * window from contributing to the area.
-	 */
-	for (i = 0; i < 2; i++) {
-		if (zpos[i] > tegra->index)
-			state->dependent[2] = state->dependent[2] &&
-					      state->dependent[i];
+		if (!new->fb)
+			state->blending[index].top = (index == 1);
 	}
 }
+
+static int tegra_plane_setup_transparency(struct tegra_plane *tegra,
+					  struct tegra_plane_state *state)
+{
+	struct tegra_plane_state *tegra_state;
+	struct drm_plane_state *new;
+	struct drm_plane *plane;
+	int err;
+
+	/*
+	 * If planes zpos / transparency changed, sibling planes blending
+	 * state may require adjustment and in this case they will be included
+	 * into this atom commit, otherwise blending state is unchanged.
+	 */
+	err = tegra_plane_check_transparency(tegra, state);
+	if (err <= 0)
+		return err;
+
+	/*
+	 * All planes are now in the atomic state, walk them up and update
+	 * transparency state for each plane.
+	 */
+	drm_for_each_plane(plane, tegra->base.dev) {
+		struct tegra_plane *p = to_tegra_plane(plane);
+
+		/* skip planes on different CRTCs */
+		if (p->dc != tegra->dc)
+			continue;
+
+		new = drm_atomic_get_new_plane_state(state->base.state, plane);
+		tegra_state = to_tegra_plane_state(new);
+
+		/*
+		 * There is no need to update blending state for the disabled
+		 * plane.
+		 */
+		if (new->fb)
+			tegra_plane_update_transparency(p, tegra_state);
+	}
+
+	return 0;
+}
+
+int tegra_plane_setup_legacy_state(struct tegra_plane *tegra,
+				   struct tegra_plane_state *state)
+{
+	int err;
+
+	err = tegra_plane_setup_opacity(tegra, state);
+	if (err < 0)
+		return err;
+
+	err = tegra_plane_setup_transparency(tegra, state);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/tegra/plane.h b/drivers/gpu/drm/tegra/plane.h
index 6938719e7e5d..7360ddfafee8 100644
--- a/drivers/gpu/drm/tegra/plane.h
+++ b/drivers/gpu/drm/tegra/plane.h
@@ -34,6 +34,11 @@ static inline struct tegra_plane *to_tegra_plane(struct drm_plane *plane)
 	return container_of(plane, struct tegra_plane, base);
 }
 
+struct tegra_plane_legacy_blending_state {
+	bool alpha;
+	bool top;
+};
+
 struct tegra_plane_state {
 	struct drm_plane_state base;
 
@@ -42,8 +47,8 @@ struct tegra_plane_state {
 	u32 swap;
 
 	/* used for legacy blending support only */
+	struct tegra_plane_legacy_blending_state blending[2];
 	bool opaque;
-	bool dependent[3];
 };
 
 static inline struct tegra_plane_state *
@@ -62,9 +67,7 @@ int tegra_plane_state_add(struct tegra_plane *plane,
 
 int tegra_plane_format(u32 fourcc, u32 *format, u32 *swap);
 bool tegra_plane_format_is_yuv(unsigned int format, bool *planar);
-bool tegra_plane_format_has_alpha(unsigned int format);
-int tegra_plane_format_get_alpha(unsigned int opaque, unsigned int *alpha);
-void tegra_plane_check_dependent(struct tegra_plane *tegra,
-				 struct tegra_plane_state *state);
+int tegra_plane_setup_legacy_state(struct tegra_plane *tegra,
+				   struct tegra_plane_state *state);
 
 #endif /* TEGRA_PLANE_H */

From a43d0a00ea58a665905d94e8ab469ff888b1b0e1 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Fri, 4 May 2018 17:40:00 +0300
Subject: [PATCH 11/36] drm/tegra: dc: Rename supports_blending to
 has_legacy_blending

Older Tegra chips do support blending as well. Rename the SoC info entry
.supports_blending to .has_legacy_blending to eliminate the confusion.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dc.c | 20 ++++++++++----------
 drivers/gpu/drm/tegra/dc.h |  2 +-
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 51581d9da509..31e12a9dfcb8 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -508,10 +508,10 @@ static void tegra_dc_setup_window(struct tegra_plane *plane,
 
 	tegra_plane_writel(plane, value, DC_WIN_WIN_OPTIONS);
 
-	if (dc->soc->supports_blending)
-		tegra_plane_setup_blending(plane, window);
-	else
+	if (dc->soc->has_legacy_blending)
 		tegra_plane_setup_blending_legacy(plane);
+	else
+		tegra_plane_setup_blending(plane, window);
 }
 
 static const u32 tegra20_primary_formats[] = {
@@ -617,7 +617,7 @@ static int tegra_plane_atomic_check(struct drm_plane *plane,
 	 * the corresponding opaque formats. However, the opaque formats can
 	 * be emulated by disabling alpha blending for the plane.
 	 */
-	if (!dc->soc->supports_blending) {
+	if (dc->soc->has_legacy_blending) {
 		err = tegra_plane_setup_legacy_state(tegra, plane_state);
 		if (err < 0)
 			return err;
@@ -2072,7 +2072,7 @@ static const struct tegra_dc_soc_info tegra20_dc_soc_info = {
 	.supports_interlacing = false,
 	.supports_cursor = false,
 	.supports_block_linear = false,
-	.supports_blending = false,
+	.has_legacy_blending = true,
 	.pitch_align = 8,
 	.has_powergate = false,
 	.coupled_pm = true,
@@ -2091,7 +2091,7 @@ static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
 	.supports_interlacing = false,
 	.supports_cursor = false,
 	.supports_block_linear = false,
-	.supports_blending = false,
+	.has_legacy_blending = true,
 	.pitch_align = 8,
 	.has_powergate = false,
 	.coupled_pm = false,
@@ -2110,7 +2110,7 @@ static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
 	.supports_interlacing = false,
 	.supports_cursor = false,
 	.supports_block_linear = false,
-	.supports_blending = false,
+	.has_legacy_blending = true,
 	.pitch_align = 64,
 	.has_powergate = true,
 	.coupled_pm = false,
@@ -2129,7 +2129,7 @@ static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
 	.supports_interlacing = true,
 	.supports_cursor = true,
 	.supports_block_linear = true,
-	.supports_blending = true,
+	.has_legacy_blending = false,
 	.pitch_align = 64,
 	.has_powergate = true,
 	.coupled_pm = false,
@@ -2148,7 +2148,7 @@ static const struct tegra_dc_soc_info tegra210_dc_soc_info = {
 	.supports_interlacing = true,
 	.supports_cursor = true,
 	.supports_block_linear = true,
-	.supports_blending = true,
+	.has_legacy_blending = false,
 	.pitch_align = 64,
 	.has_powergate = true,
 	.coupled_pm = false,
@@ -2201,7 +2201,7 @@ static const struct tegra_dc_soc_info tegra186_dc_soc_info = {
 	.supports_interlacing = true,
 	.supports_cursor = true,
 	.supports_block_linear = true,
-	.supports_blending = true,
+	.has_legacy_blending = false,
 	.pitch_align = 64,
 	.has_powergate = false,
 	.coupled_pm = false,
diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h
index 556321fde9d2..e96f582ca692 100644
--- a/drivers/gpu/drm/tegra/dc.h
+++ b/drivers/gpu/drm/tegra/dc.h
@@ -55,7 +55,7 @@ struct tegra_dc_soc_info {
 	bool supports_interlacing;
 	bool supports_cursor;
 	bool supports_block_linear;
-	bool supports_blending;
+	bool has_legacy_blending;
 	unsigned int pitch_align;
 	bool has_powergate;
 	bool coupled_pm;

From 27db6a0073f162cdb15975c9d29d159d772b1ec0 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Mon, 23 Apr 2018 12:54:56 +0300
Subject: [PATCH 12/36] gpu: host1x: Fix dma_free_wc() argument in the error
 path

If IOVA allocation or IOMMU mapping fails, dma_free_wc() is invoked with
size=0 because of a typo, that triggers "kernel BUG at mm/vmalloc.c:124!".

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Reviewed-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/host1x/cdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 28541b280739..cf6caa90bf89 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -127,7 +127,7 @@ static int host1x_pushbuffer_init(struct push_buffer *pb)
 iommu_free_iova:
 	__free_iova(&host1x->iova, alloc);
 iommu_free_mem:
-	dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys);
+	dma_free_wc(host1x->dev, size, pb->mapped, pb->phys);
 
 	return err;
 }

From 5f43ac8d80e4c768380e86d312a591472d080eeb Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 23 Apr 2018 08:57:44 +0200
Subject: [PATCH 13/36] drm/tegra: Fix order of teardown in IOMMU case

The original code works fine, this is merely a cosmetic change to make
the teardown order the reverse of the setup order.

Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/drm.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 181e82c58a4f..7b9f73bcf155 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -204,10 +204,10 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
 	drm_mode_config_cleanup(drm);
 
 	if (tegra->domain) {
-		iommu_domain_free(tegra->domain);
-		drm_mm_takedown(&tegra->mm);
 		mutex_destroy(&tegra->mm_lock);
+		drm_mm_takedown(&tegra->mm);
 		put_iova_domain(&tegra->carveout.domain);
+		iommu_domain_free(tegra->domain);
 	}
 free:
 	kfree(tegra);
@@ -230,10 +230,10 @@ static void tegra_drm_unload(struct drm_device *drm)
 		return;
 
 	if (tegra->domain) {
-		iommu_domain_free(tegra->domain);
-		drm_mm_takedown(&tegra->mm);
 		mutex_destroy(&tegra->mm_lock);
+		drm_mm_takedown(&tegra->mm);
 		put_iova_domain(&tegra->carveout.domain);
+		iommu_domain_free(tegra->domain);
 	}
 
 	kfree(tegra);

From 24cfdc1ac7d4260aa8416505b9cb6316c9e89021 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 23 Apr 2018 08:57:45 +0200
Subject: [PATCH 14/36] drm/tegra: Acquire a reference to the IOVA cache

The IOVA API uses a memory cache to allocate IOVA nodes from. To make
sure that this cache is available, obtain a reference to it and release
the reference when the cache is no longer needed.

On 64-bit ARM this is hidden by the fact that the DMA mapping API gets
that reference and never releases it. On 32-bit ARM, however, the DMA
mapping API doesn't do that, so allocation of IOVA nodes fails.

Fixes: ad92601521ea ("drm/tegra: Add Tegra DRM allocation API")
Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Tested-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/drm.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 7b9f73bcf155..3cdef659cd39 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -113,6 +113,10 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
 			goto free;
 		}
 
+		err = iova_cache_get();
+		if (err < 0)
+			goto domain;
+
 		geometry = &tegra->domain->geometry;
 		gem_start = geometry->aperture_start;
 		gem_end = geometry->aperture_end - CARVEOUT_SZ;
@@ -207,8 +211,11 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
 		mutex_destroy(&tegra->mm_lock);
 		drm_mm_takedown(&tegra->mm);
 		put_iova_domain(&tegra->carveout.domain);
-		iommu_domain_free(tegra->domain);
+		iova_cache_put();
 	}
+domain:
+	if (tegra->domain)
+		iommu_domain_free(tegra->domain);
 free:
 	kfree(tegra);
 	return err;
@@ -233,6 +240,7 @@ static void tegra_drm_unload(struct drm_device *drm)
 		mutex_destroy(&tegra->mm_lock);
 		drm_mm_takedown(&tegra->mm);
 		put_iova_domain(&tegra->carveout.domain);
+		iova_cache_put();
 		iommu_domain_free(tegra->domain);
 	}
 

From f40e1590c5270e5559fb95a5a0a7c1f5266a522d Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 14 May 2018 11:14:00 +0200
Subject: [PATCH 15/36] gpu: host1x: Acquire a reference to the IOVA cache

The IOVA API uses a memory cache to allocate IOVA nodes from. To make
sure that this cache is available, obtain a reference to it and release
the reference when the cache is no longer needed.

On 64-bit ARM this is hidden by the fact that the DMA mapping API gets
that reference and never releases it. On 32-bit ARM, this is papered
over by the Tegra DRM driver (the sole user of the host1x API requiring
the cache) acquiring a reference to the IOVA cache for its own purposes.
However, there may be additional users of this API in the future, so fix
this upfront to avoid surprises.

Fixes: 404bfb78daf3 ("gpu: host1x: Add IOMMU support")
Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/host1x/dev.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 03db71173f5d..f1d5f76e9c33 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -223,10 +223,14 @@ static int host1x_probe(struct platform_device *pdev)
 		struct iommu_domain_geometry *geometry;
 		unsigned long order;
 
+		err = iova_cache_get();
+		if (err < 0)
+			goto put_group;
+
 		host->domain = iommu_domain_alloc(&platform_bus_type);
 		if (!host->domain) {
 			err = -ENOMEM;
-			goto put_group;
+			goto put_cache;
 		}
 
 		err = iommu_attach_group(host->domain, host->group);
@@ -234,6 +238,7 @@ static int host1x_probe(struct platform_device *pdev)
 			if (err == -ENODEV) {
 				iommu_domain_free(host->domain);
 				host->domain = NULL;
+				iova_cache_put();
 				iommu_group_put(host->group);
 				host->group = NULL;
 				goto skip_iommu;
@@ -308,6 +313,9 @@ static int host1x_probe(struct platform_device *pdev)
 fail_free_domain:
 	if (host->domain)
 		iommu_domain_free(host->domain);
+put_cache:
+	if (host->group)
+		iova_cache_put();
 put_group:
 	iommu_group_put(host->group);
 
@@ -328,6 +336,7 @@ static int host1x_remove(struct platform_device *pdev)
 		put_iova_domain(&host->iova);
 		iommu_detach_group(host->domain, host->group);
 		iommu_domain_free(host->domain);
+		iova_cache_put();
 		iommu_group_put(host->group);
 	}
 

From cc7add70cad12054e096b034578827d7065f64bb Mon Sep 17 00:00:00 2001
From: Souptick Joarder <jrdr.linux@gmail.com>
Date: Tue, 17 Apr 2018 19:17:55 +0530
Subject: [PATCH 16/36] drm/tegra: Adding new typedef vm_fault_t

Use new return type vm_fault_t for fault handler. For now, this is just
documenting that the function returns a VM_FAULT value rather than an
errno. Once all instances are converted, vm_fault_t will become a
distinct type.

Reference id -> 1c8f422059ae ("mm: change return type to vm_fault_t")

Previously vm_insert_page() returns err which driver mapped into
VM_FAULT_* type. The new function vmf_insert_page() will replace this
inefficiency by returning VM_FAULT_* type.

Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/gem.c | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index 8b0b4ff64bb4..1c4011774c3f 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -422,14 +422,13 @@ int tegra_bo_dumb_create(struct drm_file *file, struct drm_device *drm,
 	return 0;
 }
 
-static int tegra_bo_fault(struct vm_fault *vmf)
+static vm_fault_t tegra_bo_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct drm_gem_object *gem = vma->vm_private_data;
 	struct tegra_bo *bo = to_tegra_bo(gem);
 	struct page *page;
 	pgoff_t offset;
-	int err;
 
 	if (!bo->pages)
 		return VM_FAULT_SIGBUS;
@@ -437,20 +436,7 @@ static int tegra_bo_fault(struct vm_fault *vmf)
 	offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
 	page = bo->pages[offset];
 
-	err = vm_insert_page(vma, vmf->address, page);
-	switch (err) {
-	case -EAGAIN:
-	case 0:
-	case -ERESTARTSYS:
-	case -EINTR:
-	case -EBUSY:
-		return VM_FAULT_NOPAGE;
-
-	case -ENOMEM:
-		return VM_FAULT_OOM;
-	}
-
-	return VM_FAULT_SIGBUS;
+	return vmf_insert_page(vma, vmf->address, page);
 }
 
 const struct vm_operations_struct tegra_bo_vm_ops = {

From e1189921b5ff9dcfec52b21cf12bb52c5dccd34d Mon Sep 17 00:00:00 2001
From: Daniel Stone <daniels@collabora.com>
Date: Fri, 30 Mar 2018 15:11:25 +0100
Subject: [PATCH 17/36] drm/tegra: Remove duplicate framebuffer num_planes

drm_framebuffer already stores num_planes for us.

Signed-off-by: Daniel Stone <daniels@collabora.com>
Cc: Thierry Reding <thierry.reding@gmail.com>
Cc: linux-tegra@vger.kernel.org
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/drm.h | 1 -
 drivers/gpu/drm/tegra/fb.c  | 6 ++----
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index fe263cf58f34..61a4657e45fa 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -32,7 +32,6 @@ struct reset_control;
 struct tegra_fb {
 	struct drm_framebuffer base;
 	struct tegra_bo **planes;
-	unsigned int num_planes;
 };
 
 #ifdef CONFIG_DRM_FBDEV_EMULATION
diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c
index e69434909a42..75badf371721 100644
--- a/drivers/gpu/drm/tegra/fb.c
+++ b/drivers/gpu/drm/tegra/fb.c
@@ -107,7 +107,7 @@ static void tegra_fb_destroy(struct drm_framebuffer *framebuffer)
 	struct tegra_fb *fb = to_tegra_fb(framebuffer);
 	unsigned int i;
 
-	for (i = 0; i < fb->num_planes; i++) {
+	for (i = 0; i < framebuffer->format->num_planes; i++) {
 		struct tegra_bo *bo = fb->planes[i];
 
 		if (bo) {
@@ -155,11 +155,9 @@ static struct tegra_fb *tegra_fb_alloc(struct drm_device *drm,
 		return ERR_PTR(-ENOMEM);
 	}
 
-	fb->num_planes = num_planes;
-
 	drm_helper_mode_fill_fb_struct(drm, &fb->base, mode_cmd);
 
-	for (i = 0; i < fb->num_planes; i++)
+	for (i = 0; i < fb->base.format->num_planes; i++)
 		fb->planes[i] = planes[i];
 
 	err = drm_framebuffer_init(drm, &fb->base, &tegra_fb_funcs);

From 0bc6af006f5ce7fb92d41dc8e01b621bd8d2226b Mon Sep 17 00:00:00 2001
From: Daniel Stone <daniels@collabora.com>
Date: Fri, 30 Mar 2018 15:11:26 +0100
Subject: [PATCH 18/36] drm/tegra: Move GEM BOs to drm_framebuffer

Since drm_framebuffer can now store GEM objects directly, place them
there rather than in our own subclass. As this makes the framebuffer
create_handle function the same as the GEM framebuffer helper, we
can reuse that.

Signed-off-by: Daniel Stone <daniels@collabora.com>
Cc: Thierry Reding <thierry.reding@gmail.com>
Cc: linux-tegra@vger.kernel.org
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/drm.h |  1 -
 drivers/gpu/drm/tegra/fb.c  | 37 ++++++++-----------------------------
 2 files changed, 8 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 61a4657e45fa..7f9810f026e8 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -31,7 +31,6 @@ struct reset_control;
 
 struct tegra_fb {
 	struct drm_framebuffer base;
-	struct tegra_bo **planes;
 };
 
 #ifdef CONFIG_DRM_FBDEV_EMULATION
diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c
index 75badf371721..5bc8f968284c 100644
--- a/drivers/gpu/drm/tegra/fb.c
+++ b/drivers/gpu/drm/tegra/fb.c
@@ -14,6 +14,7 @@
 
 #include "drm.h"
 #include "gem.h"
+#include <drm/drm_gem_framebuffer_helper.h>
 
 static inline struct tegra_fb *to_tegra_fb(struct drm_framebuffer *fb)
 {
@@ -30,19 +31,14 @@ static inline struct tegra_fbdev *to_tegra_fbdev(struct drm_fb_helper *helper)
 struct tegra_bo *tegra_fb_get_plane(struct drm_framebuffer *framebuffer,
 				    unsigned int index)
 {
-	struct tegra_fb *fb = to_tegra_fb(framebuffer);
-
-	if (index >= framebuffer->format->num_planes)
-		return NULL;
-
-	return fb->planes[index];
+	return to_tegra_bo(drm_gem_fb_get_obj(framebuffer, index));
 }
 
 bool tegra_fb_is_bottom_up(struct drm_framebuffer *framebuffer)
 {
-	struct tegra_fb *fb = to_tegra_fb(framebuffer);
+	struct tegra_bo *bo = tegra_fb_get_plane(framebuffer, 0);
 
-	if (fb->planes[0]->flags & TEGRA_BO_BOTTOM_UP)
+	if (bo->flags & TEGRA_BO_BOTTOM_UP)
 		return true;
 
 	return false;
@@ -51,8 +47,7 @@ bool tegra_fb_is_bottom_up(struct drm_framebuffer *framebuffer)
 int tegra_fb_get_tiling(struct drm_framebuffer *framebuffer,
 			struct tegra_bo_tiling *tiling)
 {
-	struct tegra_fb *fb = to_tegra_fb(framebuffer);
-	uint64_t modifier = fb->base.modifier;
+	uint64_t modifier = framebuffer->modifier;
 
 	switch (modifier) {
 	case DRM_FORMAT_MOD_LINEAR:
@@ -108,7 +103,7 @@ static void tegra_fb_destroy(struct drm_framebuffer *framebuffer)
 	unsigned int i;
 
 	for (i = 0; i < framebuffer->format->num_planes; i++) {
-		struct tegra_bo *bo = fb->planes[i];
+		struct tegra_bo *bo = tegra_fb_get_plane(framebuffer, i);
 
 		if (bo) {
 			if (bo->pages)
@@ -119,21 +114,12 @@ static void tegra_fb_destroy(struct drm_framebuffer *framebuffer)
 	}
 
 	drm_framebuffer_cleanup(framebuffer);
-	kfree(fb->planes);
 	kfree(fb);
 }
 
-static int tegra_fb_create_handle(struct drm_framebuffer *framebuffer,
-				  struct drm_file *file, unsigned int *handle)
-{
-	struct tegra_fb *fb = to_tegra_fb(framebuffer);
-
-	return drm_gem_handle_create(file, &fb->planes[0]->gem, handle);
-}
-
 static const struct drm_framebuffer_funcs tegra_fb_funcs = {
 	.destroy = tegra_fb_destroy,
-	.create_handle = tegra_fb_create_handle,
+	.create_handle = drm_gem_fb_create_handle,
 };
 
 static struct tegra_fb *tegra_fb_alloc(struct drm_device *drm,
@@ -149,22 +135,15 @@ static struct tegra_fb *tegra_fb_alloc(struct drm_device *drm,
 	if (!fb)
 		return ERR_PTR(-ENOMEM);
 
-	fb->planes = kzalloc(num_planes * sizeof(*planes), GFP_KERNEL);
-	if (!fb->planes) {
-		kfree(fb);
-		return ERR_PTR(-ENOMEM);
-	}
-
 	drm_helper_mode_fill_fb_struct(drm, &fb->base, mode_cmd);
 
 	for (i = 0; i < fb->base.format->num_planes; i++)
-		fb->planes[i] = planes[i];
+		fb->base.obj[i] = &planes[i]->gem;
 
 	err = drm_framebuffer_init(drm, &fb->base, &tegra_fb_funcs);
 	if (err < 0) {
 		dev_err(drm->dev, "failed to initialize framebuffer: %d\n",
 			err);
-		kfree(fb->planes);
 		kfree(fb);
 		return ERR_PTR(err);
 	}

From dbc33c7d65536bce447057dc6f882decc515047d Mon Sep 17 00:00:00 2001
From: Daniel Stone <daniels@collabora.com>
Date: Fri, 30 Mar 2018 15:11:27 +0100
Subject: [PATCH 19/36] drm/tegra: tegra_fb -> drm_framebuffer

Since tegra_fb is now the same as drm_framebuffer, we can just replace
the type completely.

Signed-off-by: Daniel Stone <daniels@collabora.com>
Cc: Thierry Reding <thierry.reding@gmail.com>
Cc: linux-tegra@vger.kernel.org
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/drm.h |  6 +-----
 drivers/gpu/drm/tegra/fb.c  | 34 ++++++++++++++--------------------
 2 files changed, 15 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 7f9810f026e8..f47a60592334 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -29,14 +29,10 @@
 
 struct reset_control;
 
-struct tegra_fb {
-	struct drm_framebuffer base;
-};
-
 #ifdef CONFIG_DRM_FBDEV_EMULATION
 struct tegra_fbdev {
 	struct drm_fb_helper base;
-	struct tegra_fb *fb;
+	struct drm_framebuffer *fb;
 };
 #endif
 
diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c
index 5bc8f968284c..57da9683a713 100644
--- a/drivers/gpu/drm/tegra/fb.c
+++ b/drivers/gpu/drm/tegra/fb.c
@@ -16,11 +16,6 @@
 #include "gem.h"
 #include <drm/drm_gem_framebuffer_helper.h>
 
-static inline struct tegra_fb *to_tegra_fb(struct drm_framebuffer *fb)
-{
-	return container_of(fb, struct tegra_fb, base);
-}
-
 #ifdef CONFIG_DRM_FBDEV_EMULATION
 static inline struct tegra_fbdev *to_tegra_fbdev(struct drm_fb_helper *helper)
 {
@@ -99,7 +94,6 @@ int tegra_fb_get_tiling(struct drm_framebuffer *framebuffer,
 
 static void tegra_fb_destroy(struct drm_framebuffer *framebuffer)
 {
-	struct tegra_fb *fb = to_tegra_fb(framebuffer);
 	unsigned int i;
 
 	for (i = 0; i < framebuffer->format->num_planes; i++) {
@@ -114,7 +108,7 @@ static void tegra_fb_destroy(struct drm_framebuffer *framebuffer)
 	}
 
 	drm_framebuffer_cleanup(framebuffer);
-	kfree(fb);
+	kfree(framebuffer);
 }
 
 static const struct drm_framebuffer_funcs tegra_fb_funcs = {
@@ -122,12 +116,12 @@ static const struct drm_framebuffer_funcs tegra_fb_funcs = {
 	.create_handle = drm_gem_fb_create_handle,
 };
 
-static struct tegra_fb *tegra_fb_alloc(struct drm_device *drm,
-				       const struct drm_mode_fb_cmd2 *mode_cmd,
-				       struct tegra_bo **planes,
-				       unsigned int num_planes)
+static struct drm_framebuffer *tegra_fb_alloc(struct drm_device *drm,
+					      const struct drm_mode_fb_cmd2 *mode_cmd,
+					      struct tegra_bo **planes,
+					      unsigned int num_planes)
 {
-	struct tegra_fb *fb;
+	struct drm_framebuffer *fb;
 	unsigned int i;
 	int err;
 
@@ -135,12 +129,12 @@ static struct tegra_fb *tegra_fb_alloc(struct drm_device *drm,
 	if (!fb)
 		return ERR_PTR(-ENOMEM);
 
-	drm_helper_mode_fill_fb_struct(drm, &fb->base, mode_cmd);
+	drm_helper_mode_fill_fb_struct(drm, fb, mode_cmd);
 
-	for (i = 0; i < fb->base.format->num_planes; i++)
-		fb->base.obj[i] = &planes[i]->gem;
+	for (i = 0; i < fb->format->num_planes; i++)
+		fb->obj[i] = &planes[i]->gem;
 
-	err = drm_framebuffer_init(drm, &fb->base, &tegra_fb_funcs);
+	err = drm_framebuffer_init(drm, fb, &tegra_fb_funcs);
 	if (err < 0) {
 		dev_err(drm->dev, "failed to initialize framebuffer: %d\n",
 			err);
@@ -158,7 +152,7 @@ struct drm_framebuffer *tegra_fb_create(struct drm_device *drm,
 	unsigned int hsub, vsub, i;
 	struct tegra_bo *planes[4];
 	struct drm_gem_object *gem;
-	struct tegra_fb *fb;
+	struct drm_framebuffer *fb;
 	int err;
 
 	hsub = drm_format_horz_chroma_subsampling(cmd->pixel_format);
@@ -194,7 +188,7 @@ struct drm_framebuffer *tegra_fb_create(struct drm_device *drm,
 		goto unreference;
 	}
 
-	return &fb->base;
+	return fb;
 
 unreference:
 	while (i--)
@@ -275,7 +269,7 @@ static int tegra_fbdev_probe(struct drm_fb_helper *helper,
 		return PTR_ERR(fbdev->fb);
 	}
 
-	fb = &fbdev->fb->base;
+	fb = fbdev->fb;
 	helper->fb = fb;
 	helper->fbdev = info;
 
@@ -376,7 +370,7 @@ static void tegra_fbdev_exit(struct tegra_fbdev *fbdev)
 	drm_fb_helper_unregister_fbi(&fbdev->base);
 
 	if (fbdev->fb)
-		drm_framebuffer_remove(&fbdev->fb->base);
+		drm_framebuffer_remove(fbdev->fb);
 
 	drm_fb_helper_fini(&fbdev->base);
 	tegra_fbdev_free(fbdev);

From c34a997d033df6bbeaf5c06e9124f89bc0ecac8d Mon Sep 17 00:00:00 2001
From: Daniel Stone <daniels@collabora.com>
Date: Fri, 30 Mar 2018 15:11:28 +0100
Subject: [PATCH 20/36] drm/tegra: Move fbdev unmap special case

User framebuffers are created with either bo->pages or bo->vaddr set,
depending on whether or not an IOMMU is present. On the other hand, the
framebuffer created for fbdev emulation has a vaddr mapping made if
bo->pages is set after creation. This is set up in fbdev probe.

Remove the special case unmapping from the general-purpose framebuffer
destroy, and move it to fbdev teardown.

Signed-off-by: Daniel Stone <daniels@collabora.com>
Cc: Thierry Reding <thierry.reding@gmail.com>
Cc: linux-tegra@vger.kernel.org
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/fb.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c
index 57da9683a713..709aa6ef171a 100644
--- a/drivers/gpu/drm/tegra/fb.c
+++ b/drivers/gpu/drm/tegra/fb.c
@@ -99,12 +99,8 @@ static void tegra_fb_destroy(struct drm_framebuffer *framebuffer)
 	for (i = 0; i < framebuffer->format->num_planes; i++) {
 		struct tegra_bo *bo = tegra_fb_get_plane(framebuffer, i);
 
-		if (bo) {
-			if (bo->pages)
-				vunmap(bo->vaddr);
-
+		if (bo)
 			drm_gem_object_put_unlocked(&bo->gem);
-		}
 	}
 
 	drm_framebuffer_cleanup(framebuffer);
@@ -369,8 +365,17 @@ static void tegra_fbdev_exit(struct tegra_fbdev *fbdev)
 {
 	drm_fb_helper_unregister_fbi(&fbdev->base);
 
-	if (fbdev->fb)
+	if (fbdev->fb) {
+		struct tegra_bo *bo = tegra_fb_get_plane(fbdev->fb, 0);
+
+		/* Undo the special mapping we made in fbdev probe. */
+		if (bo && bo->pages) {
+			vunmap(bo->vaddr);
+			bo->vaddr = 0;
+		}
+
 		drm_framebuffer_remove(fbdev->fb);
+	}
 
 	drm_fb_helper_fini(&fbdev->base);
 	tegra_fbdev_free(fbdev);

From 5cb8b9969be6f14ac3b7ba90de8f7a65f68e46fe Mon Sep 17 00:00:00 2001
From: Daniel Stone <daniels@collabora.com>
Date: Fri, 30 Mar 2018 15:11:29 +0100
Subject: [PATCH 21/36] drm/tegra: Use drm_gem_fb_destroy

Now that our destroy function is the same as the helper, use that
directly.

Signed-off-by: Daniel Stone <daniels@collabora.com>
Cc: Thierry Reding <thierry.reding@gmail.com>
Cc: linux-tegra@vger.kernel.org
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/fb.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c
index 709aa6ef171a..4c22cdded3c2 100644
--- a/drivers/gpu/drm/tegra/fb.c
+++ b/drivers/gpu/drm/tegra/fb.c
@@ -92,23 +92,8 @@ int tegra_fb_get_tiling(struct drm_framebuffer *framebuffer,
 	return 0;
 }
 
-static void tegra_fb_destroy(struct drm_framebuffer *framebuffer)
-{
-	unsigned int i;
-
-	for (i = 0; i < framebuffer->format->num_planes; i++) {
-		struct tegra_bo *bo = tegra_fb_get_plane(framebuffer, i);
-
-		if (bo)
-			drm_gem_object_put_unlocked(&bo->gem);
-	}
-
-	drm_framebuffer_cleanup(framebuffer);
-	kfree(framebuffer);
-}
-
 static const struct drm_framebuffer_funcs tegra_fb_funcs = {
-	.destroy = tegra_fb_destroy,
+	.destroy = drm_gem_fb_destroy,
 	.create_handle = drm_gem_fb_create_handle,
 };
 

From 2f8a6da866eff746a9f8c7745790f3765baeb589 Mon Sep 17 00:00:00 2001
From: Emil Goode <emil.fsw@goode.io>
Date: Wed, 16 May 2018 12:22:04 +0200
Subject: [PATCH 22/36] gpu: host1x: Fix compiler errors by converting to
 dma_addr_t
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The compiler is complaining with the following errors:

drivers/gpu/host1x/cdma.c:94:48: error:
	passing argument 3 of ‘dma_alloc_wc’ from incompatible pointer type
	[-Werror=incompatible-pointer-types]

drivers/gpu/host1x/cdma.c:113:48: error:
	passing argument 3 of ‘dma_alloc_wc’ from incompatible pointer type
	[-Werror=incompatible-pointer-types]

The expected pointer type of the third argument to dma_alloc_wc() is
dma_addr_t but phys_addr_t is passed.

Change the phys member of struct push_buffer to be dma_addr_t so that we
pass the correct type to dma_alloc_wc().
Also check pb->mapped for non-NULL in the destroy function as that is the
right way of checking if dma_alloc_wc() was successful.

Signed-off-by: Emil Goode <emil.fsw@goode.io>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/host1x/cdma.c | 2 +-
 drivers/gpu/host1x/cdma.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index cf6caa90bf89..69bb77372ed9 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -51,7 +51,7 @@ static void host1x_pushbuffer_destroy(struct push_buffer *pb)
 	struct host1x_cdma *cdma = pb_to_cdma(pb);
 	struct host1x *host1x = cdma_to_host1x(cdma);
 
-	if (!pb->phys)
+	if (!pb->mapped)
 		return;
 
 	if (host1x->domain) {
diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
index 286d49386be9..446ee1a84969 100644
--- a/drivers/gpu/host1x/cdma.h
+++ b/drivers/gpu/host1x/cdma.h
@@ -44,7 +44,7 @@ struct host1x_job;
 struct push_buffer {
 	void *mapped;			/* mapped pushbuffer memory */
 	dma_addr_t dma;			/* device address of pushbuffer */
-	phys_addr_t phys;		/* physical address of pushbuffer */
+	dma_addr_t phys;		/* physical address of pushbuffer */
 	u32 fence;			/* index we've written */
 	u32 pos;			/* index to write to */
 	u32 size;

From 24c94e166dfe89839129b8e0fae208b6af60d6f1 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Sat, 5 May 2018 08:45:47 +0200
Subject: [PATCH 23/36] gpu: host1x: Remove wait check support

The job submission userspace ABI doesn't support this and there are no
plans to implement it, so all of this code is dead and can be removed.

Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Tested-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/drm.c        |  62 +--------------
 drivers/gpu/host1x/dev.h           |   8 --
 drivers/gpu/host1x/hw/channel_hw.c |   3 +-
 drivers/gpu/host1x/hw/syncpt_hw.c  |  11 ---
 drivers/gpu/host1x/job.c           | 124 +----------------------------
 drivers/gpu/host1x/syncpt.c        |   6 --
 drivers/gpu/host1x/syncpt.h        |   3 -
 include/linux/host1x.h             |  15 +---
 include/trace/events/host1x.h      |  16 ++--
 9 files changed, 14 insertions(+), 234 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 3cdef659cd39..204b10e33f16 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -321,46 +321,14 @@ static int host1x_reloc_copy_from_user(struct host1x_reloc *dest,
 	return 0;
 }
 
-static int host1x_waitchk_copy_from_user(struct host1x_waitchk *dest,
-					 struct drm_tegra_waitchk __user *src,
-					 struct drm_file *file)
-{
-	u32 cmdbuf;
-	int err;
-
-	err = get_user(cmdbuf, &src->handle);
-	if (err < 0)
-		return err;
-
-	err = get_user(dest->offset, &src->offset);
-	if (err < 0)
-		return err;
-
-	err = get_user(dest->syncpt_id, &src->syncpt);
-	if (err < 0)
-		return err;
-
-	err = get_user(dest->thresh, &src->thresh);
-	if (err < 0)
-		return err;
-
-	dest->bo = host1x_bo_lookup(file, cmdbuf);
-	if (!dest->bo)
-		return -ENOENT;
-
-	return 0;
-}
-
 int tegra_drm_submit(struct tegra_drm_context *context,
 		     struct drm_tegra_submit *args, struct drm_device *drm,
 		     struct drm_file *file)
 {
 	unsigned int num_cmdbufs = args->num_cmdbufs;
 	unsigned int num_relocs = args->num_relocs;
-	unsigned int num_waitchks = args->num_waitchks;
 	struct drm_tegra_cmdbuf __user *user_cmdbufs;
 	struct drm_tegra_reloc __user *user_relocs;
-	struct drm_tegra_waitchk __user *user_waitchks;
 	struct drm_tegra_syncpt __user *user_syncpt;
 	struct drm_tegra_syncpt syncpt;
 	struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
@@ -372,7 +340,6 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 
 	user_cmdbufs = u64_to_user_ptr(args->cmdbufs);
 	user_relocs = u64_to_user_ptr(args->relocs);
-	user_waitchks = u64_to_user_ptr(args->waitchks);
 	user_syncpt = u64_to_user_ptr(args->syncpts);
 
 	/* We don't yet support other than one syncpt_incr struct per submit */
@@ -384,12 +351,11 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 		return -EINVAL;
 
 	job = host1x_job_alloc(context->channel, args->num_cmdbufs,
-			       args->num_relocs, args->num_waitchks);
+			       args->num_relocs);
 	if (!job)
 		return -ENOMEM;
 
 	job->num_relocs = args->num_relocs;
-	job->num_waitchk = args->num_waitchks;
 	job->client = (u32)args->context;
 	job->class = context->client->base.class;
 	job->serialize = true;
@@ -398,7 +364,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 	 * Track referenced BOs so that they can be unreferenced after the
 	 * submission is complete.
 	 */
-	num_refs = num_cmdbufs + num_relocs * 2 + num_waitchks;
+	num_refs = num_cmdbufs + num_relocs * 2;
 
 	refs = kmalloc_array(num_refs, sizeof(*refs), GFP_KERNEL);
 	if (!refs) {
@@ -489,30 +455,6 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 		}
 	}
 
-	/* copy and resolve waitchks from submit */
-	while (num_waitchks--) {
-		struct host1x_waitchk *wait = &job->waitchk[num_waitchks];
-		struct tegra_bo *obj;
-
-		err = host1x_waitchk_copy_from_user(
-			wait, &user_waitchks[num_waitchks], file);
-		if (err < 0)
-			goto fail;
-
-		obj = host1x_to_tegra_bo(wait->bo);
-		refs[num_refs++] = &obj->gem;
-
-		/*
-		 * The unaligned offset will cause an unaligned write during
-		 * of the waitchks patching, corrupting the commands stream.
-		 */
-		if (wait->offset & 3 ||
-		    wait->offset >= obj->gem.size) {
-			err = -EINVAL;
-			goto fail;
-		}
-	}
-
 	if (copy_from_user(&syncpt, user_syncpt, sizeof(syncpt))) {
 		err = -EFAULT;
 		goto fail;
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 43e9fabb43a1..36f44ffebe73 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -78,7 +78,6 @@ struct host1x_syncpt_ops {
 	void (*load_wait_base)(struct host1x_syncpt *syncpt);
 	u32 (*load)(struct host1x_syncpt *syncpt);
 	int (*cpu_incr)(struct host1x_syncpt *syncpt);
-	int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
 	void (*assign_to_channel)(struct host1x_syncpt *syncpt,
 	                          struct host1x_channel *channel);
 	void (*enable_protection)(struct host1x *host);
@@ -183,13 +182,6 @@ static inline int host1x_hw_syncpt_cpu_incr(struct host1x *host,
 	return host->syncpt_op->cpu_incr(sp);
 }
 
-static inline int host1x_hw_syncpt_patch_wait(struct host1x *host,
-					      struct host1x_syncpt *sp,
-					      void *patch_addr)
-{
-	return host->syncpt_op->patch_wait(sp, patch_addr);
-}
-
 static inline void host1x_hw_syncpt_assign_to_channel(
 	struct host1x *host, struct host1x_syncpt *sp,
 	struct host1x_channel *ch)
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 9af758785a11..4c9555038a95 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -104,8 +104,7 @@ static int channel_submit(struct host1x_job *job)
 	sp = host->syncpt + job->syncpt_id;
 	trace_host1x_channel_submit(dev_name(ch->dev),
 				    job->num_gathers, job->num_relocs,
-				    job->num_waitchk, job->syncpt_id,
-				    job->syncpt_incrs);
+				    job->syncpt_id, job->syncpt_incrs);
 
 	/* before error checks, return current max */
 	prev_max = job->syncpt_end = host1x_syncpt_read_max(sp);
diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
index 7dfd47d74f89..a23bb3352d02 100644
--- a/drivers/gpu/host1x/hw/syncpt_hw.c
+++ b/drivers/gpu/host1x/hw/syncpt_hw.c
@@ -96,16 +96,6 @@ static int syncpt_cpu_incr(struct host1x_syncpt *sp)
 	return 0;
 }
 
-/* remove a wait pointed to by patch_addr */
-static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
-{
-	u32 override = host1x_class_host_wait_syncpt(HOST1X_SYNCPT_RESERVED, 0);
-
-	*((u32 *)patch_addr) = override;
-
-	return 0;
-}
-
 /**
  * syncpt_assign_to_channel() - Assign syncpoint to channel
  * @sp: syncpoint
@@ -156,7 +146,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = {
 	.load_wait_base = syncpt_read_wait_base,
 	.load = syncpt_load,
 	.cpu_incr = syncpt_cpu_incr,
-	.patch_wait = syncpt_patch_wait,
 	.assign_to_channel = syncpt_assign_to_channel,
 	.enable_protection = syncpt_enable_protection,
 };
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index db509ab8874e..3cbfc6e37668 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -34,8 +34,7 @@
 #define HOST1X_WAIT_SYNCPT_OFFSET 0x8
 
 struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
-				    u32 num_cmdbufs, u32 num_relocs,
-				    u32 num_waitchks)
+				    u32 num_cmdbufs, u32 num_relocs)
 {
 	struct host1x_job *job = NULL;
 	unsigned int num_unpins = num_cmdbufs + num_relocs;
@@ -46,7 +45,6 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
 	total = sizeof(struct host1x_job) +
 		(u64)num_relocs * sizeof(struct host1x_reloc) +
 		(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
-		(u64)num_waitchks * sizeof(struct host1x_waitchk) +
 		(u64)num_cmdbufs * sizeof(struct host1x_job_gather) +
 		(u64)num_unpins * sizeof(dma_addr_t) +
 		(u64)num_unpins * sizeof(u32 *);
@@ -66,8 +64,6 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
 	mem += num_relocs * sizeof(struct host1x_reloc);
 	job->unpins = num_unpins ? mem : NULL;
 	mem += num_unpins * sizeof(struct host1x_job_unpin_data);
-	job->waitchk = num_waitchks ? mem : NULL;
-	mem += num_waitchks * sizeof(struct host1x_waitchk);
 	job->gathers = num_cmdbufs ? mem : NULL;
 	mem += num_cmdbufs * sizeof(struct host1x_job_gather);
 	job->addr_phys = num_unpins ? mem : NULL;
@@ -111,73 +107,6 @@ void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
 }
 EXPORT_SYMBOL(host1x_job_add_gather);
 
-/*
- * NULL an already satisfied WAIT_SYNCPT host method, by patching its
- * args in the command stream. The method data is changed to reference
- * a reserved (never given out or incr) HOST1X_SYNCPT_RESERVED syncpt
- * with a matching threshold value of 0, so is guaranteed to be popped
- * by the host HW.
- */
-static void host1x_syncpt_patch_offset(struct host1x_syncpt *sp,
-				       struct host1x_bo *h, u32 offset)
-{
-	void *patch_addr = NULL;
-
-	/* patch the wait */
-	patch_addr = host1x_bo_kmap(h, offset >> PAGE_SHIFT);
-	if (patch_addr) {
-		host1x_syncpt_patch_wait(sp,
-					 patch_addr + (offset & ~PAGE_MASK));
-		host1x_bo_kunmap(h, offset >> PAGE_SHIFT, patch_addr);
-	} else
-		pr_err("Could not map cmdbuf for wait check\n");
-}
-
-/*
- * Check driver supplied waitchk structs for syncpt thresholds
- * that have already been satisfied and NULL the comparison (to
- * avoid a wrap condition in the HW).
- */
-static int do_waitchks(struct host1x_job *job, struct host1x *host,
-		       struct host1x_job_gather *g)
-{
-	struct host1x_bo *patch = g->bo;
-	int i;
-
-	/* compare syncpt vs wait threshold */
-	for (i = 0; i < job->num_waitchk; i++) {
-		struct host1x_waitchk *wait = &job->waitchk[i];
-		struct host1x_syncpt *sp =
-			host1x_syncpt_get(host, wait->syncpt_id);
-
-		/* validate syncpt id */
-		if (wait->syncpt_id > host1x_syncpt_nb_pts(host))
-			continue;
-
-		/* skip all other gathers */
-		if (patch != wait->bo)
-			continue;
-
-		trace_host1x_syncpt_wait_check(wait->bo, wait->offset,
-					       wait->syncpt_id, wait->thresh,
-					       host1x_syncpt_read_min(sp));
-
-		if (host1x_syncpt_is_expired(sp, wait->thresh)) {
-			dev_dbg(host->dev,
-				"drop WAIT id %u (%s) thresh 0x%x, min 0x%x\n",
-				wait->syncpt_id, sp->name, wait->thresh,
-				host1x_syncpt_read_min(sp));
-
-			host1x_syncpt_patch_offset(sp, patch,
-						   g->offset + wait->offset);
-		}
-
-		wait->bo = NULL;
-	}
-
-	return 0;
-}
-
 static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 {
 	unsigned int i;
@@ -331,17 +260,6 @@ static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
 	return true;
 }
 
-static bool check_wait(struct host1x_waitchk *wait, struct host1x_bo *cmdbuf,
-		       unsigned int offset)
-{
-	offset *= sizeof(u32);
-
-	if (wait->bo != cmdbuf || wait->offset != offset)
-		return false;
-
-	return true;
-}
-
 struct host1x_firewall {
 	struct host1x_job *job;
 	struct device *dev;
@@ -349,9 +267,6 @@ struct host1x_firewall {
 	unsigned int num_relocs;
 	struct host1x_reloc *reloc;
 
-	unsigned int num_waitchks;
-	struct host1x_waitchk *waitchk;
-
 	struct host1x_bo *cmdbuf;
 	unsigned int offset;
 
@@ -378,20 +293,6 @@ static int check_register(struct host1x_firewall *fw, unsigned long offset)
 		fw->reloc++;
 	}
 
-	if (offset == HOST1X_WAIT_SYNCPT_OFFSET) {
-		if (fw->class != HOST1X_CLASS_HOST1X)
-			return -EINVAL;
-
-		if (!fw->num_waitchks)
-			return -EINVAL;
-
-		if (!check_wait(fw->waitchk, fw->cmdbuf, fw->offset))
-			return -EINVAL;
-
-		fw->num_waitchks--;
-		fw->waitchk++;
-	}
-
 	return 0;
 }
 
@@ -556,8 +457,6 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev)
 	fw.dev = dev;
 	fw.reloc = job->relocarray;
 	fw.num_relocs = job->num_relocs;
-	fw.waitchk = job->waitchk;
-	fw.num_waitchks = job->num_waitchk;
 	fw.class = job->class;
 
 	for (i = 0; i < job->num_gathers; i++) {
@@ -604,8 +503,8 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev)
 		offset += g->words * sizeof(u32);
 	}
 
-	/* No relocs and waitchks should remain at this point */
-	if (fw.num_relocs || fw.num_waitchks)
+	/* No relocs should remain at this point */
+	if (fw.num_relocs)
 		return -EINVAL;
 
 	return 0;
@@ -616,19 +515,6 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
 	int err;
 	unsigned int i, j;
 	struct host1x *host = dev_get_drvdata(dev->parent);
-	DECLARE_BITMAP(waitchk_mask, host1x_syncpt_nb_pts(host));
-
-	bitmap_zero(waitchk_mask, host1x_syncpt_nb_pts(host));
-	for (i = 0; i < job->num_waitchk; i++) {
-		u32 syncpt_id = job->waitchk[i].syncpt_id;
-
-		if (syncpt_id < host1x_syncpt_nb_pts(host))
-			set_bit(syncpt_id, waitchk_mask);
-	}
-
-	/* get current syncpt values for waitchk */
-	for_each_set_bit(i, waitchk_mask, host1x_syncpt_nb_pts(host))
-		host1x_syncpt_load(host->syncpt + i);
 
 	/* pin memory */
 	err = pin_job(host, job);
@@ -663,10 +549,6 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
 		err = do_relocs(job, g);
 		if (err)
 			break;
-
-		err = do_waitchks(job, host, g);
-		if (err)
-			break;
 	}
 
 out:
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index a2a952adc136..a108669188e8 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -373,12 +373,6 @@ bool host1x_syncpt_is_expired(struct host1x_syncpt *sp, u32 thresh)
 		return (s32)(current_val - thresh) >= 0;
 }
 
-/* remove a wait pointed to by patch_addr */
-int host1x_syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
-{
-	return host1x_hw_syncpt_patch_wait(sp->host, sp, patch_addr);
-}
-
 int host1x_syncpt_init(struct host1x *host)
 {
 	struct host1x_syncpt_base *bases;
diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h
index 9d88d37c2397..d98e22325e9d 100644
--- a/drivers/gpu/host1x/syncpt.h
+++ b/drivers/gpu/host1x/syncpt.h
@@ -124,7 +124,4 @@ static inline int host1x_syncpt_is_valid(struct host1x_syncpt *sp)
 	return sp->id < host1x_syncpt_nb_pts(sp->host);
 }
 
-/* Patch a wait by replacing it with a wait for syncpt 0 value 0 */
-int host1x_syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr);
-
 #endif
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index ddf7f9ca86cc..f66bece1e1b7 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -192,13 +192,6 @@ struct host1x_reloc {
 	unsigned long shift;
 };
 
-struct host1x_waitchk {
-	struct host1x_bo *bo;
-	u32 offset;
-	u32 syncpt_id;
-	u32 thresh;
-};
-
 struct host1x_job {
 	/* When refcount goes to zero, job can be freed */
 	struct kref ref;
@@ -215,11 +208,6 @@ struct host1x_job {
 	struct host1x_job_gather *gathers;
 	unsigned int num_gathers;
 
-	/* Wait checks to be processed at submit time */
-	struct host1x_waitchk *waitchk;
-	unsigned int num_waitchk;
-	u32 waitchk_mask;
-
 	/* Array of handles to be pinned & unpinned */
 	struct host1x_reloc *relocarray;
 	unsigned int num_relocs;
@@ -261,8 +249,7 @@ struct host1x_job {
 };
 
 struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
-				    u32 num_cmdbufs, u32 num_relocs,
-				    u32 num_waitchks);
+				    u32 num_cmdbufs, u32 num_relocs);
 void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *mem_id,
 			   u32 words, u32 offset);
 struct host1x_job *host1x_job_get(struct host1x_job *job);
diff --git a/include/trace/events/host1x.h b/include/trace/events/host1x.h
index 63116362543c..a37ef73092e5 100644
--- a/include/trace/events/host1x.h
+++ b/include/trace/events/host1x.h
@@ -115,16 +115,15 @@ TRACE_EVENT(host1x_cdma_push_gather,
 );
 
 TRACE_EVENT(host1x_channel_submit,
-	TP_PROTO(const char *name, u32 cmdbufs, u32 relocs, u32 waitchks,
-			u32 syncpt_id, u32 syncpt_incrs),
+	TP_PROTO(const char *name, u32 cmdbufs, u32 relocs, u32 syncpt_id,
+		 u32 syncpt_incrs),
 
-	TP_ARGS(name, cmdbufs, relocs, waitchks, syncpt_id, syncpt_incrs),
+	TP_ARGS(name, cmdbufs, relocs, syncpt_id, syncpt_incrs),
 
 	TP_STRUCT__entry(
 		__field(const char *, name)
 		__field(u32, cmdbufs)
 		__field(u32, relocs)
-		__field(u32, waitchks)
 		__field(u32, syncpt_id)
 		__field(u32, syncpt_incrs)
 	),
@@ -133,15 +132,14 @@ TRACE_EVENT(host1x_channel_submit,
 		__entry->name = name;
 		__entry->cmdbufs = cmdbufs;
 		__entry->relocs = relocs;
-		__entry->waitchks = waitchks;
 		__entry->syncpt_id = syncpt_id;
 		__entry->syncpt_incrs = syncpt_incrs;
 	),
 
-	TP_printk("name=%s, cmdbufs=%u, relocs=%u, waitchks=%d,"
-		"syncpt_id=%u, syncpt_incrs=%u",
-	  __entry->name, __entry->cmdbufs, __entry->relocs, __entry->waitchks,
-	  __entry->syncpt_id, __entry->syncpt_incrs)
+	TP_printk("name=%s, cmdbufs=%u, relocs=%u, syncpt_id=%u, "
+		  "syncpt_incrs=%u",
+		  __entry->name, __entry->cmdbufs, __entry->relocs,
+		  __entry->syncpt_id, __entry->syncpt_incrs)
 );
 
 TRACE_EVENT(host1x_channel_submitted,

From bf3d41ccabb53c57e19fcfc8b81d790043ac2bed Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 16 May 2018 14:12:33 +0200
Subject: [PATCH 24/36] gpu: host1x: Store pointer to client in jobs

Rather than storing some identifier derived from the application
context that can't be used concretely anywhere, store a pointer to the
client directly so that accesses can be made directly through that
client object.

Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Tested-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/drm.c | 5 +++--
 drivers/gpu/host1x/cdma.c   | 2 +-
 drivers/gpu/host1x/cdma.h   | 2 +-
 include/linux/host1x.h      | 3 ++-
 4 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 204b10e33f16..8f29323611dd 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -325,6 +325,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 		     struct drm_tegra_submit *args, struct drm_device *drm,
 		     struct drm_file *file)
 {
+	struct host1x_client *client = &context->client->base;
 	unsigned int num_cmdbufs = args->num_cmdbufs;
 	unsigned int num_relocs = args->num_relocs;
 	struct drm_tegra_cmdbuf __user *user_cmdbufs;
@@ -356,8 +357,8 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 		return -ENOMEM;
 
 	job->num_relocs = args->num_relocs;
-	job->client = (u32)args->context;
-	job->class = context->client->base.class;
+	job->client = client;
+	job->class = client->class;
 	job->serialize = true;
 
 	/*
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 69bb77372ed9..91df51e631b2 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -247,7 +247,7 @@ static void cdma_start_timer_locked(struct host1x_cdma *cdma,
 static void stop_cdma_timer_locked(struct host1x_cdma *cdma)
 {
 	cancel_delayed_work(&cdma->timeout.wq);
-	cdma->timeout.client = 0;
+	cdma->timeout.client = NULL;
 }
 
 /*
diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
index 446ee1a84969..e97e17b82370 100644
--- a/drivers/gpu/host1x/cdma.h
+++ b/drivers/gpu/host1x/cdma.h
@@ -58,7 +58,7 @@ struct buffer_timeout {
 	u32 syncpt_val;			/* syncpt value when completed */
 	ktime_t start_ktime;		/* starting time */
 	/* context timeout information */
-	int client;
+	struct host1x_client *client;
 };
 
 enum cdma_event {
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index f66bece1e1b7..0632010f47fb 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -202,7 +202,8 @@ struct host1x_job {
 	/* Channel where job is submitted to */
 	struct host1x_channel *channel;
 
-	u32 client;
+	/* client where the job originated */
+	struct host1x_client *client;
 
 	/* Gathers and their memory */
 	struct host1x_job_gather *gathers;

From d4ad3ad9b81b73f568227563988b67708291900b Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 23 Mar 2018 13:31:24 +0100
Subject: [PATCH 25/36] gpu: host1x: Cleanup loop variable usage

Use unsigned int where possible and don't unnecessarily initialize the
loop variable.

Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Tested-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/host1x/debug.c  | 2 +-
 drivers/gpu/host1x/intr.c   | 2 +-
 drivers/gpu/host1x/job.c    | 4 ++--
 drivers/gpu/host1x/syncpt.c | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c
index dc77ec452ffc..329e4a3d8ae7 100644
--- a/drivers/gpu/host1x/debug.c
+++ b/drivers/gpu/host1x/debug.c
@@ -103,7 +103,7 @@ static void show_syncpts(struct host1x *m, struct output *o)
 
 static void show_all(struct host1x *m, struct output *o, bool show_fifo)
 {
-	int i;
+	unsigned int i;
 
 	host1x_hw_show_mlocks(m, o);
 	show_syncpts(m, o);
diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
index 8b4fad0ab35d..6028cf7b681f 100644
--- a/drivers/gpu/host1x/intr.c
+++ b/drivers/gpu/host1x/intr.c
@@ -144,7 +144,7 @@ static const action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = {
 static void run_handlers(struct list_head completed[HOST1X_INTR_ACTION_COUNT])
 {
 	struct list_head *head = completed;
-	int i;
+	unsigned int i;
 
 	for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i, ++head) {
 		action_handler handler = action_handlers[i];
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index 3cbfc6e37668..2be0bcaf8288 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -196,10 +196,10 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 
 static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
 {
-	int i = 0;
 	u32 last_page = ~0;
 	void *cmdbuf_page_addr = NULL;
 	struct host1x_bo *cmdbuf = g->bo;
+	unsigned int i;
 
 	/* pin & patch the relocs for one gather */
 	for (i = 0; i < job->num_relocs; i++) {
@@ -451,7 +451,7 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev)
 	struct host1x_firewall fw;
 	size_t size = 0;
 	size_t offset = 0;
-	int i;
+	unsigned int i;
 
 	fw.job = job;
 	fw.dev = dev;
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index a108669188e8..088c05dd884c 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -57,8 +57,8 @@ static struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host,
 						 struct host1x_client *client,
 						 unsigned long flags)
 {
-	int i;
 	struct host1x_syncpt *sp = host->syncpt;
+	unsigned int i;
 	char *name;
 
 	mutex_lock(&host->syncpt_mutex);

From ac330f45c7ca5b92e78b369c7034160947f03b8d Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 16 May 2018 14:29:33 +0200
Subject: [PATCH 26/36] gpu: host1x: Drop unnecessary host1x argument

Functions taking a pointer to a host1x syncpoint as an argument don't
need to specify a pointer to a host1x instance because it can be
obtained from the syncpoint.

Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Tested-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/host1x/hw/channel_hw.c |  2 +-
 drivers/gpu/host1x/intr.c          | 14 ++++++--------
 drivers/gpu/host1x/intr.h          |  8 +++++---
 drivers/gpu/host1x/syncpt.c        |  2 +-
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 4c9555038a95..d188f9068b91 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -164,7 +164,7 @@ static int channel_submit(struct host1x_job *job)
 	trace_host1x_channel_submitted(dev_name(ch->dev), prev_max, syncval);
 
 	/* schedule a submit complete interrupt */
-	err = host1x_intr_add_action(host, job->syncpt_id, syncval,
+	err = host1x_intr_add_action(host, sp, syncval,
 				     HOST1X_INTR_ACTION_SUBMIT_COMPLETE, ch,
 				     completed_waiter, NULL);
 	completed_waiter = NULL;
diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
index 6028cf7b681f..9629c009d10f 100644
--- a/drivers/gpu/host1x/intr.c
+++ b/drivers/gpu/host1x/intr.c
@@ -211,11 +211,11 @@ static void syncpt_thresh_work(struct work_struct *work)
 				host1x_syncpt_load(host->syncpt + id));
 }
 
-int host1x_intr_add_action(struct host1x *host, unsigned int id, u32 thresh,
-			   enum host1x_intr_action action, void *data,
-			   struct host1x_waitlist *waiter, void **ref)
+int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt,
+			   u32 thresh, enum host1x_intr_action action,
+			   void *data, struct host1x_waitlist *waiter,
+			   void **ref)
 {
-	struct host1x_syncpt *syncpt;
 	int queue_was_empty;
 
 	if (waiter == NULL) {
@@ -234,19 +234,17 @@ int host1x_intr_add_action(struct host1x *host, unsigned int id, u32 thresh,
 	waiter->data = data;
 	waiter->count = 1;
 
-	syncpt = host->syncpt + id;
-
 	spin_lock(&syncpt->intr.lock);
 
 	queue_was_empty = list_empty(&syncpt->intr.wait_head);
 
 	if (add_waiter_to_queue(waiter, &syncpt->intr.wait_head)) {
 		/* added at head of list - new threshold value */
-		host1x_hw_intr_set_syncpt_threshold(host, id, thresh);
+		host1x_hw_intr_set_syncpt_threshold(host, syncpt->id, thresh);
 
 		/* added as first waiter - enable interrupt */
 		if (queue_was_empty)
-			host1x_hw_intr_enable_syncpt_intr(host, id);
+			host1x_hw_intr_enable_syncpt_intr(host, syncpt->id);
 	}
 
 	spin_unlock(&syncpt->intr.lock);
diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h
index 1370c2bb75b8..6db96af484fe 100644
--- a/drivers/gpu/host1x/intr.h
+++ b/drivers/gpu/host1x/intr.h
@@ -22,6 +22,7 @@
 #include <linux/interrupt.h>
 #include <linux/workqueue.h>
 
+struct host1x_syncpt;
 struct host1x;
 
 enum host1x_intr_action {
@@ -75,9 +76,10 @@ struct host1x_waitlist {
  *
  * This is a non-blocking api.
  */
-int host1x_intr_add_action(struct host1x *host, unsigned int id, u32 thresh,
-	enum host1x_intr_action action, void *data,
-	struct host1x_waitlist *waiter, void **ref);
+int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt,
+			   u32 thresh, enum host1x_intr_action action,
+			   void *data, struct host1x_waitlist *waiter,
+			   void **ref);
 
 /*
  * Unreference an action submitted to host1x_intr_add_action().
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 088c05dd884c..a5dbf1ba4645 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -255,7 +255,7 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
 	}
 
 	/* schedule a wakeup when the syncpoint value is reached */
-	err = host1x_intr_add_action(sp->host, sp->id, thresh,
+	err = host1x_intr_add_action(sp->host, sp, thresh,
 				     HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE,
 				     &wq, waiter, &ref);
 	if (err)

From 06490bb99e1840ab2b6814af7356e8b4ab0e3ee6 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 16 May 2018 16:58:44 +0200
Subject: [PATCH 27/36] gpu: host1x: Rename relocarray -> relocs for
 consistency

All other array variables use a plural, and this is the only one using
the *array suffix. This is confusing, so rename it for consistency.

Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Tested-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/drm.c | 4 ++--
 drivers/gpu/host1x/job.c    | 8 ++++----
 include/linux/host1x.h      | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 8f29323611dd..bfbd3a89c26f 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -426,13 +426,13 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 		struct host1x_reloc *reloc;
 		struct tegra_bo *obj;
 
-		err = host1x_reloc_copy_from_user(&job->relocarray[num_relocs],
+		err = host1x_reloc_copy_from_user(&job->relocs[num_relocs],
 						  &user_relocs[num_relocs], drm,
 						  file);
 		if (err < 0)
 			goto fail;
 
-		reloc = &job->relocarray[num_relocs];
+		reloc = &job->relocs[num_relocs];
 		obj = host1x_to_tegra_bo(reloc->cmdbuf.bo);
 		refs[num_refs++] = &obj->gem;
 
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index 2be0bcaf8288..9d6d3e151291 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -60,7 +60,7 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
 
 	/* Redistribute memory to the structs  */
 	mem += sizeof(struct host1x_job);
-	job->relocarray = num_relocs ? mem : NULL;
+	job->relocs = num_relocs ? mem : NULL;
 	mem += num_relocs * sizeof(struct host1x_reloc);
 	job->unpins = num_unpins ? mem : NULL;
 	mem += num_unpins * sizeof(struct host1x_job_unpin_data);
@@ -115,7 +115,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 	job->num_unpins = 0;
 
 	for (i = 0; i < job->num_relocs; i++) {
-		struct host1x_reloc *reloc = &job->relocarray[i];
+		struct host1x_reloc *reloc = &job->relocs[i];
 		struct sg_table *sgt;
 		dma_addr_t phys_addr;
 
@@ -203,7 +203,7 @@ static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
 
 	/* pin & patch the relocs for one gather */
 	for (i = 0; i < job->num_relocs; i++) {
-		struct host1x_reloc *reloc = &job->relocarray[i];
+		struct host1x_reloc *reloc = &job->relocs[i];
 		u32 reloc_addr = (job->reloc_addr_phys[i] +
 				  reloc->target.offset) >> reloc->shift;
 		u32 *target;
@@ -455,7 +455,7 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev)
 
 	fw.job = job;
 	fw.dev = dev;
-	fw.reloc = job->relocarray;
+	fw.reloc = job->relocs;
 	fw.num_relocs = job->num_relocs;
 	fw.class = job->class;
 
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 0632010f47fb..dcb6140d39d7 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -210,7 +210,7 @@ struct host1x_job {
 	unsigned int num_gathers;
 
 	/* Array of handles to be pinned & unpinned */
-	struct host1x_reloc *relocarray;
+	struct host1x_reloc *relocs;
 	unsigned int num_relocs;
 	struct host1x_job_unpin_data *unpins;
 	unsigned int num_unpins;

From 326bbd79fd61716841585a52d5b68f48f4e6644e Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 16 May 2018 17:01:43 +0200
Subject: [PATCH 28/36] gpu: host1x: Use not explicitly sized types

The number of words and the offset in a gather don't need to be
explicitly sized, so make them unsigned int instead.

Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Tested-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/host1x/job.c | 11 ++++++-----
 drivers/gpu/host1x/job.h |  4 ++--
 include/linux/host1x.h   |  4 ++--
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index 9d6d3e151291..e2f4a4d93d20 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -96,13 +96,14 @@ void host1x_job_put(struct host1x_job *job)
 EXPORT_SYMBOL(host1x_job_put);
 
 void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
-			   u32 words, u32 offset)
+			   unsigned int words, unsigned int offset)
 {
-	struct host1x_job_gather *cur_gather = &job->gathers[job->num_gathers];
+	struct host1x_job_gather *gather = &job->gathers[job->num_gathers];
+
+	gather->words = words;
+	gather->bo = bo;
+	gather->offset = offset;
 
-	cur_gather->words = words;
-	cur_gather->bo = bo;
-	cur_gather->offset = offset;
 	job->num_gathers++;
 }
 EXPORT_SYMBOL(host1x_job_add_gather);
diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h
index 4bda51d503ec..188400e00192 100644
--- a/drivers/gpu/host1x/job.h
+++ b/drivers/gpu/host1x/job.h
@@ -20,10 +20,10 @@
 #define __HOST1X_JOB_H
 
 struct host1x_job_gather {
-	u32 words;
+	unsigned int words;
 	dma_addr_t base;
 	struct host1x_bo *bo;
-	u32 offset;
+	unsigned int offset;
 	bool handled;
 };
 
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index dcb6140d39d7..89110d896d72 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -251,8 +251,8 @@ struct host1x_job {
 
 struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
 				    u32 num_cmdbufs, u32 num_relocs);
-void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *mem_id,
-			   u32 words, u32 offset);
+void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
+			   unsigned int words, unsigned int offset);
 struct host1x_job *host1x_job_get(struct host1x_job *job);
 void host1x_job_put(struct host1x_job *job);
 int host1x_job_pin(struct host1x_job *job, struct device *dev);

From c850ece71f71c2a68a9921c52fb5fd8d3ec2b8d7 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Sat, 5 May 2018 08:12:53 +0200
Subject: [PATCH 29/36] drm/tegra: Use proper arguments for
 DRM_TEGRA_CLOSE_CHANNEL IOCTL

A separate data structure exists for the DRM_TEGRA_CLOSE_CHANNEL IOCTL,
but it is currently unused. The IOCTL was using the data structure for
the DRM_TEGRA_OPEN_CHANNEL IOCTL.

Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Tested-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 include/uapi/drm/tegra_drm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/drm/tegra_drm.h b/include/uapi/drm/tegra_drm.h
index d954f8c33321..99e15d82d1e9 100644
--- a/include/uapi/drm/tegra_drm.h
+++ b/include/uapi/drm/tegra_drm.h
@@ -193,7 +193,7 @@ struct drm_tegra_gem_get_flags {
 #define DRM_IOCTL_TEGRA_SYNCPT_INCR DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SYNCPT_INCR, struct drm_tegra_syncpt_incr)
 #define DRM_IOCTL_TEGRA_SYNCPT_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SYNCPT_WAIT, struct drm_tegra_syncpt_wait)
 #define DRM_IOCTL_TEGRA_OPEN_CHANNEL DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_OPEN_CHANNEL, struct drm_tegra_open_channel)
-#define DRM_IOCTL_TEGRA_CLOSE_CHANNEL DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_CLOSE_CHANNEL, struct drm_tegra_open_channel)
+#define DRM_IOCTL_TEGRA_CLOSE_CHANNEL DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_CLOSE_CHANNEL, struct drm_tegra_close_channel)
 #define DRM_IOCTL_TEGRA_GET_SYNCPT DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GET_SYNCPT, struct drm_tegra_get_syncpt)
 #define DRM_IOCTL_TEGRA_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SUBMIT, struct drm_tegra_submit)
 #define DRM_IOCTL_TEGRA_GET_SYNCPT_BASE DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GET_SYNCPT_BASE, struct drm_tegra_get_syncpt_base)

From 4bd91a5b5dbb8b536208396c3d032cba8e3c3913 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 16 May 2018 18:49:04 +0200
Subject: [PATCH 30/36] drm/tegra: gem: Fill in missing export info

Set the owner and name of the exported DMA-BUF in addition to the
already filled-in fields.

Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Tested-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/gem.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index 1c4011774c3f..00a5c9f32254 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -649,6 +649,8 @@ struct dma_buf *tegra_gem_prime_export(struct drm_device *drm,
 {
 	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
 
+	exp_info.exp_name = KBUILD_MODNAME;
+	exp_info.owner = drm->driver->fops->owner;
 	exp_info.ops = &tegra_gem_prime_dmabuf_ops;
 	exp_info.size = gem->size;
 	exp_info.flags = flags;

From 995c5a509fb032ddd83eff4f3772c7fc8ff0b7ec Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 19 Mar 2018 17:20:46 +0100
Subject: [PATCH 31/36] drm/tegra: dc: Support rotation property

Currently only the DRM_MODE_REFLECT_Y rotation is supported. The driver
already supports reflection on the Y axis via a custom flag which is not
very useful because it requires custom userspace. Add the standard
rotation property that supports 0 degree rotation and Y axis reflection
for primary and overlay planes to provide a better interface than the
custom flag.

v2: keep custom flag for ABI compatibility (Dmitry)

Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Tested-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dc.c    | 26 +++++++++++++++++++++++++-
 drivers/gpu/drm/tegra/plane.c |  1 +
 drivers/gpu/drm/tegra/plane.h |  2 ++
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 31e12a9dfcb8..c3afe7b2237e 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -596,6 +596,7 @@ static int tegra_plane_atomic_check(struct drm_plane *plane,
 				    struct drm_plane_state *state)
 {
 	struct tegra_plane_state *plane_state = to_tegra_plane_state(state);
+	unsigned int rotation = DRM_MODE_ROTATE_0 | DRM_MODE_REFLECT_Y;
 	struct tegra_bo_tiling *tiling = &plane_state->tiling;
 	struct tegra_plane *tegra = to_tegra_plane(plane);
 	struct tegra_dc *dc = to_tegra_dc(state->crtc);
@@ -633,6 +634,13 @@ static int tegra_plane_atomic_check(struct drm_plane *plane,
 		return -EINVAL;
 	}
 
+	rotation = drm_rotation_simplify(state->rotation, rotation);
+
+	if (rotation & DRM_MODE_REFLECT_Y)
+		plane_state->bottom_up = true;
+	else
+		plane_state->bottom_up = false;
+
 	/*
 	 * Tegra doesn't support different strides for U and V planes so we
 	 * error out if the user tries to display a framebuffer with such a
@@ -693,7 +701,7 @@ static void tegra_plane_atomic_update(struct drm_plane *plane,
 	window.dst.w = drm_rect_width(&plane->state->dst);
 	window.dst.h = drm_rect_height(&plane->state->dst);
 	window.bits_per_pixel = fb->format->cpp[0] * 8;
-	window.bottom_up = tegra_fb_is_bottom_up(fb);
+	window.bottom_up = tegra_fb_is_bottom_up(fb) || state->bottom_up;
 
 	/* copy from state */
 	window.zpos = plane->state->normalized_zpos;
@@ -776,6 +784,14 @@ static struct drm_plane *tegra_primary_plane_create(struct drm_device *drm,
 	drm_plane_helper_add(&plane->base, &tegra_plane_helper_funcs);
 	drm_plane_create_zpos_property(&plane->base, plane->index, 0, 255);
 
+	err = drm_plane_create_rotation_property(&plane->base,
+						 DRM_MODE_ROTATE_0,
+						 DRM_MODE_ROTATE_0 |
+						 DRM_MODE_REFLECT_Y);
+	if (err < 0)
+		dev_err(dc->dev, "failed to create rotation property: %d\n",
+			err);
+
 	return &plane->base;
 }
 
@@ -1053,6 +1069,14 @@ static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm,
 	drm_plane_helper_add(&plane->base, &tegra_plane_helper_funcs);
 	drm_plane_create_zpos_property(&plane->base, plane->index, 0, 255);
 
+	err = drm_plane_create_rotation_property(&plane->base,
+						 DRM_MODE_ROTATE_0,
+						 DRM_MODE_ROTATE_0 |
+						 DRM_MODE_REFLECT_Y);
+	if (err < 0)
+		dev_err(dc->dev, "failed to create rotation property: %d\n",
+			err);
+
 	return &plane->base;
 }
 
diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c
index 0406c2ef432c..d068e8aa3553 100644
--- a/drivers/gpu/drm/tegra/plane.c
+++ b/drivers/gpu/drm/tegra/plane.c
@@ -56,6 +56,7 @@ tegra_plane_atomic_duplicate_state(struct drm_plane *plane)
 	copy->tiling = state->tiling;
 	copy->format = state->format;
 	copy->swap = state->swap;
+	copy->bottom_up = state->bottom_up;
 	copy->opaque = state->opaque;
 
 	for (i = 0; i < 2; i++)
diff --git a/drivers/gpu/drm/tegra/plane.h b/drivers/gpu/drm/tegra/plane.h
index 7360ddfafee8..e79e6b4a8e0a 100644
--- a/drivers/gpu/drm/tegra/plane.h
+++ b/drivers/gpu/drm/tegra/plane.h
@@ -46,6 +46,8 @@ struct tegra_plane_state {
 	u32 format;
 	u32 swap;
 
+	bool bottom_up;
+
 	/* used for legacy blending support only */
 	struct tegra_plane_legacy_blending_state blending[2];
 	bool opaque;

From f3b3cfcc3f09490ffb8e1e997e8a8695a6a55b1b Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 16 May 2018 17:05:04 +0200
Subject: [PATCH 32/36] drm/tegra: Track client version

Userspace needs to know the version of the interface implemented by a
client so it can create the proper command streams. Allow individual
drivers to store this version along with the client so that it can be
returned to userspace upon opening a channel.

Acked-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/drm.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index f47a60592334..92d248784396 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -91,6 +91,7 @@ struct tegra_drm_client {
 	struct host1x_client base;
 	struct list_head list;
 
+	unsigned int version;
 	const struct tegra_drm_client_ops *ops;
 };
 

From 840fd213fca23b185f71b45a5b563e4e9b6d1759 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 16 May 2018 17:06:36 +0200
Subject: [PATCH 33/36] drm/tegra: gr2d: Track interface version

Set the interface version implemented by the gr2d module. This allows
userspace to pass the correct command stream when programming the gr2d
module.

Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Tested-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/gr2d.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
index 2cd0f66c8aa9..673059fd2fcb 100644
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c
@@ -8,17 +8,24 @@
 
 #include <linux/clk.h>
 #include <linux/iommu.h>
+#include <linux/of_device.h>
 
 #include "drm.h"
 #include "gem.h"
 #include "gr2d.h"
 
+struct gr2d_soc {
+	unsigned int version;
+};
+
 struct gr2d {
 	struct iommu_group *group;
 	struct tegra_drm_client client;
 	struct host1x_channel *channel;
 	struct clk *clk;
 
+	const struct gr2d_soc *soc;
+
 	DECLARE_BITMAP(addr_regs, GR2D_NUM_REGS);
 };
 
@@ -150,9 +157,17 @@ static const struct tegra_drm_client_ops gr2d_ops = {
 	.submit = tegra_drm_submit,
 };
 
+static const struct gr2d_soc tegra20_gr2d_soc = {
+	.version = 0x20,
+};
+
+static const struct gr2d_soc tegra30_gr2d_soc = {
+	.version = 0x30,
+};
+
 static const struct of_device_id gr2d_match[] = {
-	{ .compatible = "nvidia,tegra30-gr2d" },
-	{ .compatible = "nvidia,tegra20-gr2d" },
+	{ .compatible = "nvidia,tegra30-gr2d", .data = &tegra20_gr2d_soc },
+	{ .compatible = "nvidia,tegra20-gr2d", .data = &tegra30_gr2d_soc },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, gr2d_match);
@@ -185,6 +200,8 @@ static int gr2d_probe(struct platform_device *pdev)
 	if (!gr2d)
 		return -ENOMEM;
 
+	gr2d->soc = of_device_get_match_data(dev);
+
 	syncpts = devm_kzalloc(dev, sizeof(*syncpts), GFP_KERNEL);
 	if (!syncpts)
 		return -ENOMEM;
@@ -209,6 +226,7 @@ static int gr2d_probe(struct platform_device *pdev)
 	gr2d->client.base.num_syncpts = 1;
 
 	INIT_LIST_HEAD(&gr2d->client.list);
+	gr2d->client.version = gr2d->soc->version;
 	gr2d->client.ops = &gr2d_ops;
 
 	err = host1x_client_register(&gr2d->client.base);

From 33f150ea82ff029ec7e00345c6fbf00e44a8fd60 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 16 May 2018 17:07:38 +0200
Subject: [PATCH 34/36] drm/tegra: gr3d: Track interface version

Set the interface version implemented by the gr3d module. This allows
userspace to pass the correct command stream when programming the gr3d
module.

Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Tested-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/gr3d.c | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c
index b00002f1c590..4778ae999668 100644
--- a/drivers/gpu/drm/tegra/gr3d.c
+++ b/drivers/gpu/drm/tegra/gr3d.c
@@ -11,6 +11,7 @@
 #include <linux/host1x.h>
 #include <linux/iommu.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
 
@@ -20,6 +21,10 @@
 #include "gem.h"
 #include "gr3d.h"
 
+struct gr3d_soc {
+	unsigned int version;
+};
+
 struct gr3d {
 	struct iommu_group *group;
 	struct tegra_drm_client client;
@@ -29,6 +34,8 @@ struct gr3d {
 	struct reset_control *rst_secondary;
 	struct reset_control *rst;
 
+	const struct gr3d_soc *soc;
+
 	DECLARE_BITMAP(addr_regs, GR3D_NUM_REGS);
 };
 
@@ -151,10 +158,22 @@ static const struct tegra_drm_client_ops gr3d_ops = {
 	.submit = tegra_drm_submit,
 };
 
+static const struct gr3d_soc tegra20_gr3d_soc = {
+	.version = 0x20,
+};
+
+static const struct gr3d_soc tegra30_gr3d_soc = {
+	.version = 0x30,
+};
+
+static const struct gr3d_soc tegra114_gr3d_soc = {
+	.version = 0x35,
+};
+
 static const struct of_device_id tegra_gr3d_match[] = {
-	{ .compatible = "nvidia,tegra114-gr3d" },
-	{ .compatible = "nvidia,tegra30-gr3d" },
-	{ .compatible = "nvidia,tegra20-gr3d" },
+	{ .compatible = "nvidia,tegra114-gr3d", .data = &tegra114_gr3d_soc },
+	{ .compatible = "nvidia,tegra30-gr3d", .data = &tegra30_gr3d_soc },
+	{ .compatible = "nvidia,tegra20-gr3d", .data = &tegra20_gr3d_soc },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, tegra_gr3d_match);
@@ -276,6 +295,8 @@ static int gr3d_probe(struct platform_device *pdev)
 	if (!gr3d)
 		return -ENOMEM;
 
+	gr3d->soc = of_device_get_match_data(&pdev->dev);
+
 	syncpts = devm_kzalloc(&pdev->dev, sizeof(*syncpts), GFP_KERNEL);
 	if (!syncpts)
 		return -ENOMEM;
@@ -333,6 +354,7 @@ static int gr3d_probe(struct platform_device *pdev)
 	gr3d->client.base.num_syncpts = 1;
 
 	INIT_LIST_HEAD(&gr3d->client.list);
+	gr3d->client.version = gr3d->soc->version;
 	gr3d->client.ops = &gr3d_ops;
 
 	err = host1x_client_register(&gr3d->client.base);

From acae8a9d054daa75a01e34b18f3627e6df330622 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 16 May 2018 17:08:04 +0200
Subject: [PATCH 35/36] drm/tegra: vic: Track interface version

Set the interface version implemented by the VIC module. This allows
userspace to pass the correct command stream when programming the VIC
module.

Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/vic.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index f5794dd49f3b..9f657a63b0bb 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -25,6 +25,7 @@
 
 struct vic_config {
 	const char *firmware;
+	unsigned int version;
 };
 
 struct vic {
@@ -264,18 +265,21 @@ static const struct tegra_drm_client_ops vic_ops = {
 
 static const struct vic_config vic_t124_config = {
 	.firmware = NVIDIA_TEGRA_124_VIC_FIRMWARE,
+	.version = 0x40,
 };
 
 #define NVIDIA_TEGRA_210_VIC_FIRMWARE "nvidia/tegra210/vic04_ucode.bin"
 
 static const struct vic_config vic_t210_config = {
 	.firmware = NVIDIA_TEGRA_210_VIC_FIRMWARE,
+	.version = 0x21,
 };
 
 #define NVIDIA_TEGRA_186_VIC_FIRMWARE "nvidia/tegra186/vic04_ucode.bin"
 
 static const struct vic_config vic_t186_config = {
 	.firmware = NVIDIA_TEGRA_186_VIC_FIRMWARE,
+	.version = 0x18,
 };
 
 static const struct of_device_id vic_match[] = {
@@ -342,6 +346,7 @@ static int vic_probe(struct platform_device *pdev)
 	vic->dev = dev;
 
 	INIT_LIST_HEAD(&vic->client.list);
+	vic->client.version = vic->config->version;
 	vic->client.ops = &vic_ops;
 
 	err = host1x_client_register(&vic->client.base);

From 6134534ca24f42043cacdd7108026803577f6c59 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 16 May 2018 16:43:11 +0200
Subject: [PATCH 36/36] drm/tegra: Add kerneldoc for UAPI

Document the userspace ABI with kerneldoc to provide some information on
how to use it.

v3:
- reword description of arrays and array lengths

v2:
- keep GEM object creation flags for ABI compatibility
- fix typo in struct drm_tegra_syncpt_incr kerneldoc
- fix typos in struct drm_tegra_submit kerneldoc
- reworded some descriptions as suggested

Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 include/uapi/drm/tegra_drm.h | 510 +++++++++++++++++++++++++++++++++--
 1 file changed, 491 insertions(+), 19 deletions(-)

diff --git a/include/uapi/drm/tegra_drm.h b/include/uapi/drm/tegra_drm.h
index 99e15d82d1e9..c4df3c3668b3 100644
--- a/include/uapi/drm/tegra_drm.h
+++ b/include/uapi/drm/tegra_drm.h
@@ -32,143 +32,615 @@ extern "C" {
 #define DRM_TEGRA_GEM_CREATE_TILED     (1 << 0)
 #define DRM_TEGRA_GEM_CREATE_BOTTOM_UP (1 << 1)
 
+/**
+ * struct drm_tegra_gem_create - parameters for the GEM object creation IOCTL
+ */
 struct drm_tegra_gem_create {
+	/**
+	 * @size:
+	 *
+	 * The size, in bytes, of the buffer object to be created.
+	 */
 	__u64 size;
+
+	/**
+	 * @flags:
+	 *
+	 * A bitmask of flags that influence the creation of GEM objects:
+	 *
+	 * DRM_TEGRA_GEM_CREATE_TILED
+	 *   Use the 16x16 tiling format for this buffer.
+	 *
+	 * DRM_TEGRA_GEM_CREATE_BOTTOM_UP
+	 *   The buffer has a bottom-up layout.
+	 */
 	__u32 flags;
+
+	/**
+	 * @handle:
+	 *
+	 * The handle of the created GEM object. Set by the kernel upon
+	 * successful completion of the IOCTL.
+	 */
 	__u32 handle;
 };
 
+/**
+ * struct drm_tegra_gem_mmap - parameters for the GEM mmap IOCTL
+ */
 struct drm_tegra_gem_mmap {
+	/**
+	 * @handle:
+	 *
+	 * Handle of the GEM object to obtain an mmap offset for.
+	 */
 	__u32 handle;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
+
+	/**
+	 * @offset:
+	 *
+	 * The mmap offset for the given GEM object. Set by the kernel upon
+	 * successful completion of the IOCTL.
+	 */
 	__u64 offset;
 };
 
+/**
+ * struct drm_tegra_syncpt_read - parameters for the read syncpoint IOCTL
+ */
 struct drm_tegra_syncpt_read {
+	/**
+	 * @id:
+	 *
+	 * ID of the syncpoint to read the current value from.
+	 */
 	__u32 id;
+
+	/**
+	 * @value:
+	 *
+	 * The current syncpoint value. Set by the kernel upon successful
+	 * completion of the IOCTL.
+	 */
 	__u32 value;
 };
 
+/**
+ * struct drm_tegra_syncpt_incr - parameters for the increment syncpoint IOCTL
+ */
 struct drm_tegra_syncpt_incr {
+	/**
+	 * @id:
+	 *
+	 * ID of the syncpoint to increment.
+	 */
 	__u32 id;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
 };
 
+/**
+ * struct drm_tegra_syncpt_wait - parameters for the wait syncpoint IOCTL
+ */
 struct drm_tegra_syncpt_wait {
+	/**
+	 * @id:
+	 *
+	 * ID of the syncpoint to wait on.
+	 */
 	__u32 id;
+
+	/**
+	 * @thresh:
+	 *
+	 * Threshold value for which to wait.
+	 */
 	__u32 thresh;
+
+	/**
+	 * @timeout:
+	 *
+	 * Timeout, in milliseconds, to wait.
+	 */
 	__u32 timeout;
+
+	/**
+	 * @value:
+	 *
+	 * The new syncpoint value after the wait. Set by the kernel upon
+	 * successful completion of the IOCTL.
+	 */
 	__u32 value;
 };
 
 #define DRM_TEGRA_NO_TIMEOUT	(0xffffffff)
 
+/**
+ * struct drm_tegra_open_channel - parameters for the open channel IOCTL
+ */
 struct drm_tegra_open_channel {
+	/**
+	 * @client:
+	 *
+	 * The client ID for this channel.
+	 */
 	__u32 client;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
+
+	/**
+	 * @context:
+	 *
+	 * The application context of this channel. Set by the kernel upon
+	 * successful completion of the IOCTL. This context needs to be passed
+	 * to the DRM_TEGRA_CHANNEL_CLOSE or the DRM_TEGRA_SUBMIT IOCTLs.
+	 */
 	__u64 context;
 };
 
+/**
+ * struct drm_tegra_close_channel - parameters for the close channel IOCTL
+ */
 struct drm_tegra_close_channel {
+	/**
+	 * @context:
+	 *
+	 * The application context of this channel. This is obtained from the
+	 * DRM_TEGRA_OPEN_CHANNEL IOCTL.
+	 */
 	__u64 context;
 };
 
+/**
+ * struct drm_tegra_get_syncpt - parameters for the get syncpoint IOCTL
+ */
 struct drm_tegra_get_syncpt {
+	/**
+	 * @context:
+	 *
+	 * The application context identifying the channel for which to obtain
+	 * the syncpoint ID.
+	 */
 	__u64 context;
+
+	/**
+	 * @index:
+	 *
+	 * Index of the client syncpoint for which to obtain the ID.
+	 */
 	__u32 index;
+
+	/**
+	 * @id:
+	 *
+	 * The ID of the given syncpoint. Set by the kernel upon successful
+	 * completion of the IOCTL.
+	 */
 	__u32 id;
 };
 
+/**
+ * struct drm_tegra_get_syncpt_base - parameters for the get wait base IOCTL
+ */
 struct drm_tegra_get_syncpt_base {
+	/**
+	 * @context:
+	 *
+	 * The application context identifying for which channel to obtain the
+	 * wait base.
+	 */
 	__u64 context;
+
+	/**
+	 * @syncpt:
+	 *
+	 * ID of the syncpoint for which to obtain the wait base.
+	 */
 	__u32 syncpt;
+
+	/**
+	 * @id:
+	 *
+	 * The ID of the wait base corresponding to the client syncpoint. Set
+	 * by the kernel upon successful completion of the IOCTL.
+	 */
 	__u32 id;
 };
 
+/**
+ * struct drm_tegra_syncpt - syncpoint increment operation
+ */
 struct drm_tegra_syncpt {
+	/**
+	 * @id:
+	 *
+	 * ID of the syncpoint to operate on.
+	 */
 	__u32 id;
+
+	/**
+	 * @incrs:
+	 *
+	 * Number of increments to perform for the syncpoint.
+	 */
 	__u32 incrs;
 };
 
+/**
+ * struct drm_tegra_cmdbuf - structure describing a command buffer
+ */
 struct drm_tegra_cmdbuf {
+	/**
+	 * @handle:
+	 *
+	 * Handle to a GEM object containing the command buffer.
+	 */
 	__u32 handle;
+
+	/**
+	 * @offset:
+	 *
+	 * Offset, in bytes, into the GEM object identified by @handle at
+	 * which the command buffer starts.
+	 */
 	__u32 offset;
+
+	/**
+	 * @words:
+	 *
+	 * Number of 32-bit words in this command buffer.
+	 */
 	__u32 words;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
 };
 
+/**
+ * struct drm_tegra_reloc - GEM object relocation structure
+ */
 struct drm_tegra_reloc {
 	struct {
+		/**
+		 * @cmdbuf.handle:
+		 *
+		 * Handle to the GEM object containing the command buffer for
+		 * which to perform this GEM object relocation.
+		 */
 		__u32 handle;
+
+		/**
+		 * @cmdbuf.offset:
+		 *
+		 * Offset, in bytes, into the command buffer at which to
+		 * insert the relocated address.
+		 */
 		__u32 offset;
 	} cmdbuf;
 	struct {
+		/**
+		 * @target.handle:
+		 *
+		 * Handle to the GEM object to be relocated.
+		 */
 		__u32 handle;
+
+		/**
+		 * @target.offset:
+		 *
+		 * Offset, in bytes, into the target GEM object at which the
+		 * relocated data starts.
+		 */
 		__u32 offset;
 	} target;
+
+	/**
+	 * @shift:
+	 *
+	 * The number of bits by which to shift relocated addresses.
+	 */
 	__u32 shift;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
 };
 
+/**
+ * struct drm_tegra_waitchk - wait check structure
+ */
 struct drm_tegra_waitchk {
+	/**
+	 * @handle:
+	 *
+	 * Handle to the GEM object containing a command stream on which to
+	 * perform the wait check.
+	 */
 	__u32 handle;
+
+	/**
+	 * @offset:
+	 *
+	 * Offset, in bytes, of the location in the command stream to perform
+	 * the wait check on.
+	 */
 	__u32 offset;
+
+	/**
+	 * @syncpt:
+	 *
+	 * ID of the syncpoint to wait check.
+	 */
 	__u32 syncpt;
+
+	/**
+	 * @thresh:
+	 *
+	 * Threshold value for which to check.
+	 */
 	__u32 thresh;
 };
 
+/**
+ * struct drm_tegra_submit - job submission structure
+ */
 struct drm_tegra_submit {
+	/**
+	 * @context:
+	 *
+	 * The application context identifying the channel to use for the
+	 * execution of this job.
+	 */
 	__u64 context;
-	__u32 num_syncpts;
-	__u32 num_cmdbufs;
-	__u32 num_relocs;
-	__u32 num_waitchks;
-	__u32 waitchk_mask;
-	__u32 timeout;
-	__u64 syncpts;
-	__u64 cmdbufs;
-	__u64 relocs;
-	__u64 waitchks;
-	__u32 fence;		/* Return value */
 
-	__u32 reserved[5];	/* future expansion */
+	/**
+	 * @num_syncpts:
+	 *
+	 * The number of syncpoints operated on by this job. This defines the
+	 * length of the array pointed to by @syncpts.
+	 */
+	__u32 num_syncpts;
+
+	/**
+	 * @num_cmdbufs:
+	 *
+	 * The number of command buffers to execute as part of this job. This
+	 * defines the length of the array pointed to by @cmdbufs.
+	 */
+	__u32 num_cmdbufs;
+
+	/**
+	 * @num_relocs:
+	 *
+	 * The number of relocations to perform before executing this job.
+	 * This defines the length of the array pointed to by @relocs.
+	 */
+	__u32 num_relocs;
+
+	/**
+	 * @num_waitchks:
+	 *
+	 * The number of wait checks to perform as part of this job. This
+	 * defines the length of the array pointed to by @waitchks.
+	 */
+	__u32 num_waitchks;
+
+	/**
+	 * @waitchk_mask:
+	 *
+	 * Bitmask of valid wait checks.
+	 */
+	__u32 waitchk_mask;
+
+	/**
+	 * @timeout:
+	 *
+	 * Timeout, in milliseconds, before this job is cancelled.
+	 */
+	__u32 timeout;
+
+	/**
+	 * @syncpts:
+	 *
+	 * A pointer to an array of &struct drm_tegra_syncpt structures that
+	 * specify the syncpoint operations performed as part of this job.
+	 * The number of elements in the array must be equal to the value
+	 * given by @num_syncpts.
+	 */
+	__u64 syncpts;
+
+	/**
+	 * @cmdbufs:
+	 *
+	 * A pointer to an array of &struct drm_tegra_cmdbuf structures that
+	 * define the command buffers to execute as part of this job. The
+	 * number of elements in the array must be equal to the value given
+	 * by @num_syncpts.
+	 */
+	__u64 cmdbufs;
+
+	/**
+	 * @relocs:
+	 *
+	 * A pointer to an array of &struct drm_tegra_reloc structures that
+	 * specify the relocations that need to be performed before executing
+	 * this job. The number of elements in the array must be equal to the
+	 * value given by @num_relocs.
+	 */
+	__u64 relocs;
+
+	/**
+	 * @waitchks:
+	 *
+	 * A pointer to an array of &struct drm_tegra_waitchk structures that
+	 * specify the wait checks to be performed while executing this job.
+	 * The number of elements in the array must be equal to the value
+	 * given by @num_waitchks.
+	 */
+	__u64 waitchks;
+
+	/**
+	 * @fence:
+	 *
+	 * The threshold of the syncpoint associated with this job after it
+	 * has been completed. Set by the kernel upon successful completion of
+	 * the IOCTL. This can be used with the DRM_TEGRA_SYNCPT_WAIT IOCTL to
+	 * wait for this job to be finished.
+	 */
+	__u32 fence;
+
+	/**
+	 * @reserved:
+	 *
+	 * This field is reserved for future use. Must be 0.
+	 */
+	__u32 reserved[5];
 };
 
 #define DRM_TEGRA_GEM_TILING_MODE_PITCH 0
 #define DRM_TEGRA_GEM_TILING_MODE_TILED 1
 #define DRM_TEGRA_GEM_TILING_MODE_BLOCK 2
 
+/**
+ * struct drm_tegra_gem_set_tiling - parameters for the set tiling IOCTL
+ */
 struct drm_tegra_gem_set_tiling {
-	/* input */
+	/**
+	 * @handle:
+	 *
+	 * Handle to the GEM object for which to set the tiling parameters.
+	 */
 	__u32 handle;
+
+	/**
+	 * @mode:
+	 *
+	 * The tiling mode to set. Must be one of:
+	 *
+	 * DRM_TEGRA_GEM_TILING_MODE_PITCH
+	 *   pitch linear format
+	 *
+	 * DRM_TEGRA_GEM_TILING_MODE_TILED
+	 *   16x16 tiling format
+	 *
+	 * DRM_TEGRA_GEM_TILING_MODE_BLOCK
+	 *   16Bx2 tiling format
+	 */
 	__u32 mode;
+
+	/**
+	 * @value:
+	 *
+	 * The value to set for the tiling mode parameter.
+	 */
 	__u32 value;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
 };
 
+/**
+ * struct drm_tegra_gem_get_tiling - parameters for the get tiling IOCTL
+ */
 struct drm_tegra_gem_get_tiling {
-	/* input */
+	/**
+	 * @handle:
+	 *
+	 * Handle to the GEM object for which to query the tiling parameters.
+	 */
 	__u32 handle;
-	/* output */
+
+	/**
+	 * @mode:
+	 *
+	 * The tiling mode currently associated with the GEM object. Set by
+	 * the kernel upon successful completion of the IOCTL.
+	 */
 	__u32 mode;
+
+	/**
+	 * @value:
+	 *
+	 * The tiling mode parameter currently associated with the GEM object.
+	 * Set by the kernel upon successful completion of the IOCTL.
+	 */
 	__u32 value;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
 };
 
 #define DRM_TEGRA_GEM_BOTTOM_UP		(1 << 0)
 #define DRM_TEGRA_GEM_FLAGS		(DRM_TEGRA_GEM_BOTTOM_UP)
 
+/**
+ * struct drm_tegra_gem_set_flags - parameters for the set flags IOCTL
+ */
 struct drm_tegra_gem_set_flags {
-	/* input */
+	/**
+	 * @handle:
+	 *
+	 * Handle to the GEM object for which to set the flags.
+	 */
 	__u32 handle;
-	/* output */
+
+	/**
+	 * @flags:
+	 *
+	 * The flags to set for the GEM object.
+	 */
 	__u32 flags;
 };
 
+/**
+ * struct drm_tegra_gem_get_flags - parameters for the get flags IOCTL
+ */
 struct drm_tegra_gem_get_flags {
-	/* input */
+	/**
+	 * @handle:
+	 *
+	 * Handle to the GEM object for which to query the flags.
+	 */
 	__u32 handle;
-	/* output */
+
+	/**
+	 * @flags:
+	 *
+	 * The flags currently associated with the GEM object. Set by the
+	 * kernel upon successful completion of the IOCTL.
+	 */
 	__u32 flags;
 };