Merge branch 'drm-next-4.6' of git://people.freedesktop.org/~agd5f/linux into drm-next

First radeon and amdgpu pull request for 4.6. Highlights: - ACP support for APUs with i2s audio - CS ioctl optimizations - GPU scheduler optimizations - GPUVM optimizations - Initial GPU reset support (not enabled yet) - New powerplay sysfs interface for manually selecting clocks - Powerplay fixes - Virtualization fixes - Removal of hw semaphore support - Lots of other misc fixes and cleanups * 'drm-next-4.6' of git://people.freedesktop.org/~agd5f/linux: (118 commits) drm/amdgpu: Don't call interval_tree_remove in amdgpu_mn_destroy drm/amdgpu: Fix race condition in amdgpu_mn_unregister drm/amdgpu: cleanup gem init/finit drm/amdgpu: rework GEM info printing drm/amdgpu: print the GPU offset as well in gem_info drm/amdgpu: optionally print the pin count in gem_info as well drm/amdgpu: print the BO size only once in amdgpu_gem_info drm/amdgpu: print pid as integer drm/amdgpu: remove page flip work queue v3 drm/amdgpu: stop blocking for page filp fences drm/amdgpu: stop calling amdgpu_gpu_reset from the flip code drm/amdgpu: remove fence reset detection leftovers drm/amdgpu: Fix race condition in MMU notifier release drm/radeon: Fix WARN_ON if DRM_DP_AUX_CHARDEV is enabled drm/amdgpu/vi: move uvd tiling config setup into uvd code drm/amdgpu/vi: move sdma tiling config setup into sdma code drm/amdgpu/cik: move uvd tiling config setup into uvd code drm/amdgpu/cik: move sdma tiling config setup into sdma code drm/amdgpu/gfx7: rework gpu_init() drm/amdgpu/gfx: clean up harvest configuration (v2) ...

Merge branch 'drm-next-4.6' of git://people.freedesktop.org/~agd5f/linux into drm-next
First radeon and amdgpu pull request for 4.6. Highlights: - ACP support for APUs with i2s audio - CS ioctl optimizations - GPU scheduler optimizations - GPUVM optimizations - Initial GPU reset support (not enabled yet) - New powerplay sysfs interface for manually selecting clocks - Powerplay fixes - Virtualization fixes - Removal of hw semaphore support - Lots of other misc fixes and cleanups * 'drm-next-4.6' of git://people.freedesktop.org/~agd5f/linux: (118 commits) drm/amdgpu: Don't call interval_tree_remove in amdgpu_mn_destroy drm/amdgpu: Fix race condition in amdgpu_mn_unregister drm/amdgpu: cleanup gem init/finit drm/amdgpu: rework GEM info printing drm/amdgpu: print the GPU offset as well in gem_info drm/amdgpu: optionally print the pin count in gem_info as well drm/amdgpu: print the BO size only once in amdgpu_gem_info drm/amdgpu: print pid as integer drm/amdgpu: remove page flip work queue v3 drm/amdgpu: stop blocking for page filp fences drm/amdgpu: stop calling amdgpu_gpu_reset from the flip code drm/amdgpu: remove fence reset detection leftovers drm/amdgpu: Fix race condition in MMU notifier release drm/radeon: Fix WARN_ON if DRM_DP_AUX_CHARDEV is enabled drm/amdgpu/vi: move uvd tiling config setup into uvd code drm/amdgpu/vi: move sdma tiling config setup into sdma code drm/amdgpu/cik: move uvd tiling config setup into uvd code drm/amdgpu/cik: move sdma tiling config setup into sdma code drm/amdgpu/gfx7: rework gpu_init() drm/amdgpu/gfx: clean up harvest configuration (v2) ...
5263925c · Dave Airlie · 08244c00 · 390be282 · 5263925c · 5263925c
Commit 5263925c authored Feb 19, 2016 by Dave Airlie
85 changed files
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -172,6 +172,8 @@ config DRM_AMDGPU
 source "drivers/gpu/drm/amd/amdgpu/Kconfig"
 source "drivers/gpu/drm/amd/powerplay/Kconfig"

+source "drivers/gpu/drm/amd/acp/Kconfig"
+
 source "drivers/gpu/drm/nouveau/Kconfig"

 config DRM_I810

--- a/drivers/gpu/drm/amd/acp/Kconfig
+++ b/drivers/gpu/drm/amd/acp/Kconfig
+menu "ACP Configuration"
+
+config DRM_AMD_ACP
+       bool "Enable ACP IP support"
+       default y
+       select MFD_CORE
+       select PM_GENERIC_DOMAINS if PM
+       help
+	Choose this option to enable ACP IP support for AMD SOCs.
+
+endmenu
--- a/drivers/gpu/drm/amd/acp/Makefile
+++ b/drivers/gpu/drm/amd/acp/Makefile
+#
+# Makefile for the ACP, which is a sub-component
+# of AMDSOC/AMDGPU drm driver.
+# It provides the HW control for ACP related functionalities.
+
+subdir-ccflags-y += -I$(AMDACPPATH)/ -I$(AMDACPPATH)/include
+
+AMD_ACP_FILES := $(AMDACPPATH)/acp_hw.o
--- a/drivers/gpu/drm/amd/acp/acp_hw.c
+++ b/drivers/gpu/drm/amd/acp/acp_hw.c
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+
+#include "acp_gfx_if.h"
+
+#define ACP_MODE_I2S	0
+#define ACP_MODE_AZ	1
+
+#define mmACP_AZALIA_I2S_SELECT 0x51d4
+
+int amd_acp_hw_init(void *cgs_device,
+		    unsigned acp_version_major, unsigned acp_version_minor)
+{
+	unsigned int acp_mode = ACP_MODE_I2S;
+
+	if ((acp_version_major == 2) && (acp_version_minor == 2))
+		acp_mode = cgs_read_register(cgs_device,
+					mmACP_AZALIA_I2S_SELECT);
+
+	if (acp_mode != ACP_MODE_I2S)
+		return -ENODEV;
+
+	return 0;
+}
--- a/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h
+++ b/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+*/
+
+#ifndef _ACP_GFX_IF_H
+#define _ACP_GFX_IF_H
+
+#include <linux/types.h>
+#include "cgs_linux.h"
+#include "cgs_common.h"
+
+int amd_acp_hw_init(void *cgs_device,
+		    unsigned acp_version_major, unsigned acp_version_minor);
+
+#endif /* _ACP_GFX_IF_H */
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -8,7 +8,8 @@ ccflags-y := -Iinclude/drm -I$(FULL_AMD_PATH)/include/asic_reg \
 	-I$(FULL_AMD_PATH)/include \
 	-I$(FULL_AMD_PATH)/amdgpu \
 	-I$(FULL_AMD_PATH)/scheduler \
-	-I$(FULL_AMD_PATH)/powerplay/inc
+	-I$(FULL_AMD_PATH)/powerplay/inc \
+	-I$(FULL_AMD_PATH)/acp/include

 amdgpu-y := amdgpu_drv.o

@@ -20,7 +21,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	amdgpu_fb.o amdgpu_gem.o amdgpu_ring.o \
 	amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \
 	amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
-	atombios_encoders.o amdgpu_semaphore.o amdgpu_sa.o atombios_i2c.o \
+	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
 	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o

@@ -92,7 +93,17 @@ amdgpu-y += amdgpu_cgs.o
 amdgpu-y += \
 	../scheduler/gpu_scheduler.o \
 	../scheduler/sched_fence.o \
-	amdgpu_sched.o
+	amdgpu_job.o
+
+# ACP componet
+ifneq ($(CONFIG_DRM_AMD_ACP),)
+amdgpu-y += amdgpu_acp.o
+
+AMDACPPATH := ../acp
+include $(FULL_AMD_PATH)/acp/Makefile
+
+amdgpu-y += $(AMD_ACP_FILES)
+endif

 amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
 amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __AMDGPU_ACP_H__
+#define __AMDGPU_ACP_H__
+
+#include <linux/mfd/core.h>
+
+struct amdgpu_acp {
+	struct device *parent;
+	void *cgs_device;
+	struct amd_acp_private *private;
+	struct mfd_cell *acp_cell;
+	struct resource *acp_res;
+	struct acp_pm_domain *acp_genpd;
+};
+
+extern const struct amd_ip_funcs acp_ip_funcs;
+
+#endif /* __AMDGPU_ACP_H__ */
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1514,6 +1514,19 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
 	return -EINVAL;
 }

+bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev)
+{
+	int index = GetIndexIntoMasterTable(DATA, GPUVirtualizationInfo);
+	u8 frev, crev;
+	u16 data_offset, size;
+
+	if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size,
+					  &frev, &crev, &data_offset))
+		return true;
+
+	return false;
+}
+
 void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock)
 {
 	uint32_t bios_6_scratch;

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
@@ -196,6 +196,8 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
 				      u8 module_index,
 				      struct atom_mc_reg_table *reg_table);

+bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev);
+
 void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock);
 void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev);
 void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev);

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -32,6 +32,9 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"

+#define AMDGPU_BO_LIST_MAX_PRIORITY	32u
+#define AMDGPU_BO_LIST_NUM_BUCKETS	(AMDGPU_BO_LIST_MAX_PRIORITY + 1)
+
 static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv,
 				 struct amdgpu_bo_list **result,
 				 int *id)
@@ -90,6 +93,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,

 	bool has_userptr = false;
 	unsigned i;
+	int r;

 	array = drm_malloc_ab(num_entries, sizeof(struct amdgpu_bo_list_entry));
 	if (!array)
@@ -99,31 +103,34 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 	for (i = 0; i < num_entries; ++i) {
 		struct amdgpu_bo_list_entry *entry = &array[i];
 		struct drm_gem_object *gobj;
+		struct mm_struct *usermm;

 		gobj = drm_gem_object_lookup(adev->ddev, filp, info[i].bo_handle);
-		if (!gobj)
+		if (!gobj) {
+			r = -ENOENT;
 			goto error_free;
+		}

 		entry->robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
 		drm_gem_object_unreference_unlocked(gobj);
-		entry->priority = info[i].bo_priority;
-		entry->prefered_domains = entry->robj->initial_domain;
-		entry->allowed_domains = entry->prefered_domains;
-		if (entry->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
-			entry->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
-		if (amdgpu_ttm_tt_has_userptr(entry->robj->tbo.ttm)) {
+		entry->priority = min(info[i].bo_priority,
+				      AMDGPU_BO_LIST_MAX_PRIORITY);
+		usermm = amdgpu_ttm_tt_get_usermm(entry->robj->tbo.ttm);
+		if (usermm) {
+			if (usermm != current->mm) {
+				r = -EPERM;
+				goto error_free;
+			}
 			has_userptr = true;
-			entry->prefered_domains = AMDGPU_GEM_DOMAIN_GTT;
-			entry->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
 		}
 		entry->tv.bo = &entry->robj->tbo;
 		entry->tv.shared = true;

-		if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_GDS)
+		if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GDS)
 			gds_obj = entry->robj;
-		if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_GWS)
+		if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GWS)
 			gws_obj = entry->robj;
-		if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_OA)
+		if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_OA)
 			oa_obj = entry->robj;

 		trace_amdgpu_bo_list_set(list, entry->robj);
@@ -145,7 +152,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,

 error_free:
 	drm_free_large(array);
-	return -ENOENT;
+	return r;
 }

 struct amdgpu_bo_list *
@@ -161,6 +168,36 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id)
 	return result;
 }

+void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
+			     struct list_head *validated)
+{
+	/* This is based on the bucket sort with O(n) time complexity.
+	 * An item with priority "i" is added to bucket[i]. The lists are then
+	 * concatenated in descending order.
+	 */
+	struct list_head bucket[AMDGPU_BO_LIST_NUM_BUCKETS];
+	unsigned i;
+
+	for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++)
+		INIT_LIST_HEAD(&bucket[i]);
+
+	/* Since buffers which appear sooner in the relocation list are
+	 * likely to be used more often than buffers which appear later
+	 * in the list, the sort mustn't change the ordering of buffers
+	 * with the same priority, i.e. it must be stable.
+	 */
+	for (i = 0; i < list->num_entries; i++) {
+		unsigned priority = list->array[i].priority;
+
+		list_add_tail(&list->array[i].tv.head,
+			      &bucket[priority]);
+	}
+
+	/* Connect the sorted buckets in the output list. */
+	for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++)
+		list_splice(&bucket[i], validated);
+}
+
 void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
 {
 	mutex_unlock(&list->lock);

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -25,8 +25,7 @@
 #include <drm/drmP.h>
 #include "amdgpu.h"

-int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority pri,
-		    struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
 {
 	unsigned i, j;
 	int r;
@@ -35,44 +34,38 @@ int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority pri,
 	ctx->adev = adev;
 	kref_init(&ctx->refcount);
 	spin_lock_init(&ctx->ring_lock);
-	ctx->fences = kzalloc(sizeof(struct fence *) * amdgpu_sched_jobs *
-			 AMDGPU_MAX_RINGS, GFP_KERNEL);
+	ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
+			      sizeof(struct fence*), GFP_KERNEL);
 	if (!ctx->fences)
 		return -ENOMEM;

 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		ctx->rings[i].sequence = 1;
-		ctx->rings[i].fences = (void *)ctx->fences + sizeof(struct fence *) *
-			amdgpu_sched_jobs * i;
+		ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
 	}
-	if (amdgpu_enable_scheduler) {
-		/* create context entity for each ring */
-		for (i = 0; i < adev->num_rings; i++) {
-			struct amd_sched_rq *rq;
-			if (pri >= AMD_SCHED_MAX_PRIORITY) {
-				kfree(ctx->fences);
-				return -EINVAL;
-			}
-			rq = &adev->rings[i]->sched.sched_rq[pri];
-			r = amd_sched_entity_init(&adev->rings[i]->sched,
-						  &ctx->rings[i].entity,
-						  rq, amdgpu_sched_jobs);
-			if (r)
-				break;
-		}
-
-		if (i < adev->num_rings) {
-			for (j = 0; j < i; j++)
-				amd_sched_entity_fini(&adev->rings[j]->sched,
-						      &ctx->rings[j].entity);
-			kfree(ctx->fences);
-			return r;
-		}
+	/* create context entity for each ring */
+	for (i = 0; i < adev->num_rings; i++) {
+		struct amdgpu_ring *ring = adev->rings[i];
+		struct amd_sched_rq *rq;
+
+		rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
+		r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
+					  rq, amdgpu_sched_jobs);
+		if (r)
+			break;
+	}
+
+	if (i < adev->num_rings) {
+		for (j = 0; j < i; j++)
+			amd_sched_entity_fini(&adev->rings[j]->sched,
+					      &ctx->rings[j].entity);
+		kfree(ctx->fences);
+		return r;
 	}
 	return 0;
 }

-void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
+static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 {
 	struct amdgpu_device *adev = ctx->adev;
 	unsigned i, j;
@@ -85,11 +78,9 @@ void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 			fence_put(ctx->rings[i].fences[j]);
 	kfree(ctx->fences);

-	if (amdgpu_enable_scheduler) {
-		for (i = 0; i < adev->num_rings; i++)
-			amd_sched_entity_fini(&adev->rings[i]->sched,
-					      &ctx->rings[i].entity);
-	}
+	for (i = 0; i < adev->num_rings; i++)
+		amd_sched_entity_fini(&adev->rings[i]->sched,
+				      &ctx->rings[i].entity);
 }

 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
@@ -112,7 +103,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 		return r;
 	}
 	*id = (uint32_t)r;
-	r = amdgpu_ctx_init(adev, AMD_SCHED_PRIORITY_NORMAL, ctx);
+	r = amdgpu_ctx_init(adev, ctx);
 	if (r) {
 		idr_remove(&mgr->ctx_handles, *id);
 		*id = 0;
@@ -200,18 +191,18 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 	id = args->in.ctx_id;

 	switch (args->in.op) {
-		case AMDGPU_CTX_OP_ALLOC_CTX:
-			r = amdgpu_ctx_alloc(adev, fpriv, &id);
-			args->out.alloc.ctx_id = id;
-			break;
-		case AMDGPU_CTX_OP_FREE_CTX:
-			r = amdgpu_ctx_free(fpriv, id);
-			break;
-		case AMDGPU_CTX_OP_QUERY_STATE:
-			r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
-			break;
-		default:
-			return -EINVAL;
+	case AMDGPU_CTX_OP_ALLOC_CTX:
+		r = amdgpu_ctx_alloc(adev, fpriv, &id);
+		args->out.alloc.ctx_id = id;
+		break;
+	case AMDGPU_CTX_OP_FREE_CTX:
+		r = amdgpu_ctx_free(fpriv, id);
+		break;
+	case AMDGPU_CTX_OP_QUERY_STATE:
+		r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
+		break;
+	default:
+		return -EINVAL;
 	}

 	return r;

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -635,31 +635,6 @@ bool amdgpu_card_posted(struct amdgpu_device *adev)

 }

-/**
- * amdgpu_boot_test_post_card - check and possibly initialize the hw
- *
- * @adev: amdgpu_device pointer
- *
- * Check if the asic is initialized and if not, attempt to initialize
- * it (all asics).
- * Returns true if initialized or false if not.
- */
-bool amdgpu_boot_test_post_card(struct amdgpu_device *adev)
-{
-	if (amdgpu_card_posted(adev))
-		return true;
-
-	if (adev->bios) {
-		DRM_INFO("GPU not posted. posting now...\n");
-		if (adev->is_atom_bios)
-			amdgpu_atom_asic_init(adev->mode_info.atom_context);
-		return true;
-	} else {
-		dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n");
-		return false;
-	}
-}
-
 /**
 * amdgpu_dummy_page_init - init dummy page used by the driver
 *
@@ -959,12 +934,6 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
 			 amdgpu_sched_jobs);
 		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
 	}
-	/* vramlimit must be a power of two */
-	if (!amdgpu_check_pot_argument(amdgpu_vram_limit)) {
-		dev_warn(adev->dev, "vram limit (%d) must be a power of 2\n",
-				amdgpu_vram_limit);
-		amdgpu_vram_limit = 0;
-	}

 	if (amdgpu_gart_size != -1) {
 		/* gtt size must be power of two and greater or equal to 32M */
@@ -1434,7 +1403,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	adev->mman.buffer_funcs = NULL;
 	adev->mman.buffer_funcs_ring = NULL;
 	adev->vm_manager.vm_pte_funcs = NULL;
-	adev->vm_manager.vm_pte_funcs_ring = NULL;
+	adev->vm_manager.vm_pte_num_rings = 0;
 	adev->gart.gart_funcs = NULL;
 	adev->fence_context = fence_context_alloc(AMDGPU_MAX_RINGS);

@@ -1455,9 +1424,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,

 	/* mutex initialization are all done here so we
 	 * can recall function without having locking issues */
-	mutex_init(&adev->ring_lock);
+	mutex_init(&adev->vm_manager.lock);
 	atomic_set(&adev->irq.ih.lock, 0);
-	mutex_init(&adev->gem.mutex);
 	mutex_init(&adev->pm.mutex);
 	mutex_init(&adev->gfx.gpu_clock_mutex);
 	mutex_init(&adev->srbm_mutex);
@@ -1531,8 +1499,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		return r;
 	}

+	/* See if the asic supports SR-IOV */
+	adev->virtualization.supports_sr_iov =
+		amdgpu_atombios_has_gpu_virtualization_table(adev);
+
 	/* Post card if necessary */
-	if (!amdgpu_card_posted(adev)) {
+	if (!amdgpu_card_posted(adev) ||
+	    adev->virtualization.supports_sr_iov) {
 		if (!adev->bios) {
 			dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n");
 			return -EINVAL;
@@ -1577,11 +1550,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		return r;
 	}

-	r = amdgpu_ctx_init(adev, AMD_SCHED_PRIORITY_KERNEL, &adev->kernel_ctx);
-	if (r) {
-		dev_err(adev->dev, "failed to create kernel context (%d).\n", r);
-		return r;
-	}
 	r = amdgpu_ib_ring_tests(adev);
 	if (r)
 		DRM_ERROR("ib ring test failed (%d).\n", r);
@@ -1645,7 +1613,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	adev->shutdown = true;
 	/* evict vram memory */
 	amdgpu_bo_evict_vram(adev);
-	amdgpu_ctx_fini(&adev->kernel_ctx);
 	amdgpu_ib_pool_fini(adev);
 	amdgpu_fence_driver_fini(adev);
 	amdgpu_fbdev_fini(adev);
@@ -1889,6 +1856,9 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)

 retry:
 	r = amdgpu_asic_reset(adev);
+	/* post card */
+	amdgpu_atom_asic_init(adev->mode_info.atom_context);
+
 	if (!r) {
 		dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
 		r = amdgpu_resume(adev);

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -35,32 +35,30 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_edid.h>

-static void amdgpu_flip_wait_fence(struct amdgpu_device *adev,
-				   struct fence **f)
+static void amdgpu_flip_callback(struct fence *f, struct fence_cb *cb)
 {
-	struct amdgpu_fence *fence;
-	long r;
+	struct amdgpu_flip_work *work =
+		container_of(cb, struct amdgpu_flip_work, cb);

-	if (*f == NULL)
-		return;
+	fence_put(f);
+	schedule_work(&work->flip_work);
+}

-	fence = to_amdgpu_fence(*f);
-	if (fence) {
-		r = fence_wait(&fence->base, false);
-		if (r == -EDEADLK)
-			r = amdgpu_gpu_reset(adev);
-	} else
-		r = fence_wait(*f, false);
+static bool amdgpu_flip_handle_fence(struct amdgpu_flip_work *work,
+				     struct fence **f)
+{
+	struct fence *fence= *f;

-	if (r)
-		DRM_ERROR("failed to wait on page flip fence (%ld)!\n", r);
+	if (fence == NULL)
+		return false;

-	/* We continue with the page flip even if we failed to wait on
-	 * the fence, otherwise the DRM core and userspace will be
-	 * confused about which BO the CRTC is scanning out
-	 */
-	fence_put(*f);
 	*f = NULL;
+
+	if (!fence_add_callback(fence, &work->cb, amdgpu_flip_callback))
+		return true;
+
+	fence_put(*f);
+	return false;
 }

 static void amdgpu_flip_work_func(struct work_struct *__work)
@@ -76,9 +74,12 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
 	int vpos, hpos, stat, min_udelay;
 	struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];

-	amdgpu_flip_wait_fence(adev, &work->excl);
+	if (amdgpu_flip_handle_fence(work, &work->excl))
+		return;
+
 	for (i = 0; i < work->shared_count; ++i)
-		amdgpu_flip_wait_fence(adev, &work->shared[i]);
+		if (amdgpu_flip_handle_fence(work, &work->shared[i]))
+			return;

 	/* We borrow the event spin lock for protecting flip_status */
 	spin_lock_irqsave(&crtc->dev->event_lock, flags);
@@ -118,12 +119,12 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
 		spin_lock_irqsave(&crtc->dev->event_lock, flags);
 	};

-	/* do the flip (mmio) */
-	adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base);
 	/* set the flip status */
 	amdgpuCrtc->pflip_status = AMDGPU_FLIP_SUBMITTED;
-
 	spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+
+	/* Do the flip (mmio) */
+	adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base);
 }

 /*
@@ -242,7 +243,7 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc,
 	/* update crtc fb */
 	crtc->primary->fb = fb;
 	spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
-	queue_work(amdgpu_crtc->pflip_queue, &work->flip_work);
+	amdgpu_flip_work_func(&work->flip_work);
 	return 0;

 vblank_cleanup:

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -69,7 +69,6 @@ int amdgpu_dpm = -1;
 int amdgpu_smc_load_fw = 1;
 int amdgpu_aspm = -1;
 int amdgpu_runtime_pm = -1;
-int amdgpu_hard_reset = 0;
 unsigned amdgpu_ip_block_mask = 0xffffffff;
 int amdgpu_bapm = -1;
 int amdgpu_deep_color = 0;
@@ -78,10 +77,8 @@ int amdgpu_vm_block_size = -1;
 int amdgpu_vm_fault_stop = 0;
 int amdgpu_vm_debug = 0;
 int amdgpu_exp_hw_support = 0;
-int amdgpu_enable_scheduler = 1;
 int amdgpu_sched_jobs = 32;
 int amdgpu_sched_hw_submission = 2;
-int amdgpu_enable_semaphores = 0;
 int amdgpu_powerplay = -1;

 MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
@@ -126,9 +123,6 @@ module_param_named(aspm, amdgpu_aspm, int, 0444);
 MODULE_PARM_DESC(runpm, "PX runtime pm (1 = force enable, 0 = disable, -1 = PX only default)");
 module_param_named(runpm, amdgpu_runtime_pm, int, 0444);

-MODULE_PARM_DESC(hard_reset, "PCI config reset (1 = force enable, 0 = disable (default))");
-module_param_named(hard_reset, amdgpu_hard_reset, int, 0444);
-
 MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))");
 module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444);

@@ -153,18 +147,12 @@ module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
 MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))");
 module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444);

-MODULE_PARM_DESC(enable_scheduler, "enable SW GPU scheduler (1 = enable (default), 0 = disable)");
-module_param_named(enable_scheduler, amdgpu_enable_scheduler, int, 0444);
-
 MODULE_PARM_DESC(sched_jobs, "the max number of jobs supported in the sw queue (default 32)");
 module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444);

 MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)");
 module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);

-MODULE_PARM_DESC(enable_semaphores, "Enable semaphores (1 = enable, 0 = disable (default))");
-module_param_named(enable_semaphores, amdgpu_enable_semaphores, int, 0644);
-
 #ifdef CONFIG_DRM_AMD_POWERPLAY
 MODULE_PARM_DESC(powerplay, "Powerplay component (1 = enable, 0 = disable, -1 = auto (default))");
 module_param_named(powerplay, amdgpu_powerplay, int, 0444);

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -107,7 +107,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
 	if ((*fence) == NULL) {
 		return -ENOMEM;
 	}
-	(*fence)->seq = ++ring->fence_drv.sync_seq[ring->idx];
+	(*fence)->seq = ++ring->fence_drv.sync_seq;
 	(*fence)->ring = ring;
 	(*fence)->owner = owner;
 	fence_init(&(*fence)->base, &amdgpu_fence_ops,
@@ -171,7 +171,7 @@ static bool amdgpu_fence_activity(struct amdgpu_ring *ring)
 	 */
 	last_seq = atomic64_read(&ring->fence_drv.last_seq);
 	do {
-		last_emitted = ring->fence_drv.sync_seq[ring->idx];
+		last_emitted = ring->fence_drv.sync_seq;
 		seq = amdgpu_fence_read(ring);
 		seq |= last_seq & 0xffffffff00000000LL;
 		if (seq < last_seq) {
@@ -260,34 +260,28 @@ static bool amdgpu_fence_seq_signaled(struct amdgpu_ring *ring, u64 seq)
 }

 /*
- * amdgpu_ring_wait_seq_timeout - wait for seq of the specific ring to signal
+ * amdgpu_ring_wait_seq - wait for seq of the specific ring to signal
 * @ring: ring to wait on for the seq number
 * @seq: seq number wait for
 *
 * return value:
 * 0: seq signaled, and gpu not hang
- * -EDEADL: GPU hang detected
 * -EINVAL: some paramter is not valid
 */
 static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq)
 {
-	bool signaled = false;
-
 	BUG_ON(!ring);
-	if (seq > ring->fence_drv.sync_seq[ring->idx])
+	if (seq > ring->fence_drv.sync_seq)
 		return -EINVAL;

 	if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
 		return 0;

 	amdgpu_fence_schedule_fallback(ring);
-	wait_event(ring->fence_drv.fence_queue, (
-		   (signaled = amdgpu_fence_seq_signaled(ring, seq))));
+	wait_event(ring->fence_drv.fence_queue,
+		   amdgpu_fence_seq_signaled(ring, seq));

-	if (signaled)
-		return 0;
-	else
-		return -EDEADLK;
+	return 0;
 }

 /**
@@ -304,7 +298,7 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
 {
 	uint64_t seq = atomic64_read(&ring->fence_drv.last_seq) + 1ULL;

-	if (seq >= ring->fence_drv.sync_seq[ring->idx])
+	if (seq >= ring->fence_drv.sync_seq)
 		return -ENOENT;

 	return amdgpu_fence_ring_wait_seq(ring, seq);
@@ -322,7 +316,7 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
 */
 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
 {
-	uint64_t seq = ring->fence_drv.sync_seq[ring->idx];
+	uint64_t seq = ring->fence_drv.sync_seq;

 	if (!seq)
 		return 0;
@@ -347,7 +341,7 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
 	 * but it's ok to report slightly wrong fence count here.
 	 */
 	amdgpu_fence_process(ring);
-	emitted = ring->fence_drv.sync_seq[ring->idx]
+	emitted = ring->fence_drv.sync_seq
 		- atomic64_read(&ring->fence_drv.last_seq);
 	/* to avoid 32bits warp around */
 	if (emitted > 0x10000000)
@@ -356,68 +350,6 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
 	return (unsigned)emitted;
 }

-/**
- * amdgpu_fence_need_sync - do we need a semaphore
- *
- * @fence: amdgpu fence object
- * @dst_ring: which ring to check against
- *
- * Check if the fence needs to be synced against another ring
- * (all asics).  If so, we need to emit a semaphore.
- * Returns true if we need to sync with another ring, false if
- * not.
- */
-bool amdgpu_fence_need_sync(struct amdgpu_fence *fence,
-			    struct amdgpu_ring *dst_ring)
-{
-	struct amdgpu_fence_driver *fdrv;
-
-	if (!fence)
-		return false;
-
-	if (fence->ring == dst_ring)
-		return false;
-
-	/* we are protected by the ring mutex */
-	fdrv = &dst_ring->fence_drv;
-	if (fence->seq <= fdrv->sync_seq[fence->ring->idx])
-		return false;
-
-	return true;
-}
-
-/**
- * amdgpu_fence_note_sync - record the sync point
- *
- * @fence: amdgpu fence object
- * @dst_ring: which ring to check against
- *
- * Note the sequence number at which point the fence will
- * be synced with the requested ring (all asics).
- */
-void amdgpu_fence_note_sync(struct amdgpu_fence *fence,
-			    struct amdgpu_ring *dst_ring)
-{
-	struct amdgpu_fence_driver *dst, *src;
-	unsigned i;
-
-	if (!fence)
-		return;
-
-	if (fence->ring == dst_ring)
-		return;
-
-	/* we are protected by the ring mutex */
-	src = &fence->ring->fence_drv;
-	dst = &dst_ring->fence_drv;
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		if (i == dst_ring->idx)
-			continue;
-
-		dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
-	}
-}
-
 /**
 * amdgpu_fence_driver_start_ring - make the fence driver
 * ready for use on the requested ring.
@@ -471,13 +403,12 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
 */
 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
 {
-	int i, r;
+	long timeout;
+	int r;

 	ring->fence_drv.cpu_addr = NULL;
 	ring->fence_drv.gpu_addr = 0;
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
-		ring->fence_drv.sync_seq[i] = 0;
-
+	ring->fence_drv.sync_seq = 0;
 	atomic64_set(&ring->fence_drv.last_seq, 0);
 	ring->fence_drv.initialized = false;

@@ -486,26 +417,24 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)

 	init_waitqueue_head(&ring->fence_drv.fence_queue);

-	if (amdgpu_enable_scheduler) {
-		long timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
-		if (timeout == 0) {
-			/*
-			 * FIXME:
-			 * Delayed workqueue cannot use it directly,
-			 * so the scheduler will not use delayed workqueue if
-			 * MAX_SCHEDULE_TIMEOUT is set.
-			 * Currently keep it simple and silly.
-			 */
-			timeout = MAX_SCHEDULE_TIMEOUT;
-		}
-		r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
-				   amdgpu_sched_hw_submission,
-				   timeout, ring->name);
-		if (r) {
-			DRM_ERROR("Failed to create scheduler on ring %s.\n",
-				  ring->name);
-			return r;
-		}
+	timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
+	if (timeout == 0) {
+		/*
+		 * FIXME:
+		 * Delayed workqueue cannot use it directly,
+		 * so the scheduler will not use delayed workqueue if
+		 * MAX_SCHEDULE_TIMEOUT is set.
+		 * Currently keep it simple and silly.
+		 */
+		timeout = MAX_SCHEDULE_TIMEOUT;
+	}
+	r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
+			   amdgpu_sched_hw_submission,
+			   timeout, ring->name);
+	if (r) {
+		DRM_ERROR("Failed to create scheduler on ring %s.\n",
+			  ring->name);
+		return r;
 	}

 	return 0;
@@ -552,7 +481,6 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)

 	if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
 		kmem_cache_destroy(amdgpu_fence_slab);
-	mutex_lock(&adev->ring_lock);
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];

@@ -570,7 +498,6 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
 		del_timer_sync(&ring->fence_drv.fallback_timer);
 		ring->fence_drv.initialized = false;
 	}
-	mutex_unlock(&adev->ring_lock);
 }

 /**
@@ -585,7 +512,6 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev)
 {
 	int i, r;

-	mutex_lock(&adev->ring_lock);
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
 		if (!ring || !ring->fence_drv.initialized)
@@ -602,7 +528,6 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev)
 		amdgpu_irq_put(adev, ring->fence_drv.irq_src,
 			       ring->fence_drv.irq_type);
 	}
-	mutex_unlock(&adev->ring_lock);
 }

 /**
@@ -621,7 +546,6 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev)
 {
 	int i;

-	mutex_lock(&adev->ring_lock);
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
 		if (!ring || !ring->fence_drv.initialized)
@@ -631,7 +555,6 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev)
 		amdgpu_irq_get(adev, ring->fence_drv.irq_src,
 			       ring->fence_drv.irq_type);
 	}
-	mutex_unlock(&adev->ring_lock);
 }

 /**
@@ -651,7 +574,7 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev)
 		if (!ring || !ring->fence_drv.initialized)
 			continue;

-		amdgpu_fence_write(ring, ring->fence_drv.sync_seq[i]);
+		amdgpu_fence_write(ring, ring->fence_drv.sync_seq);
 	}
 }

@@ -781,7 +704,7 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct amdgpu_device *adev = dev->dev_private;
-	int i, j;
+	int i;

 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
@@ -794,28 +717,38 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
 		seq_printf(m, "Last signaled fence 0x%016llx\n",
 			   (unsigned long long)atomic64_read(&ring->fence_drv.last_seq));
 		seq_printf(m, "Last emitted        0x%016llx\n",
-			   ring->fence_drv.sync_seq[i]);
-
-		for (j = 0; j < AMDGPU_MAX_RINGS; ++j) {
-			struct amdgpu_ring *other = adev->rings[j];
-			if (i != j && other && other->fence_drv.initialized &&
-			    ring->fence_drv.sync_seq[j])
-				seq_printf(m, "Last sync to ring %d 0x%016llx\n",
-					   j, ring->fence_drv.sync_seq[j]);
-		}
+			   ring->fence_drv.sync_seq);
 	}
 	return 0;
 }

+/**
+ * amdgpu_debugfs_gpu_reset - manually trigger a gpu reset
+ *
+ * Manually trigger a gpu reset at the next fence wait.
+ */
+static int amdgpu_debugfs_gpu_reset(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+
+	seq_printf(m, "gpu reset\n");
+	amdgpu_gpu_reset(adev);
+
+	return 0;
+}
+
 static struct drm_info_list amdgpu_debugfs_fence_list[] = {
 	{"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL},
+	{"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL}
 };
 #endif

 int amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
 {
 #if defined(CONFIG_DEBUG_FS)
-	return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 1);
+	return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 2);
 #else
 	return 0;
 #endif

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -83,24 +83,32 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 		return r;
 	}
 	*obj = &robj->gem_base;
-	robj->pid = task_pid_nr(current);
-
-	mutex_lock(&adev->gem.mutex);
-	list_add_tail(&robj->list, &adev->gem.objects);
-	mutex_unlock(&adev->gem.mutex);

 	return 0;
 }

-int amdgpu_gem_init(struct amdgpu_device *adev)
+void amdgpu_gem_force_release(struct amdgpu_device *adev)
 {
-	INIT_LIST_HEAD(&adev->gem.objects);
-	return 0;
-}
+	struct drm_device *ddev = adev->ddev;
+	struct drm_file *file;

-void amdgpu_gem_fini(struct amdgpu_device *adev)
-{
-	amdgpu_bo_force_delete(adev);
+	mutex_lock(&ddev->struct_mutex);
+
+	list_for_each_entry(file, &ddev->filelist, lhead) {
+		struct drm_gem_object *gobj;
+		int handle;
+
+		WARN_ONCE(1, "Still active user space clients!\n");
+		spin_lock(&file->table_lock);
+		idr_for_each_entry(&file->object_idr, gobj, handle) {
+			WARN_ONCE(1, "And also active allocations!\n");
+			drm_gem_object_unreference(gobj);
+		}
+		idr_destroy(&file->object_idr);
+		spin_unlock(&file->table_lock);
+	}
+
+	mutex_unlock(&ddev->struct_mutex);
 }

 /*
@@ -252,6 +260,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 		goto handle_lockup;

 	bo = gem_to_amdgpu_bo(gobj);
+	bo->prefered_domains = AMDGPU_GEM_DOMAIN_GTT;
+	bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
 	r = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, args->addr, args->flags);
 	if (r)
 		goto release_object;
@@ -308,7 +318,7 @@ int amdgpu_mode_dumb_mmap(struct drm_file *filp,
 		return -ENOENT;
 	}
 	robj = gem_to_amdgpu_bo(gobj);
-	if (amdgpu_ttm_tt_has_userptr(robj->tbo.ttm) ||
+	if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm) ||
 	    (robj->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
 		drm_gem_object_unreference_unlocked(gobj);
 		return -EPERM;
@@ -628,7 +638,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,

 		info.bo_size = robj->gem_base.size;
 		info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT;
-		info.domains = robj->initial_domain;
+		info.domains = robj->prefered_domains;
 		info.domain_flags = robj->flags;
 		amdgpu_bo_unreserve(robj);
 		if (copy_to_user(out, &info, sizeof(info)))
@@ -636,14 +646,18 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
 		break;
 	}
 	case AMDGPU_GEM_OP_SET_PLACEMENT:
-		if (amdgpu_ttm_tt_has_userptr(robj->tbo.ttm)) {
+		if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm)) {
 			r = -EPERM;
 			amdgpu_bo_unreserve(robj);
 			break;
 		}
-		robj->initial_domain = args->value & (AMDGPU_GEM_DOMAIN_VRAM |
-						      AMDGPU_GEM_DOMAIN_GTT |
-						      AMDGPU_GEM_DOMAIN_CPU);
+		robj->prefered_domains = args->value & (AMDGPU_GEM_DOMAIN_VRAM |
+							AMDGPU_GEM_DOMAIN_GTT |
+							AMDGPU_GEM_DOMAIN_CPU);
+		robj->allowed_domains = robj->prefered_domains;
+		if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
+			robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
+
 		amdgpu_bo_unreserve(robj);
 		break;
 	default:
@@ -688,38 +702,73 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
 }

 #if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
+{
+	struct drm_gem_object *gobj = ptr;
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
+	struct seq_file *m = data;
+
+	unsigned domain;
+	const char *placement;
+	unsigned pin_count;
+
+	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
+	switch (domain) {
+	case AMDGPU_GEM_DOMAIN_VRAM:
+		placement = "VRAM";
+		break;
+	case AMDGPU_GEM_DOMAIN_GTT:
+		placement = " GTT";
+		break;
+	case AMDGPU_GEM_DOMAIN_CPU:
+	default:
+		placement = " CPU";
+		break;
+	}
+	seq_printf(m, "\t0x%08x: %12ld byte %s @ 0x%010Lx",
+		   id, amdgpu_bo_size(bo), placement,
+		   amdgpu_bo_gpu_offset(bo));
+
+	pin_count = ACCESS_ONCE(bo->pin_count);
+	if (pin_count)
+		seq_printf(m, " pin count %d", pin_count);
+	seq_printf(m, "\n");
+
+	return 0;
+}
+
 static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
 	struct drm_device *dev = node->minor->dev;
-	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_bo *rbo;
-	unsigned i = 0;
+	struct drm_file *file;
+	int r;

-	mutex_lock(&adev->gem.mutex);
-	list_for_each_entry(rbo, &adev->gem.objects, list) {
-		unsigned domain;
-		const char *placement;
+	r = mutex_lock_interruptible(&dev->struct_mutex);
+	if (r)
+		return r;

-		domain = amdgpu_mem_type_to_domain(rbo->tbo.mem.mem_type);
-		switch (domain) {
-		case AMDGPU_GEM_DOMAIN_VRAM:
-			placement = "VRAM";
-			break;
-		case AMDGPU_GEM_DOMAIN_GTT:
-			placement = " GTT";
-			break;
-		case AMDGPU_GEM_DOMAIN_CPU:
-		default:
-			placement = " CPU";
-			break;
-		}
-		seq_printf(m, "bo[0x%08x] %8ldkB %8ldMB %s pid %8ld\n",
-			   i, amdgpu_bo_size(rbo) >> 10, amdgpu_bo_size(rbo) >> 20,
-			   placement, (unsigned long)rbo->pid);
-		i++;
+	list_for_each_entry(file, &dev->filelist, lhead) {
+		struct task_struct *task;
+
+		/*
+		 * Although we have a valid reference on file->pid, that does
+		 * not guarantee that the task_struct who called get_pid() is
+		 * still alive (e.g. get_pid(current) => fork() => exit()).
+		 * Therefore, we need to protect this ->comm access using RCU.
+		 */
+		rcu_read_lock();
+		task = pid_task(file->pid, PIDTYPE_PID);
+		seq_printf(m, "pid %8d command %s:\n", pid_nr(file->pid),
+			   task ? task->comm : "<unknown>");
+		rcu_read_unlock();
+
+		spin_lock(&file->table_lock);
+		idr_for_each(&file->object_idr, amdgpu_debugfs_gem_bo_info, m);
+		spin_unlock(&file->table_lock);
 	}
-	mutex_unlock(&adev->gem.mutex);
+
+	mutex_unlock(&dev->struct_mutex);
 	return 0;
 }


--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -55,10 +55,9 @@ static int amdgpu_debugfs_sa_init(struct amdgpu_device *adev);
 * suballocator.
 * Returns 0 on success, error on failure.
 */
-int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
+int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		  unsigned size, struct amdgpu_ib *ib)
 {
-	struct amdgpu_device *adev = ring->adev;
 	int r;

 	if (size) {
@@ -75,9 +74,6 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
 			ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
 	}

-	amdgpu_sync_create(&ib->sync);
-
-	ib->ring = ring;
 	ib->vm = vm;

 	return 0;
@@ -93,7 +89,6 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
 */
 void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
 {
-	amdgpu_sync_free(adev, &ib->sync, &ib->fence->base);
 	amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base);
 	if (ib->fence)
 		fence_put(&ib->fence->base);
@@ -106,6 +101,7 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
 * @num_ibs: number of IBs to schedule
 * @ibs: IB objects to schedule
 * @owner: owner for creating the fences
+ * @f: fence created during this submission
 *
 * Schedule an IB on the associated ring (all asics).
 * Returns 0 on success, error on failure.
@@ -120,11 +116,13 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
 * a CONST_IB), it will be put on the ring prior to the DE IB.  Prior
 * to SI there was just a DE IB.
 */
-int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
-		       struct amdgpu_ib *ibs, void *owner)
+int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
+		       struct amdgpu_ib *ibs, void *owner,
+		       struct fence *last_vm_update,
+		       struct fence **f)
 {
+	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib *ib = &ibs[0];
-	struct amdgpu_ring *ring;
 	struct amdgpu_ctx *ctx, *old_ctx;
 	struct amdgpu_vm *vm;
 	unsigned i;
@@ -133,7 +131,6 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
 	if (num_ibs == 0)
 		return -EINVAL;

-	ring = ibs->ring;
 	ctx = ibs->ctx;
 	vm = ibs->vm;

@@ -141,36 +138,21 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
 		dev_err(adev->dev, "couldn't schedule ib\n");
 		return -EINVAL;
 	}
-	r = amdgpu_sync_wait(&ibs->sync);
-	if (r) {
-		dev_err(adev->dev, "IB sync failed (%d).\n", r);
-		return r;
-	}
-	r = amdgpu_ring_lock(ring, (256 + AMDGPU_NUM_SYNCS * 8) * num_ibs);
-	if (r) {
-		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
-		return r;
-	}

-	if (vm) {
-		/* grab a vm id if necessary */
-		r = amdgpu_vm_grab_id(ibs->vm, ibs->ring, &ibs->sync);
-		if (r) {
-			amdgpu_ring_unlock_undo(ring);
-			return r;
-		}
+	if (vm && !ibs->grabbed_vmid) {
+		dev_err(adev->dev, "VM IB without ID\n");
+		return -EINVAL;
 	}

-	r = amdgpu_sync_rings(&ibs->sync, ring);
+	r = amdgpu_ring_alloc(ring, 256 * num_ibs);
 	if (r) {
-		amdgpu_ring_unlock_undo(ring);
-		dev_err(adev->dev, "failed to sync rings (%d)\n", r);
+		dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
 		return r;
 	}

 	if (vm) {
 		/* do context switch */
-		amdgpu_vm_flush(ring, vm, ib->sync.last_vm_update);
+		amdgpu_vm_flush(ring, vm, last_vm_update);

 		if (ring->funcs->emit_gds_switch)
 			amdgpu_ring_emit_gds_switch(ring, ib->vm->ids[ring->idx].id,
@@ -186,9 +168,9 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
 	for (i = 0; i < num_ibs; ++i) {
 		ib = &ibs[i];

-		if (ib->ring != ring || ib->ctx != ctx || ib->vm != vm) {
+		if (ib->ctx != ctx || ib->vm != vm) {
 			ring->current_ctx = old_ctx;
-			amdgpu_ring_unlock_undo(ring);
+			amdgpu_ring_undo(ring);
 			return -EINVAL;
 		}
 		amdgpu_ring_emit_ib(ring, ib);
@@ -199,14 +181,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
 	if (r) {
 		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
 		ring->current_ctx = old_ctx;
-		amdgpu_ring_unlock_undo(ring);
+		amdgpu_ring_undo(ring);
 		return r;
 	}

-	if (!amdgpu_enable_scheduler && ib->ctx)
-		ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring,
-						    &ib->fence->base);
-
 	/* wrap the last IB with fence */
 	if (ib->user) {
 		uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo);
@@ -215,10 +193,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
 				       AMDGPU_FENCE_FLAG_64BIT);
 	}

-	if (ib->vm)
-		amdgpu_vm_fence(adev, ib->vm, &ib->fence->base);
+	if (f)
+		*f = fence_get(&ib->fence->base);

-	amdgpu_ring_unlock_commit(ring);
+	amdgpu_ring_commit(ring);
 	return 0;
 }


--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -28,15 +28,103 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"

-static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job)
+int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
+		     struct amdgpu_job **job)
+{
+	size_t size = sizeof(struct amdgpu_job);
+
+	if (num_ibs == 0)
+		return -EINVAL;
+
+	size += sizeof(struct amdgpu_ib) * num_ibs;
+
+	*job = kzalloc(size, GFP_KERNEL);
+	if (!*job)
+		return -ENOMEM;
+
+	(*job)->adev = adev;
+	(*job)->ibs = (void *)&(*job)[1];
+	(*job)->num_ibs = num_ibs;
+
+	amdgpu_sync_create(&(*job)->sync);
+
+	return 0;
+}
+
+int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
+			     struct amdgpu_job **job)
+{
+	int r;
+
+	r = amdgpu_job_alloc(adev, 1, job);
+	if (r)
+		return r;
+
+	r = amdgpu_ib_get(adev, NULL, size, &(*job)->ibs[0]);
+	if (r)
+		kfree(*job);
+
+	return r;
+}
+
+void amdgpu_job_free(struct amdgpu_job *job)
+{
+	unsigned i;
+
+	for (i = 0; i < job->num_ibs; ++i)
+		amdgpu_ib_free(job->adev, &job->ibs[i]);
+
+	amdgpu_bo_unref(&job->uf.bo);
+	amdgpu_sync_free(&job->sync);
+	kfree(job);
+}
+
+int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
+		      struct amd_sched_entity *entity, void *owner,
+		      struct fence **f)
+{
+	job->ring = ring;
+	job->base.sched = &ring->sched;
+	job->base.s_entity = entity;
+	job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner);
+	if (!job->base.s_fence)
+		return -ENOMEM;
+
+	*f = fence_get(&job->base.s_fence->base);
+
+	job->owner = owner;
+	amd_sched_entity_push_job(&job->base);
+
+	return 0;
+}
+
+static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
 {
 	struct amdgpu_job *job = to_amdgpu_job(sched_job);
-	return amdgpu_sync_get_fence(&job->ibs->sync);
+	struct amdgpu_vm *vm = job->ibs->vm;
+
+	struct fence *fence = amdgpu_sync_get_fence(&job->sync);
+
+	if (fence == NULL && vm && !job->ibs->grabbed_vmid) {
+		struct amdgpu_ring *ring = job->ring;
+		int r;
+
+		r = amdgpu_vm_grab_id(vm, ring, &job->sync,
+				      &job->base.s_fence->base);
+		if (r)
+			DRM_ERROR("Error getting VM ID (%d)\n", r);
+		else
+			job->ibs->grabbed_vmid = true;
+
+		fence = amdgpu_sync_get_fence(&job->sync);
+	}
+
+	return fence;
 }

-static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
+static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
 {
-	struct amdgpu_fence *fence = NULL;
+	struct fence *fence = NULL;
 	struct amdgpu_job *job;
 	int r;

@@ -45,64 +133,27 @@ static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
 		return NULL;
 	}
 	job = to_amdgpu_job(sched_job);
+
+	r = amdgpu_sync_wait(&job->sync);
+	if (r) {
+		DRM_ERROR("failed to sync wait (%d)\n", r);
+		return NULL;
+	}
+
 	trace_amdgpu_sched_run_job(job);
-	r = amdgpu_ib_schedule(job->adev, job->num_ibs, job->ibs, job->owner);
+	r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job->owner,
+			       job->sync.last_vm_update, &fence);
 	if (r) {
 		DRM_ERROR("Error scheduling IBs (%d)\n", r);
 		goto err;
 	}

-	fence = job->ibs[job->num_ibs - 1].fence;
-	fence_get(&fence->base);
-
 err:
-	if (job->free_job)
-		job->free_job(job);
-
-	kfree(job);
-	return fence ? &fence->base : NULL;
+	amdgpu_job_free(job);
+	return fence;
 }

 struct amd_sched_backend_ops amdgpu_sched_ops = {
-	.dependency = amdgpu_sched_dependency,
-	.run_job = amdgpu_sched_run_job,
+	.dependency = amdgpu_job_dependency,
+	.run_job = amdgpu_job_run,
 };
-
-int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
-					 struct amdgpu_ring *ring,
-					 struct amdgpu_ib *ibs,
-					 unsigned num_ibs,
-					 int (*free_job)(struct amdgpu_job *),
-					 void *owner,
-					 struct fence **f)
-{
-	int r = 0;
-	if (amdgpu_enable_scheduler) {
-		struct amdgpu_job *job =
-			kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
-		if (!job)
-			return -ENOMEM;
-		job->base.sched = &ring->sched;
-		job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity;
-		job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner);
-		if (!job->base.s_fence) {
-			kfree(job);
-			return -ENOMEM;
-		}
-		*f = fence_get(&job->base.s_fence->base);
-
-		job->adev = adev;
-		job->ibs = ibs;
-		job->num_ibs = num_ibs;
-		job->owner = owner;
-		job->free_job = free_job;
-		amd_sched_entity_push_job(&job->base);
-	} else {
-		r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner);
-		if (r)
-			return r;
-		*f = fence_get(&ibs[num_ibs - 1].fence->base);
-	}
-
-	return 0;
-}
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -447,8 +447,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 			dev_info.max_memory_clock = adev->pm.default_mclk * 10;
 		}
 		dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
-		dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se *
-					adev->gfx.config.max_shader_engines;
+		dev_info.num_rb_pipes = adev->gfx.config.num_rbs;
 		dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts;
 		dev_info._pad = 0;
 		dev_info.ids_flags = 0;
@@ -727,6 +726,12 @@ int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe,

 	/* Get associated drm_crtc: */
 	crtc = &adev->mode_info.crtcs[pipe]->base;
+	if (!crtc) {
+		/* This can occur on driver load if some component fails to
+		 * initialize completely and driver is unloaded */
+		DRM_ERROR("Uninitialized crtc %d\n", pipe);
+		return -EINVAL;
+	}

 	/* Helper routine in DRM core does all the work: */
 	return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error,

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -48,8 +48,7 @@ struct amdgpu_mn {
 	/* protected by adev->mn_lock */
 	struct hlist_node	node;

-	/* objects protected by lock */
-	struct mutex		lock;
+	/* objects protected by mm->mmap_sem */
 	struct rb_root		objects;
 };

@@ -73,21 +72,19 @@ static void amdgpu_mn_destroy(struct work_struct *work)
 	struct amdgpu_bo *bo, *next_bo;

 	mutex_lock(&adev->mn_lock);
-	mutex_lock(&rmn->lock);
+	down_write(&rmn->mm->mmap_sem);
 	hash_del(&rmn->node);
 	rbtree_postorder_for_each_entry_safe(node, next_node, &rmn->objects,
 					     it.rb) {
-
-		interval_tree_remove(&node->it, &rmn->objects);
 		list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
 			bo->mn = NULL;
 			list_del_init(&bo->mn_list);
 		}
 		kfree(node);
 	}
-	mutex_unlock(&rmn->lock);
+	up_write(&rmn->mm->mmap_sem);
 	mutex_unlock(&adev->mn_lock);
-	mmu_notifier_unregister(&rmn->mn, rmn->mm);
+	mmu_notifier_unregister_no_release(&rmn->mn, rmn->mm);
 	kfree(rmn);
 }

@@ -129,8 +126,6 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
 	/* notification is exclusive, but interval is inclusive */
 	end -= 1;

-	mutex_lock(&rmn->lock);
-
 	it = interval_tree_iter_first(&rmn->objects, start, end);
 	while (it) {
 		struct amdgpu_mn_node *node;
@@ -142,7 +137,8 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,

 		list_for_each_entry(bo, &node->bos, mn_list) {

-			if (!bo->tbo.ttm || bo->tbo.ttm->state != tt_bound)
+			if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start,
+							  end))
 				continue;

 			r = amdgpu_bo_reserve(bo, true);
@@ -164,8 +160,6 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
 			amdgpu_bo_unreserve(bo);
 		}
 	}
-
-	mutex_unlock(&rmn->lock);
 }

 static const struct mmu_notifier_ops amdgpu_mn_ops = {
@@ -186,8 +180,8 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
 	struct amdgpu_mn *rmn;
 	int r;

-	down_write(&mm->mmap_sem);
 	mutex_lock(&adev->mn_lock);
+	down_write(&mm->mmap_sem);

 	hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm)
 		if (rmn->mm == mm)
@@ -202,7 +196,6 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
 	rmn->adev = adev;
 	rmn->mm = mm;
 	rmn->mn.ops = &amdgpu_mn_ops;
-	mutex_init(&rmn->lock);
 	rmn->objects = RB_ROOT;

 	r = __mmu_notifier_register(&rmn->mn, mm);
@@ -212,14 +205,14 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
 	hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm);

 release_locks:
-	mutex_unlock(&adev->mn_lock);
 	up_write(&mm->mmap_sem);
+	mutex_unlock(&adev->mn_lock);

 	return rmn;

 free_rmn:
-	mutex_unlock(&adev->mn_lock);
 	up_write(&mm->mmap_sem);
+	mutex_unlock(&adev->mn_lock);
 	kfree(rmn);

 	return ERR_PTR(r);
@@ -249,7 +242,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)

 	INIT_LIST_HEAD(&bos);

-	mutex_lock(&rmn->lock);
+	down_write(&rmn->mm->mmap_sem);

 	while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) {
 		kfree(node);
@@ -263,7 +256,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 	if (!node) {
 		node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL);
 		if (!node) {
-			mutex_unlock(&rmn->lock);
+			up_write(&rmn->mm->mmap_sem);
 			return -ENOMEM;
 		}
 	}
@@ -278,7 +271,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)

 	interval_tree_insert(&node->it, &rmn->objects);

-	mutex_unlock(&rmn->lock);
+	up_write(&rmn->mm->mmap_sem);

 	return 0;
 }
@@ -297,13 +290,15 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 	struct list_head *head;

 	mutex_lock(&adev->mn_lock);
+
 	rmn = bo->mn;
 	if (rmn == NULL) {
 		mutex_unlock(&adev->mn_lock);
 		return;
 	}

-	mutex_lock(&rmn->lock);
+	down_write(&rmn->mm->mmap_sem);
+
 	/* save the next list entry for later */
 	head = bo->mn_list.next;

@@ -317,6 +312,6 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 		kfree(node);
 	}

-	mutex_unlock(&rmn->lock);
+	up_write(&rmn->mm->mmap_sem);
 	mutex_unlock(&adev->mn_lock);
 }
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -390,7 +390,6 @@ struct amdgpu_crtc {
 	struct drm_display_mode native_mode;
 	u32 pll_id;
 	/* page flipping */
-	struct workqueue_struct *pflip_queue;
 	struct amdgpu_flip_work *pflip_works;
 	enum amdgpu_flip_status pflip_status;
 	int deferred_flip_completion;

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -97,9 +97,6 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)

 	amdgpu_update_memory_usage(bo->adev, &bo->tbo.mem, NULL);

-	mutex_lock(&bo->adev->gem.mutex);
-	list_del_init(&bo->list);
-	mutex_unlock(&bo->adev->gem.mutex);
 	drm_gem_object_release(&bo->gem_base);
 	amdgpu_bo_unref(&bo->parent);
 	kfree(bo->metadata);
@@ -254,12 +251,15 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 	bo->adev = adev;
 	INIT_LIST_HEAD(&bo->list);
 	INIT_LIST_HEAD(&bo->va);
-	bo->initial_domain = domain & (AMDGPU_GEM_DOMAIN_VRAM |
-				       AMDGPU_GEM_DOMAIN_GTT |
-				       AMDGPU_GEM_DOMAIN_CPU |
-				       AMDGPU_GEM_DOMAIN_GDS |
-				       AMDGPU_GEM_DOMAIN_GWS |
-				       AMDGPU_GEM_DOMAIN_OA);
+	bo->prefered_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
+					 AMDGPU_GEM_DOMAIN_GTT |
+					 AMDGPU_GEM_DOMAIN_CPU |
+					 AMDGPU_GEM_DOMAIN_GDS |
+					 AMDGPU_GEM_DOMAIN_GWS |
+					 AMDGPU_GEM_DOMAIN_OA);
+	bo->allowed_domains = bo->prefered_domains;
+	if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
+		bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;

 	bo->flags = flags;

@@ -367,7 +367,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
 	int r, i;
 	unsigned fpfn, lpfn;

-	if (amdgpu_ttm_tt_has_userptr(bo->tbo.ttm))
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
 		return -EPERM;

 	if (WARN_ON_ONCE(min_offset > max_offset))
@@ -470,26 +470,6 @@ int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
 	return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM);
 }

-void amdgpu_bo_force_delete(struct amdgpu_device *adev)
-{
-	struct amdgpu_bo *bo, *n;
-
-	if (list_empty(&adev->gem.objects)) {
-		return;
-	}
-	dev_err(adev->dev, "Userspace still has active objects !\n");
-	list_for_each_entry_safe(bo, n, &adev->gem.objects, list) {
-		dev_err(adev->dev, "%p %p %lu %lu force free\n",
-			&bo->gem_base, bo, (unsigned long)bo->gem_base.size,
-			*((unsigned long *)&bo->gem_base.refcount));
-		mutex_lock(&bo->adev->gem.mutex);
-		list_del_init(&bo->list);
-		mutex_unlock(&bo->adev->gem.mutex);
-		/* this should unref the ttm bo */
-		drm_gem_object_unreference_unlocked(&bo->gem_base);
-	}
-}
-
 int amdgpu_bo_init(struct amdgpu_device *adev)
 {
 	/* Add an MTRR for the VRAM */

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -149,7 +149,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
 			     u64 *gpu_addr);
 int amdgpu_bo_unpin(struct amdgpu_bo *bo);
 int amdgpu_bo_evict_vram(struct amdgpu_device *adev);
-void amdgpu_bo_force_delete(struct amdgpu_device *adev);
 int amdgpu_bo_init(struct amdgpu_device *adev);
 void amdgpu_bo_fini(struct amdgpu_device *adev);
 int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -73,10 +73,6 @@ struct drm_gem_object *amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
 	if (ret)
 		return ERR_PTR(ret);

-	mutex_lock(&adev->gem.mutex);
-	list_add_tail(&bo->list, &adev->gem.objects);
-	mutex_unlock(&adev->gem.mutex);
-
 	return &bo->gem_base;
 }

@@ -121,7 +117,7 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
 {
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);

-	if (amdgpu_ttm_tt_has_userptr(bo->tbo.ttm))
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
 		return ERR_PTR(-EPERM);

 	return drm_gem_prime_export(dev, gobj, flags);

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -48,28 +48,6 @@
 */
 static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring);

-/**
- * amdgpu_ring_free_size - update the free size
- *
- * @adev: amdgpu_device pointer
- * @ring: amdgpu_ring structure holding ring information
- *
- * Update the free dw slots in the ring buffer (all asics).
- */
-void amdgpu_ring_free_size(struct amdgpu_ring *ring)
-{
-	uint32_t rptr = amdgpu_ring_get_rptr(ring);
-
-	/* This works because ring_size is a power of 2 */
-	ring->ring_free_dw = rptr + (ring->ring_size / 4);
-	ring->ring_free_dw -= ring->wptr;
-	ring->ring_free_dw &= ring->ptr_mask;
-	if (!ring->ring_free_dw) {
-		/* this is an empty ring */
-		ring->ring_free_dw = ring->ring_size / 4;
-	}
-}
-
 /**
 * amdgpu_ring_alloc - allocate space on the ring buffer
 *
@@ -82,50 +60,18 @@ void amdgpu_ring_free_size(struct amdgpu_ring *ring)
 */
 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
 {
-	int r;
-
-	/* make sure we aren't trying to allocate more space than there is on the ring */
-	if (ndw > (ring->ring_size / 4))
-		return -ENOMEM;
 	/* Align requested size with padding so unlock_commit can
 	 * pad safely */
-	amdgpu_ring_free_size(ring);
 	ndw = (ndw + ring->align_mask) & ~ring->align_mask;
-	while (ndw > (ring->ring_free_dw - 1)) {
-		amdgpu_ring_free_size(ring);
-		if (ndw < ring->ring_free_dw) {
-			break;
-		}
-		r = amdgpu_fence_wait_next(ring);
-		if (r)
-			return r;
-	}
-	ring->count_dw = ndw;
-	ring->wptr_old = ring->wptr;
-	return 0;
-}

-/**
- * amdgpu_ring_lock - lock the ring and allocate space on it
- *
- * @adev: amdgpu_device pointer
- * @ring: amdgpu_ring structure holding ring information
- * @ndw: number of dwords to allocate in the ring buffer
- *
- * Lock the ring and allocate @ndw dwords in the ring buffer
- * (all asics).
- * Returns 0 on success, error on failure.
- */
-int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw)
-{
-	int r;
+	/* Make sure we aren't trying to allocate more space
+	 * than the maximum for one submission
+	 */
+	if (WARN_ON_ONCE(ndw > ring->max_dw))
+		return -ENOMEM;

-	mutex_lock(ring->ring_lock);
-	r = amdgpu_ring_alloc(ring, ndw);
-	if (r) {
-		mutex_unlock(ring->ring_lock);
-		return r;
-	}
+	ring->count_dw = ndw;
+	ring->wptr_old = ring->wptr;
 	return 0;
 }

@@ -144,6 +90,19 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 		amdgpu_ring_write(ring, ring->nop);
 }

+/** amdgpu_ring_generic_pad_ib - pad IB with NOP packets
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @ib: IB to add NOP packets to
+ *
+ * This is the generic pad_ib function for rings except SDMA
+ */
+void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+	while (ib->length_dw & ring->align_mask)
+		ib->ptr[ib->length_dw++] = ring->nop;
+}
+
 /**
 * amdgpu_ring_commit - tell the GPU to execute the new
 * commands on the ring buffer
@@ -167,20 +126,6 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
 	amdgpu_ring_set_wptr(ring);
 }

-/**
- * amdgpu_ring_unlock_commit - tell the GPU to execute the new
- * commands on the ring buffer and unlock it
- *
- * @ring: amdgpu_ring structure holding ring information
- *
- * Call amdgpu_ring_commit() then unlock the ring (all asics).
- */
-void amdgpu_ring_unlock_commit(struct amdgpu_ring *ring)
-{
-	amdgpu_ring_commit(ring);
-	mutex_unlock(ring->ring_lock);
-}
-
 /**
 * amdgpu_ring_undo - reset the wptr
 *
@@ -193,19 +138,6 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
 	ring->wptr = ring->wptr_old;
 }

-/**
- * amdgpu_ring_unlock_undo - reset the wptr and unlock the ring
- *
- * @ring: amdgpu_ring structure holding ring information
- *
- * Call amdgpu_ring_undo() then unlock the ring (all asics).
- */
-void amdgpu_ring_unlock_undo(struct amdgpu_ring *ring)
-{
-	amdgpu_ring_undo(ring);
-	mutex_unlock(ring->ring_lock);
-}
-
 /**
 * amdgpu_ring_backup - Back up the content of a ring
 *
@@ -218,43 +150,32 @@ unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
 {
 	unsigned size, ptr, i;

-	/* just in case lock the ring */
-	mutex_lock(ring->ring_lock);
 	*data = NULL;

-	if (ring->ring_obj == NULL) {
-		mutex_unlock(ring->ring_lock);
+	if (ring->ring_obj == NULL)
 		return 0;
-	}

 	/* it doesn't make sense to save anything if all fences are signaled */
-	if (!amdgpu_fence_count_emitted(ring)) {
-		mutex_unlock(ring->ring_lock);
+	if (!amdgpu_fence_count_emitted(ring))
 		return 0;
-	}

 	ptr = le32_to_cpu(*ring->next_rptr_cpu_addr);

 	size = ring->wptr + (ring->ring_size / 4);
 	size -= ptr;
 	size &= ring->ptr_mask;
-	if (size == 0) {
-		mutex_unlock(ring->ring_lock);
+	if (size == 0)
 		return 0;
-	}

 	/* and then save the content of the ring */
 	*data = kmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
-	if (!*data) {
-		mutex_unlock(ring->ring_lock);
+	if (!*data)
 		return 0;
-	}
 	for (i = 0; i < size; ++i) {
 		(*data)[i] = ring->ring[ptr++];
 		ptr &= ring->ptr_mask;
 	}

-	mutex_unlock(ring->ring_lock);
 	return size;
 }

@@ -276,7 +197,7 @@ int amdgpu_ring_restore(struct amdgpu_ring *ring,
 		return 0;

 	/* restore the saved ring content */
-	r = amdgpu_ring_lock(ring, size);
+	r = amdgpu_ring_alloc(ring, size);
 	if (r)
 		return r;

@@ -284,7 +205,7 @@ int amdgpu_ring_restore(struct amdgpu_ring *ring,
 		amdgpu_ring_write(ring, data[i]);
 	}

-	amdgpu_ring_unlock_commit(ring);
+	amdgpu_ring_commit(ring);
 	kfree(data);
 	return 0;
 }
@@ -352,7 +273,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		return r;
 	}

-	ring->ring_lock = &adev->ring_lock;
 	/* Align ring size */
 	rb_bufsz = order_base_2(ring_size / 8);
 	ring_size = (1 << (rb_bufsz + 1)) * 4;
@@ -389,7 +309,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		}
 	}
 	ring->ptr_mask = (ring->ring_size / 4) - 1;
-	ring->ring_free_dw = ring->ring_size / 4;
+	ring->max_dw = DIV_ROUND_UP(ring->ring_size / 4,
+				    amdgpu_sched_hw_submission);

 	if (amdgpu_debugfs_ring_init(adev, ring)) {
 		DRM_ERROR("Failed to register debugfs file for rings !\n");
@@ -410,15 +331,10 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 	int r;
 	struct amdgpu_bo *ring_obj;

-	if (ring->ring_lock == NULL)
-		return;
-
-	mutex_lock(ring->ring_lock);
 	ring_obj = ring->ring_obj;
 	ring->ready = false;
 	ring->ring = NULL;
 	ring->ring_obj = NULL;
-	mutex_unlock(ring->ring_lock);

 	amdgpu_wb_free(ring->adev, ring->fence_offs);
 	amdgpu_wb_free(ring->adev, ring->rptr_offs);
@@ -474,29 +390,18 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
 	struct amdgpu_ring *ring = (void *)(((uint8_t*)adev) + roffset);

 	uint32_t rptr, wptr, rptr_next;
-	unsigned count, i, j;
-
-	amdgpu_ring_free_size(ring);
-	count = (ring->ring_size / 4) - ring->ring_free_dw;
+	unsigned i;

 	wptr = amdgpu_ring_get_wptr(ring);
-	seq_printf(m, "wptr: 0x%08x [%5d]\n",
-		   wptr, wptr);
+	seq_printf(m, "wptr: 0x%08x [%5d]\n", wptr, wptr);

 	rptr = amdgpu_ring_get_rptr(ring);
-	seq_printf(m, "rptr: 0x%08x [%5d]\n",
-		   rptr, rptr);
-
 	rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr);

+	seq_printf(m, "rptr: 0x%08x [%5d]\n", rptr, rptr);
+
 	seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
 		   ring->wptr, ring->wptr);
-	seq_printf(m, "last semaphore signal addr : 0x%016llx\n",
-		   ring->last_semaphore_signal_addr);
-	seq_printf(m, "last semaphore wait addr   : 0x%016llx\n",
-		   ring->last_semaphore_wait_addr);
-	seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
-	seq_printf(m, "%u dwords in ring\n", count);

 	if (!ring->ready)
 		return 0;
@@ -505,11 +410,20 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
 	 * packet that is the root issue
 	 */
 	i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask;
-	for (j = 0; j <= (count + 32); j++) {
+	while (i != rptr) {
+		seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
+		if (i == rptr)
+			seq_puts(m, " *");
+		if (i == rptr_next)
+			seq_puts(m, " #");
+		seq_puts(m, "\n");
+		i = (i + 1) & ring->ptr_mask;
+	}
+	while (i != wptr) {
 		seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
-		if (rptr == i)
+		if (i == rptr)
 			seq_puts(m, " *");
-		if (rptr_next == i)
+		if (i == rptr_next)
 			seq_puts(m, " #");
 		seq_puts(m, "\n");
 		i = (i + 1) & ring->ptr_mask;

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -321,8 +321,11 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
 	int i, r;
 	signed long t;

-	BUG_ON(align > sa_manager->align);
-	BUG_ON(size > sa_manager->size);
+	if (WARN_ON_ONCE(align > sa_manager->align))
+		return -EINVAL;
+
+	if (WARN_ON_ONCE(size > sa_manager->size))
+		return -EINVAL;

 	*sa_bo = kmalloc(sizeof(struct amdgpu_sa_bo), GFP_KERNEL);
 	if ((*sa_bo) == NULL) {

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
-/*
- * Copyright 2011 Christian König.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- */
-/*
- * Authors:
- *    Christian König <deathsimple@vodafone.de>
- */
-#include <drm/drmP.h>
-#include "amdgpu.h"
-#include "amdgpu_trace.h"
-
-int amdgpu_semaphore_create(struct amdgpu_device *adev,
-			    struct amdgpu_semaphore **semaphore)
-{
-	int r;
-
-	*semaphore = kmalloc(sizeof(struct amdgpu_semaphore), GFP_KERNEL);
-	if (*semaphore == NULL) {
-		return -ENOMEM;
-	}
-	r = amdgpu_sa_bo_new(&adev->ring_tmp_bo,
-			     &(*semaphore)->sa_bo, 8, 8);
-	if (r) {
-		kfree(*semaphore);
-		*semaphore = NULL;
-		return r;
-	}
-	(*semaphore)->waiters = 0;
-	(*semaphore)->gpu_addr = amdgpu_sa_bo_gpu_addr((*semaphore)->sa_bo);
-
-	*((uint64_t *)amdgpu_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0;
-
-	return 0;
-}
-
-bool amdgpu_semaphore_emit_signal(struct amdgpu_ring *ring,
-				  struct amdgpu_semaphore *semaphore)
-{
-	trace_amdgpu_semaphore_signale(ring->idx, semaphore);
-
-	if (amdgpu_ring_emit_semaphore(ring, semaphore, false)) {
-		--semaphore->waiters;
-
-		/* for debugging lockup only, used by sysfs debug files */
-		ring->last_semaphore_signal_addr = semaphore->gpu_addr;
-		return true;
-	}
-	return false;
-}
-
-bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring,
-				struct amdgpu_semaphore *semaphore)
-{
-	trace_amdgpu_semaphore_wait(ring->idx, semaphore);
-
-	if (amdgpu_ring_emit_semaphore(ring, semaphore, true)) {
-		++semaphore->waiters;
-
-		/* for debugging lockup only, used by sysfs debug files */
-		ring->last_semaphore_wait_addr = semaphore->gpu_addr;
-		return true;
-	}
-	return false;
-}
-
-void amdgpu_semaphore_free(struct amdgpu_device *adev,
-			   struct amdgpu_semaphore **semaphore,
-			   struct fence *fence)
-{
-	if (semaphore == NULL || *semaphore == NULL) {
-		return;
-	}
-	if ((*semaphore)->waiters > 0) {
-		dev_err(adev->dev, "semaphore %p has more waiters than signalers,"
-			" hardware lockup imminent!\n", *semaphore);
-	}
-	amdgpu_sa_bo_free(adev, &(*semaphore)->sa_bo, fence);
-	kfree(*semaphore);
-	*semaphore = NULL;
-}
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -46,14 +46,6 @@ struct amdgpu_sync_entry {
 */
 void amdgpu_sync_create(struct amdgpu_sync *sync)
 {
-	unsigned i;
-
-	for (i = 0; i < AMDGPU_NUM_SYNCS; ++i)
-		sync->semaphores[i] = NULL;
-
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
-		sync->sync_to[i] = NULL;
-
 	hash_init(sync->fences);
 	sync->last_vm_update = NULL;
 }
@@ -107,7 +99,6 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 		      struct fence *f)
 {
 	struct amdgpu_sync_entry *e;
-	struct amdgpu_fence *fence;

 	if (!f)
 		return 0;
@@ -116,27 +107,20 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 	    amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM))
 		amdgpu_sync_keep_later(&sync->last_vm_update, f);

-	fence = to_amdgpu_fence(f);
-	if (!fence || fence->ring->adev != adev) {
-		hash_for_each_possible(sync->fences, e, node, f->context) {
-			if (unlikely(e->fence->context != f->context))
-				continue;
-
-			amdgpu_sync_keep_later(&e->fence, f);
-			return 0;
-		}
-
-		e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL);
-		if (!e)
-			return -ENOMEM;
+	hash_for_each_possible(sync->fences, e, node, f->context) {
+		if (unlikely(e->fence->context != f->context))
+			continue;

-		hash_add(sync->fences, &e->node, f->context);
-		e->fence = fence_get(f);
+		amdgpu_sync_keep_later(&e->fence, f);
 		return 0;
 	}

-	amdgpu_sync_keep_later(&sync->sync_to[fence->ring->idx], f);
+	e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL);
+	if (!e)
+		return -ENOMEM;

+	hash_add(sync->fences, &e->node, f->context);
+	e->fence = fence_get(f);
 	return 0;
 }

@@ -153,13 +137,13 @@ static void *amdgpu_sync_get_owner(struct fence *f)
 }

 /**
- * amdgpu_sync_resv - use the semaphores to sync to a reservation object
+ * amdgpu_sync_resv - sync to a reservation object
 *
 * @sync: sync object to add fences from reservation object to
 * @resv: reservation object with embedded fence
 * @shared: true if we should only sync to the exclusive fence
 *
- * Sync to the fence using the semaphore objects
+ * Sync to the fence
 */
 int amdgpu_sync_resv(struct amdgpu_device *adev,
 		     struct amdgpu_sync *sync,
@@ -250,123 +234,17 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync)
 		kfree(e);
 	}

-	if (amdgpu_enable_semaphores)
-		return 0;
-
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		struct fence *fence = sync->sync_to[i];
-		if (!fence)
-			continue;
-
-		r = fence_wait(fence, false);
-		if (r)
-			return r;
-	}
-
-	return 0;
-}
-
-/**
- * amdgpu_sync_rings - sync ring to all registered fences
- *
- * @sync: sync object to use
- * @ring: ring that needs sync
- *
- * Ensure that all registered fences are signaled before letting
- * the ring continue. The caller must hold the ring lock.
- */
-int amdgpu_sync_rings(struct amdgpu_sync *sync,
-		      struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-	unsigned count = 0;
-	int i, r;
-
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		struct amdgpu_ring *other = adev->rings[i];
-		struct amdgpu_semaphore *semaphore;
-		struct amdgpu_fence *fence;
-
-		if (!sync->sync_to[i])
-			continue;
-
-		fence = to_amdgpu_fence(sync->sync_to[i]);
-
-		/* check if we really need to sync */
-		if (!amdgpu_enable_scheduler &&
-		    !amdgpu_fence_need_sync(fence, ring))
-			continue;
-
-		/* prevent GPU deadlocks */
-		if (!other->ready) {
-			dev_err(adev->dev, "Syncing to a disabled ring!");
-			return -EINVAL;
-		}
-
-		if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores) {
-			r = fence_wait(sync->sync_to[i], true);
-			if (r)
-				return r;
-			continue;
-		}
-
-		if (count >= AMDGPU_NUM_SYNCS) {
-			/* not enough room, wait manually */
-			r = fence_wait(&fence->base, false);
-			if (r)
-				return r;
-			continue;
-		}
-		r = amdgpu_semaphore_create(adev, &semaphore);
-		if (r)
-			return r;
-
-		sync->semaphores[count++] = semaphore;
-
-		/* allocate enough space for sync command */
-		r = amdgpu_ring_alloc(other, 16);
-		if (r)
-			return r;
-
-		/* emit the signal semaphore */
-		if (!amdgpu_semaphore_emit_signal(other, semaphore)) {
-			/* signaling wasn't successful wait manually */
-			amdgpu_ring_undo(other);
-			r = fence_wait(&fence->base, false);
-			if (r)
-				return r;
-			continue;
-		}
-
-		/* we assume caller has already allocated space on waiters ring */
-		if (!amdgpu_semaphore_emit_wait(ring, semaphore)) {
-			/* waiting wasn't successful wait manually */
-			amdgpu_ring_undo(other);
-			r = fence_wait(&fence->base, false);
-			if (r)
-				return r;
-			continue;
-		}
-
-		amdgpu_ring_commit(other);
-		amdgpu_fence_note_sync(fence, ring);
-	}
-
 	return 0;
 }

 /**
 * amdgpu_sync_free - free the sync object
 *
- * @adev: amdgpu_device pointer
 * @sync: sync object to use
- * @fence: fence to use for the free
 *
- * Free the sync object by freeing all semaphores in it.
+ * Free the sync object.
 */
-void amdgpu_sync_free(struct amdgpu_device *adev,
-		      struct amdgpu_sync *sync,
-		      struct fence *fence)
+void amdgpu_sync_free(struct amdgpu_sync *sync)
 {
 	struct amdgpu_sync_entry *e;
 	struct hlist_node *tmp;
@@ -378,11 +256,5 @@ void amdgpu_sync_free(struct amdgpu_device *adev,
 		kfree(e);
 	}

-	for (i = 0; i < AMDGPU_NUM_SYNCS; ++i)
-		amdgpu_semaphore_free(adev, &sync->semaphores[i], fence);
-
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
-		fence_put(sync->sync_to[i]);
-
 	fence_put(sync->last_vm_update);
 }
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -238,144 +238,10 @@ void amdgpu_test_moves(struct amdgpu_device *adev)
 		amdgpu_do_test_moves(adev);
 }

-static int amdgpu_test_create_and_emit_fence(struct amdgpu_device *adev,
-					     struct amdgpu_ring *ring,
-					     struct fence **fence)
-{
-	uint32_t handle = ring->idx ^ 0xdeafbeef;
-	int r;
-
-	if (ring == &adev->uvd.ring) {
-		r = amdgpu_uvd_get_create_msg(ring, handle, NULL);
-		if (r) {
-			DRM_ERROR("Failed to get dummy create msg\n");
-			return r;
-		}
-
-		r = amdgpu_uvd_get_destroy_msg(ring, handle, fence);
-		if (r) {
-			DRM_ERROR("Failed to get dummy destroy msg\n");
-			return r;
-		}
-
-	} else if (ring == &adev->vce.ring[0] ||
-		   ring == &adev->vce.ring[1]) {
-		r = amdgpu_vce_get_create_msg(ring, handle, NULL);
-		if (r) {
-			DRM_ERROR("Failed to get dummy create msg\n");
-			return r;
-		}
-
-		r = amdgpu_vce_get_destroy_msg(ring, handle, fence);
-		if (r) {
-			DRM_ERROR("Failed to get dummy destroy msg\n");
-			return r;
-		}
-	} else {
-		struct amdgpu_fence *a_fence = NULL;
-		r = amdgpu_ring_lock(ring, 64);
-		if (r) {
-			DRM_ERROR("Failed to lock ring A %d\n", ring->idx);
-			return r;
-		}
-		amdgpu_fence_emit(ring, AMDGPU_FENCE_OWNER_UNDEFINED, &a_fence);
-		amdgpu_ring_unlock_commit(ring);
-		*fence = &a_fence->base;
-	}
-	return 0;
-}
-
 void amdgpu_test_ring_sync(struct amdgpu_device *adev,
 			   struct amdgpu_ring *ringA,
 			   struct amdgpu_ring *ringB)
 {
-	struct fence *fence1 = NULL, *fence2 = NULL;
-	struct amdgpu_semaphore *semaphore = NULL;
-	int r;
-
-	r = amdgpu_semaphore_create(adev, &semaphore);
-	if (r) {
-		DRM_ERROR("Failed to create semaphore\n");
-		goto out_cleanup;
-	}
-
-	r = amdgpu_ring_lock(ringA, 64);
-	if (r) {
-		DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
-		goto out_cleanup;
-	}
-	amdgpu_semaphore_emit_wait(ringA, semaphore);
-	amdgpu_ring_unlock_commit(ringA);
-
-	r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence1);
-	if (r)
-		goto out_cleanup;
-
-	r = amdgpu_ring_lock(ringA, 64);
-	if (r) {
-		DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
-		goto out_cleanup;
-	}
-	amdgpu_semaphore_emit_wait(ringA, semaphore);
-	amdgpu_ring_unlock_commit(ringA);
-
-	r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence2);
-	if (r)
-		goto out_cleanup;
-
-	mdelay(1000);
-
-	if (fence_is_signaled(fence1)) {
-		DRM_ERROR("Fence 1 signaled without waiting for semaphore.\n");
-		goto out_cleanup;
-	}
-
-	r = amdgpu_ring_lock(ringB, 64);
-	if (r) {
-		DRM_ERROR("Failed to lock ring B %p\n", ringB);
-		goto out_cleanup;
-	}
-	amdgpu_semaphore_emit_signal(ringB, semaphore);
-	amdgpu_ring_unlock_commit(ringB);
-
-	r = fence_wait(fence1, false);
-	if (r) {
-		DRM_ERROR("Failed to wait for sync fence 1\n");
-		goto out_cleanup;
-	}
-
-	mdelay(1000);
-
-	if (fence_is_signaled(fence2)) {
-		DRM_ERROR("Fence 2 signaled without waiting for semaphore.\n");
-		goto out_cleanup;
-	}
-
-	r = amdgpu_ring_lock(ringB, 64);
-	if (r) {
-		DRM_ERROR("Failed to lock ring B %p\n", ringB);
-		goto out_cleanup;
-	}
-	amdgpu_semaphore_emit_signal(ringB, semaphore);
-	amdgpu_ring_unlock_commit(ringB);
-
-	r = fence_wait(fence2, false);
-	if (r) {
-		DRM_ERROR("Failed to wait for sync fence 1\n");
-		goto out_cleanup;
-	}
-
-out_cleanup:
-	amdgpu_semaphore_free(adev, &semaphore, NULL);
-
-	if (fence1)
-		fence_put(fence1);
-
-	if (fence2)
-		fence_put(fence2);
-
-	if (r)
-		printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
 }

 static void amdgpu_test_ring_sync2(struct amdgpu_device *adev,
@@ -383,109 +249,6 @@ static void amdgpu_test_ring_sync2(struct amdgpu_device *adev,
 			    struct amdgpu_ring *ringB,
 			    struct amdgpu_ring *ringC)
 {
-	struct fence *fenceA = NULL, *fenceB = NULL;
-	struct amdgpu_semaphore *semaphore = NULL;
-	bool sigA, sigB;
-	int i, r;
-
-	r = amdgpu_semaphore_create(adev, &semaphore);
-	if (r) {
-		DRM_ERROR("Failed to create semaphore\n");
-		goto out_cleanup;
-	}
-
-	r = amdgpu_ring_lock(ringA, 64);
-	if (r) {
-		DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
-		goto out_cleanup;
-	}
-	amdgpu_semaphore_emit_wait(ringA, semaphore);
-	amdgpu_ring_unlock_commit(ringA);
-
-	r = amdgpu_test_create_and_emit_fence(adev, ringA, &fenceA);
-	if (r)
-		goto out_cleanup;
-
-	r = amdgpu_ring_lock(ringB, 64);
-	if (r) {
-		DRM_ERROR("Failed to lock ring B %d\n", ringB->idx);
-		goto out_cleanup;
-	}
-	amdgpu_semaphore_emit_wait(ringB, semaphore);
-	amdgpu_ring_unlock_commit(ringB);
-	r = amdgpu_test_create_and_emit_fence(adev, ringB, &fenceB);
-	if (r)
-		goto out_cleanup;
-
-	mdelay(1000);
-
-	if (fence_is_signaled(fenceA)) {
-		DRM_ERROR("Fence A signaled without waiting for semaphore.\n");
-		goto out_cleanup;
-	}
-	if (fence_is_signaled(fenceB)) {
-		DRM_ERROR("Fence B signaled without waiting for semaphore.\n");
-		goto out_cleanup;
-	}
-
-	r = amdgpu_ring_lock(ringC, 64);
-	if (r) {
-		DRM_ERROR("Failed to lock ring B %p\n", ringC);
-		goto out_cleanup;
-	}
-	amdgpu_semaphore_emit_signal(ringC, semaphore);
-	amdgpu_ring_unlock_commit(ringC);
-
-	for (i = 0; i < 30; ++i) {
-		mdelay(100);
-		sigA = fence_is_signaled(fenceA);
-		sigB = fence_is_signaled(fenceB);
-		if (sigA || sigB)
-			break;
-	}
-
-	if (!sigA && !sigB) {
-		DRM_ERROR("Neither fence A nor B has been signaled\n");
-		goto out_cleanup;
-	} else if (sigA && sigB) {
-		DRM_ERROR("Both fence A and B has been signaled\n");
-		goto out_cleanup;
-	}
-
-	DRM_INFO("Fence %c was first signaled\n", sigA ? 'A' : 'B');
-
-	r = amdgpu_ring_lock(ringC, 64);
-	if (r) {
-		DRM_ERROR("Failed to lock ring B %p\n", ringC);
-		goto out_cleanup;
-	}
-	amdgpu_semaphore_emit_signal(ringC, semaphore);
-	amdgpu_ring_unlock_commit(ringC);
-
-	mdelay(1000);
-
-	r = fence_wait(fenceA, false);
-	if (r) {
-		DRM_ERROR("Failed to wait for sync fence A\n");
-		goto out_cleanup;
-	}
-	r = fence_wait(fenceB, false);
-	if (r) {
-		DRM_ERROR("Failed to wait for sync fence B\n");
-		goto out_cleanup;
-	}
-
-out_cleanup:
-	amdgpu_semaphore_free(adev, &semaphore, NULL);
-
-	if (fenceA)
-		fence_put(fenceA);
-
-	if (fenceB)
-		fence_put(fenceB);
-
-	if (r)
-		printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
 }

 static bool amdgpu_test_sync_possible(struct amdgpu_ring *ringA,

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -38,10 +38,10 @@ TRACE_EVENT(amdgpu_cs,

 	    TP_fast_assign(
 			   __entry->bo_list = p->bo_list;
-			   __entry->ring = p->ibs[i].ring->idx;
-			   __entry->dw = p->ibs[i].length_dw;
+			   __entry->ring = p->job->ring->idx;
+			   __entry->dw = p->job->ibs[i].length_dw;
 			   __entry->fences = amdgpu_fence_count_emitted(
-				p->ibs[i].ring);
+				p->job->ring);
 			   ),
 	    TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",
 		      __entry->bo_list, __entry->ring, __entry->dw,
@@ -65,7 +65,7 @@ TRACE_EVENT(amdgpu_cs_ioctl,
 			   __entry->sched_job = &job->base;
 			   __entry->ib = job->ibs;
 			   __entry->fence = &job->base.s_fence->base;
-			   __entry->ring_name = job->ibs[0].ring->name;
+			   __entry->ring_name = job->ring->name;
 			   __entry->num_ibs = job->num_ibs;
 			   ),
 	    TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
@@ -90,7 +90,7 @@ TRACE_EVENT(amdgpu_sched_run_job,
 			   __entry->sched_job = &job->base;
 			   __entry->ib = job->ibs;
 			   __entry->fence = &job->base.s_fence->base;
-			   __entry->ring_name = job->ibs[0].ring->name;
+			   __entry->ring_name = job->ring->name;
 			   __entry->num_ibs = job->num_ibs;
 			   ),
 	    TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
@@ -100,18 +100,21 @@ TRACE_EVENT(amdgpu_sched_run_job,


 TRACE_EVENT(amdgpu_vm_grab_id,
-	    TP_PROTO(unsigned vmid, int ring),
-	    TP_ARGS(vmid, ring),
+	    TP_PROTO(struct amdgpu_vm *vm, unsigned vmid, int ring),
+	    TP_ARGS(vm, vmid, ring),
 	    TP_STRUCT__entry(
+			     __field(struct amdgpu_vm *, vm)
 			     __field(u32, vmid)
 			     __field(u32, ring)
 			     ),

 	    TP_fast_assign(
+			   __entry->vm = vm;
 			   __entry->vmid = vmid;
 			   __entry->ring = ring;
 			   ),
-	    TP_printk("vmid=%u, ring=%u", __entry->vmid, __entry->ring)
+	    TP_printk("vm=%p, id=%u, ring=%u", __entry->vm, __entry->vmid,
+		      __entry->ring)
 );

 TRACE_EVENT(amdgpu_vm_bo_map,
@@ -247,42 +250,6 @@ TRACE_EVENT(amdgpu_bo_list_set,
 	    TP_printk("list=%p, bo=%p", __entry->list, __entry->bo)
 );

-DECLARE_EVENT_CLASS(amdgpu_semaphore_request,
-
-	    TP_PROTO(int ring, struct amdgpu_semaphore *sem),
-
-	    TP_ARGS(ring, sem),
-
-	    TP_STRUCT__entry(
-			     __field(int, ring)
-			     __field(signed, waiters)
-			     __field(uint64_t, gpu_addr)
-			     ),
-
-	    TP_fast_assign(
-			   __entry->ring = ring;
-			   __entry->waiters = sem->waiters;
-			   __entry->gpu_addr = sem->gpu_addr;
-			   ),
-
-	    TP_printk("ring=%u, waiters=%d, addr=%010Lx", __entry->ring,
-		      __entry->waiters, __entry->gpu_addr)
-);
-
-DEFINE_EVENT(amdgpu_semaphore_request, amdgpu_semaphore_signale,
-
-	    TP_PROTO(int ring, struct amdgpu_semaphore *sem),
-
-	    TP_ARGS(ring, sem)
-);
-
-DEFINE_EVENT(amdgpu_semaphore_request, amdgpu_semaphore_wait,
-
-	    TP_PROTO(int ring, struct amdgpu_semaphore *sem),
-
-	    TP_ARGS(ring, sem)
-);
-
 #endif

 /* This part must be outside protection */

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -31,7 +31,7 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev);
 int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 			      struct fence **fence);
 int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-			       struct fence **fence);
+			       bool direct, struct fence **fence);
 void amdgpu_uvd_free_handles(struct amdgpu_device *adev,
 			     struct drm_file *filp);
 int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx);

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -31,12 +31,9 @@ int amdgpu_vce_resume(struct amdgpu_device *adev);
 int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 			      struct fence **fence);
 int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
-			       struct fence **fence);
+			       bool direct, struct fence **fence);
 void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
 int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
-bool amdgpu_vce_ring_emit_semaphore(struct amdgpu_ring *ring,
-				    struct amdgpu_semaphore *semaphore,
-				    bool emit_wait);
 void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
 				unsigned flags);

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -2670,7 +2670,6 @@ static void dce_v10_0_crtc_destroy(struct drm_crtc *crtc)
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);

 	drm_crtc_cleanup(crtc);
-	destroy_workqueue(amdgpu_crtc->pflip_queue);
 	kfree(amdgpu_crtc);
 }

@@ -2890,7 +2889,6 @@ static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index)

 	drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
 	amdgpu_crtc->crtc_id = index;
-	amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue");
 	adev->mode_info.crtcs[index] = amdgpu_crtc;

 	amdgpu_crtc->max_cursor_width = 128;
@@ -3366,7 +3364,7 @@ static int dce_v10_0_pageflip_irq(struct amdgpu_device *adev,
 	spin_unlock_irqrestore(&adev->ddev->event_lock, flags);

 	drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
-	queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work);
+	schedule_work(&works->unpin_work);

 	return 0;
 }

--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -2661,7 +2661,6 @@ static void dce_v11_0_crtc_destroy(struct drm_crtc *crtc)
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);

 	drm_crtc_cleanup(crtc);
-	destroy_workqueue(amdgpu_crtc->pflip_queue);
 	kfree(amdgpu_crtc);
 }

@@ -2881,7 +2880,6 @@ static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index)

 	drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
 	amdgpu_crtc->crtc_id = index;
-	amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue");
 	adev->mode_info.crtcs[index] = amdgpu_crtc;

 	amdgpu_crtc->max_cursor_width = 128;
@@ -3361,7 +3359,7 @@ static int dce_v11_0_pageflip_irq(struct amdgpu_device *adev,
 	spin_unlock_irqrestore(&adev->ddev->event_lock, flags);

 	drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
-	queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work);
+	schedule_work(&works->unpin_work);

 	return 0;
 }

--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -2582,7 +2582,6 @@ static void dce_v8_0_crtc_destroy(struct drm_crtc *crtc)
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);

 	drm_crtc_cleanup(crtc);
-	destroy_workqueue(amdgpu_crtc->pflip_queue);
 	kfree(amdgpu_crtc);
 }

@@ -2809,7 +2808,6 @@ static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index)

 	drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
 	amdgpu_crtc->crtc_id = index;
-	amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue");
 	adev->mode_info.crtcs[index] = amdgpu_crtc;

 	amdgpu_crtc->max_cursor_width = CIK_CURSOR_WIDTH;
@@ -3375,7 +3373,7 @@ static int dce_v8_0_pageflip_irq(struct amdgpu_device *adev,
 	spin_unlock_irqrestore(&adev->ddev->event_lock, flags);

 	drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
-	queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work);
+	schedule_work(&works->unpin_work);

 	return 0;
 }

--- a/drivers/gpu/drm/amd/amdgpu/fiji_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/fiji_smc.c
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
--- a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
--- a/drivers/gpu/drm/amd/amdgpu/tonga_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_smc.c
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -73,6 +73,7 @@ enum amd_ip_block_type {
 	AMD_IP_BLOCK_TYPE_SDMA,
 	AMD_IP_BLOCK_TYPE_UVD,
 	AMD_IP_BLOCK_TYPE_VCE,
+	AMD_IP_BLOCK_TYPE_ACP,
 };

 enum amd_clockgating_state {

--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h
--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_enum.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_enum.h
--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h
--- a/drivers/gpu/drm/amd/include/ivsrcid/ivsrcid_vislands30.h
+++ b/drivers/gpu/drm/amd/include/ivsrcid/ivsrcid_vislands30.h
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
--- a/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h
--- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
--- a/drivers/gpu/drm/radeon/r600_dma.c
+++ b/drivers/gpu/drm/radeon/r600_dma.c
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
--- a/drivers/gpu/drm/radeon/radeon_vce.c
+++ b/drivers/gpu/drm/radeon/radeon_vce.c
--- a/drivers/gpu/drm/radeon/uvd_v1_0.c
+++ b/drivers/gpu/drm/radeon/uvd_v1_0.c