diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index f9cb0d2cc49f90d966e268c60a5ca7bf244c761f..7bfec6d521b015ae4ef7325d23b655d1b685c26f 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -55,7 +55,13 @@ static void __dma_page_cpu_to_dev(struct page *, unsigned long,
 		size_t, enum dma_data_direction);
 static void __dma_page_dev_to_cpu(struct page *, unsigned long,
 		size_t, enum dma_data_direction);
+static void *
+__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
+	const void *caller);
+static void __dma_free_remap(void *cpu_addr, size_t size, bool no_warn);
+static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs,
+		pgprot_t prot);
 
 /**
  * arm_dma_map_page - map a portion of a page for streaming DMA
  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
@@ -125,6 +131,41 @@ static void arm_dma_sync_single_for_device(struct device *dev,
 	__dma_page_cpu_to_dev(page, offset, size, dir);
 }
 
+static void *arm_dma_remap(struct device *dev, void *cpu_addr,
+			dma_addr_t handle, size_t size,
+			struct dma_attrs *attrs)
+{
+	struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
+	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL);
+	unsigned long offset = handle & ~PAGE_MASK;
+	void *addr;
+
+	size = PAGE_ALIGN(size + offset);
+	addr = __dma_alloc_remap(page, size, GFP_KERNEL, prot,
+				 __builtin_return_address(0));
+	if (!addr)
+		return NULL;
+
+	return addr + offset;
+}
+
+static void arm_dma_unremap(struct device *dev, void *remapped_addr,
+				size_t size)
+{
+	unsigned int flags = VM_ARM_DMA_CONSISTENT | VM_USERMAP;
+	struct vm_struct *area;
+
+	remapped_addr = (void *)((unsigned long)remapped_addr & PAGE_MASK);
+
+	area = find_vm_area(remapped_addr);
+	if (!area || (area->flags & flags) != flags) {
+		WARN(1, "trying to free invalid coherent area: %p\n",
+				remapped_addr);
+		return;
+	}
+	vunmap(remapped_addr);
+}
+
 struct dma_map_ops arm_dma_ops = {
 	.alloc			= arm_dma_alloc,
 	.free			= arm_dma_free,
@@ -139,6 +180,8 @@ struct dma_map_ops arm_dma_ops = {
 	.sync_sg_for_cpu	= arm_dma_sync_sg_for_cpu,
 	.sync_sg_for_device	= arm_dma_sync_sg_for_device,
 	.set_dma_mask		= arm_dma_set_mask,
+	.remap			= arm_dma_remap,
+	.unremap		= arm_dma_unremap,
 };
 EXPORT_SYMBOL(arm_dma_ops);
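For illustration, a minimal sketch of how a driver might exercise this pair of hooks. The helper name and the use of DMA_ATTR_NO_KERNEL_MAPPING are assumptions for the example, not part of this patch; the attr is simply the most natural reason a buffer would lack a permanent kernel mapping:

	#include <linux/dma-mapping.h>
	#include <linux/string.h>

	/* Hypothetical example: allocate without a kernel mapping, then
	 * map the buffer in only while the CPU needs to touch it. */
	static int example_touch_buffer(struct device *dev, size_t size)
	{
		DEFINE_DMA_ATTRS(attrs);
		dma_addr_t handle;
		void *cookie, *vaddr;

		dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
		cookie = dma_alloc_attrs(dev, size, &handle, GFP_KERNEL,
					 &attrs);
		if (!cookie)
			return -ENOMEM;

		vaddr = dma_remap(dev, cookie, handle, size, &attrs);
		if (vaddr) {
			memset(vaddr, 0, size);	/* CPU access via the mapping */
			dma_unremap(dev, vaddr, size);
		}

		dma_free_attrs(dev, size, cookie, handle, &attrs);
		return 0;
	}

The cookie returned by dma_alloc_attrs() is treated as opaque throughout; only the mapping returned by dma_remap() is dereferenced.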
 
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 4a8cf81ad2053c71cd08c5606f1f98c31f3cffe9..38f363b71e9629587b77dc2f71931b4b9a46cfad 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -24,6 +24,8 @@
 #include <linux/dma-contiguous.h>
 #include <linux/vmalloc.h>
 #include <linux/swiotlb.h>
+#include <linux/sched.h>
+#include <linux/io.h>
 
 #include <asm/cacheflush.h>
 
@@ -263,7 +265,51 @@ int arm64_swiotlb_mmap(struct device *dev, struct vm_area_struct *vma,
 	return ret;
 }
 
+static void *arm64_dma_remap(struct device *dev, void *cpu_addr,
+			dma_addr_t handle, size_t size,
+			struct dma_attrs *attrs)
+{
+	struct page *page = phys_to_page(dma_to_phys(dev, handle));
+	pgprot_t prot = __get_dma_pgprot(PAGE_KERNEL, attrs);
+	unsigned long offset = handle & ~PAGE_MASK;
+	struct vm_struct *area;
+	unsigned long addr;
+
+	size = PAGE_ALIGN(size + offset);
+
+	/*
+	 * DMA allocations can be mapped to user space, so let's set
+	 * the VM_USERMAP flag too.
+	 */
+	area = get_vm_area(size, VM_USERMAP);
+	if (!area)
+		return NULL;
+
+	addr = (unsigned long)area->addr;
+	area->phys_addr = __pfn_to_phys(page_to_pfn(page));
+
+	if (ioremap_page_range(addr, addr + size, area->phys_addr, prot)) {
+		vunmap((void *)addr);
+		return NULL;
+	}
+	return (void *)addr + offset;
+}
 
+static void arm64_dma_unremap(struct device *dev, void *remapped_addr,
+				size_t size)
+{
+	struct vm_struct *area;
+
+	remapped_addr = (void *)((unsigned long)remapped_addr & PAGE_MASK);
+
+	area = find_vm_area(remapped_addr);
+	if (!area) {
+		WARN(1, "trying to free invalid coherent area: %p\n",
+			remapped_addr);
+		return;
+	}
+	vunmap(remapped_addr);
+}
 
 struct dma_map_ops noncoherent_swiotlb_dma_ops = {
 	.alloc = arm64_swiotlb_alloc_noncoherent,
@@ -279,6 +325,8 @@ struct dma_map_ops noncoherent_swiotlb_dma_ops = {
 	.sync_sg_for_device = arm64_swiotlb_sync_sg_for_device,
 	.dma_supported = swiotlb_dma_supported,
 	.mapping_error = swiotlb_dma_mapping_error,
+	.remap = arm64_dma_remap,
+	.unremap = arm64_dma_unremap,
 };
 EXPORT_SYMBOL(noncoherent_swiotlb_dma_ops);
 
@@ -295,6 +343,8 @@ struct dma_map_ops coherent_swiotlb_dma_ops = {
 	.sync_sg_for_device = swiotlb_sync_sg_for_device,
 	.dma_supported = swiotlb_dma_supported,
 	.mapping_error = swiotlb_dma_mapping_error,
+	.remap = arm64_dma_remap,
+	.unremap = arm64_dma_unremap,
 };
 EXPORT_SYMBOL(coherent_swiotlb_dma_ops);
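The arm64 hook above open-codes get_vm_area() plus ioremap_page_range(). For page-backed, physically contiguous buffers the same mapping could also be expressed with vmap(); the following is a sketch of that alternative, not part of this patch, with a hypothetical helper name:

	#include <linux/mm.h>
	#include <linux/slab.h>
	#include <linux/vmalloc.h>

	/* Hypothetical alternative: build a pages[] array for a
	 * physically contiguous buffer and let vmap() install the
	 * mapping; the caller tears it down with vunmap(). */
	static void *example_remap_via_vmap(struct page *page, size_t size,
					    pgprot_t prot)
	{
		unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
		struct page **pages;
		unsigned int i;
		void *vaddr;

		pages = kcalloc(count, sizeof(*pages), GFP_KERNEL);
		if (!pages)
			return NULL;

		for (i = 0; i < count; i++)
			pages[i] = nth_page(page, i);

		vaddr = vmap(pages, count, VM_MAP | VM_USERMAP, prot);
		kfree(pages);
		return vaddr;
	}

The trade-off is the transient pages[] allocation; the ioremap_page_range() version in the patch avoids it by mapping directly from the physical address.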
 
diff --git a/drivers/base/dma-removed.c b/drivers/base/dma-removed.c
index 004827a65e4893684e401d6baf947a83910fbdbf..bfd3e3adabc0ea2beaa43766f4fad8c07c4b9ea1 100644
--- a/drivers/base/dma-removed.c
+++ b/drivers/base/dma-removed.c
@@ -135,6 +135,17 @@ void removed_sync_sg_for_device(struct device *dev,
 	return;
 }
 
+void *removed_remap(struct device *dev, void *cpu_addr, dma_addr_t handle,
+			size_t size, struct dma_attrs *attrs)
+{
+	return (void __force *)ioremap(handle, size);
+}
+
+void removed_unremap(struct device *dev, void *remapped_address, size_t size)
+{
+	iounmap((void __force __iomem *)remapped_address);
+}
+
 struct dma_map_ops removed_dma_ops = {
 	.alloc			= removed_alloc,
 	.free			= removed_free,
@@ -147,6 +158,8 @@ struct dma_map_ops removed_dma_ops = {
 	.sync_single_for_device	= removed_sync_single_for_device,
 	.sync_sg_for_cpu	= removed_sync_sg_for_cpu,
 	.sync_sg_for_device	= removed_sync_sg_for_device,
+	.remap			= removed_remap,
+	.unremap		= removed_unremap,
 };
 EXPORT_SYMBOL(removed_dma_ops);
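How a device ends up on these ops is outside this patch. As a hedged sketch, a carveout-backed device could be pointed at removed_dma_ops at probe time, after which dma_remap()/dma_unremap() resolve to the ioremap-based hooks above; set_dma_ops() is assumed to be available on the architecture (it is on ARM), and the function name here is hypothetical:

	#include <linux/device.h>
	#include <linux/dma-mapping.h>

	extern struct dma_map_ops removed_dma_ops;

	/* Hypothetical probe-time hookup for a removed-memory carveout. */
	static void example_attach_removed_ops(struct device *dev)
	{
		set_dma_ops(dev, &removed_dma_ops);
	}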
 
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 94af418585135a6fdaabb987e284caad4a8b1e99..801e91df7ba9e9aa605fab84c834a5a438193f00 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -50,6 +50,11 @@ struct dma_map_ops {
 	int (*mapping_error)(struct device *dev, dma_addr_t dma_addr);
 	int (*dma_supported)(struct device *dev, u64 mask);
 	int (*set_dma_mask)(struct device *dev, u64 mask);
+	void *(*remap)(struct device *dev, void *cpu_addr,
+			dma_addr_t dma_handle, size_t size,
+			struct dma_attrs *attrs);
+	void (*unremap)(struct device *dev, void *remapped_address,
+			size_t size);
 #ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
 	u64 (*get_required_mask)(struct device *dev);
 #endif
@@ -78,6 +83,45 @@ static inline int is_device_dma_capable(struct device *dev)
 #include <asm-generic/dma-mapping-broken.h>
 #endif
 
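+/*
+ * dma_remap()/dma_unremap(): create and tear down a temporary kernel
+ * mapping for a buffer that already has a DMA address, typically one
+ * allocated without a permanent kernel mapping. Architectures opt in
+ * by providing the ->remap/->unremap hooks; otherwise dma_remap()
+ * WARNs once and returns NULL.
+ */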
+static inline void *dma_remap(struct device *dev, void *cpu_addr,
+		dma_addr_t dma_handle, size_t size, struct dma_attrs *attrs)
+{
+	const struct dma_map_ops *ops = get_dma_ops(dev);
+
+	BUG_ON(!ops);
+
+	if (!ops->remap) {
+		WARN_ONCE(1, "Remap function not implemented for %pS\n",
+				ops);
+		return NULL;
+	}
+
+	return ops->remap(dev, cpu_addr, dma_handle, size, attrs);
+}
+
+static inline void dma_unremap(struct device *dev, void *remapped_addr,
+				size_t size)
+{
+	const struct dma_map_ops *ops = get_dma_ops(dev);
+
+	BUG_ON(!ops);
+
+	if (!ops->unremap) {
+		WARN_ONCE(1, "Unremap function not implemented for %pS\n",
+				ops);
+		return;
+	}
+
+	ops->unremap(dev, remapped_addr, size);
+}
+
 static inline u64 dma_get_mask(struct device *dev)
 {
 	if (dev && dev->dma_mask && *dev->dma_mask)