Commit f6e71eaa authored by Iliyan Malchev

kgsl: do not vmap/memset to zero-out pages


b/18402205 External reports: Video playback failing on Flo after upgrade to
	   Lollipop

Change-Id: I358328ba2bd543d77e4218f32b0695c2f6f6e6c9
Signed-off-by: Iliyan Malchev <malchev@google.com>
parent d4a22760
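What the change amounts to: instead of collecting every allocated page into a pages[] array and zeroing the whole range through a chunked vmap()/memset()/dmac_flush_range() pass, the page allocator is asked for pre-zeroed pages (__GFP_ZERO) and each page is only flushed to memory through a short kmap_atomic() mapping so the GPU sees the zeroes. A minimal sketch of the new path follows; it assumes an ARM kernel context like this driver's (dmac_flush_range() is ARM-specific), and the helper name zero_alloc_and_flush() plus the simplified error handling are illustrative, not part of the commit:

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <asm/cacheflush.h>

/*
 * Illustrative sketch of the approach this commit switches to: let the
 * page allocator hand back zeroed pages (__GFP_ZERO), then push each
 * page's cache lines out to memory through a transient kmap_atomic()
 * mapping.  No pages[] array, no vmap()/memset() pass.
 */
static struct page *zero_alloc_and_flush(unsigned int page_size, gfp_t gfp_mask)
{
        struct page *page;
        int j;

        page = alloc_pages(gfp_mask | __GFP_ZERO, get_order(page_size));
        if (page == NULL)
                return NULL;

        for (j = 0; j < page_size >> PAGE_SHIFT; j++) {
                struct page *p = nth_page(page, j);
                void *ptr = kmap_atomic(p);

                /* flush the zeroed page so the GPU reads zeroes, not stale cache lines */
                dmac_flush_range(ptr, ptr + PAGE_SIZE);
                kunmap_atomic(ptr);
        }

        return page;
}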
@@ -579,13 +579,10 @@ _kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc,
                         struct kgsl_pagetable *pagetable,
                         size_t size)
 {
-        int pcount = 0, order, ret = 0;
-        int j, len, page_size, sglen_alloc, sglen = 0;
-        struct page **pages = NULL;
-        pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL);
+        int j, order, ret = 0;
+        int len, page_size, sglen_alloc, sglen = 0;
         void *ptr;
         unsigned int align;
-        int step = ((VMALLOC_END - VMALLOC_START)/8) >> PAGE_SHIFT;
 
         align = (memdesc->flags & KGSL_MEMALIGN_MASK) >> KGSL_MEMALIGN_SHIFT;
@@ -613,24 +610,6 @@ _kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc,
                 goto done;
         }
 
-        /*
-         * Allocate space to store the list of pages to send to vmap.
-         * This is an array of pointers so we can track 1024 pages per page
-         * of allocation. Since allocations can be as large as the user dares,
-         * we have to use the kmalloc/vmalloc trick here to make sure we can
-         * get the memory we need.
-         */
-
-        if ((memdesc->sglen_alloc * sizeof(struct page *)) > PAGE_SIZE)
-                pages = vmalloc(memdesc->sglen_alloc * sizeof(struct page *));
-        else
-                pages = kmalloc(PAGE_SIZE, GFP_KERNEL);
-
-        if (pages == NULL) {
-                ret = -ENOMEM;
-                goto done;
-        }
-
         kmemleak_not_leak(memdesc->sg);
 
         sg_init_table(memdesc->sg, memdesc->sglen_alloc);
@@ -656,6 +635,8 @@ _kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc,
                 else
                         gfp_mask |= GFP_KERNEL;
 
+                gfp_mask |= __GFP_ZERO;
+
                 page = alloc_pages(gfp_mask, get_order(page_size));
 
                 if (page == NULL) {
@@ -680,8 +661,12 @@ _kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc,
                         goto done;
                 }
 
-                for (j = 0; j < page_size >> PAGE_SHIFT; j++)
-                        pages[pcount++] = nth_page(page, j);
+                for (j = 0; j < page_size >> PAGE_SHIFT; j++) {
+                        struct page *p = nth_page(page, j);
+                        ptr = kmap_atomic(p);
+                        dmac_flush_range(ptr, ptr + PAGE_SIZE);
+                        kunmap_atomic(ptr);
+                }
 
                 sg_set_page(&memdesc->sg[sglen++], page, page_size, 0);
                 len -= page_size;
@@ -690,48 +675,6 @@ _kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc,
         memdesc->sglen = sglen;
         memdesc->size = size;
 
-        /*
-         * All memory that goes to the user has to be zeroed out before it gets
-         * exposed to userspace. This means that the memory has to be mapped in
-         * the kernel, zeroed (memset) and then unmapped. This also means that
-         * the dcache has to be flushed to ensure coherency between the kernel
-         * and user pages. We used to pass __GFP_ZERO to alloc_page which mapped
-         * zeroed and unmaped each individual page, and then we had to turn
-         * around and call flush_dcache_page() on that page to clear the caches.
-         * This was killing us for performance. Instead, we found it is much
-         * faster to allocate the pages without GFP_ZERO, map a chunk of the
-         * range ('step' pages), memset it, flush it and then unmap
-         * - this results in a factor of 4 improvement for speed for large
-         * buffers. There is a small decrease in speed for small buffers,
-         * but only on the order of a few microseconds at best. The 'step'
-         * size is based on a guess at the amount of free vmalloc space, but
-         * will scale down if there's not enough free space.
-         */
-        for (j = 0; j < pcount; j += step) {
-                step = min(step, pcount - j);
-
-                ptr = vmap(&pages[j], step, VM_IOREMAP, page_prot);
-
-                if (ptr != NULL) {
-                        memset(ptr, 0, step * PAGE_SIZE);
-                        dmac_flush_range(ptr, ptr + step * PAGE_SIZE);
-                        vunmap(ptr);
-                } else {
-                        int k;
-                        /* Very, very, very slow path */
-
-                        for (k = j; k < j + step; k++) {
-                                ptr = kmap_atomic(pages[k]);
-                                memset(ptr, 0, PAGE_SIZE);
-                                dmac_flush_range(ptr, ptr + PAGE_SIZE);
-                                kunmap_atomic(ptr);
-                        }
-                        /* scale down the step size to avoid this path */
-                        if (step > 1)
-                                step >>= 1;
-                }
-        }
-
         outer_cache_range_op_sg(memdesc->sg, memdesc->sglen,
                         KGSL_CACHE_OP_FLUSH);
@@ -744,11 +687,6 @@ done:
         KGSL_STATS_ADD(memdesc->size, kgsl_driver.stats.page_alloc,
                 kgsl_driver.stats.page_alloc_max);
 
-        if ((memdesc->sglen_alloc * sizeof(struct page *)) > PAGE_SIZE)
-                vfree(pages);
-        else
-                kfree(pages);
-
         if (ret)
                 kgsl_sharedmem_free(memdesc);