ikwzm / udmabuf

User space mappable dma buffer device driver for Linux.

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Attempting to allocate a large buffer in the CMA area of reserved_memory fails to allocate pages

ikwzm opened this issue · comments

Related issues: #60, #97
Board: Xilinx ZCU102
Chip: ZynqMP
Arch: arm64
OS: Petalinux 2020.2 (Linux 5.4.0-xilinx-v2020.2)

Device Tree:

/include/ "system-conf.dtsi"
/ {
   memory {
       #address-cells = <2>;
       #size-cells = <2>;
       device_type = "memory";
       reg = <0x0 0x0 0x0 0x80000000>, <0x00000008 0x0 0x0 0x80000000>;
   };
   reserved-memory {
   	#address-cells = <2>;
   	#size-cells = <2>;
   	ranges;
   	image_buf0: image_buf@0 {
   		compatible = "shared-dma-pool";
   		reusable;
   		reg = <0x00000008 0x0 0x0 0x80000000>;
                alignment = <0x0 0x1000>;
   		label = "image_buf0";
   	};
   };
   udmabuf@0 {
   	compatible = "ikwzm,u-dma-buf";
   	device-name = "udmabuf0";
   	size = <0x80000000>;
        dma-coherent;
   	memory-region = <&image_buf0>;
        dma-mask = <64>;
   };

    chosen {
        bootargs = "earlycon clk_ignore_unused   uio_pdrv_genirq.of_id=generic-uio root=/dev/mmcblk0p2 rw rootwait";
        stdout-path = "serial0:115200n8";
    };

};

The error information:

root@xilinx-zcu102-2020_2:~# insmod /home/linaro/test/u-dma-buf.ko
[   29.126201] u_dma_buf: loading out-of-tree module taints kernel.
[   29.134407] u-dma-buf udmabuf@0: assigned reserved memory node image_buf@0
[   29.212638] cma: cma_alloc: alloc failed, req-size: 524288 pages, ret: -16
[   29.219538] cma: number of available pages: 524288@0=> 524288 free of 524288 total pages
[   29.227691] ------------[ cut here ]------------
[   29.232304] WARNING: CPU: 1 PID: 1828 at mm/page_alloc.c:4738 __alloc_pages_nodemask+0x158/0x240
[   29.241081] Modules linked in: u_dma_buf(O+)
[   29.245345] CPU: 1 PID: 1828 Comm: insmod Tainted: G           O      5.4.0-xilinx-v2020.2 #1
[   29.253859] Hardware name: ZynqMP ZCU102 Rev1.0 (DT)
[   29.258815] pstate: 20000005 (nzCv daif -PAN -UAO)
[   29.263599] pc : __alloc_pages_nodemask+0x158/0x240
[   29.268470] lr : __dma_direct_alloc_pages+0x118/0x1c8
[   29.273510] sp : ffff8000140ab780
[   29.276816] x29: ffff8000140ab780 x28: 0000000000000100
[   29.282121] x27: ffffffffffffffff x26: ffff0000634015c0
[   29.287425] x25: 0000000000000000 x24: 0000000080000000
[   29.292729] x23: 0000000080000000 x22: ffff000066b43810
[   29.298033] x21: 0000000000000013 x20: ffff000066042fc0
[   29.303337] x19: 0000000000000cc0 x18: 0000000000000030
[   29.308641] x17: 0000aaaad451aff0 x16: 0000aaaad451aff0
[   29.313945] x15: ffff800011195000 x14: ffff80001123b63a
[   29.319249] x13: 0000000000000000 x12: ffff80001123a000
[   29.324553] x11: ffff800011195000 x10: 0000000000000000
[   29.329857] x9 : 0000000000000007 x8 : 00000000000001c3
[   29.335161] x7 : 0000000000000001 x6 : 0000000000000001
[   29.340465] x5 : 0000000000000000 x4 : 000000000000003f
[   29.345769] x3 : 0000000000000000 x2 : 0000000000000000
[   29.351073] x1 : 0000000000000000 x0 : 0000000000000cc0
[   29.356377] Call trace:
[   29.358818]  __alloc_pages_nodemask+0x158/0x240
[   29.363341]  __dma_direct_alloc_pages+0x118/0x1c8
[   29.368037]  dma_direct_alloc_pages+0x28/0xe8
[   29.372386]  dma_direct_alloc+0x4c/0x58
[   29.376214]  dma_alloc_attrs+0x7c/0xe8
[   29.379965]  udmabuf_platform_driver_probe+0x480/0x968 [u_dma_buf]
[   29.386140]  platform_drv_probe+0x50/0xa0
[   29.390140]  really_probe+0xd8/0x2f8
[   29.393707]  driver_probe_device+0x54/0xe8
[   29.397795]  device_driver_attach+0x6c/0x78
[   29.401970]  __driver_attach+0x54/0xd0
[   29.405711]  bus_for_each_dev+0x6c/0xc0
[   29.409540]  driver_attach+0x20/0x28
[   29.413108]  bus_add_driver+0x148/0x1e0
[   29.416936]  driver_register+0x60/0x110
[   29.420765]  __platform_driver_register+0x44/0x50
[   29.425465]  u_dma_buf_init+0x22c/0x1000 [u_dma_buf]
[   29.430419]  do_one_initcall+0x50/0x190
[   29.434247]  do_init_module+0x50/0x1f0
[   29.437987]  load_module+0x1ca4/0x2218
[   29.441728]  __do_sys_finit_module+0xd0/0xe8
[   29.445991]  __arm64_sys_finit_module+0x1c/0x28
[   29.450516]  el0_svc_common.constprop.0+0x68/0x160
[   29.455298]  el0_svc_handler+0x6c/0x88
[   29.459038]  el0_svc+0x8/0xc
[   29.461910] ---[ end trace 16f0ccc79524ba8a ]---
[   29.466543] dma_alloc_coherent(size=2147483648) failed. return(0)
[   29.472638] u-dma-buf udmabuf@0: driver setup failed. return=-12
[   29.478824] u-dma-buf udmabuf@0: driver installed.
[   29.483623] u-dma-buf: probe of udmabuf@0 failed with error -12

Does this issue only happen on Arm64 when the CMA region is over 256MB?

Does this issue only happen on Arm64 when the CMA region is over 256MB?

The cause is currently under investigation, but I guess it's some kind of Linux Kernel limitation.

The Linux Kernel issued a warning at __alloc_pages_nodemask() in mm/page_alloc.c.

struct page *
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
							nodemask_t *nodemask)
{
	struct page *page;
	unsigned int alloc_flags = ALLOC_WMARK_LOW;
	gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */
	struct alloc_context ac = { };

	/*
	 * There are several places where we assume that the order value is sane
	 * so bail out early if the request is out of bound.
	 */
	if (unlikely(order >= MAX_ORDER)) {
		WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
		return NULL;
	}

__alloc_pages_nodemask() is in turn reached through alloc_pages_node(), which is called from __dma_direct_alloc_pages() in kernel/dma/direct.c.

/*
 * Allocate physically contiguous pages for a direct-mapped DMA buffer.
 *
 * @dev:        device the buffer is for; its node and coherent_dma_mask
 *              are consulted below
 * @size:       requested size in bytes (rounded up to a page multiple for
 *              the allocation itself)
 * @dma_handle: not referenced anywhere in this function body
 * @gfp:        allocation flags; adjusted locally before use
 * @attrs:      DMA attributes; only DMA_ATTR_NO_WARN is examined here
 *
 * Returns the allocated page, or NULL if no suitable pages were obtained.
 */
struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
	size_t alloc_size = PAGE_ALIGN(size);
	int node = dev_to_node(dev);
	struct page *page = NULL;
	u64 phys_mask;

	/* Caller asked for silent failure: propagate that to the allocator. */
	if (attrs & DMA_ATTR_NO_WARN)
		gfp |= __GFP_NOWARN;

	/* we always manually zero the memory once we are done: */
	gfp &= ~__GFP_ZERO;
	/*
	 * OR in the flags returned by __dma_direct_optimal_gfp_mask() for the
	 * device's coherent mask. phys_mask is passed by pointer and read
	 * further down, so it is presumably filled in by the helper.
	 */
	gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
			&phys_mask);
	/*
	 * First attempt: dma_alloc_contiguous(). Discard the result if
	 * dma_coherent_ok() rejects its physical address range.
	 */
	page = dma_alloc_contiguous(dev, alloc_size, gfp);
	if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
		dma_free_contiguous(dev, page, alloc_size);
		page = NULL;
	}
again:
	/*
	 * Fallback: request one block of order get_order(alloc_size) from the
	 * page allocator. The quoted __alloc_pages_nodemask() returns NULL
	 * (warning once unless __GFP_NOWARN is set) when that order is
	 * >= MAX_ORDER, so very large requests cannot succeed on this path.
	 */
	if (!page)
		page = alloc_pages_node(node, gfp, get_order(alloc_size));
	if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
		/*
		 * NOTE(review): this path frees with `size`, whereas the earlier
		 * path frees with `alloc_size` - confirm whether that is intended.
		 */
		dma_free_contiguous(dev, page, size);
		page = NULL;

		/*
		 * Retry with GFP_DMA32 added, but only when CONFIG_ZONE_DMA32 is
		 * enabled, the mask is narrower than 64 bits, and neither
		 * GFP_DMA32 nor GFP_DMA has been tried yet.
		 */
		if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
		    phys_mask < DMA_BIT_MASK(64) &&
		    !(gfp & (GFP_DMA32 | GFP_DMA))) {
			gfp |= GFP_DMA32;
			goto again;
		}

		/* Last resort: swap GFP_DMA32 for GFP_DMA and retry once more. */
		if (IS_ENABLED(CONFIG_ZONE_DMA) && !(gfp & GFP_DMA)) {
			gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
			goto again;
		}
	}

	return page;
}

And MAX_ORDER is defined in include/linux/mmzone.h.

/* Free memory management - zoned buddy allocator.  */
/*
 * MAX_ORDER defaults to 11 and can be overridden at build time through
 * CONFIG_FORCE_MAX_ZONEORDER.
 */
#ifndef CONFIG_FORCE_MAX_ZONEORDER
#define MAX_ORDER 11
#else
#define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER
#endif
/*
 * Number of pages in a block of order MAX_ORDER - 1
 * (1024 pages with the default MAX_ORDER of 11).
 */
#define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))

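Judging from the log, the CMA allocation fails first (cma_alloc returns -16), and the fallback attempt to allocate the pages from the page allocator then seems to exceed a limit of the Linux kernel (the requested order is >= MAX_ORDER).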

This limit seems to vary with the Linux kernel version and the architecture.
Since u-dma-buf operates within the framework of the Linux kernel, it has to accept the kernel's limitations as they are.
Therefore, I cannot solve this problem in u-dma-buf, because solving it would require changing the Linux kernel itself.
It is also not possible to predict what this limit will be in future kernel versions.

For now, I only want to alert you that there are limits on the amount of buffer space that u-dma-buf can allocate.