openbmc / linux

OpenBMC Linux kernel source tree

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

ftgmac: device driver frees DMA memory with different size

shenki opened this issue · comments

v5.11 with CONFIG_DMA_API_DEBUG on qemu, tacoma machine.

[    5.149278] ------------[ cut here ]------------
[    5.149828] WARNING: CPU: 0 PID: 0 at kernel/dma/debug.c:973 check_unmap+0x1f8/0x9c4
[    5.152714] DMA-API: ftgmac100 1e670000.ftgmac: device driver frees DMA memory with different size [device address=0x0000000081f7f8c2] [map size=1536 bytes] [unmap size=42 bytes]
[    5.153883] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.11.0-00002-g65303e260692 #14
[    5.154692] Hardware name: Generic DT based system
[    5.155353] Backtrace: 
[    5.155816] [<80867a20>] (dump_backtrace) from [<80867c74>] (show_stack+0x20/0x24)
[    5.156449]  r7:00000009 r6:60000193 r5:00000000 r4:80c8fbb4
[    5.156761] [<80867c54>] (show_stack) from [<80873528>] (dump_stack+0xc8/0xe4)
[    5.157128] [<80873460>] (dump_stack) from [<801223c0>] (__warn+0xe4/0x150)
[    5.157463]  r7:00000009 r6:000003cd r5:8019f4fc r4:80a04bc0
[    5.157859] [<801222dc>] (__warn) from [<808683f0>] (warn_slowpath_fmt+0xa0/0xe4)
[    5.158373]  r7:8019f4fc r6:000003cd r5:80a04bc0 r4:80a05328
[    5.158731] [<80868354>] (warn_slowpath_fmt) from [<8019f4fc>] (check_unmap+0x1f8/0x9c4)
[    5.159093]  r8:00000600 r7:80c01c80 r6:80c479ac r5:80cc604c r4:811d2380
[    5.159378] [<8019f304>] (check_unmap) from [<8019fd6c>] (debug_dma_unmap_page+0xa4/0xb4)
[    5.159762]  r10:81e46540 r9:81e22900 r8:81153010 r7:81f7f8c2 r6:0000002a r5:00000002
[    5.160159]  r4:80c01c80
[    5.160331] [<8019fcc8>] (debug_dma_unmap_page) from [<8019a970>] (dma_unmap_page_attrs+0xbc/0x138)
[    5.160845]  r8:80901c78 r7:81e46000 r6:0000002a r5:00000000 r4:8011652c
[    5.161204] [<8019a8b4>] (dma_unmap_page_attrs) from [<80598394>] (ftgmac100_poll+0x2f8/0x4a0)
[    5.161703]  r10:81e46540 r9:81e22900 r8:be044000 r7:81e46000 r6:0000002a r5:00000000
[    5.162113]  r4:81e46598
[    5.162281] [<8059809c>] (ftgmac100_poll) from [<806df920>] (net_rx_action+0x194/0x54c)
[    5.162765]  r10:00000040 r9:0000012c r8:80c01ddc r7:80c00000 r6:ffff8ccc r5:00000000
[    5.163202]  r4:81e46598
[    5.163374] [<806df78c>] (net_rx_action) from [<8010157c>] (__do_softirq+0xec/0x3c8)
[    5.163875]  r10:00000008 r9:80b68b7c r8:00000100 r7:80c00000 r6:00000003 r5:80c0308c
[    5.164289]  r4:00000004
[    5.164449] [<80101490>] (__do_softirq) from [<801297d4>] (irq_exit+0xc0/0x11c)
[    5.164877]  r10:80c01ee0 r9:80b68b7c r8:81095000 r7:00000001 r6:00000000 r5:80b694d4
[    5.165297]  r4:ffffe000
[    5.165460] [<80129714>] (irq_exit) from [<8017d6a8>] (__handle_domain_irq+0x8c/0xe8)
[    5.165908]  r5:80b694d4 r4:00000000
[    5.166122] [<8017d61c>] (__handle_domain_irq) from [<801013f8>] (gic_handle_irq+0x88/0xac)
[    5.166614]  r9:80b68b7c r8:00000000 r7:bf80200c r6:80c063ec r5:000003ff r4:bf802000
[    5.167026] [<80101370>] (gic_handle_irq) from [<80100b6c>] (__irq_svc+0x6c/0x90)
[    5.167605] Exception stack(0x80c01ee0 to 0x80c01f28)
[    5.168346] 1ee0: 000286ac 00000000 bd7d3144 8011c280 00000000 80c00000 80c05d14 80c05d50
[    5.168989] 1f00: 80c937db 80a007a0 10c5387d 80c01f3c 80c01f40 80c01f30 80108d80 80108d84
[    5.169608] 1f20: 60000013 ffffffff
[    5.170014]  r10:10c5387d r9:80c00000 r8:80c937db r7:80c01f14 r6:ffffffff r5:60000013
[    5.170503]  r4:80108d84
[    5.170665] [<80108d44>] (arch_cpu_idle) from [<8087c610>] (default_idle_call+0x38/0x100)
[    5.171158] [<8087c5d8>] (default_idle_call) from [<8015b720>] (do_idle+0xcc/0x150)
[    5.171717] [<8015b654>] (do_idle) from [<8015bac0>] (cpu_startup_entry+0x28/0x2c)
[    5.172225]  r9:809f8ea0 r8:80b3aa5c r7:80c05cc0 r6:ffffffff r5:00000001 r4:000000d9
[    5.172715] [<8015ba98>] (cpu_startup_entry) from [<80873e74>] (rest_init+0xa4/0xc4)
[    5.173231] [<80873dd0>] (rest_init) from [<80b00c3c>] (arch_call_rest_init+0x18/0x1c)
[    5.173912]  r5:00000001 r4:80cb1040
[    5.174112] [<80b00c24>] (arch_call_rest_init) from [<80b01168>] (start_kernel+0x4b0/0x590)
[    5.174640] [<80b00cb8>] (start_kernel) from [<00000000>] (0x0)
[    5.175193] ---[ end trace c0c37fca2f5238b6 ]---
[    5.175600] DMA-API: Mapped at:
[    5.175929]  debug_dma_map_page+0x58/0x110
[    5.176410]  dma_map_page_attrs+0x1b0/0x3d0
[    5.176712]  ftgmac100_alloc_rx_buf.constprop.0+0xb8/0x1d8
[    5.177082]  ftgmac100_init_all+0xd8/0x2c4
[    5.177387]  ftgmac100_open+0x17c/0x254

From some cursory code inspection, it looks like this stems from the cycle-shaving optimization in ftgmac100_rx_packet() added in commit 7b49cd1:

#if defined(CONFIG_ARM) && !defined(CONFIG_ARM_DMA_USE_IOMMU)
	/* When we don't have an iommu, we can save cycles by not
	 * invalidating the cache for the part of the packet that
	 * wasn't received.
	 */
	dma_unmap_single(priv->dev, map, size, DMA_FROM_DEVICE);
#else
	dma_unmap_single(priv->dev, map, RX_BUF_SIZE, DMA_FROM_DEVICE);
#endif

(in contrast with the dma_map_single() call in ftgmac100_alloc_rx_buf() using RX_BUF_SIZE). Should that part just be backed out, or might there be some reason that's a "legitimate" mangling of what I gather are the rules of the API?

With this patch applied I haven't seen any instances of the check_unmap() warning:

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index e04bb9d6a9af..8391c7ae443e 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -540,16 +540,7 @@ static bool ftgmac100_rx_packet(struct ftgmac100 *priv, int *processed)
 	/* Tear down DMA mapping, do necessary cache management */
 	map = le32_to_cpu(rxdes->rxdes3);
 
-#if defined(CONFIG_ARM) && !defined(CONFIG_ARM_DMA_USE_IOMMU)
-	/* When we don't have an iommu, we can save cycles by not
-	 * invalidating the cache for the part of the packet that
-	 * wasn't received.
-	 */
-	dma_unmap_single(priv->dev, map, size, DMA_FROM_DEVICE);
-#else
 	dma_unmap_single(priv->dev, map, RX_BUF_SIZE, DMA_FROM_DEVICE);
-#endif
-
 
 	/* Resplenish rx ring */
 	ftgmac100_alloc_rx_buf(priv, pointer, rxdes, GFP_ATOMIC);

This is purely cycle shaving. I remember it making quite a measurable difference on the ast2400 and ast2500 chips though.

Unfortunately the API doesn't provide a way to specify how much data was actually DMAed, which for network tx would be a legitimate optimisation.