xsk_socket__create returns success, but sending packets only works after waiting about 30 more seconds
comphilip opened this issue · comments
Environment
Linux 5.15.0-91-generic #101-Ubuntu SMP Tue Nov 14 13:30:08 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux
NIC: Intel Corporation Ethernet Controller 10-Gigabit X540-AT2
driver: ixgbe
version: 5.15.0-91-generic
firmware-version: 0x00015e11, 1.3050.0
expansion-rom-version:
bus-info: 0000:81:00.0
supports-statistics: yes
supports-test: yes
supports-eeprom-access: yes
supports-register-dump: yes
supports-priv-flags: yes
Reproduce
AF_XDP initialization
/**
 * Create an AF_XDP socket on option.if_name / option.queue_id that shares umem_info,
 * optionally enable busy polling on it, and hand one batch of UMEM frames to the
 * kernel through the fill ring.
 *
 * The socket is requested in native-driver XDP mode with zero-copy binding;
 * XDP_USE_NEED_WAKEUP is requested only when option.busy_poll is set.
 *
 * @param umem_info UMEM (and its fill/completion rings) the socket is created on;
 *                  a pointer to it is stored in the returned object.
 * @param option    interface name, queue id and busy-poll switch.
 * @return the configured socket info, or null on failure with errno describing the
 *         cause. Once the socket has been created, every failure path deletes it
 *         again before returning.
 */
static std::unique_ptr<xsk_socket_info> xsk_configure_socket(xsk_umem_info& umem_info, const XDPSocketOption& option) {
    const unsigned int if_index = if_nametoindex(option.if_name.c_str());
    if (!if_index) {
        perror("failed to get ifindex");
        return nullptr;
    }

    // A plain new-expression throws on allocation failure, so no null check is needed.
    std::unique_ptr<xsk_socket_info> xsk_info(new xsk_socket_info());
    xsk_info->umem_info = &umem_info;

    xsk_socket_config xsk_cfg = {};
    xsk_cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
    xsk_cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
    xsk_cfg.xdp_flags = XDP_FLAGS_DRV_MODE;  // NIC driver should support XDP to achieve high performance
    xsk_cfg.bind_flags = XDP_ZEROCOPY;       // NIC driver should support zero copy to achieve high performance
    if (option.busy_poll)
        xsk_cfg.bind_flags |= XDP_USE_NEED_WAKEUP;
    xsk_cfg.libbpf_flags = 0;

    int ret = xsk_socket__create(&xsk_info->xsk, option.if_name.c_str(), option.queue_id, umem_info.umem,
                                 &xsk_info->rx, &xsk_info->tx, &xsk_cfg);
    if (ret) {
        errno = -ret;
        return nullptr;
    }

    // From here on the socket exists: every failure must delete it, otherwise the
    // socket is leaked. `err` is taken by value so that the cleanup call cannot
    // clobber the errno we want to report.
    const auto fail = [&xsk_info](int err) -> std::unique_ptr<xsk_socket_info> {
        xsk_socket__delete(xsk_info->xsk);
        xsk_info->xsk = nullptr;
        errno = err;
        return nullptr;
    };

    /* check XDP program loaded */
    unsigned int prog_id = 0;
    ret = bpf_xdp_query_id(if_index, xsk_cfg.xdp_flags, &prog_id);
    if (ret != 0)
        return fail(-ret);

    if (option.busy_poll) {
        const int fd = xsk_socket__fd(xsk_info->xsk);
        int sock_opt = 1;
        if (setsockopt(fd, SOL_SOCKET, SO_PREFER_BUSY_POLL, &sock_opt, sizeof(sock_opt)) < 0)
            return fail(errno);
        sock_opt = 20;
        if (setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL, &sock_opt, sizeof(sock_opt)) < 0)
            return fail(errno);
    }

    /* Initialize umem frame allocation */
    for (uint64_t i = 0; i < NUM_FRAMES; i++)
        xsk_info->umem_frame_addr[i] = i * FRAME_SIZE;  // init all frame addresses
    xsk_info->umem_frame_free = NUM_FRAMES;             // free frames count in pool

    /* fill frames into fill ring to let kernel fill rx packets */
    const uint32_t fill_count = XSK_RING_CONS__DEFAULT_NUM_DESCS;
    uint32_t idx = 0;
    const uint32_t reserved = xsk_ring_prod__reserve(&xsk_info->umem_info->fq, fill_count, &idx);
    if (reserved != fill_count) {
        // The return value here is a descriptor count, not a negative errno, so it
        // must not be negated into errno (that used to report errno == 0).
        return fail(ENOSPC);
    }
    for (uint32_t i = 0; i < fill_count; i++) {
        *xsk_ring_prod__fill_addr(&xsk_info->umem_info->fq, idx++) =
            xsk_alloc_umem_frame(*xsk_info);  // alloc frame to fill ring
    }
    // Submit exactly as many descriptors as were reserved and filled above.
    xsk_ring_prod__submit(&xsk_info->umem_info->fq, fill_count);  // ownership to kernel

    return xsk_info;
}
Send packet
bool XDPSocket::SendRawEthernetPacketData(std::function<size_t(char*, size_t)> callback) {
uint32_t tx_idx;
uint32_t ret = xsk_ring_prod__reserve(&mData->socket_info->tx, 1, &tx_idx);
if (UNLIKELY(ret != 1)) {
return false;
}
uint64_t frame_addr = xsk_alloc_umem_frame(*mData->socket_info);
if (UNLIKELY(frame_addr == INVALID_UMEM_FRAME)) {
return false;
}
*xsk_ring_prod__fill_addr(&mData->socket_info->tx, tx_idx) = frame_addr;
struct xdp_desc* tx_desc = xsk_ring_prod__tx_desc(&mData->socket_info->tx, tx_idx);
char* pkt = (char*)xsk_umem__get_data(mData->umem_info->buffer, frame_addr);
tx_desc->addr = frame_addr;
tx_desc->len = callback(pkt, FRAME_SIZE); // fill pkt and return frame len
if (UNLIKELY(tx_desc->len == 0 || tx_desc->len > FRAME_SIZE)) {
return false;
}
if (UNLIKELY(mData->pcap_file && mData->capture_tx_packets)) {
mData->pcap_file->AddFrame(pkt, tx_desc->len);
}
xsk_ring_prod__submit(&mData->socket_info->tx, 1);
mData->socket_info->outstanding_tx++;
return true;
}
/**
 * Drive transmission of everything queued on the TX ring and reclaim the frames.
 *
 * Loops while outstanding_tx is non-zero: kicks the kernel with a zero-length
 * non-blocking sendto(), then drains the completion ring, returning each completed
 * frame to the UMEM frame pool and decrementing outstanding_tx.
 *
 * Returns early, leaving outstanding_tx non-zero, if the kick fails with an error
 * other than ENOBUFS, EAGAIN, EBUSY or ENETDOWN, so that a permanent socket error
 * cannot turn this into an endless spin.
 */
void XDPSocket::FlushTxBuffer() {
    auto& sock = *mData->socket_info;

    while (sock.outstanding_tx) {
        // Kick TX unconditionally. The kick used to be issued only in busy-poll mode,
        // so in the other mode nothing ever asked the driver to process the TX ring.
        // NOTE(review): assumes the socket was bound with XDP_USE_NEED_WAKEUP only when
        // busy polling is enabled; without that flag the wakeup hint is unavailable and
        // the application has to kick on its own.
        if (sendto(xsk_socket__fd(sock.xsk), nullptr, 0, MSG_DONTWAIT, nullptr, 0) < 0) {
            const int err = errno;
            const bool transient = err == ENOBUFS || err == EAGAIN || err == EBUSY || err == ENETDOWN;
            if (!transient) {
                return;
            }
        }

        /* Collect/free completed TX buffers */
        uint32_t idx_cq = 0;
        const unsigned int completed =
            xsk_ring_cons__peek(&mData->umem_info->cq, XSK_RING_CONS__DEFAULT_NUM_DESCS, &idx_cq);
        if (completed == 0) {
            continue;
        }

        for (uint32_t i = 0; i < completed; i++) {
            xsk_free_umem_frame(sock, *xsk_ring_cons__comp_addr(&mData->umem_info->cq, idx_cq++));
        }
        xsk_ring_cons__release(&mData->umem_info->cq, completed);

        // Clamp so the counter cannot wrap if more completions arrive than were counted.
        sock.outstanding_tx -= completed < sock.outstanding_tx ? completed : sock.outstanding_tx;
    }
}
Log
libxdp: Loading XDP program 'xsk_def_xdp_prog.o' from embedded object file
libbpf: elf: skipping unrecognized data section(8) .xdp_run_config
libbpf: elf: skipping unrecognized data section(9) xdp_metadata
libxdp: Generating multi-prog dispatcher for 1 programs
libxdp: Checking for kernel frags support
libxdp: Loading XDP program 'xdp-dispatcher.o' from embedded object file
libbpf: elf: skipping unrecognized data section(7) xdp_metadata
libxdp: DATASEC '.xdp_run_config' not found.
libbpf: prog 'xdp_pass': BPF program load failed: Invalid argument
libbpf: prog 'xdp_pass': failed to load: -22
libbpf: failed to load object 'xdp-dispatcher.o'
libxdp: Kernel DOES NOT support XDP programs with frags
libxdp: Loading XDP program 'xdp-dispatcher.o' from embedded object file
libbpf: elf: skipping unrecognized data section(7) xdp_metadata
libxdp: DATASEC '.xdp_run_config' not found.
libxdp: Loading multiprog dispatcher for 1 programs without frags support
libxdp: Loaded XDP program xdp_dispatcher, got fd 7
libxdp: Duplicated fd 7 to 9 for prog xdp_dispatcher
libxdp: Checking dispatcher compatibility
libxdp: Loading XDP program 'xdp-dispatcher.o' from embedded object file
libbpf: elf: skipping unrecognized data section(7) xdp_metadata
libxdp: DATASEC '.xdp_run_config' not found.
libxdp: Loading XDP program 'xdp-dispatcher.o' from embedded object file
libbpf: elf: skipping unrecognized data section(7) xdp_metadata
libxdp: DATASEC '.xdp_run_config' not found.
libxdp: Loaded XDP program xdp_pass, got fd 13
libxdp: Duplicated fd 13 to 14 for prog xdp_pass
libxdp: Loaded XDP program xdp_pass, got fd 18
libxdp: Duplicated fd 18 to 19 for prog xdp_pass
libxdp: Acquired lock from /sys/fs/bpf/xdp with fd 21
libxdp: Released lock fd 21
libxdp: Linking prog xsk_def_prog as multiprog entry 0
libxdp: Loaded XDP program xsk_def_prog, got fd 13
libxdp: Duplicated fd 13 to 14 for prog xsk_def_prog
libxdp: Duplicated fd 14 to 15 for prog xsk_def_prog
libxdp: Attached prog 'xsk_def_prog' with priority 20 in dispatcher entry 'prog0' with fd 16
libxdp: Acquired lock from /sys/fs/bpf/xdp with fd 17
libxdp: Pinning multiprog fd 9 beneath /sys/fs/bpf/xdp/dispatch-6-5804
libxdp: Pinned link for prog xsk_def_prog at /sys/fs/bpf/xdp/dispatch-6-5804/prog0-link
libxdp: Pinned prog xsk_def_prog at /sys/fs/bpf/xdp/dispatch-6-5804/prog0-prog
libxdp: Released lock fd 17
libxdp: Replacing XDP fd -1 with 9 on ifindex 6
libxdp: Loaded 1 programs on ifindex 6
BUG workaround: sleep one minute to make sure xdp ready to send packets
send package 0
send package 1
send package 2
send package 3
send package 4
send package 5
send package 6
send package 7
send package 8
send package 9
^C libxdp: Acquired lock from /sys/fs/bpf/xdp with fd 6
libxdp: Released lock fd 6
libxdp: Verified XDP dispatcher version 2 <= 2
libxdp: Duplicated fd 6 to 7 for prog xdp_dispatcher
libxdp: DATASEC '.xdp_run_config' not found.
libxdp: Acquired lock from /sys/fs/bpf/xdp with fd 8
libxdp: Reading multiprog component programs from pinned directory
libxdp: Duplicated fd 9 to 15 for prog xsk_def_prog
libxdp: Released lock fd 8
libxdp: Found multiprog with id 5804 and 1 component progs
libxdp: Replacing XDP fd 7 with -1 on ifindex 6
libxdp: Detached multiprog on ifindex 6
libxdp: Acquired lock from /sys/fs/bpf/xdp with fd 6
libxdp: Unpinning multiprog fd 7 beneath /sys/fs/bpf/xdp/dispatch-6-5804
libxdp: Unpinned link for prog xsk_def_prog from /sys/fs/bpf/xdp/dispatch-6-5804/prog0-link
libxdp: Unpinned prog xsk_def_prog from /sys/fs/bpf/xdp/dispatch-6-5804/prog0-prog
libxdp: Removed pin directory /sys/fs/bpf/xdp/dispatch-6-5804
libxdp: Released lock fd 6
Result
Packets placed on the tx
ring should be sent as soon as the xsk_socket
is initialized, but I have to wait about 30 more seconds before sending (calling SendRawEthernetPacketData
and FlushTxBuffer
); otherwise the packets are lost (they are never received on the target machine, as checked with tcpdump
)
xsk_socket__create
seems to do a lot of things:
- set the NIC down
- load the BPF program
- set the NIC up (the attached bpf prog shown with
ip link
)
I have no idea why an extra 30 seconds of waiting is needed before sending works.
30 seconds is quite excessive indeed! For a large system with around 100 cores, it might take nearly a second to set everything up, but not 30 s. Could you please try on a newer kernel and see if you get the same problem? Mainline 6.7 would be one good choice.
Do you know how much time each of steps 1, 2, and 3 above consumes? Or is it only after step 3 has completed that you have to wait for 30 seconds?
@magnus-karlsson I tried kernel 6.6.11-x64v3-xanmod1
and the problem is not fixed: I still need to wait an extra 30 seconds after step 3.
If the BPF prog is already attached to the NIC, then steps 1–3 are skipped and there is no need to wait the extra 30 seconds.