xdp-project / xdp-tools

Utilities and example programs for use with XDP

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

xsk_socket__create returns successfully, but sending packets only works after waiting about 30 extra seconds

comphilip opened this issue · comments

Environment

Linux 5.15.0-91-generic #101-Ubuntu SMP Tue Nov 14 13:30:08 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux

NIC: Intel Corporation Ethernet Controller 10-Gigabit X540-AT2

driver: ixgbe
version: 5.15.0-91-generic
firmware-version: 0x00015e11, 1.3050.0
expansion-rom-version: 
bus-info: 0000:81:00.0
supports-statistics: yes
supports-test: yes
supports-eeprom-access: yes
supports-register-dump: yes
supports-priv-flags: yes

Reproduce

AF_XDP initialization

/**
 * Create an AF_XDP socket on one queue of a network interface and prime its fill ring.
 *
 * The socket is requested in native driver mode with zero-copy binding. When
 * `option.busy_poll` is set, XDP_USE_NEED_WAKEUP is added to the bind flags and the
 * SO_PREFER_BUSY_POLL / SO_BUSY_POLL socket options are enabled.
 *
 * @param umem_info UMEM the socket is bound to; its fill ring (`fq`) is populated here.
 *                  A pointer to it is stored in the returned object, so it must outlive it.
 * @param option    Interface name, queue id and busy-poll switch.
 * @return The configured socket info, or nullptr on failure. On failure `errno`
 *         describes the error (left as set by the failing system call for
 *         if_nametoindex/setsockopt).
 *
 * NOTE(review): if a step after xsk_socket__create() fails, the created socket is not
 * explicitly deleted here — confirm that xsk_socket_info's destructor releases it.
 */
static std::unique_ptr<xsk_socket_info> xsk_configure_socket(xsk_umem_info& umem_info, const XDPSocketOption& option) {
    // Failure path for calls that report a negative errno-style code.
    const auto fail_with = [](int negative_errno) -> std::unique_ptr<xsk_socket_info> {
        errno = -negative_errno;
        return nullptr;
    };

    const unsigned int if_index = if_nametoindex(option.if_name.c_str());
    if (!if_index) {
        perror("failed to get ifindex");
        return nullptr;
    }

    // `new` throws on allocation failure, so no null check is needed afterwards.
    std::unique_ptr<xsk_socket_info> xsk_info(new xsk_socket_info());
    xsk_info->umem_info = &umem_info;

    xsk_socket_config xsk_cfg = {};
    xsk_cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
    xsk_cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
    xsk_cfg.xdp_flags = XDP_FLAGS_DRV_MODE; // NIC driver should support XDP to achieve high performance
    xsk_cfg.bind_flags = XDP_ZEROCOPY;      // NIC driver should support zero copy to achieve high performance
    if (option.busy_poll)
        xsk_cfg.bind_flags |= XDP_USE_NEED_WAKEUP;
    xsk_cfg.libbpf_flags = 0;

    int ret = xsk_socket__create(&xsk_info->xsk, option.if_name.c_str(), option.queue_id, umem_info.umem,
                                 &xsk_info->rx, &xsk_info->tx, &xsk_cfg);
    if (ret)
        return fail_with(ret);

    // Query the XDP program id on the interface; only the success of the query is checked.
    unsigned int prog_id = 0;
    ret = bpf_xdp_query_id(if_index, xsk_cfg.xdp_flags, &prog_id);
    if (ret != 0)
        return fail_with(ret);

    if (option.busy_poll) {
        const int sock_fd = xsk_socket__fd(xsk_info->xsk);

        int sock_opt = 1;
        if (setsockopt(sock_fd, SOL_SOCKET, SO_PREFER_BUSY_POLL, &sock_opt, sizeof(sock_opt)) < 0)
            return nullptr; // errno already set by setsockopt

        sock_opt = 20;
        if (setsockopt(sock_fd, SOL_SOCKET, SO_BUSY_POLL, &sock_opt, sizeof(sock_opt)) < 0)
            return nullptr; // errno already set by setsockopt
    }

    /* Initialize umem frame allocation: every frame starts out free. */
    for (uint64_t i = 0; i < NUM_FRAMES; i++)
        xsk_info->umem_frame_addr[i] = i * FRAME_SIZE;
    xsk_info->umem_frame_free = NUM_FRAMES;

    /* Hand frames to the kernel through the fill ring so it can place received packets.
     * The fill ring is a producer ring, so its size constant is used throughout. */
    const unsigned int fill_count = XSK_RING_PROD__DEFAULT_NUM_DESCS;
    unsigned int idx = 0;
    const unsigned int reserved = xsk_ring_prod__reserve(&xsk_info->umem_info->fq, fill_count, &idx);
    if (reserved != fill_count) {
        // `reserved` is a slot count, not an error code: report a real errno value.
        return fail_with(-ENOSPC);
    }

    for (unsigned int i = 0; i < fill_count; i++)
        *xsk_ring_prod__fill_addr(&xsk_info->umem_info->fq, idx++) = xsk_alloc_umem_frame(*xsk_info);

    xsk_ring_prod__submit(&xsk_info->umem_info->fq, fill_count); // ownership to kernel

    return xsk_info;
}

Send packet

bool XDPSocket::SendRawEthernetPacketData(std::function<size_t(char*, size_t)> callback) {
    uint32_t tx_idx;
    uint32_t ret = xsk_ring_prod__reserve(&mData->socket_info->tx, 1, &tx_idx);
    if (UNLIKELY(ret != 1)) {
        return false;
    }
    uint64_t frame_addr = xsk_alloc_umem_frame(*mData->socket_info);
    if (UNLIKELY(frame_addr == INVALID_UMEM_FRAME)) {
        return false;
    }
    *xsk_ring_prod__fill_addr(&mData->socket_info->tx, tx_idx) = frame_addr;
    struct xdp_desc* tx_desc = xsk_ring_prod__tx_desc(&mData->socket_info->tx, tx_idx);
    char* pkt = (char*)xsk_umem__get_data(mData->umem_info->buffer, frame_addr);
    tx_desc->addr = frame_addr;
    tx_desc->len = callback(pkt, FRAME_SIZE); // fill pkt and return frame len
    if (UNLIKELY(tx_desc->len == 0 || tx_desc->len > FRAME_SIZE)) {
        return false;
    }
    if (UNLIKELY(mData->pcap_file && mData->capture_tx_packets)) {
        mData->pcap_file->AddFrame(pkt, tx_desc->len);
    }
    xsk_ring_prod__submit(&mData->socket_info->tx, 1);
    mData->socket_info->outstanding_tx++;
    return true;
}
/**
 * Block until every packet counted in `outstanding_tx` has completed.
 *
 * Each iteration kicks the kernel with a zero-length non-blocking sendto(), then drains
 * the completion ring, returning completed frames to the UMEM frame pool.
 *
 * NOTE(review): the loop has no timeout and ignores sendto() errors, so it spins for as
 * long as the kernel reports no completions (e.g. while the link is down).
 */
void XDPSocket::FlushTxBuffer() {
    auto& sock = *mData->socket_info;

    while (sock.outstanding_tx) {
        // Always kick TX. xsk_configure_socket() requests XDP_USE_NEED_WAKEUP only together
        // with busy polling; without that flag the kernel has to be notified by a syscall
        // to process the TX ring, and with busy polling the syscall drives the poll itself.
        // NOTE(review): assumes mData->busy_poll mirrors the option used at socket setup.
        sendto(xsk_socket__fd(sock.xsk), nullptr, 0, MSG_DONTWAIT, nullptr, 0);

        /* Collect/free completed TX buffers */
        uint32_t idx_cq;
        const unsigned int completed =
            xsk_ring_cons__peek(&mData->umem_info->cq, XSK_RING_CONS__DEFAULT_NUM_DESCS, &idx_cq);
        if (completed == 0) {
            continue;
        }

        for (uint32_t i = 0; i < completed; i++) {
            xsk_free_umem_frame(sock, *xsk_ring_cons__comp_addr(&mData->umem_info->cq, idx_cq++));
        }
        xsk_ring_cons__release(&mData->umem_info->cq, completed);

        // Clamp so the counter can never wrap below zero.
        sock.outstanding_tx -= completed < sock.outstanding_tx ? completed : sock.outstanding_tx;
    }
}

Log

  libxdp: Loading XDP program 'xsk_def_xdp_prog.o' from embedded object file
libbpf: elf: skipping unrecognized data section(8) .xdp_run_config
libbpf: elf: skipping unrecognized data section(9) xdp_metadata
  libxdp: Generating multi-prog dispatcher for 1 programs
  libxdp: Checking for kernel frags support
  libxdp: Loading XDP program 'xdp-dispatcher.o' from embedded object file
libbpf: elf: skipping unrecognized data section(7) xdp_metadata
  libxdp: DATASEC '.xdp_run_config' not found.
libbpf: prog 'xdp_pass': BPF program load failed: Invalid argument
libbpf: prog 'xdp_pass': failed to load: -22
libbpf: failed to load object 'xdp-dispatcher.o'
  libxdp: Kernel DOES NOT support XDP programs with frags
  libxdp: Loading XDP program 'xdp-dispatcher.o' from embedded object file
libbpf: elf: skipping unrecognized data section(7) xdp_metadata
  libxdp: DATASEC '.xdp_run_config' not found.
  libxdp: Loading multiprog dispatcher for 1 programs without frags support
  libxdp: Loaded XDP program xdp_dispatcher, got fd 7
  libxdp: Duplicated fd 7 to 9 for prog xdp_dispatcher
  libxdp: Checking dispatcher compatibility
  libxdp: Loading XDP program 'xdp-dispatcher.o' from embedded object file
libbpf: elf: skipping unrecognized data section(7) xdp_metadata
  libxdp: DATASEC '.xdp_run_config' not found.
  libxdp: Loading XDP program 'xdp-dispatcher.o' from embedded object file
libbpf: elf: skipping unrecognized data section(7) xdp_metadata
  libxdp: DATASEC '.xdp_run_config' not found.
  libxdp: Loaded XDP program xdp_pass, got fd 13
  libxdp: Duplicated fd 13 to 14 for prog xdp_pass
  libxdp: Loaded XDP program xdp_pass, got fd 18
  libxdp: Duplicated fd 18 to 19 for prog xdp_pass
  libxdp: Acquired lock from /sys/fs/bpf/xdp with fd 21
  libxdp: Released lock fd 21
  libxdp: Linking prog xsk_def_prog as multiprog entry 0
  libxdp: Loaded XDP program xsk_def_prog, got fd 13
  libxdp: Duplicated fd 13 to 14 for prog xsk_def_prog
  libxdp: Duplicated fd 14 to 15 for prog xsk_def_prog
  libxdp: Attached prog 'xsk_def_prog' with priority 20 in dispatcher entry 'prog0' with fd 16
  libxdp: Acquired lock from /sys/fs/bpf/xdp with fd 17
  libxdp: Pinning multiprog fd 9 beneath /sys/fs/bpf/xdp/dispatch-6-5804
  libxdp: Pinned link for prog xsk_def_prog at /sys/fs/bpf/xdp/dispatch-6-5804/prog0-link
  libxdp: Pinned prog xsk_def_prog at /sys/fs/bpf/xdp/dispatch-6-5804/prog0-prog
  libxdp: Released lock fd 17
  libxdp: Replacing XDP fd -1 with 9 on ifindex 6
  libxdp: Loaded 1 programs on ifindex 6
BUG workaround: sleep one minute to make sure xdp ready to send packets
send package 0
send package 1
send package 2
send package 3
send package 4
send package 5
send package 6
send package 7
send package 8
send package 9
^C  libxdp: Acquired lock from /sys/fs/bpf/xdp with fd 6
  libxdp: Released lock fd 6
  libxdp: Verified XDP dispatcher version 2 <= 2
  libxdp: Duplicated fd 6 to 7 for prog xdp_dispatcher
  libxdp: DATASEC '.xdp_run_config' not found.
  libxdp: Acquired lock from /sys/fs/bpf/xdp with fd 8
  libxdp: Reading multiprog component programs from pinned directory
  libxdp: Duplicated fd 9 to 15 for prog xsk_def_prog
  libxdp: Released lock fd 8
  libxdp: Found multiprog with id 5804 and 1 component progs
  libxdp: Replacing XDP fd 7 with -1 on ifindex 6
  libxdp: Detached multiprog on ifindex 6
  libxdp: Acquired lock from /sys/fs/bpf/xdp with fd 6
  libxdp: Unpinning multiprog fd 7 beneath /sys/fs/bpf/xdp/dispatch-6-5804
  libxdp: Unpinned link for prog xsk_def_prog from /sys/fs/bpf/xdp/dispatch-6-5804/prog0-link
  libxdp: Unpinned prog xsk_def_prog from /sys/fs/bpf/xdp/dispatch-6-5804/prog0-prog
  libxdp: Removed pin directory /sys/fs/bpf/xdp/dispatch-6-5804
  libxdp: Released lock fd 6

Result

Packets placed on the TX ring should be sent as soon as the xsk_socket is initialized. Instead, I have to wait about 30 extra seconds before sending (calling SendRawEthernetPacketData and FlushTxBuffer); otherwise the packets are lost (they are never seen by tcpdump on the target machine).

xsk_socket__create seems to do a lot of things:

  1. set the NIC down
  2. load the BPF program
  3. set the NIC up (the attached bpf prog shown with ip link)

I have no idea why an extra 30-second wait is needed before sending works.

30 seconds is quite excessive indeed! For a large system with around 100 cores, it might take nearly a second to set everything up, but not 30 s. Could you please try on a newer kernel and see if you get the same problem? Mainline 6.7 would be one good choice.

Do you know how much time each of steps 1, 2, and 3 above takes? Or is it only after step 3 has completed that you have to wait for 30 seconds?

@magnus-karlsson I tried kernel 6.6.11-x64v3-xanmod1 and the problem is not fixed: I still need to wait an extra 30 seconds after step 3.

If the BPF program is already attached to the NIC, steps 1–3 are skipped and there is no need to wait the extra 30 seconds.